import tweepy
from tweepy import OAuthHandler
from tweepy import Stream
from tweepy.streaming import StreamListener
import sys
import json
from nltk.tokenize import word_tokenize
from nltk.tokenize import TweetTokenizer
import re
from os import path
import matplotlib.pyplot as plt
from wordcloud import WordCloud, STOPWORDS
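# Dependency note: this script assumes tweepy 3.x. In tweepy 4.x, StreamListener
# was removed and its callbacks were folded into tweepy.Stream, so the imports
# above (and the Listener class below) would need to be adapted for that version.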

consumer_key = "1I7DImvpcMNikoRO7X388ihbg"
consumer_secret = "jhca6YSECJKncLfoajMP4xkBChaj69SuGNqeRNaIJeD06EQFsy"
# bearer_token = "AAAAAAAAAAAAAAAAAAAAAKNrGAEAAAAAl7%2BE903hTQeHF9PxDhuhD7XykjY%3DLJ8l7HbYtLuLb04QDMxzjXml7dk8tyIYavktaIgND1dLNistaL"  # not used below
access_token = "1095899256228061184-HllRyiRB8OkSOPpDDTad0xh3477csP"
access_secret = "Fjvb16Oaitmi6gYH1tmJFOvQQucpFwvFgNuDUUF1xReex"

auth = OAuthHandler(consumer_key, consumer_secret)
auth.set_access_token(access_token, access_secret)
api = tweepy.API(auth)
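# OAuth 1a: the consumer key/secret identify the application, and the access
# token/secret identify the account making the requests. The same auth object
# is reused below for the streaming connection.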

class Listener(StreamListener):
    print("In Listener...")
    tweet_number = 0

    # __init__ runs as soon as an instance of the class is created
    def __init__(self, max_tweets, hfilename, rawfile):
        super().__init__()
        self.max_tweets = max_tweets
        self.hfilename = hfilename
        self.rawfile = rawfile
        print(self.max_tweets)

    # on_data() is a method of StreamListener, as are on_error() and on_status()
    def on_data(self, data):
        self.tweet_number += 1
        print("In on_data", self.tweet_number)
        try:
            print("In on_data in try")
            with open(self.hfilename, 'a') as f:
                with open(self.rawfile, 'a') as g:
                    tweet = json.loads(data)
                    tweet_text = tweet["text"]
                    print(tweet_text, "\n")
                    f.write(tweet_text + "\n")  # the text from the tweet
                    json.dump(tweet, g)         # write the raw tweet
                    g.write("\n")               # one JSON object per line
        except BaseException as e:
            print("NOPE", e)
        if self.tweet_number >= self.max_tweets:
            # sys.exit('Limit of '+str(self.max_tweets)+' tweets reached.')
            print("Got", str(self.max_tweets), "tweets.")
            return False  # returning False disconnects the stream

    # method for on_error()
    def on_error(self, status):
        print("ERROR")
        if status == 420:
            print("Error", status, "rate limited")
            return False

# ----------------end of class Listener
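# A minimal usage sketch for the listener (hypothetical values, assuming tweepy 3.x
# and valid credentials); it mirrors the live calls made further below:
#   listener = Listener(10, "file_test.txt", "file_rawtweets_test.txt")
#   stream = Stream(auth, listener)
#   stream.filter(track=["#example"])   # blocks until on_data() returns False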

hashname = input("Enter the hash name, such as #womensrights: ")
numtweets = int(input("How many tweets do you want to get?: "))

if hashname[0] == "#":
    nohashname = hashname[1:]  # remove the hash
else:
    nohashname = hashname
    hashname = "#" + hashname

# Create a file for any hash name
hfilename = "file_" + nohashname + ".txt"
rawfile = "file_rawtweets_" + nohashname + ".txt"

twitter_stream = Stream(auth, Listener(numtweets, hfilename, rawfile))
# twitter_stream.filter(track=['#womensrights'])
twitter_stream.filter(track=[hashname])
print("Twitter files created....")

linecount = 0
hashcount = 0
wordcount = 0
BagOfWords = []
BagOfHashes = []
BagOfLinks = []

### SET THE FILE NAME ###
tweetsfile = hfilename
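# TweetTokenizer (from nltk.tokenize.casual) is tweet-aware: strip_handles=True drops
# @mentions and reduce_len=True collapses runs of 3+ repeated characters
# (e.g. "soooo" -> "sooo"), which plain word_tokenize() would not do.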

tweetSplitter = TweetTokenizer(strip_handles=True, reduce_len=True)

# Regexes for classifying tokens
regex1 = re.compile(r'^#.+')      # hashtags
regex2 = re.compile(r'[^\W\d]')   # words: letters only, no numbers
regex3 = re.compile(r'^http')     # links
regex4 = re.compile(r'.+\..+')    # anything with a dot, e.g. domain names

with open(tweetsfile, 'r') as file:
    for line in file:
        # print(line, "\n")
        WordList = tweetSplitter.tokenize(line)
        # WordList2 = word_tokenize(line)
        # linecount = linecount + 1
        # print(WordList)
        # print(len(WordList))
        # print(WordList[0])
        # print(WordList2)
        # print(len(WordList2))
        # print(WordList2[3:6])
        # print("NEXT..........\n")
        for item in WordList:
            if len(item) > 2:
                if re.match(regex1, item):
                    # print(item)
                    newitem = item[1:]  # remove the hash
                    BagOfHashes.append(newitem)
                    hashcount = hashcount + 1
                elif re.match(regex2, item):
                    if re.match(regex3, item) or re.match(regex4, item):
                        BagOfLinks.append(item)
                    else:
                        BagOfWords.append(item)
                        wordcount = wordcount + 1
                else:
                    pass
            else:
                pass

# print(linecount)
# print(BagOfWords)
# print(BagOfHashes)
# print(BagOfLinks)

BigBag = BagOfWords + BagOfHashes

# list of words I have seen
seenit = []
# dict of word counts
WordDict = {}

Rawfilename = "TwitterResultsRaw.txt"
Freqfilename = "TwitterWordFrq.txt"
# FILE=open(Freqfilename,"w")
# FILE2=open(Rawfilename, "w")
R_FILE = open(Rawfilename, "w")
F_FILE = open(Freqfilename, "w")

# Hand-built, case-sensitive stopword list of words to skip when counting
IgnoreThese = ["and", "And", "AND", "THIS", "This", "this", "for", "FOR", "For",
               "THE", "The", "the", "is", "IS", "Is", "or", "OR", "Or", "will",
               "Will", "WILL", "Via", "via", "know", "take", "Take", "left", "Left",
               "lot", "Lot", "last", "Last", "Wonder", "still", "Still",
               "how", "How", "HOW", "many", "Many", "MANY"]
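# Note: this manual list could be replaced or extended by lowercasing tokens and
# checking them against nltk.corpus.stopwords.words('english') or the STOPWORDS set
# already imported from wordcloud; the explicit list preserves case-sensitive matching.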

### Look at the words
for w in BigBag:
    if w not in IgnoreThese:
        rawWord = w + " "
        R_FILE.write(rawWord)
        if w in seenit:
            # print(w, seenit)
            WordDict[w] = WordDict[w] + 1  # increment the times the word has been seen
        else:
            ## add word to dict and seenit
            seenit.append(w)
            WordDict[w] = 1

# Write out "word,count" lines for every word seen more than once
for key in WordDict:
    # print(WordDict[key])
    if WordDict[key] > 1:
        if key not in IgnoreThese:
            # print(key)
            Key_Value = key + "," + str(WordDict[key]) + "\n"
            F_FILE.write(Key_Value)

R_FILE.close()
F_FILE.close()
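# The same frequency table could be built with the standard library's
# collections.Counter (a sketch, not part of the original flow):
#   from collections import Counter
#   counts = Counter(w for w in BigBag if w not in IgnoreThese)
#   for word, n in counts.items():
#       if n > 1:
#           F_FILE.write(word + "," + str(n) + "\n")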

d = path.dirname(__file__)
Rawfilename = "TwitterResultsRaw.txt"
with open(path.join(d, Rawfilename)) as f:
    text = f.read()
# print(text)
# lines = f.readlines()
# text = "".join(lines)

wordcloud = WordCloud().generate(text)

# Open a plot of the generated image.
# plt.figure(figsize=(20, 2))
plt.figure(figsize=(50, 40))
plt.imshow(wordcloud)  # , aspect="auto")
plt.axis("off")
plt.show()
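# WordCloud.generate() tokenizes the text and drops its built-in STOPWORDS before
# sizing words by frequency. To save the image instead of (or as well as) showing it,
# wordcloud.to_file("twitter_wordcloud.png") could be added here (hypothetical filename).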