Sentiment Analysis Tool

Search a Stock

{% block content %}

// Replace the contents of the element with id "output" with a fixed message.
// Extra call arguments are ignored.
function sub() {
  document.getElementById("output").innerHTML = "I have changed!";
}

{{ form.stock.label }}
{{ form.stock(size=32) }}

{% endblock %}

Regex: {{reg}}

Text Blob: {{blob}}

Word Embeddings: {{emb}}

Bag of Words: {{bag}}

Stock Mentions

{% for result in title_array %}
{{result}}
{% endfor %}
{% for result in discussion_array %}
{{result}}
{% endfor %}

Top 5 Bottom 5

Top 5

1. {{top5[0]}}

2. {{top5[1]}}

3. {{top5[2]}}

4. {{top5[3]}}

5. {{top5[4]}}

Bottom 5

1. {{bottom5[0]}}

2. {{bottom5[1]}}

3. {{bottom5[2]}}

4. {{bottom5[3]}}

5. {{bottom5[4]}}

# Third-party requirements: scikit-learn, praw
import logging
import os
import re as r
from functools import lru_cache

import praw
from sklearn import svm
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.preprocessing import MultiLabelBinarizer

# NOTE(review): TextBlob, render_template and LoginForm are used below but no
# import for them is visible in this part of the file -- confirm they are
# imported elsewhere.

logger = logging.getLogger(__name__)

# Labels the SVM is trained on.
positive = "YAY"
negative = "NAY"
neutral = "MEH"

# Hand-labelled training phrases; _TRAINING_LABELS[i] labels _TRAINING_PHRASES[i].
_TRAINING_PHRASES = (
    "bring you gains",
    "this is meh",
    "do you guys think",
    "this stock sucks",
    "Happy",
    "yay i'm so glad I bought it",
    "do not get this",
    "so awesome",
    "i love this",
    "green",
    "red",
    "killing me",
    "down",
    "nice run",
    "slaughtered",
    "winner",
    "up",
    "going strong",
    "fucking",
    "signs deal",
    "killing it",
    "dip",
    "stop",
    "painful",
    "it's a bloodbath",
    "portfolios red",
)
_TRAINING_LABELS = (
    positive, neutral, neutral, negative, positive, positive, negative,
    positive, positive, positive, negative, negative, negative, positive,
    negative, positive, positive, positive, negative, positive, positive,
    negative, negative, negative, negative, negative,
)

# Maps the classifier's label to the answer string the rest of the app uses.
_LABEL_TO_ANSWER = {positive: "YES", negative: "NO", neutral: "MAYBE"}

# Positive / negative jargon used for stocks (plain substring alternations).
_POSITIVE_JARGON = r.compile(
    r"like|awesome|good|green|more|up|cash out|going strong|open to|tasty|wave|signs deal"
)
_NEGATIVE_JARGON = r.compile(
    r"losing|hate|awful|down|sick of|red|plummeting|killing|tf|what happened|wtf|no|slaughtered|frustrating|crushed"
)

_SUBREDDIT = "CanadianInvestor"
_HOT_LIMIT = 100
_DAILY_MARKER = "Daily"
_TOP_N = 5


@lru_cache(maxsize=1)
def _sentiment_model():
    """Train the bag-of-words SVM once and return ``(vectorizer, classifier)``."""
    vectorizer = CountVectorizer()
    train_vectors = vectorizer.fit_transform(list(_TRAINING_PHRASES))
    classifier = svm.SVC(kernel="linear").fit(train_vectors, list(_TRAINING_LABELS))
    return vectorizer, classifier


def bag_of_words(text):
    """Classify *text* with a linear SVM over bag-of-words counts.

    Returns "YES" (positive), "NO" (negative) or "MAYBE" (neutral).
    The model is trained on first use and reused afterwards instead of being
    rebuilt on every call, and the prediction is computed once.
    """
    vectorizer, classifier = _sentiment_model()
    label = classifier.predict(vectorizer.transform([text]))[0]
    return _LABEL_TO_ANSWER[str(label)]


def re_analysis(text):
    """Classify *text* by searching for stock jargon.

    Returns "NO" if any negative term occurs, otherwise "YES" if any positive
    term occurs, otherwise None. A negative match deliberately overrides a
    positive one, as in the original implementation.
    """
    sample = str(text)
    result = None
    if _POSITIVE_JARGON.search(sample):
        result = "YES"
    if _NEGATIVE_JARGON.search(sample):
        result = "NO"
    logger.debug("re_analysis result: %s", result)
    return result


def text_blob(text):
    """Return the TextBlob polarity of *text* as a whole.

    The previous version returned from inside its sentence loop, so it scored
    only the first sentence and returned None for text without sentences,
    which then broke the averaging in ``blob_calculator``.
    """
    return TextBlob(text).sentiment.polarity


# NOTE(review): these credentials were committed in source. They remain as
# fallbacks so existing deployments keep working, but they should be rotated
# and supplied only through the environment.
reddit = praw.Reddit(
    client_id=os.environ.get("REDDIT_CLIENT_ID", "C61jSZCEdHCjWA"),
    client_secret=os.environ.get("REDDIT_CLIENT_SECRET", "VeOvnfcdC7vOVnrka5XtT1MXKFz53A"),
    password=os.environ.get("REDDIT_PASSWORD", "paulina123"),
    user_agent=os.environ.get("REDDIT_USER_AGENT", "testscript by u/paulina123"),
    username=os.environ.get("REDDIT_USERNAME", "paulina_coding"),
)


def _hot_submissions():
    """Return the current hot listing of the monitored subreddit."""
    return reddit.subreddit(_SUBREDDIT).hot(limit=_HOT_LIMIT)


def _top_level_comment_bodies(submission):
    """Return the text of every top-level comment of *submission*."""
    # Drop "load more comments" placeholders, which have no ``body``.
    submission.comments.replace_more(limit=0)
    return [comment.body for comment in submission.comments]


def specific_stock_search(stock_name, option):
    """Search the subreddit for *stock_name* and score the matching text.

    option: 0 searches post titles, 1 searches the top-level comments of posts
    whose title contains "Daily", 2 searches both. Any other value searches
    nothing.

    Returns a 5-tuple ``(blob, bag, reg, stock_titles, stock_discussion)``:
    the mean TextBlob polarity, the majority bag-of-words answer, the majority
    regex answer, the matching titles and the matching comments.
    """
    blob_scores = []
    bag_votes = []
    regex_votes = []
    stock_titles = []
    stock_discussion = []

    def record(text):
        blob_scores.append(text_blob(text))
        bag_votes.append(bag_of_words(text))
        regex_votes.append(re_analysis(text))

    if option in (0, 2):
        for item in _hot_submissions():
            if stock_name in item.title:
                stock_titles.append(item.title)
                record(item.title)

    if option in (1, 2):
        for item in _hot_submissions():
            if _DAILY_MARKER not in item.title:
                continue
            logger.debug("scanning discussion thread %s", item.url)
            for body in _top_level_comment_bodies(item):
                if stock_name in body:
                    stock_discussion.append(body)
                    record(body)

    # Combine the per-post results into one answer per model.
    return (
        blob_calculator(blob_scores),
        weighted_calculator(bag_votes),
        weighted_calculator(regex_votes),
        stock_titles,
        stock_discussion,
    )


def top_bottom_5():
    """Collect up to five positive and five negative comments.

    Uses the first hot post whose title contains "Daily" and classifies its
    top-level comments with ``bag_of_words`` in listing order; comments whose
    text is "[deleted]" are skipped. Returns ``(positive_comments,
    negative_comments)``.
    """
    top = []
    bottom = []
    for item in _hot_submissions():
        if _DAILY_MARKER not in item.title:
            continue
        logger.debug("scanning discussion thread %s", item.url)
        for body in _top_level_comment_bodies(item):
            if len(top) >= _TOP_N and len(bottom) >= _TOP_N:
                break
            if body == "[deleted]":
                continue
            sentiment = bag_of_words(body)
            if sentiment == "YES" and len(top) < _TOP_N:
                top.append(body)
            elif sentiment == "NO" and len(bottom) < _TOP_N:
                bottom.append(body)
        # Only the first daily thread is used, so stop scanning the listing.
        break
    return top, bottom


def blob_calculator(model_array):
    """Return the mean of the polarity scores in *model_array* (0 if empty).

    None entries are ignored so that a missing score cannot break the average.
    """
    logger.debug("blob scores: %s", model_array)
    scores = [score for score in model_array if score is not None]
    if not scores:
        return 0
    return sum(scores) / len(scores)


def weighted_calculator(model_array):
    """Return the majority answer among the "YES"/"NO" votes in *model_array*.

    Returns "YES" or "NO" for a strict majority and "MAYBE" for a tie or an
    empty input. Votes that are neither "YES" nor "NO" (e.g. "MAYBE", None)
    abstain. The previous version reset both counters on every iteration, so
    only the last vote counted.
    """
    logger.debug("votes: %s", model_array)
    yes_count = sum(1 for vote in model_array if vote == "YES")
    no_count = sum(1 for vote in model_array if vote == "NO")
    if no_count > yes_count:
        return "NO"
    if yes_count > no_count:
        return "YES"
    return "MAYBE"


# NOTE(review): no route decorator is visible in this part of the file --
# confirm how this view is registered.
def index():
    """Render the index page with the top/bottom comments and, when the form
    was submitted and validates, the sentiment results for the requested stock.
    """
    form = LoginForm()
    reg = ""
    emb = ""  # the word-embedding model is disabled; kept for the template
    blob = ""
    bag = ""
    title_array = []
    discussion_array = []

    top5, bottom5 = top_bottom_5()

    # NOTE(review): specific_stock_search compares option with the ints 0/1/2;
    # confirm field_two yields ints rather than strings.
    option = form.field_two.data
    if form.validate_on_submit():
        # specific_stock_search returns five values (there is no embedding
        # result), so unpack by position instead of indexing six slots.
        blob, bag, reg, title_array, discussion_array = specific_stock_search(
            form.stock.data, option
        )

    return render_template(
        'index.html',
        title='Stock',
        form=form,
        reg=reg,
        emb=emb,
        blob=blob,
        bag=bag,
        top5=top5,
        bottom5=bottom5,
        title_array=title_array,
        discussion_array=discussion_array,
    )

To learn more HTML/CSS, check out these tutorials!