about | summary | refs | log | tree | commit | diff
path: root/SentimentAnalyser.py
diff options
context:
space:
mode:
author: Priyansh <[email protected]> 2021-12-25 17:17:02 -0500
committer: Priyansh <[email protected]> 2021-12-25 17:17:02 -0500
commit: cb1e6c587ebef823b8524d72c1a97ab5765901d8 (patch)
tree: 7ef2d647d44ce2bfbbe66d9c6f1eea62fe5dbea6 /SentimentAnalyser.py
parent: 4ab10ff5a9588f535a2d8d7f8b7b93a237d1e03b (diff)
download: KafkaPySpark-cb1e6c587ebef823b8524d72c1a97ab5765901d8.tar.xz
download: KafkaPySpark-cb1e6c587ebef823b8524d72c1a97ab5765901d8.zip
Analysed Sentiment and Plotted a plot
Diffstat (limited to 'SentimentAnalyser.py')
-rw-r--r-- SentimentAnalyser.py 49
1 file changed, 38 insertions, 11 deletions
diff --git a/SentimentAnalyser.py b/SentimentAnalyser.py
index ba415dc..48f3432 100644
--- a/SentimentAnalyser.py
+++ b/SentimentAnalyser.py
@@ -1,6 +1,7 @@
from nltk.sentiment.vader import SentimentIntensityAnalyzer
from cassandra.cluster import Cluster
import re
+import pandas as pd
compound = []
@@ -10,17 +11,43 @@ neg = []
def sentence_score(rs):
review_score = SentimentIntensityAnalyzer()
- compound.append(review_score.polarity_scores(rs)['compound'])
- neg.append(review_score.polarity_scores(rs)['neg'])
- neu.append(review_score.polarity_scores(rs)['neu'])
- pos.append(review_score.polarity_scores(rs)['pos'])
+ return review_score.polarity_scores(rs)['compound']
+ # compound.append(review_score.polarity_scores(rs)['compound'])
+ # neg.append(review_score.polarity_scores(rs)['neg'])
+ # neu.append(review_score.polarity_scores(rs)['neu'])
+ # pos.append(review_score.polarity_scores(rs)['pos'])
-cluster = Cluster()
-session = cluster.connect('twitter')
-rows = session.execute('SELECT tweet FROM twitterdata')
-for tweet in rows:
+cluster = Cluster(['127.0.0.1'], port=9042)
+session = cluster.connect()
+session.set_keyspace('twitter')
+session.execute("USE twitter")
+
+# Select all tweets from cassandra database
+query = "SELECT * FROM twitterdata"
+rows = session.execute(query)
+tweets = []
+
+# Iterate through all tweets
+for row in rows:
try:
- sentence_score(tweet)
- except re.error:
- print(tweet) \ No newline at end of file
+ tweets.append({
+ 'tweet_id': row.tweet_id,
+ 'tweet': row.tweet,
+ 'score': sentence_score(row.tweet)
+ })
+ except:
+ print(row.tweet)
+
+
+for tweet in tweets:
+ if tweet.get('score') > 0.5:
+ tweet['sentiment'] = 'positive'
+ elif tweet.get('score') < -0.5:
+ tweet['sentiment'] = 'negative'
+ else:
+ tweet['sentiment'] = 'neutral'
+
+df = pd.DataFrame(tweets)
+df.to_csv('tweets.csv', index=False)
+