diff options
| author | Priyansh <[email protected]> | 2021-12-25 17:17:02 -0500 |
|---|---|---|
| committer | Priyansh <[email protected]> | 2021-12-25 17:17:02 -0500 |
| commit | cb1e6c587ebef823b8524d72c1a97ab5765901d8 (patch) | |
| tree | 7ef2d647d44ce2bfbbe66d9c6f1eea62fe5dbea6 /SentimentAnalyser.py | |
| parent | 4ab10ff5a9588f535a2d8d7f8b7b93a237d1e03b (diff) | |
| download | KafkaPySpark-cb1e6c587ebef823b8524d72c1a97ab5765901d8.tar.xz KafkaPySpark-cb1e6c587ebef823b8524d72c1a97ab5765901d8.zip | |
Analysed Sentiment and Plotted a plot
Diffstat (limited to 'SentimentAnalyser.py')
| -rw-r--r-- | SentimentAnalyser.py | 49 |
1 files changed, 38 insertions, 11 deletions
```diff
diff --git a/SentimentAnalyser.py b/SentimentAnalyser.py
index ba415dc..48f3432 100644
--- a/SentimentAnalyser.py
+++ b/SentimentAnalyser.py
@@ -1,6 +1,7 @@
 from nltk.sentiment.vader import SentimentIntensityAnalyzer
 from cassandra.cluster import Cluster
 import re
+import pandas as pd
 
 
 compound = []
@@ -10,17 +11,43 @@ neg = []
 
 def sentence_score(rs):
     review_score = SentimentIntensityAnalyzer()
-    compound.append(review_score.polarity_scores(rs)['compound'])
-    neg.append(review_score.polarity_scores(rs)['neg'])
-    neu.append(review_score.polarity_scores(rs)['neu'])
-    pos.append(review_score.polarity_scores(rs)['pos'])
+    return review_score.polarity_scores(rs)['compound']
+    # compound.append(review_score.polarity_scores(rs)['compound'])
+    # neg.append(review_score.polarity_scores(rs)['neg'])
+    # neu.append(review_score.polarity_scores(rs)['neu'])
+    # pos.append(review_score.polarity_scores(rs)['pos'])
 
 
-cluster = Cluster()
-session = cluster.connect('twitter')
-rows = session.execute('SELECT tweet FROM twitterdata')
-for tweet in rows:
+cluster = Cluster(['127.0.0.1'], port=9042)
+session = cluster.connect()
+session.set_keyspace('twitter')
+session.execute("USE twitter")
+
+# Select all tweets from cassandra database
+query = "SELECT * FROM twitterdata"
+rows = session.execute(query)
+tweets = []
+
+# Iterate through all tweets
+for row in rows:
     try:
-        sentence_score(tweet)
-    except re.error:
-        print(tweet)
\ No newline at end of file
+        tweets.append({
+            'tweet_id': row.tweet_id,
+            'tweet': row.tweet,
+            'score': sentence_score(row.tweet)
+        })
+    except:
+        print(row.tweet)
+
+
+for tweet in tweets:
+    if tweet.get('score') > 0.5:
+        tweet['sentiment'] = 'positive'
+    elif tweet.get('score') < -0.5:
+        tweet['sentiment'] = 'negative'
+    else:
+        tweet['sentiment'] = 'neutral'
+
+df = pd.DataFrame(tweets)
+df.to_csv('tweets.csv', index=False)
+
```
