diff options
Diffstat (limited to 'SentimentAnalyser.py')
| -rw-r--r-- | SentimentAnalyser.py | 49 |
1 files changed, 38 insertions, 11 deletions
diff --git a/SentimentAnalyser.py b/SentimentAnalyser.py index ba415dc..48f3432 100644 --- a/SentimentAnalyser.py +++ b/SentimentAnalyser.py @@ -1,6 +1,7 @@ from nltk.sentiment.vader import SentimentIntensityAnalyzer from cassandra.cluster import Cluster import re +import pandas as pd compound = [] @@ -10,17 +11,43 @@ neg = [] def sentence_score(rs): review_score = SentimentIntensityAnalyzer() - compound.append(review_score.polarity_scores(rs)['compound']) - neg.append(review_score.polarity_scores(rs)['neg']) - neu.append(review_score.polarity_scores(rs)['neu']) - pos.append(review_score.polarity_scores(rs)['pos']) + return review_score.polarity_scores(rs)['compound'] + # compound.append(review_score.polarity_scores(rs)['compound']) + # neg.append(review_score.polarity_scores(rs)['neg']) + # neu.append(review_score.polarity_scores(rs)['neu']) + # pos.append(review_score.polarity_scores(rs)['pos']) -cluster = Cluster() -session = cluster.connect('twitter') -rows = session.execute('SELECT tweet FROM twitterdata') -for tweet in rows: +cluster = Cluster(['127.0.0.1'], port=9042) +session = cluster.connect() +session.set_keyspace('twitter') +session.execute("USE twitter") + +# Select all tweets from cassandra database +query = "SELECT * FROM twitterdata" +rows = session.execute(query) +tweets = [] + +# Iterate through all tweets +for row in rows: try: - sentence_score(tweet) - except re.error: - print(tweet)
\ No newline at end of file + tweets.append({ + 'tweet_id': row.tweet_id, + 'tweet': row.tweet, + 'score': sentence_score(row.tweet) + }) + except: + print(row.tweet) + + +for tweet in tweets: + if tweet.get('score') > 0.5: + tweet['sentiment'] = 'positive' + elif tweet.get('score') < -0.5: + tweet['sentiment'] = 'negative' + else: + tweet['sentiment'] = 'neutral' + +df = pd.DataFrame(tweets) +df.to_csv('tweets.csv', index=False) + |
