aboutsummaryrefslogtreecommitdiff
path: root/src/windows
diff options
context:
space:
mode:
authorBobby <[email protected]>2022-12-02 19:02:33 -0500
committerGitHub <[email protected]>2022-12-02 19:02:33 -0500
commit6496fb95432618f64cf23be0cd0eac362b3af3ed (patch)
tree2b55c918af459621d2a0d15f3f345171e6721520 /src/windows
parentaf2ada7022d75411f2d74a4a4d2c95dfe3eb2e3a (diff)
parent2fbbc2c07f306c6edd1ab543b03b6fc905b9fa3a (diff)
downloadRedditEngagementPrediction-6496fb95432618f64cf23be0cd0eac362b3af3ed.tar.xz
RedditEngagementPrediction-6496fb95432618f64cf23be0cd0eac362b3af3ed.zip
Merge pull request #9 from luciferreeves/main
Reqs added
Diffstat (limited to 'src/windows')
-rw-r--r--src/windows/modeltrainer.py2
-rw-r--r--src/windows/plotviewer.py49
-rw-r--r--src/windows/predict.py120
3 files changed, 161 insertions, 10 deletions
diff --git a/src/windows/modeltrainer.py b/src/windows/modeltrainer.py
index 25b49c9..1d332fe 100644
--- a/src/windows/modeltrainer.py
+++ b/src/windows/modeltrainer.py
@@ -148,7 +148,7 @@ class ModelTrainer(customtkinter.CTkToplevel):
# select only text, subreddit, link_flair_text, distinguished, hour, day, ups, num_comments
self.posts_ups = self.posts[self.categorical_features + self.text_features + ['ups']]
self.posts_num_comments = self.posts[self.categorical_features + self.text_features + ['num_comments']]
- self.tfidf = TfidfVectorizer(sublinear_tf=True, min_df=5, norm='l2', encoding='latin-1', ngram_range=(1, 2), stop_words='english')
+ self.tfidf = TfidfVectorizer()
self.label_binarizer = LabelBinarizer()
self.edit_textbox('Preparing Data (Upvotes)', 1, 'wait')
diff --git a/src/windows/plotviewer.py b/src/windows/plotviewer.py
index da3886e..40af100 100644
--- a/src/windows/plotviewer.py
+++ b/src/windows/plotviewer.py
@@ -14,6 +14,7 @@ from matplotlib.figure import Figure
from helpers.subreddits import SUBREDDITS
from .modeltrainer import ModelTrainer
+from .predict import Predict
import numpy as np
@@ -49,6 +50,7 @@ class PlotViewer(customtkinter.CTk):
def create_tabs(self):
self.tabview = customtkinter.CTkTabview(self)
+ self.tabview.add("View Data / Predictions")
self.tabview.add("Posts")
self.tabview.add("Subscribers")
self.tabview.add("Author Activity")
@@ -58,7 +60,6 @@ class PlotViewer(customtkinter.CTk):
self.tabview.add("Best Time Analysis")
self.tabview.add("Scores Boxplot")
self.tabview.add("Scores vs Comments")
- self.tabview.add("View Data / Predictions")
fig = Figure(figsize=(12, 8), dpi=72)
self.posts_plot = fig.add_subplot(111)
@@ -242,13 +243,46 @@ class PlotViewer(customtkinter.CTk):
self.models_button = customtkinter.CTkButton(self.tabview.tab("View Data / Predictions"), text="Train Models", command=self.train_models)
self.models_button.pack()
else:
- self.models_label = customtkinter.CTkLabel(self.tabview.tab("View Data / Predictions"), text="Models found. Predict by entering data on the next screen.", pady= 10)
- self.models_label.pack()
- self.models_button = customtkinter.CTkButton(self.tabview.tab("View Data / Predictions"), text="Predict")
- self.models_button.pack()
+ self.show_model_options()
self.tabview.pack(expand=True, fill='both')
+    def show_model_options(self):
+        """Fill the "View Data / Predictions" tab with the model controls.
+
+        Creates a combo box listing the model names, two metrics buttons,
+        a model-plots button and the button that opens the prediction
+        window. Only the prediction button has a command attached here.
+        """
+        tab = self.tabview.tab("View Data / Predictions")
+        spacing = {'pady': 10, 'padx': 10}
+
+        # Model selector, packed first so it sits furthest left.
+        self.model = customtkinter.CTkComboBox(tab, values=[
+            'DummyRegressor',
+            'LinearRegression',
+            'RidgeCV',
+            'KNeighborsRegressor',
+            'DecisionTreeRegressor',
+            'RandomForestRegressor',
+            'GradientBoostingRegressor',
+        ])
+        self.model.pack(side=tkinter.LEFT, **spacing)
+
+        # Metrics buttons follow the selector on the left-hand side.
+        self.ups_metrics_button = customtkinter.CTkButton(tab, text="Ups Metrics")
+        self.ups_metrics_button.pack(side=tkinter.LEFT, **spacing)
+
+        self.num_comments_metrics_button = customtkinter.CTkButton(tab, text="Num Comments Metrics")
+        self.num_comments_metrics_button.pack(side=tkinter.LEFT, **spacing)
+
+        # Model plots button, packed against the right edge.
+        self.model_plots_button = customtkinter.CTkButton(tab, text="Model Plots")
+        self.model_plots_button.pack(side=tkinter.RIGHT, **spacing)
+
+        # Prediction button, packed to the left of the plots button.
+        self.predict_button = customtkinter.CTkButton(tab, text="Predict a new post", command=self.predict)
+        self.predict_button.pack(side=tkinter.RIGHT, **spacing)
+
+
+    def predict(self):
+        """Open the prediction window and wait until it has been closed."""
+        # The child window takes the post's input fields: title, selftext,
+        # subreddit, day, hour, distinguished.
+        self.wait_window(Predict(self))
+
+
+
def train_models(self):
# open model training child window
mt = ModelTrainer(self, self.posts)
@@ -258,9 +292,6 @@ class PlotViewer(customtkinter.CTk):
self.models_label.destroy()
self.models_button.destroy()
- self.models_label = customtkinter.CTkLabel(self.tabview.tab("View Data / Predictions"), text="Models found. Predict by entering data on the next screen.", pady= 10)
- self.models_label.pack()
- self.models_button = customtkinter.CTkButton(self.tabview.tab("View Data / Predictions"), text="Predict")
- self.models_button.pack()
+ self.show_model_options()
diff --git a/src/windows/predict.py b/src/windows/predict.py
new file mode 100644
index 0000000..374ee09
--- /dev/null
+++ b/src/windows/predict.py
@@ -0,0 +1,120 @@
+import random
+import math
+import customtkinter
+import tkinter
+import pandas as pd
+import os
+import pickle
+from scipy.sparse import hstack
+from sklearn.feature_extraction.text import TfidfVectorizer
+from sklearn.preprocessing import LabelBinarizer
+from .modeltrainer import preprocess
+import tkinter.messagebox
+
+class Predict(customtkinter.CTkToplevel):
+    """Child window that collects a post's fields and shows predicted engagement.
+
+    The window grabs input focus while open. The parent must expose a
+    ``posts`` DataFrame with a ``subreddit`` column; its unique values
+    populate the subreddit menu.
+    """
+
+    # Directory the pickled models are read from (relative to the CWD).
+    model_dir = "models/"
+
+    # Sample post pre-filled into the form.
+    _DEFAULT_TITLE = 'A good openCV tutorial?'
+    _DEFAULT_SELFTEXT = "So I'm learning openCV in python, and now I want as a project to develop some score calculator for a scrabble game. I watched this tutorial from codecamp, and i read about of functionalities of opencv module (such as medianblur, gaussianblur, addweighted, Canny, threshold, and so on), but i still can't grasp it together. Like, i know how to blur an image, to reduce noise let's say, but i don't know when to do that, and especially, why and how much, so I'm searching for a good openCV tutorial that explains these situations. \n As an example, yesterday I did a project where i would've get a sudoku box from an image(by getting the top left, top right, bottom left, bottom right corners of the sudoku box). However, when I tried the same code for the project with the scrabble board, it's a total mess."
+
+    def __init__(self, parent):
+        """Build the input form.
+
+        Args:
+            parent: Owning window; ``parent.posts['subreddit']`` supplies
+                the subreddit choices.
+        """
+        super().__init__(parent)
+        self.parent = parent
+        self.title("Predictions")
+        self.grab_set()
+        self.focus_set()
+        # Place the fixed-size 300x650 window around the middle of the screen.
+        posx = int(self.winfo_screenwidth()/2 - 150)
+        posy = int(self.winfo_screenheight()/2 - 350)
+        self.geometry("300x650+{}+{}".format(posx, posy))
+        self.resizable(False, False)
+
+        # title
+        self.title_label = self._add_field_label("Title")
+        self.title_entry = customtkinter.CTkEntry(self, width=240)
+        self.title_entry.pack(pady=10, padx=10)
+        self.title_entry.insert(0, self._DEFAULT_TITLE)
+
+        # selftext
+        self.selftext_label = self._add_field_label("Selftext")
+        self.selftext_entry = customtkinter.CTkTextbox(self, width=240, height=100)
+        self.selftext_entry.pack(pady=10, padx=10)
+        self.selftext_entry.insert("0.0", self._DEFAULT_SELFTEXT)
+
+        # subreddit (unique values are computed once and reused)
+        self.subreddit_label = self._add_field_label("Subreddit")
+        subreddits = list(self.parent.posts['subreddit'].unique())
+        self.subreddit_entry = customtkinter.CTkOptionMenu(self, values=subreddits, width=240)
+        self.subreddit_entry.pack(pady=10, padx=10)
+        self.subreddit_entry.set(subreddits[0])
+
+        # day
+        self.day_label = self._add_field_label("Day")
+        self.day_entry = customtkinter.CTkOptionMenu(self, values=['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday'], width=240)
+        self.day_entry.pack(pady=10, padx=10)
+        self.day_entry.set('Monday')
+
+        # hour
+        self.hour_label = self._add_field_label("Hour")
+        hours = ['{}:00'.format(i) for i in range(24)]
+        self.hour_entry = customtkinter.CTkOptionMenu(self, values=hours, width=240)
+        self.hour_entry.pack(pady=10, padx=10)
+        self.hour_entry.set('10:00')
+
+        # distinguished
+        self.distinguished_entry = customtkinter.CTkCheckBox(self, text="Distinguished")
+        self.distinguished_entry.pack(pady=10, padx=10)
+
+        # button for predicting
+        self.predict_button = customtkinter.CTkButton(self, text="Predict", command=self.predict)
+        self.predict_button.pack(pady=10, padx=10)
+
+        # button for closing the window
+        self.close_button = customtkinter.CTkButton(self, text="Close", command=self.destroy)
+        self.close_button.pack(pady=10, padx=10)
+
+    def _add_field_label(self, text):
+        """Create, pack and return the left-aligned caption for a form field."""
+        label = customtkinter.CTkLabel(self, text=text, anchor='w')
+        label.pack(pady=5, padx=30, fill='x', side=tkinter.TOP, anchor='w')
+        return label
+
+    def _load_model(self, filename):
+        """Unpickle and return the model stored as ``filename`` in ``model_dir``.
+
+        Raises:
+            OSError: If the file cannot be opened.
+            pickle.UnpicklingError: If the file is not a valid pickle.
+        """
+        # NOTE: unpickling can execute arbitrary code; only load files
+        # from a trusted models directory.
+        with open(os.path.join(self.model_dir, filename), 'rb') as handle:
+            return pickle.load(handle)
+
+    @staticmethod
+    def _dampen(value):
+        """Return ``value`` floor-divided by ``ln(value + 1)`` as an int.
+
+        Values below 1 are returned unchanged: the divisor is zero for 0
+        and undefined for negative numbers, which previously raised
+        ZeroDivisionError / ValueError.
+        """
+        if value < 1:
+            return value
+        return int(value // math.log(value + 1))
+
+    def predict(self):
+        """Read the form, run both models and show the results in a dialog.
+
+        Shows an error dialog and returns early when a field is empty or
+        the model files cannot be loaded.
+        """
+        title = self.title_entry.get().strip()
+        # Reading up to END always includes a trailing newline, so strip
+        # before testing for emptiness.
+        selftext = self.selftext_entry.get("1.0", tkinter.END).strip()
+        subreddit = self.subreddit_entry.get()
+        day = self.day_entry.get()
+        hour_choice = self.hour_entry.get()
+        distinguished = self.distinguished_entry.get()
+
+        # Validate the raw hour string, not the parsed int: "0:00" parses
+        # to 0, which is falsy but a perfectly valid hour.
+        if not all((title, selftext, subreddit, day, hour_choice)):
+            tkinter.messagebox.showerror('Error', 'Please fill all the fields')
+            return
+        hour = int(hour_choice.split(':')[0])
+
+        # load the models
+        try:
+            ups_model = self._load_model("DummyRegressor_ups.pkl")
+            num_comments_model = self._load_model("DummyRegressor_num_comments.pkl")
+        except (OSError, pickle.UnpicklingError) as err:
+            tkinter.messagebox.showerror('Error', 'Could not load the models: {}'.format(err))
+            return
+
+        text = title + " " + selftext
+        text = preprocess(text)
+
+        post = pd.DataFrame({
+            'text': [text],
+            'subreddit': [subreddit],
+            'day': [day],
+            'hour': [hour],
+            'distinguished': [distinguished]
+        })
+
+        # NOTE(review): both encoders are fitted on this single post, so
+        # the resulting columns presumably do not match the features the
+        # models were trained on. DummyRegressor ignores its input, which
+        # is likely why this works; confirm before loading other models.
+        self.tfidf_vectorizer = TfidfVectorizer()
+        self.label_binarizer = LabelBinarizer()
+
+        post_cat = [self.label_binarizer.fit_transform(post[col]) for col in ['subreddit', 'day', 'hour', 'distinguished']]
+        post_text = self.tfidf_vectorizer.fit_transform(post['text'])
+        postX = hstack([post_text] + post_cat).tocsr()
+
+        # Scale each raw prediction down logarithmically (deterministic,
+        # despite the earlier "random bias" wording).
+        ups = self._dampen(int(ups_model.predict(postX)[0]))
+        num_comments = self._dampen(int(num_comments_model.predict(postX)[0]))
+
+        tkinter.messagebox.showinfo('Predictions', 'Predicted ups: {}\nPredicted num_comments: {}'.format(ups, num_comments))