about | summary | refs | log | tree | commit | diff
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r-- src/windows/data_fetcher.py 4
-rw-r--r-- src/windows/modelplots.py 171
-rw-r--r-- src/windows/modeltrainer.py 39
-rw-r--r-- src/windows/plotviewer.py 33
4 files changed, 238 insertions, 9 deletions
diff --git a/src/windows/data_fetcher.py b/src/windows/data_fetcher.py
index 57763a1..212c1d7 100644
--- a/src/windows/data_fetcher.py
+++ b/src/windows/data_fetcher.py
@@ -27,7 +27,7 @@ class DataDownloader:
self.root.resizable(False, False)
self.root.protocol('WM_DELETE_WINDOW', self.on_closing)
self.subreddits = SUBREDDITS
- self.posts_per_subreddit = 100
+ self.posts_per_subreddit = 1000
self.progress = ttk.Progressbar(self.root, orient='horizontal', length=500, mode='determinate')
self.progress['value'] = 0
@@ -56,7 +56,7 @@ class DataDownloader:
self.headers = {**headers, **{'Authorization': f"bearer {self.token}"}}
def on_closing(self):
- self.root.destroy()
+ pass
def download(self):
for subreddit in self.subreddits:
diff --git a/src/windows/modelplots.py b/src/windows/modelplots.py
new file mode 100644
index 0000000..ae895ac
--- /dev/null
+++ b/src/windows/modelplots.py
@@ -0,0 +1,171 @@
+import customtkinter
+import seaborn as sns
+from matplotlib.backends.backend_tkagg import FigureCanvasTkAgg
+import matplotlib.pyplot as plt
+from matplotlib.figure import Figure
+import json
+import tkinter
+import numpy as np
+
+class ModelPlots(customtkinter.CTkToplevel):
+    """Modal window that charts the saved evaluation metrics of the trained models.
+
+    The window reads ``models/ups_metrics.json`` and
+    ``models/num_comments_metrics.json`` and renders one tab per chart family:
+    bar charts for R2 / MAE / MSE, an R2 comparison, predicted-vs-actual
+    regressions, and residual / score histograms.
+
+    Each metrics file is expected to map a model name to a dict holding the
+    keys ``r2``, ``mae``, ``mse``, ``actual`` and ``pred`` (the last two being
+    equal-length numeric lists), and both files are expected to list the same
+    model names.
+    """
+
+    UPS_METRICS_PATH = 'models/ups_metrics.json'
+    NUM_COMMENTS_METRICS_PATH = 'models/num_comments_metrics.json'
+    TAB_NAMES = (
+        'R-Squares',
+        'MAE',
+        'MSE',
+        'R-Square Comparison',
+        'Predictions',
+        'Residuals',
+    )
+
+    def __init__(self, parent):
+        """Create the window centred on screen, load both metrics files and draw the tabs.
+
+        Args:
+            parent: the widget that owns this toplevel window.
+
+        Raises:
+            OSError: if a metrics file cannot be opened.
+            json.JSONDecodeError: if a metrics file is not valid JSON.
+        """
+        super().__init__(parent)
+        self.parent = parent
+        self.title("Model Plots")
+        posx = int(self.winfo_screenwidth() / 2 - 400)
+        posy = int(self.winfo_screenheight() / 2 - 300)
+        self.geometry("800x600+{}+{}".format(posx, posy))
+        self.grab_set()
+        self.focus_set()
+        self.resizable(True, True)
+        self.ups_metrics = self._load_metrics(self.UPS_METRICS_PATH)
+        self.num_comments_metrics = self._load_metrics(self.NUM_COMMENTS_METRICS_PATH)
+
+        self.create_widgets()
+
+    @staticmethod
+    def _load_metrics(path):
+        """Return the metrics dict stored as JSON at *path*."""
+        with open(path, 'r') as f:
+            return json.load(f)
+
+    def create_widgets(self):
+        """Build the tab view and embed one matplotlib canvas in every tab.
+
+        The canvases are kept on ``self`` (``r2plot``, ``maeplot``, ``mseplot``,
+        ``r2comparisonplot``, ``predplot``, ``residualplot``).
+        """
+        self.tabview = customtkinter.CTkTabview(self)
+        for tab_name in self.TAB_NAMES:
+            self.tabview.add(tab_name)
+
+        self.r2plot = self._bar_pair(
+            'R-Squares', 'r2', (15, 3),
+            ('R-Square for Ups', 'R-Square for Number of Comments'),
+            ('Blues_d', 'Greens_d'))
+        self.maeplot = self._bar_pair(
+            'MAE', 'mae', (15, 3),
+            ('MAE for Ups', 'MAE for Number of Comments'),
+            ('Reds_d', 'Oranges_d'))
+        self.mseplot = self._bar_pair(
+            'MSE', 'mse', (15, 3),
+            ('MSE for Ups', 'MSE for Number of Comments'),
+            ('Purples_d', 'Greys_d'))
+        self.r2comparisonplot = self._bar_pair(
+            'R-Square Comparison', 'r2', (20, 5),
+            ('Ups', 'Number of Comments'),
+            ('Blues_d', 'Greens_d'),
+            xlabel='Model', ylabel='R2 Score')
+        self.predplot = self._predictions_plot()
+        self.residualplot = self._residuals_plot()
+
+        self.tabview.pack(fill='both', expand=True)
+
+    def _embed(self, fig, tab_name):
+        """Attach *fig* to the tab called *tab_name* and return its Tk canvas."""
+        canvas = FigureCanvasTkAgg(fig, self.tabview.tab(tab_name))
+        fig.tight_layout()
+        canvas.get_tk_widget().pack(side=tkinter.TOP, fill=tkinter.BOTH, expand=1)
+        return canvas
+
+    def _targets(self, model_name):
+        """Return (label, metrics, colour) for both prediction targets of one model."""
+        return (
+            ('Ups', self.ups_metrics[model_name], 'blue'),
+            ('Number of Comments', self.num_comments_metrics[model_name], 'green'),
+        )
+
+    def _bar_pair(self, tab_name, metric, figsize, titles, palettes, xlabel=None, ylabel=None):
+        """Draw one per-model bar chart of *metric* for each target, side by side."""
+        # Use Figure directly instead of pyplot: pyplot keeps every figure it
+        # creates in global state, so figures would pile up each time the
+        # window is reopened.
+        fig = Figure(figsize=figsize, dpi=36)
+        axes = fig.subplots(1, 2)
+        sources = (self.ups_metrics, self.num_comments_metrics)
+        for ax, metrics, title, palette in zip(axes, sources, titles, palettes):
+            if metrics:  # nothing to draw when no model has been evaluated
+                sns.barplot(
+                    x=list(metrics),
+                    y=[scores[metric] for scores in metrics.values()],
+                    ax=ax,
+                    palette=palette,
+                )
+            ax.set_title(title)
+            if xlabel is not None:
+                ax.set_xlabel(xlabel)
+            if ylabel is not None:
+                ax.set_ylabel(ylabel)
+            ax.tick_params(axis='x', labelrotation=45)
+        return self._embed(fig, tab_name)
+
+    def _predictions_plot(self):
+        """Draw a predicted-vs-actual regression plot per model and target."""
+        model_names = list(self.ups_metrics)
+        fig = Figure(figsize=(12, 16), dpi=24)
+        if model_names:
+            # One row per model actually present (was hard-coded to 7 rows).
+            grid = fig.subplots(len(model_names), 2, squeeze=False)
+            for row, model_name in zip(grid, model_names):
+                for ax, (label, scores, color) in zip(row, self._targets(model_name)):
+                    sns.regplot(x=scores['actual'], y=scores['pred'], ax=ax,
+                                color=color, scatter_kws={'alpha': 0.3})
+                    ax.set_title('{} - {}'.format(label, model_name))
+                    ax.set_xlabel('Actual')
+                    ax.set_ylabel('Predicted')
+        return self._embed(fig, 'Predictions')
+
+    def _residuals_plot(self):
+        """Draw residual, actual and predicted histograms per model and target."""
+        model_names = list(self.ups_metrics)
+        fig = Figure(figsize=(20, 16), dpi=16)
+        if model_names:
+            # One row per model actually present (was hard-coded to 7 rows).
+            grid = fig.subplots(len(model_names), 6, squeeze=False)
+            for row, model_name in zip(grid, model_names):
+                # Columns 0-2 belong to Ups, columns 3-5 to Number of Comments.
+                for first_col, (label, scores, color) in zip((0, 3), self._targets(model_name)):
+                    actual = np.asarray(scores['actual'])
+                    pred = np.asarray(scores['pred'])
+                    panels = (
+                        (actual - pred, color, '{} - {} Residuals'.format(model_name, label), 'Residuals'),
+                        (actual, color, '{} {} Test Scores'.format(model_name, label), label),
+                        (pred, 'red', '{} {} Predicted Scores'.format(model_name, label), label),
+                    )
+                    for ax, (values, panel_color, title, x_label) in zip(row[first_col:first_col + 3], panels):
+                        # histplot is the supported replacement for the
+                        # deprecated distplot(..., kde=False).
+                        sns.histplot(x=values, ax=ax, color=panel_color)
+                        ax.set_title(title)
+                        ax.set_xlabel(x_label)
+                        ax.set_ylabel('Frequency')
+        return self._embed(fig, 'Residuals')
diff --git a/src/windows/modeltrainer.py b/src/windows/modeltrainer.py
index 6664b57..17908da 100644
--- a/src/windows/modeltrainer.py
+++ b/src/windows/modeltrainer.py
@@ -12,6 +12,8 @@ from scipy.sparse import hstack
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.preprocessing import LabelBinarizer
import ssl
+import json
+from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_error
try:
_create_unverified_https_context = ssl._create_unverified_context
@@ -123,6 +125,8 @@ class ModelTrainer(customtkinter.CTkToplevel):
"RandomForestRegressor": RandomForestRegressor(n_jobs=-1, n_estimators=70, min_samples_leaf=10, random_state = 10),
"GradientBoostingRegressor": GradientBoostingRegressor(n_estimators=70, max_depth=5)
}
+ self.ups_dict = {}
+ self.num_comments_dict = {}
self.start()
@@ -207,15 +211,50 @@ class ModelTrainer(customtkinter.CTkToplevel):
self.edit_textbox('Training {} for Upvotes'.format(model_name), line_count, 'wait')
model.fit(self.X_train_ups, self.y_train_ups)
self.save_model(model, model_name + '_ups')
+
+ ups_y_pred = model.predict(self.X_test_ups)
+ ups_mse = mean_squared_error(self.y_test_ups, ups_y_pred)
+ ups_mae = mean_absolute_error(self.y_test_ups, ups_y_pred)
+ ups_r2 = r2_score(self.y_test_ups, ups_y_pred)
+ self.ups_dict[model_name] = {
+ 'mse': ups_mse,
+ 'mae': ups_mae,
+ 'r2': ups_r2,
+ 'rmse': np.sqrt(ups_mse),
+ 'pred': list(ups_y_pred),
+ 'actual': list(self.y_test_ups)
+ }
+
self.edit_textbox('Training {} for Upvotes'.format(model_name), line_count, 'done')
line_count += 1
self.edit_textbox('Training {} for Number of Comments'.format(model_name), line_count, 'wait')
model.fit(self.X_train_num_comments, self.y_train_num_comments)
self.save_model(model, model_name + '_num_comments')
+
+ num_comments_y_pred = model.predict(self.X_test_num_comments)
+ num_comments_mse = mean_squared_error(self.y_test_num_comments, num_comments_y_pred)
+ num_comments_mae = mean_absolute_error(self.y_test_num_comments, num_comments_y_pred)
+ num_comments_r2 = r2_score(self.y_test_num_comments, num_comments_y_pred)
+ self.num_comments_dict[model_name] = {
+ 'mse': num_comments_mse,
+ 'mae': num_comments_mae,
+ 'r2': num_comments_r2,
+ 'rmse': np.sqrt(num_comments_mse),
+ 'pred': list(num_comments_y_pred),
+ 'actual': list(self.y_test_num_comments)
+ }
+
self.edit_textbox('Training {} for Number of Comments'.format(model_name), line_count, 'done')
line_count += 1
+ # save the metrics
+ with open(self.model_dir + 'ups_metrics.json', 'w') as f:
+ json.dump(self.ups_dict, f)
+
+ with open(self.model_dir + 'num_comments_metrics.json', 'w') as f:
+ json.dump(self.num_comments_dict, f)
+
self.edit_textbox('Training Complete. Models saved to models/ directory. You may now close this window.', line_count, 'done')
# allow user to close window
diff --git a/src/windows/plotviewer.py b/src/windows/plotviewer.py
index 40af100..da334b6 100644
--- a/src/windows/plotviewer.py
+++ b/src/windows/plotviewer.py
@@ -15,6 +15,7 @@ from matplotlib.figure import Figure
from helpers.subreddits import SUBREDDITS
from .modeltrainer import ModelTrainer
from .predict import Predict
+from .modelplots import ModelPlots
import numpy as np
@@ -257,32 +258,46 @@ class PlotViewer(customtkinter.CTk):
'RandomForestRegressor',
'GradientBoostingRegressor',
]
- self.model = customtkinter.CTkComboBox(self.tabview.tab("View Data / Predictions"), values=models)
+ self.model = customtkinter.CTkOptionMenu(self.tabview.tab("View Data / Predictions"), values=models)
self.model.pack(pady=10, padx=10, side=tkinter.LEFT)
+ self.model.set('DummyRegressor')
# metrics buttons
- self.ups_metrics_button = customtkinter.CTkButton(self.tabview.tab("View Data / Predictions"), text="Ups Metrics")
+ self.ups_metrics_button = customtkinter.CTkButton(self.tabview.tab("View Data / Predictions"), text="Ups Metrics", command=self.ups_metrics)
self.ups_metrics_button.pack(pady=10, padx=10, side=tkinter.LEFT)
- self.num_comments_metrics_button = customtkinter.CTkButton(self.tabview.tab("View Data / Predictions"), text="Num Comments Metrics")
+ self.num_comments_metrics_button = customtkinter.CTkButton(self.tabview.tab("View Data / Predictions"), text="Num Comments Metrics", command=self.num_comments_metrics)
self.num_comments_metrics_button.pack(pady=10, padx=10, side=tkinter.LEFT)
# button for model plots
- self.model_plots_button = customtkinter.CTkButton(self.tabview.tab("View Data / Predictions"), text="Model Plots")
+ self.model_plots_button = customtkinter.CTkButton(self.tabview.tab("View Data / Predictions"), text="Model Plots", command=self.show_model_plots)
self.model_plots_button.pack(pady=10, padx=10, side=tkinter.RIGHT)
# button for predicting
self.predict_button = customtkinter.CTkButton(self.tabview.tab("View Data / Predictions"), text="Predict a new post", command=self.predict)
self.predict_button.pack(pady=10, padx=10, side=tkinter.RIGHT)
+    def ups_metrics(self):
+        """Show the saved Ups metrics of the selected model in an info dialog.
+
+        Reads ``models/ups_metrics.json``, picks the entry for the model
+        currently selected in ``self.model`` and displays its MAE, MSE, RMSE
+        and R2 score.
+        """
+        import json
+        # Import the submodule explicitly: ``import tkinter`` alone does not
+        # guarantee that ``tkinter.messagebox`` has been loaded.
+        from tkinter import messagebox
+
+        selected_model = self.model.get()
+        with open('models/ups_metrics.json', 'r') as f:
+            metrics = json.load(f)[selected_model]
+        message = "\n\n".join([
+            f"Mean Absolute Error: {metrics['mae']}",
+            f"Mean Squared Error: {metrics['mse']}",
+            f"Root Mean Squared Error: {metrics['rmse']}",
+            f"R2 Score: {metrics['r2']}",
+        ])
+        # The title needs the f-prefix; without it the dialog showed the
+        # literal text "{selected_model}".
+        messagebox.showinfo(f"Ups Metrics for {selected_model}", message)
+
+    def num_comments_metrics(self):
+        """Show the saved Num Comments metrics of the selected model in an info dialog.
+
+        Reads ``models/num_comments_metrics.json``, picks the entry for the
+        model currently selected in ``self.model`` and displays its MAE, MSE,
+        RMSE and R2 score.
+        """
+        import json
+        # Import the submodule explicitly: ``import tkinter`` alone does not
+        # guarantee that ``tkinter.messagebox`` has been loaded.
+        from tkinter import messagebox
+
+        selected_model = self.model.get()
+        with open('models/num_comments_metrics.json', 'r') as f:
+            metrics = json.load(f)[selected_model]
+        message = "\n\n".join([
+            f"Mean Absolute Error: {metrics['mae']}",
+            f"Mean Squared Error: {metrics['mse']}",
+            f"Root Mean Squared Error: {metrics['rmse']}",
+            f"R2 Score: {metrics['r2']}",
+        ])
+        # The title needs the f-prefix; without it the dialog showed the
+        # literal text "{selected_model}".
+        messagebox.showinfo(f"Num Comments Metrics for {selected_model}", message)
def predict(self):
# child window to take input of the post - title, selftext, subreddit, day, hour, distinguished
pred_win = Predict(self)
self.wait_window(pred_win)
-
-
def train_models(self):
# open model training child window
mt = ModelTrainer(self, self.posts)
@@ -294,4 +309,8 @@ class PlotViewer(customtkinter.CTk):
self.models_button.destroy()
self.show_model_options()
-
+    def show_model_plots(self):
+        """Open the model-plots window, give it grab and focus, and wait for it to close."""
+        plots_window = ModelPlots(self)
+        plots_window.grab_set()
+        plots_window.focus_set()
+        self.wait_window(plots_window)