aboutsummaryrefslogtreecommitdiff
path: root/src/windows/predict.py
blob: 49402ffdbe943334cbba3cb5a9a428785fe2e04b (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
import customtkinter
import tkinter
import pandas as pd
import os 
import pickle
from .modeltrainer import preprocess
import tkinter.messagebox
import joblib

class Predict(customtkinter.CTkToplevel):
    model_dir = "models/"
    def __init__(self, parent, selected_model):
        super().__init__(parent)
        self.parent = parent
        self.title("Predictions")
        self.grab_set()
        self.focus_set()
        self.selected_model = selected_model
        posx = int(self.winfo_screenwidth()/2 - 150)
        posy = int(self.winfo_screenheight()/2 - 350)
        self.geometry("300x650+{}+{}".format(posx, posy))
        self.resizable(False, False)

        # title
        self.title_label = customtkinter.CTkLabel(self, text="Title", anchor='w')
        self.title_label.pack(pady=5, padx=30, fill='x', side=tkinter.TOP, anchor='w')
        self.title_entry = customtkinter.CTkEntry(self, width=240)
        self.title_entry.pack(pady=10, padx=10)

        # uncomment the following lines to add title

        self.title_entry.insert(0, 'A good openCV tutorial?')

        # selftext
        self.selftext_label = customtkinter.CTkLabel(self, text="Selftext", anchor='w')
        self.selftext_label.pack(pady=5, padx=30, fill='x', side=tkinter.TOP, anchor='w')
        self.selftext_entry = customtkinter.CTkTextbox(self, width=240, height=100)
        self.selftext_entry.pack(pady=10, padx=10)

        # uncomment the following lines to add selftext

        self.selftext_entry.insert("0.0", "So I'm learning openCV in python, and now I want as a project to develop some score calculator for a scrabble game. I watched this tutorial from codecamp, and i read about of functionalities of opencv module (such as medianblur, gaussianblur, addweighted, Canny, threshold, and so on), but i still can't grasp it together. Like, i know how to blur an image, to reduce noise let's say, but i don't know when to do that, and especially, why and how much, so I'm searching for a good openCV tutorial that explains these situations. \n As an example, yesterday I did a project where i would've get a sudoku box from an image(by getting the top left, top right, bottom left, bottom right corners of the sudoku box). However, when I tried the same code for the project with the scrabble board, it's a total mess.")

        # subreddit
        self.subreddit_label = customtkinter.CTkLabel(self, text="Subreddit", anchor='w')
        self.subreddit_label.pack(pady=5, padx=30, fill='x', side=tkinter.TOP, anchor='w')

        self.subreddit_entry = customtkinter.CTkOptionMenu(self, values=self.parent.posts['subreddit'].unique(), width=240)
        self.subreddit_entry.pack(pady=10, padx=10)
        self.subreddit_entry.set(self.parent.posts['subreddit'].unique()[0])

        # day
        self.day_label = customtkinter.CTkLabel(self, text="Day", anchor='w')
        self.day_label.pack(pady=5, padx=30, fill='x', side=tkinter.TOP, anchor='w')
        self.day_entry = customtkinter.CTkOptionMenu(self, values=['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday'], width=240)
        self.day_entry.pack(pady=10, padx=10)
        self.day_entry.set('Monday')

        # hour
        self.hour_label = customtkinter.CTkLabel(self, text="Hour", anchor='w')
        self.hour_label.pack(pady=5, padx=30, fill='x', side=tkinter.TOP, anchor='w')

        hours = ['{}:00'.format(i) for i in range(24)]

        self.hour_entry = customtkinter.CTkOptionMenu(self, values=hours, width=240)
        self.hour_entry.pack(pady=10, padx=10)
        self.hour_entry.set('10:00')
        
        # distinguished
        self.distinguished_entry = customtkinter.CTkCheckBox(self, text="Distinguished")
        self.distinguished_entry.pack(pady=10, padx=10)

        # button for predicting
        self.predict_button = customtkinter.CTkButton(self, text="Predict", command=self.predict)
        self.predict_button.pack(pady=10, padx=10)

        # button for closing the window
        self.close_button = customtkinter.CTkButton(self, text="Close", command=self.destroy)
        self.close_button.pack(pady=10, padx=10)

    def predict(self):
        title = self.title_entry.get()
        selftext = self.selftext_entry.get("1.0", tkinter.END)
        subreddit = self.subreddit_entry.get()
        day = self.day_entry.get()
        hour = self.hour_entry.get()
        hour = int(hour.split(':')[0])
        distinguished = False if self.distinguished_entry.get() == 0 else True

        if not title or not selftext or not subreddit or not day or not hour:
            tkinter.messagebox.showerror('Error', 'Please fill all the fields')
            return

        # load the model
        ups_model = pickle.load(open(os.path.join(self.model_dir, self.selected_model + '_ups.pkl'), 'rb'))
        num_comments_model = pickle.load(open(os.path.join(self.model_dir, self.selected_model + '_num_comments.pkl'), 'rb'))

        # load the vectorizer
        vectorizer = joblib.load(os.path.join(self.model_dir, "vectorizer.pkl"))

        text = title + " " + selftext
        text = preprocess(text)

        input_data = pd.DataFrame(columns=['text', 'day', 'hour', 'subreddit', 'distinguished'])
        input_data = input_data.append({'text': text, 'day': day, 'hour': hour, 'subreddit': subreddit, 'distinguished': distinguished}, ignore_index=True)

        cat_cols = ['day', 'hour', 'subreddit', 'distinguished']
        for col in cat_cols:
            input_data[col] = input_data[col].astype('category')
        input_data = pd.get_dummies(input_data, columns=cat_cols)
        input_data['text'] = input_data['text'].astype(str)

        X = vectorizer.transform(input_data['text'])

        # predict the ups
        ups = ups_model.predict(X)
        ups = int(ups[0])

        # predict the num_comments
        num_comments = num_comments_model.predict(X)
        num_comments = int(num_comments[0])

        tkinter.messagebox.showinfo('Result', 'Predicted ups: {}\nPredicted num_comments: {}'.format(ups, num_comments))