Refactor code; time_series_model.py now builds a pickle file

author: Priyansh <[email protected]> 2021-12-09 01:40:24 -0500
committer: Priyansh <[email protected]> 2021-12-09 01:40:24 -0500
commit: 69b81f679d05d3dcffe99fb27612400d4d0b9d7f (patch)
tree: f00f5dee8ed46aecb678f3a3eea7a4b5b4e2a26e
parent: 7ce99172c9265d7b48e43bf3a7b1950aac97a632 (diff)
download: temp_pred_arima-69b81f679d05d3dcffe99fb27612400d4d0b9d7f.tar.xz
temp_pred_arima-69b81f679d05d3dcffe99fb27612400d4d0b9d7f.zip
2 files changed, 30 insertions, 62 deletions
diff --git a/sql_functions.py b/sql_functions.py
index 313c449..54d1d05 100644
--- a/sql_functions.py
+++ b/sql_functions.py
@@ -1,6 +1,7 @@
 import sqlite3 as sql
 import pandas as pd
 
+
 def execute_sql_statement(sql_statement):
     conn = sql.connect("database.db")
     cur = conn.cursor()
@@ -8,11 +9,13 @@ def execute_sql_statement(sql_statement):
     rows = cur.fetchall()
     return rows
 
+
 def run_sql_pandas(sql_statement):
     conn = sql.connect("database.db")
-    df=pd.read_sql_query(sql_statement, conn).to_records(index=False)
+    df = pd.read_sql_query(sql_statement, conn).to_records(index=False)
     return df
 
+
 def get_list_of_dict(keys, list_of_tuples):
-     list_of_dict = [dict(zip(keys, values)) for values in list_of_tuples]
-     return list_of_dict
-\ No newline at end of file
+    list_of_dict = [dict(zip(keys, values)) for values in list_of_tuples]
+    return list_of_dict
diff --git a/time_series_model.py b/time_series_model.py
index 2896381..3fca7c0 100644
--- a/time_series_model.py
+++ b/time_series_model.py
@@ -1,61 +1,26 @@
-import numpy as np
 import pandas as pd
-import matplotlib.pyplot as plt
-import seaborn as sns
-from statsmodels.tsa.arima_model import ARIMA
-import statsmodels.api as sm
-import sqlite3 as sql
-import pmdarima as pm
 from sql_functions import execute_sql_statement
-
-def plot_data(dates):
-    sql_stmt = "select date, cast(avg_temperature as real) as temp from temperature"
-    result = execute_sql_statement(sql_stmt)
-    data = pd.DataFrame(result, columns=["date","temp"])
-    data.set_index('date', inplace=True)
-    # print(data)
-    new_model = pm.auto_arima(data.temp, start_p=1, start_q=1,
-                        test='adf',       # use adftest to find optimal 'd'
-                        max_p=3, max_q=3, # maximum p and q
-                        m=5,              # frequency of series
-                        d=None,           # let model determine 'd'
-                        seasonal=False,   # No Seasonality
-                        start_P=0, 
-                        D=0, 
-                        trace=True,
-                        error_action='ignore',  
-                        suppress_warnings=True, 
-                        stepwise=True)
-
-    # print(new_model.summary())
-
-    # new_model.plot_diagnostics(figsize=(10,8))
-    # # plt.show()
-
-    n_periods =30
-    fc, confint = new_model.predict(n_periods = n_periods, return_conf_int = True)
-    # print(fc)
-
-    n_years = ['1960-12-02', '1960-12-03', '1960-12-04', '1960-12-05', '1960-12-06', '1960-12-07', '1960-12-08', '1960-12-09', '1960-12-10', '1960-12-11', '1960-12-12', '1960-12-13', '1960-12-14', '1960-12-15', '1960-12-16', '1960-12-17', '1960-12-18', '1960-12-19', '1960-12-20', '1960-12-21', '1960-12-22', '1960-12-23', '1960-12-24', '1960-12-25', '1960-12-26', '1960-12-27', '1960-12-28', '1960-12-29', '1960-12-30', '1960-12-31']
-    fc_ind = pd.Series(n_years)
-
-    fc_series = pd.Series(fc, index=fc_ind)
-    lower_series = pd.Series(confint[:, 0], index=fc_ind)
-    upper_series = pd.Series(confint[:, 1], index=fc_ind)
-
-    plt.figure(figsize=(12, 5))
-    # # # plt.plot(np.log10(data.temp))
-    # plt.plot(fc_series, color="darkred")
-    # # # plt.xlabel("Year")
-    # # # plt.ylabel(data. + " Rate")
-    # # plt.fill_between(lower_series.index, 
-    # #                     lower_series, 
-    # #                     upper_series, 
-    # #                     color="k", alpha=.35)
-    # # # plt.xticks(np.arange(min(data.index), max(upper_series.index)+3, 3.0))
-    # # # plt.title("Final Forecast of Crude Death Rate")
-    # # # plt.legend(("past", "forecast", "95% confidence interval"), loc="upper right")
-    plt.show()
-
-
-
+import pmdarima as pm
+import pickle
+
+sql_stmt = "select date, city_id, cast(avg_temperature as real) as temp from temperature where date is not null and temp is not null"
+
+result = execute_sql_statement(sql_stmt)
+data = pd.DataFrame(result, columns=["date", "city_id", "temp"])
+data.set_index(["date", "city_id"], inplace=True)
+ts_model = pm.auto_arima(data.temp, start_p=1, start_q=1,
+                         test='adf',
+                         max_p=3, max_q=3,
+                         m=5,
+                         d=None,
+                         seasonal=False,
+                         start_P=0,
+                         D=0,
+                         trace=True,
+                         error_action='ignore',
+                         suppress_warnings=True,
+                         stepwise=True)
+# Best model:  ARIMA(3,0,3)(0,0,0)[0]
+
+with open('arima.pkl', 'wb') as pkl:
+    pickle.dump(ts_model, pkl)
author	Priyansh <[email protected]>	2021-12-09 01:40:24 -0500
committer	Priyansh <[email protected]>	2021-12-09 01:40:24 -0500
commit	69b81f679d05d3dcffe99fb27612400d4d0b9d7f (patch)
tree	f00f5dee8ed46aecb678f3a3eea7a4b5b4e2a26e
parent	7ce99172c9265d7b48e43bf3a7b1950aac97a632 (diff)
download	temp_pred_arima-69b81f679d05d3dcffe99fb27612400d4d0b9d7f.tar.xz temp_pred_arima-69b81f679d05d3dcffe99fb27612400d4d0b9d7f.zip