diff options
| author | Priyansh <[email protected]> | 2021-12-09 01:40:24 -0500 |
|---|---|---|
| committer | Priyansh <[email protected]> | 2021-12-09 01:40:24 -0500 |
| commit | 15b1a734b0b44bebe64df8f0bd514ac68dabb967 (patch) | |
| tree | f00f5dee8ed46aecb678f3a3eea7a4b5b4e2a26e | |
| parent | 42d30d98fc88f81ff58bf66c1a6eec372cb2649c (diff) | |
| download | temp_pred_arima-15b1a734b0b44bebe64df8f0bd514ac68dabb967.tar.xz temp_pred_arima-15b1a734b0b44bebe64df8f0bd514ac68dabb967.zip | |
Refactor code and time_series_model.py now builds a pickle file
| -rw-r--r-- | sql_functions.py | 9 | ||||
| -rw-r--r-- | time_series_model.py | 83 |
2 files changed, 30 insertions, 62 deletions
diff --git a/sql_functions.py b/sql_functions.py index 313c449..54d1d05 100644 --- a/sql_functions.py +++ b/sql_functions.py @@ -1,6 +1,7 @@ import sqlite3 as sql import pandas as pd + def execute_sql_statement(sql_statement): conn = sql.connect("database.db") cur = conn.cursor() @@ -8,11 +9,13 @@ def execute_sql_statement(sql_statement): rows = cur.fetchall() return rows + def run_sql_pandas(sql_statement): conn = sql.connect("database.db") - df=pd.read_sql_query(sql_statement, conn).to_records(index=False) + df = pd.read_sql_query(sql_statement, conn).to_records(index=False) return df + def get_list_of_dict(keys, list_of_tuples): - list_of_dict = [dict(zip(keys, values)) for values in list_of_tuples] - return list_of_dict
\ No newline at end of file + list_of_dict = [dict(zip(keys, values)) for values in list_of_tuples] + return list_of_dict diff --git a/time_series_model.py b/time_series_model.py index 2896381..3fca7c0 100644 --- a/time_series_model.py +++ b/time_series_model.py @@ -1,61 +1,26 @@ -import numpy as np import pandas as pd -import matplotlib.pyplot as plt -import seaborn as sns -from statsmodels.tsa.arima_model import ARIMA -import statsmodels.api as sm -import sqlite3 as sql -import pmdarima as pm from sql_functions import execute_sql_statement - -def plot_data(dates): - sql_stmt = "select date, cast(avg_temperature as real) as temp from temperature" - result = execute_sql_statement(sql_stmt) - data = pd.DataFrame(result, columns=["date","temp"]) - data.set_index('date', inplace=True) - # print(data) - new_model = pm.auto_arima(data.temp, start_p=1, start_q=1, - test='adf', # use adftest to find optimal 'd' - max_p=3, max_q=3, # maximum p and q - m=5, # frequency of series - d=None, # let model determine 'd' - seasonal=False, # No Seasonality - start_P=0, - D=0, - trace=True, - error_action='ignore', - suppress_warnings=True, - stepwise=True) - - # print(new_model.summary()) - - # new_model.plot_diagnostics(figsize=(10,8)) - # # plt.show() - - n_periods =30 - fc, confint = new_model.predict(n_periods = n_periods, return_conf_int = True) - # print(fc) - - n_years = ['1960-12-02', '1960-12-03', '1960-12-04', '1960-12-05', '1960-12-06', '1960-12-07', '1960-12-08', '1960-12-09', '1960-12-10', '1960-12-11', '1960-12-12', '1960-12-13', '1960-12-14', '1960-12-15', '1960-12-16', '1960-12-17', '1960-12-18', '1960-12-19', '1960-12-20', '1960-12-21', '1960-12-22', '1960-12-23', '1960-12-24', '1960-12-25', '1960-12-26', '1960-12-27', '1960-12-28', '1960-12-29', '1960-12-30', '1960-12-31'] - fc_ind = pd.Series(n_years) - - fc_series = pd.Series(fc, index=fc_ind) - lower_series = pd.Series(confint[:, 0], index=fc_ind) - upper_series = pd.Series(confint[:, 1], index=fc_ind) - - plt.figure(figsize=(12, 5)) - # # # plt.plot(np.log10(data.temp)) - # plt.plot(fc_series, color="darkred") - # # # plt.xlabel("Year") - # # # plt.ylabel(data. + " Rate") - # # plt.fill_between(lower_series.index, - # # lower_series, - # # upper_series, - # # color="k", alpha=.35) - # # # plt.xticks(np.arange(min(data.index), max(upper_series.index)+3, 3.0)) - # # # plt.title("Final Forecast of Crude Death Rate") - # # # plt.legend(("past", "forecast", "95% confidence interval"), loc="upper right") - plt.show() - - - +import pmdarima as pm +import pickle + +sql_stmt = "select date, city_id, cast(avg_temperature as real) as temp from temperature where date is not null and temp is not null" + +result = execute_sql_statement(sql_stmt) +data = pd.DataFrame(result, columns=["date", "city_id", "temp"]) +data.set_index(["date", "city_id"], inplace=True) +ts_model = pm.auto_arima(data.temp, start_p=1, start_q=1, + test='adf', + max_p=3, max_q=3, + m=5, + d=None, + seasonal=False, + start_P=0, + D=0, + trace=True, + error_action='ignore', + suppress_warnings=True, + stepwise=True) +# Best model: ARIMA(3,0,3)(0,0,0)[0] + +with open('arima.pkl', 'wb') as pkl: + pickle.dump(ts_model, pkl) |
