From 3b48c04470e2c368ba18716bb789b40942d6989f Mon Sep 17 00:00:00 2001 From: Priyansh Date: Thu, 9 Dec 2021 03:01:49 -0500 Subject: changed project structure --- __dbscripts__/database_creation_script.py | 107 +++++++ __dbscripts__/dbcreation.ipynb | 460 ++++++++++++++++++++++++++++++ app/main.py | 32 --- arima_predictor.py | 26 -- database_creation_script.py | 107 ------- dbcreation.ipynb | 460 ------------------------------ decompressor.py | 15 - functions/array_functions.py | 3 + functions/sql_functions.py | 16 ++ initial_setup.py | 23 ++ libs/decompressor.py | 15 + libs/zipper.py | 14 + model_builder.py | 27 ++ predictor.py | 26 ++ requirements.txt | 2 + sql_functions.py | 21 -- time_series_model.py | 30 -- zipper.py | 14 - 18 files changed, 693 insertions(+), 705 deletions(-) create mode 100644 __dbscripts__/database_creation_script.py create mode 100644 __dbscripts__/dbcreation.ipynb delete mode 100644 app/main.py delete mode 100644 arima_predictor.py delete mode 100644 database_creation_script.py delete mode 100644 dbcreation.ipynb delete mode 100644 decompressor.py create mode 100644 functions/array_functions.py create mode 100644 functions/sql_functions.py create mode 100644 initial_setup.py create mode 100644 libs/decompressor.py create mode 100644 libs/zipper.py create mode 100644 model_builder.py create mode 100644 predictor.py delete mode 100644 sql_functions.py delete mode 100644 time_series_model.py delete mode 100644 zipper.py diff --git a/__dbscripts__/database_creation_script.py b/__dbscripts__/database_creation_script.py new file mode 100644 index 0000000..c0e9252 --- /dev/null +++ b/__dbscripts__/database_creation_script.py @@ -0,0 +1,107 @@ +# Running this script automatically creates the database and normalizes the data. +# This is same as running the 'dbcreation.ipynb' notebook step by step. +# However their are some visualizations in the notebook but this script does not. +# This script is intentended to automate the whole process in a single script. +# This should run faster than the notebook. + +import pandas as pd +import sqlite3 +from sqlite3 import Error +import os + +def create_connection(db_file, delete_db=False): + + if delete_db and os.path.exists(db_file): + os.remove(db_file) + + conn = None + try: + conn = sqlite3.connect(db_file) + conn.execute("PRAGMA foreign_keys = 1") + except Error as e: + print(e) + + return conn + +def create_table(conn, create_table_sql): + try: + c = conn.cursor() + c.execute(create_table_sql) + except Error as e: + print(e) + +def execute_sql_statement(sql_statement, conn): + cur = conn.cursor() + cur.execute(sql_statement) + + rows = cur.fetchall() + + return rows + +df=pd.read_csv("..GlobalLandTemperaturesByCity.csv") +data=df[df.dt>"1949-12-31"] +data_for_loc_table=data[['City','Latitude','Longitude']] +data_for_loc_table=data_for_loc_table.to_records(index=False) +result_data_for_loc_table=list(data_for_loc_table) +records=data.to_records(index=False) +result=list(records) +req_resut=[] +for i in result: + req_resut.append((i[4],)) +req_resut=list((set(req_resut))) +conn = create_connection('database.db') +cur=conn.cursor() +execute_sql_statement("DROP TABLE IF EXISTS City_Table",conn) +execute_sql_statement("DROP TABLE IF EXISTS Country_Table",conn) +create_table(conn,'''CREATE TABLE Country_Table (PK INTEGER PRIMARY KEY, Country_Name VARCHAR)''') +cur.executemany("INSERT INTO Country_Table ('Country_Name') Values (?)",req_resut) +abc=execute_sql_statement("Select * from Country_Table", conn) +abc1={} +for i in abc: + abc1[i[1]]=i[0] +req_result_2=[] +for i in result: + req_result_2.append((i[3],abc1[i[4]])) +create_table(conn,'''CREATE TABLE City_Table (City_Id INTEGER PRIMARY KEY, City_Name VARCHAR , Country_ID INTEGER, +FOREIGN KEY(Country_ID) REFERENCES Country_Table(PK))''') +city=[] +for i in result: + city.append((i[3],abc1[i[4]])) +city_country_unique=list((set(city))) +cur.executemany("INSERT INTO City_Table ('City_Name','Country_ID') Values (?,?)",city_country_unique) +city_index=execute_sql_statement("Select * from City_Table", conn) +city_indexes={} +for i in abc: + abc1[i[1]]=i[0] +city_table_values=execute_sql_statement("select * from City_Table ",conn) +city_ids={} +for i in city_table_values: + city_ids[i[1]]=i[0] +create_table(conn,'''CREATE TABLE Temperature (PK INTEGER PRIMARY KEY , Date VARCHAR, Avg_Temperature VARCHAR, City_ID INTEGER, +FOREIGN KEY(City_ID) REFERENCES City_Table(City_Id))''') +temp_table_values=[] +j=1 +for i in result: + j=j+1 + temp_table_values.append((i[0],i[1],city_ids[i[3]])) +cur.executemany("INSERT INTO Temperature ('Date','Avg_Temperature','City_ID') Values (?,?,?)",temp_table_values) +create_table(conn,'''CREATE TABLE Loc_Table (ID INTEGER PRIMARY KEY , City_ID INTEGER, LAT FLOAT,LONG FLOAT, +FOREIGN KEY(City_ID) REFERENCES City_Table(City_ID))''') +data_to_check_loc=result_data_for_loc_table +data_to_input_loc_table=[] +j=0 +for i in data_to_check_loc: + t=[0,0,0] + if i[1][-1]=='N': + t[1]=(float(i[1][:-1])) + if i[1][-1]=='S': + t[1]=float(i[1][:-1])*(-1) + if i[2][-1]=='E': + t[2]=float(i[2][:-1]) + if i[2][-1]=='W': + t[2]=float(i[2][:-1])*(-1) + t[0]=i[0] + tuple1=(city_ids[t[0]],t[1],t[2]) + data_to_input_loc_table.append(tuple1) +cur.executemany("INSERT INTO Loc_Table ('City_ID','LAT','LONG') Values (?,?,?)",data_to_input_loc_table) +conn.commit() diff --git a/__dbscripts__/dbcreation.ipynb b/__dbscripts__/dbcreation.ipynb new file mode 100644 index 0000000..d4f34d1 --- /dev/null +++ b/__dbscripts__/dbcreation.ipynb @@ -0,0 +1,460 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "id": "660de5e8", + "metadata": {}, + "outputs": [], + "source": [ + "# Imports\n", + "\n", + "from IPython.display import display, HTML\n", + "import pandas as pd\n", + "import sqlite3\n", + "from sqlite3 import Error\n", + "import os" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1dc168cb", + "metadata": {}, + "outputs": [], + "source": [ + "# Function to create a connection to the database\n", + "\n", + "def create_connection(db_file, delete_db=False):\n", + " \n", + " if delete_db and os.path.exists(db_file):\n", + " os.remove(db_file)\n", + "\n", + " conn = None\n", + " try:\n", + " conn = sqlite3.connect(db_file)\n", + " conn.execute(\"PRAGMA foreign_keys = 1\")\n", + " except Error as e:\n", + " print(e)\n", + "\n", + " return conn" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "910875f4", + "metadata": {}, + "outputs": [], + "source": [ + "# Function to create a database\n", + "\n", + "def create_table(conn, create_table_sql):\n", + " try:\n", + " c = conn.cursor()\n", + " c.execute(create_table_sql)\n", + " except Error as e:\n", + " print(e)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "36907a1c", + "metadata": {}, + "outputs": [], + "source": [ + "# Execute SQL statement\n", + "\n", + "def execute_sql_statement(sql_statement, conn):\n", + " cur = conn.cursor()\n", + " cur.execute(sql_statement)\n", + "\n", + " rows = cur.fetchall()\n", + "\n", + " return rows" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a0dfa189", + "metadata": {}, + "outputs": [], + "source": [ + "# Read the Non-Normalized Data\n", + "\n", + "df=pd.read_csv(\"..GlobalLandTemperaturesByCity.csv\")\n", + "data=df[df.dt>\"1949-12-31\"]\n", + " " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "77341f9c", + "metadata": {}, + "outputs": [], + "source": [ + "# Extract cities and their latitude and longitude\n", + "\n", + "data_for_loc_table=data[['City','Latitude','Longitude']]\n", + "data_for_loc_table=data_for_loc_table.to_records(index=False)\n", + "result_data_for_loc_table=list(data_for_loc_table)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e635279c", + "metadata": {}, + "outputs": [], + "source": [ + "# Extract Countries\n", + "\n", + "records=data.to_records(index=False)\n", + "result=list(records)\n", + "req_resut=[]\n", + "for i in result:\n", + " req_resut.append((i[4],))\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "74c13a5f", + "metadata": {}, + "outputs": [], + "source": [ + "# Convert the list of cities into a list with unique names\n", + "\n", + "req_resut=list((set(req_resut)))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "21eca337", + "metadata": {}, + "outputs": [], + "source": [ + "# Create a connection to the database\n", + "conn = create_connection('database.db')\n", + "cur=conn.cursor()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "af74cdef", + "metadata": {}, + "outputs": [], + "source": [ + "# Drop City Table and the Country Table if they exist and create a new Country Table and Insert the data\n", + "\n", + "execute_sql_statement(\"DROP TABLE IF EXISTS City_Table\",conn)\n", + "execute_sql_statement(\"DROP TABLE IF EXISTS Country_Table\",conn)\n", + "create_table(conn,'''CREATE TABLE Country_Table (PK INTEGER PRIMARY KEY, Country_Name VARCHAR)''')\n", + "cur.executemany(\"INSERT INTO Country_Table ('Country_Name') Values (?)\",req_resut)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "59930452", + "metadata": {}, + "outputs": [], + "source": [ + "# Make a dictionary of the country data and their IDs\n", + "abc=execute_sql_statement(\"Select * from Country_Table\", conn)\n", + "abc1={}\n", + "for i in abc:\n", + " abc1[i[1]]=i[0]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "52c77691", + "metadata": {}, + "outputs": [], + "source": [ + "req_result_2=[]\n", + "for i in result:\n", + " req_result_2.append((i[3],abc1[i[4]]))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f5b64360", + "metadata": {}, + "outputs": [], + "source": [ + "# Create the City Table\n", + "create_table(conn,'''CREATE TABLE City_Table (City_Id INTEGER PRIMARY KEY, City_Name VARCHAR , Country_ID INTEGER,\n", + "FOREIGN KEY(Country_ID) REFERENCES Country_Table(PK))''')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1206d273", + "metadata": {}, + "outputs": [], + "source": [ + "# Extract the data for City Table\n", + "city=[]\n", + "for i in result:\n", + " # j=j+1\n", + " city.append((i[3],abc1[i[4]]))\n", + "city_country_unique=list((set(city)))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "cfca5230", + "metadata": {}, + "outputs": [], + "source": [ + "# Insert the data for City Table\n", + "cur.executemany(\"INSERT INTO City_Table ('City_Name','Country_ID') Values (?,?)\",city_country_unique)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d9f71f49", + "metadata": {}, + "outputs": [], + "source": [ + "#df=execute_sql_statement(\"Select * from City_Table\", conn)\n", + "df=pd.read_sql_query(\"Select * from City_Table\", conn)\n", + "display(df)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3692d2ea", + "metadata": {}, + "outputs": [], + "source": [ + "# Build a dictionary of the city data and their IDs\n", + "city_index=execute_sql_statement(\"Select * from City_Table\", conn)\n", + "city_indexes={}\n", + "for i in abc:\n", + " abc1[i[1]]=i[0]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "9dd4ef62", + "metadata": {}, + "outputs": [], + "source": [ + "# Displaying the country data\n", + "df1=pd.read_sql_query(\"Select * from Country_Table\", conn)\n", + "display(df1)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "4c2a9868", + "metadata": {}, + "outputs": [], + "source": [ + "# Join the data from the City Table and the Country Table and display the data\n", + "\n", + "execute_sql_statement(\"select * from City_Table join Country_Table on Country_Table.PK = City_Table.Country_ID AND Country_Table.Country_Name='India' LIMIT 10\",conn)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3932f5a5", + "metadata": {}, + "outputs": [], + "source": [ + "# City IDs Dictionary\n", + "\n", + "city_table_values=execute_sql_statement(\"select * from City_Table \",conn)\n", + "city_ids={}\n", + "for i in city_table_values:\n", + " city_ids[i[1]]=i[0]\n", + "city_ids\n", + " " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "fcd18ad9", + "metadata": {}, + "outputs": [], + "source": [ + "# Create the Temperature Table\n", + "\n", + "create_table(conn,'''CREATE TABLE Temperature (PK INTEGER PRIMARY KEY , Date VARCHAR, Avg_Temperature VARCHAR, City_ID INTEGER,\n", + "FOREIGN KEY(City_ID) REFERENCES City_Table(City_Id))''')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e1a0d17e", + "metadata": {}, + "outputs": [], + "source": [ + "# Allocate the data to the Temperature Table and their corresponding City IDs\n", + "\n", + "temp_table_values=[]\n", + "j=1\n", + "for i in result:\n", + " j=j+1\n", + " temp_table_values.append((i[0],i[1],city_ids[i[3]]))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "fb218911", + "metadata": {}, + "outputs": [], + "source": [ + "# Insert the data into the Temperature Table\n", + "\n", + "cur.executemany(\"INSERT INTO Temperature ('Date','Avg_Temperature','City_ID') Values (?,?,?)\",temp_table_values)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "8e34f625", + "metadata": {}, + "outputs": [], + "source": [ + "# Displaying the Temperature Table\n", + "\n", + "df1=pd.read_sql_query(\"Select * from Temperature\", conn)\n", + "display(df1)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f08f8148", + "metadata": {}, + "outputs": [], + "source": [ + "# Create the Location Table\n", + "\n", + "create_table(conn,'''CREATE TABLE Loc_Table (ID INTEGER PRIMARY KEY , City_ID INTEGER, LAT FLOAT,LONG FLOAT,\n", + "FOREIGN KEY(City_ID) REFERENCES City_Table(City_ID))''')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ee98ee19", + "metadata": {}, + "outputs": [], + "source": [ + "# Extract the data for the Location Table, Database consists of N,W,E,S notation, we are going to convert them to the decimal notation\n", + "\n", + "#Lat= North is positive, South is negative\n", + "#Long= West is negative, East is positive\n", + "data_to_check_loc=result_data_for_loc_table\n", + "data_to_input_loc_table=[]\n", + "j=0\n", + "for i in data_to_check_loc:\n", + " t=[0,0,0]\n", + " if i[1][-1]=='N':\n", + " t[1]=(float(i[1][:-1]))\n", + " if i[1][-1]=='S':\n", + " t[1]=float(i[1][:-1])*(-1)\n", + " if i[2][-1]=='E':\n", + " t[2]=float(i[2][:-1]) \n", + " if i[2][-1]=='W':\n", + " t[2]=float(i[2][:-1])*(-1)\n", + " t[0]=i[0]\n", + " tuple1=(city_ids[t[0]],t[1],t[2])\n", + " data_to_input_loc_table.append(tuple1)\n", + "data_to_input_loc_table" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "cc0fba0c", + "metadata": {}, + "outputs": [], + "source": [ + "# Insert the data into the Location Table\n", + "\n", + "cur.executemany(\"INSERT INTO Loc_Table ('City_ID','LAT','LONG') Values (?,?,?)\",data_to_input_loc_table)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "aead2755", + "metadata": {}, + "outputs": [], + "source": [ + "# Displaying the Location Table\n", + "\n", + "df1=pd.read_sql_query(\"Select * from Loc_Table \", conn)\n", + "display(df1)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f67e8d5b", + "metadata": {}, + "outputs": [], + "source": [ + "# df1=pd.read_sql_query(\"select * from Loc_Table join Temperature on Loc_Table.City_ID=Temperature.City_ID join city_table on city_table.city_id=Temperature.City_ID join country_table on country_table.pk=city_table.country_id group by Date LIMIT 10\", conn)\n", + "# display(df1)\n", + "#(\"select * from Loc_Table join Temperature on Loc_Table.City_ID=Temperature.City_ID join city_table on city_table.city_id=Temperature.City_ID join country_table on country_table.pk=city_table.country_id LIMIT 10\",conn)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "cbc5ad63", + "metadata": {}, + "outputs": [], + "source": [ + "# Commit the changes\n", + "\n", + "conn.commit()" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.8" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/app/main.py b/app/main.py deleted file mode 100644 index c2308f9..0000000 --- a/app/main.py +++ /dev/null @@ -1,32 +0,0 @@ -from flask import Flask, render_template -from flask import Markup -import sqlite3 as sql -import pandas as pd - -app = Flask(__name__) - - -def execute_sql_statement(sql_statement, conn): - cur = conn.cursor() - cur.execute(sql_statement) - rows = cur.fetchall() - return rows - -def get_list_of_dict(keys, list_of_tuples): - list_of_dict = [dict(zip(keys, values)) for values in list_of_tuples] - return list_of_dict - -def get_cities_list(): - conn = sql.connect("database.db") - sql_statement = 'SELECT DISTINCT(City_Name) as City_Name, Lat, Long FROM City_table join Loc_Table on City_Table.City_Id = Loc_table.City_Id' - df=pd.read_sql_query(sql_statement, conn).to_records(index=False) - return df - -@app.route("/") -@app.route("/index") -def index(): - - cities_list = get_cities_list() - keys = ("city", "latitude", "longitude") - cities_list = get_list_of_dict(keys, cities_list) - return render_template("index.html", cities_list=cities_list) \ No newline at end of file diff --git a/arima_predictor.py b/arima_predictor.py deleted file mode 100644 index 1b38a0f..0000000 --- a/arima_predictor.py +++ /dev/null @@ -1,26 +0,0 @@ -import pandas as pd -import matplotlib.pyplot as plt -import pickle -from decompressor import decompress_arima - -decompress_arima() - -with open('arima.pkl', 'rb') as pkl: - n_periods = 30 - fc, confint = pickle.load(pkl).predict( - n_periods=n_periods, return_conf_int=True) - n_years = ['1960-12-02', '1960-12-03', '1960-12-04', '1960-12-05', '1960-12-06', '1960-12-07', '1960-12-08', '1960-12-09', '1960-12-10', '1960-12-11', '1960-12-12', '1960-12-13', '1960-12-14', '1960-12-15', '1960-12-16', - '1960-12-17', '1960-12-18', '1960-12-19', '1960-12-20', '1960-12-21', '1960-12-22', '1960-12-23', '1960-12-24', '1960-12-25', '1960-12-26', '1960-12-27', '1960-12-28', '1960-12-29', '1960-12-30', '1960-12-31'] - city_ids = ["1031", "1031", "1031", "1031", "1031", "1031", "1031", "1031", "1031", "1031", "1031", "1031", "1031", "1031", "1031", "1031", "1031", "1031", "1031", "1031", "1031", "1031", "1031", "1031", "1031", "1031", "1031", "1031", "1031", "1031"] - fc_ind = pd.Series(n_years, city_ids) - - fc_series = pd.Series(fc, index=fc_ind) - lower_series = pd.Series(confint[:, 0], index=fc_ind) - upper_series = pd.Series(confint[:, 1], index=fc_ind) - plt.figure(figsize=(12, 5)) - plt.plot(fc_series, color="darkred") - plt.fill_between(lower_series.index, - lower_series, - upper_series, - color="k", alpha=.35) - plt.show() diff --git a/database_creation_script.py b/database_creation_script.py deleted file mode 100644 index bfc2ae5..0000000 --- a/database_creation_script.py +++ /dev/null @@ -1,107 +0,0 @@ -# Running this script automatically creates the database and normalizes the data. -# This is same as running the 'dbcreation.ipynb' notebook step by step. -# However their are some visualizations in the notebook but this script does not. -# This script is intentended to automate the whole process in a single script. -# This should run faster than the notebook. - -import pandas as pd -import sqlite3 -from sqlite3 import Error -import os - -def create_connection(db_file, delete_db=False): - - if delete_db and os.path.exists(db_file): - os.remove(db_file) - - conn = None - try: - conn = sqlite3.connect(db_file) - conn.execute("PRAGMA foreign_keys = 1") - except Error as e: - print(e) - - return conn - -def create_table(conn, create_table_sql): - try: - c = conn.cursor() - c.execute(create_table_sql) - except Error as e: - print(e) - -def execute_sql_statement(sql_statement, conn): - cur = conn.cursor() - cur.execute(sql_statement) - - rows = cur.fetchall() - - return rows - -df=pd.read_csv("GlobalLandTemperaturesByCity.csv") -data=df[df.dt>"1949-12-31"] -data_for_loc_table=data[['City','Latitude','Longitude']] -data_for_loc_table=data_for_loc_table.to_records(index=False) -result_data_for_loc_table=list(data_for_loc_table) -records=data.to_records(index=False) -result=list(records) -req_resut=[] -for i in result: - req_resut.append((i[4],)) -req_resut=list((set(req_resut))) -conn = create_connection('database.db') -cur=conn.cursor() -execute_sql_statement("DROP TABLE IF EXISTS City_Table",conn) -execute_sql_statement("DROP TABLE IF EXISTS Country_Table",conn) -create_table(conn,'''CREATE TABLE Country_Table (PK INTEGER PRIMARY KEY, Country_Name VARCHAR)''') -cur.executemany("INSERT INTO Country_Table ('Country_Name') Values (?)",req_resut) -abc=execute_sql_statement("Select * from Country_Table", conn) -abc1={} -for i in abc: - abc1[i[1]]=i[0] -req_result_2=[] -for i in result: - req_result_2.append((i[3],abc1[i[4]])) -create_table(conn,'''CREATE TABLE City_Table (City_Id INTEGER PRIMARY KEY, City_Name VARCHAR , Country_ID INTEGER, -FOREIGN KEY(Country_ID) REFERENCES Country_Table(PK))''') -city=[] -for i in result: - city.append((i[3],abc1[i[4]])) -city_country_unique=list((set(city))) -cur.executemany("INSERT INTO City_Table ('City_Name','Country_ID') Values (?,?)",city_country_unique) -city_index=execute_sql_statement("Select * from City_Table", conn) -city_indexes={} -for i in abc: - abc1[i[1]]=i[0] -city_table_values=execute_sql_statement("select * from City_Table ",conn) -city_ids={} -for i in city_table_values: - city_ids[i[1]]=i[0] -create_table(conn,'''CREATE TABLE Temperature (PK INTEGER PRIMARY KEY , Date VARCHAR, Avg_Temperature VARCHAR, City_ID INTEGER, -FOREIGN KEY(City_ID) REFERENCES City_Table(City_Id))''') -temp_table_values=[] -j=1 -for i in result: - j=j+1 - temp_table_values.append((i[0],i[1],city_ids[i[3]])) -cur.executemany("INSERT INTO Temperature ('Date','Avg_Temperature','City_ID') Values (?,?,?)",temp_table_values) -create_table(conn,'''CREATE TABLE Loc_Table (ID INTEGER PRIMARY KEY , City_ID INTEGER, LAT FLOAT,LONG FLOAT, -FOREIGN KEY(City_ID) REFERENCES City_Table(City_ID))''') -data_to_check_loc=result_data_for_loc_table -data_to_input_loc_table=[] -j=0 -for i in data_to_check_loc: - t=[0,0,0] - if i[1][-1]=='N': - t[1]=(float(i[1][:-1])) - if i[1][-1]=='S': - t[1]=float(i[1][:-1])*(-1) - if i[2][-1]=='E': - t[2]=float(i[2][:-1]) - if i[2][-1]=='W': - t[2]=float(i[2][:-1])*(-1) - t[0]=i[0] - tuple1=(city_ids[t[0]],t[1],t[2]) - data_to_input_loc_table.append(tuple1) -cur.executemany("INSERT INTO Loc_Table ('City_ID','LAT','LONG') Values (?,?,?)",data_to_input_loc_table) -conn.commit() diff --git a/dbcreation.ipynb b/dbcreation.ipynb deleted file mode 100644 index f847fab..0000000 --- a/dbcreation.ipynb +++ /dev/null @@ -1,460 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": null, - "id": "660de5e8", - "metadata": {}, - "outputs": [], - "source": [ - "# Imports\n", - "\n", - "from IPython.display import display, HTML\n", - "import pandas as pd\n", - "import sqlite3\n", - "from sqlite3 import Error\n", - "import os" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "1dc168cb", - "metadata": {}, - "outputs": [], - "source": [ - "# Function to create a connection to the database\n", - "\n", - "def create_connection(db_file, delete_db=False):\n", - " \n", - " if delete_db and os.path.exists(db_file):\n", - " os.remove(db_file)\n", - "\n", - " conn = None\n", - " try:\n", - " conn = sqlite3.connect(db_file)\n", - " conn.execute(\"PRAGMA foreign_keys = 1\")\n", - " except Error as e:\n", - " print(e)\n", - "\n", - " return conn" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "910875f4", - "metadata": {}, - "outputs": [], - "source": [ - "# Function to create a database\n", - "\n", - "def create_table(conn, create_table_sql):\n", - " try:\n", - " c = conn.cursor()\n", - " c.execute(create_table_sql)\n", - " except Error as e:\n", - " print(e)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "36907a1c", - "metadata": {}, - "outputs": [], - "source": [ - "# Execute SQL statement\n", - "\n", - "def execute_sql_statement(sql_statement, conn):\n", - " cur = conn.cursor()\n", - " cur.execute(sql_statement)\n", - "\n", - " rows = cur.fetchall()\n", - "\n", - " return rows" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "a0dfa189", - "metadata": {}, - "outputs": [], - "source": [ - "# Read the Non-Normalized Data\n", - "\n", - "df=pd.read_csv(\"GlobalLandTemperaturesByCity.csv\")\n", - "data=df[df.dt>\"1949-12-31\"]\n", - " " - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "77341f9c", - "metadata": {}, - "outputs": [], - "source": [ - "# Extract cities and their latitude and longitude\n", - "\n", - "data_for_loc_table=data[['City','Latitude','Longitude']]\n", - "data_for_loc_table=data_for_loc_table.to_records(index=False)\n", - "result_data_for_loc_table=list(data_for_loc_table)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "e635279c", - "metadata": {}, - "outputs": [], - "source": [ - "# Extract Countries\n", - "\n", - "records=data.to_records(index=False)\n", - "result=list(records)\n", - "req_resut=[]\n", - "for i in result:\n", - " req_resut.append((i[4],))\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "74c13a5f", - "metadata": {}, - "outputs": [], - "source": [ - "# Convert the list of cities into a list with unique names\n", - "\n", - "req_resut=list((set(req_resut)))" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "21eca337", - "metadata": {}, - "outputs": [], - "source": [ - "# Create a connection to the database\n", - "conn = create_connection('database.db')\n", - "cur=conn.cursor()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "af74cdef", - "metadata": {}, - "outputs": [], - "source": [ - "# Drop City Table and the Country Table if they exist and create a new Country Table and Insert the data\n", - "\n", - "execute_sql_statement(\"DROP TABLE IF EXISTS City_Table\",conn)\n", - "execute_sql_statement(\"DROP TABLE IF EXISTS Country_Table\",conn)\n", - "create_table(conn,'''CREATE TABLE Country_Table (PK INTEGER PRIMARY KEY, Country_Name VARCHAR)''')\n", - "cur.executemany(\"INSERT INTO Country_Table ('Country_Name') Values (?)\",req_resut)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "59930452", - "metadata": {}, - "outputs": [], - "source": [ - "# Make a dictionary of the country data and their IDs\n", - "abc=execute_sql_statement(\"Select * from Country_Table\", conn)\n", - "abc1={}\n", - "for i in abc:\n", - " abc1[i[1]]=i[0]" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "52c77691", - "metadata": {}, - "outputs": [], - "source": [ - "req_result_2=[]\n", - "for i in result:\n", - " req_result_2.append((i[3],abc1[i[4]]))" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "f5b64360", - "metadata": {}, - "outputs": [], - "source": [ - "# Create the City Table\n", - "create_table(conn,'''CREATE TABLE City_Table (City_Id INTEGER PRIMARY KEY, City_Name VARCHAR , Country_ID INTEGER,\n", - "FOREIGN KEY(Country_ID) REFERENCES Country_Table(PK))''')" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "1206d273", - "metadata": {}, - "outputs": [], - "source": [ - "# Extract the data for City Table\n", - "city=[]\n", - "for i in result:\n", - " # j=j+1\n", - " city.append((i[3],abc1[i[4]]))\n", - "city_country_unique=list((set(city)))" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "cfca5230", - "metadata": {}, - "outputs": [], - "source": [ - "# Insert the data for City Table\n", - "cur.executemany(\"INSERT INTO City_Table ('City_Name','Country_ID') Values (?,?)\",city_country_unique)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "d9f71f49", - "metadata": {}, - "outputs": [], - "source": [ - "#df=execute_sql_statement(\"Select * from City_Table\", conn)\n", - "df=pd.read_sql_query(\"Select * from City_Table\", conn)\n", - "display(df)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "3692d2ea", - "metadata": {}, - "outputs": [], - "source": [ - "# Build a dictionary of the city data and their IDs\n", - "city_index=execute_sql_statement(\"Select * from City_Table\", conn)\n", - "city_indexes={}\n", - "for i in abc:\n", - " abc1[i[1]]=i[0]" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "9dd4ef62", - "metadata": {}, - "outputs": [], - "source": [ - "# Displaying the country data\n", - "df1=pd.read_sql_query(\"Select * from Country_Table\", conn)\n", - "display(df1)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "4c2a9868", - "metadata": {}, - "outputs": [], - "source": [ - "# Join the data from the City Table and the Country Table and display the data\n", - "\n", - "execute_sql_statement(\"select * from City_Table join Country_Table on Country_Table.PK = City_Table.Country_ID AND Country_Table.Country_Name='India' LIMIT 10\",conn)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "3932f5a5", - "metadata": {}, - "outputs": [], - "source": [ - "# City IDs Dictionary\n", - "\n", - "city_table_values=execute_sql_statement(\"select * from City_Table \",conn)\n", - "city_ids={}\n", - "for i in city_table_values:\n", - " city_ids[i[1]]=i[0]\n", - "city_ids\n", - " " - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "fcd18ad9", - "metadata": {}, - "outputs": [], - "source": [ - "# Create the Temperature Table\n", - "\n", - "create_table(conn,'''CREATE TABLE Temperature (PK INTEGER PRIMARY KEY , Date VARCHAR, Avg_Temperature VARCHAR, City_ID INTEGER,\n", - "FOREIGN KEY(City_ID) REFERENCES City_Table(City_Id))''')" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "e1a0d17e", - "metadata": {}, - "outputs": [], - "source": [ - "# Allocate the data to the Temperature Table and their corresponding City IDs\n", - "\n", - "temp_table_values=[]\n", - "j=1\n", - "for i in result:\n", - " j=j+1\n", - " temp_table_values.append((i[0],i[1],city_ids[i[3]]))" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "fb218911", - "metadata": {}, - "outputs": [], - "source": [ - "# Insert the data into the Temperature Table\n", - "\n", - "cur.executemany(\"INSERT INTO Temperature ('Date','Avg_Temperature','City_ID') Values (?,?,?)\",temp_table_values)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "8e34f625", - "metadata": {}, - "outputs": [], - "source": [ - "# Displaying the Temperature Table\n", - "\n", - "df1=pd.read_sql_query(\"Select * from Temperature\", conn)\n", - "display(df1)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "f08f8148", - "metadata": {}, - "outputs": [], - "source": [ - "# Create the Location Table\n", - "\n", - "create_table(conn,'''CREATE TABLE Loc_Table (ID INTEGER PRIMARY KEY , City_ID INTEGER, LAT FLOAT,LONG FLOAT,\n", - "FOREIGN KEY(City_ID) REFERENCES City_Table(City_ID))''')" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "ee98ee19", - "metadata": {}, - "outputs": [], - "source": [ - "# Extract the data for the Location Table, Database consists of N,W,E,S notation, we are going to convert them to the decimal notation\n", - "\n", - "#Lat= North is positive, South is negative\n", - "#Long= West is negative, East is positive\n", - "data_to_check_loc=result_data_for_loc_table\n", - "data_to_input_loc_table=[]\n", - "j=0\n", - "for i in data_to_check_loc:\n", - " t=[0,0,0]\n", - " if i[1][-1]=='N':\n", - " t[1]=(float(i[1][:-1]))\n", - " if i[1][-1]=='S':\n", - " t[1]=float(i[1][:-1])*(-1)\n", - " if i[2][-1]=='E':\n", - " t[2]=float(i[2][:-1]) \n", - " if i[2][-1]=='W':\n", - " t[2]=float(i[2][:-1])*(-1)\n", - " t[0]=i[0]\n", - " tuple1=(city_ids[t[0]],t[1],t[2])\n", - " data_to_input_loc_table.append(tuple1)\n", - "data_to_input_loc_table" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "cc0fba0c", - "metadata": {}, - "outputs": [], - "source": [ - "# Insert the data into the Location Table\n", - "\n", - "cur.executemany(\"INSERT INTO Loc_Table ('City_ID','LAT','LONG') Values (?,?,?)\",data_to_input_loc_table)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "aead2755", - "metadata": {}, - "outputs": [], - "source": [ - "# Displaying the Location Table\n", - "\n", - "df1=pd.read_sql_query(\"Select * from Loc_Table \", conn)\n", - "display(df1)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "f67e8d5b", - "metadata": {}, - "outputs": [], - "source": [ - "# df1=pd.read_sql_query(\"select * from Loc_Table join Temperature on Loc_Table.City_ID=Temperature.City_ID join city_table on city_table.city_id=Temperature.City_ID join country_table on country_table.pk=city_table.country_id group by Date LIMIT 10\", conn)\n", - "# display(df1)\n", - "#(\"select * from Loc_Table join Temperature on Loc_Table.City_ID=Temperature.City_ID join city_table on city_table.city_id=Temperature.City_ID join country_table on country_table.pk=city_table.country_id LIMIT 10\",conn)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "cbc5ad63", - "metadata": {}, - "outputs": [], - "source": [ - "# Commit the changes\n", - "\n", - "conn.commit()" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.8.8" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} diff --git a/decompressor.py b/decompressor.py deleted file mode 100644 index c54dec9..0000000 --- a/decompressor.py +++ /dev/null @@ -1,15 +0,0 @@ -from os.path import exists -import zlib - -def decompress_arima(): - if not exists('arima.pkl'): - if not exists('arima.compressed'): - raise FileNotFoundError('arima.compressed not found') - else: - print('Decompressing arima.compressed') - with open('arima.compressed', 'rb') as f: - data = zlib.decompress(f.read()) - with open('arima.pkl', 'wb') as f: - f.write(data) - -decompress_arima() \ No newline at end of file diff --git a/functions/array_functions.py b/functions/array_functions.py new file mode 100644 index 0000000..be15abd --- /dev/null +++ b/functions/array_functions.py @@ -0,0 +1,3 @@ +def get_list_of_dict(keys, list_of_tuples): + list_of_dict = [dict(zip(keys, values)) for values in list_of_tuples] + return list_of_dict diff --git a/functions/sql_functions.py b/functions/sql_functions.py new file mode 100644 index 0000000..5cc2ae9 --- /dev/null +++ b/functions/sql_functions.py @@ -0,0 +1,16 @@ +import sqlite3 as sql +import pandas as pd + + +def execute_sql_statement(sql_statement): + conn = sql.connect("database.db") + cur = conn.cursor() + cur.execute(sql_statement) + rows = cur.fetchall() + return rows + + +def run_sql_pandas(sql_statement): + conn = sql.connect("database.db") + df = pd.read_sql_query(sql_statement, conn).to_records(index=False) + return df diff --git a/initial_setup.py b/initial_setup.py new file mode 100644 index 0000000..0384a70 --- /dev/null +++ b/initial_setup.py @@ -0,0 +1,23 @@ +import sys +import requests + + +def download(url, filename): + with open(filename, 'wb') as f: + response = requests.get(url, stream=True) + total = response.headers.get('content-length') + + if total is None: + f.write(response.content) + else: + downloaded = 0 + total = int(total) + for data in response.iter_content(chunk_size=max(int(total/1000), 1024*1024)): + downloaded += len(data) + f.write(data) + done = int(50*downloaded/total) + sys.stdout.write('\r[{}{}]'.format('█' * done, '.' * (50-done))) + sys.stdout.flush() + sys.stdout.write('\n') + +print('[*] Downloading test file of size 100 MB...') \ No newline at end of file diff --git a/libs/decompressor.py b/libs/decompressor.py new file mode 100644 index 0000000..c54dec9 --- /dev/null +++ b/libs/decompressor.py @@ -0,0 +1,15 @@ +from os.path import exists +import zlib + +def decompress_arima(): + if not exists('arima.pkl'): + if not exists('arima.compressed'): + raise FileNotFoundError('arima.compressed not found') + else: + print('Decompressing arima.compressed') + with open('arima.compressed', 'rb') as f: + data = zlib.decompress(f.read()) + with open('arima.pkl', 'wb') as f: + f.write(data) + +decompress_arima() \ No newline at end of file diff --git a/libs/zipper.py b/libs/zipper.py new file mode 100644 index 0000000..7e5b476 --- /dev/null +++ b/libs/zipper.py @@ -0,0 +1,14 @@ +import zlib +from os.path import exists +from os import remove + + +filename_in = "arima.pkl" +filename_out = "arima.compressed" +if exists(filename_out): + remove(filename_out) +with open(filename_in, mode="rb") as fin, open(filename_out, mode="wb") as fout: + data = fin.read() + print("Compressing Pickle File...") + compressed_data = zlib.compress(data, zlib.Z_BEST_COMPRESSION) + fout.write(compressed_data) \ No newline at end of file diff --git a/model_builder.py b/model_builder.py new file mode 100644 index 0000000..b12897a --- /dev/null +++ b/model_builder.py @@ -0,0 +1,27 @@ +import pandas as pd +from functions.sql_functions import execute_sql_statement +import pmdarima as pm +import pickle + +sql_stmt = "select date, city_id, cast(avg_temperature as real) as temp from temperature where date is not null and temp is not null" + +result = execute_sql_statement(sql_stmt) +data = pd.DataFrame(result, columns=["date", "city_id", "temp"]) +data.set_index(["date", "city_id"], inplace=True) +ts_model = pm.auto_arima(data.temp, start_p=1, start_q=1, + test='adf', + max_p=3, max_q=3, + m=5, + d=None, + seasonal=False, + start_P=0, + D=0, + trace=True, + error_action='ignore', + suppress_warnings=True, + stepwise=True) +# Best model: ARIMA(3,0,3)(0,0,0)[0] + +with open('arima.pkl', 'wb') as pkl: + pickle.dump(ts_model, pkl) + diff --git a/predictor.py b/predictor.py new file mode 100644 index 0000000..18a1708 --- /dev/null +++ b/predictor.py @@ -0,0 +1,26 @@ +import pandas as pd +import matplotlib.pyplot as plt +import pickle +from libs.decompressor import decompress_arima + +decompress_arima() + +with open('arima.pkl', 'rb') as pkl: + n_periods = 30 + fc, confint = pickle.load(pkl).predict( + n_periods=n_periods, return_conf_int=True) + n_years = ['1960-12-02', '1960-12-03', '1960-12-04', '1960-12-05', '1960-12-06', '1960-12-07', '1960-12-08', '1960-12-09', '1960-12-10', '1960-12-11', '1960-12-12', '1960-12-13', '1960-12-14', '1960-12-15', '1960-12-16', + '1960-12-17', '1960-12-18', '1960-12-19', '1960-12-20', '1960-12-21', '1960-12-22', '1960-12-23', '1960-12-24', '1960-12-25', '1960-12-26', '1960-12-27', '1960-12-28', '1960-12-29', '1960-12-30', '1960-12-31'] + city_ids = ["1031", "1031", "1031", "1031", "1031", "1031", "1031", "1031", "1031", "1031", "1031", "1031", "1031", "1031", "1031", "1031", "1031", "1031", "1031", "1031", "1031", "1031", "1031", "1031", "1031", "1031", "1031", "1031", "1031", "1031"] + fc_ind = pd.Series(n_years, city_ids) + + fc_series = pd.Series(fc, index=fc_ind) + lower_series = pd.Series(confint[:, 0], index=fc_ind) + upper_series = pd.Series(confint[:, 1], index=fc_ind) + plt.figure(figsize=(12, 5)) + plt.plot(fc_series, color="darkred") + plt.fill_between(lower_series.index, + lower_series, + upper_series, + color="k", alpha=.35) + plt.show() diff --git a/requirements.txt b/requirements.txt index dd9f47b..e0a8db6 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,2 +1,4 @@ flask pandas +pmdarima + diff --git a/sql_functions.py b/sql_functions.py deleted file mode 100644 index 54d1d05..0000000 --- a/sql_functions.py +++ /dev/null @@ -1,21 +0,0 @@ -import sqlite3 as sql -import pandas as pd - - -def execute_sql_statement(sql_statement): - conn = sql.connect("database.db") - cur = conn.cursor() - cur.execute(sql_statement) - rows = cur.fetchall() - return rows - - -def run_sql_pandas(sql_statement): - conn = sql.connect("database.db") - df = pd.read_sql_query(sql_statement, conn).to_records(index=False) - return df - - -def get_list_of_dict(keys, list_of_tuples): - list_of_dict = [dict(zip(keys, values)) for values in list_of_tuples] - return list_of_dict diff --git a/time_series_model.py b/time_series_model.py deleted file mode 100644 index 769fa11..0000000 --- a/time_series_model.py +++ /dev/null @@ -1,30 +0,0 @@ -import pandas as pd -from sql_functions import execute_sql_statement -import pmdarima as pm -import pickle -import zlib -from os.path import exists -from os import remove - -sql_stmt = "select date, city_id, cast(avg_temperature as real) as temp from temperature where date is not null and temp is not null" - -result = execute_sql_statement(sql_stmt) -data = pd.DataFrame(result, columns=["date", "city_id", "temp"]) -data.set_index(["date", "city_id"], inplace=True) -ts_model = pm.auto_arima(data.temp, start_p=1, start_q=1, - test='adf', - max_p=3, max_q=3, - m=5, - d=None, - seasonal=False, - start_P=0, - D=0, - trace=True, - error_action='ignore', - suppress_warnings=True, - stepwise=True) -# Best model: ARIMA(3,0,3)(0,0,0)[0] - -with open('arima.pkl', 'wb') as pkl: - pickle.dump(ts_model, pkl) - diff --git a/zipper.py b/zipper.py deleted file mode 100644 index 7e5b476..0000000 --- a/zipper.py +++ /dev/null @@ -1,14 +0,0 @@ -import zlib -from os.path import exists -from os import remove - - -filename_in = "arima.pkl" -filename_out = "arima.compressed" -if exists(filename_out): - remove(filename_out) -with open(filename_in, mode="rb") as fin, open(filename_out, mode="wb") as fout: - data = fin.read() - print("Compressing Pickle File...") - compressed_data = zlib.compress(data, zlib.Z_BEST_COMPRESSION) - fout.write(compressed_data) \ No newline at end of file -- cgit v1.2.3