diff options
| author | Priyansh <[email protected]> | 2021-12-09 03:01:49 -0500 |
|---|---|---|
| committer | Priyansh <[email protected]> | 2021-12-09 03:01:49 -0500 |
| commit | 3b48c04470e2c368ba18716bb789b40942d6989f (patch) | |
| tree | 1cb97d5271f2233fbf9d0ef864b91fc8a61efb17 /__dbscripts__ | |
| parent | 22e1d761027501bfa59b92776cf1c13eef3a0004 (diff) | |
| download | temp_pred_arima-3b48c04470e2c368ba18716bb789b40942d6989f.tar.xz temp_pred_arima-3b48c04470e2c368ba18716bb789b40942d6989f.zip | |
changed project structure
Diffstat (limited to '__dbscripts__')
| -rw-r--r-- | __dbscripts__/database_creation_script.py | 107 | ||||
| -rw-r--r-- | __dbscripts__/dbcreation.ipynb | 460 |
2 files changed, 567 insertions, 0 deletions
diff --git a/__dbscripts__/database_creation_script.py b/__dbscripts__/database_creation_script.py new file mode 100644 index 0000000..c0e9252 --- /dev/null +++ b/__dbscripts__/database_creation_script.py @@ -0,0 +1,107 @@ +# Running this script automatically creates the database and normalizes the data. +# This is same as running the 'dbcreation.ipynb' notebook step by step. +# However their are some visualizations in the notebook but this script does not. +# This script is intentended to automate the whole process in a single script. +# This should run faster than the notebook. + +import pandas as pd +import sqlite3 +from sqlite3 import Error +import os + +def create_connection(db_file, delete_db=False): + + if delete_db and os.path.exists(db_file): + os.remove(db_file) + + conn = None + try: + conn = sqlite3.connect(db_file) + conn.execute("PRAGMA foreign_keys = 1") + except Error as e: + print(e) + + return conn + +def create_table(conn, create_table_sql): + try: + c = conn.cursor() + c.execute(create_table_sql) + except Error as e: + print(e) + +def execute_sql_statement(sql_statement, conn): + cur = conn.cursor() + cur.execute(sql_statement) + + rows = cur.fetchall() + + return rows + +df=pd.read_csv("..GlobalLandTemperaturesByCity.csv") +data=df[df.dt>"1949-12-31"] +data_for_loc_table=data[['City','Latitude','Longitude']] +data_for_loc_table=data_for_loc_table.to_records(index=False) +result_data_for_loc_table=list(data_for_loc_table) +records=data.to_records(index=False) +result=list(records) +req_resut=[] +for i in result: + req_resut.append((i[4],)) +req_resut=list((set(req_resut))) +conn = create_connection('database.db') +cur=conn.cursor() +execute_sql_statement("DROP TABLE IF EXISTS City_Table",conn) +execute_sql_statement("DROP TABLE IF EXISTS Country_Table",conn) +create_table(conn,'''CREATE TABLE Country_Table (PK INTEGER PRIMARY KEY, Country_Name VARCHAR)''') +cur.executemany("INSERT INTO Country_Table ('Country_Name') Values (?)",req_resut) +abc=execute_sql_statement("Select * from Country_Table", conn) +abc1={} +for i in abc: + abc1[i[1]]=i[0] +req_result_2=[] +for i in result: + req_result_2.append((i[3],abc1[i[4]])) +create_table(conn,'''CREATE TABLE City_Table (City_Id INTEGER PRIMARY KEY, City_Name VARCHAR , Country_ID INTEGER, +FOREIGN KEY(Country_ID) REFERENCES Country_Table(PK))''') +city=[] +for i in result: + city.append((i[3],abc1[i[4]])) +city_country_unique=list((set(city))) +cur.executemany("INSERT INTO City_Table ('City_Name','Country_ID') Values (?,?)",city_country_unique) +city_index=execute_sql_statement("Select * from City_Table", conn) +city_indexes={} +for i in abc: + abc1[i[1]]=i[0] +city_table_values=execute_sql_statement("select * from City_Table ",conn) +city_ids={} +for i in city_table_values: + city_ids[i[1]]=i[0] +create_table(conn,'''CREATE TABLE Temperature (PK INTEGER PRIMARY KEY , Date VARCHAR, Avg_Temperature VARCHAR, City_ID INTEGER, +FOREIGN KEY(City_ID) REFERENCES City_Table(City_Id))''') +temp_table_values=[] +j=1 +for i in result: + j=j+1 + temp_table_values.append((i[0],i[1],city_ids[i[3]])) +cur.executemany("INSERT INTO Temperature ('Date','Avg_Temperature','City_ID') Values (?,?,?)",temp_table_values) +create_table(conn,'''CREATE TABLE Loc_Table (ID INTEGER PRIMARY KEY , City_ID INTEGER, LAT FLOAT,LONG FLOAT, +FOREIGN KEY(City_ID) REFERENCES City_Table(City_ID))''') +data_to_check_loc=result_data_for_loc_table +data_to_input_loc_table=[] +j=0 +for i in data_to_check_loc: + t=[0,0,0] + if i[1][-1]=='N': + t[1]=(float(i[1][:-1])) + if i[1][-1]=='S': + t[1]=float(i[1][:-1])*(-1) + if i[2][-1]=='E': + t[2]=float(i[2][:-1]) + if i[2][-1]=='W': + t[2]=float(i[2][:-1])*(-1) + t[0]=i[0] + tuple1=(city_ids[t[0]],t[1],t[2]) + data_to_input_loc_table.append(tuple1) +cur.executemany("INSERT INTO Loc_Table ('City_ID','LAT','LONG') Values (?,?,?)",data_to_input_loc_table) +conn.commit() diff --git a/__dbscripts__/dbcreation.ipynb b/__dbscripts__/dbcreation.ipynb new file mode 100644 index 0000000..d4f34d1 --- /dev/null +++ b/__dbscripts__/dbcreation.ipynb @@ -0,0 +1,460 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "id": "660de5e8", + "metadata": {}, + "outputs": [], + "source": [ + "# Imports\n", + "\n", + "from IPython.display import display, HTML\n", + "import pandas as pd\n", + "import sqlite3\n", + "from sqlite3 import Error\n", + "import os" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1dc168cb", + "metadata": {}, + "outputs": [], + "source": [ + "# Function to create a connection to the database\n", + "\n", + "def create_connection(db_file, delete_db=False):\n", + " \n", + " if delete_db and os.path.exists(db_file):\n", + " os.remove(db_file)\n", + "\n", + " conn = None\n", + " try:\n", + " conn = sqlite3.connect(db_file)\n", + " conn.execute(\"PRAGMA foreign_keys = 1\")\n", + " except Error as e:\n", + " print(e)\n", + "\n", + " return conn" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "910875f4", + "metadata": {}, + "outputs": [], + "source": [ + "# Function to create a database\n", + "\n", + "def create_table(conn, create_table_sql):\n", + " try:\n", + " c = conn.cursor()\n", + " c.execute(create_table_sql)\n", + " except Error as e:\n", + " print(e)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "36907a1c", + "metadata": {}, + "outputs": [], + "source": [ + "# Execute SQL statement\n", + "\n", + "def execute_sql_statement(sql_statement, conn):\n", + " cur = conn.cursor()\n", + " cur.execute(sql_statement)\n", + "\n", + " rows = cur.fetchall()\n", + "\n", + " return rows" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a0dfa189", + "metadata": {}, + "outputs": [], + "source": [ + "# Read the Non-Normalized Data\n", + "\n", + "df=pd.read_csv(\"..GlobalLandTemperaturesByCity.csv\")\n", + "data=df[df.dt>\"1949-12-31\"]\n", + " " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "77341f9c", + "metadata": {}, + "outputs": [], + "source": [ + "# Extract cities and their latitude and longitude\n", + "\n", + "data_for_loc_table=data[['City','Latitude','Longitude']]\n", + "data_for_loc_table=data_for_loc_table.to_records(index=False)\n", + "result_data_for_loc_table=list(data_for_loc_table)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e635279c", + "metadata": {}, + "outputs": [], + "source": [ + "# Extract Countries\n", + "\n", + "records=data.to_records(index=False)\n", + "result=list(records)\n", + "req_resut=[]\n", + "for i in result:\n", + " req_resut.append((i[4],))\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "74c13a5f", + "metadata": {}, + "outputs": [], + "source": [ + "# Convert the list of cities into a list with unique names\n", + "\n", + "req_resut=list((set(req_resut)))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "21eca337", + "metadata": {}, + "outputs": [], + "source": [ + "# Create a connection to the database\n", + "conn = create_connection('database.db')\n", + "cur=conn.cursor()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "af74cdef", + "metadata": {}, + "outputs": [], + "source": [ + "# Drop City Table and the Country Table if they exist and create a new Country Table and Insert the data\n", + "\n", + "execute_sql_statement(\"DROP TABLE IF EXISTS City_Table\",conn)\n", + "execute_sql_statement(\"DROP TABLE IF EXISTS Country_Table\",conn)\n", + "create_table(conn,'''CREATE TABLE Country_Table (PK INTEGER PRIMARY KEY, Country_Name VARCHAR)''')\n", + "cur.executemany(\"INSERT INTO Country_Table ('Country_Name') Values (?)\",req_resut)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "59930452", + "metadata": {}, + "outputs": [], + "source": [ + "# Make a dictionary of the country data and their IDs\n", + "abc=execute_sql_statement(\"Select * from Country_Table\", conn)\n", + "abc1={}\n", + "for i in abc:\n", + " abc1[i[1]]=i[0]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "52c77691", + "metadata": {}, + "outputs": [], + "source": [ + "req_result_2=[]\n", + "for i in result:\n", + " req_result_2.append((i[3],abc1[i[4]]))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f5b64360", + "metadata": {}, + "outputs": [], + "source": [ + "# Create the City Table\n", + "create_table(conn,'''CREATE TABLE City_Table (City_Id INTEGER PRIMARY KEY, City_Name VARCHAR , Country_ID INTEGER,\n", + "FOREIGN KEY(Country_ID) REFERENCES Country_Table(PK))''')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1206d273", + "metadata": {}, + "outputs": [], + "source": [ + "# Extract the data for City Table\n", + "city=[]\n", + "for i in result:\n", + " # j=j+1\n", + " city.append((i[3],abc1[i[4]]))\n", + "city_country_unique=list((set(city)))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "cfca5230", + "metadata": {}, + "outputs": [], + "source": [ + "# Insert the data for City Table\n", + "cur.executemany(\"INSERT INTO City_Table ('City_Name','Country_ID') Values (?,?)\",city_country_unique)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d9f71f49", + "metadata": {}, + "outputs": [], + "source": [ + "#df=execute_sql_statement(\"Select * from City_Table\", conn)\n", + "df=pd.read_sql_query(\"Select * from City_Table\", conn)\n", + "display(df)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3692d2ea", + "metadata": {}, + "outputs": [], + "source": [ + "# Build a dictionary of the city data and their IDs\n", + "city_index=execute_sql_statement(\"Select * from City_Table\", conn)\n", + "city_indexes={}\n", + "for i in abc:\n", + " abc1[i[1]]=i[0]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "9dd4ef62", + "metadata": {}, + "outputs": [], + "source": [ + "# Displaying the country data\n", + "df1=pd.read_sql_query(\"Select * from Country_Table\", conn)\n", + "display(df1)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "4c2a9868", + "metadata": {}, + "outputs": [], + "source": [ + "# Join the data from the City Table and the Country Table and display the data\n", + "\n", + "execute_sql_statement(\"select * from City_Table join Country_Table on Country_Table.PK = City_Table.Country_ID AND Country_Table.Country_Name='India' LIMIT 10\",conn)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3932f5a5", + "metadata": {}, + "outputs": [], + "source": [ + "# City IDs Dictionary\n", + "\n", + "city_table_values=execute_sql_statement(\"select * from City_Table \",conn)\n", + "city_ids={}\n", + "for i in city_table_values:\n", + " city_ids[i[1]]=i[0]\n", + "city_ids\n", + " " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "fcd18ad9", + "metadata": {}, + "outputs": [], + "source": [ + "# Create the Temperature Table\n", + "\n", + "create_table(conn,'''CREATE TABLE Temperature (PK INTEGER PRIMARY KEY , Date VARCHAR, Avg_Temperature VARCHAR, City_ID INTEGER,\n", + "FOREIGN KEY(City_ID) REFERENCES City_Table(City_Id))''')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e1a0d17e", + "metadata": {}, + "outputs": [], + "source": [ + "# Allocate the data to the Temperature Table and their corresponding City IDs\n", + "\n", + "temp_table_values=[]\n", + "j=1\n", + "for i in result:\n", + " j=j+1\n", + " temp_table_values.append((i[0],i[1],city_ids[i[3]]))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "fb218911", + "metadata": {}, + "outputs": [], + "source": [ + "# Insert the data into the Temperature Table\n", + "\n", + "cur.executemany(\"INSERT INTO Temperature ('Date','Avg_Temperature','City_ID') Values (?,?,?)\",temp_table_values)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "8e34f625", + "metadata": {}, + "outputs": [], + "source": [ + "# Displaying the Temperature Table\n", + "\n", + "df1=pd.read_sql_query(\"Select * from Temperature\", conn)\n", + "display(df1)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f08f8148", + "metadata": {}, + "outputs": [], + "source": [ + "# Create the Location Table\n", + "\n", + "create_table(conn,'''CREATE TABLE Loc_Table (ID INTEGER PRIMARY KEY , City_ID INTEGER, LAT FLOAT,LONG FLOAT,\n", + "FOREIGN KEY(City_ID) REFERENCES City_Table(City_ID))''')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ee98ee19", + "metadata": {}, + "outputs": [], + "source": [ + "# Extract the data for the Location Table, Database consists of N,W,E,S notation, we are going to convert them to the decimal notation\n", + "\n", + "#Lat= North is positive, South is negative\n", + "#Long= West is negative, East is positive\n", + "data_to_check_loc=result_data_for_loc_table\n", + "data_to_input_loc_table=[]\n", + "j=0\n", + "for i in data_to_check_loc:\n", + " t=[0,0,0]\n", + " if i[1][-1]=='N':\n", + " t[1]=(float(i[1][:-1]))\n", + " if i[1][-1]=='S':\n", + " t[1]=float(i[1][:-1])*(-1)\n", + " if i[2][-1]=='E':\n", + " t[2]=float(i[2][:-1]) \n", + " if i[2][-1]=='W':\n", + " t[2]=float(i[2][:-1])*(-1)\n", + " t[0]=i[0]\n", + " tuple1=(city_ids[t[0]],t[1],t[2])\n", + " data_to_input_loc_table.append(tuple1)\n", + "data_to_input_loc_table" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "cc0fba0c", + "metadata": {}, + "outputs": [], + "source": [ + "# Insert the data into the Location Table\n", + "\n", + "cur.executemany(\"INSERT INTO Loc_Table ('City_ID','LAT','LONG') Values (?,?,?)\",data_to_input_loc_table)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "aead2755", + "metadata": {}, + "outputs": [], + "source": [ + "# Displaying the Location Table\n", + "\n", + "df1=pd.read_sql_query(\"Select * from Loc_Table \", conn)\n", + "display(df1)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f67e8d5b", + "metadata": {}, + "outputs": [], + "source": [ + "# df1=pd.read_sql_query(\"select * from Loc_Table join Temperature on Loc_Table.City_ID=Temperature.City_ID join city_table on city_table.city_id=Temperature.City_ID join country_table on country_table.pk=city_table.country_id group by Date LIMIT 10\", conn)\n", + "# display(df1)\n", + "#(\"select * from Loc_Table join Temperature on Loc_Table.City_ID=Temperature.City_ID join city_table on city_table.city_id=Temperature.City_ID join country_table on country_table.pk=city_table.country_id LIMIT 10\",conn)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "cbc5ad63", + "metadata": {}, + "outputs": [], + "source": [ + "# Commit the changes\n", + "\n", + "conn.commit()" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.8" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} |
