diff options
Diffstat (limited to 'database_creation_script.py')
| -rw-r--r-- | database_creation_script.py | 107 |
1 files changed, 107 insertions, 0 deletions
diff --git a/database_creation_script.py b/database_creation_script.py new file mode 100644 index 0000000..bfc2ae5 --- /dev/null +++ b/database_creation_script.py @@ -0,0 +1,107 @@ +# Running this script automatically creates the database and normalizes the data. +# This is same as running the 'dbcreation.ipynb' notebook step by step. +# However their are some visualizations in the notebook but this script does not. +# This script is intentended to automate the whole process in a single script. +# This should run faster than the notebook. + +import pandas as pd +import sqlite3 +from sqlite3 import Error +import os + +def create_connection(db_file, delete_db=False): + + if delete_db and os.path.exists(db_file): + os.remove(db_file) + + conn = None + try: + conn = sqlite3.connect(db_file) + conn.execute("PRAGMA foreign_keys = 1") + except Error as e: + print(e) + + return conn + +def create_table(conn, create_table_sql): + try: + c = conn.cursor() + c.execute(create_table_sql) + except Error as e: + print(e) + +def execute_sql_statement(sql_statement, conn): + cur = conn.cursor() + cur.execute(sql_statement) + + rows = cur.fetchall() + + return rows + +df=pd.read_csv("GlobalLandTemperaturesByCity.csv") +data=df[df.dt>"1949-12-31"] +data_for_loc_table=data[['City','Latitude','Longitude']] +data_for_loc_table=data_for_loc_table.to_records(index=False) +result_data_for_loc_table=list(data_for_loc_table) +records=data.to_records(index=False) +result=list(records) +req_resut=[] +for i in result: + req_resut.append((i[4],)) +req_resut=list((set(req_resut))) +conn = create_connection('database.db') +cur=conn.cursor() +execute_sql_statement("DROP TABLE IF EXISTS City_Table",conn) +execute_sql_statement("DROP TABLE IF EXISTS Country_Table",conn) +create_table(conn,'''CREATE TABLE Country_Table (PK INTEGER PRIMARY KEY, Country_Name VARCHAR)''') +cur.executemany("INSERT INTO Country_Table ('Country_Name') Values (?)",req_resut) +abc=execute_sql_statement("Select * from Country_Table", conn) +abc1={} +for i in abc: + abc1[i[1]]=i[0] +req_result_2=[] +for i in result: + req_result_2.append((i[3],abc1[i[4]])) +create_table(conn,'''CREATE TABLE City_Table (City_Id INTEGER PRIMARY KEY, City_Name VARCHAR , Country_ID INTEGER, +FOREIGN KEY(Country_ID) REFERENCES Country_Table(PK))''') +city=[] +for i in result: + city.append((i[3],abc1[i[4]])) +city_country_unique=list((set(city))) +cur.executemany("INSERT INTO City_Table ('City_Name','Country_ID') Values (?,?)",city_country_unique) +city_index=execute_sql_statement("Select * from City_Table", conn) +city_indexes={} +for i in abc: + abc1[i[1]]=i[0] +city_table_values=execute_sql_statement("select * from City_Table ",conn) +city_ids={} +for i in city_table_values: + city_ids[i[1]]=i[0] +create_table(conn,'''CREATE TABLE Temperature (PK INTEGER PRIMARY KEY , Date VARCHAR, Avg_Temperature VARCHAR, City_ID INTEGER, +FOREIGN KEY(City_ID) REFERENCES City_Table(City_Id))''') +temp_table_values=[] +j=1 +for i in result: + j=j+1 + temp_table_values.append((i[0],i[1],city_ids[i[3]])) +cur.executemany("INSERT INTO Temperature ('Date','Avg_Temperature','City_ID') Values (?,?,?)",temp_table_values) +create_table(conn,'''CREATE TABLE Loc_Table (ID INTEGER PRIMARY KEY , City_ID INTEGER, LAT FLOAT,LONG FLOAT, +FOREIGN KEY(City_ID) REFERENCES City_Table(City_ID))''') +data_to_check_loc=result_data_for_loc_table +data_to_input_loc_table=[] +j=0 +for i in data_to_check_loc: + t=[0,0,0] + if i[1][-1]=='N': + t[1]=(float(i[1][:-1])) + if i[1][-1]=='S': + t[1]=float(i[1][:-1])*(-1) + if i[2][-1]=='E': + t[2]=float(i[2][:-1]) + if i[2][-1]=='W': + t[2]=float(i[2][:-1])*(-1) + t[0]=i[0] + tuple1=(city_ids[t[0]],t[1],t[2]) + data_to_input_loc_table.append(tuple1) +cur.executemany("INSERT INTO Loc_Table ('City_ID','LAT','LONG') Values (?,?,?)",data_to_input_loc_table) +conn.commit() |
