{ "cells": [ { "cell_type": "code", "execution_count": null, "id": "660de5e8", "metadata": {}, "outputs": [], "source": [ "# Imports\n", "\n", "from IPython.display import display, HTML\n", "import pandas as pd\n", "import sqlite3\n", "from sqlite3 import Error\n", "import os" ] }, { "cell_type": "code", "execution_count": null, "id": "1dc168cb", "metadata": {}, "outputs": [], "source": [ "# Function to create a connection to the database\n", "\n", "def create_connection(db_file, delete_db=False):\n", " \n", " if delete_db and os.path.exists(db_file):\n", " os.remove(db_file)\n", "\n", " conn = None\n", " try:\n", " conn = sqlite3.connect(db_file)\n", " conn.execute(\"PRAGMA foreign_keys = 1\")\n", " except Error as e:\n", " print(e)\n", "\n", " return conn" ] }, { "cell_type": "code", "execution_count": null, "id": "910875f4", "metadata": {}, "outputs": [], "source": [ "# Function to create a database\n", "\n", "def create_table(conn, create_table_sql):\n", " try:\n", " c = conn.cursor()\n", " c.execute(create_table_sql)\n", " except Error as e:\n", " print(e)" ] }, { "cell_type": "code", "execution_count": null, "id": "36907a1c", "metadata": {}, "outputs": [], "source": [ "# Execute SQL statement\n", "\n", "def execute_sql_statement(sql_statement, conn):\n", " cur = conn.cursor()\n", " cur.execute(sql_statement)\n", "\n", " rows = cur.fetchall()\n", "\n", " return rows" ] }, { "cell_type": "code", "execution_count": null, "id": "a0dfa189", "metadata": {}, "outputs": [], "source": [ "# Read the Non-Normalized Data\n", "\n", "df=pd.read_csv(\"..GlobalLandTemperaturesByCity.csv\")\n", "data=df[df.dt>\"1949-12-31\"]\n", " " ] }, { "cell_type": "code", "execution_count": null, "id": "77341f9c", "metadata": {}, "outputs": [], "source": [ "# Extract cities and their latitude and longitude\n", "\n", "data_for_loc_table=data[['City','Latitude','Longitude']]\n", "data_for_loc_table=data_for_loc_table.to_records(index=False)\n", "result_data_for_loc_table=list(data_for_loc_table)" ] }, { "cell_type": "code", "execution_count": null, "id": "e635279c", "metadata": {}, "outputs": [], "source": [ "# Extract Countries\n", "\n", "records=data.to_records(index=False)\n", "result=list(records)\n", "req_resut=[]\n", "for i in result:\n", " req_resut.append((i[4],))\n" ] }, { "cell_type": "code", "execution_count": null, "id": "74c13a5f", "metadata": {}, "outputs": [], "source": [ "# Convert the list of cities into a list with unique names\n", "\n", "req_resut=list((set(req_resut)))" ] }, { "cell_type": "code", "execution_count": null, "id": "21eca337", "metadata": {}, "outputs": [], "source": [ "# Create a connection to the database\n", "conn = create_connection('database.db')\n", "cur=conn.cursor()" ] }, { "cell_type": "code", "execution_count": null, "id": "af74cdef", "metadata": {}, "outputs": [], "source": [ "# Drop City Table and the Country Table if they exist and create a new Country Table and Insert the data\n", "\n", "execute_sql_statement(\"DROP TABLE IF EXISTS City_Table\",conn)\n", "execute_sql_statement(\"DROP TABLE IF EXISTS Country_Table\",conn)\n", "create_table(conn,'''CREATE TABLE Country_Table (PK INTEGER PRIMARY KEY, Country_Name VARCHAR)''')\n", "cur.executemany(\"INSERT INTO Country_Table ('Country_Name') Values (?)\",req_resut)" ] }, { "cell_type": "code", "execution_count": null, "id": "59930452", "metadata": {}, "outputs": [], "source": [ "# Make a dictionary of the country data and their IDs\n", "abc=execute_sql_statement(\"Select * from Country_Table\", conn)\n", "abc1={}\n", "for i in abc:\n", " abc1[i[1]]=i[0]" ] }, { "cell_type": "code", "execution_count": null, "id": "52c77691", "metadata": {}, "outputs": [], "source": [ "req_result_2=[]\n", "for i in result:\n", " req_result_2.append((i[3],abc1[i[4]]))" ] }, { "cell_type": "code", "execution_count": null, "id": "f5b64360", "metadata": {}, "outputs": [], "source": [ "# Create the City Table\n", "create_table(conn,'''CREATE TABLE City_Table (City_Id INTEGER PRIMARY KEY, City_Name VARCHAR , Country_ID INTEGER,\n", "FOREIGN KEY(Country_ID) REFERENCES Country_Table(PK))''')" ] }, { "cell_type": "code", "execution_count": null, "id": "1206d273", "metadata": {}, "outputs": [], "source": [ "# Extract the data for City Table\n", "city=[]\n", "for i in result:\n", " # j=j+1\n", " city.append((i[3],abc1[i[4]]))\n", "city_country_unique=list((set(city)))" ] }, { "cell_type": "code", "execution_count": null, "id": "cfca5230", "metadata": {}, "outputs": [], "source": [ "# Insert the data for City Table\n", "cur.executemany(\"INSERT INTO City_Table ('City_Name','Country_ID') Values (?,?)\",city_country_unique)" ] }, { "cell_type": "code", "execution_count": null, "id": "d9f71f49", "metadata": {}, "outputs": [], "source": [ "#df=execute_sql_statement(\"Select * from City_Table\", conn)\n", "df=pd.read_sql_query(\"Select * from City_Table\", conn)\n", "display(df)" ] }, { "cell_type": "code", "execution_count": null, "id": "3692d2ea", "metadata": {}, "outputs": [], "source": [ "# Build a dictionary of the city data and their IDs\n", "city_index=execute_sql_statement(\"Select * from City_Table\", conn)\n", "city_indexes={}\n", "for i in abc:\n", " abc1[i[1]]=i[0]" ] }, { "cell_type": "code", "execution_count": null, "id": "9dd4ef62", "metadata": {}, "outputs": [], "source": [ "# Displaying the country data\n", "df1=pd.read_sql_query(\"Select * from Country_Table\", conn)\n", "display(df1)" ] }, { "cell_type": "code", "execution_count": null, "id": "4c2a9868", "metadata": {}, "outputs": [], "source": [ "# Join the data from the City Table and the Country Table and display the data\n", "\n", "execute_sql_statement(\"select * from City_Table join Country_Table on Country_Table.PK = City_Table.Country_ID AND Country_Table.Country_Name='India' LIMIT 10\",conn)" ] }, { "cell_type": "code", "execution_count": null, "id": "3932f5a5", "metadata": {}, "outputs": [], "source": [ "# City IDs Dictionary\n", "\n", "city_table_values=execute_sql_statement(\"select * from City_Table \",conn)\n", "city_ids={}\n", "for i in city_table_values:\n", " city_ids[i[1]]=i[0]\n", "city_ids\n", " " ] }, { "cell_type": "code", "execution_count": null, "id": "fcd18ad9", "metadata": {}, "outputs": [], "source": [ "# Create the Temperature Table\n", "\n", "create_table(conn,'''CREATE TABLE Temperature (PK INTEGER PRIMARY KEY , Date VARCHAR, Avg_Temperature VARCHAR, City_ID INTEGER,\n", "FOREIGN KEY(City_ID) REFERENCES City_Table(City_Id))''')" ] }, { "cell_type": "code", "execution_count": null, "id": "e1a0d17e", "metadata": {}, "outputs": [], "source": [ "# Allocate the data to the Temperature Table and their corresponding City IDs\n", "\n", "temp_table_values=[]\n", "j=1\n", "for i in result:\n", " j=j+1\n", " temp_table_values.append((i[0],i[1],city_ids[i[3]]))" ] }, { "cell_type": "code", "execution_count": null, "id": "fb218911", "metadata": {}, "outputs": [], "source": [ "# Insert the data into the Temperature Table\n", "\n", "cur.executemany(\"INSERT INTO Temperature ('Date','Avg_Temperature','City_ID') Values (?,?,?)\",temp_table_values)" ] }, { "cell_type": "code", "execution_count": null, "id": "8e34f625", "metadata": {}, "outputs": [], "source": [ "# Displaying the Temperature Table\n", "\n", "df1=pd.read_sql_query(\"Select * from Temperature\", conn)\n", "display(df1)" ] }, { "cell_type": "code", "execution_count": null, "id": "f08f8148", "metadata": {}, "outputs": [], "source": [ "# Create the Location Table\n", "\n", "create_table(conn,'''CREATE TABLE Loc_Table (ID INTEGER PRIMARY KEY , City_ID INTEGER, LAT FLOAT,LONG FLOAT,\n", "FOREIGN KEY(City_ID) REFERENCES City_Table(City_ID))''')" ] }, { "cell_type": "code", "execution_count": null, "id": "ee98ee19", "metadata": {}, "outputs": [], "source": [ "# Extract the data for the Location Table, Database consists of N,W,E,S notation, we are going to convert them to the decimal notation\n", "\n", "#Lat= North is positive, South is negative\n", "#Long= West is negative, East is positive\n", "data_to_check_loc=result_data_for_loc_table\n", "data_to_input_loc_table=[]\n", "j=0\n", "for i in data_to_check_loc:\n", " t=[0,0,0]\n", " if i[1][-1]=='N':\n", " t[1]=(float(i[1][:-1]))\n", " if i[1][-1]=='S':\n", " t[1]=float(i[1][:-1])*(-1)\n", " if i[2][-1]=='E':\n", " t[2]=float(i[2][:-1]) \n", " if i[2][-1]=='W':\n", " t[2]=float(i[2][:-1])*(-1)\n", " t[0]=i[0]\n", " tuple1=(city_ids[t[0]],t[1],t[2])\n", " data_to_input_loc_table.append(tuple1)\n", "data_to_input_loc_table" ] }, { "cell_type": "code", "execution_count": null, "id": "cc0fba0c", "metadata": {}, "outputs": [], "source": [ "# Insert the data into the Location Table\n", "\n", "cur.executemany(\"INSERT INTO Loc_Table ('City_ID','LAT','LONG') Values (?,?,?)\",data_to_input_loc_table)" ] }, { "cell_type": "code", "execution_count": null, "id": "aead2755", "metadata": {}, "outputs": [], "source": [ "# Displaying the Location Table\n", "\n", "df1=pd.read_sql_query(\"Select * from Loc_Table \", conn)\n", "display(df1)" ] }, { "cell_type": "code", "execution_count": null, "id": "f67e8d5b", "metadata": {}, "outputs": [], "source": [ "# df1=pd.read_sql_query(\"select * from Loc_Table join Temperature on Loc_Table.City_ID=Temperature.City_ID join city_table on city_table.city_id=Temperature.City_ID join country_table on country_table.pk=city_table.country_id group by Date LIMIT 10\", conn)\n", "# display(df1)\n", "#(\"select * from Loc_Table join Temperature on Loc_Table.City_ID=Temperature.City_ID join city_table on city_table.city_id=Temperature.City_ID join country_table on country_table.pk=city_table.country_id LIMIT 10\",conn)" ] }, { "cell_type": "code", "execution_count": null, "id": "cbc5ad63", "metadata": {}, "outputs": [], "source": [ "# Commit the changes\n", "\n", "conn.commit()" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.8" } }, "nbformat": 4, "nbformat_minor": 5 }