aboutsummaryrefslogtreecommitdiff
path: root/dbcreation.ipynb
diff options
context:
space:
mode:
Diffstat (limited to 'dbcreation.ipynb')
-rw-r--r--dbcreation.ipynb460
1 files changed, 460 insertions, 0 deletions
diff --git a/dbcreation.ipynb b/dbcreation.ipynb
new file mode 100644
index 0000000..f847fab
--- /dev/null
+++ b/dbcreation.ipynb
@@ -0,0 +1,460 @@
+{
+ "cells": [
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "660de5e8",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Imports\n",
+ "\n",
+ "from IPython.display import display, HTML\n",
+ "import pandas as pd\n",
+ "import sqlite3\n",
+ "from sqlite3 import Error\n",
+ "import os"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "1dc168cb",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Function to create a connection to the database\n",
+ "\n",
+ "def create_connection(db_file, delete_db=False):\n",
+ " \n",
+ " if delete_db and os.path.exists(db_file):\n",
+ " os.remove(db_file)\n",
+ "\n",
+ " conn = None\n",
+ " try:\n",
+ " conn = sqlite3.connect(db_file)\n",
+ " conn.execute(\"PRAGMA foreign_keys = 1\")\n",
+ " except Error as e:\n",
+ " print(e)\n",
+ "\n",
+ " return conn"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "910875f4",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Function to create a database\n",
+ "\n",
+ "def create_table(conn, create_table_sql):\n",
+ " try:\n",
+ " c = conn.cursor()\n",
+ " c.execute(create_table_sql)\n",
+ " except Error as e:\n",
+ " print(e)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "36907a1c",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Execute SQL statement\n",
+ "\n",
+ "def execute_sql_statement(sql_statement, conn):\n",
+ " cur = conn.cursor()\n",
+ " cur.execute(sql_statement)\n",
+ "\n",
+ " rows = cur.fetchall()\n",
+ "\n",
+ " return rows"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "a0dfa189",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Read the Non-Normalized Data\n",
+ "\n",
+ "df=pd.read_csv(\"GlobalLandTemperaturesByCity.csv\")\n",
+ "data=df[df.dt>\"1949-12-31\"]\n",
+ " "
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "77341f9c",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Extract cities and their latitude and longitude\n",
+ "\n",
+ "data_for_loc_table=data[['City','Latitude','Longitude']]\n",
+ "data_for_loc_table=data_for_loc_table.to_records(index=False)\n",
+ "result_data_for_loc_table=list(data_for_loc_table)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "e635279c",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Extract Countries\n",
+ "\n",
+ "records=data.to_records(index=False)\n",
+ "result=list(records)\n",
+ "req_resut=[]\n",
+ "for i in result:\n",
+ " req_resut.append((i[4],))\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "74c13a5f",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Convert the list of cities into a list with unique names\n",
+ "\n",
+ "req_resut=list((set(req_resut)))"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "21eca337",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Create a connection to the database\n",
+ "conn = create_connection('database.db')\n",
+ "cur=conn.cursor()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "af74cdef",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Drop City Table and the Country Table if they exist and create a new Country Table and Insert the data\n",
+ "\n",
+ "execute_sql_statement(\"DROP TABLE IF EXISTS City_Table\",conn)\n",
+ "execute_sql_statement(\"DROP TABLE IF EXISTS Country_Table\",conn)\n",
+ "create_table(conn,'''CREATE TABLE Country_Table (PK INTEGER PRIMARY KEY, Country_Name VARCHAR)''')\n",
+ "cur.executemany(\"INSERT INTO Country_Table ('Country_Name') Values (?)\",req_resut)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "59930452",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Make a dictionary of the country data and their IDs\n",
+ "abc=execute_sql_statement(\"Select * from Country_Table\", conn)\n",
+ "abc1={}\n",
+ "for i in abc:\n",
+ " abc1[i[1]]=i[0]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "52c77691",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "req_result_2=[]\n",
+ "for i in result:\n",
+ " req_result_2.append((i[3],abc1[i[4]]))"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "f5b64360",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Create the City Table\n",
+ "create_table(conn,'''CREATE TABLE City_Table (City_Id INTEGER PRIMARY KEY, City_Name VARCHAR , Country_ID INTEGER,\n",
+ "FOREIGN KEY(Country_ID) REFERENCES Country_Table(PK))''')"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "1206d273",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Extract the data for City Table\n",
+ "city=[]\n",
+ "for i in result:\n",
+ " # j=j+1\n",
+ " city.append((i[3],abc1[i[4]]))\n",
+ "city_country_unique=list((set(city)))"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "cfca5230",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Insert the data for City Table\n",
+ "cur.executemany(\"INSERT INTO City_Table ('City_Name','Country_ID') Values (?,?)\",city_country_unique)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "d9f71f49",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "#df=execute_sql_statement(\"Select * from City_Table\", conn)\n",
+ "df=pd.read_sql_query(\"Select * from City_Table\", conn)\n",
+ "display(df)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "3692d2ea",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Build a dictionary of the city data and their IDs\n",
+ "city_index=execute_sql_statement(\"Select * from City_Table\", conn)\n",
+ "city_indexes={}\n",
+ "for i in abc:\n",
+ " abc1[i[1]]=i[0]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "9dd4ef62",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Displaying the country data\n",
+ "df1=pd.read_sql_query(\"Select * from Country_Table\", conn)\n",
+ "display(df1)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "4c2a9868",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Join the data from the City Table and the Country Table and display the data\n",
+ "\n",
+ "execute_sql_statement(\"select * from City_Table join Country_Table on Country_Table.PK = City_Table.Country_ID AND Country_Table.Country_Name='India' LIMIT 10\",conn)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "3932f5a5",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# City IDs Dictionary\n",
+ "\n",
+ "city_table_values=execute_sql_statement(\"select * from City_Table \",conn)\n",
+ "city_ids={}\n",
+ "for i in city_table_values:\n",
+ " city_ids[i[1]]=i[0]\n",
+ "city_ids\n",
+ " "
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "fcd18ad9",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Create the Temperature Table\n",
+ "\n",
+ "create_table(conn,'''CREATE TABLE Temperature (PK INTEGER PRIMARY KEY , Date VARCHAR, Avg_Temperature VARCHAR, City_ID INTEGER,\n",
+ "FOREIGN KEY(City_ID) REFERENCES City_Table(City_Id))''')"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "e1a0d17e",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Allocate the data to the Temperature Table and their corresponding City IDs\n",
+ "\n",
+ "temp_table_values=[]\n",
+ "j=1\n",
+ "for i in result:\n",
+ " j=j+1\n",
+ " temp_table_values.append((i[0],i[1],city_ids[i[3]]))"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "fb218911",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Insert the data into the Temperature Table\n",
+ "\n",
+ "cur.executemany(\"INSERT INTO Temperature ('Date','Avg_Temperature','City_ID') Values (?,?,?)\",temp_table_values)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "8e34f625",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Displaying the Temperature Table\n",
+ "\n",
+ "df1=pd.read_sql_query(\"Select * from Temperature\", conn)\n",
+ "display(df1)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "f08f8148",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Create the Location Table\n",
+ "\n",
+ "create_table(conn,'''CREATE TABLE Loc_Table (ID INTEGER PRIMARY KEY , City_ID INTEGER, LAT FLOAT,LONG FLOAT,\n",
+ "FOREIGN KEY(City_ID) REFERENCES City_Table(City_ID))''')"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "ee98ee19",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Extract the data for the Location Table, Database consists of N,W,E,S notation, we are going to convert them to the decimal notation\n",
+ "\n",
+ "#Lat= North is positive, South is negative\n",
+ "#Long= West is negative, East is positive\n",
+ "data_to_check_loc=result_data_for_loc_table\n",
+ "data_to_input_loc_table=[]\n",
+ "j=0\n",
+ "for i in data_to_check_loc:\n",
+ " t=[0,0,0]\n",
+ " if i[1][-1]=='N':\n",
+ " t[1]=(float(i[1][:-1]))\n",
+ " if i[1][-1]=='S':\n",
+ " t[1]=float(i[1][:-1])*(-1)\n",
+ " if i[2][-1]=='E':\n",
+ " t[2]=float(i[2][:-1]) \n",
+ " if i[2][-1]=='W':\n",
+ " t[2]=float(i[2][:-1])*(-1)\n",
+ " t[0]=i[0]\n",
+ " tuple1=(city_ids[t[0]],t[1],t[2])\n",
+ " data_to_input_loc_table.append(tuple1)\n",
+ "data_to_input_loc_table"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "cc0fba0c",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Insert the data into the Location Table\n",
+ "\n",
+ "cur.executemany(\"INSERT INTO Loc_Table ('City_ID','LAT','LONG') Values (?,?,?)\",data_to_input_loc_table)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "aead2755",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Displaying the Location Table\n",
+ "\n",
+ "df1=pd.read_sql_query(\"Select * from Loc_Table \", conn)\n",
+ "display(df1)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "f67e8d5b",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# df1=pd.read_sql_query(\"select * from Loc_Table join Temperature on Loc_Table.City_ID=Temperature.City_ID join city_table on city_table.city_id=Temperature.City_ID join country_table on country_table.pk=city_table.country_id group by Date LIMIT 10\", conn)\n",
+ "# display(df1)\n",
+ "#(\"select * from Loc_Table join Temperature on Loc_Table.City_ID=Temperature.City_ID join city_table on city_table.city_id=Temperature.City_ID join country_table on country_table.pk=city_table.country_id LIMIT 10\",conn)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "cbc5ad63",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Commit the changes\n",
+ "\n",
+ "conn.commit()"
+ ]
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.8.8"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}