about summary refs log tree commit diff
path: root/pySparkExample.py
diff options
context:
space:
mode:
Diffstat (limited to 'pySparkExample.py')
-rw-r--r-- pySparkExample.py 24
1 files changed, 24 insertions, 0 deletions
diff --git a/pySparkExample.py b/pySparkExample.py
new file mode 100644
index 0000000..4037b74
--- /dev/null
+++ b/pySparkExample.py
@@ -0,0 +1,24 @@
+from pyspark import SparkContext,StorageLevel
+from pyspark.sql import SparkSession
+from pyspark.conf import SparkConf
+from pyspark.sql.types import *
+
+
+# Build a SparkConf named "test" and get (or create) the SparkSession from it
+sconf=SparkConf().setAppName("test")
+spark=SparkSession.builder.config(conf=sconf).getOrCreate()
+
+# Distribute a list of (name, age) tuples as an RDD
+rdd = spark.sparkContext.parallelize([('Alex',21),('Bob',44)])
+
+# Explicit schema: string "name" and integer "age" columns, both nullable
+schema = StructType([
+ StructField("name", StringType(), True),
+ StructField("age", IntegerType(), True)])
+
+# Create a DataFrame from the RDD, applying the schema above
+df=spark.createDataFrame(rdd, schema)
+
+# Print the DataFrame without truncating long cell values
+df.show(truncate=False)
+