"""PySpark example: build a DataFrame from an RDD of tuples with an explicit schema.

Recovered from the patch that created ``pySparkExample.py`` (commit
101072ea24cc0670bbd689d039d5144fa5216a4b, "PySpark example", by jmreddy2106,
Sat, 25 Dec 2021 18:25:17 -0500).
"""
from pyspark import SparkContext, StorageLevel
from pyspark.sql import SparkSession
from pyspark.conf import SparkConf
# The wildcard supplies StructType, StructField, StringType and IntegerType
# used below.
from pyspark.sql.types import *


# Enabling Spark Configuration and SparkSession
sconf = SparkConf().setAppName("test")
spark = SparkSession.builder.config(conf=sconf).getOrCreate()

try:
    # RDD as a list of (name, age) tuples
    rdd = spark.sparkContext.parallelize([("Alex", 21), ("Bob", 44)])

    # Creating a schema using StructType; the third StructField argument
    # (True) marks the column as nullable.
    schema = StructType([
        StructField("name", StringType(), True),
        StructField("age", IntegerType(), True),
    ])

    # Creating a dataframe from rdd using schema
    df = spark.createDataFrame(rdd, schema)

    # Displaying dataframe; truncate=False prints full cell contents
    df.show(truncate=False)
finally:
    # Stop the session so the underlying SparkContext is released even if
    # one of the steps above raises.
    spark.stop()