diff options
| author | jmreddy2106 <[email protected]> | 2021-12-25 18:25:17 -0500 |
|---|---|---|
| committer | jmreddy2106 <[email protected]> | 2021-12-25 18:25:17 -0500 |
| commit | 101072ea24cc0670bbd689d039d5144fa5216a4b (patch) | |
| tree | 75674afa8c9a68e2a94b683e654a1403105af67b /pySparkExample.py | |
| parent | 7ceb40394bd40075e7fe41b8c125a5d2dcedc781 (diff) | |
| download | KafkaPySpark-101072ea24cc0670bbd689d039d5144fa5216a4b.tar.xz KafkaPySpark-101072ea24cc0670bbd689d039d5144fa5216a4b.zip | |
PySpark example
Diffstat (limited to 'pySparkExample.py')
| -rw-r--r-- | pySparkExample.py | 24 |
1 files changed, 24 insertions, 0 deletions
diff --git a/pySparkExample.py b/pySparkExample.py new file mode 100644 index 0000000..4037b74 --- /dev/null +++ b/pySparkExample.py @@ -0,0 +1,24 @@ +from pyspark import SparkContext,StorageLevel +from pyspark.sql import SparkSession +from pyspark.conf import SparkConf +from pyspark.sql.types import * + + +# Enabling Spark Configuration and SparkSession +sconf=SparkConf().setAppName("test") +spark=SparkSession.builder.config(conf=sconf).getOrCreate() + +# RDD as a list of tuples +rdd = spark.sparkContext.parallelize([('Alex',21),('Bob',44)]) + +# creating a schema using StructType +schema = StructType([ + StructField("name", StringType(), True), + StructField("age", IntegerType(), True)]) + +# Creating a dataframe from rdd using schema +df=spark.createDataFrame(rdd, schema) + +# displaying dataframe +df.show(truncate=False) + |
