CSV File Analysis In Spark SQL - Java Big Data

Java examples for Big Data: Apache Spark

Description

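CSV file analysis in Spark SQL: load a CSV file with a header row into a DataFrame, cast its columns to typed values, and filter the rows with a SQL query.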

Demo Code



import org.apache.spark.sql.DataFrameReader;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.SparkSession;

/**
 * Demo: reads a CSV file with Spark, exposes it as a temporary SQL view,
 * projects typed columns into a second view, and prints the rows that match
 * a set of threshold filters.
 *
 * <p>The input is read from {@code src/main/resources/data.csv} with the
 * first line treated as a header. The queries below reference the columns
 * {@code id}, {@code date}, {@code Temperature}, {@code Humidity},
 * {@code Light} and {@code CO2}; the CSV is assumed to provide all of them
 * (NOTE(review): confirm against the actual data file).
 */
public class CSVFileAnalysisInSparkSQL {

    /**
     * Runs the demo on a local Spark master with 5 worker threads.
     *
     * @param args command-line arguments; not used
     */
    public static void main(String[] args) {
        final SparkSession sparkSession = SparkSession.builder()
                .appName("Spark CSV Analysis Demo").master("local[5]")
                .getOrCreate();

        try {
            final DataFrameReader dataFrameReader = sparkSession.read();
            // Treat the first CSV line as column names rather than data.
            dataFrameReader.option("header", "true");
            final Dataset<Row> csvDataFrame = dataFrameReader
                    .csv("src/main/resources/data.csv");

            csvDataFrame.printSchema();

            csvDataFrame.createOrReplaceTempView("ROOM_OCCUPANCY_RAW");

            // No schema inference is requested on the reader, so columns are
            // cast explicitly here. Every column used by the filter query
            // below must be projected into this view, and the select list
            // must not end with a trailing comma before FROM.
            final Dataset<Row> roomOccupancyData = sparkSession
                    .sql("SELECT CAST(id as int) id, CAST(date as string) date, "
                            + "CAST(Temperature as float) Temperature, "
                            + "CAST(Humidity as float) Humidity, "
                            + "CAST(Light as float) Light, "
                            + "CAST(CO2 as float) CO2 "
                            + "FROM ROOM_OCCUPANCY_RAW");

            roomOccupancyData.printSchema();
            roomOccupancyData.createOrReplaceTempView("ROOM_OCCUPANCY");
            sparkSession
                    .sql("SELECT * FROM ROOM_OCCUPANCY WHERE Temperature >= 23.6 AND Humidity > 27 AND Light > 500 "
                            + "AND CO2 BETWEEN 920 and 950").show();
        } finally {
            // Release the local Spark context even if a query fails.
            sparkSession.stop();
        }
    }
}

Related Tutorials