Multiple Horizontal CSV File Analysis In Spark SQL - Java Big Data

Java examples for Big Data: Apache Spark

Description

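Multiple horizontal CSV file analysis in Spark SQL: read several CSV part files into a single dataset, cast the columns to typed values, and query the result with SQL.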

Demo Code



import org.apache.spark.sql.DataFrameReader;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.SparkSession;

/**
 * Demo that reads two CSV part files into one Spark SQL dataset, projects the
 * string columns to typed columns, and runs a filter query on the typed view.
 *
 * <p>Both files are read with {@code header=true}, so column names come from
 * the first line of each file. The queries below reference the columns
 * {@code id}, {@code date}, {@code Temperature}, {@code Humidity},
 * {@code Light}, {@code CO2} and {@code Occupancy}; the CSV files are assumed
 * to provide all of them (NOTE(review): confirm against the data files).
 */
public class MultipleHorizontalCSVFileAnalysisInSparkSQL {

    /** Name of the temp view registered over the raw (all-string) CSV data. */
    private static final String RAW_VIEW = "ROOM_OCCUPANCY_RAW";

    /** Name of the temp view registered over the typed projection. */
    private static final String TYPED_VIEW = "ROOM_OCCUPANCY";

    /**
     * Typed projection of the raw view. Every column used by
     * {@link #FILTER_SQL} must be selected here; otherwise the filter query
     * fails analysis because the column does not exist in the typed view.
     */
    private static final String TYPED_PROJECTION_SQL =
            "SELECT CAST(id as int) id, CAST(date as string) date, "
                    + "CAST(Temperature as double) Temperature, "
                    + "CAST(Humidity as double) Humidity, "
                    + "CAST(Light as double) Light, "
                    + "CAST(CO2 as double) CO2, "
                    + "CAST(Occupancy as int) Occupancy FROM " + RAW_VIEW;

    /** Filter applied to the typed view. */
    private static final String FILTER_SQL =
            "SELECT * FROM " + TYPED_VIEW
                    + " WHERE Temperature >= 23.6 AND Humidity > 27 AND Light > 500 "
                    + "AND CO2 BETWEEN 920 and 950";

    /**
     * Runs the demo on a local Spark master with five worker threads.
     *
     * @param args command-line arguments (unused)
     */
    public static void main(String[] args) {
        final SparkSession sparkSession = SparkSession.builder()
                .appName("Spark CSV Analysis Demo").master("local[5]")
                .getOrCreate();

        try {
            // Treat the first line of each CSV file as the column header.
            final DataFrameReader dataFrameReader = sparkSession.read()
                    .option("header", "true");

            // Passing several paths loads all files into a single dataset.
            final Dataset<Row> csvDataFrame = dataFrameReader.csv(
                    "src/main/resources/data-horiz-part1.csv",
                    "src/main/resources/data-horiz-part2.csv");
            csvDataFrame.printSchema();
            csvDataFrame.createOrReplaceTempView(RAW_VIEW);

            final Dataset<Row> roomOccupancyData =
                    sparkSession.sql(TYPED_PROJECTION_SQL);
            roomOccupancyData.printSchema();
            roomOccupancyData.createOrReplaceTempView(TYPED_VIEW);

            sparkSession.sql(FILTER_SQL).show();
        } finally {
            // Always stop the session so the local Spark context is shut down,
            // even when reading or querying fails.
            sparkSession.stop();
        }
    }
}

Related Tutorials