Java examples for Big Data: Apache Spark
Multiple Horizontal CSV File Analysis In Spark SQL
import org.apache.spark.sql.DataFrameReader; import org.apache.spark.sql.Dataset; import org.apache.spark.sql.Row; import org.apache.spark.sql.SparkSession; public class MultipleHorizontalCSVFileAnalysisInSparkSQL { public static void main(String[] args) { final SparkSession sparkSession = SparkSession.builder() .appName("Spark CSV Analysis Demo").master("local[5]") .getOrCreate();//ww w . ja v a2s . c o m final DataFrameReader dataFrameReader = sparkSession.read(); dataFrameReader.option("header", "true"); final Dataset<Row> csvDataFrame = dataFrameReader.csv( "src/main/resources/data-horiz-part1.csv", "src/main/resources/data-horiz-part2.csv"); csvDataFrame.printSchema(); csvDataFrame.createOrReplaceTempView("ROOM_OCCUPANCY_RAW"); final Dataset<Row> roomOccupancyData = sparkSession .sql("SELECT CAST(id as int) id, CAST(date as string) date, CAST(Occupancy as int) Occupancy FROM ROOM_OCCUPANCY_RAW"); roomOccupancyData.printSchema(); roomOccupancyData.createOrReplaceTempView("ROOM_OCCUPANCY"); sparkSession .sql("SELECT * FROM ROOM_OCCUPANCY WHERE Temperature >= 23.6 AND Humidity > 27 AND Light > 500 " + "AND CO2 BETWEEN 920 and 950").show(); } }