Java examples for Big Data: Apache Spark
Multiple Vertical CSV File Analysis in Spark SQL
import org.apache.spark.sql.DataFrameReader; import org.apache.spark.sql.Dataset; import org.apache.spark.sql.Row; import org.apache.spark.sql.SparkSession; public class MultipleVerticleCSVFileAnalysisInSparkSQL { public static void main(String[] args) { final SparkSession sparkSession = SparkSession.builder() .appName("Spark CSV Analysis Demo").master("local[5]") .getOrCreate();// w ww .j a v a2s.co m final DataFrameReader dataFrameReader = sparkSession.read(); dataFrameReader.option("header", "true"); final Dataset<Row> csvDataFramePart1 = dataFrameReader .csv("src/main/resources/data-part1.csv"); final Dataset<Row> csvDataFramePart2 = dataFrameReader .csv("src/main/resources/data-part2.csv"); final Dataset<Row> csvDataFrame = csvDataFramePart1.join( csvDataFramePart2, "id"); csvDataFrame.printSchema(); csvDataFrame.createOrReplaceTempView("ROOM_OCCUPANCY_RAW"); final Dataset<Row> roomOccupancyData = sparkSession .sql("SELECT CAST(id as int) id, CAST(date as string) date, " + " Occupancy FROM ROOM_OCCUPANCY_RAW"); roomOccupancyData.printSchema(); roomOccupancyData.createOrReplaceTempView("ROOM_OCCUPANCY"); sparkSession .sql("SELECT * FROM ROOM_OCCUPANCY WHERE Temperature >= 23.6 " + "AND CO2 BETWEEN 920 and 950").show(); } }