Java examples for Big Data: Apache Spark
CSV File Analysis In Spark SQL
import org.apache.spark.sql.DataFrameReader; import org.apache.spark.sql.Dataset; import org.apache.spark.sql.Row; import org.apache.spark.sql.SparkSession; public class CSVFileAnalysisInSparkSQL { public static void main(String[] args) { final SparkSession sparkSession = SparkSession.builder() .appName("Spark CSV Analysis Demo").master("local[5]") .getOrCreate();//w w w.j a va2 s .com final DataFrameReader dataFrameReader = sparkSession.read(); dataFrameReader.option("header", "true"); final Dataset<Row> csvDataFrame = dataFrameReader .csv("src/main/resources/data.csv"); csvDataFrame.printSchema(); csvDataFrame.createOrReplaceTempView("ROOM_OCCUPANCY_RAW"); final Dataset<Row> roomOccupancyData = sparkSession .sql("SELECT CAST(id as int) id, CAST(date as string) date, CAST(Temperature as float) Temperature, " + " FROM ROOM_OCCUPANCY_RAW"); roomOccupancyData.printSchema(); roomOccupancyData.createOrReplaceTempView("ROOM_OCCUPANCY"); sparkSession .sql("SELECT * FROM ROOM_OCCUPANCY WHERE Temperature >= 23.6 AND Humidity > 27 AND Light > 500 " + "AND CO2 BETWEEN 920 and 950").show(); } }