Java examples for Big Data: Apache Spark
Linear Regression Appliances Energy Prediction via Apache Spark
import java.io.IOException; import org.apache.spark.ml.Pipeline; import org.apache.spark.ml.PipelineModel; import org.apache.spark.ml.PipelineStage; import org.apache.spark.ml.feature.VectorAssembler; import org.apache.spark.ml.regression.LinearRegression; import org.apache.spark.sql.DataFrameReader; import org.apache.spark.sql.Dataset; import org.apache.spark.sql.Row; import org.apache.spark.sql.SparkSession; public class LinearRegessionApplicancesEnergyPrediction { public static void main(String[] args) throws IOException { final SparkSession sparkSession = SparkSession.builder() .appName("Spark Linear Regression Demo").master("local[5]") .getOrCreate();// w w w . j a v a2 s . c om final DataFrameReader dataFrameReader = sparkSession.read().option( "header", true); final Dataset<Row> trainingData = dataFrameReader .csv("src/main/resources/energydata_complete.csv"); trainingData.createOrReplaceTempView("TRAINING_DATA"); final Dataset<Row> typedTrainingData = sparkSession .sql("SELECT cast(Appliances as float) Appl_Energy, cast(T1 as float) T1, cast(RH_1 as float) RH_1, " + "cast(Visibility as float) VIS FROM TRAINING_DATA"); final VectorAssembler vectorAssembler = new VectorAssembler() .setInputCols( new String[] { "T1", "RH_1", "VIS" }).setOutputCol("features"); final Dataset<Row> featuresData = vectorAssembler .transform(typedTrainingData); featuresData.printSchema(); Dataset<Row>[] splits = featuresData.randomSplit(new double[] { 0.7, 0.3 }); Dataset<Row> trainingFeaturesData = splits[0]; Dataset<Row> testFeaturesData = splits[1]; PipelineModel model = null; try { model = PipelineModel .load("./resources/applianceenergyprediction"); } catch (Exception exception) { } if (model == null) { final LinearRegression regression = new LinearRegression() .setLabelCol("Appl_Energy").setFeaturesCol("features"); final Pipeline pipeline = new Pipeline() .setStages(new PipelineStage[] { regression }); model = pipeline.fit(trainingFeaturesData); 
model.save("src/main/resources/applianceenergyprediction"); } final Dataset<Row> predictions = model.transform(testFeaturesData); predictions.show(); } }