// Java tutorial
/* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. * The ASF licenses this file to You under the Apache License, Version 2.0 * (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package hu.sztaki.incremental.ml.streaming.imsr; import java.io.File; import java.util.Locale; import java.util.Scanner; import java.util.regex.Pattern; import org.apache.commons.math.linear.Array2DRowRealMatrix; import org.apache.commons.math.linear.RealMatrix; import org.apache.flink.api.java.tuple.Tuple2; import org.apache.flink.streaming.api.function.source.SourceFunction; import org.apache.flink.util.Collector; public class MatrixVectorPairSource implements SourceFunction<Tuple2<double[][], double[][]>> { private static final long serialVersionUID = 2725224422071102334L; private String path; private int indepDim; private int batchSize; public MatrixVectorPairSource(String path, int batchSize) { this.path = path; this.batchSize = batchSize; } private static Scanner initCsvScanner(Scanner s) { s.useLocale(Locale.ENGLISH); s.useDelimiter("(\\s+|\\s*,\\s*)"); return s; } @Override public void invoke(Collector<Tuple2<double[][], double[][]>> out) throws Exception { File f = new File(path); if (!f.exists()) { System.err.println(path + " does not exist."); System.exit(1); } Scanner s = initCsvScanner(new Scanner(f)); String firstLine = s.nextLine(); Scanner firstLineScanner = initCsvScanner(new 
Scanner(firstLine)); for (indepDim = 0; firstLineScanner.hasNext(); firstLineScanner.next(), indepDim++) ; indepDim--; while (s.hasNext()) { Array2DRowRealMatrix X = new Array2DRowRealMatrix(batchSize, indepDim); Array2DRowRealMatrix y = new Array2DRowRealMatrix(batchSize, 1); readMatricesSideBySide(s, X, y); out.collect(new Tuple2<double[][], double[][]>(X.getDataRef(), y.getDataRef())); } s.close(); out.close(); } private void readMatricesSideBySide(Scanner scanner, RealMatrix... matrices) { for (int i = 0; i < matrices[0].getRowDimension(); i++) { if (!scanner.hasNextLine()) { return; //there will be some 0 rows } String line = scanner.nextLine(); Scanner lineScanner = initCsvScanner(new Scanner(line)); for (RealMatrix m : matrices) { for (int j = 0; j < m.getColumnDimension(); j++) { double d = lineScanner.nextDouble(); m.setEntry(i, j, d); } } lineScanner.close(); } } }