regression.data.GenerateData.java Source code

Java tutorial

Introduction

Here is the source code for regression.data.GenerateData.java

Source

/*
 * To change this license header, choose License Headers in Project Properties.
 * To change this template file, choose Tools | Templates
 * and open the template in the editor.
 */
package regression.data;

import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileWriter;
import java.io.IOException;
import java.io.PrintWriter;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import java.util.Scanner;
import org.apache.poi.openxml4j.exceptions.InvalidFormatException;
import org.apache.poi.openxml4j.opc.OPCPackage;
import org.apache.poi.ss.usermodel.Cell;
import org.apache.poi.ss.usermodel.Row;
import org.apache.poi.xssf.usermodel.XSSFSheet;
import org.apache.poi.xssf.usermodel.XSSFWorkbook;
import regression.Point;
import regression.logisticRegression.Instance;

/**
 * Loads two-column numeric data from Excel ({@code .xlsx}) workbooks into
 * {@link Point} objects and can export the same data as a Weka ARFF file.
 *
 * <p>Only the first sheet of a workbook is read. For each row, the first
 * populated cell and the cell immediately to its right are inspected; the row
 * contributes data only when both cells exist and are numeric. All other rows
 * (headers, blanks, text) are skipped silently.</p>
 *
 * @author Cyga
 */
public class GenerateData {

    /** Name of the ARFF file written by {@link #createWekaFile(File)}, relative to the working directory. */
    private static final String WEKA_FILE_NAME = "weka.arff";

    /** Points accumulated by {@link #readExcelDataSet(File)}; replaced wholesale by {@link #setInstances(List)}. */
    List<Point> points = new ArrayList<>();

    /** Training points; this class only stores and returns the list. */
    List<Point> trainingInstances = new ArrayList<>();

    /** Size of {@link #points} as of the last read/export, unless overwritten through the setter. */
    int numberOfInstances;

    /**
     * @return the internal training list (not a copy)
     */
    public List<Point> getTrainingInstances() {
        return trainingInstances;
    }

    /**
     * @param trainingInstances list to store as the training set; kept by reference
     */
    public void setTrainingInstances(List<Point> trainingInstances) {
        this.trainingInstances = trainingInstances;
    }

    /**
     * @return the internal list of loaded points (not a copy)
     */
    public List<Point> getInstances() {
        return points;
    }

    /**
     * @return the instance count last recorded by a read/export or by the setter
     */
    public int getNumberOfInstances() {
        return numberOfInstances;
    }

    /**
     * @param numberOfInstances value to record as the instance count
     */
    public void setNumberOfInstances(int numberOfInstances) {
        this.numberOfInstances = numberOfInstances;
    }

    /**
     * @param instances list to use as the loaded points; kept by reference
     */
    public void setInstances(List<Point> instances) {
        this.points = instances;
    }

    /**
     * Creates an empty generator with no points loaded.
     *
     * @throws FileNotFoundException never thrown by this constructor; the
     *         clause is retained so existing callers keep compiling
     */
    public GenerateData() throws FileNotFoundException {

    }

    /**
     * Reads numeric (x, y) pairs from the first sheet of an Excel workbook and
     * appends one {@link Point} per qualifying row to the loaded points.
     * Afterwards the instance count is set to the total number of loaded points.
     *
     * <p>Rows without cells, rows whose second cell is missing, and rows where
     * either cell is non-numeric are skipped. If building a point fails, the
     * row number is reported on {@code System.err} and reading continues.</p>
     *
     * @param file the {@code .xlsx} workbook to read
     * @throws FileNotFoundException if {@code file} cannot be opened for reading
     * @throws IOException if reading or closing the workbook fails
     * @throws InvalidFormatException if POI rejects the content while opening the package
     */
    public void readExcelDataSet(File file) throws FileNotFoundException, IOException, InvalidFormatException {
        // The workbook is loaded from a stream rather than from the File itself: closing a
        // File-backed package opened read-write makes POI save it back over the source
        // file, whereas a stream-backed package is simply released on close().
        try (FileInputStream excelFile = new FileInputStream(file);
                XSSFWorkbook workbook = new XSSFWorkbook(OPCPackage.open(excelFile))) {
            for (Row row : workbook.getSheetAt(0)) {
                short firstIndex = row.getFirstCellNum();
                if (firstIndex < 0) {
                    // Row exists physically but holds no cells.
                    continue;
                }
                Cell firstCell = row.getCell(firstIndex);
                Cell secondCell = row.getCell(firstIndex + 1);
                if (!isNumeric(firstCell) || !isNumeric(secondCell)) {
                    continue;
                }
                try {
                    this.points.add(new Point(firstCell.getNumericCellValue(), secondCell.getNumericCellValue()));
                } catch (Exception e) {
                    // Best effort: one bad row must not abort the whole import.
                    System.err.println("Cannot convert data in row: " + row.getRowNum());
                }
            }
        }
        this.numberOfInstances = points.size();
    }

    /**
     * Converts the first sheet of an Excel workbook into a Weka ARFF file named
     * {@code weka.arff} in the current working directory, overwriting any
     * existing file of that name.
     *
     * <p>Each qualifying row (see the class description) becomes one data line
     * {@code x,label}, where {@code label} is the second cell truncated to an
     * {@code int}. This method does not add to the loaded points; it only
     * resets the instance count to the current number of loaded points, which
     * is unrelated to the number of lines written.</p>
     *
     * @param f the {@code .xlsx} workbook to convert
     * @throws FileNotFoundException if {@code f} cannot be opened or the ARFF file cannot be created
     * @throws IOException if reading the workbook or writing the ARFF file fails
     * @throws InvalidFormatException if POI rejects the content while opening the package
     */
    public void createWekaFile(File f) throws FileNotFoundException, IOException, InvalidFormatException {
        File wekaFile = new File(WEKA_FILE_NAME);
        // Stream-backed for the same reason as in readExcelDataSet: close() must not rewrite the input.
        try (FileInputStream excelFile = new FileInputStream(f);
                XSSFWorkbook workbook = new XSSFWorkbook(OPCPackage.open(excelFile))) {
            XSSFSheet firstSheet = workbook.getSheetAt(0);
            // The output file is opened only after the workbook has been parsed, so an
            // unreadable input does not truncate an existing ARFF file.
            try (PrintWriter writer = new PrintWriter(wekaFile)) {
                writer.println("@RELATION logistic");
                writer.println("@ATTRIBUTE x NUMERIC");
                writer.println("@ATTRIBUTE class {1,0}");
                writer.println("@DATA");
                for (Row row : firstSheet) {
                    short firstIndex = row.getFirstCellNum();
                    if (firstIndex < 0) {
                        // Row exists physically but holds no cells.
                        continue;
                    }
                    Cell firstCell = row.getCell(firstIndex);
                    Cell secondCell = row.getCell(firstIndex + 1);
                    if (!isNumeric(firstCell) || !isNumeric(secondCell)) {
                        continue;
                    }
                    try {
                        writer.println(firstCell.getNumericCellValue() + "," + (int) secondCell.getNumericCellValue());
                    } catch (RuntimeException e) {
                        // Best effort: skip a row whose stored value cannot be read as a number.
                        System.err.println("Cannot convert data in row: " + row.getRowNum());
                    }
                }
                // PrintWriter records write failures instead of throwing; surface them.
                if (writer.checkError()) {
                    throw new IOException("Failed to write " + wekaFile.getPath());
                }
            }
        }
        this.numberOfInstances = points.size();

    }

    /**
     * Tells whether a cell is present and holds a numeric value.
     *
     * @param cell the cell to test; may be {@code null} when the row has no cell at that index
     * @return {@code true} only for a non-null cell of numeric type
     */
    private static boolean isNumeric(Cell cell) {
        return cell != null && cell.getCellType() == Cell.CELL_TYPE_NUMERIC;
    }
}