edu.iu.daal_pca.PCAUtil.java Source code

Java tutorial

Introduction

Here is the source code for edu.iu.daal_pca.PCAUtil.java

Source

/*
 * Copyright 2013-2016 Indiana University
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package edu.iu.daal_pca;

import it.unimi.dsi.fastutil.ints.IntArrays;

import java.io.BufferedWriter;
import java.io.File;
import java.io.IOException;
import java.io.OutputStreamWriter;
import java.util.LinkedList;
import java.util.List;
import java.util.Random;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.Future;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

import edu.iu.harp.schdynamic.DynamicScheduler;
import edu.iu.harp.partition.Partition;
import edu.iu.harp.partition.Table;
import edu.iu.harp.resource.DoubleArray;

import edu.iu.data_gen.*;

public class PCAUtil {

    protected static final Log LOG = LogFactory.getLog(PCAUtil.class);

    /**
     * Generate data and upload to the data dir.
     *
     * @param numOfDataPoints
     * @param vectorSize
     * @param numPointFiles
     * @param localInputDir
     * @param fs
     * @param dataDir
     * @throws IOException
     * @throws InterruptedException
     * @throws ExecutionException
     */
    static void generatePoints(int numOfDataPoints, int vectorSize, int numPointFiles, String localInputDir,
            FileSystem fs, Path dataDir) throws IOException, InterruptedException, ExecutionException {
        int pointsPerFile = numOfDataPoints / numPointFiles;
        System.out.println("Writing " + pointsPerFile + " vectors to a file");
        // Check data directory
        if (fs.exists(dataDir)) {
            fs.delete(dataDir, true);
        }
        // Check local directory
        File localDir = new File(localInputDir);
        // If existed, regenerate data
        if (localDir.exists() && localDir.isDirectory()) {
            for (File file : localDir.listFiles()) {
                file.delete();
            }
            localDir.delete();
        }
        boolean success = localDir.mkdir();
        if (success) {
            System.out.println("Directory: " + localInputDir + " created");
        }
        if (pointsPerFile == 0) {
            throw new IOException("No point to write.");
        }
        // Create random data points
        int poolSize = Runtime.getRuntime().availableProcessors();
        ExecutorService service = Executors.newFixedThreadPool(poolSize);
        List<Future<?>> futures = new LinkedList<Future<?>>();
        for (int k = 0; k < numPointFiles; k++) {
            // Future<?> f = service.submit(new DataGenRunnable(pointsPerFile, localInputDir, Integer.toString(k), vectorSize));
            Future<?> f = service
                    .submit(new DataGenMMDense(pointsPerFile, localInputDir, Integer.toString(k), vectorSize));
            futures.add(f); // add a new thread
        }
        for (Future<?> f : futures) {
            f.get();
        }
        // Shut down the executor service so that this
        // thread can exit
        service.shutdownNow();
        // Wrap to path object
        Path localInput = new Path(localInputDir);
        fs.copyFromLocalFile(localInput, dataDir);
        DeleteFileFolder(localInputDir);
    }

    public static void generateData(int numDataPoints, int vectorSize, int numPointFiles,
            Configuration configuration, FileSystem fs, Path dataDir, String localDir)
            throws IOException, InterruptedException, ExecutionException {
        System.out.println("Generating data..... ");
        generatePoints(numDataPoints, vectorSize, numPointFiles, localDir, fs, dataDir);
    }

    public static List<double[]> loadPoints(List<String> fileNames, int pointsPerFile, int cenVecSize,
            Configuration conf, int numThreads) {
        long startTime = System.currentTimeMillis();
        List<PointLoadTask> tasks = new LinkedList<>();
        List<double[]> arrays = new LinkedList<>();
        for (int i = 0; i < numThreads; i++) {
            tasks.add(new PointLoadTask(pointsPerFile, cenVecSize, conf));
        }
        DynamicScheduler<String, double[], PointLoadTask> compute = new DynamicScheduler<>(tasks);
        for (String fileName : fileNames) {
            compute.submit(fileName);
        }
        compute.start();
        compute.stop();

        while (compute.hasOutput()) {
            double[] output = compute.waitForOutput();
            if (output != null) {
                // for (int i = 0; i < output.length; i++)
                // {
                //   if (i > 0)
                //   {
                //     System.out.print(", ");
                //   }
                //   System.out.print(output[i]);
                // }
                arrays.add(output);
            }
        }

        long endTime = System.currentTimeMillis();
        LOG.info("File read (ms): " + (endTime - startTime) + ", number of point arrays: " + arrays.size());
        return arrays;
    }

    public static void DeleteFileFolder(String path) {

        File file = new File(path);
        if (file.exists()) {
            do {
                delete(file);
            } while (file.exists());
        } else {
            System.out.println("File or Folder not found : " + path);
        }

    }

    private static void delete(File file) {
        if (file.isDirectory()) {
            String fileList[] = file.list();
            if (fileList.length == 0) {
                System.out.println("Deleting Directory : " + file.getPath());
                file.delete();
            } else {
                int size = fileList.length;
                for (int i = 0; i < size; i++) {
                    String fileName = fileList[i];
                    System.out.println("File path : " + file.getPath() + " and name :" + fileName);
                    String fullPath = file.getPath() + "/" + fileName;
                    File fileOrFolder = new File(fullPath);
                    System.out.println("Full Path :" + fileOrFolder.getPath());
                    delete(fileOrFolder);
                }
            }
        } else {
            System.out.println("Deleting file : " + file.getPath());
            file.delete();
        }
    }

}