org.apache.mahout.classifier.df.DFUtils.java Source code

Java tutorial

Introduction

Here is the source code for org.apache.mahout.classifier.df.DFUtils.java

Source

/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.mahout.classifier.df;

import com.google.common.collect.Lists;
import com.google.common.io.Closeables;

import org.apache.commons.lang.StringUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Writable;
import org.apache.mahout.classifier.df.data.Data;
import org.apache.mahout.classifier.df.data.Dataset;
import org.apache.mahout.classifier.df.node.Node;
import org.apache.mahout.common.iterator.sequencefile.PathFilters;

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import java.io.StringWriter;
import java.util.List;

/**
 * Utility class that contains various helper methods
 */
public final class DFUtils {
    /** Not instantiable: this final class only exposes static helper methods. */
    private DFUtils() {
    }

    /**
     * Serializes an array of {@link Node}s: first the element count as an int,
     * then every node in array order through its own {@code write} method.
     *
     * @param out   destination of the serialized form
     * @param array nodes to write
     * @throws java.io.IOException if writing to {@code out} fails
     */
    public static void writeArray(DataOutput out, Node[] array) throws IOException {
        final int count = array.length;
        out.writeInt(count);
        for (int i = 0; i < count; i++) {
            array[i].write(out);
        }
    }

    /**
     * Deserializes an array of {@link Node}s: reads the element count as an int,
     * then obtains each element from {@code Node.read(in)} in order.
     *
     * @param in source to read from
     * @return the nodes that were read, in the order they appear in the input
     * @throws java.io.IOException if reading from {@code in} fails
     */
    public static Node[] readNodeArray(DataInput in) throws IOException {
        final int count = in.readInt();
        final Node[] result = new Node[count];
        int pos = 0;
        while (pos < count) {
            result[pos] = Node.read(in);
            pos++;
        }
        return result;
    }

    /**
     * Serializes a {@code double[]}: first the element count as an int,
     * then every value in array order via {@code writeDouble}.
     *
     * @param out   destination of the serialized form
     * @param array values to write
     * @throws java.io.IOException if writing to {@code out} fails
     */
    public static void writeArray(DataOutput out, double[] array) throws IOException {
        final int count = array.length;
        out.writeInt(count);
        for (int i = 0; i < count; i++) {
            out.writeDouble(array[i]);
        }
    }

    /**
     * Deserializes a {@code double[]}: reads the element count as an int,
     * then that many values via {@code readDouble}.
     *
     * @param in source to read from
     * @return the values that were read, in input order
     * @throws java.io.IOException if reading from {@code in} fails
     */
    public static double[] readDoubleArray(DataInput in) throws IOException {
        final int count = in.readInt();
        final double[] values = new double[count];
        int pos = 0;
        while (pos < count) {
            values[pos] = in.readDouble();
            pos++;
        }
        return values;
    }

    /**
     * Serializes an {@code int[]}: first the element count as an int,
     * then every value in array order via {@code writeInt}.
     *
     * @param out   destination of the serialized form
     * @param array values to write
     * @throws java.io.IOException if writing to {@code out} fails
     */
    public static void writeArray(DataOutput out, int[] array) throws IOException {
        final int count = array.length;
        out.writeInt(count);
        for (int i = 0; i < count; i++) {
            out.writeInt(array[i]);
        }
    }

    /**
     * Deserializes an {@code int[]}: reads the element count as an int,
     * then that many values via {@code readInt}.
     *
     * @param in source to read from
     * @return the values that were read, in input order
     * @throws java.io.IOException if reading from {@code in} fails
     */
    public static int[] readIntArray(DataInput in) throws IOException {
        final int count = in.readInt();
        final int[] values = new int[count];
        int pos = 0;
        while (pos < count) {
            values[pos] = in.readInt();
            pos++;
        }
        return values;
    }

    /**
     * Collects the files found directly under the output directory.
     * <p>
     * Entries are listed through {@code PathFilters.logsCRCFilter()}; of those,
     * directories and entries whose name starts with {@code "_"} are skipped.
     *
     * @param fs         file system used to list the directory
     * @param outputPath directory to list
     * @return the remaining paths, in the order the file system returned them
     * @throws IOException if listing fails or if no file is left after filtering
     */
    public static Path[] listOutputFiles(FileSystem fs, Path outputPath) throws IOException {
        List<Path> found = Lists.newArrayList();
        FileStatus[] statuses = fs.listStatus(outputPath, PathFilters.logsCRCFilter());
        for (FileStatus status : statuses) {
            if (status.isDir()) {
                continue;
            }
            Path candidate = status.getPath();
            if (candidate.getName().startsWith("_")) {
                continue;
            }
            found.add(candidate);
        }

        if (found.isEmpty()) {
            throw new IOException("No output found !");
        }

        Path[] result = new Path[found.size()];
        return found.toArray(result);
    }

    /**
     * Formats a time interval given in milliseconds as
     * {@code "<hours>h <minutes>m <seconds>s <millis>"}; for example
     * {@code 3723004} becomes {@code "1h 2m 3s 4"}. The trailing millisecond
     * component carries no unit suffix, and hours are not folded into days.
     *
     * @param milli interval length in milliseconds
     * @return the formatted interval
     */
    public static String elapsedTime(long milli) {
        final long millisPart = milli % 1000;
        final long totalSeconds = milli / 1000;

        final long secondsPart = totalSeconds % 60;
        final long totalMinutes = totalSeconds / 60;

        final long minutesPart = totalMinutes % 60;
        final long hoursPart = totalMinutes / 60;

        return new StringBuilder()
                .append(hoursPart).append("h ")
                .append(minutesPart).append("m ")
                .append(secondsPart).append("s ")
                .append(millisPart)
                .toString();
    }

    /**
     * Serializes a {@link Writable} into a file.
     * <p>
     * The file is created with {@code FileSystem.create(Path)} on the file system
     * that {@code path} resolves to under {@code conf}, and the stream is always
     * closed before this method returns.
     *
     * @param conf     configuration used to resolve the file system of {@code path}
     * @param path     file to create
     * @param writable object whose {@code write} method produces the file content
     * @throws IOException if the file cannot be created, written or closed
     */
    public static void storeWritable(Configuration conf, Path path, Writable writable) throws IOException {
        FileSystem fs = path.getFileSystem(conf);

        FSDataOutputStream out = fs.create(path);
        // Closing an output stream may still have buffered bytes to flush, so a
        // failing close() must not be swallowed after a successful write. It is only
        // suppressed when write() already failed, so the original error is not masked.
        boolean threw = true;
        try {
            writable.write(out);
            threw = false;
        } finally {
            Closeables.close(out, threw);
        }
    }

    /**
     * Writes every instance of {@code data} to a text file, one line per instance.
     * <p>
     * Each line is the instance's {@code toString(dataset)} text immediately followed
     * by the label string of that instance and a {@code '\n'}, encoded as UTF-8.
     * This method inserts no separator between the two parts.
     *
     * @param conf configuration used to resolve the file system of {@code path}
     * @param path file to create
     * @param data instances to write, together with the dataset describing them
     * @throws IOException if the file cannot be created, written or closed
     */
    public static void store(Configuration conf, Path path, Data data) throws IOException {
        FileSystem fs = path.getFileSystem(conf);
        Dataset dataset = data.getDataset();

        FSDataOutputStream out = fs.create(path);
        // Close exactly once: a close() failure is reported when all writes succeeded,
        // and suppressed when a write already failed so the first error is not masked.
        boolean threw = true;
        try {
            int size = data.size();
            for (int i = 0; i < size; i++) {
                StringBuilder line = new StringBuilder();
                line.append(data.get(i).toString(dataset))
                        .append(dataset.getLabelString(dataset.getLabel(data.get(i))))
                        .append('\n');
                out.write(line.toString().getBytes("utf-8"));
            }
            threw = false;
        } finally {
            Closeables.close(out, threw);
        }
    }
}