Example usage of the org.apache.mahout.common.iterator.sequencefile.SequenceFileDirIterable constructor

Introduction

This page shows example usages of the SequenceFileDirIterable constructor from the org.apache.mahout.common.iterator.sequencefile.SequenceFileDirIterable class.

Prototype

public SequenceFileDirIterable(Path path, PathType pathType, PathFilter filter, Configuration conf)

Source Link

Usage

From source file:org.pigml.classify.naivebayes.NaiveBayesModel.java

License:Apache License

/**
 * Builds a {@link NaiveBayesModel} from the sequence files found under {@code modelDir}.
 *
 * <p>Three sub-directories are read, in this order:
 * <ol>
 *   <li>{@code label_weights} - int key to double value, collected into a map;</li>
 *   <li>{@code feature_weights} - int key to double value, collected into a map;</li>
 *   <li>{@code label_feature_weights} - int key to vector value; each vector is
 *       assigned as the row of a {@link SparseRowMatrix} at the index given by its key.</li>
 * </ol>
 * The matrix is created when the first vector is seen, with
 * {@code weightsPerLabel.size()} rows and that vector's {@code size()} columns.
 * The model is constructed with the last argument fixed to {@code 1.0f} and
 * {@code validate()} is invoked on it before it is returned.
 *
 * @param modelDir directory containing the three sub-directories listed above
 * @param conf     configuration handed to every {@link SequenceFileDirIterable}
 * @return the model after {@code validate()} has been called on it
 * @throws IOException declared by the signature for callers to handle
 */
public static NaiveBayesModel materialize(Path modelDir, Configuration conf) throws IOException {
    OpenIntDoubleHashMap weightsPerLabel = readIntDoubleWeights(new Path(modelDir, "label_weights"), conf);
    OpenIntDoubleHashMap weightsPerFeature = readIntDoubleWeights(new Path(modelDir, "feature_weights"), conf);

    SequenceFileDirIterable<IntWritable, VectorWritable> rows =
            new SequenceFileDirIterable<IntWritable, VectorWritable>(
                    new Path(modelDir, "label_feature_weights"), PathType.LIST, PathFilters.logsCRCFilter(),
                    conf);

    // Created lazily: the column count is only known once the first vector has been read.
    // NOTE(review): if no records are found this stays null and is passed to the
    // constructor as-is - confirm that NaiveBayesModel / validate() cope with that.
    Matrix labelFeatureMatrix = null;
    for (Pair<IntWritable, VectorWritable> row : rows) {
        int rowIndex = row.getFirst().get();
        Vector featureWeights = row.getSecond().get();
        if (labelFeatureMatrix == null) {
            labelFeatureMatrix = new SparseRowMatrix(weightsPerLabel.size(), featureWeights.size());
        }
        labelFeatureMatrix.assignRow(rowIndex, featureWeights);
    }

    // TODO alphaI is hard-coded to 1.0
    // TODO perLabelThetaNormalizer is not supported yet
    NaiveBayesModel result = new NaiveBayesModel(labelFeatureMatrix, weightsPerFeature, weightsPerLabel, 1.0f);
    result.validate();
    return result;
}

/**
 * Reads every (int, double) record under {@code dir} into a fresh map; a key that
 * occurs more than once keeps the value that was read last.
 */
private static OpenIntDoubleHashMap readIntDoubleWeights(Path dir, Configuration conf) throws IOException {
    OpenIntDoubleHashMap weights = new OpenIntDoubleHashMap();
    SequenceFileDirIterable<IntWritable, DoubleWritable> entries =
            new SequenceFileDirIterable<IntWritable, DoubleWritable>(dir, PathType.LIST,
                    PathFilters.logsCRCFilter(), conf);
    for (Pair<IntWritable, DoubleWritable> entry : entries) {
        weights.put(entry.getFirst().get(), entry.getSecond().get());
    }
    return weights;
}

From source file:tk.summerway.mahout9.tools.MyClusterDumper.java

License:Apache License

/**
 * Groups the records found under {@code pointsPathDir} by their integer key.
 *
 * <p>Each record's value is appended to the list stored under the record's key,
 * as long as that list currently holds fewer than {@code maxPointsPerCluster}
 * entries; further values for that key are dropped. A key is registered in the
 * result as soon as it is first seen, so it maps to an empty list when
 * {@code maxPointsPerCluster} is zero or negative.
 *
 * @param pointsPathDir       directory whose sequence files are read
 * @param maxPointsPerCluster upper bound on the number of values kept per key
 * @param conf                configuration handed to the {@link SequenceFileDirIterable}
 * @return a map created by {@code Maps.newTreeMap()} from record key to the values kept for it
 */
public static Map<Integer, List<WeightedPropertyVectorWritable>> readPoints(Path pointsPathDir,
        long maxPointsPerCluster, Configuration conf) {
    Map<Integer, List<WeightedPropertyVectorWritable>> pointsByKey = Maps.newTreeMap();
    SequenceFileDirIterable<IntWritable, WeightedPropertyVectorWritable> records =
            new SequenceFileDirIterable<IntWritable, WeightedPropertyVectorWritable>(
                    pointsPathDir, PathType.LIST, PathFilters.logsCRCFilter(), conf);

    for (Pair<IntWritable, WeightedPropertyVectorWritable> entry : records) {
        // The record key is what the points are grouped by; the value is the point itself.
        Integer groupKey = entry.getFirst().get();
        List<WeightedPropertyVectorWritable> bucket = pointsByKey.get(groupKey);
        if (bucket == null) {
            bucket = Lists.newArrayList();
            pointsByKey.put(groupKey, bucket);
        }
        if (bucket.size() >= maxPointsPerCluster) {
            // Bucket already holds the maximum allowed; ignore the surplus point.
            continue;
        }
        bucket.add(entry.getSecond());
    }
    return pointsByKey;
}