Example usage for org.apache.hadoop.fs FileSystem create

List of usage examples for org.apache.hadoop.fs FileSystem create

Introduction

On this page you can find example usage for org.apache.hadoop.fs FileSystem create.

Prototype

public FSDataOutputStream create(Path f) throws IOException 

Source Link

Document

Create an FSDataOutputStream at the indicated Path.
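
Before the full examples below, here is a minimal sketch of the call (the Configuration, FileSystem, and path shown are only illustrative). Note that this single-argument overload overwrites an existing file by default, and the returned FSDataOutputStream should be closed, for example with try-with-resources:

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class CreateExample {
    public static void main(String[] args) throws IOException {
        // Default configuration; picks up core-site.xml / hdfs-site.xml from the classpath.
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(conf);

        // Illustrative path; use a location the process can write to.
        Path file = new Path("/tmp/filesystem-create-example.txt");

        // create(Path) overwrites an existing file; try-with-resources closes the stream.
        try (FSDataOutputStream out = fs.create(file)) {
            out.writeUTF("hello hdfs");
        }
    }
}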

Usage

From source file:com.yahoo.spaclu.data.index.IndexFeatureValueSpark.java

License:Apache License

public static void main(String[] args) throws IOException {
    IndexFeatureValueOptions optionsFormatRawToDatabase = new IndexFeatureValueOptions(args);

    String inputPathString = optionsFormatRawToDatabase.getInputPath();
    String outputPathString = optionsFormatRawToDatabase.getOutputPath();
    String indexPathString = optionsFormatRawToDatabase.getIndexPath();
    int numberOfPartitions = optionsFormatRawToDatabase.getNumberOfPartitions();
    int maxCutoffThreshold = optionsFormatRawToDatabase.getMaximumCutoffThreshold();
    int minCutoffThreshold = optionsFormatRawToDatabase.getMinimumCutoffThreshold();

    /*
     * Set<String> excludingFeatureNames = new HashSet<String>();
     * excludingFeatureNames.add("login");
     * excludingFeatureNames.add("time"); excludingFeatureNames.add("day");
     * excludingFeatureNames.add("hms"); excludingFeatureNames.add("fail");
     */

    sLogger.info("Tool: " + IndexFeatureValueSpark.class.getSimpleName());
    sLogger.info(" - input path: " + inputPathString);
    sLogger.info(" - output path: " + outputPathString);
    sLogger.info(" - index path: " + indexPathString);
    sLogger.info(" - number of partitions: " + numberOfPartitions);
    sLogger.info(" - maximum cutoff: " + maxCutoffThreshold);
    sLogger.info(" - minimum cutoff: " + minCutoffThreshold);

    // Create a default Hadoop configuration
    Configuration conf = new Configuration();

    // Get a FileSystem handle for the configured HDFS
    FileSystem fs = FileSystem.get(conf);

    Path outputPath = new Path(outputPathString);
    if (fs.exists(outputPath)) {
        fs.delete(outputPath, true);
    }

    SparkConf sparkConf = new SparkConf().setAppName(optionsFormatRawToDatabase.toString());

    JavaSparkContext sc = new JavaSparkContext(sparkConf);

    Map<Integer, String> featureIndices = getFeatureIndices(sc.textFile(indexPathString));

    List<Integer> listOfAllFeatureIndices = new LinkedList<Integer>();
    List<String> listOfAllFeatureInfo = new LinkedList<String>();
    Iterator<Integer> indexIter = featureIndices.keySet().iterator();
    while (indexIter.hasNext()) {
        Integer tempKey = indexIter.next();
        listOfAllFeatureIndices.add(tempKey);
        listOfAllFeatureInfo.add(featureIndices.get(tempKey));
    }

    JavaRDD<String> rawLines = sc.textFile(inputPathString).repartition(numberOfPartitions);

    JavaRDD<String[]> tokenizedLines = rawLines.map(new LineFilter(listOfAllFeatureIndices));
    JavaPairRDD<Entry<Integer, String>, Long> featureValuesCounts = tokenizedLines
            .flatMapToPair(new FeatureValueMapper()).reduceByKey(new FeatureValueReducer());

    Map<Integer, Builder<String, Long>> featureValueMapping = new Hashtable<Integer, Builder<String, Long>>();
    Iterator<Tuple2<Entry<Integer, String>, Long>> iter = featureValuesCounts.collect().iterator();
    while (iter.hasNext()) {
        Tuple2<Entry<Integer, String>, Long> temp = iter.next();
        Entry<Integer, String> featureValueEntry = temp._1;
        int featureIndex = featureValueEntry.getKey();
        String featureValue = featureValueEntry.getValue();
        long featureValueCount = temp._2;

        if (!featureValueMapping.containsKey(featureIndex)) {
            Builder<String, Long> mapBuilder = new Builder<String, Long>(Ordering.natural());

            featureValueMapping.put(featureIndex, mapBuilder);
        }

        featureValueMapping.get(featureIndex).put(featureValue, featureValueCount);
    }

    Preconditions.checkArgument(featureValueMapping.size() == listOfAllFeatureIndices.size());

    String outputFeaturePathString = outputPathString + "feature" + Settings.SEPERATOR;
    fs.mkdirs(new Path(outputFeaturePathString));

    String outputFeatureNamePathString = outputPathString + "feature.dat";
    Path outputFeatureNamePath = new Path(outputFeatureNamePathString);
    PrintWriter featureNamePrinterWriter = new PrintWriter(fs.create(outputFeatureNamePath), true);

    List<Integer> listOfFeatureIndicesToKeep = new LinkedList<Integer>();

    Map<Integer, Map<String, Integer>> featureValueIndex = new Hashtable<Integer, Map<String, Integer>>();
    for (int d = 0; d < featureValueMapping.size(); d++) {
        Map<String, Integer> valueToIndex = new Hashtable<String, Integer>();
        Map<Integer, String> indexToValue = new Hashtable<Integer, String>();

        ImmutableSortedMap<String, Long> immutableSortedMap = featureValueMapping.get(d).build();
        for (String keyString : immutableSortedMap.keySet()) {
            valueToIndex.put(keyString, valueToIndex.size());
            indexToValue.put(indexToValue.size(), keyString);
        }

        if (valueToIndex.size() <= minCutoffThreshold || valueToIndex.size() > maxCutoffThreshold) {
            sLogger.info("Feature (" + listOfAllFeatureInfo.get(d) + ") contains " + valueToIndex.size()
                    + " values, skip...");

            continue;
        } else {
            sLogger.info("Feature (" + listOfAllFeatureInfo.get(d) + ") contains " + valueToIndex.size()
                    + " values.");

            listOfFeatureIndicesToKeep.add(listOfAllFeatureIndices.get(d));
            featureNamePrinterWriter.println(listOfAllFeatureInfo.get(d));
        }

        String outputFeatureIndexPathString = outputFeaturePathString + "index" + Settings.UNDER_SCORE
                + featureValueIndex.size() + ".dat";
        Path outputIndexPath = new Path(outputFeatureIndexPathString);

        featureValueIndex.put(featureValueIndex.size(), valueToIndex);

        PrintWriter featureValueIndexPrinterWriter = new PrintWriter(fs.create(outputIndexPath), true);
        for (int i = 0; i < indexToValue.size(); i++) {
            featureValueIndexPrinterWriter.println("" + i + Settings.TAB + indexToValue.get(i) + Settings.TAB
                    + immutableSortedMap.get(indexToValue.get(i)));
        }
        featureValueIndexPrinterWriter.close();
    }

    featureNamePrinterWriter.close();

    JavaRDD<String[]> filteredLines = rawLines.map(new LineFilter(listOfFeatureIndicesToKeep));
    JavaRDD<FeatureIntegerVector> indexedData = filteredLines.map(new FeatureValueIndexer(featureValueIndex));

    String outputDataPathString = outputPathString + "data";
    Path outputDataPath = new Path(outputDataPathString);
    if (fs.exists(outputDataPath)) {
        fs.delete(outputDataPath, true);
    }
    indexedData.saveAsTextFile(outputDataPathString);

    sc.stop();
}
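
One detail worth noting in the example above is that fs.create(...) is wrapped directly in a PrintWriter whose second constructor argument (true) turns on auto-flush, so each println is pushed to the stream as it is written. A pared-down sketch of just that step, reusing the names from the example:

    // Sketch: PrintWriter(OutputStream, true) flushes on every println.
    Path outputFeatureNamePath = new Path(outputPathString + "feature.dat");
    PrintWriter featureNamePrinterWriter = new PrintWriter(fs.create(outputFeatureNamePath), true);
    featureNamePrinterWriter.println("feature-name"); // illustrative value
    featureNamePrinterWriter.close();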

From source file:com.yahoo.storm.yarn.Util.java

License:Open Source License

@SuppressWarnings("rawtypes")
static Path createConfigurationFileInFs(FileSystem fs, String appHome, Map stormConf,
        YarnConfiguration yarnConf) throws IOException {
    // dump the storm configuration into FS at conf/storm.yaml
    Path confDst = new Path(fs.getHomeDirectory(), appHome + Path.SEPARATOR + STORM_CONF_PATH_STRING);
    Path dirDst = confDst.getParent();
    fs.mkdirs(dirDst);

    //storm.yaml
    FSDataOutputStream out = fs.create(confDst);
    Yaml yaml = new Yaml();
    OutputStreamWriter writer = new OutputStreamWriter(out);
    rmNulls(stormConf);
    yaml.dump(stormConf, writer);
    writer.close();
    out.close();

    //yarn-site.xml
    Path yarn_site_xml = new Path(dirDst, "yarn-site.xml");
    out = fs.create(yarn_site_xml);
    writer = new OutputStreamWriter(out);
    yarnConf.writeXml(writer);
    writer.close();
    out.close();

    //logback.xml
    Path logback_xml = new Path(dirDst, "logback.xml");
    out = fs.create(logback_xml);
    CreateLogbackXML(out);
    out.close();

    return dirDst;
}
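
The method above closes each writer and stream explicitly; if Yaml.dump or writeXml throws, the open streams are not closed. A hedged variant of the storm.yaml step using try-with-resources (closing the OutputStreamWriter also closes the underlying FSDataOutputStream) could look like this:

    // Sketch: try-with-resources version of the storm.yaml dump from the method above.
    rmNulls(stormConf);
    try (OutputStreamWriter writer = new OutputStreamWriter(fs.create(confDst))) {
        new Yaml().dump(stormConf, writer);
    }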

From source file:com.yahoo.ycsb.bulk.hbase.BulkDataGeneratorJob.java

License:Apache License

/** Create the input file used for launching the maps */
void createInputFile(Job job, String workdir) throws IOException {
    Configuration conf = job.getConfiguration();
    FileSystem fs = FileSystem.get(conf);
    Path inpath = new Path(workdir + "/inputkeyranges.txt");
    PrintStream out = new PrintStream(new BufferedOutputStream(fs.create(inpath)));
    long start = conf.getLong(ARG_KEY_RANGE_START, 0);
    long end = conf.getLong(ARG_KEY_RANGE_END, 0);
    int parts = conf.getInt(ARG_KEY_RANGE_PARTITIONS, 1);

    writeRanges(start, end, parts, out);
    out.close();

    TextInputFormat.setInputPaths(job, inpath);
    // NLineInputFormat.setInputPaths(job, inpath);

    /* compute the max input split size */
    //        long max_split = fs.getFileStatus( inpath ).getLen() / parts;
    //        TextInputFormat.setMaxInputSplitSize(job, max_split);

    // JobConf jc = new JobConf(conf);
    // jc.setNumMapTasks(parts);
}

From source file:com.yahoo.ycsb.bulk.hbase.BulkDataGeneratorJob.java

License:Apache License

int createSplitsFile(Configuration conf, String splitsFile) throws IOException, InvalidInputException {
    int splitCount = conf.getInt(ARG_KEY_SPLIT_COUNT, 0);

    if (splitCount <= 0) {
        throw new InvalidInputException(
                "Invalid or unspecified split count:" + splitCount + "\nSpecify it in: " + ARG_KEY_SPLIT_COUNT);
    }

    String rowPrefix = conf.get(ARG_KEY_ROW_PREFIX, "row");
    String rowFormat = DataGenerator.getKeyFormat(rowPrefix);
    boolean hashKeys = conf.getBoolean(ARG_KEY_HASH_KEYS, false);
    long start = conf.getInt(ARG_KEY_RANGE_START, 0);
    long end = conf.getInt(ARG_KEY_RANGE_END, 0);

    FileSystem fs = FileSystem.get(conf);
    Path splitsPath = new Path(splitsFile);
    Path plainPath = new Path(splitsFile + "-debug");
    PrintStream out = new PrintStream(new BufferedOutputStream(fs.create(splitsPath)));
    PrintStream plain = new PrintStream(new BufferedOutputStream(fs.create(plainPath)));

    if (hashKeys) {
        start = conf.getInt(ARG_KEY_HASHED_RANGE_START, 0);
        end = conf.getInt(ARG_KEY_HASHED_RANGE_END, Integer.MAX_VALUE);
    }

    long rangeSize = Math.max(1, (end - start + 1) / (splitCount + 1));
    long rangeStart = start + rangeSize;

    System.err.println("Generating splits file: " + splitsFile + "\nrangeStart:" + rangeStart + "\nrangeSize: "
            + rangeSize + "\nsplitCount: " + splitCount + "\nrangeEnd: " + end);

    int i = 0;
    try {
        while (rangeStart < end && splitCount > 0) {
            out.println(new String(Base64.encodeBase64(String.format(rowFormat, rangeStart).getBytes())));
            plain.println(String.format(rowFormat, rangeStart));
            rangeStart += rangeSize;
            splitCount--;
            i++;
        }
    } finally {
        out.close();
        plain.close();
    }
    System.err.println("Splits created: " + i);
    return i;
}

From source file:com.yss.util.YarnUtil.java

License:Open Source License

@SuppressWarnings("rawtypes")
public static Path createConfigurationFileInFs(FileSystem fs, String appHome, Map stormConf,
        YarnConfiguration yarnConf) throws IOException {
    // dump the storm configuration into FS at conf/storm.yaml
    Path confDst = new Path(fs.getHomeDirectory(), appHome + Path.SEPARATOR + STORM_CONF_PATH_STRING);
    Path dirDst = confDst.getParent();
    fs.mkdirs(dirDst);

    //storm.yaml
    FSDataOutputStream out = fs.create(confDst);
    Yaml yaml = new Yaml();
    OutputStreamWriter writer = new OutputStreamWriter(out);
    rmNulls(stormConf);

    yaml.dump(stormConf, writer);

    writer.close();
    out.close();

    //yarn-site.xml
    Path yarn_site_xml = new Path(dirDst, "yarn-site.xml");
    out = fs.create(yarn_site_xml);
    writer = new OutputStreamWriter(out);
    yarnConf.writeXml(writer);
    writer.close();
    out.close();
    return dirDst;
}

From source file:contrail.stages.GraphStats.java

License:Open Source License

protected void writeN50StatsToFile(ArrayList<GraphN50StatsData> records) {
    String outputDir = (String) stage_options.get("outputpath");
    Path outputPath = new Path(outputDir, "n50stats.avro");

    FileSystem fs = null;
    try {
        fs = FileSystem.get(getConf());
    } catch (IOException e) {
        throw new RuntimeException("Can't get filesystem: " + e.getMessage());
    }

    // Write the data to the file.
    Schema schema = records.get(0).getSchema();
    DatumWriter<GraphN50StatsData> datumWriter = new SpecificDatumWriter<GraphN50StatsData>(schema);
    DataFileWriter<GraphN50StatsData> writer = new DataFileWriter<GraphN50StatsData>(datumWriter);

    try {
        FSDataOutputStream outputStream = fs.create(outputPath);
        writer.create(schema, outputStream);
        for (GraphN50StatsData stats : records) {
            writer.append(stats);
        }
        writer.close();
    } catch (IOException exception) {
        fail("There was a problem writing the N50 stats to an avro file. " + "Exception: "
                + exception.getMessage());
    }
}

From source file:contrail.stages.GraphStats.java

License:Open Source License

protected void writeTopNContigs(List<Integer> lengths) {
    String outputDir = (String) stage_options.get("outputpath");
    Path outputPath = new Path(outputDir, "topn_contigs.avro");

    FileSystem fs = null;
    try {
        fs = FileSystem.get(getConf());
    } catch (IOException e) {
        throw new RuntimeException("Can't get filesystem: " + e.getMessage());
    }

    // Write the data to the file.
    Schema schema = Schema.create(Schema.Type.INT);
    DatumWriter<Integer> datumWriter = new SpecificDatumWriter<Integer>(schema);
    DataFileWriter<Integer> writer = new DataFileWriter<Integer>(datumWriter);

    try {
        FSDataOutputStream outputStream = fs.create(outputPath);
        writer.create(schema, outputStream);
        for (Integer record : lengths) {
            writer.append(record);
        }
        writer.close();
    } catch (IOException exception) {
        fail("There was a problem writing the top N lengths to an avro file. " + "Exception: "
                + exception.getMessage());
    }
}
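
In both GraphStats methods the Avro DataFileWriter wraps the stream returned by fs.create, and writer.close() is only reached when no exception occurs. Since DataFileWriter implements Closeable, the same loop can be written with try-with-resources; a sketch using the schema and datumWriter from the method above:

    // Sketch: closing the DataFileWriter also flushes and closes the underlying stream.
    try (DataFileWriter<GraphN50StatsData> writer = new DataFileWriter<GraphN50StatsData>(datumWriter)) {
        writer.create(schema, fs.create(outputPath));
        for (GraphN50StatsData stats : records) {
            writer.append(stats);
        }
    }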

From source file:corner.hadoop.services.impl.HdfsAccessorProxy.java

License:Apache License

/**
 * @see corner.hadoop.services.DistributedResourceAccessor#putFile(java.lang.String,
 *      java.io.InputStream)
 */
@Override
public void putFile(String filePath, InputStream is) throws IOException {
    Path dstPath = new Path(filePath);
    FileSystem dstFs = dstPath.getFileSystem(getConf());
    FSDataOutputStream out = dstFs.create(dstPath);
    try {
        IOUtils.copyBytes(is, out, getConf(), false);
    } finally {
        out.close();
    }
}

From source file:corner.services.hadoop.impl.HdfsAccessorProxy.java

License:Apache License

/**
 * @see corner.services.hadoop.DistributedResourceAccessor#putFile(java.lang.String,
 *      java.io.InputStream)
 */
public void putFile(String filePath, InputStream is) throws IOException {
    Path dstPath = new Path(filePath);
    FileSystem dstFs = dstPath.getFileSystem(getConf());
    FSDataOutputStream out = dstFs.create(dstPath);
    try {
        IOUtils.copyBytes(is, out, getConf(), false);
    } finally {
        out.close();
    }
}
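
Both putFile variants pass false as the close flag of IOUtils.copyBytes and then close the output stream in a finally block (the caller keeps responsibility for the input stream). copyBytes also accepts true, in which case it closes both streams itself; a minimal sketch:

    // Sketch: with close=true, copyBytes closes both streams, even if the copy fails part-way.
    FSDataOutputStream out = dstFs.create(dstPath);
    IOUtils.copyBytes(is, out, getConf(), true);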

From source file:crunch.MaxTemperature.java

License:Apache License

private void writeFile(FileSystem fileSys, Path name) throws IOException {
    FSDataOutputStream stm = fileSys.create(name); // XXX FileSystem.create(Path)
    stm.close();
}
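
The last example just creates an empty file and closes the stream. Where the goal is only an empty marker file that must not clobber existing data, FileSystem.createNewFile(Path) is an alternative to consider; a sketch:

    // Sketch: createNewFile returns false instead of overwriting when the file already exists.
    if (!fileSys.createNewFile(name)) {
        // file was already present
    }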