Example usage for org.apache.hadoop.fs.FileSystem.create

List of usage examples for org.apache.hadoop.fs.FileSystem.create

Introduction

On this page you can find example usages of org.apache.hadoop.fs.FileSystem.create.

Prototype

public FSDataOutputStream create(Path f) throws IOException 

Source Link

Document

Create an FSDataOutputStream at the indicated Path.

Usage

From source file:Importer.java

License:Open Source License

/**
 * Converts one local text file into a block-compressed SequenceFile of Text/Text
 * records.
 *
 * <p>The destination is {@code <file name>.seq} under {@code outDir}, created on the
 * file system named by the {@code fs.default.name} configuration entry. Input lines
 * are accumulated, each followed by a single space, until a line starting with
 * {@code ---END.OF.DOCUMENT---} is read; the accumulated text is then appended as one
 * record whose key is {@code hash(content)}. Trailing text longer than 5 bytes that is
 * not followed by a delimiter is written as a final record.
 *
 * <p>The running totals {@code totalBytes} and {@code totalRecords} are updated and a
 * throughput summary is printed to standard output.
 *
 * @param file local input file; a name ending in {@code .bz2} is read through
 *             {@code CBZip2InputStream}
 * @throws Exception if the file system URI is invalid or any read/write fails
 */
public static void copyFile(File file) throws Exception {
    File destFile = new File(outDir, file.getName() + ".seq");
    Path dest = new Path(destFile.getAbsolutePath());

    Configuration conf = new Configuration();
    FileSystem fileSys = org.apache.hadoop.fs.FileSystem.get(new java.net.URI(conf.get("fs.default.name")),
            conf);
    CompressionCodec codec = new DefaultCodec();
    fileSys.mkdirs(dest.getParent());

    // Nested try/finally so that every stream opened so far is closed on any failure.
    FSDataOutputStream outputStr = fileSys.create(dest);
    try {
        seqFileWriter = SequenceFile.createWriter(conf, outputStr, Text.class, Text.class,
                SequenceFile.CompressionType.BLOCK, codec);
        try {
            InputStream in = new BufferedInputStream(new FileInputStream(file));
            try {
                if (file.getName().endsWith(".bz2")) {
                    // Skip the two leading header bytes before wrapping the stream.
                    in.read();
                    in.read();
                    in = new CBZip2InputStream(in);
                }
                BufferedReader br = new BufferedReader(new InputStreamReader(in, "US-ASCII"));

                System.out.println("working on file " + file);
                int records = 0;
                long bytes = 0;
                long startTime = System.currentTimeMillis();
                String s;
                Text content = new Text();
                while ((s = br.readLine()) != null) {
                    if (s.startsWith("---END.OF.DOCUMENT---")) {
                        Text name = new Text(hash(content));
                        seqFileWriter.append(name, content);
                        records++;
                        content = new Text();
                    } else {
                        // Explicit charset: the result must not depend on the platform default.
                        byte[] lineAsBytes = (s + " ").getBytes("UTF-8");
                        for (byte b : lineAsBytes) {
                            // Java bytes are signed, so a set high bit shows up as a negative value.
                            assert b >= 0 : "found an unexpected high-bit set";
                        }

                        content.append(lineAsBytes, 0, lineAsBytes.length);
                        bytes += lineAsBytes.length;
                    }
                }
                // Flush a trailing document that was not terminated by a delimiter line.
                if (content.getLength() > 5) {
                    Text name = new Text(hash(content));
                    seqFileWriter.append(name, content);
                    records++;
                }
                totalBytes += bytes;
                totalRecords += records;
                // +1 keeps the divisor non-zero for files processed in under a second.
                long time = (System.currentTimeMillis() - startTime) / 1000 + 1;
                long kbSec = bytes / 1024 / time;
                System.out.println(new java.util.Date());
                System.out.println("File " + file.getName() + " " + records + " records, " + bytes + " bytes in "
                        + time + " seconds (" + kbSec + " KB/sec).");
            } finally {
                in.close();
            }
        } finally {
            seqFileWriter.close();
        }
    } finally {
        outputStr.close();
    }
}

From source file:Vectors.java

License:Apache License

/**
 * Writes {@code vector} to a newly created file at {@code path} using
 * {@code VectorWritable}.
 *
 * @param vector       the vector to serialize
 * @param path         destination file; its URI selects the file system
 * @param conf         configuration used to obtain the file system
 * @param laxPrecision forwarded to {@code VectorWritable.setWritesLaxPrecision}
 * @throws IOException if the file cannot be created, written, or closed
 */
public static void write(Vector vector, Path path, Configuration conf, boolean laxPrecision)
        throws IOException {
    FileSystem fs = FileSystem.get(path.toUri(), conf);
    FSDataOutputStream out = fs.create(path);
    boolean threw = true;
    try {
        VectorWritable vectorWritable = new VectorWritable(vector);
        vectorWritable.setWritesLaxPrecision(laxPrecision);
        vectorWritable.write(out);
        threw = false;
    } finally {
        // A failed close on an output stream can mean lost data, so only swallow it
        // when an earlier exception is already propagating.
        Closeables.close(out, threw);
    }
}

From source file:acromusashi.stream.bolt.hdfs.HdfsStreamWriter.java

License:Open Source License

/**
 * ??HDFS??Open?/*from   w  ww  .  j  a v a  2  s . c om*/
 * 
 * @param filePath HDFS
 * @param fs 
 * @param isFileSyncEachTime ?????????
 * @throws IOException Open
 */
public void open(String filePath, FileSystem fs, boolean isFileSyncEachTime) throws IOException {
    Path dstPath = new Path(filePath);

    if (fs.exists(dstPath) == true) {
        this.delegateStream = fs.append(dstPath);
    } else {
        this.delegateStream = fs.create(dstPath);
    }

    this.isFileSyncEachTime = isFileSyncEachTime;
}

From source file:alluxio.hadoop.FileSystemAclIntegrationTest.java

License:Apache License

/**
 * Creates a file at {@code path} and writes the fixed content {@code "Test Bytes"}.
 *
 * @param fs   file system on which the file is created
 * @param path location of the new file
 * @throws IOException if the file cannot be created, written, or closed
 */
private static void create(org.apache.hadoop.fs.FileSystem fs, Path path) throws IOException {
    FSDataOutputStream o = fs.create(path);
    try {
        o.writeBytes("Test Bytes");
    } finally {
        // Close even when the write fails so the stream is not leaked.
        o.close();
    }
}

From source file:Assignment3_P2_MergeStockAverageCount.StockPriceMergeDriver.java

/**
 * Concatenates every file of a local directory into one file on the default file
 * system, then runs the "Average Stock Price" job over that merged file.
 *
 * @param args the command line arguments: {@code args[0]} local input directory,
 *             {@code args[1]} path of the merged file to create (also the job input),
 *             {@code args[2]} job output path
 * @throws IOException            if merging the files or submitting the job fails
 * @throws InterruptedException   if waiting for the job is interrupted
 * @throws ClassNotFoundException if a job class cannot be loaded
 */
public static void main(String[] args) throws IOException, InterruptedException, ClassNotFoundException {
    if (args.length < 3) {
        System.err.println(
                "Usage: StockPriceMergeDriver <local-input-dir> <merged-input-path> <job-output-path>");
        System.exit(2);
    }

    Configuration conf = new Configuration();

    // local file system handle
    FileSystem local = FileSystem.getLocal(conf);

    // default (HDFS) file system handle
    FileSystem hdfs = FileSystem.get(conf);

    // local input directory
    Path inputDir = new Path(args[0]);

    // merged file that becomes the job input
    Path mergedFile = new Path(args[1]);

    // local input files in local dir
    FileStatus[] inputFiles = local.listStatus(inputDir);

    // The merged stream must be closed before the job starts, otherwise the job
    // may read an incomplete file.
    FSDataOutputStream out = hdfs.create(mergedFile);
    try {
        byte[] buffer = new byte[256];
        for (FileStatus inputFile : inputFiles) {
            System.out.println("File name ----------------------------------------------------------------> "
                    + inputFile.getPath().getName());
            FSDataInputStream in = local.open(inputFile.getPath());
            try {
                // copy the whole file into the merged stream
                int bytesRead;
                while ((bytesRead = in.read(buffer)) > 0) {
                    out.write(buffer, 0, bytesRead);
                }
            } finally {
                in.close();
            }
        }
    } finally {
        out.close();
    }

    Job job = Job.getInstance(conf, "Average Stock Price");
    job.setJarByClass(StockPriceMergeDriver.class);
    job.setMapperClass(StockPriceMerge_Mapper.class);
    job.setCombinerClass(StockPriceMerge_Reducer.class);
    job.setReducerClass(StockPriceMerge_Reducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(FloatWritable.class);
    FileInputFormat.addInputPath(job, mergedFile); // the merged file written above is the mapper input
    FileOutputFormat.setOutputPath(job, new Path(args[2]));
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}

From source file:at.illecker.hama.hybrid.examples.hellohybrid.HelloHybridBSP.java

License:Apache License

/**
 * CPU variant of the HelloHybrid example.
 *
 * <p>Writes a per-task log file named {@code <taskId>.log} under the job output path.
 * It reads all input keys, adds the keys of the example SequenceFile to them
 * position by position, emits the sums through {@code peer.write}, and finally logs
 * the peer names and the results of several {@code String.split} calls.
 *
 * @param peer the BSP peer providing configuration, input and output
 * @throws IOException          if the log file or the peer I/O fails
 * @throws SyncException        declared by the overridden method
 * @throws InterruptedException declared by the overridden method
 */
@Override
public void bsp(BSPPeer<IntWritable, NullWritable, IntWritable, NullWritable, NullWritable> peer)
        throws IOException, SyncException, InterruptedException {

    BSPJob job = new BSPJob((HamaConfiguration) peer.getConfiguration());
    FileSystem fs = FileSystem.get(peer.getConfiguration());
    FSDataOutputStream outStream = fs
            .create(new Path(FileOutputFormat.getOutputPath(job), peer.getTaskId() + ".log"));

    // Close the log stream on every exit path, including exceptions.
    try {
        outStream.writeChars("HelloHybrid.bsp executed on CPU!\n");

        List<Integer> summation = new ArrayList<Integer>();

        // test input
        IntWritable key = new IntWritable();
        NullWritable nullValue = NullWritable.get();

        while (peer.readNext(key, nullValue)) {
            outStream.writeChars("input: key: '" + key.get() + "'\n");
            summation.add(key.get());
        }

        // test sequenceFileReader
        Path example = new Path(peer.getConfiguration().get(CONF_EXAMPLE_PATH));
        SequenceFile.Reader reader = null;
        try {
            reader = new SequenceFile.Reader(fs, example, peer.getConfiguration());

            int i = 0;
            while (reader.next(key, nullValue)) {
                outStream.writeChars("sequenceFileReader: key: '" + key.get() + "'\n");
                // keys beyond the number of input records are logged but not summed
                if (i < summation.size()) {
                    summation.set(i, summation.get(i) + key.get());
                }
                i++;
            }
        } catch (IOException e) {
            throw new RuntimeException(e);
        } finally {
            if (reader != null) {
                reader.close();
            }
        }

        // test output
        for (Integer sum : summation) {
            key.set(sum);
            outStream.writeChars("output: key: '" + key.get() + "'\n");
            peer.write(key, nullValue);
        }

        // test getAllPeerNames
        outStream.writeChars("getAllPeerNames: '" + Arrays.toString(peer.getAllPeerNames()) + "'\n");

        // test String.split
        String splitString = "boo:and:foo";
        String[] splits;

        outStream.writeChars("splitString: '" + splitString + "'\n");

        splits = splitString.split(":");
        outStream.writeChars(
                "split(\":\") len: " + splits.length + " values: '" + Arrays.toString(splits) + "'\n");

        splits = splitString.split(":", 2);
        outStream.writeChars(
                "split(\":\",2) len: " + splits.length + " values: '" + Arrays.toString(splits) + "'\n");

        splits = splitString.split(":", 5);
        outStream.writeChars(
                "split(\":\",5) len: " + splits.length + " values: '" + Arrays.toString(splits) + "'\n");

        splits = splitString.split(":", -2);
        outStream.writeChars(
                "split(\":\",-2) len: " + splits.length + " values: '" + Arrays.toString(splits) + "'\n");

        splits = splitString.split(";");
        outStream.writeChars(
                "split(\";\") len: " + splits.length + " values: '" + Arrays.toString(splits) + "'\n");
    } finally {
        outStream.close();
    }
}

From source file:at.illecker.hama.hybrid.examples.hellohybrid.HelloHybridBSP.java

License:Apache License

/**
 * GPU variant of the HelloHybrid example.
 *
 * <p>Runs a single {@code HelloHybridKernel} through Rootbeer and writes the
 * execution statistics and the fields filled in by the kernel to a per-task log
 * file named {@code <taskId>.log} under the job output path.
 *
 * @param peer     the BSP peer providing configuration and the task id
 * @param rootbeer the Rootbeer instance used to run the kernel
 * @throws IOException          if the log file cannot be created or written
 * @throws SyncException        declared by the overridden method
 * @throws InterruptedException declared by the overridden method
 */
@Override
public void bspGpu(BSPPeer<IntWritable, NullWritable, IntWritable, NullWritable, NullWritable> peer,
        Rootbeer rootbeer) throws IOException, SyncException, InterruptedException {

    BSPJob job = new BSPJob((HamaConfiguration) peer.getConfiguration());
    FileSystem fs = FileSystem.get(peer.getConfiguration());
    FSDataOutputStream outStream = fs
            .create(new Path(FileOutputFormat.getOutputPath(job), peer.getTaskId() + ".log"));

    // Close the log stream on every exit path, including a failing kernel run.
    try {
        outStream.writeChars("HelloHybrid.bspGpu executed on GPU!\n");

        HelloHybridKernel kernel = new HelloHybridKernel(peer.getConfiguration().get(CONF_EXAMPLE_PATH),
                CONF_N, "boo:and:foo", ":");

        // Run GPU Kernels
        Context context = rootbeer.createDefaultContext();
        Stopwatch watch = new Stopwatch();
        watch.start();
        // 1 Kernel within 1 Block
        rootbeer.run(kernel, new ThreadConfig(1, 1, 1), context);
        watch.stop();

        List<StatsRow> stats = context.getStats();
        for (StatsRow row : stats) {
            outStream.writeChars("  StatsRow:\n");
            outStream.writeChars("    serial time: " + row.getSerializationTime() + "\n");
            outStream.writeChars("    exec time: " + row.getExecutionTime() + "\n");
            outStream.writeChars("    deserial time: " + row.getDeserializationTime() + "\n");
            outStream.writeChars("    num blocks: " + row.getNumBlocks() + "\n");
            outStream.writeChars("    num threads: " + row.getNumThreads() + "\n");
        }

        outStream.writeChars("HelloHybridKernel,GPUTime=" + watch.elapsedTimeMillis() + "ms\n");
        outStream.writeChars("HelloHybridKernel,peerName: '" + kernel.peerName + "'\n");
        outStream.writeChars("HelloHybridKernel,numPeers: '" + kernel.numPeers + "'\n");
        outStream.writeChars("HelloHybridKernel,summation: '" + Arrays.toString(kernel.summation) + "'\n");
        outStream.writeChars(
                "HelloHybridKernel,getAllPeerNames: '" + Arrays.toString(kernel.allPeerNames) + "'\n");

        // test String.split
        outStream.writeChars("HelloHybridKernel,splitString: '" + kernel.splitString + "'\n");
        outStream.writeChars("HelloHybridKernel,split(\"" + kernel.delimiter + "\") len: "
                + kernel.splits1.length + " values: '" + Arrays.toString(kernel.splits1) + "'\n");
        outStream.writeChars("HelloHybridKernel,split(\"" + kernel.delimiter + "\",2) len: "
                + kernel.splits2.length + " values: '" + Arrays.toString(kernel.splits2) + "'\n");
        outStream.writeChars("HelloHybridKernel,split(\"" + kernel.delimiter + "\",5) len: "
                + kernel.splits3.length + " values: '" + Arrays.toString(kernel.splits3) + "'\n");
        outStream.writeChars("HelloHybridKernel,split(\"" + kernel.delimiter + "\",-2) len: "
                + kernel.splits4.length + " values: '" + Arrays.toString(kernel.splits4) + "'\n");
        outStream.writeChars("HelloHybridKernel,split(\";\") len: " + kernel.splits5.length + " values: '"
                + Arrays.toString(kernel.splits5) + "'\n");
    } finally {
        outStream.close();
    }
}

From source file:at.illecker.hama.hybrid.examples.kmeans.KMeansHybridBSP.java

License:Apache License

/********************************* CPU *********************************/
/**
 * Reads the job settings, optionally opens a per-task debug log, and loads the
 * initial cluster centers for the CPU implementation.
 *
 * <p>Centers are read as the keys of the SequenceFile named by
 * {@code CONF_CENTER_IN_PATH} and stored in {@code m_centers_cpu}.
 *
 * @param peer the BSP peer providing the configuration and the task id
 * @throws IOException if the file system cannot be obtained or the reader fails to close
 * @throws RuntimeException wrapping any {@code IOException} raised while reading centers
 * @throws IllegalArgumentException if the centers file contains no center
 */
@Override
public void setup(
        BSPPeer<PipesVectorWritable, NullWritable, IntWritable, PipesVectorWritable, CenterMessage> peer)
        throws IOException {

    this.m_conf = peer.getConfiguration();
    this.m_timeMeasurement = m_conf.getBoolean(CONF_TIME, false);
    this.m_isDebuggingEnabled = m_conf.getBoolean(CONF_DEBUG, false);
    this.m_maxIterations = m_conf.getInt(CONF_MAX_ITERATIONS, -1);

    // Init logging (best effort: a failure here must not abort the setup)
    if (m_isDebuggingEnabled) {
        try {
            FileSystem fs = FileSystem.get(m_conf);
            m_logger = fs.create(new Path(FileOutputFormat.getOutputPath(new BSPJob((HamaConfiguration) m_conf))
                    + "/BSP_" + peer.getTaskId() + ".log"));

        } catch (IOException e) {
            LOG.error("Could not create debug log for task " + peer.getTaskId(), e);
        }
    }

    long startTime = 0;
    if (m_timeMeasurement) {
        startTime = System.currentTimeMillis();
    }

    // Init center vectors
    Path centroids = new Path(m_conf.get(CONF_CENTER_IN_PATH));
    FileSystem fs = FileSystem.get(m_conf);

    final List<DoubleVector> centers = new ArrayList<DoubleVector>();
    SequenceFile.Reader reader = null;
    try {
        reader = new SequenceFile.Reader(fs, centroids, m_conf);
        PipesVectorWritable key = new PipesVectorWritable();
        NullWritable value = NullWritable.get();
        while (reader.next(key, value)) {
            centers.add(key.getVector());
        }
    } catch (IOException e) {
        throw new RuntimeException(e);
    } finally {
        if (reader != null) {
            reader.close();
        }
    }

    Preconditions.checkArgument(centers.size() > 0, "Centers file must contain at least a single center!");

    this.m_centers_cpu = centers.toArray(new DoubleVector[centers.size()]);

    if (m_timeMeasurement) {
        long stopTime = System.currentTimeMillis();
        LOG.info("# setupTime: " + ((stopTime - startTime) / 1000.0) + " sec");
        // m_logger stays null when the debug log could not be created above.
        // NOTE(review): other users of m_logger may need the same guard - confirm.
        if (m_isDebuggingEnabled && m_logger != null) {
            // NOTE(review): the "PiEstimatorHybrid" prefix looks copied from another example;
            // left unchanged in case log consumers rely on it.
            m_logger.writeChars("PiEstimatorHybrid,setupTime: " + ((stopTime - startTime) / 1000.0) + " sec\n");
        }
    }
}

From source file:at.illecker.hama.hybrid.examples.kmeans.KMeansHybridBSP.java

License:Apache License

/********************************* GPU *********************************/
/**
 * Reads the job settings, optionally opens a per-task debug log, and loads the
 * initial cluster centers for the GPU implementation.
 *
 * <p>Centers are read as the keys of the SequenceFile named by
 * {@code CONF_CENTER_IN_PATH} and copied into the {@code double[][]} field
 * {@code m_centers_gpu}, whose row width is the length of the first center.
 *
 * @param peer the BSP peer providing the configuration and the task id
 * @throws IOException if the file system cannot be obtained or the reader fails to close
 * @throws SyncException declared by the overridden method
 * @throws InterruptedException declared by the overridden method
 * @throws RuntimeException wrapping any {@code IOException} raised while reading centers
 * @throws IllegalArgumentException if the centers file contains no center
 */
@Override
public void setupGpu(
        BSPPeer<PipesVectorWritable, NullWritable, IntWritable, PipesVectorWritable, CenterMessage> peer)
        throws IOException, SyncException, InterruptedException {

    this.m_conf = peer.getConfiguration();
    this.m_timeMeasurement = m_conf.getBoolean(CONF_TIME, false);
    this.m_isDebuggingEnabled = m_conf.getBoolean(CONF_DEBUG, false);
    this.m_maxIterations = m_conf.getInt(CONF_MAX_ITERATIONS, -1);
    this.m_blockSize = Integer.parseInt(this.m_conf.get(CONF_BLOCKSIZE));
    this.m_gridSize = Integer.parseInt(this.m_conf.get(CONF_GRIDSIZE));

    // Init logging (best effort: a failure here must not abort the setup)
    if (m_isDebuggingEnabled) {
        try {
            FileSystem fs = FileSystem.get(m_conf);
            m_logger = fs.create(new Path(FileOutputFormat.getOutputPath(new BSPJob((HamaConfiguration) m_conf))
                    + "/BSP_" + peer.getTaskId() + ".log"));

        } catch (IOException e) {
            LOG.error("Could not create debug log for task " + peer.getTaskId(), e);
        }
    }

    long startTime = 0;
    if (m_timeMeasurement) {
        startTime = System.currentTimeMillis();
    }

    // Init center vectors
    Path centroids = new Path(m_conf.get(CONF_CENTER_IN_PATH));
    FileSystem fs = FileSystem.get(m_conf);

    final List<double[]> centers = new ArrayList<double[]>();
    SequenceFile.Reader reader = null;
    try {
        reader = new SequenceFile.Reader(fs, centroids, m_conf);
        PipesVectorWritable key = new PipesVectorWritable();
        NullWritable value = NullWritable.get();
        while (reader.next(key, value)) {
            centers.add(key.getVector().toArray());
        }
    } catch (IOException e) {
        throw new RuntimeException(e);
    } finally {
        if (reader != null) {
            reader.close();
        }
    }

    Preconditions.checkArgument(centers.size() > 0, "Centers file must contain at least a single center!");

    // build centers_gpu double[][]; every row has the width of the first center
    this.m_centers_gpu = new double[centers.size()][centers.get(0).length];
    for (int i = 0; i < centers.size(); i++) {
        double[] vector = centers.get(i);
        System.arraycopy(vector, 0, this.m_centers_gpu[i], 0, vector.length);
    }

    if (m_timeMeasurement) {
        long stopTime = System.currentTimeMillis();
        LOG.info("# setupGpuTime: " + ((stopTime - startTime) / 1000.0) + " sec");
        // m_logger stays null when the debug log could not be created above.
        // NOTE(review): other users of m_logger may need the same guard - confirm.
        if (m_isDebuggingEnabled && m_logger != null) {
            // NOTE(review): the "PiEstimatorHybrid" prefix looks copied from another example;
            // left unchanged in case log consumers rely on it.
            m_logger.writeChars(
                    "PiEstimatorHybrid,setupGpuTime: " + ((stopTime - startTime) / 1000.0) + " sec\n");
        }
    }
}

From source file:at.illecker.hama.hybrid.examples.matrixmultiplication.MatrixMultiplicationHybridBSP.java

License:Apache License

/********************************* CPU *********************************/
/**
 * Selects the master task, optionally opens a per-task debug log, and loads the
 * transposed matrix B into {@code m_bColumns}.
 *
 * <p>Matrix B is read from the SequenceFile named by
 * {@code CONF_MATRIX_MULT_B_PATH}; each record contributes one key/vector pair.
 *
 * @param peer the BSP peer providing the configuration, peer names and the task id
 * @throws IOException if the matrix file cannot be opened, read, or closed
 */
@Override
public void setup(
        BSPPeer<IntWritable, PipesVectorWritable, IntWritable, PipesVectorWritable, MatrixRowMessage> peer)
        throws IOException {

    HamaConfiguration conf = peer.getConfiguration();
    this.m_isDebuggingEnabled = conf.getBoolean(CONF_DEBUG, false);

    // Choose one as a master, who sorts the matrix rows at the end
    // m_masterTask = peer.getPeerName(peer.getNumPeers() / 2);

    // TODO
    // task must be 0 otherwise write out does NOT work!
    this.m_masterTask = peer.getPeerName(0);

    // Init logging (best effort: a failure here must not abort the setup)
    if (m_isDebuggingEnabled) {
        try {
            FileSystem fs = FileSystem.get(conf);
            m_logger = fs.create(new Path(
                    FileOutputFormat.getOutputPath(new BSPJob(conf)) + "/BSP_" + peer.getTaskId() + ".log"));

        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    // Load transposed Matrix B
    SequenceFile.Reader reader = new SequenceFile.Reader(FileSystem.get(conf),
            new Path(conf.get(CONF_MATRIX_MULT_B_PATH)), conf);
    try {
        IntWritable bKey = new IntWritable();
        PipesVectorWritable bVector = new PipesVectorWritable();

        // for each col of matrix B (cause by transposed B)
        while (reader.next(bKey, bVector)) {
            m_bColumns.add(new KeyValuePair<Integer, DoubleVector>(bKey.get(), bVector.getVector()));
            // m_logger stays null when the debug log could not be created above.
            if (m_isDebuggingEnabled && m_logger != null) {
                m_logger.writeChars("setup,read,transposedMatrixB,key=" + bKey.get() + ",value="
                        + bVector.getVector().toString() + "\n");
            }
        }
    } finally {
        // Close the reader even when reading or logging fails.
        reader.close();
    }
}