Example usage for org.apache.hadoop.fs FileSystem create

Introduction

On this page you can find usage examples for the org.apache.hadoop.fs.FileSystem create method.

Prototype

public FSDataOutputStream create(Path f) throws IOException 

Document

Create an FSDataOutputStream at the indicated Path.
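
The one-argument create overwrites any existing file at the given path and returns a stream for writing. Below is a minimal sketch of typical usage; the configuration, path, and payload are illustrative assumptions and not taken from the examples that follow.

import java.io.IOException;
import java.nio.charset.StandardCharsets;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class CreateExample {
    public static void main(String[] args) throws IOException {
        // Obtain the default file system for this configuration
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(conf);

        // Illustrative path; create(Path) overwrites an existing file by default
        Path path = new Path("/tmp/create-example.txt");

        // try-with-resources closes the stream even if the write fails
        try (FSDataOutputStream out = fs.create(path)) {
            out.write("hello, hdfs\n".getBytes(StandardCharsets.UTF_8));
        }
    }
}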

Usage

From source file:ColumnStorage.ColumnProject.java

License:Open Source License

void saveNavigator(FileSystem fs, Path naviPath) throws IOException {
    int size = infos.size();
    if (size == 0) {
        return;
    }

    try {
        FSDataOutputStream out = fs.create(naviPath);

        out.writeInt(ConstVar.NaviMagic);
        out.writeShort((short) size);

        for (int i = 0; i < size; i++) {
            saveColumnInfo(out, infos.get(i));
        }

        out.close();
    } catch (IOException e) {
        LOG.error("save Column info fail:" + e.getMessage());
    }

}

From source file:com.acme.io.JsonStorage.java

License:Apache License

/**
 * Store schema of the data being written
 * @param schema Schema to be recorded
 * @param location Location as returned by 
 * {@link LoadFunc#relativeToAbsolutePath(String, org.apache.hadoop.fs.Path)}
 * @param job The {@link Job} object - this should be used only to obtain 
 * cluster properties through {@link Job#getConfiguration()} and not to
 * set/query any runtime job information.  
 * @throws IOException 
 */
public void storeSchema(ResourceSchema schema, String location, Job job) throws IOException {
    // Store the schema in a side file in the same directory.  MapReduce
    // does not include files starting with "_" when reading data for a job.
    FileSystem fs = FileSystem.get(job.getConfiguration());
    DataOutputStream out = fs.create(new Path(location + "/_schema"));
    out.writeBytes(schema.toString());
    out.writeByte('\n');
    out.close();
}

From source file:com.adsame.samelogs.SameLogsSink.java

License:Apache License

@SuppressWarnings("rawtypes")
@Override
public void append(Event e) throws IOException {
    // append the event to the output
    byte[] fn = e.get(TailSource.A_TAILSRCFILE);
    byte[] bd = e.getBody();
    System.out.println("##" + new String(fn) + "##" + new String(bd));

    Map<String, byte[]> maps = e.getAttrs();

    Iterator iter = maps.entrySet().iterator();
    while (iter.hasNext()) {
        Map.Entry entry = (Map.Entry) iter.next();
        String key = (String) entry.getKey();
        System.out.println("key: " + key);
    }

    // here we are assuming the body is a string
    pw.println(new String(e.getBody()));
    pw.flush(); // so we can see it in the file right away

    Configuration configuration = new Configuration();
    FileSystem hdfsFileSystem = FileSystem.get(configuration);
    Path path = new Path("hdfs://nodie-Ubuntu4:9000/user/nodie/input/dfs/hello");
    FSDataOutputStream out;
    System.out.println("exists: " + hdfsFileSystem.exists(path));
    if (hdfsFileSystem.exists(path)) {
        out = hdfsFileSystem.append(path);
    } else {
        out = hdfsFileSystem.create(path);
    }

    out.write(e.getBody());
    out.writeChar('\n');
    out.flush();
    out.close();
}

From source file:com.alectenharmsel.research.LcCounters.java

License:Apache License

public int run(String[] args) throws Exception {
    if (args.length != 2) {
        System.err.println("Usage: LineCounter <input> <output>");
        System.exit(-1);
    }

    Job job = new Job(getConf(), "LineCount");
    job.setJarByClass(LineCount.class);

    job.setInputFormatClass(WholeBlockInputFormat.class);

    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));

    job.setMapperClass(LineCountMapper.class);
    job.setReducerClass(LineCountReducer.class);

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(LongWritable.class);

    Configuration check = job.getConfiguration();
    boolean success = job.waitForCompletion(true);

    //Get the counter here, output to a file called total in the dir
    Counters counters = job.getCounters();

    //Throw it in the file
    Path outPath = new Path(args[1]);
    FileSystem fs = outPath.getFileSystem(check);
    OutputStream out = fs.create(new Path(outPath, "total"));
    String total = counters.findCounter(LcCounters.NUM_LINES).getValue() + "\n";
    out.write(total.getBytes());
    out.close();
    return success ? 0 : 1;
}

From source file:com.alexholmes.hadooputils.io.FileUtils.java

License:Apache License

/**
 * Writes the collection of lines into a file as newline-separated lines.
 *
 * @param lines the lines to write to the file
 * @param fs    a Hadoop file system
 * @param p     the file path
 * @throws java.io.IOException if something goes wrong
 */
public static void writeLines(Collection<?> lines, final FileSystem fs, final Path p) throws IOException {
    OutputStream stream = fs.create(p);
    try {
        IOUtils.writeLines(lines, IOUtils.LINE_SEPARATOR, stream);
    } finally {
        stream.close();
    }
}

From source file:com.alexholmes.hdfsslurper.WorkerThread.java

License:Apache License

private void process(FileStatus srcFileStatus) throws IOException, InterruptedException {

    Path stagingFile = null;
    FileSystem destFs = null;
    String filenameBatchidDelimiter = config.getFileNameBatchIdDelimiter();

    try {
        FileSystem srcFs = srcFileStatus.getPath().getFileSystem(config.getConfig());

        // run a script which can change the name of the file as well as
        // write out a new version of the file
        //
        if (config.getWorkScript() != null) {
            Path newSrcFile = stageSource(srcFileStatus);
            srcFileStatus = srcFileStatus.getPath().getFileSystem(config.getConfig()).getFileStatus(newSrcFile);
        }

        Path srcFile = srcFileStatus.getPath();

        // get the target HDFS file
        //
        Path destFile = getHdfsTargetPath(srcFileStatus);

        if (config.getCodec() != null) {
            String ext = config.getCodec().getDefaultExtension();
            if (!destFile.getName().endsWith(ext)) {
                destFile = new Path(destFile.toString() + ext);
            }
        }

        destFs = destFile.getFileSystem(config.getConfig());

        // get the staging HDFS file
        //
        stagingFile = fileSystemManager.getStagingFile(srcFileStatus, destFile);
        String batchId = srcFile.toString().substring(
                srcFile.toString().lastIndexOf(filenameBatchidDelimiter) + 1, srcFile.toString().length());

        log.info("event#Copying source file '" + srcFile + "' to staging destination '" + stagingFile + "'"
                + "$batchId#" + batchId);

        // if the directory of the target file doesn't exist, attempt to
        // create it
        //
        Path destParentDir = destFile.getParent();
        if (!destFs.exists(destParentDir)) {
            log.info("event#Attempting creation of target directory: " + destParentDir.toUri());
            if (!destFs.mkdirs(destParentDir)) {
                throw new IOException("event#Failed to create target directory: " + destParentDir.toUri());
            }
        }

        // if the staging directory doesn't exist, attempt to create it
        //
        Path destStagingParentDir = stagingFile.getParent();
        if (!destFs.exists(destStagingParentDir)) {
            log.info("event#Attempting creation of staging directory: " + destStagingParentDir.toUri());
            if (!destFs.mkdirs(destStagingParentDir)) {
                throw new IOException("event#Failed to create staging directory: " + destParentDir.toUri());
            }
        }

        // copy the file
        //
        InputStream is = null;
        OutputStream os = null;
        CRC32 crc = new CRC32();
        try {
            is = new BufferedInputStream(srcFs.open(srcFile));
            if (config.isVerify()) {
                is = new CheckedInputStream(is, crc);
            }
            os = destFs.create(stagingFile);

            if (config.getCodec() != null) {
                os = config.getCodec().createOutputStream(os);
            }

            IOUtils.copyBytes(is, os, 4096, false);
        } finally {
            IOUtils.closeStream(is);
            IOUtils.closeStream(os);
        }

        long srcFileSize = srcFs.getFileStatus(srcFile).getLen();
        long destFileSize = destFs.getFileStatus(stagingFile).getLen();
        if (config.getCodec() == null && srcFileSize != destFileSize) {
            throw new IOException(
                    "event#File sizes don't match, source = " + srcFileSize + ", dest = " + destFileSize);
        }

        log.info("event#Local file size = " + srcFileSize + ", HDFS file size = " + destFileSize + "$batchId#"
                + batchId);

        if (config.isVerify()) {
            verify(stagingFile, crc.getValue());
        }

        if (destFs.exists(destFile)) {
            destFs.delete(destFile, false);
        }

        log.info("event#Moving staging file '" + stagingFile + "' to destination '" + destFile + "'"
                + "$batchId#" + batchId);
        if (!destFs.rename(stagingFile, destFile)) {
            throw new IOException("event#Failed to rename file");
        }

        if (config.isCreateLzopIndex() && destFile.getName().endsWith(lzopExt)) {
            Path lzoIndexPath = new Path(destFile.toString() + LzoIndex.LZO_INDEX_SUFFIX);
            if (destFs.exists(lzoIndexPath)) {
                log.info("event#Deleting index file as it already exists");
                destFs.delete(lzoIndexPath, false);
            }
            indexer.index(destFile);
        }

        fileSystemManager.fileCopyComplete(srcFileStatus);

    } catch (Throwable t) {
        log.error("event#Caught exception working on file " + srcFileStatus.getPath(), t);

        // delete the staging file if it still exists
        //
        try {
            if (destFs != null && destFs.exists(stagingFile)) {
                destFs.delete(stagingFile, false);
            }
        } catch (Throwable t2) {
            log.error("event#Failed to delete staging file " + stagingFile, t2);
        }

        fileSystemManager.fileCopyError(srcFileStatus);
    }

}

From source file:com.alexholmes.json.mapreduce.ExampleJob.java

License:Apache License

/**
 * Writes the contents of {@link #JSON} into a file in the job input directory in HDFS.
 *
 * @param conf     the Hadoop config
 * @param inputDir the HDFS input directory where we'll write a file
 * @throws IOException if something goes wrong
 */
public static void writeInput(Configuration conf, Path inputDir) throws IOException {
    FileSystem fs = FileSystem.get(conf);

    if (fs.exists(inputDir)) {
        throw new IOException(
                String.format("Input directory '%s' exists - please remove and rerun this example", inputDir));
    }

    OutputStreamWriter writer = new OutputStreamWriter(fs.create(new Path(inputDir, "input.txt")));
    writer.write(JSON);
    IOUtils.closeStream(writer);
}

From source file:com.asakusafw.dag.runtime.internalio.HadoopInternalInputTaskInfoTest.java

License:Apache License

private static void put(FileSystem fs, Path path, String... values) throws IOException {
    try (ModelOutput<Text> out = InternalOutputHandler.create(fs.create(path), Text.class)) {
        Text buf = new Text();
        for (String value : values) {
            buf.set(value);
            out.write(buf);
        }
    }
}

From source file:com.asakusafw.directio.tools.DirectIoCommandTestRoot.java

License:Apache License

/**
 * Creates a new indoubt transaction.
 * @param executionId target execution id
 * @throws IOException if failed
 * @throws InterruptedException if interrupted
 */
protected void indoubt(String executionId) throws IOException, InterruptedException {
    Path txPath = HadoopDataSourceUtil.getTransactionInfoPath(conf, executionId);
    Path cmPath = HadoopDataSourceUtil.getCommitMarkPath(conf, executionId);
    FileSystem fs = txPath.getFileSystem(conf);
    fs.create(txPath).close();
    fs.create(cmPath).close();
    int index = 0;
    for (String path : repo.getContainerPaths()) {
        String id = repo.getRelatedId(path);
        DirectDataSource ds = repo.getRelatedDataSource(path);
        OutputTransactionContext txContext = HadoopDataSourceUtil.createContext(executionId, id);
        OutputAttemptContext aContext = new OutputAttemptContext(txContext.getTransactionId(),
                String.valueOf(index), txContext.getOutputId(), new Counter());

        ds.setupTransactionOutput(txContext);
        ds.setupAttemptOutput(aContext);
        try (ModelOutput<StringBuilder> output = ds.openOutput(aContext,
                SimpleDataDefinition.newInstance(StringBuilder.class, new MockFormat()), "", executionId,
                new Counter())) {
            output.write(new StringBuilder("Hello, world!"));
        }
        ds.commitAttemptOutput(aContext);
        ds.cleanupAttemptOutput(aContext);

        index++;
    }
}

From source file:com.asakusafw.m3bp.compiler.tester.externalio.TestIoTaskExecutor.java

License:Apache License

private <T extends Writable> void executeInput(String name, Class<T> dataType, List<Path> paths)
        throws IOException {
    Action<Object, Exception> action = inputs.get(name);
    Invariants.requireNonNull(action, () -> MessageFormat.format("missing input: {0}", name));
    Path path = new Path(paths.get(0).toString().replace('*', '_'));
    FileSystem fs = path.getFileSystem(configuration);
    try (ModelOutput<T> output = new TemporaryFileOutput<>(fs.create(path), dataType.getName(),
            OUTPUT_INIT_BUFFER_SIZE, OUTPUT_PAGE_SIZE)) {
        action.perform(output);
    } catch (Error | RuntimeException | IOException e) {
        throw e;
    } catch (Exception e) {
        throw new AssertionError(e);
    }
}