Example usage for org.apache.hadoop.fs FileSystem create

List of usage examples for org.apache.hadoop.fs FileSystem create

Introduction

On this page you can find example usage for org.apache.hadoop.fs FileSystem create.

Prototype

public FSDataOutputStream create(Path f) throws IOException 

Document

Create an FSDataOutputStream at the indicated Path.
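
Before the per-project examples, here is a minimal, self-contained sketch of the typical pattern (the path and payload are placeholders): try-with-resources guarantees the stream is closed, the single-argument overload overwrites an existing file by default, and overloads taking an explicit overwrite flag, buffer size, replication, and block size also exist.

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class CreateExample {
    public static void main(String[] args) throws IOException {
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(conf);
        Path path = new Path("/tmp/example.txt"); // placeholder path
        // try-with-resources closes the stream even if the write fails
        try (FSDataOutputStream out = fs.create(path)) {
            out.writeBytes("example payload");
        }
        // fs.create(path, false) would instead fail if the file already exists
    }
}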

Usage

From source file:com.inmobi.databus.purge.DataPurgerServiceTest.java

License:Apache License

private void createTestPurgefiles(FileSystem fs, Cluster cluster, Calendar date) throws Exception {
    for (String streamname : cluster.getSourceStreams()) {
        String[] files = new String[NUM_OF_FILES];
        String datapath = Cluster.getDateAsYYYYMMDDHHMNPath(date.getTime());
        String commitpath = cluster.getLocalFinalDestDirRoot() + File.separator + streamname + File.separator
                + datapath;
        String mergecommitpath = cluster.getFinalDestDirRoot() + File.separator + streamname + File.separator
                + datapath;
        String trashpath = cluster.getTrashPath() + File.separator + CalendarHelper.getDateAsString(date)
                + File.separator;
        fs.mkdirs(new Path(commitpath));

        for (int j = 0; j < NUM_OF_FILES; ++j) {
            files[j] = cluster.getName() + "-" + TestLocalStreamService.getDateAsYYYYMMDDHHmm(new Date())
                    + "_" + idFormat.format(j);
            {
                Path path = new Path(commitpath + File.separator + files[j]);
                // LOG.info("Creating streams_local File " + path.getName());
                FSDataOutputStream streamout = fs.create(path);
                streamout.writeBytes("Creating Test data for teststream " + path.toString());
                streamout.close();
                Assert.assertTrue(fs.exists(path));
            }
            {
                Path path = new Path(mergecommitpath + File.separator + files[j]);
                // LOG.info("Creating streams File " + path.getName());
                FSDataOutputStream streamout = fs.create(path);
                streamout.writeBytes("Creating Test data for teststream " + path.toString());
                streamout.close();
                Assert.assertTrue(fs.exists(path));
            }

            {
                Path path = new Path(trashpath + File.separator + String.valueOf(date.get(Calendar.HOUR_OF_DAY))
                        + File.separator + files[j]);
                // LOG.info("Creating trash File " + path.toString());
                FSDataOutputStream streamout = fs.create(path);
                streamout.writeBytes("Creating Test trash data for teststream " + path.getName());
                streamout.close();
                Assert.assertTrue(fs.exists(path));
            }
        }
    }

}

From source file:com.inmobi.databus.utils.CollapseFilesInDir.java

License:Apache License

public static void main(String[] args) throws Exception {
    Configuration configuration = new Configuration();
    configuration.set("fs.default.name", args[0]);
    String dir = args[1];
    FileSystem fs = FileSystem.get(configuration);
    FileStatus[] fileList = fs.listStatus(new Path(dir));
    if (fileList != null) {
        if (fileList.length > 1) {
            Set<Path> sourceFiles = new HashSet<Path>();
            Set<String> consumePaths = new HashSet<String>();
            // inputPath may have multiple files due to backlog;
            // read them all and create a tmp file
            for (int i = 0; i < fileList.length; i++) {
                Path consumeFilePath = fileList[i].getPath().makeQualified(fs);
                sourceFiles.add(consumeFilePath);
                FSDataInputStream fsDataInputStream = fs.open(consumeFilePath);
                try {
                    while (fsDataInputStream.available() > 0) {
                        String fileName = fsDataInputStream.readLine();
                        if (fileName != null) {
                            consumePaths.add(fileName.trim());
                            System.out.println("Adding [" + fileName + "] to pull");
                        }
                    }
                } finally {
                    fsDataInputStream.close();
                }
            }
            Path finalPath = new Path(dir, Long.toString(System.currentTimeMillis()));
            FSDataOutputStream out = fs.create(finalPath);
            BufferedWriter writer = new BufferedWriter(new OutputStreamWriter(out));
            try {
                for (String consumePath : consumePaths) {
                    System.out.println("Adding sourceFile [" + consumePath + "] to" + " distcp " + "FinalList");
                    writer.write(consumePath);
                    writer.write("\n");
                }
            } finally {
                writer.close();
            }
            LOG.warn("Final File - [" + finalPath + "]");
            for (Path deletePath : sourceFiles) {
                System.out.println("Deleting - [" + deletePath + "]");
                fs.delete(deletePath, true); // delete(Path) is deprecated; it delegates to the two-argument form
            }
        }
    }
}
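
A note on the configuration key used above: fs.default.name is deprecated in current Hadoop releases in favor of fs.defaultFS, so a version of this tool targeting a recent cluster would presumably set the same value under the new key:

    // equivalent, non-deprecated form of the first line of main()
    configuration.set("fs.defaultFS", args[0]);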

From source file:com.inmobi.grill.server.GrillServices.java

License:Apache License

private void persistGrillServiceState() throws IOException {
    if (conf.getBoolean(GrillConfConstants.GRILL_SERVER_RESTART_ENABLED,
            GrillConfConstants.DEFAULT_GRILL_SERVER_RESTART_ENABLED)) {
        FileSystem fs = persistDir.getFileSystem(conf);
        LOG.info("Persisting server state in " + persistDir);

        for (GrillService service : grillServices) {
            LOG.info("Persisting state of service:" + service.getName());
            Path serviceWritePath = new Path(persistDir, service.getName() + ".out");
            ObjectOutputStream out = null;
            try {
                out = new ObjectOutputStream(fs.create(serviceWritePath));
                service.writeExternal(out);
            } finally {
                if (out != null) {
                    out.close();
                }
            }
            Path servicePath = getServicePersistPath(service);
            // HDFS rename is atomic, so the persisted state becomes visible all at once
            fs.rename(serviceWritePath, servicePath);
            LOG.info("Persisted service " + service.getName() + " to " + servicePath);
        }
    } else {
        LOG.info("Server restart is not enabled. Not persisting the server state");
    }
}

From source file:com.inmobi.messaging.consumer.util.FileUtil.java

License:Apache License

public static void gzip(Path src, Path target, Configuration conf) throws IOException {
    FileSystem fs = FileSystem.get(conf);
    FSDataOutputStream out = fs.create(target);
    GzipCodec gzipCodec = (GzipCodec) ReflectionUtils.newInstance(GzipCodec.class, conf);
    Compressor gzipCompressor = CodecPool.getCompressor(gzipCodec);
    OutputStream compressedOut = gzipCodec.createOutputStream(out, gzipCompressor);
    FSDataInputStream in = fs.open(src);
    try {
        IOUtils.copyBytes(in, compressedOut, conf);
    } catch (Exception e) {
        LOG.error("Error in compressing ", e);
    } finally {
        in.close();
        CodecPool.returnCompressor(gzipCompressor);
        compressedOut.close();
        out.close();
    }
}
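
Since the catch block above logs and swallows copy errors, a stricter variant might let the IOException propagate and lean on try-with-resources for cleanup. A sketch under that assumption, reusing the imports and names from the method above (gzipStrict is a hypothetical name):

public static void gzipStrict(Path src, Path target, Configuration conf) throws IOException {
    FileSystem fs = FileSystem.get(conf);
    GzipCodec gzipCodec = (GzipCodec) ReflectionUtils.newInstance(GzipCodec.class, conf);
    Compressor gzipCompressor = CodecPool.getCompressor(gzipCodec);
    try (FSDataInputStream in = fs.open(src);
            OutputStream compressedOut = gzipCodec.createOutputStream(fs.create(target), gzipCompressor)) {
        // closing compressedOut finishes the gzip trailer and closes the wrapped FSDataOutputStream
        IOUtils.copyBytes(in, compressedOut, conf);
    } finally {
        CodecPool.returnCompressor(gzipCompressor);
    }
}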

From source file:com.inmobi.messaging.consumer.util.MessageUtil.java

License:Apache License

public static void createMessageFile(String fileName, FileSystem fs, Path parent, int msgIndex)
        throws IOException {
    FSDataOutputStream out = fs.create(new Path(parent, fileName));
    for (int i = 0; i < 100; i++) {
        out.write(Base64.encodeBase64(constructMessage(msgIndex).getBytes()));
        out.write('\n');
        msgIndex++;
    }
    out.close();
    TestUtil.LOG.debug("Created data file:" + new Path(parent, fileName));
}

From source file:com.inmobi.messaging.consumer.util.TestUtil.java

License:Apache License

public static void createEmptyFile(FileSystem fs, Path parent, String fileName) throws IOException {
    FSDataOutputStream out = fs.create(new Path(parent, fileName));
    LOG.debug("Created empty file:" + new Path(parent, fileName));
    out.close();
}

From source file:com.intel.hadoop.hbase.dot.TestHiveIntegration.java

License:Apache License

@BeforeClass
public static void setUp() throws Exception {
    Configuration config = TEST_UTIL.getConfiguration();
    config.set("hbase.coprocessor.region.classes", "com.intel.hadoop.hbase.dot.access.DataManipulationOps");
    config.set("hbase.coprocessor.master.classes", "com.intel.hadoop.hbase.dot.access.DataDefinitionOps");
    TEST_UTIL.startMiniCluster(1);
    TEST_UTIL.startMiniMapReduceCluster();
    initialize(TEST_UTIL.getConfiguration());

    // 1. To put the test data onto miniDFS, and get the file path
    FileSystem fs = FileSystem.get(config);
    FSDataOutputStream output = fs.create(new Path("/tsvfile"));
    PrintStream out = new PrintStream(output);
    out.println("row1|row1_fd1|row1_fd2|row1_fd3|row1_fd4");
    out.println("row2|row2_fd1|row2_fd2|row2_fd3|row2_fd4");
    out.println("row3|row3_fd1|row3_fd2|row3_fd3|row3_fd4");
    out.println("row4|row4_fd1|row4_fd2|row4_fd3|row4_fd4");
    out.println("row5|row5_fd1|row5_fd2|row5_fd3|row5_fd4");
    out.close();
    output.close();

    // fs.copyFromLocalFile(new Path("./src/test/data/data"), new
    // Path("/tsvfile"));
    assertEquals("tsv file name is not correct", fs.listStatus(new Path("/tsvfile"))[0].getPath().getName(),
            "tsvfile");

}

From source file:com.jeffy.hdfs.compression.FileDecompressor.java

License:Apache License

/**
 * @param args
 * @throws IOException
 */
public static void main(String[] args) throws IOException {
    // Create the Hadoop configuration
    Configuration conf = new Configuration();
    // The factory maps file name extensions to compression codecs
    CompressionCodecFactory factory = new CompressionCodecFactory(conf);
    for (String uri : args) {
        FileSystem fs = FileSystem.get(URI.create(uri), conf);
        Path inputPath = new Path(uri);
        // Infer the codec from the file suffix; codecs are registered via io.compression.codecs
        CompressionCodec codec = factory.getCodec(inputPath);
        // Skip the file if no codec matches its extension
        if (codec == null) {
            System.err.println("No codec found for " + uri);
            continue;
        }
        String outputUri = CompressionCodecFactory.removeSuffix(uri, codec.getDefaultExtension());
        try (InputStream in = codec.createInputStream(fs.open(inputPath));
                OutputStream out = fs.create(new Path(outputUri))) {
            IOUtils.copyBytes(in, out, conf);
        }
    }
}

From source file:com.kse.bigdata.main.Driver.java

License:Apache License

public static void main(String[] args) throws Exception {
    /**********************************************************************************
     **   Merge the source files into one.                                           **
     **   Should change the directories of each file before executing the program.   **
     **********************************************************************************/
    //        String inputFileDirectory = "/media/bk/??/BigData_Term_Project/Debug";
    //        String resultFileDirectory = "/media/bk/??/BigData_Term_Project/debug.csv";
    //        File resultFile = new File(resultFileDirectory);
    //        if(!resultFile.exists())
    //            new SourceFileMerger(inputFileDirectory, resultFileDirectory).mergeFiles();

    /**********************************************************************************
     * Hadoop Operation.
     * Before starting, check the length of the sequence we want to predict.
     **********************************************************************************/

    Configuration conf = new Configuration();

    // Compress intermediate MapReduce output with Snappy
    conf.setBoolean("mapred.compress.map.output", true);
    conf.set("mapred.map.output.compression.codec", "org.apache.hadoop.io.compress.SnappyCodec");

    //Enable Profiling
    //conf.setBoolean("mapred.task.profile", true);

    String testPath = null;
    String inputPath = null;
    String outputPath = null;

    int sampleSize = 1;
    ArrayList<String> results = new ArrayList<String>();

    for (int index = 0; index < args.length; index++) {

        /*
         * Mandatory command
         */
        //Extract input path string from command line.
        if (args[index].equals("-in"))
            inputPath = args[index + 1];

        //Extract output path string from command line.
        if (args[index].equals("-out"))
            outputPath = args[index + 1];

        //Extract test data path string from command line.
        if (args[index].equals("-test"))
            testPath = args[index + 1];

        /*
         * Optional command
         */
        //Extract a number of neighbors.
        if (args[index].equals("-nn"))
            conf.setInt(Reduce.NUMBER_OF_NEAREAST_NEIGHBOR, Integer.parseInt(args[index + 1]));

        //Whether job uses normalization or not.
        if (args[index].equals("-norm"))
            conf.setBoolean(Map.NORMALIZATION, true);

        //Extract the number of sample size to test.
        if (args[index].equals("-s"))
            sampleSize = Integer.valueOf(args[index + 1]);

        //Whether job uses mean or median
        //[Default : mean]
        if (args[index].equals("-med"))
            conf.setBoolean(Reduce.MEDIAN, true);
    }

    String outputFileName = "part-r-00000";
    SequenceSampler sampler = new SequenceSampler(testPath, sampleSize);
    LinkedList<Sequence> testSequences = sampler.getRandomSample();

    //        Test Sequence
    //        String testSeqString = "13.591-13.674-13.778-13.892-13.958-14.049-14.153-14.185-14.169-14.092-13.905-13.702-13.438-13.187-13.0-12.914-12.868-12.766-12.62-12.433-12.279-12.142-12.063-12.025-100";
    //        Sequence testSeq = new Sequence(testSeqString);
    //        LinkedList<Sequence> testSequences = new LinkedList<>();
    //        testSequences.add(testSeq);

    for (Sequence seq : testSequences) {

        /*
         ********************  Hadoop Launch ***********************
         */

        System.out.println(seq.getTailString());

        conf.set(Map.INPUT_SEQUENCE, seq.toString());

        Job job = new Job(conf);
        job.setJarByClass(Driver.class);
        job.setJobName("term-project-driver");

        job.setMapperClass(Map.class);
        job.setMapOutputKeyClass(NullWritable.class);
        job.setMapOutputValueClass(Text.class);

        //          Should think another way to implement the combiner class
        //          Current Implementation is not helpful to Job.
        //          job.setCombinerClass(Combiner.class);

        // Use a single reduce task so one sorted set keeps the 100 nearest neighbors.
        job.setNumReduceTasks(1);
        job.setReducerClass(Reduce.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);

        job.setInputFormatClass(TextInputFormat.class);
        job.setOutputFormatClass(TextOutputFormat.class);

        FileInputFormat.setInputPaths(job, new Path(inputPath));
        FileOutputFormat.setOutputPath(job, new Path(outputPath));

        job.waitForCompletion(true);

        /*
         * if job finishes, get result of the job and store it in results(list).
         */
        try {
            FileSystem hdfs = FileSystem.get(new Configuration());
            BufferedReader fileReader = new BufferedReader(
                    new InputStreamReader(hdfs.open(new Path(outputPath + "/" + outputFileName))));

            String line;
            while ((line = fileReader.readLine()) != null) {
                results.add(seq.getSeqString() + " " + line);
            }

            fileReader.close();

            hdfs.delete(new Path(outputPath), true);
            hdfs.close();

        } catch (IOException e) {
            e.printStackTrace();
            System.exit(1);
        }
    }

    /*
     * if all jobs finish, store results of jobs to output/result.txt file.
     */
    String finalOutputPath = "output/result.csv";
    try {
        FileSystem hdfs = FileSystem.get(new Configuration());
        Path file = new Path(finalOutputPath);
        if (hdfs.exists(file)) {
            hdfs.delete(file, true);
        }

        OutputStream os = hdfs.create(file);
        PrintWriter printWriter = new PrintWriter(new OutputStreamWriter(os, "UTF-8"));

        //CSV File Header
        printWriter.println("Actual,Predicted,MER,MAE");
        printWriter.flush();

        for (String result : results) {
            String[] tokens = result.split("\\s+");

            printWriter.println(tokens[0] + "," + tokens[1] + "," + tokens[2] + "," + tokens[3]);
            printWriter.flush();
        }

        printWriter.close();
        hdfs.close();
    } catch (IOException e) {
        e.printStackTrace();
        System.exit(1);
    }

}
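
One caveat worth flagging in this example: FileSystem.get() returns a cached instance shared per URI and configuration, so the hdfs.close() calls inside and after the loop can invalidate the filesystem for any other code using it. If an independently closeable handle is wanted, one option is to request an uncached instance:

    // a private, uncached instance that is safe to close without affecting other callers
    FileSystem hdfs = FileSystem.newInstance(new Configuration());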

From source file:com.kylinolap.common.persistence.HBaseResourceStore.java

License:Apache License

private Path writeLargeCellToHdfs(String resPath, byte[] largeColumn, HTableInterface table)
        throws IOException {
    Path redirectPath = bigCellHDFSPath(resPath);
    Configuration hconf = HadoopUtil.getCurrentConfiguration();
    FileSystem fileSystem = FileSystem.get(hconf);

    if (fileSystem.exists(redirectPath)) {
        fileSystem.delete(redirectPath, true);
    }

    FSDataOutputStream out = fileSystem.create(redirectPath);

    try {
        out.write(largeColumn);
    } finally {
        IOUtils.closeQuietly(out);
    }

    return redirectPath;
}