Example usage for org.apache.hadoop.fs FileSystem create

Introduction

This page collects usage examples of org.apache.hadoop.fs.FileSystem#create(Path) drawn from open-source projects.

Prototype

public FSDataOutputStream create(Path f) throws IOException 

Document

Create an FSDataOutputStream at the indicated Path.
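
Before the project-specific examples, here is a minimal, self-contained sketch of the call; the target path and payload are purely illustrative. Note that the single-argument create(Path) overwrites any existing file at that path; use the create(Path, boolean overwrite) variant when that is not desired.

import java.io.IOException;
import java.nio.charset.StandardCharsets;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class FileSystemCreateExample {

    public static void main(String[] args) throws IOException {
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(conf);

        // Illustrative path; create(Path) truncates/overwrites an existing file.
        Path path = new Path("/tmp/example/hello.txt");

        // try-with-resources closes the stream, flushing the data to the filesystem
        try (FSDataOutputStream out = fs.create(path)) {
            out.write("hello, hdfs".getBytes(StandardCharsets.UTF_8));
        }
    }
}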

Usage

From source file: com.kylinolap.dict.DictionaryManager.java

License: Apache License

private String unpackDataSet(String tempHDFSDir, String dataSetName) throws IOException {

    InputStream in = this.getClass().getResourceAsStream("/com/kylinolap/dict/" + dataSetName + ".txt");
    if (in == null) // data set resource not found
        return null;

    ByteArrayOutputStream buf = new ByteArrayOutputStream();
    IOUtils.copy(in, buf);
    in.close();
    byte[] bytes = buf.toByteArray();

    Path tmpDataSetPath = new Path(
            tempHDFSDir + "/dict/temp_dataset/" + dataSetName + "_" + bytes.length + ".txt");

    FileSystem fs = HadoopUtil.getFileSystem(tempHDFSDir);
    boolean writtenNewFile = false;
    if (!fs.exists(tmpDataSetPath) || fs.getFileStatus(tmpDataSetPath).getLen() != bytes.length) {
        fs.mkdirs(tmpDataSetPath.getParent());
        FSDataOutputStream out = fs.create(tmpDataSetPath);
        IOUtils.copy(new ByteArrayInputStream(bytes), out);
        out.close();
        writtenNewFile = true;
    }

    String qualifiedPath = tmpDataSetPath.makeQualified(fs.getUri(), new Path("/")).toString();
    if (writtenNewFile)
        logger.info("Dictionary temp data set file written to " + qualifiedPath);
    return qualifiedPath;
}

From source file: com.kylinolap.job.hadoop.cube.FactDistinctColumnsReducer.java

License: Apache License

@Override
public void reduce(ShortWritable key, Iterable<Text> values, Context context)
        throws IOException, InterruptedException {
    TblColRef col = columnList.get(key.get());

    HashSet<ByteArray> set = new HashSet<ByteArray>();
    for (Text textValue : values) {
        ByteArray value = new ByteArray(Bytes.copy(textValue.getBytes(), 0, textValue.getLength()));
        set.add(value);
    }

    Configuration conf = context.getConfiguration();
    FileSystem fs = FileSystem.get(conf);
    String outputPath = conf.get(BatchConstants.OUTPUT_PATH);
    FSDataOutputStream out = fs.create(new Path(outputPath, col.getName()));

    try {
        for (ByteArray value : set) {
            out.write(value.data);
            out.write('\n');
        }
    } finally {
        out.close();
    }

}

From source file: com.kylinolap.job.hadoop.invertedindex.IIDistinctColumnsReducer.java

License: Apache License

@Override
public void reduce(ShortWritable key, Iterable<Text> values, Context context)
        throws IOException, InterruptedException {
    String columnName = columns[key.get()];

    HashSet<ByteArray> set = new HashSet<ByteArray>();
    for (Text textValue : values) {
        ByteArray value = new ByteArray(Bytes.copy(textValue.getBytes(), 0, textValue.getLength()));
        set.add(value);
    }

    Configuration conf = context.getConfiguration();
    FileSystem fs = FileSystem.get(conf);
    String outputPath = conf.get(BatchConstants.OUTPUT_PATH);
    FSDataOutputStream out = fs.create(new Path(outputPath, columnName));

    try {
        for (ByteArray value : set) {
            out.write(value.data);
            out.write('\n');
        }
    } finally {
        out.close();
    }

}

From source file: com.kylinolap.job.tools.DeployCoprocessorCLI.java

License: Apache License

public static Path uploadCoprocessorJar(String localCoprocessorJar, FileSystem fileSystem,
        Set<String> oldJarPaths) throws IOException {
    Path uploadPath = null;
    File localCoprocessorFile = new File(localCoprocessorJar);

    // check existing jars
    if (oldJarPaths == null) {
        oldJarPaths = new HashSet<String>();
    }
    Path coprocessorDir = getCoprocessorHDFSDir(fileSystem, KylinConfig.getInstanceFromEnv());
    for (FileStatus fileStatus : fileSystem.listStatus(coprocessorDir)) {
        // compare against the local jar file's size, not the length of its path string
        if (fileStatus.getLen() == localCoprocessorFile.length()
                && fileStatus.getModificationTime() == localCoprocessorFile.lastModified()) {
            uploadPath = fileStatus.getPath();
            break;
        }
        String filename = fileStatus.getPath().toString();
        if (filename.endsWith(".jar")) {
            oldJarPaths.add(filename);
        }
    }

    // upload if not existing
    if (uploadPath == null) {
        // figure out a unique new jar file name
        Set<String> oldJarNames = new HashSet<String>();
        for (String path : oldJarPaths) {
            oldJarNames.add(new Path(path).getName());
        }
        String baseName = getBaseFileName(localCoprocessorJar);
        String newName = null;
        int i = 0;
        while (newName == null) {
            newName = baseName + "-" + (i++) + ".jar";
            if (oldJarNames.contains(newName))
                newName = null;
        }

        // upload
        uploadPath = new Path(coprocessorDir, newName);
        FileInputStream in = null;
        FSDataOutputStream out = null;
        try {
            in = new FileInputStream(localCoprocessorFile);
            out = fileSystem.create(uploadPath);
            IOUtils.copy(in, out);
        } finally {
            IOUtils.closeQuietly(in);
            IOUtils.closeQuietly(out);
        }

        fileSystem.setTimes(uploadPath, localCoprocessorFile.lastModified(), System.currentTimeMillis());

    }

    uploadPath = uploadPath.makeQualified(fileSystem.getUri(), null);
    return uploadPath;
}

From source file: com.liferay.hadoop.action.HadoopJob.java

License: Open Source License

public String doExecute(HttpServletRequest request, HttpServletResponse response) throws Exception {

    response.setContentType(ContentTypes.TEXT_PLAIN_UTF8);

    PrintWriter writer = response.getWriter();

    FileSystem fileSystem = HadoopManager.getFileSystem();

    JobClient jobClient = HadoopManager.getJobClient();

    writer.println("-- Job Status --");

    Path inputPath = new Path("/index/*/*");
    Path outputPath = new Path("/wordcount/results");

    try {
        if (_runningJob == null) {
            writer.println("Creating job");

            if (fileSystem.exists(_jobPath)) {
                fileSystem.delete(_jobPath, false);
            }

            if (!fileSystem.exists(_jobPath)) {
                writer.println("Deploying the job code to cluster");

                FSDataOutputStream outputStream = null;

                try {
                    outputStream = fileSystem.create(_jobPath);

                    ServletContext servletContext = HadoopManager.getServletContext();

                    InputStream inputStream = servletContext.getResourceAsStream("/WEB-INF/lib/hadoop-job.jar");

                    StreamUtil.transfer(inputStream, outputStream, false);
                } finally {
                    StreamUtil.cleanUp(outputStream);
                }

                writer.println("Job code deployed to cluster");
            }

            if (fileSystem.exists(outputPath)) {
                writer.println("A previous job output was found, backing it up");

                fileSystem.rename(outputPath,
                        outputPath.getParent().suffix("/.results-" + System.currentTimeMillis()));
            }

            _jobConf = HadoopManager.createNewJobConf();

            _jobConf.setJobName("Word Count");

            writer.println("Job '" + _jobConf.getJobName() + "' is being configured");

            _jobConf.setJarByClass(Map.class);
            _jobConf.setOutputKeyClass(Text.class);
            _jobConf.setOutputValueClass(IntWritable.class);
            _jobConf.setMapperClass(Map.class);
            _jobConf.setCombinerClass(Reduce.class);
            _jobConf.setReducerClass(Reduce.class);
            _jobConf.setInputFormat(TextInputFormat.class);
            _jobConf.setOutputFormat(TextOutputFormat.class);

            writer.println("Job code deployed to distributed cache's classpath");

            DistributedCache.addArchiveToClassPath(_jobPath, _jobConf, fileSystem);

            FileInputFormat.setInputPaths(_jobConf, inputPath);
            FileOutputFormat.setOutputPath(_jobConf, outputPath);

            writer.println("Submitting job the first time");

            _runningJob = jobClient.submitJob(_jobConf);

            writer.println("Job submitted");
        }

        int jobState = _runningJob.getJobState();

        writer.println(
                "Job status: " + jobState + " (RUNNING = 1, SUCCEEDED = 2, FAILED = 3, PREP = 4, KILLED = 5)");

        if ((jobState != JobStatus.RUNNING) && (jobState != JobStatus.PREP)) {

            writer.println("Re-issuing the job");

            if (fileSystem.exists(outputPath)) {
                writer.println("A previous job output was found, backing it up");

                fileSystem.rename(outputPath,
                        outputPath.getParent().suffix("/.results-" + System.currentTimeMillis()));
            }

            writer.println("Submitting job the first time");

            _runningJob = jobClient.submitJob(_jobConf);

            writer.println("Job submitted");
        }
    } catch (Exception e) {
        writer.println("Job error: ");

        e.printStackTrace(writer);
    }

    writer.flush();
    writer.close();

    return null;
}

From source file: com.liferay.hadoop.search.HadoopDLIndexerPostProcessor.java

License: Open Source License

public void postProcessDocument(Document document, Object obj) throws Exception {

    DLFileEntry dlFileEntry = (DLFileEntry) obj;

    long companyId = dlFileEntry.getCompanyId();
    long repositoryId = dlFileEntry.getRepositoryId();

    String stringObject = document.toString();

    // remove JSON chars

    stringObject = StringUtil.replace(stringObject, new String[] { "\"", ",", ":", "{", "}", "[", "]" },
            new String[] { StringPool.SPACE, StringPool.SPACE, StringPool.SPACE, StringPool.SPACE,
                    StringPool.SPACE, StringPool.SPACE, StringPool.SPACE });

    Path fullDirPath = HadoopManager.getFullDirPath(companyId, repositoryId, null);

    fullDirPath = new Path("/index".concat(fullDirPath.toString()));

    FSDataOutputStream outputStream = null;

    try {
        FileSystem fileSystem = HadoopManager.getFileSystem();

        String suffix = StringPool.SLASH.concat(document.getUID());

        outputStream = fileSystem.create(fullDirPath.suffix(suffix));

        PrintWriter pw = new PrintWriter(outputStream);

        pw.write(stringObject);

        pw.flush();
        pw.close();
    } catch (IOException e) {
        e.printStackTrace();
    } finally {
        StreamUtil.cleanUp(outputStream);
    }
}

From source file: com.liferay.hadoop.store.HDFSStore.java

License: Open Source License

@Override
public void addFile(long companyId, long repositoryId, String fileName, InputStream is)
        throws PortalException, SystemException {

    Path fullPath = HadoopManager.getFullVersionFilePath(companyId, repositoryId, fileName, VERSION_DEFAULT);

    FSDataOutputStream outputStream = null;

    try {
        FileSystem fileSystem = HadoopManager.getFileSystem();

        outputStream = fileSystem.create(fullPath);

        StreamUtil.transfer(is, outputStream, false);
    } catch (IOException ioe) {
        throw new SystemException(ioe);
    } finally {
        StreamUtil.cleanUp(outputStream);
    }
}

From source file: com.liferay.hadoop.store.HDFSStore.java

License: Open Source License

@Override
public void updateFile(long companyId, long repositoryId, String fileName, String versionLabel,
        InputStream inputStream) throws PortalException, SystemException {

    Path fullPath = HadoopManager.getFullVersionFilePath(companyId, repositoryId, fileName, versionLabel);

    FSDataOutputStream outputStream = null;

    try {
        FileSystem fileSystem = HadoopManager.getFileSystem();

        outputStream = fileSystem.create(fullPath);

        StreamUtil.transfer(inputStream, outputStream, false);
    } catch (IOException ioe) {
        throw new SystemException(ioe);
    } finally {
        StreamUtil.cleanUp(outputStream);
    }
}

From source file: com.liferay.hadoop.util.HadoopManager.java

License: Open Source License

public static void runJob(StoreEvent storeEvent) throws IOException {
    FileSystem fileSystem = getFileSystem();

    if (_servletContext == null) {
        return;
    }

    JobClient jobClient = getJobClient();

    Path inputPath = new Path("/index".concat(storeEvent.getRootPath().toString()).concat("/*"));
    Path outputPath = new Path("/wordcount".concat(storeEvent.getRootPath().toString()).concat("/results"));

    try {
        if (_runningJob == null) {
            if (!fileSystem.exists(_jobPath)) {
                FSDataOutputStream outputStream = null;

                try {
                    outputStream = fileSystem.create(_jobPath);

                    InputStream inputStream = _servletContext
                            .getResourceAsStream("/WEB-INF/lib/hadoop-job.jar");

                    StreamUtil.transfer(inputStream, outputStream, false);
                } finally {
                    StreamUtil.cleanUp(outputStream);
                }
            }

            if (fileSystem.exists(outputPath)) {
                fileSystem.rename(outputPath,
                        outputPath.getParent().suffix("/.results-" + System.currentTimeMillis()));
            }

            _jobConf = new JobConf(_sharedJobConf);

            _jobConf.setJobName("Word Count");
            _jobConf.setJarByClass(Map.class);
            _jobConf.setOutputKeyClass(Text.class);
            _jobConf.setOutputValueClass(IntWritable.class);
            _jobConf.setMapperClass(Map.class);
            _jobConf.setCombinerClass(Reduce.class);
            _jobConf.setReducerClass(Reduce.class);
            _jobConf.setInputFormat(TextInputFormat.class);
            _jobConf.setOutputFormat(TextOutputFormat.class);

            DistributedCache.addArchiveToClassPath(_jobPath, _jobConf, fileSystem);

            FileInputFormat.setInputPaths(_jobConf, inputPath);
            FileOutputFormat.setOutputPath(_jobConf, outputPath);

            _runningJob = jobClient.submitJob(_jobConf);
        }

        int jobState = _runningJob.getJobState();

        if ((jobState != JobStatus.RUNNING) && (jobState != JobStatus.PREP)) {

            System.out.println("Re-issuing the word count job.");

            if (fileSystem.exists(outputPath)) {
                fileSystem.rename(outputPath,
                        outputPath.getParent().suffix("/.results-" + System.currentTimeMillis()));
            }

            _runningJob = jobClient.submitJob(_jobConf);
        }
    } catch (Exception e) {
        e.printStackTrace();
    }
}

From source file: com.linkedin.cubert.io.avro.AvroTeeWriter.java

License: Open Source License

@Override
public void open(Configuration conf, JsonNode json, BlockSchema schema, Path root, String filename)
        throws IOException {
    Path teePath = new Path(root, filename + ".avro");
    FileSystem fs = FileSystem.get(conf);

    Schema avroSchema = AvroUtils.convertFromBlockSchema("record", schema);
    record = new Record(avroSchema);
    numColumns = schema.getNumColumns();

    DatumWriter<GenericRecord> datumWriter = new GenericDatumWriter<GenericRecord>(avroSchema);
    dataFileWriter = new DataFileWriter<GenericRecord>(datumWriter);
    dataFileWriter.create(avroSchema, fs.create(teePath));
}