Example usage for org.apache.hadoop.fs FileSystem create

Introduction

This page collects usage examples for the create method of org.apache.hadoop.fs.FileSystem.

Prototype

public FSDataOutputStream create(Path f) throws IOException 

Document

Create an FSDataOutputStream at the indicated Path.
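
For quick reference, here is a minimal sketch of this overload. It is not taken from any of the source files below; the class layout, path, and written bytes are illustrative. With the one-argument create(Path), an existing file at the path is overwritten by default.

import java.nio.charset.StandardCharsets;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class CreateExample {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(conf);
        Path outputPath = new Path("/tmp/example/hello.txt"); // illustrative path

        // create(Path) returns an FSDataOutputStream; on most implementations this
        // overload overwrites an existing file and creates missing parent directories.
        try (FSDataOutputStream out = fs.create(outputPath)) {
            out.write("hello".getBytes(StandardCharsets.UTF_8));
        }
    }
}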

Usage

From source file: cmd.freebase2hdfs.java

License: Apache License

@Override
public int run(String[] args) throws Exception {
    String input = null;
    String output = null;
    if (args.length == 1) {
        input = "http://download.freebase.com/datadumps/latest/freebase-datadump-quadruples.tsv.bz2";
        output = args[0];
    } else if (args.length == 2) {
        input = args[0];
        output = args[1];
    } else {
        System.err.printf(
                "Usage: %s [generic options] [<http://path/to/freebase/datadump>] <hdfs://path/to/destination>\n",
                getClass().getName());
        System.err.println(
                "[<http://path/to/freebase/datadump>] is optional, it defaults to http://download.freebase.com/datadumps/latest/freebase-datadump-quadruples.tsv.bz2\n");
        ToolRunner.printGenericCommandUsage(System.err);
        return -1;
    }

    Configuration configuration = getConf();
    FileSystem fs = FileSystem.get(configuration);
    Path outputPath = new Path(output);
    InputStream in = new URL(input).openStream();
    FSDataOutputStream out = fs.create(outputPath);
    IOUtils.copyBytes(in, out, BUFFER_SIZE, true);

    return 0;
}

From source file: cmd.tdbloader4.java

License: Apache License

private void createOffsetsFile(FileSystem fs, String input, String output) throws IOException {
    log.debug("Creating offsets file...");
    Map<Long, Long> offsets = new TreeMap<Long, Long>();
    FileStatus[] status = fs.listStatus(new Path(input));
    for (FileStatus fileStatus : status) {
        Path file = fileStatus.getPath();
        if (file.getName().startsWith("part-r-")) {
            log.debug("Processing: {}", file.getName());
            BufferedReader in = new BufferedReader(new InputStreamReader(fs.open(file)));
            String line = in.readLine();
            String[] tokens = line.split("\\s");
            long partition = Long.valueOf(tokens[0]);
            long offset = Long.valueOf(tokens[1]);
            log.debug("Partition {} has offset {}", partition, offset);
            offsets.put(partition, offset);
        }
    }

    Path outputPath = new Path(output, Constants.OFFSETS_FILENAME);
    PrintWriter out = new PrintWriter(new OutputStreamWriter(fs.create(outputPath)));
    for (Long partition : offsets.keySet()) {
        out.println(partition + "\t" + offsets.get(partition));
    }
    out.close();
    log.debug("Offset file created.");
}

From source file: cn.lhfei.hadoop.ch04.FileDecompressor.java

License: Apache License

/**
 * use case: % hadoop FileDecompressor file.gz
 * @param args
 */
public static void main(String[] args) {
    FileSystem fs = null;
    String uri = args[0];
    Path inputPath = null;
    Configuration conf = new Configuration();
    CompressionCodecFactory factory = null;

    InputStream in = null;
    OutputStream out = null;

    try {
        fs = FileSystem.get(URI.create(uri), conf);
        inputPath = new Path(uri);
        factory = new CompressionCodecFactory(conf);
        CompressionCodec codec = factory.getCodec(inputPath);
        if (codec == null) {
            System.err.println("No codec found for " + uri);
            System.exit(1);
        }

        String outputUri = CompressionCodecFactory.removeSuffix(uri, codec.getDefaultExtension());

        in = codec.createInputStream(fs.open(inputPath));
        out = fs.create(new Path(outputUri));

        IOUtils.copyBytes(in, out, conf);

    } catch (IOException e) {
        e.printStackTrace();
    } finally {
        IOUtils.closeStream(in);
        IOUtils.closeStream(out);
    }
}

From source file: cn.uway.util.apache.parquet.hadoop.ParquetFileWriter.java

License: Apache License

private static void writeMetadataFile(Path outputPath, ParquetMetadata metadataFooter, FileSystem fs,
        String parquetMetadataFile) throws IOException {
    Path metaDataPath = new Path(outputPath, parquetMetadataFile);
    FSDataOutputStream metadata = fs.create(metaDataPath);
    metadata.write(MAGIC);
    serializeFooter(metadataFooter, metadata);
    metadata.close();
}

From source file: co.cask.cdap.template.etl.batch.ETLMapReduceTest.java

License: Apache License

@Test
public void testFiletoTPFS() throws Exception {
    String filePath = "file:///tmp/test/text.txt";
    String testData = "String for testing purposes.";

    Path textFile = new Path(filePath);
    Configuration conf = new Configuration();
    FileSystem fs = FileSystem.get(conf);
    FSDataOutputStream writeData = fs.create(textFile);
    writeData.write(testData.getBytes());
    writeData.flush();
    writeData.close();

    ETLStage source = new ETLStage("File", ImmutableMap.<String, String>builder()
            .put(Properties.File.FILESYSTEM, "Text").put(Properties.File.PATH, filePath).build());

    ETLStage sink = new ETLStage("TPFSAvro",
            ImmutableMap.of(Properties.TimePartitionedFileSetDataset.SCHEMA,
                    FileBatchSource.DEFAULT_SCHEMA.toString(),
                    Properties.TimePartitionedFileSetDataset.TPFS_NAME, "fileSink"));
    ETLBatchConfig etlConfig = new ETLBatchConfig("* * * * *", source, sink, Lists.<ETLStage>newArrayList());
    AdapterConfig adapterConfig = new AdapterConfig("", TEMPLATE_ID.getId(), GSON.toJsonTree(etlConfig));
    Id.Adapter adapterId = Id.Adapter.from(NAMESPACE, "testFileAdapter");
    AdapterManager manager = createAdapter(adapterId, adapterConfig);

    manager.start();
    manager.waitForOneRunToFinish(2, TimeUnit.MINUTES);
    manager.stop();

    DataSetManager<TimePartitionedFileSet> fileSetManager = getDataset("fileSink");
    TimePartitionedFileSet fileSet = fileSetManager.get();
    List<GenericRecord> records = readOutput(fileSet, FileBatchSource.DEFAULT_SCHEMA);
    Assert.assertEquals(1, records.size());
    Assert.assertEquals(testData, records.get(0).get("body").toString());
    fileSet.close();
}

From source file: co.cask.hydrator.action.ftp.FTPCopyAction.java

License: Apache License

@Override
public void run(ActionContext context) throws Exception {
    Path destination = new Path(config.getDestDirectory());
    FileSystem fileSystem = FileSystem.get(new Configuration());
    destination = fileSystem.makeQualified(destination);
    if (!fileSystem.exists(destination)) {
        fileSystem.mkdirs(destination);
    }

    FTPClient ftp;
    if ("ftp".equals(config.getProtocol().toLowerCase())) {
        ftp = new FTPClient();
    } else {
        ftp = new FTPSClient();
    }
    ftp.setControlKeepAliveTimeout(5);
    // UNIX type server
    FTPClientConfig ftpConfig = new FTPClientConfig();
    // Set additional parameters required for the ftp
    // for example config.setServerTimeZoneId("Pacific/Pitcairn")
    ftp.configure(ftpConfig);
    try {
        ftp.connect(config.getHost(), config.getPort());
        ftp.enterLocalPassiveMode();
        String replyString = ftp.getReplyString();
        LOG.info("Connected to server {} and port {} with reply from connect as {}.", config.getHost(),
                config.getPort(), replyString);

        // Check the reply code for actual success
        int replyCode = ftp.getReplyCode();

        if (!FTPReply.isPositiveCompletion(replyCode)) {
            ftp.disconnect();
            throw new RuntimeException(String.format("FTP server refused connection with code %s and reply %s.",
                    replyCode, replyString));
        }

        if (!ftp.login(config.getUserName(), config.getPassword())) {
            LOG.error("login command reply code {}, {}", ftp.getReplyCode(), ftp.getReplyString());
            ftp.logout();
            throw new RuntimeException(String.format(
                    "Login to the FTP server %s and port %s failed. " + "Please check user name and password.",
                    config.getHost(), config.getPort()));
        }

        FTPFile[] ftpFiles = ftp.listFiles(config.getSrcDirectory());
        LOG.info("listFiles command reply code: {}, {}.", ftp.getReplyCode(), ftp.getReplyString());
        // Check the reply code for the listFiles call.
        // If it is "522 Data connections must be encrypted", the data channel also needs to be encrypted.
        if (ftp.getReplyCode() == 522 && "sftp".equalsIgnoreCase(config.getProtocol())) {
            // encrypt data channel and listFiles again
            ((FTPSClient) ftp).execPROT("P");
            LOG.info("Attempting command listFiles on encrypted data channel.");
            ftpFiles = ftp.listFiles(config.getSrcDirectory());
        }
        for (FTPFile file : ftpFiles) {
            String source = config.getSrcDirectory() + "/" + file.getName();

            LOG.info("Current file {}, source {}", file.getName(), source);
            if (config.getExtractZipFiles() && file.getName().endsWith(".zip")) {
                copyZip(ftp, source, fileSystem, destination);
            } else {
                Path destinationPath = fileSystem.makeQualified(new Path(destination, file.getName()));
                LOG.debug("Downloading {} to {}", file.getName(), destinationPath.toString());
                try (OutputStream output = fileSystem.create(destinationPath)) {
                    InputStream is = ftp.retrieveFileStream(source);
                    ByteStreams.copy(is, output);
                }
            }
            if (!ftp.completePendingCommand()) {
                LOG.error("Error completing command.");
            }
        }
        ftp.logout();
    } finally {
        if (ftp.isConnected()) {
            try {
                ftp.disconnect();
            } catch (Throwable e) {
                LOG.error("Failure to disconnect the ftp connection.", e);
            }
        }
    }
}

From source file: co.cask.hydrator.action.ftp.FTPCopyAction.java

License: Apache License

private void copyZip(FTPClient ftp, String source, FileSystem fs, Path destination) throws IOException {
    InputStream is = ftp.retrieveFileStream(source);
    try (ZipInputStream zis = new ZipInputStream(new BufferedInputStream(is))) {
        ZipEntry entry;
        while ((entry = zis.getNextEntry()) != null) {
            LOG.debug("Extracting {}", entry);
            Path destinationPath = fs.makeQualified(new Path(destination, entry.getName()));
            try (OutputStream os = fs.create(destinationPath)) {
                LOG.debug("Downloading {} to {}", entry.getName(), destinationPath.toString());
                ByteStreams.copy(zis, os);
            }
        }
    }
}

From source file: co.cask.hydrator.plugin.batch.action.FileAction.java

License: Apache License

@SuppressWarnings("ConstantConditions")
@Override
public void run(BatchActionContext context) throws Exception {
    if (!config.shouldRun(context)) {
        return;
    }
    config.substituteMacros(context);

    Job job = JobUtils.createInstance();
    Configuration conf = job.getConfiguration();
    FileSystem fileSystem = FileSystem.get(conf);
    Path[] paths;
    Path sourcePath = new Path(config.path);
    if (fileSystem.isDirectory(sourcePath)) {
        FileStatus[] status = fileSystem.listStatus(sourcePath);
        paths = FileUtil.stat2Paths(status);
    } else {
        paths = new Path[] { sourcePath };
    }

    //get regex pattern for file name filtering.
    boolean patternSpecified = !Strings.isNullOrEmpty(config.pattern);
    if (patternSpecified) {
        regex = Pattern.compile(config.pattern);
    }

    switch (config.action.toLowerCase()) {
    case "delete":
        for (Path path : paths) {
            if (!patternSpecified || isFileNameMatch(path.getName())) {
                fileSystem.delete(path, true);
            }
        }
        break;
    case "move":
        for (Path path : paths) {
            if (!patternSpecified || isFileNameMatch(path.getName())) {
                Path targetFileMovePath = new Path(config.targetFolder, path.getName());
                fileSystem.rename(path, targetFileMovePath);
            }
        }
        break;
    case "archive":
        for (Path path : paths) {
            if (!patternSpecified || isFileNameMatch(path.getName())) {
                try (FSDataOutputStream archivedStream = fileSystem
                        .create(new Path(config.targetFolder, path.getName() + ".zip"));
                        ZipOutputStream zipArchivedStream = new ZipOutputStream(archivedStream);
                        FSDataInputStream fdDataInputStream = fileSystem.open(path)) {
                    zipArchivedStream.putNextEntry(new ZipEntry(path.getName()));
                    int length;
                    byte[] buffer = new byte[1024];
                    while ((length = fdDataInputStream.read(buffer)) > 0) {
                        zipArchivedStream.write(buffer, 0, length);
                    }
                    zipArchivedStream.closeEntry();
                }
                fileSystem.delete(path, true);
            }
        }
        break;
    default:
        LOG.warn("No action required on the file.");
        break;
    }
}

From source file: co.cask.hydrator.plugin.batch.ETLMapReduceTestRun.java

License: Apache License

@Test
public void testFiletoMultipleTPFS() throws Exception {
    String filePath = "file:///tmp/test/text.txt";
    String testData = "String for testing purposes.";

    Path textFile = new Path(filePath);
    Configuration conf = new Configuration();
    FileSystem fs = FileSystem.get(conf);
    FSDataOutputStream writeData = fs.create(textFile);
    writeData.write(testData.getBytes());
    writeData.flush();
    writeData.close();

    ETLStage source = new ETLStage("source", new ETLPlugin("File", BatchSource.PLUGIN_TYPE,
            ImmutableMap.<String, String>builder().put(Constants.Reference.REFERENCE_NAME, "TestFile")
                    .put(Properties.File.FILESYSTEM, "Text").put(Properties.File.PATH, filePath).build(),
            null));

    ETLStage sink1 = new ETLStage("sink1",
            new ETLPlugin("TPFSAvro", BatchSink.PLUGIN_TYPE,
                    ImmutableMap.of(Properties.TimePartitionedFileSetDataset.SCHEMA,
                            FileBatchSource.DEFAULT_SCHEMA.toString(),
                            Properties.TimePartitionedFileSetDataset.TPFS_NAME, "fileSink1"),
                    null));
    ETLStage sink2 = new ETLStage("sink2",
            new ETLPlugin("TPFSParquet", BatchSink.PLUGIN_TYPE,
                    ImmutableMap.of(Properties.TimePartitionedFileSetDataset.SCHEMA,
                            FileBatchSource.DEFAULT_SCHEMA.toString(),
                            Properties.TimePartitionedFileSetDataset.TPFS_NAME, "fileSink2"),
                    null));

    ETLBatchConfig etlConfig = ETLBatchConfig.builder("* * * * *").addStage(source).addStage(sink1)
            .addStage(sink2).addConnection(source.getName(), sink1.getName())
            .addConnection(source.getName(), sink2.getName()).build();

    AppRequest<ETLBatchConfig> appRequest = new AppRequest<>(ETLBATCH_ARTIFACT, etlConfig);
    Id.Application appId = Id.Application.from(Id.Namespace.DEFAULT, "FileToTPFS");
    ApplicationManager appManager = deployApplication(appId, appRequest);

    MapReduceManager mrManager = appManager.getMapReduceManager(ETLMapReduce.NAME);
    mrManager.start();
    mrManager.waitForFinish(2, TimeUnit.MINUTES);

    for (String sinkName : new String[] { "fileSink1", "fileSink2" }) {
        DataSetManager<TimePartitionedFileSet> fileSetManager = getDataset(sinkName);
        try (TimePartitionedFileSet fileSet = fileSetManager.get()) {
            List<GenericRecord> records = readOutput(fileSet, FileBatchSource.DEFAULT_SCHEMA);
            Assert.assertEquals(1, records.size());
            Assert.assertEquals(testData, records.get(0).get("body").toString());
        }
    }
}

From source file: coldstorage.io.Writer.java

License: Apache License

public static void main(String[] args) throws IOException {
    Schema.Parser parser = new Schema.Parser();
    Schema schema = parser.parse("{" + "\"namespace\": \"example.avro\", " + "\"type\": \"record\", "
            + "\"name\": \"User\", " + "\"fields\": [" + "     {\"name\": \"id\", \"type\": \"long\"},"
            + "     {\"name\": \"data\", \"type\": \"string\"}" + " ]}");

    GenericDatumWriter<GenericRecord> gdw = new GenericDatumWriter<GenericRecord>(schema);
    DataFileWriter<GenericRecord> dfw = new DataFileWriter<GenericRecord>(gdw);
    // Path pathData = new Path("./out/data.avro");
    // Path pathIndex = new Path("./out/data.index");

    Path pathData = new Path("hdfs://localhost:9000/avro/out/data.avro");
    Path pathIndex = new Path("hdfs://localhost:9000/avro/out/data.index");

    Configuration configuration = new Configuration();
    FileSystem fileSystem = pathData.getFileSystem(configuration);

    FSDataOutputStream indexOutputStream = fileSystem.create(pathIndex);
    FSDataOutputStream outputStream = fileSystem.create(pathData);
    dfw.create(schema, outputStream);
    GenericRecordBuilder genericRecordBuilder = new GenericRecordBuilder(schema);
    Random random = new Random(1);
    final int syncPoint = 1000;
    int count = 0;

    for (int i = 0; i < 100000000; i++) {
        genericRecordBuilder.set("id", (long) i);
        genericRecordBuilder.set("data", Long.toString(random.nextLong()));
        Record record = genericRecordBuilder.build();
        dfw.append(record);
        if (count >= syncPoint) {
            long sync = dfw.sync();
            Object object = record.get("id");
            writeIndex(indexOutputStream, sync, object);
            count = 0;
        }
        count++;
    }
    indexOutputStream.close();
    dfw.close();
}