Example usage for org.apache.hadoop.fs FileSystem get

List of usage examples for org.apache.hadoop.fs FileSystem get

Introduction

This page lists usage examples for the get(URI, Configuration) method of org.apache.hadoop.fs.FileSystem.

Prototype

public static FileSystem get(URI uri, Configuration conf) throws IOException 

Document

Get a FileSystem for this URI's scheme and authority.
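
Below is a minimal, self-contained sketch of calling this method directly. The namenode address, the class name FileSystemGetExample, and the listed path are illustrative placeholders, not values taken from the examples that follow.

import java.io.IOException;
import java.net.URI;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class FileSystemGetExample {
    public static void main(String[] args) throws IOException {
        // Placeholder namenode address; point this at your own cluster.
        URI uri = URI.create("hdfs://namenode.example.com:8020/");
        Configuration conf = new Configuration();

        // Resolve the FileSystem implementation for the URI's scheme and authority.
        FileSystem fs = FileSystem.get(uri, conf);
        try {
            // List the root directory as a simple smoke test.
            for (FileStatus status : fs.listStatus(new Path("/"))) {
                System.out.println(status.getPath());
            }
        } finally {
            fs.close();
        }
    }
}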

Usage

From source file:com.alibaba.jstorm.hdfs.bolt.AvroGenericRecordBolt.java

License:Apache License

@Override
protected void doPrepare(Map conf, TopologyContext topologyContext, OutputCollector collector)
        throws IOException {
    LOG.info("Preparing AvroGenericRecord Bolt...");
    this.fs = FileSystem.get(URI.create(this.fsUrl), hdfsConfig);
    Schema.Parser parser = new Schema.Parser();
    this.schema = parser.parse(this.schemaAsString);
}

From source file:com.alibaba.jstorm.hdfs.bolt.HdfsBolt.java

License:Apache License

@Override
public void doPrepare(Map conf, TopologyContext topologyContext, OutputCollector collector) throws IOException {
    LOG.info("Preparing HDFS Bolt...");
    this.fs = FileSystem.get(URI.create(this.fsUrl), hdfsConfig);
}

From source file:com.alibaba.jstorm.hdfs.bolt.SequenceFileBolt.java

License:Apache License

@Override
public void doPrepare(Map conf, TopologyContext topologyContext, OutputCollector collector) throws IOException {
    LOG.info("Preparing Sequence File Bolt...");
    if (this.format == null)
        throw new IllegalStateException("SequenceFormat must be specified.");

    this.fs = FileSystem.get(URI.create(this.fsUrl), hdfsConfig);
    this.codecFactory = new CompressionCodecFactory(hdfsConfig);
}

From source file:com.alibaba.jstorm.hdfs.common.security.AutoHDFS.java

License:Apache License

@SuppressWarnings("unchecked")
protected byte[] getHadoopCredentials(Map conf) {
    try {
        if (UserGroupInformation.isSecurityEnabled()) {
            final Configuration configuration = new Configuration();

            login(configuration);

            final String topologySubmitterUser = (String) conf.get(Config.TOPOLOGY_SUBMITTER_PRINCIPAL);

            final URI nameNodeURI = conf.containsKey(TOPOLOGY_HDFS_URI)
                    ? new URI(conf.get(TOPOLOGY_HDFS_URI).toString())
                    : FileSystem.getDefaultUri(configuration);

            UserGroupInformation ugi = UserGroupInformation.getCurrentUser();

            final UserGroupInformation proxyUser = UserGroupInformation.createProxyUser(topologySubmitterUser,
                    ugi);

            Credentials creds = (Credentials) proxyUser.doAs(new PrivilegedAction<Object>() {
                @Override
                public Object run() {
                    try {
                        FileSystem fileSystem = FileSystem.get(nameNodeURI, configuration);
                        Credentials credential = proxyUser.getCredentials();

                        fileSystem.addDelegationTokens(hdfsPrincipal, credential);
                        LOG.info("Delegation tokens acquired for user {}", topologySubmitterUser);
                        return credential;
                    } catch (IOException e) {
                        throw new RuntimeException(e);
                    }
                }
            });

            ByteArrayOutputStream bao = new ByteArrayOutputStream();
            ObjectOutputStream out = new ObjectOutputStream(bao);

            creds.write(out);
            out.flush();
            out.close();

            return bao.toByteArray();
        } else {
            throw new RuntimeException("Security is not enabled for HDFS");
        }
    } catch (Exception ex) {
        throw new RuntimeException("Failed to get delegation tokens.", ex);
    }
}

From source file:com.alibaba.jstorm.hdfs.spout.HdfsSpout.java

License:Apache License

public void open(Map conf, TopologyContext context, SpoutOutputCollector collector) {
    LOG.info("Opening HDFS Spout");
    this.conf = conf;
    this.commitTimer = new Timer();
    this.tracker = new ProgressTracker();
    this.hdfsConfig = new Configuration();

    this.collector = collector;
    this.hdfsConfig = new Configuration();
    this.tupleCounter = 0;

    // Hdfs related settings
    if (conf.containsKey(Configs.HDFS_URI)) {
        this.hdfsUri = conf.get(Configs.HDFS_URI).toString();
    } else {
        throw new RuntimeException(Configs.HDFS_URI + " setting is required");
    }

    try {
        this.hdfs = FileSystem.get(URI.create(hdfsUri), hdfsConfig);
    } catch (IOException e) {
        LOG.error("Unable to instantiate file system", e);
        throw new RuntimeException("Unable to instantiate file system", e);
    }

    if (conf.containsKey(configKey)) {
        Map<String, Object> map = (Map<String, Object>) conf.get(configKey);
        if (map != null) {
            for (String keyName : map.keySet()) {
                LOG.info("HDFS Config override : {} = {} ", keyName, String.valueOf(map.get(keyName)));
                this.hdfsConfig.set(keyName, String.valueOf(map.get(keyName)));
            }
            try {
                HdfsSecurityUtil.login(conf, hdfsConfig);
            } catch (IOException e) {
                LOG.error("HDFS Login failed ", e);
                throw new RuntimeException(e);
            }
        } // if(map != null)
    }

    // Reader type config
    if (conf.containsKey(Configs.READER_TYPE)) {
        readerType = conf.get(Configs.READER_TYPE).toString();
        checkValidReader(readerType);
    }

    // -- source dir config
    if (!conf.containsKey(Configs.SOURCE_DIR)) {
        LOG.error(Configs.SOURCE_DIR + " setting is required");
        throw new RuntimeException(Configs.SOURCE_DIR + " setting is required");
    }
    this.sourceDirPath = new Path(conf.get(Configs.SOURCE_DIR).toString());

    // -- archive dir config
    if (!conf.containsKey(Configs.ARCHIVE_DIR)) {
        LOG.error(Configs.ARCHIVE_DIR + " setting is required");
        throw new RuntimeException(Configs.ARCHIVE_DIR + " setting is required");
    }
    this.archiveDirPath = new Path(conf.get(Configs.ARCHIVE_DIR).toString());
    validateOrMakeDir(hdfs, archiveDirPath, "Archive");

    // -- bad files dir config
    if (!conf.containsKey(Configs.BAD_DIR)) {
        LOG.error(Configs.BAD_DIR + " setting is required");
        throw new RuntimeException(Configs.BAD_DIR + " setting is required");
    }

    this.badFilesDirPath = new Path(conf.get(Configs.BAD_DIR).toString());
    validateOrMakeDir(hdfs, badFilesDirPath, "bad files");

    // -- ignore file names config
    if (conf.containsKey(Configs.IGNORE_SUFFIX)) {
        this.ignoreSuffix = conf.get(Configs.IGNORE_SUFFIX).toString();
    }

    // -- lock dir config
    String lockDir = !conf.containsKey(Configs.LOCK_DIR) ? getDefaultLockDir(sourceDirPath)
            : conf.get(Configs.LOCK_DIR).toString();
    this.lockDirPath = new Path(lockDir);
    validateOrMakeDir(hdfs, lockDirPath, "locks");

    // -- lock timeout
    if (conf.get(Configs.LOCK_TIMEOUT) != null) {
        this.lockTimeoutSec = Integer.parseInt(conf.get(Configs.LOCK_TIMEOUT).toString());
    }

    // -- enable/disable ACKing
    Object ackers = conf.get(Config.TOPOLOGY_ACKER_EXECUTORS);
    if (ackers != null) {
        int ackerCount = Integer.parseInt(ackers.toString());
        this.ackEnabled = (ackerCount > 0);
        LOG.debug("ACKer count = {}", ackerCount);
    } else { // ackers==null when ackerCount not explicitly set on the topology
        this.ackEnabled = true;
        LOG.debug("ACK count not explicitly set on topology.");
    }

    LOG.info("ACK mode is {}", ackEnabled ? "enabled" : "disabled");

    // -- commit frequency - count
    if (conf.get(Configs.COMMIT_FREQ_COUNT) != null) {
        commitFrequencyCount = Integer.parseInt(conf.get(Configs.COMMIT_FREQ_COUNT).toString());
    }

    // -- commit frequency - seconds
    if (conf.get(Configs.COMMIT_FREQ_SEC) != null) {
        commitFrequencySec = Integer.parseInt(conf.get(Configs.COMMIT_FREQ_SEC).toString());
        if (commitFrequencySec <= 0) {
            throw new RuntimeException(Configs.COMMIT_FREQ_SEC + " setting must be greater than 0");
        }
    }

    // -- max outstanding tuples
    if (conf.get(Configs.MAX_OUTSTANDING) != null) {
        maxOutstanding = Integer.parseInt(conf.get(Configs.MAX_OUTSTANDING).toString());
    }

    // -- clocks in sync
    if (conf.get(Configs.CLOCKS_INSYNC) != null) {
        clocksInSync = Boolean.parseBoolean(conf.get(Configs.CLOCKS_INSYNC).toString());
    }

    // -- spout id
    spoutId = context.getThisComponentId();

    // setup timer for commit elapse time tracking
    setupCommitElapseTimer();
}

From source file:com.aliyun.fs.utils.OssInputUtils.java

License:Apache License

public FileSplit[] getSplits(String file, int numSplits) throws IOException {
    Path path = new Path(file);
    this.fs = FileSystem.get(path.toUri(), conf);
    fs.initialize(path.toUri(), conf);

    FileStatus[] files = fs.listStatus(path);
    long totalSize = 0;
    for (FileStatus file1 : files) {
        if (file1.isDirectory()) {
            throw new IOException("Not a file: " + file1.getPath());
        }
        totalSize += file1.getLen();
    }

    long goalSize = totalSize / (numSplits == 0 ? 1 : numSplits);
    long minSize = Math
            .max(conf.getLong(org.apache.hadoop.mapreduce.lib.input.FileInputFormat.SPLIT_MINSIZE, 1), 1);

    ArrayList<FileSplit> splits = new ArrayList<FileSplit>(numSplits);
    for (FileStatus file2 : files) {
        Path fp = file2.getPath();
        long length = file2.getLen();
        if (length != 0) {
            long splitSize = Math.max(minSize, goalSize);
            long bytesRemaining = length;
            while (((double) bytesRemaining) / splitSize > SPLIT_SLOP) {
                FileSplit split = new FileSplit(fp, length - bytesRemaining, splitSize, new String[0]);
                splits.add(split);
                bytesRemaining -= splitSize;
            }
            if (bytesRemaining != 0) {
                FileSplit split = new FileSplit(fp, length - bytesRemaining, bytesRemaining, new String[0]);
                splits.add(split);
            }
        }
    }
    LOG.info("Total # of splits: " + splits.size());
    return splits.toArray(new FileSplit[splits.size()]);
}

From source file:com.aliyun.fs.utils.OssInputUtils.java

License:Apache License

public RecordReader<LongWritable, Text> getOssRecordReader(FileSplit fileSplit, Configuration conf)
        throws IOException {
    String delimiter = conf.get("textinputformat.record.delimiter");
    byte[] recordDelimiterBytes = null;
    if (null != delimiter) {
        recordDelimiterBytes = delimiter.getBytes(Charsets.UTF_8);
    }

    if (fs == null) {
        this.fs = FileSystem.get(fileSplit.getPath().toUri(), conf);
        fs.initialize(fileSplit.getPath().toUri(), conf);
    }

    return new OssRecordReader(conf, fileSplit, fs, recordDelimiterBytes);
}

From source file:com.asakusafw.bulkloader.cache.CacheBuildTest.java

License:Apache License

/**
 * Initializes the test.
 * @throws Exception if an error occurs
 */
@Before
public void setUp() throws Exception {
    URI uri = getTargetUri();
    FileSystem fs = FileSystem.get(uri, getConfiguration());
    fs.delete(new Path(uri), true);
}

From source file:com.asakusafw.bulkloader.collector.ExportFileSend.java

License:Apache License

/**
 * Reads the export data from HDFS and sends it in TSV format through the given
 * {@link com.asakusafw.bulkloader.transfer.FileList.Writer}.
 * @param <T> the data model type
 * @param targetTableModel the data model class of the export target table
 * @param filePath the path (glob) of the export data on HDFS
 * @param writer the writer to send the TSV contents to
 * @param tableName the name of the export target table
 * @return the number of records sent, or -1 if there was no data to send
 * @throws BulkLoaderSystemException if the export data could not be read or sent
 */
protected <T extends Writable> long send(Class<T> targetTableModel, String filePath, FileList.Writer writer,
        String tableName) throws BulkLoaderSystemException {
    FileSystem fs = null;
    String fileName = null;

    // maximum size of a single file to send
    long maxSize = Long.parseLong(ConfigurationLoader.getProperty(Constants.PROP_KEY_EXP_LOAD_MAX_SIZE));

    try {
        TsvIoFactory<T> factory = new TsvIoFactory<>(targetTableModel);
        Configuration conf = new Configuration();
        fs = FileSystem.get(new URI(filePath), conf);

        // resolve the files to send
        FileStatus[] status = fs.globStatus(new Path(filePath));
        Path[] listedPaths = FileUtil.stat2Paths(status);
        if (listedPaths == null) {
            LOG.info("TG-COLLECTOR-02006", tableName, filePath);
            return -1;
        } else {
            LOG.info("TG-COLLECTOR-02007", listedPaths.length, tableName, filePath);
        }
        long count = 0;
        boolean addEntry = false;
        for (Path path : listedPaths) {
            // skip system files
            if (isSystemFile(path)) {
                continue;
            }

            // TODO
            // open a model input for the current file
            ModelInput<T> input = TemporaryStorage.openInput(conf, targetTableModel, path);
            try {
                while (true) {
                    // create the next file entry to send
                    addEntry = true;
                    fileName = FileNameUtil.createSendExportFileName(tableName, fileNameMap);
                    OutputStream output = writer.openNext(FileList.content(fileName));
                    try {
                        CountingOutputStream counter = new CountingOutputStream(output);
                        ModelOutput<T> modelOut = factory.createModelOutput(counter);
                        T model = factory.createModelObject();
                        LOG.info("TG-COLLECTOR-02004", tableName, path.toString(), fileName);

                        // read each model record and write it out as TSV
                        boolean nextFile = false;
                        while (input.readTo(model)) {
                            // write the model record
                            modelOut.write(model);
                            count++;
                            // when the output size exceeds the limit,
                            // continue writing the rest to the next file
                            if (counter.getByteCount() > maxSize) {
                                nextFile = true;
                                break;
                            }
                        }
                        modelOut.close();
                        LOG.info("TG-COLLECTOR-02005", tableName, path.toString(), fileName);

                        if (nextFile) {
                            // size limit reached; continue with a new file
                            continue;
                        } else {
                            // all records for this path have been written
                            break;
                        }
                    } finally {
                        output.close();
                    }
                }
            } finally {
                input.close();
            }
        }
        if (addEntry) {
            return count;
        } else {
            assert count == 0;
            return -1;
        }
    } catch (IOException e) {
        throw new BulkLoaderSystemException(e, getClass(), "TG-COLLECTOR-02001", MessageFormat
                .format("HDFS?{0} ???{1}", filePath, fileName));
    } catch (URISyntaxException e) {
        throw new BulkLoaderSystemException(e, getClass(), "TG-COLLECTOR-02001",
                MessageFormat.format("HDFS???HDFS?{0}", filePath));
    } finally {
        if (fs != null) {
            try {
                fs.close();
            } catch (IOException e) {
                throw new BulkLoaderSystemException(e, this.getClass(), "TG-COLLECTOR-02001",
                        MessageFormat.format(
                                "HDFS???URI{0}",
                                filePath));
            }
        }
    }
}