List of usage examples for org.apache.hadoop.fs FileSystem get
public static FileSystem get(URI uri, Configuration conf) throws IOException
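Before the per-project examples below, here is a minimal, self-contained sketch of how this overload is typically called. It is illustrative only: the NameNode address hdfs://localhost:9000 and the class name FileSystemGetExample are assumptions, not taken from any of the source files listed here.

import java.net.URI;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.Path;

public class FileSystemGetExample {
    public static void main(String[] args) throws Exception {
        // The URI scheme selects the FileSystem implementation (hdfs://, file://, ...);
        // the Configuration supplies any additional client settings.
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(URI.create("hdfs://localhost:9000"), conf);
        try {
            // List the root directory as a simple smoke test.
            for (FileStatus status : fs.listStatus(new Path("/"))) {
                System.out.println(status.getPath());
            }
        } finally {
            fs.close();
        }
    }
}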
From source file:com.alibaba.jstorm.hdfs.bolt.AvroGenericRecordBolt.java
License:Apache License
@Override
protected void doPrepare(Map conf, TopologyContext topologyContext, OutputCollector collector) throws IOException {
    LOG.info("Preparing AvroGenericRecord Bolt...");
    this.fs = FileSystem.get(URI.create(this.fsUrl), hdfsConfig);
    Schema.Parser parser = new Schema.Parser();
    this.schema = parser.parse(this.schemaAsString);
}
From source file:com.alibaba.jstorm.hdfs.bolt.HdfsBolt.java
License:Apache License
@Override
public void doPrepare(Map conf, TopologyContext topologyContext, OutputCollector collector) throws IOException {
    LOG.info("Preparing HDFS Bolt...");
    this.fs = FileSystem.get(URI.create(this.fsUrl), hdfsConfig);
}
From source file:com.alibaba.jstorm.hdfs.bolt.SequenceFileBolt.java
License:Apache License
@Override
public void doPrepare(Map conf, TopologyContext topologyContext, OutputCollector collector) throws IOException {
    LOG.info("Preparing Sequence File Bolt...");
    if (this.format == null) {
        throw new IllegalStateException("SequenceFormat must be specified.");
    }
    this.fs = FileSystem.get(URI.create(this.fsUrl), hdfsConfig);
    this.codecFactory = new CompressionCodecFactory(hdfsConfig);
}
From source file:com.alibaba.jstorm.hdfs.common.security.AutoHDFS.java
License:Apache License
@SuppressWarnings("unchecked") protected byte[] getHadoopCredentials(Map conf) { try {/*from w ww. ja v a2 s .c o m*/ if (UserGroupInformation.isSecurityEnabled()) { final Configuration configuration = new Configuration(); login(configuration); final String topologySubmitterUser = (String) conf.get(Config.TOPOLOGY_SUBMITTER_PRINCIPAL); final URI nameNodeURI = conf.containsKey(TOPOLOGY_HDFS_URI) ? new URI(conf.get(TOPOLOGY_HDFS_URI).toString()) : FileSystem.getDefaultUri(configuration); UserGroupInformation ugi = UserGroupInformation.getCurrentUser(); final UserGroupInformation proxyUser = UserGroupInformation.createProxyUser(topologySubmitterUser, ugi); Credentials creds = (Credentials) proxyUser.doAs(new PrivilegedAction<Object>() { @Override public Object run() { try { FileSystem fileSystem = FileSystem.get(nameNodeURI, configuration); Credentials credential = proxyUser.getCredentials(); fileSystem.addDelegationTokens(hdfsPrincipal, credential); LOG.info("Delegation tokens acquired for user {}", topologySubmitterUser); return credential; } catch (IOException e) { throw new RuntimeException(e); } } }); ByteArrayOutputStream bao = new ByteArrayOutputStream(); ObjectOutputStream out = new ObjectOutputStream(bao); creds.write(out); out.flush(); out.close(); return bao.toByteArray(); } else { throw new RuntimeException("Security is not enabled for HDFS"); } } catch (Exception ex) { throw new RuntimeException("Failed to get delegation tokens.", ex); } }
From source file:com.alibaba.jstorm.hdfs.spout.HdfsSpout.java
License:Apache License
public void open(Map conf, TopologyContext context, SpoutOutputCollector collector) {
    LOG.info("Opening HDFS Spout");
    this.conf = conf;
    this.commitTimer = new Timer();
    this.tracker = new ProgressTracker();
    this.hdfsConfig = new Configuration();
    this.collector = collector;
    this.tupleCounter = 0;

    // Hdfs related settings
    if (conf.containsKey(Configs.HDFS_URI)) {
        this.hdfsUri = conf.get(Configs.HDFS_URI).toString();
    } else {
        throw new RuntimeException(Configs.HDFS_URI + " setting is required");
    }

    try {
        this.hdfs = FileSystem.get(URI.create(hdfsUri), hdfsConfig);
    } catch (IOException e) {
        LOG.error("Unable to instantiate file system", e);
        throw new RuntimeException("Unable to instantiate file system", e);
    }

    if (conf.containsKey(configKey)) {
        Map<String, Object> map = (Map<String, Object>) conf.get(configKey);
        if (map != null) {
            for (String keyName : map.keySet()) {
                LOG.info("HDFS Config override : {} = {} ", keyName, String.valueOf(map.get(keyName)));
                this.hdfsConfig.set(keyName, String.valueOf(map.get(keyName)));
            }
            try {
                HdfsSecurityUtil.login(conf, hdfsConfig);
            } catch (IOException e) {
                LOG.error("HDFS Login failed ", e);
                throw new RuntimeException(e);
            }
        } // if (map != null)
    }

    // Reader type config
    if (conf.containsKey(Configs.READER_TYPE)) {
        readerType = conf.get(Configs.READER_TYPE).toString();
        checkValidReader(readerType);
    }

    // -- source dir config
    if (!conf.containsKey(Configs.SOURCE_DIR)) {
        LOG.error(Configs.SOURCE_DIR + " setting is required");
        throw new RuntimeException(Configs.SOURCE_DIR + " setting is required");
    }
    this.sourceDirPath = new Path(conf.get(Configs.SOURCE_DIR).toString());

    // -- archive dir config
    if (!conf.containsKey(Configs.ARCHIVE_DIR)) {
        LOG.error(Configs.ARCHIVE_DIR + " setting is required");
        throw new RuntimeException(Configs.ARCHIVE_DIR + " setting is required");
    }
    this.archiveDirPath = new Path(conf.get(Configs.ARCHIVE_DIR).toString());
    validateOrMakeDir(hdfs, archiveDirPath, "Archive");

    // -- bad files dir config
    if (!conf.containsKey(Configs.BAD_DIR)) {
        LOG.error(Configs.BAD_DIR + " setting is required");
        throw new RuntimeException(Configs.BAD_DIR + " setting is required");
    }
    this.badFilesDirPath = new Path(conf.get(Configs.BAD_DIR).toString());
    validateOrMakeDir(hdfs, badFilesDirPath, "bad files");

    // -- ignore file names config
    if (conf.containsKey(Configs.IGNORE_SUFFIX)) {
        this.ignoreSuffix = conf.get(Configs.IGNORE_SUFFIX).toString();
    }

    // -- lock dir config
    String lockDir = !conf.containsKey(Configs.LOCK_DIR)
            ? getDefaultLockDir(sourceDirPath)
            : conf.get(Configs.LOCK_DIR).toString();
    this.lockDirPath = new Path(lockDir);
    validateOrMakeDir(hdfs, lockDirPath, "locks");

    // -- lock timeout
    if (conf.get(Configs.LOCK_TIMEOUT) != null) {
        this.lockTimeoutSec = Integer.parseInt(conf.get(Configs.LOCK_TIMEOUT).toString());
    }

    // -- enable/disable ACKing
    Object ackers = conf.get(Config.TOPOLOGY_ACKER_EXECUTORS);
    if (ackers != null) {
        int ackerCount = Integer.parseInt(ackers.toString());
        this.ackEnabled = (ackerCount > 0);
        LOG.debug("ACKer count = {}", ackerCount);
    } else {
        // ackers == null when ackerCount not explicitly set on the topology
        this.ackEnabled = true;
        LOG.debug("ACK count not explicitly set on topology.");
    }
    LOG.info("ACK mode is {}", ackEnabled ? "enabled" : "disabled");

    // -- commit frequency - count
    if (conf.get(Configs.COMMIT_FREQ_COUNT) != null) {
        commitFrequencyCount = Integer.parseInt(conf.get(Configs.COMMIT_FREQ_COUNT).toString());
    }

    // -- commit frequency - seconds
    if (conf.get(Configs.COMMIT_FREQ_SEC) != null) {
        commitFrequencySec = Integer.parseInt(conf.get(Configs.COMMIT_FREQ_SEC).toString());
        if (commitFrequencySec <= 0) {
            throw new RuntimeException(Configs.COMMIT_FREQ_SEC + " setting must be greater than 0");
        }
    }

    // -- max outstanding tuples
    if (conf.get(Configs.MAX_OUTSTANDING) != null) {
        maxOutstanding = Integer.parseInt(conf.get(Configs.MAX_OUTSTANDING).toString());
    }

    // -- clocks in sync
    if (conf.get(Configs.CLOCKS_INSYNC) != null) {
        clocksInSync = Boolean.parseBoolean(conf.get(Configs.CLOCKS_INSYNC).toString());
    }

    // -- spout id
    spoutId = context.getThisComponentId();

    // setup timer for commit elapse time tracking
    setupCommitElapseTimer();
}
From source file:com.aliyun.fs.utils.OssInputUtils.java
License:Apache License
public FileSplit[] getSplits(String file, int numSplits) throws IOException {
    Path path = new Path(file);
    this.fs = FileSystem.get(path.toUri(), conf);
    fs.initialize(path.toUri(), conf);
    FileStatus[] files = fs.listStatus(path);
    long totalSize = 0;
    for (FileStatus file1 : files) {
        if (file1.isDirectory()) {
            throw new IOException("Not a file: " + file1.getPath());
        }
        totalSize += file1.getLen();
    }
    long goalSize = totalSize / (numSplits == 0 ? 1 : numSplits);
    long minSize = Math.max(
            conf.getLong(org.apache.hadoop.mapreduce.lib.input.FileInputFormat.SPLIT_MINSIZE, 1), 1);
    ArrayList<FileSplit> splits = new ArrayList<FileSplit>(numSplits);
    for (FileStatus file2 : files) {
        Path fp = file2.getPath();
        long length = file2.getLen();
        if (length != 0) {
            long splitSize = Math.max(minSize, goalSize);
            long bytesRemaining = length;
            while (((double) bytesRemaining) / splitSize > SPLIT_SLOP) {
                FileSplit split = new FileSplit(fp, length - bytesRemaining, splitSize, new String[0]);
                splits.add(split);
                bytesRemaining -= splitSize;
            }
            if (bytesRemaining != 0) {
                FileSplit split = new FileSplit(fp, length - bytesRemaining, bytesRemaining, new String[0]);
                splits.add(split);
            }
        }
    }
    LOG.info("Total # of splits: " + splits.size());
    return splits.toArray(new FileSplit[splits.size()]);
}
From source file:com.aliyun.fs.utils.OssInputUtils.java
License:Apache License
public RecordReader<LongWritable, Text> getOssRecordReader(FileSplit fileSplit, Configuration conf) throws IOException {
    String delimiter = conf.get("textinputformat.record.delimiter");
    byte[] recordDelimiterBytes = null;
    if (null != delimiter) {
        recordDelimiterBytes = delimiter.getBytes(Charsets.UTF_8);
    }
    if (fs == null) {
        this.fs = FileSystem.get(fileSplit.getPath().toUri(), conf);
        fs.initialize(fileSplit.getPath().toUri(), conf);
    }
    return new OssRecordReader(conf, fileSplit, fs, recordDelimiterBytes);
}
From source file:com.asakusafw.bulkloader.cache.CacheBuildTest.java
License:Apache License
/**
 * Initializes the test.
 * @throws Exception if an error occurs
 */
@Before
public void setUp() throws Exception {
    URI uri = getTargetUri();
    FileSystem fs = FileSystem.get(uri, getConfiguration());
    fs.delete(new Path(uri), true);
}
From source file:com.asakusafw.bulkloader.collector.ExportFileSend.java
License:Apache License
/**
 * Reads the export data as TSV and sends it to the
 * {@link com.asakusafw.bulkloader.transfer.FileList.Writer}.
 * (The original Javadoc and inline comments were Japanese and arrived garbled;
 * they have been reconstructed in English from the method signature and body.)
 * @param <T> the data model type
 * @param targetTableModel the data model class of the export target table
 * @param filePath the export file path (glob) on HDFS
 * @param writer the target writer
 * @param tableName the export target table name
 * @return the number of records sent, or -1 if there was nothing to send
 * @throws BulkLoaderSystemException if sending the export data fails
 */
protected <T extends Writable> long send(Class<T> targetTableModel, String filePath, FileList.Writer writer,
        String tableName) throws BulkLoaderSystemException {
    FileSystem fs = null;
    String fileName = null;
    // maximum size (in bytes) of each file to send
    long maxSize = Long.parseLong(ConfigurationLoader.getProperty(Constants.PROP_KEY_EXP_LOAD_MAX_SIZE));
    try {
        TsvIoFactory<T> factory = new TsvIoFactory<>(targetTableModel);
        Configuration conf = new Configuration();
        fs = FileSystem.get(new URI(filePath), conf);

        // list the files that match the export path
        FileStatus[] status = fs.globStatus(new Path(filePath));
        Path[] listedPaths = FileUtil.stat2Paths(status);
        if (listedPaths == null) {
            LOG.info("TG-COLLECTOR-02006", tableName, filePath);
            return -1;
        } else {
            LOG.info("TG-COLLECTOR-02007", listedPaths.length, tableName, filePath);
        }
        long count = 0;
        boolean addEntry = false;
        for (Path path : listedPaths) {
            // skip system files
            if (isSystemFile(path)) {
                continue;
            }
            // TODO
            // open the input for the target data model
            ModelInput<T> input = TemporaryStorage.openInput(conf, targetTableModel, path);
            try {
                while (true) {
                    addEntry = true;
                    fileName = FileNameUtil.createSendExportFileName(tableName, fileNameMap);
                    OutputStream output = writer.openNext(FileList.content(fileName));
                    try {
                        CountingOutputStream counter = new CountingOutputStream(output);
                        ModelOutput<T> modelOut = factory.createModelOutput(counter);
                        T model = factory.createModelObject();
                        LOG.info("TG-COLLECTOR-02004", tableName, path.toString(), fileName);
                        // read each record and write it out as TSV
                        boolean nextFile = false;
                        while (input.readTo(model)) {
                            modelOut.write(model);
                            count++;
                            // switch to the next file once the written size exceeds the maximum
                            // (the size is counted in bytes, so the actual cut point is approximate)
                            if (counter.getByteCount() > maxSize) {
                                nextFile = true;
                                break;
                            }
                        }
                        modelOut.close();
                        LOG.info("TG-COLLECTOR-02005", tableName, path.toString(), fileName);
                        if (nextFile) {
                            // more input remains; continue with the next output file
                            continue;
                        } else {
                            // all input for this path has been consumed
                            break;
                        }
                    } finally {
                        output.close();
                    }
                }
            } finally {
                input.close();
            }
        }
        if (addEntry) {
            return count;
        } else {
            assert count == 0;
            return -1;
        }
    } catch (IOException e) {
        throw new BulkLoaderSystemException(e, getClass(), "TG-COLLECTOR-02001",
                MessageFormat.format("HDFS path: {0}, file name: {1}", filePath, fileName));
    } catch (URISyntaxException e) {
        throw new BulkLoaderSystemException(e, getClass(), "TG-COLLECTOR-02001",
                MessageFormat.format("Invalid HDFS URI: {0}", filePath));
    } finally {
        if (fs != null) {
            try {
                fs.close();
            } catch (IOException e) {
                throw new BulkLoaderSystemException(e, this.getClass(), "TG-COLLECTOR-02001",
                        MessageFormat.format("Failed to close the HDFS file system for URI: {0}", filePath));
            }
        }
    }
}