List of usage examples for org.apache.hadoop.fs.FileSystem.listStatus
public FileStatus[] listStatus(Path[] files) throws FileNotFoundException, IOException
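Before the project-specific examples, a minimal sketch of the basic call pattern. Most examples below use the single-argument overload listStatus(Path); the directory path used here is a hypothetical placeholder, not taken from any of the source files that follow.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class ListStatusExample {
  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    // hypothetical directory; point this at a path that exists on your cluster
    Path dir = new Path("/tmp/example-dir");
    FileSystem fs = dir.getFileSystem(conf);
    // listStatus returns the immediate children of the directory (not recursive)
    for (FileStatus status : fs.listStatus(dir)) {
      System.out.println(status.getPath() + "\t" + status.getLen() + " bytes");
    }
  }
}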
From source file:com.skp.experiment.common.mapreduce.MapFileOutputFormat.java
License:Apache License
/** Open the output generated by this format. */
public static MapFile.Reader[] getReaders(Path dir, Configuration conf) throws IOException {
  FileSystem fs = dir.getFileSystem(conf);
  Path[] names = FileUtil.stat2Paths(fs.listStatus(dir));
  // sort names, so that hash partitioning works
  Arrays.sort(names);
  MapFile.Reader[] parts = new MapFile.Reader[names.length];
  for (int i = 0; i < names.length; i++) {
    parts[i] = new MapFile.Reader(fs, names[i].toString(), conf);
  }
  return parts;
}
From source file:com.splicemachine.derby.impl.io.HdfsDirFile.java
License:Apache License
@Override
public String[] list() {
  try {
    FileSystem fs = getFileSystem();
    FileStatus[] fileStatuses = fs.listStatus(new Path(path));
    String[] list = new String[fileStatuses.length];
    for (int i = 0; i < fileStatuses.length; i++) {
      list[i] = fileStatuses[i].getPath().getName();
    }
    return list;
  } catch (IOException e) {
    LOG.error(String.format(
        "An exception occurred while listing the files and directories in the path '%s'.", path), e);
    return null;
  }
}
From source file:com.splout.db.common.SploutHadoopConfiguration.java
License:Apache License
/**
 * Adds the SQLite native libraries to the DistributedCache so that they will be present in the java.library.path
 * of the child's Hadoop task.
 * <p/>
 * Usually you don't need to do this as the task will already try to load them from the job's uncompressed JAR, however
 * it is not assured that all Hadoop versions do the uncompressing of the JAR so in this case it's safer to use this.
 */
public static void addSQLite4JavaNativeLibsToDC(Configuration conf, File nativeLibsLocalPath)
    throws IOException, URISyntaxException {
  Path nativeLibHdfs = new Path("splout-native");
  FileSystem fS = FileSystem.get(conf);
  if (fS.exists(nativeLibHdfs)) {
    fS.delete(nativeLibHdfs, true);
  }
  fS.mkdirs(nativeLibHdfs);
  // Copy native libs to HDFS
  File[] natives = nativeLibsLocalPath.listFiles();
  if (natives == null) {
    throw new RuntimeException(
        "natives lib folder not present in local working directory! Are you in SPLOUT_HOME?");
  }
  for (File nativeLib : natives) {
    FileUtil.copy(nativeLib, fS, nativeLibHdfs, false, conf);
  }
  for (FileStatus nativeLibInHdfs : fS.listStatus(nativeLibHdfs)) {
    // http://hadoop.apache.org/docs/r0.20.2/native_libraries.html#Loading+native+libraries+through+DistributedCache
    DistributedCache.createSymlink(conf);
    URI uriToAdd = new URI(
        nativeLibInHdfs.getPath().makeQualified(fS) + "#" + nativeLibInHdfs.getPath().getName());
    DistributedCache.addCacheFile(uriToAdd, conf);
    log.info("Adding to distributed cache: " + uriToAdd);
  }
}
From source file:com.splout.db.examples.PageCountsExample.java
License:Apache License
@Override
public int run(String[] args) throws Exception {
  // Validate params etc
  JCommander jComm = new JCommander(this);
  jComm.setProgramName("Splout Page Counts example");
  try {
    jComm.parse(args);
  } catch (ParameterException e) {
    System.err.println(e.getMessage());
    jComm.usage();
    System.exit(-1);
  }

  boolean generate = !noGenerate; // just for clarifying
  if (generateTupleFiles && deploy) {
    System.err.println("Can't run a 'dry' TupleFile generation and deploy it.");
    jComm.usage();
    System.exit(-1);
  }

  Path outPath = new Path(outputPath);
  FileSystem outFs = outPath.getFileSystem(getConf());

  if (!FileSystem.getLocal(conf).equals(FileSystem.get(conf))) {
    File nativeLibs = new File("native");
    if (nativeLibs.exists()) {
      SploutHadoopConfiguration.addSQLite4JavaNativeLibsToDC(conf);
    }
  }

  if (generate) {
    Path inputPath = new Path(this.inputPath);
    FileSystem inputFileSystem = inputPath.getFileSystem(conf);
    FileStatus[] fileStatuses = inputFileSystem.listStatus(inputPath);

    // define the schema that the resultant table will have: date, hour, pagename, pageviews
    final Schema tableSchema = new Schema("pagecounts",
        Fields.parse("date:string, hour:string, pagename:string, pageviews:int"));
    // define the schema of the input files: projectcode, pagename, pageviews, bytes
    Schema fileSchema = new Schema("pagecountsfile",
        Fields.parse("projectcode:string, pagename:string, pageviews:int, bytes:long"));

    // instantiate a TableBuilder
    TableBuilder tableBuilder = new TableBuilder(tableSchema);

    // for every input file...
    for (FileStatus fileStatus : fileStatuses) {
      String fileName = fileStatus.getPath().getName().toString();
      // strip the date and the hour from the file name
      String fileDate = fileName.split("-")[1];
      String fileHour = fileName.split("-")[2].substring(0, 2);
      // instantiate a custom RecordProcessor to process the records of this file
      PageCountsRecordProcessor recordProcessor = new PageCountsRecordProcessor(tableSchema, fileDate, fileHour);
      // use the tableBuilder method for adding each of the files to the mix
      tableBuilder.addCSVTextFile(fileStatus.getPath(), ' ', TupleTextInputFormat.NO_QUOTE_CHARACTER,
          TupleTextInputFormat.NO_ESCAPE_CHARACTER, false, false, TupleTextInputFormat.NO_NULL_STRING,
          fileSchema, recordProcessor);
    }

    // partition the dataset by pagename - which should give a fair even distribution.
    tableBuilder.partitionBy("pagename");
    // create a compound index on pagename, date so that typical queries for the dataset will be fast
    tableBuilder.createIndex("pagename", "date");

    long nonExactPageSize = memoryForIndexing / 32000; // number of pages
    int pageSize = (int) Math.pow(2, (int) Math.round(Math.log(nonExactPageSize) / Math.log(2)));
    Log.info("Pagesize = " + pageSize + " as memory for indexing was [" + memoryForIndexing
        + "] and there are 32000 pages.");

    tableBuilder.initialSQL("pragma page_size=" + pageSize);
    // insertion order is very important for optimizing query speed because it makes data be co-located in disk
    tableBuilder.insertionSortOrder(OrderBy.parse("pagename:asc, date:asc"));

    // instantiate a TablespaceBuilder
    TablespaceBuilder tablespaceBuilder = new TablespaceBuilder();

    // we will partition this dataset in as many partitions as:
    tablespaceBuilder.setNPartitions(nPartitions);
    tablespaceBuilder.add(tableBuilder.build());
    // we turn a specific SQLite pragma on for making autocomplete queries fast
    tablespaceBuilder.initStatements("pragma case_sensitive_like=true;");

    HadoopUtils.deleteIfExists(outFs, outPath);

    // finally, instantiate a TablespaceGenerator and execute it
    TablespaceGenerator tablespaceViewBuilder;

    if (generateTupleFiles) {
      // we subclass TablespaceGenerator to be able to run the generation without outputting the SQLite stores, for
      // benchmark comparisons.
      // In the future this feature may be useful in general for debugging store creation.
      tablespaceViewBuilder = new TablespaceGenerator(tablespaceBuilder.build(), outPath, this.getClass()) {

        @Override
        public void generateView(Configuration conf, SamplingType samplingType, SamplingOptions samplingOptions)
            throws Exception {

          prepareOutput(conf);
          final int nPartitions = tablespace.getnPartitions();
          if (nPartitions > 1) {
            partitionMap = sample(nPartitions, conf, samplingType, samplingOptions);
          } else {
            partitionMap = PartitionMap.oneShardOpenedMap();
          }
          writeOutputMetadata(conf);

          TupleMRBuilder builder = createMRBuilder(nPartitions, conf);
          // Set a TupleOutput here instead of SQLiteOutput
          builder.setOutput(new Path(outputPath, OUT_STORE), new TupleOutputFormat(tableSchema), ITuple.class,
              NullWritable.class);
          executeViewGeneration(builder);
        }
      };
    } else {
      // ... otherwise a standard TablespaceGenerator is used.
      tablespaceViewBuilder = new TablespaceGenerator(tablespaceBuilder.build(), outPath, this.getClass());
    }

    tablespaceViewBuilder.generateView(getConf(), SamplingType.FULL_SCAN,
        new TupleSampler.FullScanSamplingOptions());
  }

  if (deploy) {
    // use StoreDeployerTool for deploying the already generated dataset
    StoreDeployerTool deployer = new StoreDeployerTool(qnode, getConf());
    ArrayList<TablespaceDepSpec> deployments = new ArrayList<TablespaceDepSpec>();
    deployments.add(new TablespaceDepSpec("pagecounts", outPath.toString(), repFactor, null));
    deployer.deploy(deployments);
  }

  return 1;
}
From source file:com.splout.db.hadoop.TupleSampler.java
License:Apache License
@SuppressWarnings("deprecation")
private long fullScanSampling(TablespaceSpec tablespace, final long sampleSize, Configuration hadoopConf,
    Path outputPath, final int nSplits) throws TupleSamplerException {

  MapOnlyJobBuilder builder = new MapOnlyJobBuilder(hadoopConf, "Reservoir Sampling to path " + outputPath);

  for (Table table : tablespace.getPartitionedTables()) {
    final TableSpec tableSpec = table.getTableSpec();
    final String getPartitionByJavaScript = tableSpec.getPartitionByJavaScript();
    for (TableInput inputFile : table.getFiles()) {
      final RecordProcessor processor = inputFile.getRecordProcessor();
      for (Path path : inputFile.getPaths()) {
        builder.addInput(path, inputFile.getFormat(),
            new MapOnlyMapper<ITuple, NullWritable, Text, NullWritable>() {

              final int nSamples = (int) (sampleSize / nSplits);
              final String[] samples = new String[nSamples];

              CounterInterface counterInterface;
              long recordCounter = 0;

              JavascriptEngine jsEngine = null;

              @Override
              protected void setup(Context context, MultipleOutputsCollector coll)
                  throws IOException, InterruptedException {
                counterInterface = new CounterInterface(context);
                // Initialize JavaScript engine if needed
                if (getPartitionByJavaScript != null) {
                  try {
                    jsEngine = new JavascriptEngine(getPartitionByJavaScript);
                  } catch (Throwable e) {
                    throw new RuntimeException(e);
                  }
                }
              }

              // Collect Tuples with decreasing probability
              // (http://en.wikipedia.org/wiki/Reservoir_sampling)
              protected void map(ITuple key, NullWritable value, Context context)
                  throws IOException, InterruptedException {
                ITuple uTuple;
                try {
                  uTuple = processor.process(key, key.getSchema().getName(), counterInterface);
                } catch (Throwable e) {
                  throw new RuntimeException(e);
                }
                if (uTuple == null) { // user may have filtered the record
                  return;
                }

                long reservoirIndex;
                if (recordCounter < nSamples) {
                  reservoirIndex = recordCounter;
                } else {
                  reservoirIndex = (long) (Math.random() * recordCounter);
                }

                if (reservoirIndex < nSamples) {
                  String pkey = null;
                  try {
                    pkey = TablespaceGenerator.getPartitionByKey(uTuple, tableSpec, jsEngine);
                  } catch (Throwable e) {
                    throw new RuntimeException("Error when determining partition key.", e);
                  }
                  samples[(int) reservoirIndex] = pkey;
                }

                recordCounter++;
              }

              // Write the in-memory sampled Tuples
              protected void cleanup(Context context, MultipleOutputsCollector coll)
                  throws IOException, InterruptedException {
                Text key = new Text();
                for (String keyStr : samples) {
                  if (keyStr != null) {
                    key.set(keyStr);
                    context.write(key, NullWritable.get());
                  }
                }
              }
            }, inputFile.getSpecificHadoopInputFormatContext());
      }
    }
  }

  // Set output path
  Path outReservoirPath = new Path(outputPath + "-reservoir");
  builder.setOutput(outReservoirPath, new HadoopOutputFormat(SequenceFileOutputFormat.class), Text.class,
      NullWritable.class);
  builder.setJarByClass(callingClass);

  try {
    Job job = null;
    job = builder.createJob();

    if (!job.waitForCompletion(true)) {
      throw new TupleSamplerException("Reservoir Sampling failed!");
    }
  } catch (Exception e) {
    throw new TupleSamplerException("Error creating or launching the sampling job.", e);
  } finally {
    try {
      builder.cleanUpInstanceFiles();
    } catch (IOException e) {
      throw new TupleSamplerException("Error cleaning up the sampling job.", e);
    }
  }

  long retrievedSamples = 0;
  try {
    FileSystem outFs = outReservoirPath.getFileSystem(hadoopConf);
    if (outFs.listStatus(outReservoirPath) == null) {
      throw new IOException("Output folder not created: the Job failed!");
    }

    retrievedSamples = 0;

    // Instantiate the writer we will write samples to
    SequenceFile.Writer writer = new SequenceFile.Writer(outFs, hadoopConf, outputPath, Text.class,
        NullWritable.class);

    // Aggregate the output into a single file for being consistent with the other sampling methods
    for (FileStatus fileStatus : outFs.listStatus(outReservoirPath)) {
      Path thisPath = fileStatus.getPath();
      if (thisPath.getName().startsWith("part-m-")) {
        SequenceFile.Reader reader = new SequenceFile.Reader(outFs, thisPath, hadoopConf);
        Text key = new Text();
        while (reader.next(key)) {
          writer.append(key, NullWritable.get());
          retrievedSamples++;
        }
        reader.close();
      }
    }

    writer.close();
    outFs.delete(outReservoirPath, true);
  } catch (IOException e) {
    throw new TupleSamplerException("Error consolidating the sample job results into one file.", e);
  }

  return retrievedSamples;
}
From source file:com.streamsets.pipeline.stage.destination.hdfs.TestHDFSTargetWholeFile.java
License:Apache License
@Test
public void testWholeFilePermission() throws Exception {
  java.nio.file.Path filePath1 = Paths.get(getTestDir() + "/source_testWholeFilePermissionFiles1.txt");
  java.nio.file.Path filePath2 = Paths.get(getTestDir() + "/source_testWholeFilePermissionFiles2.txt");
  java.nio.file.Path filePath3 = Paths.get(getTestDir() + "/source_testWholeFilePermissionFiles3.txt");

  Files.write(filePath1, "This is a sample file 1 with some text".getBytes());
  Files.write(filePath2, "This is a sample file 2 with some text".getBytes());
  Files.write(filePath3, "This is a sample file 3 with some text".getBytes());

  HdfsTarget hdfsTarget = HdfsTargetUtil.newBuilder().hdfsUri(uri.toString()).dirPathTemplate(getTestDir())
      .timeDriver("${time:now()}").dataForamt(DataFormat.WHOLE_FILE).fileType(HdfsFileType.WHOLE_FILE)
      .fileNameEL("${record:value('/fileInfo/filename')}").maxRecordsPerFile(1).maxFileSize(0)
      .uniquePrefix("sdc-").idleTimeout("-1").permissionEL("${record:value('/fileInfo/permissions')}")
      .lateRecordsAction(LateRecordsAction.SEND_TO_LATE_RECORDS_FILE).build();

  TargetRunner runner = new TargetRunner.Builder(HdfsDTarget.class, hdfsTarget)
      .setOnRecordError(OnRecordError.STOP_PIPELINE).build();

  runner.runInit();
  try {
    runner.runWrite(Arrays.asList(
        getFileRefRecordForFile(filePath1, "755"), // posix style
        getFileRefRecordForFile(filePath2, "rwxr--r--"), // unix style
        getFileRefRecordForFile(filePath3, "-rw-rw----")));

    org.apache.hadoop.fs.Path targetPath1 = new org.apache.hadoop.fs.Path(
        getTestDir() + "/sdc-" + filePath1.getFileName());
    org.apache.hadoop.fs.Path targetPath2 = new org.apache.hadoop.fs.Path(
        getTestDir() + "/sdc-" + filePath2.getFileName());
    org.apache.hadoop.fs.Path targetPath3 = new org.apache.hadoop.fs.Path(
        getTestDir() + "/sdc-" + filePath3.getFileName());

    FileSystem fs = FileSystem.get(uri, new HdfsConfiguration());
    Assert.assertTrue(fs.exists(targetPath1));
    Assert.assertTrue(fs.exists(targetPath2));
    Assert.assertTrue(fs.exists(targetPath3));

    FsPermission actual1 = fs.listStatus(targetPath1)[0].getPermission();
    FsPermission actual2 = fs.listStatus(targetPath2)[0].getPermission();
    FsPermission actual3 = fs.listStatus(targetPath3)[0].getPermission();

    FsPermission expected1 = new FsPermission("755");
    FsPermission expected2 = FsPermission.valueOf("-rwxr--r--");
    FsPermission expected3 = FsPermission.valueOf("-rw-rw----");

    Assert.assertEquals(expected1, actual1);
    Assert.assertEquals(expected2, actual2);
    Assert.assertEquals(expected3, actual3);
  } finally {
    runner.runDestroy();
  }
}
From source file:com.streamsets.pipeline.stage.origin.hdfs.cluster.ClusterHdfsSource.java
License:Apache License
@Override
public List<ConfigIssue> init() {
  List<ConfigIssue> issues = super.init();
  validateHadoopFS(issues);
  // This is for getting no of splits - no of executors
  hadoopConf.set(FileInputFormat.LIST_STATUS_NUM_THREADS, "5"); // Per Hive-on-Spark
  hadoopConf.set(FileInputFormat.SPLIT_MAXSIZE, String.valueOf(750000000)); // Per Hive-on-Spark
  for (Map.Entry<String, String> config : hdfsConfigs.entrySet()) {
    hadoopConf.set(config.getKey(), config.getValue());
  }
  List<Path> hdfsDirPaths = new ArrayList<>();
  if (hdfsDirLocations == null || hdfsDirLocations.isEmpty()) {
    issues.add(getContext().createConfigIssue(Groups.HADOOP_FS.name(), "hdfsDirLocations", Errors.HADOOPFS_18));
  } else if (issues.isEmpty()) {
    for (String hdfsDirLocation : hdfsDirLocations) {
      try {
        FileSystem fs = getFileSystemForInitDestroy();
        Path ph = fs.makeQualified(new Path(hdfsDirLocation));
        hdfsDirPaths.add(ph);
        if (!fs.exists(ph)) {
          issues.add(getContext().createConfigIssue(Groups.HADOOP_FS.name(), "hdfsDirLocations",
              Errors.HADOOPFS_10, hdfsDirLocation));
        } else if (!fs.getFileStatus(ph).isDirectory()) {
          issues.add(getContext().createConfigIssue(Groups.HADOOP_FS.name(), "hdfsDirLocations",
              Errors.HADOOPFS_15, hdfsDirLocation));
        } else {
          try {
            FileStatus[] files = fs.listStatus(ph);
            if (files == null || files.length == 0) {
              issues.add(getContext().createConfigIssue(Groups.HADOOP_FS.name(), "hdfsDirLocations",
                  Errors.HADOOPFS_16, hdfsDirLocation));
            } else if (getContext().isPreview() && previewBuffer.size() < PREVIEW_SIZE) {
              for (FileStatus fileStatus : files) {
                if (fileStatus.isFile()) {
                  String path = fileStatus.getPath().toString();
                  try {
                    List<Map.Entry> buffer;
                    if (dataFormat == DataFormat.AVRO) {
                      buffer = previewAvroBatch(fileStatus, PREVIEW_SIZE);
                    } else {
                      buffer = previewTextBatch(fileStatus, PREVIEW_SIZE);
                    }
                    for (int i = 0; i < buffer.size() && previewBuffer.size() < PREVIEW_SIZE; i++) {
                      Map.Entry entry = buffer.get(i);
                      previewBuffer.put(String.valueOf(entry.getKey()),
                          entry.getValue() == null ? null : entry.getValue());
                    }
                  } catch (IOException | InterruptedException ex) {
                    String msg = "Error opening " + path + ": " + ex;
                    LOG.info(msg, ex);
                    issues.add(getContext().createConfigIssue(Groups.HADOOP_FS.name(), "hdfsDirLocations",
                        Errors.HADOOPFS_16, fileStatus.getPath()));
                  }
                }
              }
            }
          } catch (IOException ex) {
            issues.add(getContext().createConfigIssue(Groups.HADOOP_FS.name(), "hdfsDirLocations",
                Errors.HADOOPFS_09, hdfsDirLocation, ex.toString(), ex));
          }
        }
      } catch (IOException ioe) {
        LOG.warn("Error connecting to HDFS filesystem: " + ioe, ioe);
        issues.add(getContext().createConfigIssue(Groups.HADOOP_FS.name(), "hdfsDirLocations",
            Errors.HADOOPFS_11, hdfsDirLocation, ioe.toString(), ioe));
      }
    }
  }
  hadoopConf.set(FileInputFormat.INPUT_DIR, StringUtils.join(hdfsDirPaths, ","));
  hadoopConf.set(FileInputFormat.INPUT_DIR_RECURSIVE, Boolean.toString(recursive));
  switch (dataFormat) {
  case JSON:
    if (jsonMaxObjectLen < 1) {
      issues.add(getContext().createConfigIssue(Groups.JSON.name(), "jsonMaxObjectLen", Errors.HADOOPFS_04));
    }
    break;
  case TEXT:
    if (textMaxLineLen < 1) {
      issues.add(getContext().createConfigIssue(Groups.TEXT.name(), "textMaxLineLen", Errors.HADOOPFS_05));
    }
    break;
  case LOG:
    logDataFormatValidator = new LogDataFormatValidator(logMode, logMaxObjectLen, retainOriginalLine,
        customLogFormat, regex, grokPatternDefinition, grokPattern, enableLog4jCustomLogFormat,
        log4jCustomLogFormat, OnParseError.ERROR, 0, Groups.LOG.name(),
        getFieldPathToGroupMap(fieldPathsToGroupName));
    logDataFormatValidator.validateLogFormatConfig(issues, getContext());
    break;
  case DELIMITED:
    if (csvMaxObjectLen < 1) {
      issues.add(getContext().createConfigIssue(Groups.DELIMITED.name(), "csvMaxObjectLen", Errors.HADOOPFS_30));
    }
    break;
  case AVRO:
    if (avroSchema != null && !avroSchema.isEmpty()) {
      hadoopConf.set(AvroJob.INPUT_SCHEMA, avroSchema);
      hadoopConf.set(CONF_INPUT_KEY_SCHEMA, avroSchema);
    }
    break;
  default:
    issues.add(getContext().createConfigIssue(Groups.LOG.name(), "dataFormat", Errors.HADOOPFS_06, dataFormat));
  }
  validateParserFactoryConfigs(issues);
  LOG.info("Issues: " + issues);
  return issues;
}
From source file:com.stumbleupon.hbaseadmin.ClusterUtils.java
License:Open Source License
/**
 * Remove any regions that do not qualify for compaction
 * @param admin The hbase admin
 * @param serverName The server name
 * @param server The HRegion interface
 * @return The filtered regions
 * @throws IOException
 */
private HRegionInfo getNextEligibleRegion(HBaseAdmin admin, ServerName serverName, HRegionInterface server)
    throws IOException {
  HRegionInfo ret = null;
  List<HRegionInfo> onlineRegions = server.getOnlineRegions();
  String hostport = serverName.getHostAndPort();
  HServerLoad serverLoad = clusterStatus.getLoad(serverName);
  if (serverLoad == null) {
    LOG.warn("Skipping server {} because could not get server load", hostport);
  } else {
    List<String> tableNames = compact.getTableNames();
    boolean excludeFromList = compact.getExcludeTables();
    Map<byte[], RegionLoad> regionLoadMap = serverLoad.getRegionsLoad();
    List<String> reasons = new ArrayList<String>();

    for (HRegionInfo region : onlineRegions) {
      String regionName = region.getRegionNameAsString();
      String tableName = region.getTableNameAsString();
      reasons.clear();

      // Ignore any regions in tables that are marked as excluded
      if (tableNames.size() > 0) {
        if (excludeFromList && tableNames.contains(tableName)) {
          continue;
        } else if (!excludeFromList && !tableNames.contains(tableName)) {
          continue;
        } else if (LOG.isDebugEnabled()) {
          reasons.add(hostport + " [" + regionName + "] qualifies because its table '" + tableName
              + "' has NOT been excluded");
        }
      }

      // Ignore any regions that we have already visited/compacted
      if (visitedRegions.isRegionVisited(hostport, regionName)) {
        continue;
      } else if (LOG.isDebugEnabled()) {
        reasons.add(hostport + " [" + regionName + "] qualifies because it has NOT been visited");
      }

      // Remove any regions that do not have enough store files to qualify for compaction
      RegionLoad regionLoad = regionLoadMap.get(region.getRegionName());
      boolean isRegionEligible = true;
      if (regionLoad == null) {
        LOG.warn("Could not get region load for '{}'. Skipping region...", regionName);
        continue;
      } else {
        try {
          int numFamilies = getTableDescriptor(admin, region).getColumnFamilies().length;
          int numRegionStoreFiles = regionLoad.getStorefiles();
          int minStoreFilesNeeded = compact.getNumStoreFiles() * numFamilies;
          if (numRegionStoreFiles >= minStoreFilesNeeded) {
            isRegionEligible = true;
            if (LOG.isDebugEnabled()) {
              reasons.add(hostport + " [" + regionName + "] qualifies because it has a total of "
                  + numRegionStoreFiles + " store files in " + numFamilies + " families");
            }
          } else {
            if (LOG.isDebugEnabled()) {
              reasons.add(hostport + " [" + regionName + "] does not qualify because it has a total of "
                  + numRegionStoreFiles + " store files in " + numFamilies + " families. Needs at least "
                  + minStoreFilesNeeded);
            }
            isRegionEligible = false;
          }
        } catch (TableNotFoundException e) {
          LOG.error("Could not determine if region '{}' is eligible. Skipping region.", regionName, e);
          continue;
        } catch (IOException e) {
          LOG.error("Could not determine if region '{}' is eligible. Skipping region.", regionName, e);
          continue;
        } catch (Exception e) {
          LOG.error("Could not determine if region '{}' is eligible. Skipping region.", regionName, e);
          continue;
        }
      }

      // If enabled, force compaction of any regions that contain store files older than maxStoreFileAge
      if (!isRegionEligible && compact.getMaxStoreFileAge() > 0) {
        List<String> files = server.getStoreFileList(region.getRegionName());
        FileSystem fs = FileSystem.get(admin.getConfiguration());
        if (files != null) {
          Path[] filePaths = new Path[files.size()];
          for (int i = 0; i < files.size(); i++) {
            // build a Path for each store file in the list
            filePaths[i] = new Path(files.get(i));
          }
          long maxStoreFileAge = compact.getMaxStoreFileAge();
          long now = System.currentTimeMillis();
          FileStatus[] storeFilesStatus = fs.listStatus(filePaths);
          for (FileStatus fileStatus : storeFilesStatus) {
            long storeFileAge = now - fileStatus.getModificationTime();
            if (storeFileAge > maxStoreFileAge) {
              isRegionEligible = true;
              if (LOG.isDebugEnabled()) {
                reasons.add(hostport + " [" + regionName + "] forced to qualify because "
                    + "at least one store file is older than the specified maxStoreFileAge");
              }
              break;
            }
          }
        }
      }

      if (isRegionEligible) {
        if (reasons.size() > 0) {
          for (String reason : reasons) {
            LOG.debug(reason);
          }
        }
        ret = region;
        break;
      }
    }
  }
  return ret;
}
From source file:com.talis.hadoop.rdf.RdfSolrJob.java
License:Apache License
private void writeShardManifest(String manifestLocation, String shardLocation, Configuration configuration)
    throws IOException {
  Path shardsPath = new Path(INTERMEDIATE_SHARDS_URI);
  FileSystem fs = FileSystem.get(shardsPath.toUri(), configuration);
  StringBuffer buf = new StringBuffer();
  for (FileStatus status : fs.listStatus(shardsPath)) {
    LOG.info(status.getPath() + " : " + status.isDir());
    if (status.isDir()) {
      buf.append(status.getPath());
      buf.append("\n");
    }
  }
  FSDataOutputStream out = fs.create(new Path(manifestLocation));
  out.write(buf.toString().getBytes());
  out.flush();
  out.close();
}
From source file:com.taobao.datax.plugins.common.DFSUtils.java
License:Open Source License
/**
 * List the statuses of the files/directories in the given path if the path
 * is a directory.
 *
 * @param dfs
 *            handle of {@link FileSystem}
 *
 * @param srcpath
 *            Path in {@link FileSystem}
 *
 * @param isGlob
 *            need to use file pattern
 *
 * @return all {@link Path} in srcpath
 *
 * @throws IOException
 */
public static List<Path> listDir(FileSystem dfs, Path srcpath, boolean isGlob) throws IOException {
  List<Path> list = new ArrayList<Path>();
  FileStatus[] status = null;
  if (isGlob) {
    status = dfs.globStatus(srcpath);
  } else {
    status = dfs.listStatus(srcpath);
  }
  if (status != null) {
    for (FileStatus state : status) {
      list.add(state.getPath());
    }
  }
  return list;
}