List of usage examples for org.apache.hadoop.fs FileSystem isDirectory
@Deprecated public boolean isDirectory(Path f) throws IOException
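FileSystem.isDirectory(Path) is deprecated; the Hadoop javadoc recommends calling getFileStatus(Path) and querying the returned FileStatus instead. Below is a minimal sketch of both forms against the default file system; the class name and the path /tmp/data are placeholder assumptions, not taken from any of the examples that follow.

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class IsDirectoryExample {
    public static void main(String[] args) throws IOException {
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(conf);
        Path path = new Path("/tmp/data"); // placeholder path

        // Deprecated convenience call; returns false if the path does not exist
        boolean viaDeprecated = fs.isDirectory(path);

        // Recommended replacement: check existence first, since
        // getFileStatus throws FileNotFoundException for a missing path
        boolean viaStatus = fs.exists(path) && fs.getFileStatus(path).isDirectory();

        System.out.println(path + " is a directory: " + viaStatus
                + " (deprecated call agrees: " + viaDeprecated + ")");
    }
}

As the examples below show, the deprecated form remains widely used because it collapses the exists/getFileStatus sequence into a single call that simply returns false for a missing path.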
From source file:com.splicemachine.tutorials.vti.ORCRecordIterator.java
License:Apache License
/**
 * Constructor performs initialization and sets the record iterator.
 *
 * @param filesystem HDFS file system
 * @param filePath   file or folder path in HDFS
 * @param execRow    format of the result record
 */
public ORCRecordIterator(FileSystem filesystem, Path filePath, ExecRow execRow) {
    // Set the instance variable of filesystem to use later
    this.filesystem = filesystem;
    // Set the instance variable of the result record format to be used later
    this.execRow = execRow;
    try {
        Path curFiletoProcess = null;
        Reader reader;
        // Check if filePath specifies a file or a folder.
        // If it's a folder, set the flag and get the first file in the folder.
        if (filesystem.isDirectory(filePath)) {
            isDir = true;
            this.fileList = filesystem.listFiles(filePath, false);
            curFiletoProcess = fileList.next().getPath();
        } else {
            curFiletoProcess = filePath;
        }
        // Get the reader for the single file (first file in case of a folder)
        reader = getReader(curFiletoProcess);
        // Get the inspector for the format of the record in the ORC file
        this.inspector = (StructObjectInspector) reader.getObjectInspector();
        // Retrieve the records from the reader to process
        records = reader.rows();
    } catch (Exception e) {
        try {
            if (records != null)
                records.close();
        } catch (Exception cE) {
            throw new RuntimeException(cE);
        }
        throw new RuntimeException(e);
    }
}
From source file:com.streamsets.pipeline.spark.SparkStreamingBinding.java
License:Apache License
@Override
public void init() throws Exception {
    for (Object key : properties.keySet()) {
        logMessage("Property => " + key + " => " + properties.getProperty(key.toString()), isRunningInMesos);
    }
    final SparkConf conf = new SparkConf().setAppName("StreamSets Data Collector - Streaming Mode");
    conf.set("spark.serializer", "org.apache.spark.serializer.KryoSerializer");
    final String topic = getProperty(TOPIC);
    final long duration;
    String durationAsString = getProperty(MAX_WAIT_TIME);
    try {
        duration = Long.parseLong(durationAsString);
    } catch (NumberFormatException ex) {
        String msg = "Invalid " + MAX_WAIT_TIME + " '" + durationAsString + "' : " + ex;
        throw new IllegalArgumentException(msg, ex);
    }
    Configuration hadoopConf = new SparkHadoopUtil().newConfiguration(conf);
    if (isRunningInMesos) {
        hadoopConf = getHadoopConf(hadoopConf);
    } else {
        hadoopConf = new Configuration();
    }
    URI hdfsURI = FileSystem.getDefaultUri(hadoopConf);
    logMessage("Default FS URI: " + hdfsURI, isRunningInMesos);
    FileSystem hdfs = (new Path(hdfsURI)).getFileSystem(hadoopConf);
    Path sdcCheckpointPath = new Path(hdfs.getHomeDirectory(),
            ".streamsets-spark-streaming/" + getProperty("sdc.id") + "/" + encode(topic));
    // Encode the pipeline name, since a remote pipeline name might contain a colon
    String pipelineName = encode(getProperty("cluster.pipeline.name"));
    final Path checkPointPath = new Path(sdcCheckpointPath, pipelineName);
    hdfs.mkdirs(checkPointPath);
    if (!hdfs.isDirectory(checkPointPath)) {
        throw new IllegalStateException("Could not create checkpoint path: " + checkPointPath);
    }
    if (isRunningInMesos) {
        String scheme = hdfsURI.getScheme();
        if (scheme.equals("hdfs")) {
            File mesosBootstrapFile = BootstrapCluster.getMesosBootstrapFile();
            Path mesosBootstrapPath = new Path(checkPointPath, mesosBootstrapFile.getName());
            // In the case of HDFS, copy the jar file from the local path to HDFS
            hdfs.copyFromLocalFile(false, true, new Path(mesosBootstrapFile.toURI()), mesosBootstrapPath);
            conf.setJars(new String[] { mesosBootstrapPath.toString() });
        } else if (scheme.equals("s3") || scheme.equals("s3n") || scheme.equals("s3a")) {
            // We can't upload the jar to S3, as executors won't understand the s3 scheme
            // without the AWS jar, so have the jar available over HTTP instead
            conf.setJars(new String[] { getProperty("mesos.jar.url") });
        } else {
            throw new IllegalStateException("Unsupported scheme: " + scheme);
        }
    }
    JavaStreamingContextFactory javaStreamingContextFactory = new JavaStreamingContextFactoryImpl(conf, duration,
            checkPointPath.toString(), getProperty(METADATA_BROKER_LIST), topic,
            properties.getProperty(AUTO_OFFSET_RESET, "").trim(), isRunningInMesos);
    ssc = JavaStreamingContext.getOrCreate(checkPointPath.toString(), hadoopConf, javaStreamingContextFactory, true);
    // Mesos tries to stop the context internally, so don't do it here (deadlock bug in Spark)
    if (!isRunningInMesos) {
        final Thread shutdownHookThread = new Thread("Spark.shutdownHook") {
            @Override
            public void run() {
                LOG.debug("Gracefully stopping Spark Streaming Application");
                ssc.stop(true, true);
                LOG.info("Application stopped");
            }
        };
        Runtime.getRuntime().addShutdownHook(shutdownHookThread);
    }
    logMessage("Making calls through spark context", isRunningInMesos);
    ssc.start();
}
From source file:com.toddbodnar.simpleHive.IO.hdfsFile.java
@Override
public void resetStream() {
    try {
        if (out != null)
            out.close();
        writing = false;
        if (in != null)
            in.close();
        FileSystem fs = FileSystem.get(GetConfiguration.get());
        if (fs.isFile(location)) {
            LinkedList<FileStatus> file = new LinkedList<>();
            file.add(fs.getFileStatus(location));
            theFiles = file.iterator();
        } else {
            LinkedList<FileStatus> files = new LinkedList<>();
            RemoteIterator<LocatedFileStatus> fileremote = fs.listFiles(location, true);
            while (fileremote.hasNext())
                files.add(fileremote.next());
            theFiles = files.iterator();
        }
        // Skip directories and empty files until the first readable file is found
        FileStatus nextFileStatus;
        do {
            if (!theFiles.hasNext()) {
                System.err.println("WARNING: File is Empty");
                super.next = null;
                return;
            }
            nextFileStatus = theFiles.next();
        } while (fs.isDirectory(nextFileStatus.getPath()) || nextFileStatus.getLen() == 0);
        in = new BufferedReader(new InputStreamReader(fs.open(nextFileStatus.getPath())));
        next = in.readLine();
    } catch (FileNotFoundException ex) {
        Logger.getLogger(hdfsFile.class.getName()).log(Level.SEVERE, null, ex);
    } catch (IOException ex) {
        Logger.getLogger(hdfsFile.class.getName()).log(Level.SEVERE, null, ex);
    }
}
From source file:com.toddbodnar.simpleHive.IO.hdfsFile.java
public String readNextLine() {
    if (writing)
        resetStream();
    String toReturn = next;
    try {
        FileSystem fs = FileSystem.get(GetConfiguration.get());
        next = in.readLine();
        if (next == null) { // at the end of the current file
            // Advance to the next non-directory, non-empty file, if any
            FileStatus nextFileStatus;
            do {
                if (!theFiles.hasNext()) {
                    super.next = null;
                    return toReturn;
                }
                nextFileStatus = theFiles.next();
            } while (fs.isDirectory(nextFileStatus.getPath()) || nextFileStatus.getLen() == 0);
            in = new BufferedReader(new InputStreamReader(fs.open(nextFileStatus.getPath())));
            next = in.readLine();
        }
    } catch (IOException ex) {
        Logger.getLogger(hdfsFile.class.getName()).log(Level.SEVERE, null, ex);
    }
    return toReturn;
}
From source file:com.topsoft.botspider.util.AvroReadTool.java
License:Apache License
@Override
public int run(InputStream stdin, PrintStream out, PrintStream err, List<String> args) throws Exception {
    if (args.size() < 1) {
        // Unlike other commands, "-" can't be used for stdin, because
        // we can only use seekable files.
        err.println("Usage: input_file [-num numToRead].");
        return 1;
    }
    int numToRead = Integer.MIN_VALUE;
    for (int i = 0; i < args.size(); i++) {
        if ("-num".equals(args.get(i))) {
            numToRead = Integer.parseInt(args.get(i + 1));
            i++;
        }
    }
    Configuration conf = new Configuration();
    FileSystem fs = FileSystem.get(conf);
    Path inputFile = new Path(args.get(0));
    // If a directory was given, read the data file inside it
    if (fs.isDirectory(inputFile)) {
        inputFile = new Path(inputFile, MapAvroFile.DATA_FILE_NAME);
    }
    GenericDatumReader<Object> reader = new GenericDatumReader<Object>();
    FileReader<Object> fileReader = new DataFileReader<Object>(new FsInput(inputFile, conf), reader);
    try {
        Schema schema = fileReader.getSchema();
        DatumWriter<Object> writer = new GenericDatumWriter<Object>(schema);
        Encoder encoder = EncoderFactory.get().jsonEncoder(schema, out);
        for (Object datum : fileReader) {
            if (Integer.MIN_VALUE == numToRead || numToRead-- > 0) {
                writer.write(datum, encoder);
                encoder.flush();
            } else {
                break;
            }
        }
        out.flush();
    } finally {
        fileReader.close();
    }
    return 0;
}
From source file:com.topsoft.botspider.util.GetSchemaTool.java
License:Apache License
@Override
public int run(InputStream stdin, PrintStream out, PrintStream err, List<String> args) throws Exception {
    if (args.size() != 1) {
        err.println("Usage: input_file");
        return 1;
    }
    Configuration conf = new Configuration();
    FileSystem fs = FileSystem.get(conf);
    Path inputFile = new Path(args.get(0));
    // If a directory was given, read the data file inside it
    if (fs.isDirectory(inputFile)) {
        inputFile = new Path(inputFile, MapAvroFile.DATA_FILE_NAME);
    }
    DataFileReader<Void> reader = new DataFileReader<Void>(new FsInput(inputFile, conf),
            new GenericDatumReader<Void>());
    try {
        out.println(reader.getSchema().toString(true));
    } finally {
        reader.close();
    }
    return 0;
}
From source file:com.turn.camino.render.functions.FileSystemFunctionsTest.java
License:Open Source License
/**
 * Set up environment
 */
@BeforeClass
public void setUp() throws IOException {
    // Mock file system: /a/b is an existing directory containing three files
    FileSystem fileSystem = mock(FileSystem.class);
    FileStatus[] fss = new FileStatus[] {
            new FileStatus(1200000L, false, 3, 1000L, 1409302856296L, new org.apache.hadoop.fs.Path("/a/b/1.dat")),
            new FileStatus(1400000L, false, 3, 1000L, 1409302867303L, new org.apache.hadoop.fs.Path("/a/b/2.dat")),
            new FileStatus(1060000L, false, 3, 1000L, 1409302844187L, new org.apache.hadoop.fs.Path("/a/b/3.dat")) };
    org.apache.hadoop.fs.Path dir = new org.apache.hadoop.fs.Path("/a/b");
    when(fileSystem.exists(dir)).thenReturn(true);
    when(fileSystem.isDirectory(dir)).thenReturn(true);
    when(fileSystem.listStatus(dir)).thenReturn(fss);
    // /x/y does not exist
    when(fileSystem.exists(new org.apache.hadoop.fs.Path("/x/y"))).thenReturn(false);
    // /u/v exists but is not a directory
    dir = new org.apache.hadoop.fs.Path("/u/v");
    when(fileSystem.exists(dir)).thenReturn(true);
    when(fileSystem.isDirectory(dir)).thenReturn(false);
    // Listing /foo fails with an IOException
    doThrow(new IOException()).when(fileSystem).listStatus(new org.apache.hadoop.fs.Path("/foo"));
    // Mock environment
    context = mock(Context.class);
    Env env = mock(Env.class);
    when(context.getEnv()).thenReturn(env);
    when(env.getCurrentTime()).thenReturn(1409389256296L);
    when(env.getTimeZone()).thenReturn(TimeZone.getTimeZone("GMT"));
    when(env.getFileSystem()).thenReturn(fileSystem);
}
From source file:com.uber.hoodie.common.model.HoodieTableMetadata.java
License:Apache License
private HoodieTableMetadata(FileSystem fs, String basePath, String tableName, boolean initOnMissing) {
    this.fs = fs;
    this.basePath = basePath;
    try {
        Path basePathDir = new Path(this.basePath);
        if (!fs.exists(basePathDir)) {
            if (initOnMissing) {
                fs.mkdirs(basePathDir);
            } else {
                throw new DatasetNotFoundException(this.basePath);
            }
        }
        if (!fs.isDirectory(basePathDir)) {
            throw new DatasetNotFoundException(this.basePath);
        }
        this.metadataFolder = new Path(this.basePath, METAFOLDER_NAME);
        Path propertyPath = new Path(metadataFolder, HOODIE_PROPERTIES_FILE);
        if (!fs.exists(propertyPath)) {
            if (initOnMissing) {
                // Create the .hoodie folder if it does not exist
                createHoodieProperties(metadataFolder, tableName);
            } else {
                throw new InvalidDatasetException(this.basePath);
            }
        }
        // Load metadata
        this.commits = new HoodieCommits(scanCommits(COMMIT_FILE_SUFFIX));
        this.inflightCommits = scanCommits(INFLIGHT_FILE_SUFFIX);
        this.properties = readHoodieProperties();
        log.info("All commits :" + commits);
    } catch (IOException e) {
        throw new HoodieIOException("Could not load HoodieMetadata from path " + basePath, e);
    }
}
From source file:com.uber.hoodie.exception.DatasetNotFoundException.java
License:Apache License
public static void checkValidDataset(FileSystem fs, Path basePathDir, Path metaPathDir)
        throws DatasetNotFoundException {
    try {
        // Check if the base path is found
        if (!fs.exists(basePathDir) || !fs.isDirectory(basePathDir)) {
            throw new DatasetNotFoundException(basePathDir.toString());
        }
        // Check if the meta path is found
        if (!fs.exists(metaPathDir) || !fs.isDirectory(metaPathDir)) {
            throw new DatasetNotFoundException(metaPathDir.toString());
        }
    } catch (IllegalArgumentException e) {
        // If the base path is file:///, we get an IllegalArgumentException
        throw new DatasetNotFoundException(metaPathDir.toString());
    } catch (IOException e) {
        throw new HoodieIOException("Could not check if dataset " + basePathDir + " is valid dataset", e);
    }
}
From source file:edu.stolaf.cs.wmrserver.JobServiceHandler.java
License:Apache License
public static FileStatus[] listInputFiles(FileSystem fs, Path path) throws IOException {
    if (!fs.isDirectory(path)) {
        return new FileStatus[] { fs.getFileStatus(path) };
    } else {
        // Get all files in the directory that are not directories or hidden files
        final FileSystem fsFinal = fs;
        PathFilter filter = new PathFilter() {
            public boolean accept(Path p) {
                try {
                    return !(fsFinal.isDirectory(p) || p.getName().startsWith(".")
                            || p.getName().startsWith("_"));
                } catch (IOException ex) {
                    throw new RuntimeException("Error filtering files.", ex);
                }
            }
        };
        return fs.listStatus(path, filter);
    }
}