Example usage for org.apache.hadoop.fs FileSystem isDirectory

List of usage examples for org.apache.hadoop.fs FileSystem isDirectory

Introduction

On this page you can find example usage for org.apache.hadoop.fs FileSystem isDirectory.

Prototype

@Deprecated
public boolean isDirectory(Path f) throws IOException 

Document

True iff the named path is a directory.

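Minimal example

The following is a minimal, self-contained sketch of the call, independent of the source files listed under Usage; the class name IsDirectoryExample and the path /tmp/example are illustrative placeholders. Since the method is marked deprecated, getFileStatus(Path) combined with FileStatus#isDirectory() is a common alternative when the path is known to exist.

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class IsDirectoryExample {
    public static void main(String[] args) throws IOException {
        // Obtain the FileSystem for the default (or configured) URI
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(conf);

        // isDirectory returns true only if the path exists and is a directory;
        // a missing path yields false rather than an exception
        Path p = new Path("/tmp/example");
        if (fs.isDirectory(p)) {
            System.out.println(p + " is a directory");
        } else {
            System.out.println(p + " is a file or does not exist");
        }
    }
}
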
Usage

From source file:com.splicemachine.tutorials.vti.ORCRecordIterator.java

License:Apache License

/**
 * Constructor performs initialization and sets the record iterator
 * @param filesystem
 *            : HDFS File System
 * @param filePath
 *            : File or folder path in HDFS
 * @param execRow
 *            : Format of the result record
 * @throws IOException
 */
public ORCRecordIterator(FileSystem filesystem, Path filePath, ExecRow execRow) {
    // set the instance variable of filesystem to use later
    this.filesystem = filesystem;

    // Set the instance variable of the result record format to be used
    // later
    this.execRow = execRow;

    try {
        Path curFiletoProcess = null;
        Reader reader;

        // Check if filePath specifies a file or a folder
        // If it's a folder, set the flag and get the first file in the folder
        if (filesystem.isDirectory(filePath)) {
            isDir = true;
            this.fileList = filesystem.listFiles(filePath, false);
            curFiletoProcess = fileList.next().getPath();
        } else {

            curFiletoProcess = filePath;
        }

        // Get the reader for the single file (first file in case of folder)
        reader = getReader(curFiletoProcess);

        // Get the inspector for the format of the record in the ORC File
        this.inspector = (StructObjectInspector) reader.getObjectInspector();

        // Retrieve the Records from reader to process
        records = reader.rows();

    } catch (Exception e) {
        try {
            if (records != null)
                records.close();
        } catch (Exception cE) {
            throw new RuntimeException(cE);
        }
        throw new RuntimeException(e);
    }

}

From source file:com.streamsets.pipeline.spark.SparkStreamingBinding.java

License:Apache License

@Override
public void init() throws Exception {
    for (Object key : properties.keySet()) {
        logMessage("Property => " + key + " => " + properties.getProperty(key.toString()), isRunningInMesos);
    }
    final SparkConf conf = new SparkConf().setAppName("StreamSets Data Collector - Streaming Mode");
    conf.set("spark.serializer", "org.apache.spark.serializer.KryoSerializer");
    final String topic = getProperty(TOPIC);
    final long duration;
    String durationAsString = getProperty(MAX_WAIT_TIME);
    try {
        duration = Long.parseLong(durationAsString);
    } catch (NumberFormatException ex) {
        String msg = "Invalid " + MAX_WAIT_TIME + " '" + durationAsString + "' : " + ex;
        throw new IllegalArgumentException(msg, ex);
    }

    Configuration hadoopConf = new SparkHadoopUtil().newConfiguration(conf);
    if (isRunningInMesos) {
        hadoopConf = getHadoopConf(hadoopConf);
    } else {
        hadoopConf = new Configuration();
    }
    URI hdfsURI = FileSystem.getDefaultUri(hadoopConf);
    logMessage("Default FS URI: " + hdfsURI, isRunningInMesos);
    FileSystem hdfs = (new Path(hdfsURI)).getFileSystem(hadoopConf);
    Path sdcCheckpointPath = new Path(hdfs.getHomeDirectory(),
            ".streamsets-spark-streaming/" + getProperty("sdc.id") + "/" + encode(topic));
    // encode, as the remote pipeline name might contain a colon
    String pipelineName = encode(getProperty("cluster.pipeline.name"));
    final Path checkPointPath = new Path(sdcCheckpointPath, pipelineName);
    hdfs.mkdirs(checkPointPath);
    if (!hdfs.isDirectory(checkPointPath)) {
        throw new IllegalStateException("Could not create checkpoint path: " + sdcCheckpointPath);
    }
    if (isRunningInMesos) {
        String scheme = hdfsURI.getScheme();
        if (scheme.equals("hdfs")) {
            File mesosBootstrapFile = BootstrapCluster.getMesosBootstrapFile();
            Path mesosBootstrapPath = new Path(checkPointPath, mesosBootstrapFile.getName());
            // in case of hdfs, copy the jar file from local path to hdfs
            hdfs.copyFromLocalFile(false, true, new Path(mesosBootstrapFile.toURI()), mesosBootstrapPath);
            conf.setJars(new String[] { mesosBootstrapPath.toString() });
        } else if (scheme.equals("s3") || scheme.equals("s3n") || scheme.equals("s3a")) {
            // We can't upload the jar to S3, as executors won't understand the s3 scheme without the AWS jar,
            // so make the jar available over HTTP instead
            conf.setJars(new String[] { getProperty("mesos.jar.url") });
        } else {
            throw new IllegalStateException("Unsupported scheme: " + scheme);
        }
    }
    JavaStreamingContextFactory javaStreamingContextFactory = new JavaStreamingContextFactoryImpl(conf,
            duration, checkPointPath.toString(), getProperty(METADATA_BROKER_LIST), topic,
            properties.getProperty(AUTO_OFFSET_RESET, "").trim(), isRunningInMesos);

    ssc = JavaStreamingContext.getOrCreate(checkPointPath.toString(), hadoopConf, javaStreamingContextFactory,
            true);
    // mesos tries to stop the context internally, so don't do it here - deadlock bug in spark
    if (!isRunningInMesos) {
        final Thread shutdownHookThread = new Thread("Spark.shutdownHook") {
            @Override
            public void run() {
                LOG.debug("Gracefully stopping Spark Streaming Application");
                ssc.stop(true, true);
                LOG.info("Application stopped");
            }
        };
        Runtime.getRuntime().addShutdownHook(shutdownHookThread);
    }
    logMessage("Making calls through spark context ", isRunningInMesos);
    ssc.start();
}

From source file:com.toddbodnar.simpleHive.IO.hdfsFile.java

@Override
public void resetStream() {

    try {
        if (out != null)
            out.close();
        writing = false;
        if (in != null)
            in.close();
        FileSystem fs = FileSystem.get(GetConfiguration.get());

        if (fs.isFile(location)) {
            LinkedList<FileStatus> file = new LinkedList<>();
            file.add(fs.getFileStatus(location));
            theFiles = file.iterator();
        } else {
            LinkedList<FileStatus> files = new LinkedList<>();
            RemoteIterator<LocatedFileStatus> fileremote = fs.listFiles(location, true);
            while (fileremote.hasNext())
                files.add(fileremote.next());
            theFiles = files.iterator();
        }

        FileStatus nextFileStatus;
        do {
            if (!theFiles.hasNext()) {
                System.err.println("WARNING: File is Empty");
                super.next = null;
                return;
            }
            nextFileStatus = theFiles.next();
        } while (fs.isDirectory(nextFileStatus.getPath()) || nextFileStatus.getLen() == 0);

        in = new BufferedReader(new InputStreamReader(fs.open(nextFileStatus.getPath())));
        next = in.readLine();

        //out.flush();
    } catch (FileNotFoundException ex) {
        Logger.getLogger(fileFile.class.getName()).log(Level.SEVERE, null, ex);
    } catch (IOException ex) {
        Logger.getLogger(fileFile.class.getName()).log(Level.SEVERE, null, ex);
    }
}

From source file:com.toddbodnar.simpleHive.IO.hdfsFile.java

public String readNextLine() {
    if (writing)
        resetStream();

    String toReturn = next;
    try {
        FileSystem fs = FileSystem.get(GetConfiguration.get());

        next = in.readLine();

        if (next == null)//at end of current file
        {
            FileStatus nextFileStatus;
            do {
                if (!theFiles.hasNext()) {
                    super.next = null;
                    return toReturn;
                }
                nextFileStatus = theFiles.next();
            } while (fs.isDirectory(nextFileStatus.getPath()) || nextFileStatus.getLen() == 0);
            in = new BufferedReader(new InputStreamReader(fs.open(nextFileStatus.getPath())));
            next = in.readLine();
        }
    } catch (IOException ex) {
        Logger.getLogger(fileFile.class.getName()).log(Level.SEVERE, null, ex);
    }
    return toReturn;
}

From source file:com.topsoft.botspider.util.AvroReadTool.java

License:Apache License

@Override
public int run(InputStream stdin, PrintStream out, PrintStream err, List<String> args) throws Exception {
    if (args.size() < 1) {
        // Unlike other commands, "-" can't be used for stdin, because
        // we can only use seekable files.
        err.println("Usage: input_file [-num numToRead].");
        return 1;
    }
    int numToRead = Integer.MIN_VALUE;

    for (int i = 0; i < args.size(); i++) {
        if ("-num".equals(args.get(i))) {
            numToRead = Integer.parseInt(args.get(i + 1));
            i++;
        }
    }
    Configuration conf = new Configuration();
    FileSystem fs = FileSystem.get(conf);
    Path inputFile = new Path(args.get(0));
    if (fs.isDirectory(inputFile)) {
        inputFile = new Path(inputFile, MapAvroFile.DATA_FILE_NAME);
    }

    GenericDatumReader<Object> reader = new GenericDatumReader<Object>();
    FileReader<Object> fileReader = new DataFileReader<Object>(new FsInput(inputFile, conf), reader);
    try {
        Schema schema = fileReader.getSchema();
        DatumWriter<Object> writer = new GenericDatumWriter<Object>(schema);
        Encoder encoder = EncoderFactory.get().jsonEncoder(schema, out);
        for (Object datum : fileReader) {
            if (Integer.MIN_VALUE == numToRead || numToRead-- > 0) {
                //encoder.init(out);
                writer.write(datum, encoder);
                encoder.flush();
                //out.println();
            } else
                break;
        }
        out.flush();
    } finally {
        fileReader.close();
    }
    return 0;
}

From source file:com.topsoft.botspider.util.GetSchemaTool.java

License:Apache License

@Override
public int run(InputStream stdin, PrintStream out, PrintStream err, List<String> args) throws Exception {
    if (args.size() != 1) {
        err.println("Usage: input_file");
        return 1;
    }
    Configuration conf = new Configuration();
    FileSystem fs = FileSystem.get(conf);
    Path inputFile = new Path(args.get(0));
    if (fs.isDirectory(inputFile)) {
        inputFile = new Path(inputFile, MapAvroFile.DATA_FILE_NAME);
    }
    DataFileReader<Void> reader = new DataFileReader<Void>(new FsInput(inputFile, conf),
            new GenericDatumReader<Void>());
    try {

        out.println(reader.getSchema().toString(true));

    } finally {
        reader.close();
    }
    return 0;
}

From source file:com.turn.camino.render.functions.FileSystemFunctionsTest.java

License:Open Source License

/**
 * Set up environment
 */
@BeforeClass
public void setUp() throws IOException {
    // mock environment
    FileSystem fileSystem = mock(FileSystem.class);
    FileStatus[] fss = new FileStatus[] {
            new FileStatus(1200000L, false, 3, 1000L, 1409302856296L,
                    new org.apache.hadoop.fs.Path("/a/b/1.dat")),
            new FileStatus(1400000L, false, 3, 1000L, 1409302867303L,
                    new org.apache.hadoop.fs.Path("/a/b/2.dat")),
            new FileStatus(1060000L, false, 3, 1000L, 1409302844187L,
                    new org.apache.hadoop.fs.Path("/a/b/3.dat")) };
    org.apache.hadoop.fs.Path dir = new org.apache.hadoop.fs.Path("/a/b");
    when(fileSystem.exists(dir)).thenReturn(true);
    when(fileSystem.isDirectory(dir)).thenReturn(true);
    when(fileSystem.listStatus(dir)).thenReturn(fss);

    when(fileSystem.exists(new org.apache.hadoop.fs.Path("/x/y"))).thenReturn(false);

    dir = new org.apache.hadoop.fs.Path("/u/v");
    when(fileSystem.exists(dir)).thenReturn(true);
    when(fileSystem.isDirectory(dir)).thenReturn(false);

    doThrow(new IOException()).when(fileSystem).listStatus(new org.apache.hadoop.fs.Path("/foo"));

    context = mock(Context.class);
    Env env = mock(Env.class);
    when(context.getEnv()).thenReturn(env);
    when(env.getCurrentTime()).thenReturn(1409389256296L);
    when(env.getTimeZone()).thenReturn(TimeZone.getTimeZone("GMT"));
    when(env.getFileSystem()).thenReturn(fileSystem);
}

From source file:com.uber.hoodie.common.model.HoodieTableMetadata.java

License:Apache License

private HoodieTableMetadata(FileSystem fs, String basePath, String tableName, boolean initOnMissing) {
    this.fs = fs;
    this.basePath = basePath;

    try {
        Path basePathDir = new Path(this.basePath);
        if (!fs.exists(basePathDir)) {
            if (initOnMissing) {
                fs.mkdirs(basePathDir);
            } else {
                throw new DatasetNotFoundException(this.basePath);
            }
        }

        if (!fs.isDirectory(new Path(basePath))) {
            throw new DatasetNotFoundException(this.basePath);
        }

        this.metadataFolder = new Path(this.basePath, METAFOLDER_NAME);
        Path propertyPath = new Path(metadataFolder, HOODIE_PROPERTIES_FILE);
        if (!fs.exists(propertyPath)) {
            if (initOnMissing) {
                // create .hoodie folder if it does not exist.
                createHoodieProperties(metadataFolder, tableName);
            } else {
                throw new InvalidDatasetException(this.basePath);
            }
        }

        // Load meta data
        this.commits = new HoodieCommits(scanCommits(COMMIT_FILE_SUFFIX));
        this.inflightCommits = scanCommits(INFLIGHT_FILE_SUFFIX);
        this.properties = readHoodieProperties();
        log.info("All commits :" + commits);
    } catch (IOException e) {
        throw new HoodieIOException("Could not load HoodieMetadata from path " + basePath, e);
    }
}

From source file:com.uber.hoodie.exception.DatasetNotFoundException.java

License:Apache License

public static void checkValidDataset(FileSystem fs, Path basePathDir, Path metaPathDir)
        throws DatasetNotFoundException {
    // Check if the base path is found
    try {
        if (!fs.exists(basePathDir) || !fs.isDirectory(basePathDir)) {
            throw new DatasetNotFoundException(basePathDir.toString());
        }
        // Check if the meta path is found
        if (!fs.exists(metaPathDir) || !fs.isDirectory(metaPathDir)) {
            throw new DatasetNotFoundException(metaPathDir.toString());
        }
    } catch (IllegalArgumentException e) {
        // if the base path is file:///, then we get an IllegalArgumentException
        throw new DatasetNotFoundException(metaPathDir.toString());
    } catch (IOException e) {
        throw new HoodieIOException("Could not check if dataset " + basePathDir + " is valid dataset", e);
    }
}

From source file:edu.stolaf.cs.wmrserver.JobServiceHandler.java

License:Apache License

public static FileStatus[] listInputFiles(FileSystem fs, Path path) throws IOException {
    if (!fs.isDirectory(path))
        return new FileStatus[] { fs.getFileStatus(path) };
    else {
        // Get all files in directory that are not directories or hidden files

        final FileSystem fsFinal = fs;
        PathFilter filter = new PathFilter() {
            public boolean accept(Path p) {
                try {
                    return !(fsFinal.isDirectory(p) || p.getName().startsWith(".")
                            || p.getName().startsWith("_"));
                } catch (IOException ex) {
                    throw new RuntimeException("Error filtering files.", ex);
                }
            }
        };

        return fs.listStatus(path, filter);
    }
}