Example usage for org.apache.hadoop.fs FileSystem makeQualified

Introduction

On this page you can find example usage for org.apache.hadoop.fs FileSystem makeQualified.

Prototype

public Path makeQualified(Path path) 

Document

Qualify a path to one which uses this FileSystem and, if relative, make it absolute.
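
Before the individual source examples, here is a minimal, self-contained sketch (not taken from any of the sources listed below) of what makeQualified does; the HDFS URI and user directory shown in the comments are hypothetical values used only for illustration.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class MakeQualifiedSketch {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(conf);

        // A relative path is resolved against the FileSystem's scheme, authority and
        // working directory, e.g. "tmp/data" may become something like
        // "hdfs://namenode:8020/user/alice/tmp/data" (hypothetical values).
        System.out.println(fs.makeQualified(new Path("tmp/data")));

        // An already-absolute but scheme-less path only gains the scheme and authority,
        // e.g. "/tmp/data" may become "hdfs://namenode:8020/tmp/data".
        System.out.println(fs.makeQualified(new Path("/tmp/data")));
    }
}

Several of the examples below rely on exactly this behavior, for instance sourceFS.makeQualified(new Path("/tmp")) to ensure a temporary repository lives on the same FileSystem as the source data.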

Usage

From source file:org.kitesdk.cli.commands.InputFormatImportCommand.java

License:Apache License

@Override
@SuppressWarnings("unchecked")
@edu.umd.cs.findbugs.annotations.SuppressWarnings(value = "DM_CONVERT_CASE", justification = "For record types only")
public int run() throws IOException {
    Preconditions.checkArgument(targets != null && targets.size() == 2,
            "Data path and target dataset are required.");

    Path source = qualifiedPath(targets.get(0));
    FileSystem sourceFS = source.getFileSystem(getConf());
    Preconditions.checkArgument(sourceFS.exists(source), "Data path does not exist: " + source);

    // this throws IllegalArgumentException if the type is invalid.
    InputFormatUtil.RecordType.valueOf(recordType.trim().toUpperCase());

    String dataset = targets.get(1);

    View<Object> target = load(dataset, Object.class);
    Schema schema = target.getDataset().getDescriptor().getSchema();

    // Build a dataset around the incoming data
    DatasetDescriptor.Builder descriptorBuilder = new DatasetDescriptor.Builder().location(source.toUri())
            .property(InputFormatUtil.INPUT_FORMAT_CLASS_PROP, inFormatClass)
            .property(InputFormatUtil.INPUT_FORMAT_RECORD_PROP, recordType).format(Formats.INPUTFORMAT)
            .schema(ColumnMappingParser
                    .removeEmbeddedMapping(PartitionStrategyParser.removeEmbeddedStrategy(schema)));

    for (Map.Entry<String, String> prop : properties.entrySet()) {
        descriptorBuilder.property(prop.getKey(), prop.getValue());
    }

    DatasetDescriptor inDescriptor = descriptorBuilder.build();

    TemporaryFileSystemDatasetRepository repo = new TemporaryFileSystemDatasetRepository(getConf(),
            // ensure the same FS as the file source is used
            sourceFS.makeQualified(new Path("/tmp")), target.getDataset().getNamespace(),
            UUID.randomUUID().toString());

    ClassLoader loader = loaderForJars(jars);

    try {
        FileSystemDataset<Object> inDataset = (FileSystemDataset) repo.create("import", "inputformat",
                inDescriptor);
        Iterator<Path> iter = inDataset.pathIterator().iterator();
        Preconditions.checkArgument(iter.hasNext(), "Data path has no data files: " + source);

        TaskUtil.configure(getConf()).addJars(jars);

        TransformTask task;
        if (transform != null) {
            DoFn<Object, Object> transformFn;
            try {
                DynConstructors.Ctor<DoFn<Object, Object>> ctor = new DynConstructors.Builder(DoFn.class)
                        .loader(loader).impl(transform).buildChecked();
                transformFn = ctor.newInstance();
            } catch (NoSuchMethodException e) {
                throw new DatasetException("Cannot find no-arg constructor for class: " + transform, e);
            }
            task = new TransformTask<Object, Object>(inDataset, target, transformFn);
        } else {
            task = new CopyTask<Object>(inDataset, target);
        }

        task.setConf(getConf());

        if (noCompaction) {
            task.noCompaction();
        }

        if (numWriters >= 0) {
            task.setNumWriters(numWriters);
        }

        PipelineResult result;
        try {
            result = runTaskWithClassLoader(task, loader);
        } catch (InterruptedException e) {
            // didn't finish
            return 1;
        }

        if (result.succeeded()) {
            long count = task.getCount();
            if (count > 0) {
                console.info("Added {} records to \"{}\"", count, dataset);
            }
            return 0;
        } else {
            return 1;
        }
    } finally {
        repo.delete();
    }
}

From source file:org.kitesdk.cli.commands.JSONImportCommand.java

License:Apache License

@Override
@SuppressWarnings("unchecked")
public int run() throws IOException {
    Preconditions.checkArgument(targets != null && targets.size() == 2,
            "JSON path and target dataset name are required.");

    Path source = qualifiedPath(targets.get(0));
    FileSystem sourceFS = source.getFileSystem(getConf());
    Preconditions.checkArgument(sourceFS.exists(source), "JSON path does not exist: " + source);

    String dataset = targets.get(1);

    View<Record> target = load(dataset, Record.class);
    Schema datasetSchema = target.getDataset().getDescriptor().getSchema();

    DatasetDescriptor jsonDescriptor = new DatasetDescriptor.Builder().location(source.toUri())
            .schema(ColumnMappingParser
                    .removeEmbeddedMapping(PartitionStrategyParser.removeEmbeddedStrategy(datasetSchema)))
            .format("json").build();

    TemporaryFileSystemDatasetRepository repo = new TemporaryFileSystemDatasetRepository(getConf(),
            // ensure the same FS as the file source is used
            sourceFS.makeQualified(new Path("/tmp")), target.getDataset().getNamespace(),
            UUID.randomUUID().toString());

    try {
        FileSystemDataset<Record> jsonDataset = (FileSystemDataset) repo.create("import", "json",
                jsonDescriptor);

        Iterator<Path> iter = jsonDataset.pathIterator().iterator();
        Preconditions.checkArgument(iter.hasNext(), "JSON path has no data files: " + source);

        TaskUtil.configure(getConf()).addJars(jars);

        TransformTask task;
        if (transform != null) {
            DoFn<Record, Record> transformFn;
            try {
                DynConstructors.Ctor<DoFn<Record, Record>> ctor = new DynConstructors.Builder(DoFn.class)
                        .loader(loaderForJars(jars)).impl(transform).buildChecked();
                transformFn = ctor.newInstance();
            } catch (NoSuchMethodException e) {
                throw new DatasetException("Cannot find no-arg constructor for class: " + transform, e);
            }
            task = new TransformTask<Record, Record>(jsonDataset, target, transformFn);
        } else {
            task = new CopyTask<Record>(jsonDataset, target);
        }

        task.setConf(getConf());

        if (noCompaction) {
            task.noCompaction();
        }

        if (numWriters >= 0) {
            task.setNumWriters(numWriters);
        }

        PipelineResult result = task.run();

        if (result.succeeded()) {
            long count = task.getCount();
            if (count > 0) {
                console.info("Added {} records to \"{}\"", count, dataset);
            }
            return 0;
        } else {
            return 1;
        }
    } finally {
        // clean up the temporary repository
        repo.delete();
    }
}

From source file:org.kitesdk.data.hcatalog.HiveUtils.java

License:Apache License

static DatasetDescriptor descriptorForTable(Configuration conf, Table table) {
    final DatasetDescriptor.Builder builder = new DatasetDescriptor.Builder();

    final String serializationLib = table.getSerializationLib();
    if (SERDE_TO_FORMAT.containsKey(serializationLib)) {
        builder.format(SERDE_TO_FORMAT.get(serializationLib));
    } else {
        // TODO: should this use an "unknown" format? others fail in open()
        throw new UnknownFormatException("Unknown format for serde:" + serializationLib);
    }

    final Path dataLocation = table.getPath();
    final FileSystem fs = fsForPath(conf, dataLocation);

    builder.location(fs.makeQualified(dataLocation));

    // custom properties
    String namesProperty = coalesce(table.getProperty(CUSTOM_PROPERTIES_PROPERTY_NAME),
            table.getProperty(OLD_CUSTOM_PROPERTIES_PROPERTY_NAME));
    if (namesProperty != null) {
        for (String property : NAME_SPLITTER.split(namesProperty)) {
            builder.property(property, table.getProperty(property));
        }
    }

    if (table.isPartitioned()) {
        String partitionProperty = coalesce(table.getProperty(PARTITION_EXPRESSION_PROPERTY_NAME),
                table.getProperty(OLD_PARTITION_EXPRESSION_PROPERTY_NAME));
        if (partitionProperty != null) {
            builder.partitionStrategy(Accessor.getDefault().fromExpression(partitionProperty));
        } else {
            // build a partition strategy for the table from the Hive strategy
            builder.partitionStrategy(fromPartitionColumns(table.getPartCols()));
        }
    }

    String schemaUrlString = table.getProperty(AVRO_SCHEMA_URL_PROPERTY_NAME);
    if (schemaUrlString != null) {
        try {
            // URI.create is safe because this library wrote the URI
            builder.schemaUri(URI.create(schemaUrlString));
        } catch (IOException e) {
            throw new DatasetIOException("Could not read schema", e);
        }
    }

    String schemaLiteral = table.getProperty(AVRO_SCHEMA_LITERAL_PROPERTY_NAME);
    if (schemaLiteral != null) {
        builder.schemaLiteral(schemaLiteral);
    }

    try {
        return builder.build();
    } catch (IllegalStateException ex) {
        throw new DatasetException("Cannot find schema: missing metadata");
    }
}

From source file:org.kitesdk.data.mapreduce.FileSystemTestBase.java

License:Apache License

@Before
public void setUp() throws Exception {
    Configuration conf = new Configuration();
    FileSystem fileSystem = FileSystem.get(conf);
    Path testDirectory = fileSystem.makeQualified(new Path(Files.createTempDir().getAbsolutePath()));
    this.repo = new FileSystemDatasetRepository.Builder().configuration(conf).rootDirectory(testDirectory)
            .build();
}

From source file:org.kitesdk.data.spi.filesystem.FileSystemDatasetRepository.java

License:Apache License

/**
 * Get a {@link org.kitesdk.data.spi.PartitionKey} corresponding to a partition's filesystem path
 * represented as a {@link URI}. If the path is not a valid partition,
 * then {@link IllegalArgumentException} is thrown. Note that the partition does not
 * have to exist.
 * @param dataset the filesystem dataset
 * @param partitionPath a directory path where the partition data is stored
 * @return a partition key representing the partition at the given path
 * @since 0.4.0
 */
@SuppressWarnings({ "unchecked", "deprecation" })
public static PartitionKey partitionKeyForPath(Dataset dataset, URI partitionPath) {
    Preconditions.checkState(dataset.getDescriptor().isPartitioned(),
            "Attempt to get a partition on a non-partitioned dataset (name:%s)", dataset.getName());

    Preconditions.checkArgument(dataset instanceof FileSystemDataset, "Dataset is not a FileSystemDataset");
    FileSystemDataset fsDataset = (FileSystemDataset) dataset;

    FileSystem fs = fsDataset.getFileSystem();
    URI partitionUri = fs.makeQualified(new Path(partitionPath)).toUri();
    URI directoryUri = fsDataset.getDirectory().toUri();
    URI relativizedUri = directoryUri.relativize(partitionUri);

    if (relativizedUri.equals(partitionUri)) {
        throw new IllegalArgumentException(
                String.format("Partition URI %s has different " + "root directory to dataset (directory: %s).",
                        partitionUri, directoryUri));
    }

    Iterable<String> parts = Splitter.on('/').split(relativizedUri.getPath());

    PartitionStrategy partitionStrategy = dataset.getDescriptor().getPartitionStrategy();
    List<FieldPartitioner> fieldPartitioners = Accessor.getDefault().getFieldPartitioners(partitionStrategy);
    if (Iterables.size(parts) > fieldPartitioners.size()) {
        throw new IllegalArgumentException(
                String.format("Too many partition directories " + "for %s (%s), expecting %s.", partitionUri,
                        Iterables.size(parts), fieldPartitioners.size()));
    }

    Schema schema = dataset.getDescriptor().getSchema();
    List<Object> values = Lists.newArrayList();
    int i = 0;
    for (String part : parts) {
        Iterator<String> split = Splitter.on('=').split(part).iterator();
        String fieldName = split.next();
        FieldPartitioner fp = fieldPartitioners.get(i++);
        if (!fieldName.equals(fp.getName())) {
            throw new IllegalArgumentException(
                    String.format("Unrecognized partition name " + "'%s' in partition %s, expecting '%s'.",
                            fieldName, partitionUri, fp.getName()));
        }
        if (!split.hasNext()) {
            throw new IllegalArgumentException(String
                    .format("Missing partition value for " + "'%s' in partition %s.", fieldName, partitionUri));
        }
        String stringValue = split.next();

        values.add(PathConversion.valueForDirname(fp, schema, stringValue));
    }
    return new PartitionKey(values.toArray(new Object[values.size()]));
}

From source file:org.kitesdk.data.spi.hive.HiveUtils.java

License:Apache License

static DatasetDescriptor descriptorForTable(Configuration conf, Table table) {
    final DatasetDescriptor.Builder builder = new DatasetDescriptor.Builder();

    Format format;
    final String serializationLib = table.getSd().getSerdeInfo().getSerializationLib();
    if (SERDE_TO_FORMAT.containsKey(serializationLib)) {
        format = SERDE_TO_FORMAT.get(serializationLib);
        builder.format(format);
    } else {
        // TODO: should this use an "unknown" format? others fail in open()
        throw new UnknownFormatException("Unknown format for serde:" + serializationLib);
    }

    final Path dataLocation = new Path(table.getSd().getLocation());
    final FileSystem fs = fsForPath(conf, dataLocation);

    builder.location(fs.makeQualified(dataLocation));

    // custom properties
    Map<String, String> properties = table.getParameters();
    String namesProperty = coalesce(properties.get(CUSTOM_PROPERTIES_PROPERTY_NAME),
            properties.get(OLD_CUSTOM_PROPERTIES_PROPERTY_NAME));
    if (namesProperty != null) {
        for (String property : NAME_SPLITTER.split(namesProperty)) {
            builder.property(property, properties.get(property));
        }
    }

    PartitionStrategy partitionStrategy = null;
    if (isPartitioned(table)) {
        String partitionProperty = coalesce(properties.get(PARTITION_EXPRESSION_PROPERTY_NAME),
                properties.get(OLD_PARTITION_EXPRESSION_PROPERTY_NAME));
        if (partitionProperty != null) {
            partitionStrategy = Accessor.getDefault().fromExpression(partitionProperty);
        } else {
            // build a partition strategy for the table from the Hive strategy
            partitionStrategy = fromPartitionColumns(getPartCols(table));
        }
        builder.partitionStrategy(partitionStrategy);
    }

    String schemaUrlString = properties.get(AVRO_SCHEMA_URL_PROPERTY_NAME);
    if (schemaUrlString != null) {
        try {
            // URI.create is safe because this library wrote the URI
            builder.schemaUri(URI.create(schemaUrlString));
        } catch (IOException e) {
            throw new DatasetIOException("Could not read schema", e);
        }
    } else {
        String schemaLiteral = properties.get(AVRO_SCHEMA_LITERAL_PROPERTY_NAME);
        if (schemaLiteral != null) {
            builder.schemaLiteral(schemaLiteral);
        } else {
            builder.schema(HiveSchemaConverter.convertTable(table.getTableName(), table.getSd().getCols(),
                    partitionStrategy));
        }
    }

    String compressionType = properties.get(COMPRESSION_TYPE_PROPERTY_NAME);
    if (compressionType != null) {
        builder.compressionType(compressionType);
    }

    try {
        return builder.build();
    } catch (IllegalStateException ex) {
        throw new DatasetException("Cannot find schema: missing metadata");
    }
}

From source file:org.kitesdk.data.TestDatasetDescriptor.java

License:Apache License

@Test
public void testSchemaFromHdfs() throws IOException {
    MiniDFSTest.setupFS();
    FileSystem fs = MiniDFSTest.getDFS();

    // copy a schema to HDFS
    Path schemaPath = fs.makeQualified(new Path("schema.avsc"));
    FSDataOutputStream out = fs.create(schemaPath);
    IOUtils.copyBytes(DatasetTestUtilities.USER_SCHEMA_URL.toURL().openStream(), out, fs.getConf());
    out.close();

    // build a schema using the HDFS path and check it's the same
    Schema schema = new DatasetDescriptor.Builder().schemaUri(schemaPath.toUri()).build().getSchema();

    Assert.assertEquals(DatasetTestUtilities.USER_SCHEMA, schema);
    MiniDFSTest.teardownFS();
}

From source file:org.kitesdk.minicluster.HBaseService.java

License:Apache License

/**
 * Configure the HBase cluster before launching it
 *
 * @param config
 *          already created Hadoop configuration we'll further configure for
 *          HDFS
 * @param zkClientPort
 *          The client port zookeeper is listening on
 * @param hdfsFs
 *          The HDFS FileSystem this HBase cluster will run on top of
 * @param bindIP
 *          The IP Address to force bind all sockets on. If null, will use
 *          defaults
 * @param masterPort
 *          The port the master listens on
 * @param regionserverPort
 *          The port the regionserver listens on
 * @return The updated Configuration object.
 * @throws IOException
 */
private static Configuration configureHBaseCluster(Configuration config, int zkClientPort, FileSystem hdfsFs,
        String bindIP, int masterPort, int regionserverPort) throws IOException {
    // Configure the zookeeper port
    config.set(HConstants.ZOOKEEPER_CLIENT_PORT, Integer.toString(zkClientPort));
    // Initialize HDFS path configurations required by HBase
    Path hbaseDir = new Path(hdfsFs.makeQualified(hdfsFs.getHomeDirectory()), "hbase");
    FSUtils.setRootDir(config, hbaseDir);
    hdfsFs.mkdirs(hbaseDir);
    config.set("fs.defaultFS", hdfsFs.getUri().toString());
    config.set("fs.default.name", hdfsFs.getUri().toString());
    FSUtils.setVersion(hdfsFs, hbaseDir);

    // Configure the bind addresses and ports. If running in Openshift, we only
    // have permission to bind to the private IP address, accessible through an
    // environment variable.
    logger.info("HBase force binding to ip: " + bindIP);
    config.set("hbase.master.ipc.address", bindIP);
    config.set(HConstants.MASTER_PORT, Integer.toString(masterPort));
    config.set("hbase.regionserver.ipc.address", bindIP);
    config.set(HConstants.REGIONSERVER_PORT, Integer.toString(regionserverPort));
    config.set(HConstants.ZOOKEEPER_QUORUM, bindIP);

    // By default, the HBase master and regionservers will report to zookeeper
    // that its hostname is what it determines by reverse DNS lookup, and not
    // what we use as the bind address. This means when we set the bind
    // address, daemons won't actually be able to connect to each other if they
    // are different. Here, we do something that's illegal in 48 states - use
    // reflection to override a private static final field in the DNS class
    // that is a cachedHostname. This way, we are forcing the hostname that
    // reverse dns finds. This may not be compatible with newer versions of
    // Hadoop.
    try {
        Field cachedHostname = DNS.class.getDeclaredField("cachedHostname");
        cachedHostname.setAccessible(true);
        Field modifiersField = Field.class.getDeclaredField("modifiers");
        modifiersField.setAccessible(true);
        modifiersField.setInt(cachedHostname, cachedHostname.getModifiers() & ~Modifier.FINAL);
        cachedHostname.set(null, bindIP);
    } catch (Exception e) {
        // Reflection can throw so many checked exceptions. Let's wrap in an
        // IOException.
        throw new IOException(e);
    }

    // By setting the info ports to -1, we won't launch the master or
    // regionserver info web interfaces
    config.set(HConstants.MASTER_INFO_PORT, "-1");
    config.set(HConstants.REGIONSERVER_INFO_PORT, "-1");
    return config;
}

From source file:org.lilyproject.hadooptestfw.fork.HBaseTestingUtility.java

License:Apache License

/**
 * Creates an hbase rootdir in user home directory.  Also creates hbase
 * version file.  Normally you won't make use of this method.  Root hbasedir
 * is created for you as part of mini cluster startup.  You'd only use this
 * method if you were doing manual operation.
 *
 * @return Fully qualified path to hbase root dir
 */
public Path createRootDir() throws IOException {
    FileSystem fs = FileSystem.get(this.conf);
    // Lily change: create "hbase" subdirectory under home directory
    // to serve as hbaseRootdir. The home directory can contain other
    // directories and files, which are not necessarily hbase tables.
    // For instance a 'target' dir created by MiniMRCluster.
    // Cfr. HBASE-5317 and HBASE-4025
    Path hbaseRootdir = fs.makeQualified(new Path(fs.getHomeDirectory(), "hbase"));
    this.conf.set(HConstants.HBASE_DIR, hbaseRootdir.toString());
    fs.mkdirs(hbaseRootdir);
    FSUtils.setVersion(fs, hbaseRootdir);
    return hbaseRootdir;
}

From source file:org.lilyproject.testfw.HadoopLauncher.java

License:Apache License

public MiniHBaseCluster startMiniCluster(final int servers) throws Exception {
    // Make a new random dir to home everything in.  Set it as system property.
    // minidfs reads home from system property.
    this.clusterTestBuildDir = setupClusterTestBuildDir();
    System.setProperty(TEST_DIRECTORY_KEY, this.clusterTestBuildDir.getPath());
    // Bring up mini dfs cluster. This spews a bunch of warnings about missing
    // scheme. Complaints are 'Scheme is undefined for build/test/data/dfs/name1'.
    startMiniDFSCluster(servers, this.clusterTestBuildDir);

    // Mangle conf so fs parameter points to minidfs we just started up
    FileSystem fs = this.dfsCluster.getFileSystem();
    this.conf.set("fs.defaultFS", fs.getUri().toString());
    // Do old style too just to be safe.
    this.conf.set("fs.default.name", fs.getUri().toString());
    this.dfsCluster.waitClusterUp();

    // Start up a zk cluster.
    if (this.zkCluster == null) {
        startMiniZKCluster(this.clusterTestBuildDir);
    }

    // Now do the mini hbase cluster.  Set the hbase.rootdir in config.
    Path hbaseRootdir = fs.makeQualified(fs.getHomeDirectory());
    this.conf.set(HConstants.HBASE_DIR, hbaseRootdir.toString());
    fs.mkdirs(hbaseRootdir);
    FSUtils.setVersion(fs, hbaseRootdir);
    Configuration c = new Configuration(this.conf);
    this.hbaseCluster = new MiniHBaseCluster(c, servers);
    // Don't leave here till we've done a successful scan of the .META.
    HTable t = new HTable(c, HConstants.META_TABLE_NAME);
    ResultScanner s = t.getScanner(new Scan());
    while (s.next() != null)
        continue;

    return this.hbaseCluster;
}