List of usage examples for org.apache.hadoop.fs FileSystem makeQualified
public Path makeQualified(Path path)
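Before the examples, a minimal, self-contained sketch of what makeQualified does (the hdfs://namenode:8020/user/alice prefix in the comment is illustrative only; the actual scheme, authority, and working directory come from fs.defaultFS and the current user):

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class MakeQualifiedExample {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        // The FileSystem is resolved from fs.defaultFS (e.g. an HDFS URI or file:///)
        FileSystem fs = FileSystem.get(conf);

        // A relative, scheme-less path
        Path relative = new Path("data/input");

        // makeQualified adds the filesystem's scheme and authority and resolves the
        // path against the working directory, e.g.
        // hdfs://namenode:8020/user/alice/data/input
        Path qualified = fs.makeQualified(relative);
        System.out.println(qualified);
    }
}

The examples below follow the same pattern: a bare path such as "/tmp", "schema.avsc", or a home directory is qualified against a specific FileSystem so that the resulting URI unambiguously refers to that filesystem rather than whatever the local default happens to be.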
From source file: org.kitesdk.cli.commands.InputFormatImportCommand.java
License: Apache License

@Override
@SuppressWarnings("unchecked")
@edu.umd.cs.findbugs.annotations.SuppressWarnings(value = "DM_CONVERT_CASE", justification = "For record types only")
public int run() throws IOException {
    Preconditions.checkArgument(targets != null && targets.size() == 2,
            "Data path and target dataset are required.");

    Path source = qualifiedPath(targets.get(0));
    FileSystem sourceFS = source.getFileSystem(getConf());
    Preconditions.checkArgument(sourceFS.exists(source), "Data path does not exist: " + source);

    // this throws IllegalArgumentException if the type is invalid.
    InputFormatUtil.RecordType.valueOf(recordType.trim().toUpperCase());

    String dataset = targets.get(1);

    View<Object> target = load(dataset, Object.class);
    Schema schema = target.getDataset().getDescriptor().getSchema();

    // Build a dataset around the incoming data
    DatasetDescriptor.Builder descriptorBuilder = new DatasetDescriptor.Builder()
            .location(source.toUri())
            .property(InputFormatUtil.INPUT_FORMAT_CLASS_PROP, inFormatClass)
            .property(InputFormatUtil.INPUT_FORMAT_RECORD_PROP, recordType)
            .format(Formats.INPUTFORMAT)
            .schema(ColumnMappingParser
                    .removeEmbeddedMapping(PartitionStrategyParser.removeEmbeddedStrategy(schema)));

    for (Map.Entry<String, String> prop : properties.entrySet()) {
        descriptorBuilder.property(prop.getKey(), prop.getValue());
    }

    DatasetDescriptor inDescriptor = descriptorBuilder.build();

    TemporaryFileSystemDatasetRepository repo = new TemporaryFileSystemDatasetRepository(getConf(),
            // ensure the same FS as the file source is used
            sourceFS.makeQualified(new Path("/tmp")), target.getDataset().getNamespace(),
            UUID.randomUUID().toString());
    ClassLoader loader = loaderForJars(jars);

    try {
        FileSystemDataset<Object> inDataset = (FileSystemDataset) repo.create("import", "inputformat",
                inDescriptor);
        Iterator<Path> iter = inDataset.pathIterator().iterator();
        Preconditions.checkArgument(iter.hasNext(), "Data path has no data files: " + source);

        TaskUtil.configure(getConf()).addJars(jars);

        TransformTask task;
        if (transform != null) {
            DoFn<Object, Object> transformFn;
            try {
                DynConstructors.Ctor<DoFn<Object, Object>> ctor = new DynConstructors.Builder(DoFn.class)
                        .loader(loader).impl(transform).buildChecked();
                transformFn = ctor.newInstance();
            } catch (NoSuchMethodException e) {
                throw new DatasetException("Cannot find no-arg constructor for class: " + transform, e);
            }
            task = new TransformTask<Object, Object>(inDataset, target, transformFn);
        } else {
            task = new CopyTask<Object>(inDataset, target);
        }

        task.setConf(getConf());

        if (noCompaction) {
            task.noCompaction();
        }

        if (numWriters >= 0) {
            task.setNumWriters(numWriters);
        }

        PipelineResult result;
        try {
            result = runTaskWithClassLoader(task, loader);
        } catch (InterruptedException e) {
            // didn't finish
            return 1;
        }

        if (result.succeeded()) {
            long count = task.getCount();
            if (count > 0) {
                console.info("Added {} records to \"{}\"", count, dataset);
            }
            return 0;
        } else {
            return 1;
        }
    } finally {
        repo.delete();
    }
}
From source file: org.kitesdk.cli.commands.JSONImportCommand.java
License: Apache License

@Override
@SuppressWarnings("unchecked")
public int run() throws IOException {
    Preconditions.checkArgument(targets != null && targets.size() == 2,
            "JSON path and target dataset name are required.");

    Path source = qualifiedPath(targets.get(0));
    FileSystem sourceFS = source.getFileSystem(getConf());
    Preconditions.checkArgument(sourceFS.exists(source), "JSON path does not exist: " + source);

    String dataset = targets.get(1);

    View<Record> target = load(dataset, Record.class);
    Schema datasetSchema = target.getDataset().getDescriptor().getSchema();

    DatasetDescriptor jsonDescriptor = new DatasetDescriptor.Builder()
            .location(source.toUri())
            .schema(ColumnMappingParser
                    .removeEmbeddedMapping(PartitionStrategyParser.removeEmbeddedStrategy(datasetSchema)))
            .format("json")
            .build();

    TemporaryFileSystemDatasetRepository repo = new TemporaryFileSystemDatasetRepository(getConf(),
            // ensure the same FS as the file source is used
            sourceFS.makeQualified(new Path("/tmp")), target.getDataset().getNamespace(),
            UUID.randomUUID().toString());

    try {
        FileSystemDataset<Record> jsonDataset = (FileSystemDataset) repo.create("import", "json",
                jsonDescriptor);
        Iterator<Path> iter = jsonDataset.pathIterator().iterator();
        Preconditions.checkArgument(iter.hasNext(), "JSON path has no data files: " + source);

        TaskUtil.configure(getConf()).addJars(jars);

        TransformTask task;
        if (transform != null) {
            DoFn<Record, Record> transformFn;
            try {
                DynConstructors.Ctor<DoFn<Record, Record>> ctor = new DynConstructors.Builder(DoFn.class)
                        .loader(loaderForJars(jars)).impl(transform).buildChecked();
                transformFn = ctor.newInstance();
            } catch (NoSuchMethodException e) {
                throw new DatasetException("Cannot find no-arg constructor for class: " + transform, e);
            }
            task = new TransformTask<Record, Record>(jsonDataset, target, transformFn);
        } else {
            task = new CopyTask<Record>(jsonDataset, target);
        }

        task.setConf(getConf());

        if (noCompaction) {
            task.noCompaction();
        }

        if (numWriters >= 0) {
            task.setNumWriters(numWriters);
        }

        PipelineResult result = task.run();

        if (result.succeeded()) {
            long count = task.getCount();
            if (count > 0) {
                console.info("Added {} records to \"{}\"", count, dataset);
            }
            return 0;
        } else {
            return 1;
        }
    } finally {
        // clean up the temporary repository
        repo.delete();
    }
}
From source file: org.kitesdk.data.hcatalog.HiveUtils.java
License: Apache License

static DatasetDescriptor descriptorForTable(Configuration conf, Table table) {
    final DatasetDescriptor.Builder builder = new DatasetDescriptor.Builder();

    final String serializationLib = table.getSerializationLib();
    if (SERDE_TO_FORMAT.containsKey(serializationLib)) {
        builder.format(SERDE_TO_FORMAT.get(serializationLib));
    } else {
        // TODO: should this use an "unknown" format? others fail in open()
        throw new UnknownFormatException("Unknown format for serde:" + serializationLib);
    }

    final Path dataLocation = table.getPath();
    final FileSystem fs = fsForPath(conf, dataLocation);

    builder.location(fs.makeQualified(dataLocation));

    // custom properties
    String namesProperty = coalesce(table.getProperty(CUSTOM_PROPERTIES_PROPERTY_NAME),
            table.getProperty(OLD_CUSTOM_PROPERTIES_PROPERTY_NAME));
    if (namesProperty != null) {
        for (String property : NAME_SPLITTER.split(namesProperty)) {
            builder.property(property, table.getProperty(property));
        }
    }

    if (table.isPartitioned()) {
        String partitionProperty = coalesce(table.getProperty(PARTITION_EXPRESSION_PROPERTY_NAME),
                table.getProperty(OLD_PARTITION_EXPRESSION_PROPERTY_NAME));
        if (partitionProperty != null) {
            builder.partitionStrategy(Accessor.getDefault().fromExpression(partitionProperty));
        } else {
            // build a partition strategy for the table from the Hive strategy
            builder.partitionStrategy(fromPartitionColumns(table.getPartCols()));
        }
    }

    String schemaUrlString = table.getProperty(AVRO_SCHEMA_URL_PROPERTY_NAME);
    if (schemaUrlString != null) {
        try {
            // URI.create is safe because this library wrote the URI
            builder.schemaUri(URI.create(schemaUrlString));
        } catch (IOException e) {
            throw new DatasetIOException("Could not read schema", e);
        }
    }

    String schemaLiteral = table.getProperty(AVRO_SCHEMA_LITERAL_PROPERTY_NAME);
    if (schemaLiteral != null) {
        builder.schemaLiteral(schemaLiteral);
    }

    try {
        return builder.build();
    } catch (IllegalStateException ex) {
        throw new DatasetException("Cannot find schema: missing metadata");
    }
}
From source file: org.kitesdk.data.mapreduce.FileSystemTestBase.java
License: Apache License

@Before
public void setUp() throws Exception {
    Configuration conf = new Configuration();
    FileSystem fileSystem = FileSystem.get(conf);
    Path testDirectory = fileSystem.makeQualified(new Path(Files.createTempDir().getAbsolutePath()));
    this.repo = new FileSystemDatasetRepository.Builder().configuration(conf).rootDirectory(testDirectory)
            .build();
}
From source file: org.kitesdk.data.spi.filesystem.FileSystemDatasetRepository.java
License: Apache License

/**
 * Get a {@link org.kitesdk.data.spi.PartitionKey} corresponding to a partition's filesystem path
 * represented as a {@link URI}. If the path is not a valid partition,
 * then {@link IllegalArgumentException} is thrown. Note that the partition does not
 * have to exist.
 *
 * @param dataset the filesystem dataset
 * @param partitionPath a directory path where the partition data is stored
 * @return a partition key representing the partition at the given path
 * @since 0.4.0
 */
@SuppressWarnings({ "unchecked", "deprecation" })
public static PartitionKey partitionKeyForPath(Dataset dataset, URI partitionPath) {
    Preconditions.checkState(dataset.getDescriptor().isPartitioned(),
            "Attempt to get a partition on a non-partitioned dataset (name:%s)", dataset.getName());
    Preconditions.checkArgument(dataset instanceof FileSystemDataset, "Dataset is not a FileSystemDataset");
    FileSystemDataset fsDataset = (FileSystemDataset) dataset;

    FileSystem fs = fsDataset.getFileSystem();
    URI partitionUri = fs.makeQualified(new Path(partitionPath)).toUri();
    URI directoryUri = fsDataset.getDirectory().toUri();
    URI relativizedUri = directoryUri.relativize(partitionUri);

    if (relativizedUri.equals(partitionUri)) {
        throw new IllegalArgumentException(
                String.format("Partition URI %s has different " + "root directory to dataset (directory: %s).",
                        partitionUri, directoryUri));
    }

    Iterable<String> parts = Splitter.on('/').split(relativizedUri.getPath());

    PartitionStrategy partitionStrategy = dataset.getDescriptor().getPartitionStrategy();
    List<FieldPartitioner> fieldPartitioners = Accessor.getDefault().getFieldPartitioners(partitionStrategy);
    if (Iterables.size(parts) > fieldPartitioners.size()) {
        throw new IllegalArgumentException(
                String.format("Too many partition directories " + "for %s (%s), expecting %s.", partitionUri,
                        Iterables.size(parts), fieldPartitioners.size()));
    }

    Schema schema = dataset.getDescriptor().getSchema();
    List<Object> values = Lists.newArrayList();
    int i = 0;
    for (String part : parts) {
        Iterator<String> split = Splitter.on('=').split(part).iterator();
        String fieldName = split.next();
        FieldPartitioner fp = fieldPartitioners.get(i++);
        if (!fieldName.equals(fp.getName())) {
            throw new IllegalArgumentException(
                    String.format("Unrecognized partition name " + "'%s' in partition %s, expecting '%s'.",
                            fieldName, partitionUri, fp.getName()));
        }
        if (!split.hasNext()) {
            throw new IllegalArgumentException(String.format(
                    "Missing partition value for " + "'%s' in partition %s.", fieldName, partitionUri));
        }
        String stringValue = split.next();

        values.add(PathConversion.valueForDirname(fp, schema, stringValue));
    }

    return new PartitionKey(values.toArray(new Object[values.size()]));
}
From source file: org.kitesdk.data.spi.hive.HiveUtils.java
License: Apache License

static DatasetDescriptor descriptorForTable(Configuration conf, Table table) {
    final DatasetDescriptor.Builder builder = new DatasetDescriptor.Builder();

    Format format;
    final String serializationLib = table.getSd().getSerdeInfo().getSerializationLib();
    if (SERDE_TO_FORMAT.containsKey(serializationLib)) {
        format = SERDE_TO_FORMAT.get(serializationLib);
        builder.format(format);
    } else {
        // TODO: should this use an "unknown" format? others fail in open()
        throw new UnknownFormatException("Unknown format for serde:" + serializationLib);
    }

    final Path dataLocation = new Path(table.getSd().getLocation());
    final FileSystem fs = fsForPath(conf, dataLocation);

    builder.location(fs.makeQualified(dataLocation));

    // custom properties
    Map<String, String> properties = table.getParameters();
    String namesProperty = coalesce(properties.get(CUSTOM_PROPERTIES_PROPERTY_NAME),
            properties.get(OLD_CUSTOM_PROPERTIES_PROPERTY_NAME));
    if (namesProperty != null) {
        for (String property : NAME_SPLITTER.split(namesProperty)) {
            builder.property(property, properties.get(property));
        }
    }

    PartitionStrategy partitionStrategy = null;
    if (isPartitioned(table)) {
        String partitionProperty = coalesce(properties.get(PARTITION_EXPRESSION_PROPERTY_NAME),
                properties.get(OLD_PARTITION_EXPRESSION_PROPERTY_NAME));
        if (partitionProperty != null) {
            partitionStrategy = Accessor.getDefault().fromExpression(partitionProperty);
        } else {
            // build a partition strategy for the table from the Hive strategy
            partitionStrategy = fromPartitionColumns(getPartCols(table));
        }
        builder.partitionStrategy(partitionStrategy);
    }

    String schemaUrlString = properties.get(AVRO_SCHEMA_URL_PROPERTY_NAME);
    if (schemaUrlString != null) {
        try {
            // URI.create is safe because this library wrote the URI
            builder.schemaUri(URI.create(schemaUrlString));
        } catch (IOException e) {
            throw new DatasetIOException("Could not read schema", e);
        }
    } else {
        String schemaLiteral = properties.get(AVRO_SCHEMA_LITERAL_PROPERTY_NAME);
        if (schemaLiteral != null) {
            builder.schemaLiteral(schemaLiteral);
        } else {
            builder.schema(HiveSchemaConverter.convertTable(table.getTableName(), table.getSd().getCols(),
                    partitionStrategy));
        }
    }

    String compressionType = properties.get(COMPRESSION_TYPE_PROPERTY_NAME);
    if (compressionType != null) {
        builder.compressionType(compressionType);
    }

    try {
        return builder.build();
    } catch (IllegalStateException ex) {
        throw new DatasetException("Cannot find schema: missing metadata");
    }
}
From source file: org.kitesdk.data.TestDatasetDescriptor.java
License: Apache License

@Test
public void testSchemaFromHdfs() throws IOException {
    MiniDFSTest.setupFS();
    FileSystem fs = MiniDFSTest.getDFS();

    // copy a schema to HDFS
    Path schemaPath = fs.makeQualified(new Path("schema.avsc"));
    FSDataOutputStream out = fs.create(schemaPath);
    IOUtils.copyBytes(DatasetTestUtilities.USER_SCHEMA_URL.toURL().openStream(), out, fs.getConf());
    out.close();

    // build a schema using the HDFS path and check it's the same
    Schema schema = new DatasetDescriptor.Builder().schemaUri(schemaPath.toUri()).build().getSchema();

    Assert.assertEquals(DatasetTestUtilities.USER_SCHEMA, schema);

    MiniDFSTest.teardownFS();
}
From source file: org.kitesdk.minicluster.HBaseService.java
License: Apache License

/**
 * Configure the HBase cluster before launching it
 *
 * @param config
 *          already created Hadoop configuration we'll further configure for
 *          HDFS
 * @param zkClientPort
 *          The client port zookeeper is listening on
 * @param hdfsFs
 *          The HDFS FileSystem this HBase cluster will run on top of
 * @param bindIP
 *          The IP Address to force bind all sockets on. If null, will use
 *          defaults
 * @param masterPort
 *          The port the master listens on
 * @param regionserverPort
 *          The port the regionserver listens on
 * @return The updated Configuration object.
 * @throws IOException
 */
private static Configuration configureHBaseCluster(Configuration config, int zkClientPort, FileSystem hdfsFs,
        String bindIP, int masterPort, int regionserverPort) throws IOException {
    // Configure the zookeeper port
    config.set(HConstants.ZOOKEEPER_CLIENT_PORT, Integer.toString(zkClientPort));

    // Initialize HDFS path configurations required by HBase
    Path hbaseDir = new Path(hdfsFs.makeQualified(hdfsFs.getHomeDirectory()), "hbase");
    FSUtils.setRootDir(config, hbaseDir);
    hdfsFs.mkdirs(hbaseDir);
    config.set("fs.defaultFS", hdfsFs.getUri().toString());
    config.set("fs.default.name", hdfsFs.getUri().toString());
    FSUtils.setVersion(hdfsFs, hbaseDir);

    // Configure the bind addresses and ports. If running in Openshift, we only
    // have permission to bind to the private IP address, accessible through an
    // environment variable.
    logger.info("HBase force binding to ip: " + bindIP);
    config.set("hbase.master.ipc.address", bindIP);
    config.set(HConstants.MASTER_PORT, Integer.toString(masterPort));
    config.set("hbase.regionserver.ipc.address", bindIP);
    config.set(HConstants.REGIONSERVER_PORT, Integer.toString(regionserverPort));
    config.set(HConstants.ZOOKEEPER_QUORUM, bindIP);

    // By default, the HBase master and regionservers will report to zookeeper
    // that its hostname is what it determines by reverse DNS lookup, and not
    // what we use as the bind address. This means when we set the bind
    // address, daemons won't actually be able to connect to each other if they
    // are different. Here, we do something that's illegal in 48 states - use
    // reflection to override a private static final field in the DNS class
    // that is a cachedHostname. This way, we are forcing the hostname that
    // reverse dns finds. This may not be compatible with newer versions of
    // Hadoop.
    try {
        Field cachedHostname = DNS.class.getDeclaredField("cachedHostname");
        cachedHostname.setAccessible(true);
        Field modifiersField = Field.class.getDeclaredField("modifiers");
        modifiersField.setAccessible(true);
        modifiersField.setInt(cachedHostname, cachedHostname.getModifiers() & ~Modifier.FINAL);
        cachedHostname.set(null, bindIP);
    } catch (Exception e) {
        // Reflection can throw so many checked exceptions. Let's wrap in an
        // IOException.
        throw new IOException(e);
    }

    // By setting the info ports to -1, we won't launch the master or
    // regionserver info web interfaces
    config.set(HConstants.MASTER_INFO_PORT, "-1");
    config.set(HConstants.REGIONSERVER_INFO_PORT, "-1");
    return config;
}
From source file: org.lilyproject.hadooptestfw.fork.HBaseTestingUtility.java
License: Apache License

/**
 * Creates an hbase rootdir in user home directory. Also creates hbase
 * version file. Normally you won't make use of this method. Root hbasedir
 * is created for you as part of mini cluster startup. You'd only use this
 * method if you were doing manual operation.
 *
 * @return Fully qualified path to hbase root dir
 */
public Path createRootDir() throws IOException {
    FileSystem fs = FileSystem.get(this.conf);
    // Lily change: create "hbase" subdirectory under home directory
    // to serve as hbaseRootdir. The home directory can contain other
    // directories and files, which are not necessarily hbase tables.
    // For instance a 'target' dir created by MiniMRCluster.
    // Cfr. HBASE-5317 and HBASE-4025
    Path hbaseRootdir = fs.makeQualified(new Path(fs.getHomeDirectory(), "hbase"));
    this.conf.set(HConstants.HBASE_DIR, hbaseRootdir.toString());
    fs.mkdirs(hbaseRootdir);
    FSUtils.setVersion(fs, hbaseRootdir);
    return hbaseRootdir;
}
From source file: org.lilyproject.testfw.HadoopLauncher.java
License: Apache License

public MiniHBaseCluster startMiniCluster(final int servers) throws Exception {
    // Make a new random dir to home everything in. Set it as system property.
    // minidfs reads home from system property.
    this.clusterTestBuildDir = setupClusterTestBuildDir();
    System.setProperty(TEST_DIRECTORY_KEY, this.clusterTestBuildDir.getPath());

    // Bring up mini dfs cluster. This spews a bunch of warnings about missing
    // scheme. Complaints are 'Scheme is undefined for build/test/data/dfs/name1'.
    startMiniDFSCluster(servers, this.clusterTestBuildDir);

    // Mangle conf so fs parameter points to minidfs we just started up
    FileSystem fs = this.dfsCluster.getFileSystem();
    this.conf.set("fs.defaultFS", fs.getUri().toString());
    // Do old style too just to be safe.
    this.conf.set("fs.default.name", fs.getUri().toString());
    this.dfsCluster.waitClusterUp();

    // Start up a zk cluster.
    if (this.zkCluster == null) {
        startMiniZKCluster(this.clusterTestBuildDir);
    }

    // Now do the mini hbase cluster. Set the hbase.rootdir in config.
    Path hbaseRootdir = fs.makeQualified(fs.getHomeDirectory());
    this.conf.set(HConstants.HBASE_DIR, hbaseRootdir.toString());
    fs.mkdirs(hbaseRootdir);
    FSUtils.setVersion(fs, hbaseRootdir);
    Configuration c = new Configuration(this.conf);
    this.hbaseCluster = new MiniHBaseCluster(c, servers);

    // Don't leave here till we've done a successful scan of the .META.
    HTable t = new HTable(c, HConstants.META_TABLE_NAME);
    ResultScanner s = t.getScanner(new Scan());
    while (s.next() != null) {
        continue;
    }

    return this.hbaseCluster;
}