public String get(String name) 

Gets the value of the name property, or null if no such property exists.


From source file:be.ugent.intec.halvade.utils.HalvadeConf.java

License:Open Source License

public static boolean inputIsBam(Configuration conf) {
    String s = conf.get(inputIsBam);
    if (s.equalsIgnoreCase("true"))
        return true;
    else
        return false;

From source file:bulkload.ImportTsv.java

License:Apache License

/**
 * Validates the command-line arguments and the configuration, then creates the
 * import job and waits for it to complete.
 *
 * NOTE(review): this excerpt appears to have lost lines (closing braces and,
 * presumably, the statement that increments rowkeysFound) - confirm against the
 * full source before relying on the flow shown here.
 *
 * @param args command-line parameters; fewer than two is rejected with a usage message
 * @return -1 after a usage message when a check fails; otherwise 0 if the job
 *         reports successful completion and 1 if it does not
 * @throws Exception declared by the signature; presumably raised by job setup or execution
 */
public int run(String[] args) throws Exception {
    if (args.length < 2) {
        usage("Wrong number of arguments: " + args.length);
        return -1;
    }
    Configuration conf = getConf();
    // Make sure columns are specified
    String columns[] = conf.getStrings(COLUMNS_CONF_KEY);
    if (columns == null) {
        usage("No columns specified. Please specify with -D" + COLUMNS_CONF_KEY + "=...");
        return -1;
    // Make sure rowkey is specified
    String rowkey = conf.get(ROWKEY_CONF_KEY);
    if (StringUtil.isEmpty(rowkey)) {
        usage("No rowkey specified or rowkey is empty. Please specify with -D" + ROWKEY_CONF_KEY + "=...");
        return -1;
    // Make sure rowkey handler is specified
    String rowKeyGenerator = conf.get(ROWKEY_GENERATOR_CONF_KEY);
    if (StringUtil.isEmpty(rowKeyGenerator)) {
        usage("No rowkey_handler specified or rowkey generator is empty. Please specify with -D"
                + ROWKEY_GENERATOR_CONF_KEY + "=...");
        return -1;
    // Make sure they specify exactly one column as the row key
    int rowkeysFound = 0;
    for (String col : columns) {
        // Each column spec is split on ':' into at most three parts; the second
        // part is the one compared against the configured rowkey name.
        String[] parts = col.split(":", 3);
        if (parts.length > 1 && rowkey.equals(parts[1])) {
    if (rowkeysFound != 1) {
        usage("Must specify exactly one column as " + rowkey);
        return -1;
    // Make sure at least one column is specified
    if (columns.length < 1) {
        usage("One or more columns in addition to the row key are required");

    // All checks passed: build the job and block until it finishes.
    Job job = createSubmittableJob(conf, args);
    return job.waitForCompletion(true) ? 0 : 1;

From source file:bulkload.ImportTsv.java

License:Apache License

/**
 * Sets up the actual job.
 *
 * @param conf
 *            The current configuration.
 * @param args
 *            The command line parameters.
 * @return The newly created job.
 * @throws IOException
 *             When setting up the job fails.
 */
public static Job createSubmittableJob(Configuration conf, String[] args) throws IOException {
    // NOTE(review): this excerpt appears to have lost lines (closing braces and the
    // body of the jar-filter loop) - confirm against the full source.

    Job job = null;
    // Connection and Admin are released automatically by try-with-resources.
    try (Connection connection = ConnectionFactory.createConnection(conf)) {
        try (Admin admin = connection.getAdmin()) {
            // Support non-XML supported characters
            // by re-encoding the passed separator as a Base64 string.
            String actualSeparator = conf.get(SEPARATOR_CONF_KEY);
            if (actualSeparator != null) {
                // NOTE(review): getBytes() with no argument uses the platform default
                // charset - consider passing an explicit charset.
                conf.set(SEPARATOR_CONF_KEY, Base64.encodeBytes(actualSeparator.getBytes()));
            // args[0] names the target table, which must already exist.
            TableName tableName = TableName.valueOf(args[0]);
            if (!admin.tableExists(tableName)) {
                String errorMsg = format("Table '%s' does not exist.", tableName);
                throw new TableNotFoundException(errorMsg);
            // args[1] is the input directory.
            Path inputDir = new Path(args[1]);
            // The job name falls back to NAME + "_" + table name when JOB_NAME_CONF_KEY is not set.
            String jobName = conf.get(JOB_NAME_CONF_KEY, NAME + "_" + tableName.getNameAsString());
            job = Job.getInstance(conf, jobName);
            FileInputFormat.setInputPaths(job, inputDir);

            // A configured bulk-output path selects HFile output; otherwise the job
            // writes to the table directly (else branch below).
            String hfileOutPath = conf.get(BULK_OUTPUT_CONF_KEY);
            if (hfileOutPath != null) {
                try (HTable table = (HTable) connection.getTable(tableName)) {
                    Path outputDir = new Path(hfileOutPath);
                    FileSystem fs = FileSystem.get(conf);
                    // An existing output directory is removed recursively; failure to
                    // remove it aborts job setup.
                    if (fs.exists(outputDir)) {
                        if (!fs.delete(outputDir, true)) {
                            throw new IllegalStateException("delete path:" + outputDir + " failed");
                    FileOutputFormat.setOutputPath(job, outputDir);
                    HFileOutputFormat2.configureIncrementalLoad(job, table, table);
            } else {
                // No reducers. Just write straight to table. Call
                // initTableReducerJob
                // to set up the TableOutputFormat.
                TableMapReduceUtil.initTableReducerJob(tableName.getNameAsString(), null, job);

                //               TableMapReduceUtil.addDependencyJars(job);
                //               TableMapReduceUtil.addDependencyJars(job.getConfiguration(),
                //                     com.google.common.base.Function.class /* Guava used by TsvParser */);

            // Workaround to remove unnecessary hadoop dependencies
            // NOTE(review): get("tmpjars") returns null when the property is unset, which
            // would throw a NullPointerException here - confirm it is always populated.
            String[] jars = job.getConfiguration().get("tmpjars").split(",", -1);
            StringBuilder filteredJars = new StringBuilder();
            for (String j : jars) {
                // The last '/'-separated segment is treated as the jar file name.
                String[] parts = j.split("/", -1);
                String fileName = parts[parts.length - 1];
                // True when the file name does not start with "hadoop-".
                if (fileName.indexOf("hadoop-") != 0) {
            job.getConfiguration().set("tmpjars", filteredJars.toString());

    return job;

From source file:ca.uwaterloo.iss4e.hadoop.io.CartesianInputFormat.java

License:Open Source License

private List<InputSplit> getInputSplits(JobContext jobContext, String inputFormatClass, Path path)
        throws ClassNotFoundException, IOException {
    Configuration conf = jobContext.getConfiguration();
    FileInputFormat inputFormat = (FileInputFormat) ReflectionUtils.newInstance(Class.forName(inputFormatClass),
            conf);

    // Set the input path for the left data set
    path = path.getFileSystem(conf).makeQualified(path);
    String dirStr = StringUtils.escapeString(path.toString());
    String dirs = conf.get(INPUT_DIR);
    conf.set(INPUT_DIR, dirStr);
    return inputFormat.getSplits(jobContext);

From source file:ca.uwaterloo.iss4e.hadoop.io.CartesianInputFormat.java

License:Open Source License

/**
 * Builds the list of composite input splits from the left and right data sets.
 *
 * The input-format class names and input paths of both sides are read from the job
 * configuration, and each side's splits are obtained through getInputSplits.
 *
 * NOTE(review): the inner loop body that fills the result list, and the closing
 * braces, appear to be missing from this excerpt - confirm against the full source.
 *
 * @param job job context supplying the configuration
 * @return the list of composite splits
 * @throws IOException on I/O failure; also wraps ClassNotFoundException and
 *         InterruptedException raised while building the splits
 */
public List<InputSplit> getSplits(JobContext job) throws IOException {

    try {
        // Get the input splits from both the left and right data sets
        Configuration conf = job.getConfiguration();
        List<InputSplit> leftSplits = getInputSplits(job, conf.get(LEFT_INPUT_FORMAT),
                new Path(conf.get(LEFT_INPUT_PATH)));
        List<InputSplit> rightSplits = getInputSplits(job, conf.get(RIGHT_INPUT_FORMAT),
                new Path(conf.get(RIGHT_INPUT_PATH)));

        // Create our CompositeInputSplits, size equal to left.length *
        // right.length
        List<InputSplit> compoisteInputSplits = new ArrayList<InputSplit>();

        // For each of the left input splits
        for (InputSplit left : leftSplits) {
            // For each of the right input splits
            for (InputSplit right : rightSplits) {
                // Create a new composite input split composing of the
                // two
                CompositeInputSplit returnSplits = new CompositeInputSplit(2);

        // Return the composite splits
        LOG.info("Total CompositeSplits to process: " + compoisteInputSplits.size());
        return compoisteInputSplits;
    } catch (ClassNotFoundException e) {
        throw new IOException(e);
    } catch (InterruptedException e) {
        // NOTE(review): the thread's interrupt status is not restored before wrapping.
        throw new IOException(e);

From source file:ca.uwaterloo.iss4e.hadoop.io.CartesianRecordReader.java

License:Open Source License

/**
 * Creates a reader over the two halves of a composite split.
 *
 * Element 0 of the split is kept as the left split and element 1 as the right
 * split. The left and right input formats are instantiated from the class names
 * stored under CartesianInputFormat.LEFT_INPUT_FORMAT and RIGHT_INPUT_FORMAT, and
 * a record reader is created for the left split.
 *
 * NOTE(review): the end of the try block and the closing braces appear to be
 * missing from this excerpt - confirm against the full source.
 *
 * @param split composite split holding the left and right splits
 * @param taskAttemptContext task context; also retained for later use on the right side
 * @throws IOException on I/O failure; also wraps ClassNotFoundException and
 *         InterruptedException raised during setup
 */
public CartesianRecordReader(CompositeInputSplit split, TaskAttemptContext taskAttemptContext)
        throws IOException {

    this.leftIS = split.get(0);
    this.rightIS = split.get(1);
    this.rightTaskAttemptContext = taskAttemptContext;
    this.key = new Text();
    this.value = new Text();
    Configuration conf = rightTaskAttemptContext.getConfiguration();
    try {
        // Create left record reader
        FileInputFormat leftFIF = (FileInputFormat) ReflectionUtils
                .newInstance(Class.forName(conf.get(CartesianInputFormat.LEFT_INPUT_FORMAT)), conf);

        leftRR = leftFIF.createRecordReader(leftIS, taskAttemptContext);

        // Create right record reader
        // The right input format is stored in a field rather than a local.
        rightFIF = (FileInputFormat) ReflectionUtils
                .newInstance(Class.forName(conf.get(CartesianInputFormat.RIGHT_INPUT_FORMAT)), conf);
    } catch (ClassNotFoundException e) {
        throw new IOException(e);
    } catch (InterruptedException e) {
        // NOTE(review): the thread's interrupt status is not restored before wrapping.
        throw new IOException(e);

From source file:cascading.flow.hadoop.util.HadoopUtil.java

License:Open Source License

public static <T> ObjectSerializer instantiateSerializer(Configuration conf, Class<T> type)
        throws ClassNotFoundException {
    Class<ObjectSerializer> flowSerializerClass;

    String serializerClassName = conf.get(ObjectSerializer.OBJECT_SERIALIZER_PROPERTY);

    if (serializerClassName == null || serializerClassName.length() == 0)
        flowSerializerClass = (Class<ObjectSerializer>) DEFAULT_OBJECT_SERIALIZER;
    else
        flowSerializerClass = (Class<ObjectSerializer>) Class.forName(serializerClassName);

    ObjectSerializer objectSerializer;

    try {
        objectSerializer = flowSerializerClass.newInstance();

        if (objectSerializer instanceof Configurable)
            ((Configurable) objectSerializer).setConf(conf);
    } catch (Exception exception) {
        throw new IllegalArgumentException("Unable to instantiate serializer \"" + flowSerializerClass.getName()
                + "\" for class: " + type.getName());

    if (!objectSerializer.accepts(type))
        throw new IllegalArgumentException(
                serializerClassName + " won't accept objects of class " + type.toString());

    return objectSerializer;

From source file:cascading.flow.hadoop.util.HadoopUtil.java

License:Open Source License

public static boolean isLocal(Configuration conf) {
    // hadoop 1.0 and 2.0 use different properties to define local mode: we check the new YARN
    // property first
    String frameworkName = conf.get("mapreduce.framework.name");

    // we are running on hadoop 2.0 (YARN)
    if (frameworkName != null)
        return frameworkName.equals("local");

    // for Tez
    String tezLocal = conf.get("tez.local.mode");

    if (tezLocal != null)
        return tezLocal.equals("true");

    // hadoop 1.0: use the old property to determine the local mode
    return conf.get("mapred.job.tracker").equals("local");

From source file:cascading.flow.hadoop.util.HadoopUtil.java

License:Open Source License

public static boolean isYARN(Configuration conf) {
    return conf.get("mapreduce.framework.name") != null;

From source file:cascading.flow.hadoop.util.HadoopUtil.java

License:Open Source License

public static void addInputPath(Configuration conf, Path path) {
    Path workingDirectory = getWorkingDirectory(conf);
    path = new Path(workingDirectory, path);
    String dirStr = StringUtils.escapeString(path.toString());
    String dirs = conf.get("mapred.input.dir");
    conf.set("mapred.input.dir", dirs == null ? dirStr : dirs + StringUtils.COMMA_STR + dirStr);