Example usage for org.apache.hadoop.conf Configuration getStrings

Introduction

On this page you can find example usage for org.apache.hadoop.conf Configuration getStrings.

Prototype

public String[] getStrings(String name) 

Document

Get the comma-delimited values of the name property as an array of Strings. If no such property is specified, null is returned.
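
A minimal sketch of this behavior (the property names below are illustrative): getStrings(name) splits the stored value on commas and returns null when the property is unset, while the vararg overload supplies default values.

Configuration conf = new Configuration();
conf.set("example.hosts", "host1,host2,host3");

// The comma-delimited value is split into an array.
String[] hosts = conf.getStrings("example.hosts");               // ["host1", "host2", "host3"]

// Unset properties yield null, so callers typically guard against it...
String[] missing = conf.getStrings("example.unset");             // null

// ...or use the overload that accepts default values.
String[] defaulted = conf.getStrings("example.unset", "a", "b"); // ["a", "b"]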

Usage

From source file:com.datascience.hadoop.CsvInputFormat.java

License:Apache License

/**
 * Creates a CSV format from a Hadoop configuration.
 */
private static CSVFormat createFormat(Configuration conf) {
    CSVFormat format = CSVFormat
            .newFormat(conf.get(CSV_READER_DELIMITER, DEFAULT_CSV_READER_DELIMITER).charAt(0))
            .withSkipHeaderRecord(conf.getBoolean(CSV_READER_SKIP_HEADER, DEFAULT_CSV_READER_SKIP_HEADER))
            .withRecordSeparator(conf.get(CSV_READER_RECORD_SEPARATOR, DEFAULT_CSV_READER_RECORD_SEPARATOR))
            .withIgnoreEmptyLines(
                    conf.getBoolean(CSV_READER_IGNORE_EMPTY_LINES, DEFAULT_CSV_READER_IGNORE_EMPTY_LINES))
            .withIgnoreSurroundingSpaces(conf.getBoolean(CSV_READER_IGNORE_SURROUNDING_SPACES,
                    DEFAULT_CSV_READER_IGNORE_SURROUNDING_SPACES))
            .withNullString(conf.get(CSV_READER_NULL_STRING, DEFAULT_CSV_READER_NULL_STRING));

    String[] header = conf.getStrings(CSV_READER_COLUMNS);
    if (header != null && header.length > 0)
        format = format.withHeader(header);

    String escape = conf.get(CSV_READER_ESCAPE_CHARACTER, DEFAULT_CSV_READER_ESCAPE_CHARACTER);
    if (escape != null)
        format = format.withEscape(escape.charAt(0));

    String quote = conf.get(CSV_READER_QUOTE_CHARACTER, DEFAULT_CSV_READER_QUOTE_CHARACTER);
    if (quote != null)
        format = format.withQuote(quote.charAt(0));

    String quoteMode = conf.get(CSV_READER_QUOTE_MODE, DEFAULT_CSV_READER_QUOTE_MODE);
    if (quoteMode != null)
        format = format.withQuoteMode(QuoteMode.valueOf(quoteMode));
    return format;
}

From source file:com.datascience.hadoop.CsvOutputFormat.java

License:Apache License

/**
 * Creates a CSV format from a Hadoop configuration.
 */
private static CSVFormat createFormat(Configuration conf) {
    CSVFormat format = CSVFormat
            .newFormat(conf.get(CSV_WRITER_DELIMITER, DEFAULT_CSV_WRITER_DELIMITER).charAt(0))
            .withSkipHeaderRecord(conf.getBoolean(CSV_WRITER_SKIP_HEADER, DEFAULT_CSV_WRITER_SKIP_HEADER))
            .withRecordSeparator(conf.get(CSV_WRITER_RECORD_SEPARATOR, DEFAULT_CSV_WRITER_RECORD_SEPARATOR))
            .withIgnoreEmptyLines(
                    conf.getBoolean(CSV_WRITER_IGNORE_EMPTY_LINES, DEFAULT_CSV_WRITER_IGNORE_EMPTY_LINES))
            .withIgnoreSurroundingSpaces(conf.getBoolean(CSV_WRITER_IGNORE_SURROUNDING_SPACES,
                    DEFAULT_CSV_WRITER_IGNORE_SURROUNDING_SPACES))
            .withNullString(conf.get(CSV_WRITER_NULL_STRING, DEFAULT_CSV_WRITER_NULL_STRING));

    String[] header = conf.getStrings(CSV_WRITER_COLUMNS);
    if (header != null && header.length > 0)
        format = format.withHeader(header);

    String escape = conf.get(CSV_WRITER_ESCAPE_CHARACTER, DEFAULT_CSV_WRITER_ESCAPE_CHARACTER);
    if (escape != null)
        format = format.withEscape(escape.charAt(0));

    String quote = conf.get(CSV_WRITER_QUOTE_CHARACTER, DEFAULT_CSV_WRITER_QUOTE_CHARACTER);
    if (quote != null)
        format = format.withQuote(quote.charAt(0));

    String quoteMode = conf.get(CSV_WRITER_QUOTE_MODE, DEFAULT_CSV_WRITER_QUOTE_MODE);
    if (quoteMode != null)
        format = format.withQuoteMode(QuoteMode.valueOf(quoteMode));
    return format;
}

From source file:com.elex.dmp.lda.CVB0Driver.java

License:Apache License

public static Path[] getModelPaths(Configuration conf) {
    String[] modelPathNames = conf.getStrings(MODEL_PATHS);
    if (modelPathNames == null || modelPathNames.length == 0) {
        return null;
    }
    Path[] modelPaths = new Path[modelPathNames.length];
    for (int i = 0; i < modelPathNames.length; i++) {
        modelPaths[i] = new Path(modelPathNames[i]);
    }
    return modelPaths;
}
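
For context, the comma-delimited MODEL_PATHS property read here is presumably populated with Configuration.setStrings (or an equivalent helper in CVB0Driver) before job submission. A minimal sketch of that round trip, assuming path strings that contain no commas:

Configuration conf = new Configuration();
// Store the model locations as a single comma-delimited property.
conf.setStrings(MODEL_PATHS, "hdfs:///lda/model-0", "hdfs:///lda/model-1");

// Read them back as Path objects, mirroring getModelPaths above.
Path[] modelPaths = getModelPaths(conf);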

From source file:com.github.seqware.queryengine.plugins.runners.hbasemr.MRHBasePluginRunner.java

License:Open Source License

public static Class transferConfiguration(JobContext context, JobRunParameterInterface inter) {
    Configuration conf = context.getConfiguration();
    String[] strings = conf.getStrings(MRHBasePluginRunner.EXT_PARAMETERS);
    Logger.getLogger(PluginRunnerMapper.class.getName())
            .info("QEMapper configured with: host: "
                    + Constants.Term.HBASE_PROPERTIES.getTermValue(Map.class).toString() + " namespace: "
                    + Constants.Term.NAMESPACE.getTermValue(String.class));
    final String mapParameter = strings[SETTINGS_MAP];
    if (mapParameter != null && !mapParameter.isEmpty()) {
        Map<String, String> settingsMap = (Map<String, String>) ((Object[]) SerializationUtils
                .deserialize(Base64.decodeBase64(mapParameter)))[EXTERNAL_PARAMETERS];
        if (settingsMap != null) {
            Logger.getLogger(FeatureSetCountPlugin.class.getName())
                    .info("Settings map retrieved with " + settingsMap.size() + " entries");
            Constants.setSETTINGS_MAP(settingsMap);
        }
    }

    Logger.getLogger(PluginRunnerMapper.class.getName())
            .info("QEMapper configured with: host: "
                    + Constants.Term.HBASE_PROPERTIES.getTermValue(Map.class).toString() + " namespace: "
                    + Constants.Term.NAMESPACE.getTermValue(String.class));
    final String externalParameters = strings[EXTERNAL_PARAMETERS];
    if (externalParameters != null && !externalParameters.isEmpty()) {
        inter.setExt_parameters(
                (Object[]) SerializationUtils.deserialize(Base64.decodeBase64(externalParameters)));
    }
    final String internalParameters = strings[INTERNAL_PARAMETERS];
    if (internalParameters != null && !internalParameters.isEmpty()) {
        inter.setInt_parameters(
                (Object[]) SerializationUtils.deserialize(Base64.decodeBase64(internalParameters)));
    }
    final String sourceSets = strings[NUM_AND_SOURCE_FEATURE_SETS];
    if (sourceSets != null && !sourceSets.isEmpty()) {
        List<FeatureSet> sSets = convertBase64StrToFeatureSets(sourceSets);
        inter.setSourceSets(sSets);
    }
    final String destSetParameter = strings[DESTINATION_FEATURE_SET];
    if (destSetParameter != null && !destSetParameter.isEmpty()) {
        inter.setDestSet(SWQEFactory.getSerialization().deserialize(Base64.decodeBase64(destSetParameter),
                FeatureSet.class));
    }
    final String pluginParameter = strings[PLUGIN_CLASS];
    if (pluginParameter != null && !pluginParameter.isEmpty()) {
        Object deserialize = SerializationUtils.deserialize(Base64.decodeBase64(pluginParameter));
        Class plugin = (Class) deserialize;
        return plugin;
    }
    throw new RuntimeException("Could not determine plugin to run");
}

From source file:com.google.cloud.bigtable.mapreduce.Import.java

License:Open Source License

/**
 * Create a {@link Filter} to apply to all incoming keys ({@link KeyValue KeyValues}) so that
 * some can optionally be excluded from the job output
 * @param conf {@link Configuration} from which to load the filter
 * @return the filter to use for the task, or <tt>null</tt> if no filter should be used
 * @throws IllegalArgumentException if the filter is misconfigured
 */
public static Filter instantiateFilter(Configuration conf) {
    // get the filter, if it was configured    
    Class<? extends Filter> filterClass = conf.getClass(FILTER_CLASS_CONF_KEY, null, Filter.class);
    if (filterClass == null) {
        LOG.debug("No configured filter class, accepting all keyvalues.");
        return null;
    }
    LOG.debug("Attempting to create filter:" + filterClass);
    String[] filterArgs = conf.getStrings(FILTER_ARGS_CONF_KEY);
    ArrayList<byte[]> quotedArgs = toQuotedByteArrays(filterArgs);
    try {
        Method m = filterClass.getMethod("createFilterFromArguments", ArrayList.class);
        return (Filter) m.invoke(null, quotedArgs);
    } catch (IllegalAccessException e) {
        LOG.error("Couldn't instantiate filter!", e);
        throw new RuntimeException(e);
    } catch (SecurityException e) {
        LOG.error("Couldn't instantiate filter!", e);
        throw new RuntimeException(e);
    } catch (NoSuchMethodException e) {
        LOG.error("Couldn't instantiate filter!", e);
        throw new RuntimeException(e);
    } catch (IllegalArgumentException e) {
        LOG.error("Couldn't instantiate filter!", e);
        throw new RuntimeException(e);
    } catch (InvocationTargetException e) {
        LOG.error("Couldn't instantiate filter!", e);
        throw new RuntimeException(e);
    }
}
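
The filter class and arguments consumed above would have been placed in the configuration earlier, typically with setClass and setStrings. A minimal sketch of that setup, assuming HBase's PrefixFilter (which exposes the static createFilterFromArguments method looked up above) and an illustrative prefix argument:

Configuration conf = HBaseConfiguration.create();
conf.setClass(FILTER_CLASS_CONF_KEY, PrefixFilter.class, Filter.class);
// Each argument becomes one element of the comma-delimited property.
conf.setStrings(FILTER_ARGS_CONF_KEY, "row-prefix");

Filter filter = instantiateFilter(conf);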

From source file:com.intel.hadoop.hbase.dot.mapreduce.DotImportTsv.java

License:Apache License

private static void createTable(Configuration conf, String tableName) throws IOException {
    HTableDescriptor htd = new HTableDescriptor(tableName.getBytes());
    DotUtil.prepareDotTable(conf, htd);

    String columns[] = conf.getStrings(COLUMNS_CONF_KEY);
    Map<byte[], Map<byte[], JSONObject>> schemas = null;
    schemas = DotUtil.genSchema(columns, htd);
    Set<String> cfSet = new HashSet<String>();
    for (String aColumn : columns) {
        if (TsvParser.ROWKEY_COLUMN_SPEC.equals(aColumn))
            continue;
        // we are only concerned with the first one (in case this is a cf:cq)
        cfSet.add(aColumn.split(":", 2)[0]);
    }
    for (String cf : cfSet) {
        HColumnDescriptor hcd = new HColumnDescriptor(Bytes.toBytes(cf));
        DotUtil.prepareDotColumn(conf, hcd, htd, schemas);
        htd.addFamily(hcd);
    }
    hbaseAdmin.createTable(htd);
}

From source file:com.intel.hadoop.hbase.dot.mapreduce.DotImportTsv.java

License:Apache License

/**
 * Main entry point.
 *
 * @param args  The command line parameters.
 * @throws Exception When running the job fails.
 */
public static void main(String[] args) throws Exception {
    Configuration conf = HBaseConfiguration.create();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs.length < 2) {
        usage("Wrong number of arguments: " + otherArgs.length);
        System.exit(-1);
    }

    // Make sure columns are specified
    String columns[] = conf.getStrings(COLUMNS_CONF_KEY);
    if (columns == null) {
        usage("No columns specified. Please specify with -D" + COLUMNS_CONF_KEY + "=...");
        System.exit(-1);
    }

    // Make sure they specify exactly one column as the row key
    int rowkeysFound = 0;
    for (String col : columns) {
        if (col.equals(TsvParser.ROWKEY_COLUMN_SPEC))
            rowkeysFound++;
    }
    if (rowkeysFound != 1) {
        usage("Must specify exactly one column as " + TsvParser.ROWKEY_COLUMN_SPEC);
        System.exit(-1);
    }

    // Make sure one or more columns are specified
    if (columns.length < 2) {
        usage("One or more columns in addition to the row key are required");
        System.exit(-1);
    }
    hbaseAdmin = new HBaseAdmin(conf);
    Job job = createSubmittableJob(conf, otherArgs);
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}

From source file:com.linkedin.thirdeye.hadoop.aggregation.AggregationPhaseTest.java

License:Apache License

private void setUpAvroSerialization(Configuration conf, Schema inputSchema) {
    String[] currentSerializations = conf.getStrings(HADOOP_IO_SERIALIZATION);
    String[] finalSerializations = new String[currentSerializations.length + 1];
    System.arraycopy(currentSerializations, 0, finalSerializations, 0, currentSerializations.length);
    finalSerializations[finalSerializations.length - 1] = AvroSerialization.class.getName();
    mapDriver.getConfiguration().setStrings(HADOOP_IO_SERIALIZATION, finalSerializations);

    AvroSerialization.addToConfiguration(conf);
    AvroSerialization.setKeyWriterSchema(conf, inputSchema);
    AvroSerialization.setValueWriterSchema(conf, Schema.create(Schema.Type.NULL));
}
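
The array copy above simply appends AvroSerialization to the comma-delimited io.serializations list before the test runs. A more compact sketch of the same append, assuming the property already holds at least one serialization (getStrings returns null otherwise):

String[] current = conf.getStrings(HADOOP_IO_SERIALIZATION);
String[] updated = java.util.Arrays.copyOf(current, current.length + 1);
updated[updated.length - 1] = AvroSerialization.class.getName();
// setStrings joins the values back into a single comma-delimited property.
conf.setStrings(HADOOP_IO_SERIALIZATION, updated);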

From source file:com.linkedin.thirdeye.hadoop.derivedcolumn.transformation.DerivedColumnNoTransformationTest.java

License:Apache License

private void setUpAvroSerialization(Configuration conf, Schema inputSchema) {
    String[] currentSerializations = conf.getStrings(HADOOP_IO_SERIALIZATION);
    String[] finalSerializations = new String[currentSerializations.length + 1];
    System.arraycopy(currentSerializations, 0, finalSerializations, 0, currentSerializations.length);
    finalSerializations[finalSerializations.length - 1] = AvroSerialization.class.getName();
    mapDriver.getConfiguration().setStrings(HADOOP_IO_SERIALIZATION, finalSerializations);

    AvroSerialization.addToConfiguration(conf);
    AvroSerialization.setKeyWriterSchema(conf, inputSchema);
    AvroSerialization.setValueWriterSchema(conf, Schema.create(Schema.Type.NULL));

}

From source file:com.linkedin.thirdeye.hadoop.derivedcolumn.transformation.DerivedColumnNoTransformationTest.java

License:Apache License

private void resetAvroSerialization() throws IOException {
    Configuration conf = mapDriver.getConfiguration();
    conf.set("io.serializations", "org.apache.hadoop.io.serializer.JavaSerialization,"
            + "org.apache.hadoop.io.serializer.WritableSerialization");
    Schema outputSchema = new Schema.Parser()
            .parse(ClassLoader.getSystemResourceAsStream(NO_TRANSFORMATION_SCHEMA));

    String[] currentSerializations = conf.getStrings(HADOOP_IO_SERIALIZATION);
    String[] finalSerializations = new String[currentSerializations.length + 1];
    System.arraycopy(currentSerializations, 0, finalSerializations, 0, currentSerializations.length);
    finalSerializations[finalSerializations.length - 1] = AvroSerialization.class.getName();
    mapDriver.getConfiguration().setStrings(HADOOP_IO_SERIALIZATION, finalSerializations);

    AvroSerialization.addToConfiguration(conf);
    AvroSerialization.setKeyWriterSchema(conf, outputSchema);
    AvroSerialization.setValueWriterSchema(conf, Schema.create(Schema.Type.NULL));

}