Example usage for org.apache.hadoop.conf Configuration getStrings

Introduction

On this page you can find example usage for org.apache.hadoop.conf Configuration getStrings.

Prototype

public String[] getStrings(String name) 

Document

Get the comma-delimited values of the name property as an array of Strings. If no such property is specified, null is returned.
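
A minimal sketch of this behavior (the property names below are illustrative): getStrings(name) splits the stored value on commas and returns null when the property is unset, while the vararg overload supplies default values.

Configuration conf = new Configuration();
conf.set("example.hosts", "host1,host2,host3");

// The comma-delimited value is split into an array.
String[] hosts = conf.getStrings("example.hosts");               // ["host1", "host2", "host3"]

// Unset properties yield null, so callers typically guard against it...
String[] missing = conf.getStrings("example.unset");             // null

// ...or use the overload that accepts default values.
String[] defaulted = conf.getStrings("example.unset", "a", "b"); // ["a", "b"]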

Usage

From source file:com.datascience.hadoop.CsvInputFormat.java

License:Apache License

/**
 * Creates a CSV format from a Hadoop configuration.
 */
private static CSVFormat createFormat(Configuration conf) {
    CSVFormat format = CSVFormat
            .newFormat(conf.get(CSV_READER_DELIMITER, DEFAULT_CSV_READER_DELIMITER).charAt(0))
            .withSkipHeaderRecord(conf.getBoolean(CSV_READER_SKIP_HEADER, DEFAULT_CSV_READER_SKIP_HEADER))
            .withRecordSeparator(conf.get(CSV_READER_RECORD_SEPARATOR, DEFAULT_CSV_READER_RECORD_SEPARATOR))
            .withIgnoreEmptyLines(
                    conf.getBoolean(CSV_READER_IGNORE_EMPTY_LINES, DEFAULT_CSV_READER_IGNORE_EMPTY_LINES))
            .withIgnoreSurroundingSpaces(conf.getBoolean(CSV_READER_IGNORE_SURROUNDING_SPACES,
                    DEFAULT_CSV_READER_IGNORE_SURROUNDING_SPACES))
            .withNullString(conf.get(CSV_READER_NULL_STRING, DEFAULT_CSV_READER_NULL_STRING));

    String[] header = conf.getStrings(CSV_READER_COLUMNS);
    if (header != null && header.length > 0)
        format = format.withHeader(header);

    String escape = conf.get(CSV_READER_ESCAPE_CHARACTER, DEFAULT_CSV_READER_ESCAPE_CHARACTER);
    if (escape != null)
        format = format.withEscape(escape.charAt(0));

    String quote = conf.get(CSV_READER_QUOTE_CHARACTER, DEFAULT_CSV_READER_QUOTE_CHARACTER);
    if (quote != null)
        format = format.withQuote(quote.charAt(0));

    String quoteMode = conf.get(CSV_READER_QUOTE_MODE, DEFAULT_CSV_READER_QUOTE_MODE);
    if (quoteMode != null)
        format = format.withQuoteMode(QuoteMode.valueOf(quoteMode));
    return format;
}

From source file:com.datascience.hadoop.CsvOutputFormat.java

License:Apache License

/**
 * Creates a CSV format from a Hadoop configuration.
 */
private static CSVFormat createFormat(Configuration conf) {
    CSVFormat format = CSVFormat
            .newFormat(conf.get(CSV_WRITER_DELIMITER, DEFAULT_CSV_WRITER_DELIMITER).charAt(0))
            .withSkipHeaderRecord(conf.getBoolean(CSV_WRITER_SKIP_HEADER, DEFAULT_CSV_WRITER_SKIP_HEADER))
            .withRecordSeparator(conf.get(CSV_WRITER_RECORD_SEPARATOR, DEFAULT_CSV_WRITER_RECORD_SEPARATOR))
            .withIgnoreEmptyLines(
                    conf.getBoolean(CSV_WRITER_IGNORE_EMPTY_LINES, DEFAULT_CSV_WRITER_IGNORE_EMPTY_LINES))
            .withIgnoreSurroundingSpaces(conf.getBoolean(CSV_WRITER_IGNORE_SURROUNDING_SPACES,
                    DEFAULT_CSV_WRITER_IGNORE_SURROUNDING_SPACES))
            .withNullString(conf.get(CSV_WRITER_NULL_STRING, DEFAULT_CSV_WRITER_NULL_STRING));

    String[] header = conf.getStrings(CSV_WRITER_COLUMNS);
    if (header != null && header.length > 0)
        format = format.withHeader(header);

    String escape = conf.get(CSV_WRITER_ESCAPE_CHARACTER, DEFAULT_CSV_WRITER_ESCAPE_CHARACTER);
    if (escape != null)
        format = format.withEscape(escape.charAt(0));

    String quote = conf.get(CSV_WRITER_QUOTE_CHARACTER, DEFAULT_CSV_WRITER_QUOTE_CHARACTER);
    if (quote != null)
        format = format.withQuote(quote.charAt(0));

    String quoteMode = conf.get(CSV_WRITER_QUOTE_MODE, DEFAULT_CSV_WRITER_QUOTE_MODE);
    if (quoteMode != null)
        format = format.withQuoteMode(QuoteMode.valueOf(quoteMode));
    return format;
}

From source file:com.elex.dmp.lda.CVB0Driver.java

License:Apache License

public static Path[] getModelPaths(Configuration conf) {
    String[] modelPathNames = conf.getStrings(MODEL_PATHS);
    if (modelPathNames == null || modelPathNames.length == 0) {
        return null;
    }
    Path[] modelPaths = new Path[modelPathNames.length];
    for (int i = 0; i < modelPathNames.length; i++) {
        modelPaths[i] = new Path(modelPathNames[i]);
    }
    return modelPaths;
}
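
For context, the comma-delimited MODEL_PATHS property read here is presumably populated with Configuration.setStrings (or an equivalent helper in CVB0Driver) before job submission. A minimal sketch of that round trip, assuming path strings that contain no commas:

Configuration conf = new Configuration();
// Store the model locations as a single comma-delimited property.
conf.setStrings(MODEL_PATHS, "hdfs:///lda/model-0", "hdfs:///lda/model-1");

// Read them back as Path objects, mirroring getModelPaths above.
Path[] modelPaths = getModelPaths(conf);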

From source file:com.github.seqware.queryengine.plugins.runners.hbasemr.MRHBasePluginRunner.java

License:Open Source License

public static Class transferConfiguration(JobContext context, JobRunParameterInterface inter) {
    Configuration conf = context.getConfiguration();
    String[] strings = conf.getStrings(MRHBasePluginRunner.EXT_PARAMETERS);
    Logger.getLogger(PluginRunnerMapper.class.getName())
            .info("QEMapper configured with: host: "
                    + Constants.Term.HBASE_PROPERTIES.getTermValue(Map.class).toString() + " namespace: "
                    + Constants.Term.NAMESPACE.getTermValue(String.class));
    final String mapParameter = strings[SETTINGS_MAP];
    if (mapParameter != null && !mapParameter.isEmpty()) {
        Map<String, String> settingsMap = (Map<String, String>) ((Object[]) SerializationUtils
                .deserialize(Base64.decodeBase64(mapParameter)))[EXTERNAL_PARAMETERS];
        if (settingsMap != null) {
            Logger.getLogger(FeatureSetCountPlugin.class.getName())
                    .info("Settings map retrieved with " + settingsMap.size() + " entries");
            Constants.setSETTINGS_MAP(settingsMap);
        }
    }

    Logger.getLogger(PluginRunnerMapper.class.getName())
            .info("QEMapper configured with: host: "
                    + Constants.Term.HBASE_PROPERTIES.getTermValue(Map.class).toString() + " namespace: "
                    + Constants.Term.NAMESPACE.getTermValue(String.class));
    final String externalParameters = strings[EXTERNAL_PARAMETERS];
    if (externalParameters != null && !externalParameters.isEmpty()) {
        inter.setExt_parameters(
                (Object[]) SerializationUtils.deserialize(Base64.decodeBase64(externalParameters)));
    }
    final String internalParameters = strings[INTERNAL_PARAMETERS];
    if (internalParameters != null && !internalParameters.isEmpty()) {
        inter.setInt_parameters(
                (Object[]) SerializationUtils.deserialize(Base64.decodeBase64(internalParameters)));
    }
    final String sourceSets = strings[NUM_AND_SOURCE_FEATURE_SETS];
    if (sourceSets != null && !sourceSets.isEmpty()) {
        List<FeatureSet> sSets = convertBase64StrToFeatureSets(sourceSets);
        inter.setSourceSets(sSets);
    }
    final String destSetParameter = strings[DESTINATION_FEATURE_SET];
    if (destSetParameter != null && !destSetParameter.isEmpty()) {
        inter.setDestSet(SWQEFactory.getSerialization().deserialize(Base64.decodeBase64(destSetParameter),
                FeatureSet.class));
    }
    final String pluginParameter = strings[PLUGIN_CLASS];
    if (pluginParameter != null && !pluginParameter.isEmpty()) {
        Object deserialize = SerializationUtils.deserialize(Base64.decodeBase64(pluginParameter));
        Class plugin = (Class) deserialize;
        return plugin;
    }
    throw new RuntimeException("Could not determine plugin to run");
}

From source file:com.google.cloud.bigtable.mapreduce.Import.java

License:Open Source License

/**
 * Create a {@link Filter} to apply to all incoming keys ({@link KeyValue KeyValues}) so that
 * some can optionally be excluded from the job output
 * @param conf {@link Configuration} from which to load the filter
 * @return the filter to use for the task, or <tt>null</tt> if no filter should be used
 * @throws IllegalArgumentException if the filter is misconfigured
 */
public static Filter instantiateFilter(Configuration conf) {
    // get the filter, if it was configured    
    Class<? extends Filter> filterClass = conf.getClass(FILTER_CLASS_CONF_KEY, null, Filter.class);
    if (filterClass == null) {
        LOG.debug("No configured filter class, accepting all keyvalues.");
        return null;
    }
    LOG.debug("Attempting to create filter:" + filterClass);
    String[] filterArgs = conf.getStrings(FILTER_ARGS_CONF_KEY);
    ArrayList<byte[]> quotedArgs = toQuotedByteArrays(filterArgs);
    try {
        Method m = filterClass.getMethod("createFilterFromArguments", ArrayList.class);
        return (Filter) m.invoke(null, quotedArgs);
    } catch (IllegalAccessException e) {
        LOG.error("Couldn't instantiate filter!", e);
        throw new RuntimeException(e);
    } catch (SecurityException e) {
        LOG.error("Couldn't instantiate filter!", e);
        throw new RuntimeException(e);
    } catch (NoSuchMethodException e) {
        LOG.error("Couldn't instantiate filter!", e);
        throw new RuntimeException(e);
    } catch (IllegalArgumentException e) {
        LOG.error("Couldn't instantiate filter!", e);
        throw new RuntimeException(e);
    } catch (InvocationTargetException e) {
        LOG.error("Couldn't instantiate filter!", e);
        throw new RuntimeException(e);
    }
}
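
The filter class and arguments consumed above would have been placed in the configuration earlier, typically with setClass and setStrings. A minimal sketch of that setup, assuming HBase's PrefixFilter (which exposes the static createFilterFromArguments method looked up above) and an illustrative prefix argument:

Configuration conf = HBaseConfiguration.create();
conf.setClass(FILTER_CLASS_CONF_KEY, PrefixFilter.class, Filter.class);
// Each argument becomes one element of the comma-delimited property.
conf.setStrings(FILTER_ARGS_CONF_KEY, "row-prefix");

Filter filter = instantiateFilter(conf);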

From source file:com.intel.hadoop.hbase.dot.mapreduce.DotImportTsv.java

License:Apache License

private static void createTable(Configuration conf, String tableName) throws IOException {
    HTableDescriptor htd = new HTableDescriptor(tableName.getBytes());
    DotUtil.prepareDotTable(conf, htd);

    String columns[] = conf.getStrings(COLUMNS_CONF_KEY);
    Map<byte[], Map<byte[], JSONObject>> schemas = null;
    schemas = DotUtil.genSchema(columns, htd);
    Set<String> cfSet = new HashSet<String>();
    for (String aColumn : columns) {
        if (TsvParser.ROWKEY_COLUMN_SPEC.equals(aColumn))
            continue;
        // we are only concerned with the first one (in case this is a cf:cq)
        cfSet.add(aColumn.split(":", 2)[0]);
    }
    for (String cf : cfSet) {
        HColumnDescriptor hcd = new HColumnDescriptor(Bytes.toBytes(cf));
        DotUtil.prepareDotColumn(conf, hcd, htd, schemas);
        htd.addFamily(hcd);
    }
    hbaseAdmin.createTable(htd);
}

From source file:com.intel.hadoop.hbase.dot.mapreduce.DotImportTsv.java

License:Apache License

/**
 * Main entry point.
 *
 * @param args  The command line parameters.
 * @throws Exception When running the job fails.
 */
public static void main(String[] args) throws Exception {
    Configuration conf = HBaseConfiguration.create();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs.length < 2) {
        usage("Wrong number of arguments: " + otherArgs.length);
        System.exit(-1);
    }

    // Make sure columns are specified
    String columns[] = conf.getStrings(COLUMNS_CONF_KEY);
    if (columns == null) {
        usage("No columns specified. Please specify with -D" + COLUMNS_CONF_KEY + "=...");
        System.exit(-1);
    }

    // Make sure they specify exactly one column as the row key
    int rowkeysFound = 0;
    for (String col : columns) {
        if (col.equals(TsvParser.ROWKEY_COLUMN_SPEC))
            rowkeysFound++;
    }
    if (rowkeysFound != 1) {
        usage("Must specify exactly one column as " + TsvParser.ROWKEY_COLUMN_SPEC);
        System.exit(-1);
    }

    // Make sure one or more columns are specified
    if (columns.length < 2) {
        usage("One or more columns in addition to the row key are required");
        System.exit(-1);
    }
    hbaseAdmin = new HBaseAdmin(conf);
    Job job = createSubmittableJob(conf, otherArgs);
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}

From source file:com.linkedin.thirdeye.hadoop.aggregation.AggregationPhaseTest.java

License:Apache License

private void setUpAvroSerialization(Configuration conf, Schema inputSchema) {
    String[] currentSerializations = conf.getStrings(HADOOP_IO_SERIALIZATION);
    String[] finalSerializations = new String[currentSerializations.length + 1];
    System.arraycopy(currentSerializations, 0, finalSerializations, 0, currentSerializations.length);
    finalSerializations[finalSerializations.length - 1] = AvroSerialization.class.getName();
    mapDriver.getConfiguration().setStrings(HADOOP_IO_SERIALIZATION, finalSerializations);

    AvroSerialization.addToConfiguration(conf);
    AvroSerialization.setKeyWriterSchema(conf, inputSchema);
    AvroSerialization.setValueWriterSchema(conf, Schema.create(Schema.Type.NULL));
}
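
The array copy above simply appends AvroSerialization to the comma-delimited io.serializations list before the test runs. A more compact sketch of the same append, assuming the property already holds at least one serialization (getStrings returns null otherwise):

String[] current = conf.getStrings(HADOOP_IO_SERIALIZATION);
String[] updated = java.util.Arrays.copyOf(current, current.length + 1);
updated[updated.length - 1] = AvroSerialization.class.getName();
// setStrings joins the values back into a single comma-delimited property.
conf.setStrings(HADOOP_IO_SERIALIZATION, updated);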

From source file:com.linkedin.thirdeye.hadoop.derivedcolumn.transformation.DerivedColumnNoTransformationTest.java

License:Apache License

private void setUpAvroSerialization(Configuration conf, Schema inputSchema) {
    String[] currentSerializations = conf.getStrings(HADOOP_IO_SERIALIZATION);
    String[] finalSerializations = new String[currentSerializations.length + 1];
    System.arraycopy(currentSerializations, 0, finalSerializations, 0, currentSerializations.length);
    finalSerializations[finalSerializations.length - 1] = AvroSerialization.class.getName();
    mapDriver.getConfiguration().setStrings(HADOOP_IO_SERIALIZATION, finalSerializations);

    AvroSerialization.addToConfiguration(conf);
    AvroSerialization.setKeyWriterSchema(conf, inputSchema);
    AvroSerialization.setValueWriterSchema(conf, Schema.create(Schema.Type.NULL));

}

From source file:com.linkedin.thirdeye.hadoop.derivedcolumn.transformation.DerivedColumnNoTransformationTest.java

License:Apache License

private void resetAvroSerialization() throws IOException {
    Configuration conf = mapDriver.getConfiguration();
    conf.set("io.serializations", "org.apache.hadoop.io.serializer.JavaSerialization,"
            + "org.apache.hadoop.io.serializer.WritableSerialization");
    Schema outputSchema = new Schema.Parser()
            .parse(ClassLoader.getSystemResourceAsStream(NO_TRANSFORMATION_SCHEMA));

    String[] currentSerializations = conf.getStrings(HADOOP_IO_SERIALIZATION);
    String[] finalSerializations = new String[currentSerializations.length + 1];
    System.arraycopy(currentSerializations, 0, finalSerializations, 0, currentSerializations.length);
    finalSerializations[finalSerializations.length - 1] = AvroSerialization.class.getName();
    mapDriver.getConfiguration().setStrings(HADOOP_IO_SERIALIZATION, finalSerializations);

    AvroSerialization.addToConfiguration(conf);
    AvroSerialization.setKeyWriterSchema(conf, outputSchema);
    AvroSerialization.setValueWriterSchema(conf, Schema.create(Schema.Type.NULL));

}