List of usage examples for org.apache.hadoop.conf Configuration getValByRegex
public Map<String, String> getValByRegex(String regex)
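Before the collected examples, a minimal self-contained sketch (not taken from any of the sources below; the "myapp.*" property names are invented for illustration) showing what getValByRegex returns: a map of every configuration entry whose key matches the given regular expression.

import java.util.Map;
import org.apache.hadoop.conf.Configuration;

public class GetValByRegexExample {
    public static void main(String[] args) {
        Configuration conf = new Configuration();
        conf.set("myapp.input.path", "/data/in");   // hypothetical keys, for illustration only
        conf.set("myapp.output.path", "/data/out");
        conf.set("other.key", "ignored");

        // Select only the keys under the hypothetical "myapp." namespace.
        Map<String, String> matches = conf.getValByRegex("^myapp\\..*");
        for (Map.Entry<String, String> entry : matches.entrySet()) {
            System.out.println(entry.getKey() + " = " + entry.getValue());
        }
    }
}

The examples below apply the same idea, either with a scoped prefix pattern or with ".*" to copy every configuration entry.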
From source file:co.cask.cdap.common.conf.ConfigurationUtil.java
License:Apache License
/**
 * Retrieves all configurations that are prefixed with a particular prefix.
 *
 * @see #setNamedConfigurations(Configuration, String, Map)
 *
 * @param conf the Configuration from which to get the configurations
 * @param confKeyPrefix the prefix to search for in the keys
 * @return a map of key-value pairs, representing the requested configurations, after removing the prefix
 */
public static Map<String, String> getNamedConfigurations(Configuration conf, String confKeyPrefix) {
    Map<String, String> namedConf = new HashMap<>();
    int prefixLength = confKeyPrefix.length();
    // since it's a regex match, we want to look for the character '.', and not match any character
    confKeyPrefix = confKeyPrefix.replace(".", "\\.");
    Map<String, String> properties = conf.getValByRegex("^" + confKeyPrefix + ".*");
    for (Map.Entry<String, String> entry : properties.entrySet()) {
        namedConf.put(entry.getKey().substring(prefixLength), entry.getValue());
    }
    return namedConf;
}
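A hedged usage sketch for the helper above (the "named.conf." prefix and the keys are invented for illustration, not taken from the CDAP sources):

Configuration conf = new Configuration();
conf.set("named.conf.user", "alice");
conf.set("named.conf.limit", "10");
conf.set("unrelated.key", "skipped");

// Returns {"user" -> "alice", "limit" -> "10"}: the "named.conf." prefix is stripped,
// and '.' in the prefix is escaped so it matches a literal dot rather than any character.
Map<String, String> named = ConfigurationUtil.getNamedConfigurations(conf, "named.conf.");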
From source file:com.asakusafw.runtime.directio.hadoop.HadoopDataSourceUtil.java
License:Apache License
private static Map<String, String> getConfigMap(Configuration conf) {
    assert conf != null;
    Map<String, String> map = conf.getValByRegex(PREFIX_PATTERN.pattern());
    NavigableMap<String, String> prefixMap = createPrefixMap(map, PREFIX);
    return prefixMap;
}
From source file:com.asakusafw.runtime.stage.inprocess.InProcessStageConfigurator.java
License:Apache License
private void install(Job job) {
    Configuration conf = job.getConfiguration();
    int prefixLength = KEY_PREFIX_REPLACE.length();
    for (Map.Entry<String, String> entry : conf.getValByRegex(PATTERN_KEY_REPLACE.pattern()).entrySet()) {
        assert entry.getKey().length() >= prefixLength;
        String key = entry.getKey().substring(prefixLength);
        if (key.isEmpty()) {
            continue;
        }
        String value = entry.getValue();
        if (LOG.isDebugEnabled()) {
            LOG.debug(MessageFormat.format("activate in-process configuration: {0}=\"{1}\"->\"{2}\"", //$NON-NLS-1$
                    key, conf.get(key, ""), //$NON-NLS-1$
                    value));
        }
        conf.set(key, value);
    }
    conf.set(StageConstants.PROP_JOB_RUNNER, SimpleJobRunner.class.getName());
    StageResourceDriver.setAccessMode(job, StageResourceDriver.AccessMode.DIRECT);
    StageInputFormat.setSplitCombinerClass(job, ExtremeSplitCombiner.class);
}
From source file:com.digitalpebble.behemoth.DocumentFilter.java
License:Apache License
/** Builds a document filter given a Configuration object **/
public static DocumentFilter getFilters(Configuration conf) {
    // extracts the patterns
    Map<String, String> PositiveKVpatterns = conf.getValByRegex(DocumentFilterParamNamePrefixKeep + ".+");
    Map<String, String> NegativeKVpatterns = conf.getValByRegex(DocumentFilterParamNamePrefixSkip + ".+");
    Map<String, String> tmpMap;

    DocumentFilter filter = new DocumentFilter();
    filter.medataMode = conf.get(DocumentFilterParamNameMode, "AND");

    // has to be either positive or negative but not both
    if (PositiveKVpatterns.size() > 0 && NegativeKVpatterns.size() > 0) {
        throw new RuntimeException(
                "Can't have positive AND negative document filters - check your configuration");
    } else if (PositiveKVpatterns.size() > 0) {
        filter.negativeMode = false;
        tmpMap = PositiveKVpatterns;
    } else {
        filter.negativeMode = true;
        tmpMap = NegativeKVpatterns;
    }

    // normalise the keys
    Iterator<Entry<String, String>> kviter = tmpMap.entrySet().iterator();
    while (kviter.hasNext()) {
        Entry<String, String> ent = kviter.next();
        String k = ent.getKey();
        String v = ent.getValue();
        k = k.substring(DocumentFilterParamNamePrefixKeep.length());

        StringBuffer message = new StringBuffer();
        if (filter.negativeMode)
            message.append("Negative ");
        else
            message.append("Positive ");
        message.append("filter found : ").append(k).append(" = ").append(v);
        LOG.info(message.toString());

        filter.KVpatterns.put(k, v);
    }

    String URLPatternS = conf.get(DocumentFilterParamNameURLFilterKeep, "");
    if (URLPatternS.length() > 0) {
        try {
            filter.URLRegex = Pattern.compile(URLPatternS);
        } catch (PatternSyntaxException e) {
            filter.URLRegex = null;
            LOG.error("Can't create regular expression for URL from " + URLPatternS);
        }
    }

    String MTPatternS = conf.get(DocumentFilterParamNameMimeTypeFilterKeep, "");
    if (MTPatternS.length() > 0) {
        try {
            filter.MimetypeRegex = Pattern.compile(MTPatternS);
        } catch (PatternSyntaxException e) {
            filter.MimetypeRegex = null;
            LOG.error("Can't create regular expression for MimeType from " + MTPatternS);
        }
    }

    filter.maxContentLength = conf.getInt(DocumentFilterParamNameLength, -1);

    return filter;
}
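A hedged configuration sketch for the filter above (it assumes the DocumentFilterParamName* constants are accessible to callers; the metadata name "lang" and the pattern are invented for illustration):

// Hypothetical values; only the shape of the keys is illustrated.
Configuration conf = new Configuration();
conf.set(DocumentFilter.DocumentFilterParamNameMode, "AND");
// A positive "keep" entry: retain documents whose 'lang' metadata matches the pattern.
conf.set(DocumentFilter.DocumentFilterParamNamePrefixKeep + "lang", "en.*");

DocumentFilter filter = DocumentFilter.getFilters(conf);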
From source file:hydrograph.engine.cascading.integration.HydrographRuntime.java
License:Apache License
public void initialize(Properties config, String[] args, HydrographJob hydrographJob, String jobId,
        String UDFPath) {
    AppProps.setApplicationName(hadoopProperties, hydrographJob.getJAXBObject().getName());
    hadoopProperties.putAll(config);
    Configuration conf = new HadoopConfigProvider(hadoopProperties).getJobConf();
    SchemaFieldHandler schemaFieldHandler = new SchemaFieldHandler(
            hydrographJob.getJAXBObject().getInputsOrOutputsOrStraightPulls());
    flowManipulationContext = new FlowManipulationContext(hydrographJob, args, schemaFieldHandler, jobId);
    FlowManipulationHandler flowManipulationHandler = new FlowManipulationHandler();
    hydrographJob = flowManipulationHandler.execute(flowManipulationContext);
    if (hydrographJob.getJAXBObject().getRuntimeProperties() != null
            && hydrographJob.getJAXBObject().getRuntimeProperties().getProperty() != null) {
        for (Property property : hydrographJob.getJAXBObject().getRuntimeProperties().getProperty()) {
            hadoopProperties.put(property.getName(), property.getValue());
        }
    }
    JAXBTraversal traversal = new JAXBTraversal(hydrographJob.getJAXBObject());
    if (traversal.isHiveComponentPresentInFlow()) {
        try {
            HiveMetastoreTokenProvider.obtainTokenForHiveMetastore(conf);
        } catch (TException e) {
            throw new HydrographRuntimeException(e);
        } catch (IOException e) {
            throw new HydrographRuntimeException(e);
        }
    }
    String[] otherArgs;
    try {
        otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    } catch (IOException e) {
        throw new HydrographRuntimeException(e);
    }
    String argsString = "";
    for (String arg : otherArgs) {
        argsString = argsString + " " + arg;
    }
    LOG.info("After processing arguments are:" + argsString);
    this.args = otherArgs;
    // setJar(otherArgs);
    hadoopProperties.putAll(conf.getValByRegex(".*"));
    ComponentAdapterFactory componentAdapterFactory = new ComponentAdapterFactory(
            hydrographJob.getJAXBObject());
    flowBuilder = new FlowBuilder();
    runtimeContext = new RuntimeContext(hydrographJob, traversal, hadoopProperties, componentAdapterFactory,
            flowManipulationContext.getSchemaFieldHandler(), UDFPath);
    LOG.info("Graph '" + runtimeContext.getHydrographJob().getJAXBObject().getName()
            + "' initialized successfully");
}
From source file:hydrograph.engine.cascading.schemes.MixedSchemeTestsWithEncoding.java
License:Apache License
@Before
public void prepare() {
    outPath = "testData/schemes/TextMixed/output";
    Configuration conf = new Configuration();
    Properties properties = new Properties();
    properties.putAll(conf.getValByRegex(".*"));
    AppProps.setApplicationJarClass(properties, MixedSchemeTestsWithEncoding.class);
    flowConnector = new Hadoop2MR1FlowConnector(properties);
    fields = new Fields("f1", "f2", "f3", "f4", "f5");
    fields_new = new Fields("f1", "f2", "f3", "f4", "f5");
}
From source file:hydrograph.engine.cascading.schemes.TextDelimitedSchemeTestsWithEncoding.java
License:Apache License
@Before
public void prepare() {
    outPath = "testData/schemes/TextDelimited/output";
    Configuration conf = new Configuration();
    Properties properties = new Properties();
    properties.putAll(conf.getValByRegex(".*"));
    AppProps.setApplicationJarClass(properties, TextDelimitedSchemeTestsWithEncoding.class);
    flowConnector = new Hadoop2MR1FlowConnector(properties);
    fields = new Fields("f1", "f2", "f3", "f4", "f5");
    fields_new = new Fields("f1", "f2", "f3", "f4", "f5");
}
From source file:hydrograph.engine.cascading.schemes.TextDelimiterAndFixedWidthTest.java
License:Apache License
@Before
public void prepare() {
    outPath = "testData/schemes/TextMixed/output";
    Configuration conf = new Configuration();
    Properties properties = new Properties();
    properties.putAll(conf.getValByRegex(".*"));
    AppProps.setApplicationJarClass(properties, TextDelimiterAndFixedWidthTest.class);
    flowConnector = new Hadoop2MR1FlowConnector(properties);
    fields = new Fields("f1", "f2", "f3", "f4", "f5");
    fields_new = new Fields("f1", "f2", "f3", "f4", "f5", "newline");
}
From source file:hydrograph.engine.cascading.schemes.TextFixedWidthSchemeTestsWithEncoding.java
License:Apache License
@Before
public void prepare() {
    outPath = "testData/schemes/TextFixedWidth/output";
    Configuration conf = new Configuration();
    Properties properties = new Properties();
    properties.putAll(conf.getValByRegex(".*"));
    AppProps.setApplicationJarClass(properties, TextFixedWidthSchemeTestsWithEncoding.class);
    flowConnector = new Hadoop2MR1FlowConnector(properties);
    types = new Type[] { Integer.class, Date.class, String.class, BigDecimal.class, Long.class };
    fields = new Fields("f1", "f2", "f3", "f4", "f5").applyTypes(types);
    fields_new = new Fields("f1", "f2", "f3", "f4", "f5").applyTypes(types);
}
From source file:hydrograph.engine.hive.scheme.HivePartRead.java
License:Apache License
public static void main(String args[]) throws IOException {
    Configuration conf = new Configuration();
    String[] otherArgs;
    otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    // print other args
    String argsString = "";
    for (String arg : otherArgs) {
        argsString = argsString + " " + arg;
    }
    System.out.println("After processing arguments are:" + argsString);
    Properties properties = new Properties();
    properties.putAll(conf.getValByRegex(".*"));
    Tap sink = new Hfs(new TextDelimited(false, ","), "/data/file_out_2", SinkMode.REPLACE);
    HiveTableDescriptor hiveTableDescriptor = new HiveTableDescriptor("testp14",
            new String[] { "a", "b", "c" }, new String[] { "string", "string", "string" },
            new String[] { "a" });
    HiveTap hivetap = new HiveTap(hiveTableDescriptor, new HiveParquetScheme(hiveTableDescriptor));
    Tap source = new HivePartitionTap(hivetap);
    Pipe pipe = new Pipe("pipe");
    properties.put("hive.metastore.uris", "thrift://UbuntuD5.bitwiseglobal.net:9083");
    FlowDef def = FlowDef.flowDef().addSource(pipe, source).addTailSink(pipe, sink);
    new Hadoop2MR1FlowConnector(properties).connect(def).complete();
}