List of usage examples for org.apache.hadoop.conf Configuration get
public String get(String name)
Gets the value of the name property, or null if no such property exists.
From source file:clustering.init.WordSepMapper.java
License:Apache License
@Override protected void setup(Context context) throws IOException, InterruptedException { super.setup(context); String fileName = ((FileSplit) context.getInputSplit()).getPath().getName(); String extention = FileUtils.getExtension(fileName); switch (extention) { case "tsv": this.splitter = "\t"; break;// w ww .j a v a 2s.c o m case "csv": this.splitter = ","; break; default: Configuration conf = context.getConfiguration(); this.splitter = conf.get("column.splitter"); } // TODO: 17-4-21 read from file // dicts this.synonymsMap.put("", "?"); this.synonymsMap.put("?", "?"); // 0901 this.synonymsMap.put("(?:|?)", "?"); this.synonymsMap.put( "(?:||||||)", ""); this.synonymsMap.put("(?:|||)", ""); this.synonymsMap.put("?", ""); // 8703 this.synonymsMap.put("5", ""); this.synonymsMap.put("7", ""); this.synonymsMap.put("(?:4maitc|4mat1c|4mat2c)", "4matic"); this.synonymsMap.put("(?:ican-am|can-am)", "canam"); this.synonymsMap.put("cfm0to", "cfmoto"); this.synonymsMap.put("bmw", "?"); this.synonymsMap.put("benz", ""); this.synonymsMap.put("audi", ""); this.synonymsMap.put("(?:mercecles|mercede)", "mercedes"); this.synonymsMap.put("(?:ferraei|ferrair)", "ferrari"); this.synonymsMap.put("", ""); this.synonymsMap.put("", "?"); this.synonymsMap.put("(?:?|?)", ""); this.synonymsMap.put("", ""); this.synonymsMap.put("?", "?"); this.synonymsMap.put("", ""); this.synonymsMap.put("", ""); this.synonymsMap.put("(?:|)", ""); }
From source file:cn.easyhbase.common.hbase.distributor.WdTableInputFormat.java
License:Apache License
/**
 * Applies the configuration and, when a row key distributor class is
 * configured, instantiates it and passes it its optional parameters.
 *
 * @param conf the configuration to apply
 * @throws RuntimeException if the configured distributor cannot be created
 *                          or initialised; the original exception is the cause
 */
@Override
public void setConf(Configuration conf) {
    super.setConf(conf);

    String clazz = conf.get(ROW_KEY_DISTRIBUTOR_CLASS);
    if (clazz == null) {
        // No distributor configured: nothing more to set up.
        return;
    }

    try {
        Class<?> distributorType = Class.forName(clazz);
        rowKeyDistributor = (AbstractRowKeyDistributor) distributorType.newInstance();

        String distributorParams = conf.get(ROW_KEY_DISTRIBUTOR_PARAMS);
        if (distributorParams != null) {
            rowKeyDistributor.init(distributorParams);
        }
    } catch (Exception e) {
        throw new RuntimeException(
                "Cannot create row key distributor, " + ROW_KEY_DISTRIBUTOR_CLASS + ": " + clazz, e);
    }
}
From source file:cn.edu.hfut.dmic.webcollector.fetcher.FetcherOutputFormat.java
@Override public RecordWriter<Text, Writable> getRecordWriter(TaskAttemptContext tac) throws IOException, InterruptedException { Configuration conf = tac.getConfiguration(); FileSystem fs = FileSystem.get(conf); String outputPath = conf.get("mapred.output.dir"); Path fetchPath = new Path(outputPath, "fetch/info"); Path contentPath = new Path(outputPath, "content/info"); Path parseDataPath = new Path(outputPath, "parse/info"); Path redirectPath = new Path(outputPath, "redirect/info"); final SequenceFile.Writer fetchOut = new SequenceFile.Writer(fs, conf, fetchPath, Text.class, CrawlDatum.class); final SequenceFile.Writer contentOut = new SequenceFile.Writer(fs, conf, contentPath, Text.class, Content.class); final SequenceFile.Writer parseDataOut = new SequenceFile.Writer(fs, conf, parseDataPath, Text.class, CrawlDatum.class); final SequenceFile.Writer redirectOut = new SequenceFile.Writer(fs, conf, redirectPath, CrawlDatum.class, Text.class); return new RecordWriter<Text, Writable>() { @Override// ww w .j a v a2 s .com public void write(Text k, Writable v) throws IOException, InterruptedException { if (v instanceof CrawlDatum) { fetchOut.append(k, v); } else if (v instanceof Content) { contentOut.append(k, v); } else if (v instanceof ParseData) { ParseData parseData = (ParseData) v; CrawlDatums next = parseData.next; for (CrawlDatum datum : next) { parseDataOut.append(new Text(datum.getKey()), datum); } } else if (v instanceof Redirect) { Redirect redirect = (Redirect) v; redirectOut.append(redirect.datum, new Text(redirect.realUrl)); } } @Override public void close(TaskAttemptContext tac) throws IOException, InterruptedException { fetchOut.close(); contentOut.close(); parseDataOut.close(); redirectOut.close(); } }; }
From source file:cn.edu.hfut.dmic.webcollectorcluster.fetcher.Fetcher.java
public Fetcher(Configuration conf) { super(conf);//from w w w . ja v a 2 s.com try { isContentStored = conf.getBoolean("fetcher.store.content", false); String requestFactoryClass = conf.get("plugin.request.factory.class"); String parseFactoryClass = conf.get("plugin.parser.factory.class"); String generatorFactoryClass = conf.get("plugin.generator.factory.class"); String handlerFactoryClass = conf.get("plugin.fetchhandler.factory.class"); requestFactory = (RequestFactory) Class.forName(requestFactoryClass).newInstance(); parserFactory = (ParserFactory) Class.forName(parseFactoryClass).newInstance(); generatorFactory = (GeneratorFactory) Class.forName(generatorFactoryClass).newInstance(); HandlerFactory handlerFactory = (HandlerFactory) Class.forName(handlerFactoryClass).newInstance(); setHandler(handlerFactory.createHandler()); } catch (Exception ex) { LogUtils.getLogger().info("Exception", ex); } }
From source file:cn.edu.hfut.dmic.webcollectorcluster.fetcher.FetcherOutputFormat.java
@Override public org.apache.hadoop.mapred.RecordWriter<Text, WebWritable> getRecordWriter(FileSystem fs, JobConf jc, String string, Progressable p) throws IOException { Configuration conf = jc; String outputPath = conf.get("mapred.output.dir"); Path fetchPath = new Path(outputPath, "fetch/info"); Path contentPath = new Path(outputPath, "content/info"); Path parseDataPath = new Path(outputPath, "parse_data/info"); Path parseTempPath = new Path(outputPath, "parse_temp/info"); final SequenceFile.Writer fetchOut = new SequenceFile.Writer(fs, conf, fetchPath, Text.class, CrawlDatum.class); final SequenceFile.Writer contentOut = new SequenceFile.Writer(fs, conf, contentPath, Text.class, Content.class); final SequenceFile.Writer parseDataOut = new SequenceFile.Writer(fs, conf, parseDataPath, Text.class, ParseData.class); final SequenceFile.Writer parseTempOut = new SequenceFile.Writer(fs, conf, parseTempPath, Text.class, CrawlDatum.class); return new RecordWriter<Text, WebWritable>() { @Override//from w w w. j a v a 2 s . c o m public void write(Text key, WebWritable value) throws IOException { Writable w = value.get(); if (w instanceof CrawlDatum) { fetchOut.append(key, w); } else if (w instanceof Content) { contentOut.append(key, w); } else if (w instanceof ParseData) { parseDataOut.append(key, w); ParseData parseData = (ParseData) w; if (parseData.getLinks() != null) { for (Link link : parseData.getLinks()) { CrawlDatum datum = new CrawlDatum(); datum.setUrl(link.getUrl()); datum.setStatus(CrawlDatum.STATUS_DB_UNFETCHED); datum.setFetchTime(CrawlDatum.FETCHTIME_UNDEFINED); parseTempOut.append(new Text(datum.getUrl()), datum); } } } } @Override public void close(Reporter rprtr) throws IOException { fetchOut.close(); contentOut.close(); parseDataOut.close(); parseTempOut.close(); } }; }
From source file:cn.edu.hfut.dmic.webcollectorcluster.generator.Injector.java
/**
 * Injects seed URLs into the crawl database under {@code crawlDir}.
 *
 * <p>The URLs are written as injected {@code CrawlDatum} records to a sequence
 * file in {@code crawldb/temp}, the {@code Merge} tool is run over the crawl
 * database and that temporary directory, the merged result is installed, and
 * the temporary directory is removed.
 *
 * @param crawlDir the crawl root directory; the database lives in its {@code crawldb} child
 * @param urls     the seed URLs to inject
 * @throws IOException if a file system or sequence file operation fails
 * @throws Exception   if the merge job fails
 */
public void inject(Path crawlDir, ArrayList<String> urls)
        throws IOException, InterruptedException, ClassNotFoundException, Exception {
    Path crawldb = new Path(crawlDir, "crawldb");
    Configuration config = CrawlerConfiguration.create();
    System.out.println(config.get("mapred.jar"));
    FileSystem fs = crawldb.getFileSystem(config);
    Path tempdb = new Path(crawldb, "temp");
    if (fs.exists(tempdb)) {
        // Explicit recursive delete instead of the deprecated delete(Path).
        fs.delete(tempdb, true);
    }

    SequenceFile.Writer writer = new SequenceFile.Writer(fs, config, new Path(tempdb, "info.avro"),
            Text.class, CrawlDatum.class);
    try {
        for (String url : urls) {
            CrawlDatum crawldatum = new CrawlDatum();
            crawldatum.setUrl(url);
            crawldatum.setStatus(CrawlDatum.STATUS_DB_INJECTED);
            writer.append(new Text(url), crawldatum);
            System.out.println("inject:" + url);
        }
    } finally {
        // Release the writer even when an append fails part-way through.
        writer.close();
    }

    String[] args = new String[] { crawldb.toString(), tempdb.toString() };
    // NOTE(review): the exit code returned by ToolRunner.run is not checked
    // before the merge result is installed -- confirm this is intended.
    ToolRunner.run(CrawlerConfiguration.create(), new Merge(), args);
    Merge.install(crawldb);

    if (fs.exists(tempdb)) {
        fs.delete(tempdb, true);
    }
}
From source file:co.cask.cdap.app.runtime.spark.SparkRuntimeContextProvider.java
License:Apache License
/**
 * Creates a plugin instantiator from the plugin archive named in the Hadoop
 * configuration.
 *
 * @param cConf             the CDAP configuration handed to the instantiator
 * @param hConf             the Hadoop configuration read for {@code Constants.Plugin.ARCHIVE}
 * @param parentClassLoader the parent class loader handed to the instantiator
 * @return a new instantiator, or {@code null} when no plugin archive is configured
 */
@Nullable
private static PluginInstantiator createPluginInstantiator(CConfiguration cConf, Configuration hConf,
                                                           ClassLoader parentClassLoader) {
    String archivePath = hConf.get(Constants.Plugin.ARCHIVE);
    return archivePath == null
        ? null
        : new PluginInstantiator(cConf, parentClassLoader, new File(archivePath));
}
From source file:co.cask.cdap.common.conf.ConfigurationUtil.java
License:Apache License
public static <T> T get(Configuration conf, String key, Codec<T> codec) throws IOException { String value = conf.get(key); LOG.trace("De-serializing {} {}", key, value); // Using Latin-1 encoding so that all bytes can be encoded as string. UTF-8 has some invalid bytes that will get // skipped.//from ww w . jav a 2 s . co m return codec.decode(value == null ? null : value.getBytes("ISO-8859-1")); }
From source file:co.cask.cdap.data.hbase.HBase10CDH550Test.java
License:Apache License
/**
 * Creates a fresh region for a table with the given column families.
 *
 * <p>The region directory is {@code <HConstants.HBASE_DIR>/<callingMethod>};
 * if it already exists it is deleted recursively first.
 *
 * @param tableName     the table name
 * @param startKey      the region start key
 * @param stopKey       the region stop key
 * @param callingMethod the name used for the region directory
 * @param conf          the configuration to use; a new one is created when {@code null}
 * @param families      the column families to add to the table descriptor
 * @return the newly created region
 * @throws IOException if the existing directory cannot be deleted or region creation fails
 */
@Override
public HRegion createHRegion(byte[] tableName, byte[] startKey, byte[] stopKey, String callingMethod,
                             Configuration conf, byte[]... families) throws IOException {
    Configuration regionConf = (conf != null) ? conf : new Configuration();

    HTableDescriptor descriptor = new HTableDescriptor(tableName);
    for (int i = 0; i < families.length; i++) {
        descriptor.addFamily(new HColumnDescriptor(families[i]));
    }
    HRegionInfo regionInfo = new HRegionInfo(descriptor.getTableName(), startKey, stopKey, false);

    Path regionDir = new Path(regionConf.get(HConstants.HBASE_DIR), callingMethod);
    FileSystem fileSystem = FileSystem.get(regionConf);
    // Start from a clean directory; refuse to continue if the old one cannot be removed.
    if (fileSystem.exists(regionDir) && !fileSystem.delete(regionDir, true)) {
        throw new IOException("Failed delete of " + regionDir);
    }
    return HRegion.createHRegion(regionInfo, regionDir, regionConf, descriptor);
}
From source file:co.cask.cdap.data.hbase.HBase94Test.java
License:Apache License
@Override public HRegion createHRegion(byte[] tableName, byte[] startKey, byte[] stopKey, String callingMethod, Configuration conf, byte[]... families) throws IOException { if (conf == null) { conf = new Configuration(); }//from w ww . java2 s.c om HTableDescriptor htd = new HTableDescriptor(tableName); for (byte[] family : families) { htd.addFamily(new HColumnDescriptor(family)); } HRegionInfo info = new HRegionInfo(htd.getName(), startKey, stopKey, false); Path path = new Path(conf.get(HConstants.HBASE_DIR), callingMethod); FileSystem fs = FileSystem.get(conf); if (fs.exists(path)) { if (!fs.delete(path, true)) { throw new IOException("Failed delete of " + path); } } return HRegion.createHRegion(info, path, conf, htd); }