Example usage for org.apache.hadoop.conf Configuration get

Introduction

This page collects example usages of org.apache.hadoop.conf.Configuration.get.

Prototype

public String get(String name) 

Document

Get the value of the name property, or null if no such property exists.
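
As a quick orientation before the excerpts below, here is a minimal, self-contained sketch of the call; the property names are invented for illustration:

import org.apache.hadoop.conf.Configuration;

public class ConfigurationGetDemo {
    public static void main(String[] args) {
        Configuration conf = new Configuration();
        conf.set("demo.splitter", ",");              // hypothetical property name

        String present = conf.get("demo.splitter");  // ","
        String missing = conf.get("no.such.key");    // null: no default is applied

        // The two-argument overload returns a fallback instead of null.
        String fallback = conf.get("no.such.key", "\t");

        System.out.println(present + " " + missing + " " + fallback);
    }
}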

Usage

From source file:clustering.init.WordSepMapper.java

License:Apache License

@Override
protected void setup(Context context) throws IOException, InterruptedException {
    super.setup(context);

    String fileName = ((FileSplit) context.getInputSplit()).getPath().getName();
    String extension = FileUtils.getExtension(fileName);

    switch (extension) {
    case "tsv":
        this.splitter = "\t";
        break;
    case "csv":
        this.splitter = ",";
        break;
    default:
        Configuration conf = context.getConfiguration();
        this.splitter = conf.get("column.splitter");
    }

    // TODO: 17-4-21 read from file
    // dicts
    this.synonymsMap.put("", "?");
    this.synonymsMap.put("?", "?");
    // 0901
    this.synonymsMap.put("(?:|?)", "?");
    this.synonymsMap.put(
            "(?:||||||)",
            "");
    this.synonymsMap.put("(?:|||)", "");
    this.synonymsMap.put("?", "");
    // 8703
    this.synonymsMap.put("5", "");
    this.synonymsMap.put("7", "");
    this.synonymsMap.put("(?:4maitc|4mat1c|4mat2c)", "4matic");
    this.synonymsMap.put("(?:ican-am|can-am)", "canam");
    this.synonymsMap.put("cfm0to", "cfmoto");
    this.synonymsMap.put("bmw", "?");
    this.synonymsMap.put("benz", "");
    this.synonymsMap.put("audi", "");
    this.synonymsMap.put("(?:mercecles|mercede)", "mercedes");
    this.synonymsMap.put("(?:ferraei|ferrair)", "ferrari");
    this.synonymsMap.put("", "");
    this.synonymsMap.put("", "?");
    this.synonymsMap.put("(?:?|?)", "");
    this.synonymsMap.put("", "");
    this.synonymsMap.put("?", "?");
    this.synonymsMap.put("", "");
    this.synonymsMap.put("", "");
    this.synonymsMap.put("(?:|)", "");
}
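
The default branch relies on the job driver having set column.splitter before submission; otherwise conf.get returns null. A minimal driver-side sketch, assuming a standard Job setup (the pipe delimiter is only an example):

Configuration conf = new Configuration();
conf.set("column.splitter", "|");   // the value the mapper's conf.get("column.splitter") will return
Job job = Job.getInstance(conf, "word-sep");  // "word-sep" is an illustrative job name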

From source file:cn.easyhbase.common.hbase.distributor.WdTableInputFormat.java

License:Apache License

@Override
public void setConf(Configuration conf) {
    super.setConf(conf);

    if (conf.get(ROW_KEY_DISTRIBUTOR_CLASS) != null) {
        String clazz = conf.get(ROW_KEY_DISTRIBUTOR_CLASS);
        try {
            rowKeyDistributor = (AbstractRowKeyDistributor) Class.forName(clazz).newInstance();
            if (conf.get(ROW_KEY_DISTRIBUTOR_PARAMS) != null) {
                rowKeyDistributor.init(conf.get(ROW_KEY_DISTRIBUTOR_PARAMS));
            }
        } catch (Exception e) {
            throw new RuntimeException(
                    "Cannot create row key distributor, " + ROW_KEY_DISTRIBUTOR_CLASS + ": " + clazz, e);
        }
    }
}
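
Both properties must be placed on the configuration before the input format is initialized. A hedged sketch, assuming the two key constants are publicly accessible and that MyDistributor is a hypothetical AbstractRowKeyDistributor subclass with a no-argument constructor:

Configuration conf = HBaseConfiguration.create();
conf.set(WdTableInputFormat.ROW_KEY_DISTRIBUTOR_CLASS, MyDistributor.class.getName());
conf.set(WdTableInputFormat.ROW_KEY_DISTRIBUTOR_PARAMS, "8"); // parameter format is distributor-specific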

From source file:cn.edu.hfut.dmic.webcollector.fetcher.FetcherOutputFormat.java

@Override
public RecordWriter<Text, Writable> getRecordWriter(TaskAttemptContext tac)
        throws IOException, InterruptedException {
    Configuration conf = tac.getConfiguration();
    FileSystem fs = FileSystem.get(conf);
    String outputPath = conf.get("mapred.output.dir");

    Path fetchPath = new Path(outputPath, "fetch/info");
    Path contentPath = new Path(outputPath, "content/info");
    Path parseDataPath = new Path(outputPath, "parse/info");
    Path redirectPath = new Path(outputPath, "redirect/info");
    final SequenceFile.Writer fetchOut = new SequenceFile.Writer(fs, conf, fetchPath, Text.class,
            CrawlDatum.class);
    final SequenceFile.Writer contentOut = new SequenceFile.Writer(fs, conf, contentPath, Text.class,
            Content.class);
    final SequenceFile.Writer parseDataOut = new SequenceFile.Writer(fs, conf, parseDataPath, Text.class,
            CrawlDatum.class);
    final SequenceFile.Writer redirectOut = new SequenceFile.Writer(fs, conf, redirectPath, CrawlDatum.class,
            Text.class);

    return new RecordWriter<Text, Writable>() {

        @Override
        public void write(Text k, Writable v) throws IOException, InterruptedException {
            if (v instanceof CrawlDatum) {
                fetchOut.append(k, v);
            } else if (v instanceof Content) {
                contentOut.append(k, v);
            } else if (v instanceof ParseData) {

                ParseData parseData = (ParseData) v;
                CrawlDatums next = parseData.next;
                for (CrawlDatum datum : next) {
                    parseDataOut.append(new Text(datum.getKey()), datum);
                }

            } else if (v instanceof Redirect) {
                Redirect redirect = (Redirect) v;
                redirectOut.append(redirect.datum, new Text(redirect.realUrl));
            }
        }

        @Override
        public void close(TaskAttemptContext tac) throws IOException, InterruptedException {
            fetchOut.close();
            contentOut.close();
            parseDataOut.close();
            redirectOut.close();
        }
    };

}
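
Note that "mapred.output.dir" is the legacy name of the output-directory property (Hadoop 2 renamed it to mapreduce.output.fileoutputformat.outputdir and keeps the old key as a deprecated alias). With the new-API context available here, the same path can be read without hard-coding either key:

Path outputPath = org.apache.hadoop.mapreduce.lib.output.FileOutputFormat.getOutputPath(tac);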

From source file:cn.edu.hfut.dmic.webcollectorcluster.fetcher.Fetcher.java

public Fetcher(Configuration conf) {
    super(conf);
    try {
        isContentStored = conf.getBoolean("fetcher.store.content", false);
        String requestFactoryClass = conf.get("plugin.request.factory.class");
        String parseFactoryClass = conf.get("plugin.parser.factory.class");
        String generatorFactoryClass = conf.get("plugin.generator.factory.class");
        String handlerFactoryClass = conf.get("plugin.fetchhandler.factory.class");
        requestFactory = (RequestFactory) Class.forName(requestFactoryClass).newInstance();
        parserFactory = (ParserFactory) Class.forName(parseFactoryClass).newInstance();
        generatorFactory = (GeneratorFactory) Class.forName(generatorFactoryClass).newInstance();
        HandlerFactory handlerFactory = (HandlerFactory) Class.forName(handlerFactoryClass).newInstance();
        setHandler(handlerFactory.createHandler());
    } catch (Exception ex) {
        LogUtils.getLogger().info("Exception", ex);
    }

}
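
All four factories are loaded reflectively from class names stored in the configuration, so each property must name a concrete class with a public no-argument constructor. A hedged driver-side sketch in which every My* class is hypothetical:

Configuration conf = CrawlerConfiguration.create();
conf.set("plugin.request.factory.class", MyRequestFactory.class.getName());
conf.set("plugin.parser.factory.class", MyParserFactory.class.getName());
conf.set("plugin.generator.factory.class", MyGeneratorFactory.class.getName());
conf.set("plugin.fetchhandler.factory.class", MyHandlerFactory.class.getName());
conf.setBoolean("fetcher.store.content", true);
Fetcher fetcher = new Fetcher(conf);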

From source file:cn.edu.hfut.dmic.webcollectorcluster.fetcher.FetcherOutputFormat.java

@Override
public org.apache.hadoop.mapred.RecordWriter<Text, WebWritable> getRecordWriter(FileSystem fs, JobConf jc,
        String string, Progressable p) throws IOException {
    Configuration conf = jc;
    String outputPath = conf.get("mapred.output.dir");
    Path fetchPath = new Path(outputPath, "fetch/info");
    Path contentPath = new Path(outputPath, "content/info");
    Path parseDataPath = new Path(outputPath, "parse_data/info");
    Path parseTempPath = new Path(outputPath, "parse_temp/info");
    final SequenceFile.Writer fetchOut = new SequenceFile.Writer(fs, conf, fetchPath, Text.class,
            CrawlDatum.class);
    final SequenceFile.Writer contentOut = new SequenceFile.Writer(fs, conf, contentPath, Text.class,
            Content.class);
    final SequenceFile.Writer parseDataOut = new SequenceFile.Writer(fs, conf, parseDataPath, Text.class,
            ParseData.class);
    final SequenceFile.Writer parseTempOut = new SequenceFile.Writer(fs, conf, parseTempPath, Text.class,
            CrawlDatum.class);
    return new RecordWriter<Text, WebWritable>() {
        @Override
        public void write(Text key, WebWritable value) throws IOException {
            Writable w = value.get();
            if (w instanceof CrawlDatum) {
                fetchOut.append(key, w);
            } else if (w instanceof Content) {
                contentOut.append(key, w);
            } else if (w instanceof ParseData) {
                parseDataOut.append(key, w);
                ParseData parseData = (ParseData) w;
                if (parseData.getLinks() != null) {
                    for (Link link : parseData.getLinks()) {
                        CrawlDatum datum = new CrawlDatum();
                        datum.setUrl(link.getUrl());
                        datum.setStatus(CrawlDatum.STATUS_DB_UNFETCHED);
                        datum.setFetchTime(CrawlDatum.FETCHTIME_UNDEFINED);
                        parseTempOut.append(new Text(datum.getUrl()), datum);
                    }
                }
            }
        }

        @Override
        public void close(Reporter rprtr) throws IOException {
            fetchOut.close();
            contentOut.close();
            parseDataOut.close();
            parseTempOut.close();
        }
    };
}

From source file:cn.edu.hfut.dmic.webcollectorcluster.generator.Injector.java

public void inject(Path crawlDir, ArrayList<String> urls)
        throws IOException, InterruptedException, ClassNotFoundException, Exception {
    Path crawldb = new Path(crawlDir, "crawldb");
    Configuration config = CrawlerConfiguration.create();
    System.out.println(config.get("mapred.jar"));
    FileSystem fs = crawldb.getFileSystem(config);
    Path tempdb = new Path(crawldb, "temp");
    if (fs.exists(tempdb)) {
        fs.delete(tempdb);
    }

    SequenceFile.Writer writer = new SequenceFile.Writer(fs, config, new Path(tempdb, "info.avro"), Text.class,
            CrawlDatum.class);
    for (String url : urls) {
        CrawlDatum crawldatum = new CrawlDatum();
        crawldatum.setUrl(url);
        crawldatum.setStatus(CrawlDatum.STATUS_DB_INJECTED);
        writer.append(new Text(url), crawldatum);
        System.out.println("inject:" + url);
    }
    writer.close();

    String[] args = new String[] { crawldb.toString(), tempdb.toString() };

    ToolRunner.run(CrawlerConfiguration.create(), new Merge(), args);
    Merge.install(crawldb);

    if (fs.exists(tempdb)) {
        fs.delete(tempdb);
    }

}
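
A hedged usage sketch for the injector, assuming a no-argument constructor (the crawl directory and seed URL are illustrative):

Injector injector = new Injector();              // constructor signature is an assumption
ArrayList<String> seeds = new ArrayList<String>();
seeds.add("http://www.example.com/");
injector.inject(new Path("/tmp/crawl"), seeds);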

From source file:co.cask.cdap.app.runtime.spark.SparkRuntimeContextProvider.java

License:Apache License

@Nullable
private static PluginInstantiator createPluginInstantiator(CConfiguration cConf, Configuration hConf,
        ClassLoader parentClassLoader) {
    String pluginArchive = hConf.get(Constants.Plugin.ARCHIVE);
    if (pluginArchive == null) {
        return null;
    }
    return new PluginInstantiator(cConf, parentClassLoader, new File(pluginArchive));
}

From source file:co.cask.cdap.common.conf.ConfigurationUtil.java

License:Apache License

public static <T> T get(Configuration conf, String key, Codec<T> codec) throws IOException {
    String value = conf.get(key);
    LOG.trace("De-serializing {} {}", key, value);
    // Using Latin-1 encoding so that all bytes can be encoded as string. UTF-8 has some invalid bytes that will get
    // skipped.
    return codec.decode(value == null ? null : value.getBytes("ISO-8859-1"));
}
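
For the round trip to work, the writing side must encode with the same charset. A sketch of that counterpart, assuming Codec exposes a symmetric encode(T) that returns byte[]:

public static <T> void set(Configuration conf, String key, Codec<T> codec, T value) throws IOException {
    // ISO-8859-1 maps every byte value 0x00-0xFF to a character, so no bytes are lost.
    conf.set(key, new String(codec.encode(value), "ISO-8859-1"));
}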

From source file:co.cask.cdap.data.hbase.HBase10CDH550Test.java

License:Apache License

@Override
public HRegion createHRegion(byte[] tableName, byte[] startKey, byte[] stopKey, String callingMethod,
        Configuration conf, byte[]... families) throws IOException {
    if (conf == null) {
        conf = new Configuration();
    }
    HTableDescriptor htd = new HTableDescriptor(tableName);
    for (byte[] family : families) {
        htd.addFamily(new HColumnDescriptor(family));
    }
    HRegionInfo info = new HRegionInfo(htd.getTableName(), startKey, stopKey, false);
    Path path = new Path(conf.get(HConstants.HBASE_DIR), callingMethod);
    FileSystem fs = FileSystem.get(conf);
    if (fs.exists(path)) {
        if (!fs.delete(path, true)) {
            throw new IOException("Failed delete of " + path);
        }
    }
    return HRegion.createHRegion(info, path, conf, htd);
}
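
A hedged sketch of a test calling this helper (table and family names are illustrative):

Configuration conf = new Configuration();
conf.set(HConstants.HBASE_DIR, "/tmp/hbase-test");  // HBASE_DIR is the "hbase.rootdir" key read above
HRegion region = createHRegion(Bytes.toBytes("demoTable"), HConstants.EMPTY_START_ROW,
        HConstants.EMPTY_END_ROW, "demoMethod", conf, Bytes.toBytes("cf"));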

From source file:co.cask.cdap.data.hbase.HBase94Test.java

License:Apache License

@Override
public HRegion createHRegion(byte[] tableName, byte[] startKey, byte[] stopKey, String callingMethod,
        Configuration conf, byte[]... families) throws IOException {
    if (conf == null) {
        conf = new Configuration();
    }
    HTableDescriptor htd = new HTableDescriptor(tableName);
    for (byte[] family : families) {
        htd.addFamily(new HColumnDescriptor(family));
    }
    HRegionInfo info = new HRegionInfo(htd.getName(), startKey, stopKey, false);
    Path path = new Path(conf.get(HConstants.HBASE_DIR), callingMethod);
    FileSystem fs = FileSystem.get(conf);
    if (fs.exists(path)) {
        if (!fs.delete(path, true)) {
            throw new IOException("Failed delete of " + path);
        }
    }
    return HRegion.createHRegion(info, path, conf, htd);
}