Example usage for org.apache.hadoop.conf Configuration getBoolean

List of usage examples for org.apache.hadoop.conf Configuration getBoolean

Introduction

On this page you can find example usage of org.apache.hadoop.conf.Configuration#getBoolean.

Prototype

public boolean getBoolean(String name, boolean defaultValue) 

Source Link

Document

Get the value of the name property as a boolean.

Usage

From source file:de.tudarmstadt.ukp.dkpro.bigdata.collocations.CollocReducer.java

License:Apache License

/**
 * Reads reducer settings from the job configuration before processing begins.
 * Pulls the minimum-support threshold and the emit-unigrams flag, then logs
 * both effective values.
 */
@Override
protected void setup(Context context) throws IOException, InterruptedException {
    super.setup(context);
    Configuration conf = context.getConfiguration();
    this.minSupport = conf.getInt(MIN_SUPPORT, DEFAULT_MIN_SUPPORT);

    // BUG FIX: a leftover "emitUnigrams = true;" debug override clobbered the
    // value just read from the configuration, so EMIT_UNIGRAMS was ignored.
    // Honor the configured value instead.
    boolean emitUnigrams = conf.getBoolean(CollocDriver.EMIT_UNIGRAMS, CollocDriver.DEFAULT_EMIT_UNIGRAMS);
    log.info("Min support is {}", minSupport);
    log.info("Emit Unigrams is {}", emitUnigrams);
}

From source file:edu.indiana.d2i.htrc.corpus.clean.CleanCorpusDriver.java

License:Apache License

@Override
public int run(String[] args) throws Exception {
    // TODO Auto-generated method stub

    /**//from w  ww  .j  ava2s . c o  m
     * Specify the # of reducers through -D
     * mapred.reduce.tasks=<numOfReducers> in hadoop command line. Specify
     * whether compression is used through -D
     * user.args.compression=<true/false>
     */
    if (args.length != 2) {
        System.err.printf(
                "Usage: %s [generic options] </path/to/input/directory> </path/to/output/directory>\n",
                getClass().getSimpleName());
        ToolRunner.printGenericCommandUsage(System.err);
        return -1;
    }

    Configuration conf = getConf();

    Job job = new Job(conf, "HTRC Cleaning Raw Corpus");

    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));

    job.setJarByClass(CleanCorpusDriver.class);
    job.setMapperClass(CleanCorpusMapper.class);
    job.setReducerClass(CleanCorpusReducer.class);

    if (conf.getBoolean("user.args.compression", false)) {
        /* use compression */
        SequenceFileOutputFormat.setCompressOutput(job, true);
        SequenceFileOutputFormat.setOutputCompressionType(job, CompressionType.BLOCK);
        SequenceFileOutputFormat.setOutputCompressorClass(job, DefaultCodec.class);
    }

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(TextArrayWritable.class);

    job.setInputFormatClass(SequenceFileInputFormat.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);

    return job.waitForCompletion(true) ? 0 : 1;

}

From source file:edu.indiana.d2i.htrc.corpus.retrieve.RetrieveRawCorpusDriver.java

License:Apache License

@Override
public int run(String[] args) throws Exception {
    // TODO Auto-generated method stub

    /**/*w  w w  . jav  a  2 s .c  o  m*/
     * Specify the # of reducers through -D
     * mapred.reduce.tasks=<numOfReducers> in hadoop command line. Specify
     * using compression through -D user.args.compression=true
     */
    if (args.length != 3) {
        System.err.printf(
                "Usage: %s [generic options] </path/to/input/directory> </path/to/output/directory> </path/to/property/file>\n",
                getClass().getSimpleName());
        ToolRunner.printGenericCommandUsage(System.err);
        return -1;
    }

    Configuration conf = getConf();

    Properties prop = new Properties();

    prop.load(new FileInputStream(args[2]));

    // set configuration parameters

    // data api related parameters
    conf.set(Constants.DATA_API_EPR, prop.getProperty(Constants.DATA_API_EPR));
    conf.set(Constants.DATA_API_CONCAT, prop.getProperty(Constants.DATA_API_CONCAT));
    conf.set(Constants.DATA_API_SELFSIGN, prop.getProperty(Constants.DATA_API_SELFSIGN));
    conf.set(Constants.DATA_API_DELIMITER, prop.getProperty(Constants.DATA_API_DELIMITER));
    conf.set(Constants.DATA_API_VOL_PREFIX, prop.getProperty(Constants.DATA_API_VOL_PREFIX));
    conf.set(Constants.DATA_API_PAGE_PREFIX, prop.getProperty(Constants.DATA_API_PAGE_PREFIX));
    conf.set(Constants.DATA_API_REQ_SIZE, prop.getProperty(Constants.DATA_API_REQ_SIZE));

    // oauth2 related parameters
    conf.set(Constants.OAUTH2_EPR, prop.getProperty(Constants.OAUTH2_EPR));
    conf.set(Constants.OAUTH2_USER_NAME, prop.getProperty(Constants.OAUTH2_USER_NAME));
    conf.set(Constants.OAUTH2_USER_PASSWORD, prop.getProperty(Constants.OAUTH2_USER_PASSWORD));

    // set # of lines (volumes in our case) to be processed by one map task
    conf.set("mapreduce.input.lineinputformat.linespermap", prop.getProperty(Constants.NUM_VOLUMES_PER_MAPPER));

    Job job = new Job(conf, "HTRC Retrieving Raw Corpus");

    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));

    job.setJarByClass(RetrieveRawCorpusDriver.class);
    job.setMapperClass(RetrieveRawCorpusMapper.class);
    job.setReducerClass(RetrieveRawCorpusReducer.class);

    if (conf.getBoolean("user.args.compression", false)) {
        /* use compression */
        SequenceFileOutputFormat.setCompressOutput(job, true);
        SequenceFileOutputFormat.setOutputCompressionType(job, CompressionType.BLOCK);
        SequenceFileOutputFormat.setOutputCompressorClass(job, DefaultCodec.class);
    }

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(TextArrayWritable.class);

    job.setInputFormatClass(NLineInputFormat.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);

    return job.waitForCompletion(true) ? 0 : 1;

}

From source file:edu.indiana.d2i.htrc.corpus.transform.CorpusTransformDriver.java

License:Apache License

@Override
public int run(String[] args) throws Exception {
    // TODO Auto-generated method stub

    /**/*  w w w . j a va2s.c o m*/
     * Specify the # of reducers through -D
     * mapred.reduce.tasks=<numOfReducers> in hadoop command line. Specify
     * whether compression is used through -D user.args.compression=true,
     * use -D user.args.wordset.filename=<wordset_filename> to set wordset
     * filename, use -files </local/path/to/wordset_file> to distribute
     * wordset_file to each compute node
     */
    if (args.length != 2) {
        System.err.printf(
                "Usage: %s [generic options] </path/to/input/directory> </path/to/output/directory>\n",
                getClass().getSimpleName());
        ToolRunner.printGenericCommandUsage(System.err);
        return -1;
    }

    Configuration conf = getConf();

    Job job = new Job(conf, "HTRC Transforming Corpus");

    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));

    job.setJarByClass(CorpusTransformDriver.class);
    job.setMapperClass(CorpusTransformMapper.class);
    job.setReducerClass(CorpusTransformReducer.class);

    if (conf.getBoolean("user.args.compression", false)) {
        /* use compression */
        SequenceFileOutputFormat.setCompressOutput(job, true);
        SequenceFileOutputFormat.setOutputCompressionType(job, CompressionType.BLOCK);
        SequenceFileOutputFormat.setOutputCompressorClass(job, DefaultCodec.class);
    }

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(TextArrayWritable.class);

    job.setInputFormatClass(SequenceFileInputFormat.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);

    return job.waitForCompletion(true) ? 0 : 1;

}

From source file:edu.indiana.d2i.htrc.util.DataAPITestDriver.java

License:Apache License

/**
 * Smoke-tests the Data API: builds a client from the given configuration
 * class, issues one content query, and prints the returned volume IDs.
 *
 * @param args [0] Data API configuration class name, [1] max IDs per request,
 *             [2] query string (delimiter-separated volume IDs)
 * @return 0 always (exceptions propagate)
 */
@Override
public int run(String[] args) throws Exception {
    String dataAPIConfClassName = args[0];
    int maxIdsPerReq = Integer.valueOf(args[1]);
    String queryStr = args[2];

    Configuration conf = getConf();
    Utilities.setDataAPIConf(conf, dataAPIConfClassName, maxIdsPerReq);

    // FIX: removed the unused maxIdRetrieved local (value was read but never
    // used) and deleted dead commented-out sample-query code.
    String dataEPR = conf.get(HTRCConstants.HOSTS_SEPARATEDBY_COMMA).split(",")[0];
    String delimitor = conf.get(HTRCConstants.DATA_API_URL_DELIMITOR, "|");
    String clientID = conf.get(HTRCConstants.DATA_API_CLIENTID, "yim");
    String clientSecrete = conf.get(HTRCConstants.DATA_API_CLIENTSECRETE, "yim");
    String tokenLoc = conf.get(HTRCConstants.DATA_API_TOKENLOC,
            "https://129-79-49-119.dhcp-bl.indiana.edu:25443/oauth2/token?grant_type=client_credentials");
    boolean selfsigned = conf.getBoolean(HTRCConstants.DATA_API_SELFSIGNED, true);

    // Bare default host needs the protocol prefix prepended.
    if (dataEPR.equals(HTRCConstants.DATA_API_DEFAULT_URL)) {
        dataEPR = HTRCConstants.DATA_API_DEFAULT_URL_PREFIX + dataEPR;
    }

    HTRCDataAPIClient dataClient = new HTRCDataAPIClient.Builder(dataEPR, delimitor).authentication(true)
            .selfsigned(selfsigned).clientID(clientID).clientSecrete(clientSecrete).tokenLocation(tokenLoc)
            .build();

    Iterable<Entry<String, String>> entries = dataClient.getID2Content(queryStr);
    for (Entry<String, String> entry : entries) {
        System.out.println(entry.getKey());
    }

    return 0;
}

From source file:edu.indiana.d2i.htrc.util.DataCopyValidation.java

License:Apache License

/**
 * Validates copied data: reads every (id, content) record from the sequence
 * files under outputPath, re-fetches the content from the Data API, and logs
 * an error for any volume whose stored content does not match.
 *
 * @param args [0] output path to validate, [1] Data API configuration class
 *             name, [2] max IDs per request
 * @return 0 always (mismatches are logged, not fatal)
 */
@Override
public int run(String[] args) throws Exception {
    Configuration conf = getConf();

    String outputPath = args[0]; // result
    String dataAPIConfClassName = args[1];
    int maxIdsPerReq = Integer.valueOf(args[2]);

    logger.info("DataValidation ");
    logger.info(" - output: " + outputPath);
    logger.info(" - dataAPIConfClassName: " + dataAPIConfClassName);
    logger.info(" - maxIdsPerReq: " + maxIdsPerReq);

    Utilities.setDataAPIConf(conf, dataAPIConfClassName, maxIdsPerReq);

    String dataEPR = conf.get(HTRCConstants.HOSTS_SEPARATEDBY_COMMA,
            "https://129-79-49-119.dhcp-bl.indiana.edu:25443/data-api");
    String delimitor = conf.get(HTRCConstants.DATA_API_URL_DELIMITOR, "|");
    String clientID = conf.get(HTRCConstants.DATA_API_CLIENTID, "yim");
    String clientSecrete = conf.get(HTRCConstants.DATA_API_CLIENTSECRETE, "yim");
    String tokenLoc = conf.get(HTRCConstants.DATA_API_TOKENLOC,
            "https://129-79-49-119.dhcp-bl.indiana.edu:25443/oauth2/token?grant_type=client_credentials");
    boolean selfsigned = conf.getBoolean(HTRCConstants.DATA_API_SELFSIGNED, true);
    HTRCDataAPIClient client = new HTRCDataAPIClient.Builder(dataEPR, delimitor).authentication(true)
            .selfsigned(selfsigned).clientID(clientID).clientSecrete(clientSecrete).tokenLocation(tokenLoc)
            .build();

    FileSystem fs = FileSystem.get(conf);
    FileStatus[] status = fs.listStatus(new Path(outputPath), Utilities.HIDDEN_FILE_FILTER);
    Text key = new Text();
    Text value = new Text();
    for (int i = 0; i < status.length; i++) {
        SequenceFile.Reader seqReader = new SequenceFile.Reader(fs, status[i].getPath(), conf);
        // FIX: the reader was previously never closed (file-handle leak when
        // validating many part files); close it even if validation throws.
        try {
            while (seqReader.next(key, value)) {
                Iterable<Entry<String, String>> content = client.getID2Content(key.toString());
                Iterator<Entry<String, String>> iterator = content.iterator();
                Entry<String, String> entry = iterator.next();
                if (!entry.getValue().equals(value.toString())) {
                    logger.error("Book : " + key.toString() + " corrupts!");
                }
            }
        } finally {
            seqReader.close();
        }
    }

    logger.info("Finish validation.");

    return 0;
}

From source file:edu.indiana.d2i.htrc.util.IDValidation.java

License:Apache License

/**
 * Filters a list of volume IDs: for each line in idfile, queries the Data API
 * and writes the ID to accurateIdfile only if content exists for it. Progress
 * is printed every 1000 volumes.
 *
 * @param args [0] Data API configuration class name, [1] max IDs per request,
 *             [2] input ID file, [3] output file for validated IDs
 * @return 0 always (exceptions propagate)
 */
@Override
public int run(String[] args) throws Exception {
    System.out.println("args.length " + args.length);

    String dataAPIConfClassName = args[0];
    int maxIdsPerReq = Integer.valueOf(args[1]);
    String idfile = args[2];
    String accurateIdfile = args[3];

    Configuration conf = getConf();
    Utilities.setDataAPIConf(conf, dataAPIConfClassName, maxIdsPerReq);

    // FIX: removed the unused maxIdRetrieved local and dead commented-out
    // sample-query code.
    String dataEPR = conf.get(HTRCConstants.HOSTS_SEPARATEDBY_COMMA).split(",")[0];
    String delimitor = conf.get(HTRCConstants.DATA_API_URL_DELIMITOR, "|");
    String clientID = conf.get(HTRCConstants.DATA_API_CLIENTID, "yim");
    String clientSecrete = conf.get(HTRCConstants.DATA_API_CLIENTSECRETE, "yim");
    String tokenLoc = conf.get(HTRCConstants.DATA_API_TOKENLOC,
            "https://129-79-49-119.dhcp-bl.indiana.edu:25443/oauth2/token?grant_type=client_credentials");
    boolean selfsigned = conf.getBoolean(HTRCConstants.DATA_API_SELFSIGNED, true);

    // Bare default host needs the protocol prefix prepended.
    if (dataEPR.equals(HTRCConstants.DATA_API_DEFAULT_URL)) {
        dataEPR = HTRCConstants.DATA_API_DEFAULT_URL_PREFIX + dataEPR;
    }

    HTRCDataAPIClient dataClient = new HTRCDataAPIClient.Builder(dataEPR, delimitor).authentication(true)
            .selfsigned(selfsigned).clientID(clientID).clientSecrete(clientSecrete).tokenLocation(tokenLoc)
            .build();

    // FIX: the reader/writer previously leaked if an exception was thrown
    // mid-loop; close both in finally blocks.
    BufferedReader reader = new BufferedReader(new FileReader(idfile));
    try {
        BufferedWriter writer = new BufferedWriter(new FileWriter(accurateIdfile));
        try {
            String line = null;
            int count = 0;
            while ((line = reader.readLine()) != null) {
                Iterable<Entry<String, String>> content = dataClient.getID2Content(line);
                if (content != null)
                    writer.write(line + "\n");
                if ((++count) % 1000 == 0)
                    System.out.println("Finish " + count + " volumes.");
            }
        } finally {
            writer.close();
        }
    } finally {
        reader.close();
    }

    return 0;
}

From source file:edu.indiana.d2i.htrc.util.Utilities.java

License:Apache License

/**
 * Builds an authenticated HTRC Data API client from the given configuration,
 * falling back to the built-in default endpoint and credentials for any
 * setting that is absent.
 *
 * <p>NOTE(review): the method name's "creat" typo is part of the public API
 * and is kept for compatibility with existing callers.
 */
public static HTRCDataAPIClient creatDataAPIClient(Configuration conf) {
    String endpoint = conf.get(HTRCConstants.DATA_API_EPR, "129-79-49-119.dhcp-bl.indiana.edu:25443");
    String delimiter = conf.get(HTRCConstants.DATA_API_URL_DELIMITOR, "|");
    boolean acceptSelfSigned = conf.getBoolean(HTRCConstants.DATA_API_SELFSIGNED, true);
    String id = conf.get(HTRCConstants.DATA_API_CLIENTID, "yim");
    String secret = conf.get(HTRCConstants.DATA_API_CLIENTSECRETE, "yim");
    String tokenLocation = conf.get(HTRCConstants.DATA_API_TOKENLOC,
            "https://129-79-49-119.dhcp-bl.indiana.edu:25443/oauth2/token?grant_type=client_credentials");

    HTRCDataAPIClient.Builder builder = new HTRCDataAPIClient.Builder(endpoint, delimiter)
            .authentication(true).selfsigned(acceptSelfSigned)
            .clientID(id).clientSecrete(secret).tokenLocation(tokenLocation);
    return builder.build();
}

From source file:edu.indiana.d2i.htrc.util.Utilities.java

License:Apache License

/**
 * Reflectively instantiates the named Data API configuration class and lets it
 * populate {@code conf} with its settings (capped at {@code maxIdsPerReq} IDs
 * per request), then logs the effective values.
 *
 * @throws ClassNotFoundException if {@code dataAPIConfClassName} cannot be loaded
 */
public static void setDataAPIConf(Configuration conf, String dataAPIConfClassName, int maxIdsPerReq)
        throws ClassNotFoundException {
    Class<?> confClass = Class.forName(dataAPIConfClassName);
    DataAPIDefaultConf apiConf = (DataAPIDefaultConf) ReflectionUtils.newInstance(confClass, conf);
    apiConf.configurate(conf, maxIdsPerReq);

    // Echo each setting (with its default) so job logs show what was applied.
    logger.info("Data API configuration");
    logger.info(" - host: " + conf.get(HTRCConstants.HOSTS_SEPARATEDBY_COMMA,
            "129-79-49-119.dhcp-bl.indiana.edu:25443/data-api"));
    logger.info(" - delimitor: " + conf.get(HTRCConstants.DATA_API_URL_DELIMITOR, "|"));
    logger.info(" - clientID: " + conf.get(HTRCConstants.DATA_API_CLIENTID, "yim"));
    logger.info(" - clientSecret: " + conf.get(HTRCConstants.DATA_API_CLIENTSECRETE, "yim"));
    logger.info(" - tokenLoc: " + conf.get(HTRCConstants.DATA_API_TOKENLOC,
            "https://129-79-49-119.dhcp-bl.indiana.edu:25443/oauth2/token?grant_type=client_credentials"));
    logger.info(" - selfsigned: " + conf.getBoolean(HTRCConstants.DATA_API_SELFSIGNED, true));
    logger.info(" - maxIDRetrieved: " + conf.getInt(HTRCConstants.MAX_ID_RETRIEVED, 100));
}

From source file:edu.indiana.d2i.htrc.util.Utilities.java

License:Apache License

/**
 * Copies only the volume IDs that exist in the Data API: for each line of
 * {@code input}, queries the API and writes the ID to {@code output} when
 * content is returned.
 *
 * @param input  path to a file with one volume ID per line
 * @param output path the validated IDs are written to
 * @throws Exception on I/O or Data API failure
 */
public static void filterUnexistID(String input, String output) throws Exception {
    Configuration conf = new Configuration();
    Utilities.setDataAPIConf(conf, "edu.indiana.d2i.htrc.io.DataAPISilvermapleConf", 1);

    // FIX: removed the unused maxIdRetrieved local.
    String dataEPR = conf.get(HTRCConstants.HOSTS_SEPARATEDBY_COMMA).split(",")[0];
    String delimitor = conf.get(HTRCConstants.DATA_API_URL_DELIMITOR, "|");
    String clientID = conf.get(HTRCConstants.DATA_API_CLIENTID, "yim");
    String clientSecrete = conf.get(HTRCConstants.DATA_API_CLIENTSECRETE, "yim");
    String tokenLoc = conf.get(HTRCConstants.DATA_API_TOKENLOC,
            "https://129-79-49-119.dhcp-bl.indiana.edu:25443/oauth2/token?grant_type=client_credentials");
    boolean selfsigned = conf.getBoolean(HTRCConstants.DATA_API_SELFSIGNED, true);

    // Bare default host needs the protocol prefix prepended.
    if (dataEPR.equals(HTRCConstants.DATA_API_DEFAULT_URL)) {
        dataEPR = HTRCConstants.DATA_API_DEFAULT_URL_PREFIX + dataEPR;
    }

    HTRCDataAPIClient dataClient = new HTRCDataAPIClient.Builder(dataEPR, delimitor).authentication(true)
            .selfsigned(selfsigned).clientID(clientID).clientSecrete(clientSecrete).tokenLocation(tokenLoc)
            .build();

    // FIX: the reader/writer previously leaked if an exception was thrown
    // mid-loop; close both in finally blocks. Opening them after the client
    // is built also avoids leaking them if client construction fails.
    BufferedReader reader = new BufferedReader(new FileReader(input));
    try {
        BufferedWriter writer = new BufferedWriter(new FileWriter(output));
        try {
            String line = null;
            while ((line = reader.readLine()) != null) {
                Iterable<Entry<String, String>> content = dataClient.getID2Content(line);
                if (content != null)
                    writer.write(line + "\n");
            }
        } finally {
            writer.close();
        }
    } finally {
        reader.close();
    }
}