Example usage for org.apache.hadoop.conf Configuration setLong

Introduction

On this page you can find usage examples for org.apache.hadoop.conf.Configuration#setLong.

Prototype

public void setLong(String name, long value) 

Document

Set the value of the name property to a long.
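
A minimal sketch of how setLong pairs with getLong (not taken from the source files below; the property name "my.app.split.size" and its values are illustrative assumptions):

import org.apache.hadoop.conf.Configuration;

public class SetLongExample {
    public static void main(String[] args) {
        Configuration conf = new Configuration();
        // store a long-valued property; Configuration keeps it internally as a string
        conf.setLong("my.app.split.size", 128L * 1024 * 1024);
        // read it back, supplying a default for the case where the key is absent
        long splitSize = conf.getLong("my.app.split.size", 64L * 1024 * 1024);
        System.out.println(splitSize); // prints 134217728
    }
}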

Usage

From source file:io.prestosql.plugin.hive.s3.PrestoS3ConfigurationUpdater.java

License:Apache License

@Override
public void updateConfiguration(Configuration config) {
    // re-map filesystem schemes to match Amazon Elastic MapReduce
    config.set("fs.s3.impl", PrestoS3FileSystem.class.getName());
    config.set("fs.s3a.impl", PrestoS3FileSystem.class.getName());
    config.set("fs.s3n.impl", PrestoS3FileSystem.class.getName());

    if (awsAccessKey != null) {
        config.set(S3_ACCESS_KEY, awsAccessKey);
    }
    if (awsSecretKey != null) {
        config.set(S3_SECRET_KEY, awsSecretKey);
    }
    if (endpoint != null) {
        config.set(S3_ENDPOINT, endpoint);
    }
    if (signerType != null) {
        config.set(S3_SIGNER_TYPE, signerType.name());
    }
    config.setBoolean(S3_PATH_STYLE_ACCESS, pathStyleAccess);
    config.setBoolean(S3_USE_INSTANCE_CREDENTIALS, useInstanceCredentials);
    config.setBoolean(S3_SSL_ENABLED, sslEnabled);
    config.setBoolean(S3_SSE_ENABLED, sseEnabled);
    config.set(S3_SSE_TYPE, sseType.name());
    if (encryptionMaterialsProvider != null) {
        config.set(S3_ENCRYPTION_MATERIALS_PROVIDER, encryptionMaterialsProvider);
    }
    if (kmsKeyId != null) {
        config.set(S3_KMS_KEY_ID, kmsKeyId);
    }
    if (sseKmsKeyId != null) {
        config.set(S3_SSE_KMS_KEY_ID, sseKmsKeyId);
    }
    config.setInt(S3_MAX_CLIENT_RETRIES, maxClientRetries);
    config.setInt(S3_MAX_ERROR_RETRIES, maxErrorRetries);
    config.set(S3_MAX_BACKOFF_TIME, maxBackoffTime.toString());
    config.set(S3_MAX_RETRY_TIME, maxRetryTime.toString());
    config.set(S3_CONNECT_TIMEOUT, connectTimeout.toString());
    config.set(S3_SOCKET_TIMEOUT, socketTimeout.toString());
    config.set(S3_STAGING_DIRECTORY, stagingDirectory.toString());
    config.setInt(S3_MAX_CONNECTIONS, maxConnections);
    config.setLong(S3_MULTIPART_MIN_FILE_SIZE, multipartMinFileSize.toBytes());
    config.setLong(S3_MULTIPART_MIN_PART_SIZE, multipartMinPartSize.toBytes());
    config.setBoolean(S3_PIN_CLIENT_TO_CURRENT_REGION, pinClientToCurrentRegion);
    config.set(S3_USER_AGENT_PREFIX, userAgentPrefix);
    config.set(S3_ACL_TYPE, aclType.name());
}

From source file:io.svectors.hbase.cdc.BaseTest.java

License:Apache License

@Before
public void setUp() throws Exception {
    final Configuration hbaseConf = HBaseConfiguration.create();
    hbaseConf.setInt("replication.stats.thread.period.seconds", 5);
    hbaseConf.setLong("replication.sleep.before.failover", 2000);
    hbaseConf.setInt("replication.source.maxretriesmultiplier", 10);
    hbaseConf.setBoolean(HConstants.REPLICATION_ENABLE_KEY, true);

    // add Kafka properties; each property is prefixed with "kafka"
    addKafkaProperties(hbaseConf);

    utility = new HBaseTestingUtility(hbaseConf);
    utility.startMiniCluster();
    numRegionServers = utility.getHBaseCluster().getRegionServerThreads().size();

    // setup kafka
    kafkaServer = new KafkaServer(utility.getZkCluster().getClientPort(), 9092);

}

From source file:io.svectors.hbase.sink.HbaseTestUtil.java

License:Apache License

/**
 * Returns a new HBaseTestingUtility instance.
  */
private static HBaseTestingUtility createTestingUtility() {
    final Configuration hbaseConf = HBaseConfiguration.create();
    hbaseConf.setInt("replication.stats.thread.period.seconds", 5);
    hbaseConf.setLong("replication.sleep.before.failover", 2000);
    hbaseConf.setInt("replication.source.maxretriesmultiplier", 10);
    return new HBaseTestingUtility(hbaseConf);
}

From source file:ipldataanalysis4.IPLDataAnalysis4.java

@Override
public int run(String[] args) throws Exception {

    if (args.length != 2) {
        System.out.printf("Two parameters are required for IPL data analysis: <input dir> <output dir>\n");
        return -1;
    }
    Configuration conf = getConf();
    DistributedCache.addCacheFile(new URI("testFile"), conf);
    // set the task timeout (one hour) before the Job copies the configuration
    long milliSeconds = 1000L * 60 * 60;
    conf.setLong("mapred.task.timeout", milliSeconds);
    Job job = new Job(conf, "Job1");

    job.setJarByClass(IPLDataAnalysis4.class);
    FileInputFormat.setInputPaths(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));
    job.setMapperClass(BloomMapper.class);

    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(NullWritable.class);

    job.setNumReduceTasks(0);
    //job.setReducerClass(DataAnalysisReducer.class);
    boolean success = job.waitForCompletion(true);
    return success ? 0 : 1;

}

From source file:mvm.rya.cloudbase.giraph.format.BspCase.java

License:Apache License

/**
 * Adjusts the configuration for the basic test case.
 */
public final void setupConfiguration(GiraphJob job) {
    Configuration conf = job.getConfiguration();
    conf.set("mapred.jar", getJarLocation());

    // Allow this test to be run on a real Hadoop setup
    if (getJobTracker() != null) {
        System.out.println("setup: Sending job to job tracker " + getJobTracker() + " with jar path "
                + getJarLocation() + " for " + getName());
        conf.set("mapred.job.tracker", getJobTracker());
        job.setWorkerConfiguration(getNumWorkers(), getNumWorkers(), 100.0f);
    } else {
        System.out.println(
                "setup: Using local job runner with " + "location " + getJarLocation() + " for " + getName());
        job.setWorkerConfiguration(1, 1, 100.0f);
        // Single node testing
        conf.setBoolean(GiraphJob.SPLIT_MASTER_WORKER, false);
    }
    conf.setInt(GiraphJob.POLL_ATTEMPTS, 10);
    conf.setInt(GiraphJob.POLL_MSECS, 3 * 1000);
    conf.setInt(GiraphJob.ZOOKEEPER_SERVERLIST_POLL_MSECS, 500);
    if (getZooKeeperList() != null) {
        job.setZooKeeperConfiguration(getZooKeeperList());
    }
    // GeneratedInputSplit will generate 5 vertices
    conf.setLong(GeneratedVertexReader.READER_VERTICES, 5);
}

From source file:net.java.jatextmining.JaCoOccurrence.java

License:Apache License

/**
 * Weights the value of each co-occurrence word.
 * @param conf Specify the Hadoop Configuration object.
 * @param cache Specify the distributed cache file path.
 * @return true on success, false otherwise.
 * @throws IOException Exception for IO.
 * @throws URISyntaxException Exception for URI.
 * @throws InterruptedException Exception for threads, waitForCompletion().
 * @throws ClassNotFoundException Exception for waitForCompletion().
 */
private boolean runJaCoOccurrenceWeighting(Configuration conf, String cache)
        throws IOException, URISyntaxException, InterruptedException, ClassNotFoundException {
    String reducerNum = conf.get("jatextmining.JaCoOccurrenceCounterReducerNum");
    if (type.equals("chi") || type.equals("mi") || type.equals("freaq")) {
        conf.set("type", type);
    } else {
        System.err.println("error type: [" + type + "]");
        printUsage();
    }
    readDocNumFile(conf);
    conf.setLong("docNum", inputNum);
    Job job = new Job(conf);
    job.setJarByClass(JaCoOccurrence.class);
    TextInputFormat.addInputPath(job, new Path(cache));
    FileOutputFormat.setOutputPath(job, new Path(out));
    job.setMapperClass(CoOccurrenceWeightingMapper.class);
    job.setReducerClass(CoOccurrenceReducer.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Text.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);
    job.setNumReduceTasks(Integer.valueOf(reducerNum));

    return job.waitForCompletion(true);
}

From source file:net.thevis.groovyhadoop.backport.CombineFileRecordReader.java

License:Apache License

/**
 * Get the record reader for the next chunk in this CombineFileSplit.
 */
protected boolean initNextRecordReader() throws IOException {

    if (curReader != null) {
        curReader.close();
        curReader = null;
        if (idx > 0) {
            progress += split.getLength(idx - 1); // done processing so far
        }
    }

    // if all chunks have been processed, nothing more to do.
    if (idx == split.getNumPaths()) {
        return false;
    }

    // get a record reader for the idx-th chunk
    try {
        Configuration conf = context.getConfiguration();
        // setup some helper config variables.
        conf.set(MRJobConfig.MAP_INPUT_FILE, split.getPath(idx).toString());
        conf.setLong(MRJobConfig.MAP_INPUT_START, split.getOffset(idx));
        conf.setLong(MRJobConfig.MAP_INPUT_PATH, split.getLength(idx));

        curReader = rrConstructor.newInstance(new Object[] { split, context, Integer.valueOf(idx) });

        if (idx > 0) {
            // initialize() for the first RecordReader will be called by MapTask;
            // we're responsible for initializing subsequent RecordReaders.
            curReader.initialize(split, context);
        }
    } catch (Exception e) {
        throw new RuntimeException(e);
    }
    idx++;
    return true;
}

From source file:nl.cwi.kba.apps.FeatureExtractor.java

License:Apache License

@Override
public int run(String[] args) throws Exception {

    String in = null;
    String out = null;
    String queryfile = null;
    String contextFile = null;
    String systemdescription = null;
    String corpus_id = null;
    String runtag = null;
    String teamname = null;
    String annoFile = null;
    String gcldFile = null;
    String labelsFile = null;
    String pprFile = null;
    String myverFile = null;
    String wikiFile = null;
    HashMap<String, Object> run_info = new HashMap<String, Object>();

    List<String> other_args = new ArrayList<String>();
    for (int i = 0; i < args.length; ++i) {
        try {
            if ("-i".equals(args[i])) {
                in = args[++i];
            } else if ("-o".equals(args[i])) {
                out = args[++i];
            } else if ("-q".equals(args[i])) {
                queryfile = args[++i];
            } else if ("-r".equals(args[i])) {
                runtag = args[++i];
            } else if ("-l".equals(args[i])) {
                labelsFile = args[++i];
            } else if ("-a".equals(args[i])) {
                annoFile = args[++i];
            } else if ("-t".equals(args[i])) {
                teamname = args[++i];
            } else if ("-d".equals(args[i])) {
                systemdescription = args[++i];
            } else if ("-p".equals(args[i])) {
                pprFile = args[++i];
            } else if ("-g".equals(args[i])) {
                gcldFile = args[++i];

            } else if ("-s".equals(args[i])) {
                myverFile = args[++i];

            } else if ("-c".equals(args[i])) {
                contextFile = args[++i];
            } else if ("-w".equals(args[i])) {
                wikiFile = args[++i];
            } else if ("-h".equals(args[i]) || "--help".equals(args[i])) {
                return printUsage();
            } else {
                other_args.add(args[i]);
            }
        } catch (ArrayIndexOutOfBoundsException except) {
            System.out.println("ERROR: Required parameter missing from " + args[i - 1]);
            return printUsage();
        }
    }

    if (other_args.size() > 0 || in == null || out == null || queryfile == null)
        return printUsage();

    if (runtag == null)
        runtag = "toy_1";

    if (teamname == null)
        teamname = "CompInsights";

    if (corpus_id == null)
        corpus_id = "kba-stream-corpus-2012-cleansed-only";

    if (systemdescription == null)
        systemdescription = "Description intentionally left blank.";

    LOG.info("Tool: " + this.getClass().getName());
    LOG.info(" - input path: " + in);
    LOG.info(" - output path: " + out);
    LOG.info(" - runtag: " + runtag);
    LOG.info(" - teamname: " + teamname);
    LOG.info(" - corpus_id: " + corpus_id);
    LOG.info(" - run description: " + systemdescription);

    Filter_run fr = new Filter_run.Factory().create(TEAMNAME, RUNTAG, systemdescription, corpus_id);

    Map<String, String> Attr = new LinkedHashMap<String, String>();
    // Attr.put("trec-kba", "");
    /*
    Attr.put("LengthTitle", "");
    Attr.put("LengthBody", "");
    Attr.put("LengthAnchor", "");
    Attr.put("Source", "");
    Attr.put("English", "");
    Attr.put("MentionsTitle", "");
    Attr.put("MentionsBody", "");
    Attr.put("MentionsAnchor", "");
    Attr.put("FirstPos", "");
    Attr.put("LastPos", "");
    Attr.put("Spread", "");
    Attr.put("FirstPosNorm", "");
    Attr.put("LastPosNorm", "");
    Attr.put("SpreadNorm", "");
    // Attr.put("Related", "");
    Attr.put("Relatedtitle", "");
    Attr.put("RelatedBody", "");
    Attr.put("RelatedAnchor", "");
    Attr.put("ppr", "");
    Attr.put("gcld", "");
    Attr.put("partial", "");
    Attr.put("s_form", "");
    Attr.put("contxL", "0");
    Attr.put("contxR", "0");
    Attr.put("cos", "0");
    Attr.put("kl", "0");
    Attr.put("jac", "0");
    Attr.put("Class", "");
    */
    Attr.put("gcld", "0");
    Attr.put("jac", "0");
    Attr.put("cos", "0");
    Attr.put("kl", "0");
    Attr.put("ppr", "0");
    Attr.put("s_form", "0");
    Attr.put("contxR", "0");
    Attr.put("contxL", "0");
    Attr.put("FirstPos", "0");
    Attr.put("LastPos", "0");
    Attr.put("LengthBody", "0");
    Attr.put("FirstPosNorm", "0");
    Attr.put("MentionsBody", "0");
    Attr.put("RelatedBody", "0");
    Attr.put("Spread", "0");
    Attr.put("LastPosNorm", "0");
    Attr.put("SpreadNorm", "0");
    Attr.put("LengthAnchor", "0");
    Attr.put("Source", "0");
    Attr.put("LengthTitle", "0");
    Attr.put("partial", "0");
    Attr.put("MentionsAnchor", "0");
    Attr.put("Relatedtitle", "0");
    Attr.put("English", "0");
    Attr.put("RelatedAnchor", "0");
    Attr.put("MentionsTitle", "0");
    Attr.put("Class", "0");

    Configuration conf = getConf();
    conf.set(QUERYFILEPATH_HDFS, new Path(queryfile).toUri().toString());
    conf.set(LABELSFILEPATH_HDFS, new Path(labelsFile).toUri().toString());
    conf.set(ANNOFILEPATH_HDFS, new Path(annoFile).toUri().toString());
    conf.set(PPR_HDFS, new Path(pprFile).toUri().toString());
    //conf.set(MYVER, new Path(myverFile).toUri().toString());
    conf.set(GCLD_HDFS, new Path(gcldFile).toUri().toString());
    conf.set(CONTEXT_HDFS, new Path(contextFile).toUri().toString());
    conf.set(WIKI_HDFS, new Path(wikiFile).toUri().toString());
    conf.set(RUNTAG, runtag);
    conf.set(TEAMNAME, teamname);

    // set the task timeout (in milliseconds)
    conf.setLong("mapred.task.timeout", 40 * 600000);

    FileSystem fs = FileSystem.get(conf);
    // Lookup required data from the topic file
    loadTopicData(queryfile, fr, fs, run_info);
    Job job = new Job(conf, "Feature Extractor");
    job.setJarByClass(FeatureExtractor.class);

    // some weird issues with Thrift classes in the Hadoop distro.
    job.setUserClassesTakesPrecedence(true);

    // make the query file available to each mapper.
    DistributedCache.addCacheFile(new URI(new Path(queryfile) + "#" + QUERYFILEPATH_HDFS),
            job.getConfiguration());
    DistributedCache.addCacheFile(new URI(new Path(labelsFile) + "#" + LABELSFILEPATH_HDFS),
            job.getConfiguration());
    DistributedCache.addCacheFile(new URI(new Path(annoFile) + "#" + ANNOFILEPATH_HDFS),
            job.getConfiguration());
    DistributedCache.addCacheFile(new URI(new Path(pprFile) + "#" + PPR_HDFS), job.getConfiguration());
    DistributedCache.addCacheFile(new URI(new Path(gcldFile) + "#" + GCLD_HDFS), job.getConfiguration());
    DistributedCache.addCacheFile(new URI(new Path(contextFile) + "#" + CONTEXT_HDFS), job.getConfiguration());
    DistributedCache.addCacheFile(new URI(new Path(wikiFile) + "#" + WIKI_HDFS), job.getConfiguration());

    // DistributedCache.addCacheFile( new URI(new Path(myverFile) + "#" +
    //MYVER), job.getConfiguration());

    DistributedCache.createSymlink(job.getConfiguration());

    job.setInputFormatClass(ThriftFileInputFormat.class);
    job.setMapperClass(MyMapper.class);
    FileInputFormat.addInputPath(job, new Path(in));

    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Text.class);

    // job.setCombinerClass(MyReducer.class);
    // job.setReducerClass(MyReducer.class);
    job.setNumReduceTasks(1);

    FileSystem.get(conf).delete(new Path(out), true);
    TextOutputFormat.setOutputPath(job, new Path(out));
    job.setOutputFormatClass(TextOutputFormat.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);

    // Let's go
    int status = job.waitForCompletion(true) ? 0 : 1;

    Counters c = job.getCounters();
    long cputime = c.findCounter(org.apache.hadoop.mapred.Task.Counter.CPU_MILLISECONDS).getValue();
    run_info.put("elapsed_time_secs", ((double) cputime / 1000d));

    long num_filter_results = c.findCounter(org.apache.hadoop.mapred.Task.Counter.MAP_OUTPUT_RECORDS)
            .getValue();
    run_info.put("num_filter_results", num_filter_results);

    long num_entity_doc_compares = c.findCounter(org.apache.hadoop.mapred.Task.Counter.MAP_INPUT_RECORDS)
            .getValue();
    run_info.put("num_entity_doc_compares", num_entity_doc_compares);

    long hours = c.findCounter(org.apache.hadoop.mapred.Task.Counter.REDUCE_INPUT_GROUPS).getValue();
    run_info.put("num_stream_hours", hours);

    fr.setAdditionalProperties("run_info", run_info);

    // System.out.println("#" + new Filter_run.Factory().toJSON(fr));
    System.out.println("@RELATION" + " trec-kba" + " ");
    for (String key : Attr.keySet()) {
        if (key.equalsIgnoreCase("English")) {
            System.out.println("@ATTRIBUTE " + key + " " + "{0,1,2}");
        } else if (key.equalsIgnoreCase("Class")) {
            System.out.println("@ATTRIBUTE " + key + " " + "{0,1}");
        } else {
            System.out.println("@ATTRIBUTE " + key + " " + "NUMERIC");
        }

    }
    System.out.println("\n@DATA");
    Text line = new Text();
    LineReader reader = new LineReader(fs.open(new Path(out + "/part-r-00000")));
    for (int i = 0; i < num_filter_results; i++) {
        reader.readLine(line);
        System.out.println(line.toString().split("\t\t")[1]);
    }
    /*
     * System.out.println("#" + new
     * Filter_run.Factory().toPrettyJSON(fr).replaceAll("\\n", "\n#"));
     */

    return status;

}

From source file:nl.cwi.kba.apps.FeatureExtractor_filterer.java

License:Apache License

@Override
public int run(String[] args) throws Exception {

    String in = null;
    String out = null;
    String queryfile = null;
    String contextFile = null;
    String systemdescription = null;
    String corpus_id = null;
    String runtag = null;
    String teamname = null;
    String annoFile = null;
    String gcldFile = null;
    String labelsFile = null;
    String pprFile = null;
    String myverFile = null;
    HashMap<String, Object> run_info = new HashMap<String, Object>();

    List<String> other_args = new ArrayList<String>();
    for (int i = 0; i < args.length; ++i) {
        try {
            if ("-i".equals(args[i])) {
                in = args[++i];
            } else if ("-o".equals(args[i])) {
                out = args[++i];
            } else if ("-q".equals(args[i])) {
                queryfile = args[++i];
            } else if ("-r".equals(args[i])) {
                runtag = args[++i];
            } else if ("-l".equals(args[i])) {
                labelsFile = args[++i];
            } else if ("-a".equals(args[i])) {
                annoFile = args[++i];
            } else if ("-t".equals(args[i])) {
                teamname = args[++i];
            } else if ("-d".equals(args[i])) {
                systemdescription = args[++i];

            } else if ("-h".equals(args[i]) || "--help".equals(args[i])) {
                return printUsage();
            } else {
                other_args.add(args[i]);
            }
        } catch (ArrayIndexOutOfBoundsException except) {
            System.out.println("ERROR: Required parameter missing from " + args[i - 1]);
            return printUsage();
        }
    }

    if (other_args.size() > 0 || in == null || out == null || queryfile == null)
        return printUsage();

    if (runtag == null)
        runtag = "toy_1";

    if (teamname == null)
        teamname = "CompInsights";

    if (corpus_id == null)
        corpus_id = "kba-stream-corpus-2012-cleansed-only";

    if (systemdescription == null)
        systemdescription = "Description intentionally left blank.";

    LOG.info("Tool: " + this.getClass().getName());
    LOG.info(" - input path: " + in);
    LOG.info(" - output path: " + out);
    LOG.info(" - runtag: " + runtag);
    LOG.info(" - teamname: " + teamname);
    LOG.info(" - corpus_id: " + corpus_id);
    LOG.info(" - run description: " + systemdescription);

    Filter_run fr = new Filter_run.Factory().create(TEAMNAME, RUNTAG, systemdescription, corpus_id);

    Map<String, String> Attr = new LinkedHashMap<String, String>();
    // Attr.put("trec-kba", "");
    Attr.put("LengthTitle", "");
    Attr.put("LengthBody", "");
    Attr.put("LengthAnchor", "");
    Attr.put("Source", "");
    Attr.put("English", "");
    Attr.put("MentionsTitle", "");
    Attr.put("MentionsBody", "");
    Attr.put("MentionsAnchor", "");
    Attr.put("FirstPos", "");
    Attr.put("LastPos", "");
    Attr.put("Spread", "");
    Attr.put("FirstPosNorm", "");
    Attr.put("LastPosNorm", "");
    Attr.put("SpreadNorm", "");
    // Attr.put("Related", "");
    Attr.put("Relatedtitle", "");
    Attr.put("RelatedBody", "");
    Attr.put("RelatedAnchor", "");

    //Attr.put("contxL", "0");
    //Attr.put("contxR", "0");
    Attr.put("Class", "");

    Configuration conf = getConf();
    conf.set(QUERYFILEPATH_HDFS, new Path(queryfile).toUri().toString());
    conf.set(LABELSFILEPATH_HDFS, new Path(labelsFile).toUri().toString());
    conf.set(ANNOFILEPATH_HDFS, new Path(annoFile).toUri().toString());

    conf.set(RUNTAG, runtag);
    conf.set(TEAMNAME, teamname);

    // set the task timeout (in milliseconds)
    conf.setLong("mapred.task.timeout", 40 * 600000);

    FileSystem fs = FileSystem.get(conf);
    // Lookup required data from the topic file
    loadTopicData(queryfile, fr, fs, run_info);
    Job job = new Job(conf, "Toy KBA system");
    job.setJarByClass(FeatureExtractor_filterer.class);

    // some weird issues with Thrift classes in the Hadoop distro.
    job.setUserClassesTakesPrecedence(true);

    // make the query file available to each mapper.
    DistributedCache.addCacheFile(new URI(new Path(queryfile) + "#" + QUERYFILEPATH_HDFS),
            job.getConfiguration());
    DistributedCache.addCacheFile(new URI(new Path(labelsFile) + "#" + LABELSFILEPATH_HDFS),
            job.getConfiguration());
    DistributedCache.addCacheFile(new URI(new Path(annoFile) + "#" + ANNOFILEPATH_HDFS),
            job.getConfiguration());

    DistributedCache.createSymlink(job.getConfiguration());

    job.setInputFormatClass(ThriftFileInputFormat.class);
    job.setMapperClass(MyMapper.class);
    FileInputFormat.addInputPath(job, new Path(in));

    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Text.class);

    // job.setCombinerClass(MyReducer.class);
    // job.setReducerClass(MyReducer.class);
    job.setNumReduceTasks(1);

    FileSystem.get(conf).delete(new Path(out), true);
    TextOutputFormat.setOutputPath(job, new Path(out));
    job.setOutputFormatClass(TextOutputFormat.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);

    // Let's go
    int status = job.waitForCompletion(true) ? 0 : 1;

    Counters c = job.getCounters();
    long cputime = c.findCounter(org.apache.hadoop.mapred.Task.Counter.CPU_MILLISECONDS).getValue();
    run_info.put("elapsed_time_secs", ((double) cputime / 1000d));

    long num_filter_results = c.findCounter(org.apache.hadoop.mapred.Task.Counter.MAP_OUTPUT_RECORDS)
            .getValue();
    run_info.put("num_filter_results", num_filter_results);

    long num_entity_doc_compares = c.findCounter(org.apache.hadoop.mapred.Task.Counter.MAP_INPUT_RECORDS)
            .getValue();
    run_info.put("num_entity_doc_compares", num_entity_doc_compares);

    long hours = c.findCounter(org.apache.hadoop.mapred.Task.Counter.REDUCE_INPUT_GROUPS).getValue();
    run_info.put("num_stream_hours", hours);

    fr.setAdditionalProperties("run_info", run_info);

    // System.out.println("#" + new Filter_run.Factory().toJSON(fr));
    System.out.println("@RELATION" + " trec-kba" + " ");
    for (String key : Attr.keySet()) {
        if (key.equalsIgnoreCase("English")) {
            System.out.println("@ATTRIBUTE " + key + " " + "{0,1,2}");
        } else if (key.equalsIgnoreCase("Class")) {
            System.out.println("@ATTRIBUTE " + key + " " + "{0,1}");
        } else {
            System.out.println("@ATTRIBUTE " + key + " " + "NUMERIC");
        }

    }
    System.out.println("\n@DATA");
    Text line = new Text();
    LineReader reader = new LineReader(fs.open(new Path(out + "/part-r-00000")));
    for (int i = 0; i < num_filter_results; i++) {
        reader.readLine(line);
        System.out.println(line.toString().split("\t\t")[1]);
    }

    return status;

}

From source file:nl.cwi.kba2013.apps.AnnotationExtractor.java

License:Apache License

@Override
public int run(String[] args) throws Exception {

    String in = null;
    String out = null;

    String annoFile = null;

    List<String> other_args = new ArrayList<String>();
    for (int i = 0; i < args.length; ++i) {
        try {
            if ("-i".equals(args[i])) {
                in = args[++i];
            } else if ("-o".equals(args[i])) {
                out = args[++i];
            } else if ("-q".equals(args[i])) {

            } else if ("-a".equals(args[i])) {
                annoFile = args[++i];

            } else if ("-h".equals(args[i]) || "--help".equals(args[i])) {
                return printUsage();
            } else {
                other_args.add(args[i]);
            }
        } catch (ArrayIndexOutOfBoundsException except) {
            System.out.println("ERROR: Required parameter missing from " + args[i - 1]);
            return printUsage();
        }
    }

    if (other_args.size() > 0 || in == null || out == null)
        return printUsage();

    LOG.info("Tool: " + this.getClass().getName());
    LOG.info(" - input path: " + in);
    LOG.info(" - output path: " + out);

    Configuration conf = getConf();

    conf.set(ANNOFILEPATH_HDFS, new Path(annoFile).toUri().toString());

    // set the task timeout (in milliseconds)
    conf.setLong("mapred.task.timeout", 40 * 600000);
    conf.set("mapred.map.child.java.opts", "-Xmx4g -XX:-UseGCOverheadLimit");

    FileSystem fs = FileSystem.get(conf);
    // Lookup required data from the topic file

    Job job = new Job(conf, "Annotation Extractor");
    job.setJarByClass(AnnotationExtractor.class);

    // some weird issues with Thrift classes in the Hadoop distro.
    //job.setUserClassesTakesPrecedence(true);

    // make the query file available to each mapper.

    DistributedCache.addCacheFile(new URI(new Path(annoFile) + "#" + ANNOFILEPATH_HDFS),
            job.getConfiguration());
    DistributedCache.createSymlink(job.getConfiguration());

    job.setInputFormatClass(TextInputFormat.class);
    //job.setMapperClass(MyMapper.class);
    FileInputFormat.addInputPath(job, new Path(in));

    //job.setMapOutputKeyClass(Text.class);
    //job.setMapOutputValueClass(Text.class);

    job.setCombinerClass(MyReducer.class);
    //job.setReducerClass(MyReducer.class);
    job.setNumReduceTasks(1);

    FileSystem.get(conf).delete(new Path(out), true);
    TextOutputFormat.setOutputPath(job, new Path(out));
    job.setOutputFormatClass(TextOutputFormat.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);

    // Let's go
    int status = job.waitForCompletion(true) ? 0 : 1;

    return status;

}