List of usage examples for org.apache.hadoop.conf Configuration setLong
public void setLong(String name, long value)
Set the value of the name property to a long.
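For orientation before the collected examples, here is a minimal, self-contained sketch of the typical setLong/getLong round trip; the property name and values are illustrative only and are not taken from any of the source files below.

    import org.apache.hadoop.conf.Configuration;

    public class SetLongExample {
        public static void main(String[] args) {
            Configuration conf = new Configuration();

            // Store a long-valued setting under an illustrative property name.
            conf.setLong("example.max.split.size", 128L * 1024 * 1024);

            // Read it back; the second argument is the default returned when
            // the property is not set.
            long maxSplitSize = conf.getLong("example.max.split.size", 64L * 1024 * 1024);
            System.out.println("example.max.split.size = " + maxSplitSize);
        }
    }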
From source file:io.prestosql.plugin.hive.s3.PrestoS3ConfigurationUpdater.java
License:Apache License
    @Override
    public void updateConfiguration(Configuration config) {
        // re-map filesystem schemes to match Amazon Elastic MapReduce
        config.set("fs.s3.impl", PrestoS3FileSystem.class.getName());
        config.set("fs.s3a.impl", PrestoS3FileSystem.class.getName());
        config.set("fs.s3n.impl", PrestoS3FileSystem.class.getName());
        if (awsAccessKey != null) {
            config.set(S3_ACCESS_KEY, awsAccessKey);
        }
        if (awsSecretKey != null) {
            config.set(S3_SECRET_KEY, awsSecretKey);
        }
        if (endpoint != null) {
            config.set(S3_ENDPOINT, endpoint);
        }
        if (signerType != null) {
            config.set(S3_SIGNER_TYPE, signerType.name());
        }
        config.setBoolean(S3_PATH_STYLE_ACCESS, pathStyleAccess);
        config.setBoolean(S3_USE_INSTANCE_CREDENTIALS, useInstanceCredentials);
        config.setBoolean(S3_SSL_ENABLED, sslEnabled);
        config.setBoolean(S3_SSE_ENABLED, sseEnabled);
        config.set(S3_SSE_TYPE, sseType.name());
        if (encryptionMaterialsProvider != null) {
            config.set(S3_ENCRYPTION_MATERIALS_PROVIDER, encryptionMaterialsProvider);
        }
        if (kmsKeyId != null) {
            config.set(S3_KMS_KEY_ID, kmsKeyId);
        }
        if (sseKmsKeyId != null) {
            config.set(S3_SSE_KMS_KEY_ID, sseKmsKeyId);
        }
        config.setInt(S3_MAX_CLIENT_RETRIES, maxClientRetries);
        config.setInt(S3_MAX_ERROR_RETRIES, maxErrorRetries);
        config.set(S3_MAX_BACKOFF_TIME, maxBackoffTime.toString());
        config.set(S3_MAX_RETRY_TIME, maxRetryTime.toString());
        config.set(S3_CONNECT_TIMEOUT, connectTimeout.toString());
        config.set(S3_SOCKET_TIMEOUT, socketTimeout.toString());
        config.set(S3_STAGING_DIRECTORY, stagingDirectory.toString());
        config.setInt(S3_MAX_CONNECTIONS, maxConnections);
        config.setLong(S3_MULTIPART_MIN_FILE_SIZE, multipartMinFileSize.toBytes());
        config.setLong(S3_MULTIPART_MIN_PART_SIZE, multipartMinPartSize.toBytes());
        config.setBoolean(S3_PIN_CLIENT_TO_CURRENT_REGION, pinClientToCurrentRegion);
        config.set(S3_USER_AGENT_PREFIX, userAgentPrefix);
        config.set(S3_ACL_TYPE, aclType.name());
    }
From source file:io.svectors.hbase.cdc.BaseTest.java
License:Apache License
    @Before
    public void setUp() throws Exception {
        final Configuration hbaseConf = HBaseConfiguration.create();
        hbaseConf.setInt("replication.stats.thread.period.seconds", 5);
        hbaseConf.setLong("replication.sleep.before.failover", 2000);
        hbaseConf.setInt("replication.source.maxretriesmultiplier", 10);
        hbaseConf.setBoolean(HConstants.REPLICATION_ENABLE_KEY, true);

        // add kafka properties. we prefix each property with kafka
        addKafkaProperties(hbaseConf);

        utility = new HBaseTestingUtility(hbaseConf);
        utility.startMiniCluster();
        numRegionServers = utility.getHBaseCluster().getRegionServerThreads().size();

        // setup kafka
        kafkaServer = new KafkaServer(utility.getZkCluster().getClientPort(), 9092);
    }
From source file:io.svectors.hbase.sink.HbaseTestUtil.java
License:Apache License
    /**
     * Returns a new HBaseTestingUtility instance.
     */
    private static HBaseTestingUtility createTestingUtility() {
        final Configuration hbaseConf = HBaseConfiguration.create();
        hbaseConf.setInt("replication.stats.thread.period.seconds", 5);
        hbaseConf.setLong("replication.sleep.before.failover", 2000);
        hbaseConf.setInt("replication.source.maxretriesmultiplier", 10);
        return new HBaseTestingUtility(hbaseConf);
    }
From source file:ipldataanalysis4.IPLDataAnalysis4.java
    @Override
    public int run(String[] args) throws Exception {
        if (args.length != 2) {
            System.out.printf("Two parameters are required for Data Analysis for IPL- <input dir> <output dir>\n");
            return -1;
        }

        Configuration conf = new Configuration();
        DistributedCache.addCacheFile(new URI("testFile"), conf);
        Job job = new Job(getConf(), "Job1");
        long milliSeconds = 1000 * 60 * 60;
        conf.setLong("mapred.task.timeout", milliSeconds);
        job.setJarByClass(IPLDataAnalysis4.class);
        FileInputFormat.setInputPaths(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));
        job.setMapperClass(BloomMapper.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(NullWritable.class);
        job.setNumReduceTasks(0);
        //job.setReducerClass(DataAnalysisReducer.class);
        boolean success = job.waitForCompletion(true);
        return success ? 0 : 1;
    }
From source file:mvm.rya.cloudbase.giraph.format.BspCase.java
License:Apache License
    /**
     * Adjust the configuration to the basic test case
     */
    public final void setupConfiguration(GiraphJob job) {
        Configuration conf = job.getConfiguration();
        conf.set("mapred.jar", getJarLocation());

        // Allow this test to be run on a real Hadoop setup
        if (getJobTracker() != null) {
            System.out.println("setup: Sending job to job tracker " + getJobTracker() + " with jar path "
                    + getJarLocation() + " for " + getName());
            conf.set("mapred.job.tracker", getJobTracker());
            job.setWorkerConfiguration(getNumWorkers(), getNumWorkers(), 100.0f);
        } else {
            System.out.println(
                    "setup: Using local job runner with " + "location " + getJarLocation() + " for " + getName());
            job.setWorkerConfiguration(1, 1, 100.0f);
            // Single node testing
            conf.setBoolean(GiraphJob.SPLIT_MASTER_WORKER, false);
        }
        conf.setInt(GiraphJob.POLL_ATTEMPTS, 10);
        conf.setInt(GiraphJob.POLL_MSECS, 3 * 1000);
        conf.setInt(GiraphJob.ZOOKEEPER_SERVERLIST_POLL_MSECS, 500);
        if (getZooKeeperList() != null) {
            job.setZooKeeperConfiguration(getZooKeeperList());
        }
        // GeneratedInputSplit will generate 5 vertices
        conf.setLong(GeneratedVertexReader.READER_VERTICES, 5);
    }
From source file:net.java.jatextmining.JaCoOccurrence.java
License:Apache License
    /**
     * Weighting the value of each co-occurrence words.
     * @param conf Specify the Hadoop Configuration object.
     * @param cache Specify the distributed cache file path.
     * @return if success return true, not success return false.
     * @throws IOException Exception for IO.
     * @throws URISyntaxException Exception for URI.
     * @throws InterruptedException Exception for threads, waitForCompletion().
     * @throws ClassNotFoundException Exception for waitForCompletion().
     */
    private boolean runJaCoOccurrenceWeighting(Configuration conf, String cache)
            throws IOException, URISyntaxException, InterruptedException, ClassNotFoundException {
        String reducerNum = conf.get("jatextmining.JaCoOccurrenceCounterReducerNum");
        if (type.equals("chi") || type.equals("mi") || type.equals("freaq")) {
            conf.set("type", type);
        } else {
            System.err.println("error type: [" + type + "]");
            printUsage();
        }
        readDocNumFile(conf);
        conf.setLong("docNum", inputNum);
        Job job = new Job(conf);
        job.setJarByClass(JaCoOccurrence.class);
        TextInputFormat.addInputPath(job, new Path(cache));
        FileOutputFormat.setOutputPath(job, new Path(out));
        job.setMapperClass(CoOccurrenceWeightingMapper.class);
        job.setReducerClass(CoOccurrenceReducer.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(Text.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);
        job.setNumReduceTasks(Integer.valueOf(reducerNum));
        return job.waitForCompletion(true);
    }
From source file:net.thevis.groovyhadoop.backport.CombineFileRecordReader.java
License:Apache License
    /**
     * Get the record reader for the next chunk in this CombineFileSplit.
     */
    protected boolean initNextRecordReader() throws IOException {
        if (curReader != null) {
            curReader.close();
            curReader = null;
            if (idx > 0) {
                progress += split.getLength(idx - 1); // done processing so far
            }
        }

        // if all chunks have been processed, nothing more to do.
        if (idx == split.getNumPaths()) {
            return false;
        }

        // get a record reader for the idx-th chunk
        try {
            Configuration conf = context.getConfiguration();
            // setup some helper config variables.
            conf.set(MRJobConfig.MAP_INPUT_FILE, split.getPath(idx).toString());
            conf.setLong(MRJobConfig.MAP_INPUT_START, split.getOffset(idx));
            conf.setLong(MRJobConfig.MAP_INPUT_PATH, split.getLength(idx));

            curReader = rrConstructor.newInstance(new Object[] { split, context, Integer.valueOf(idx) });

            if (idx > 0) {
                // initialize() for the first RecordReader will be called by MapTask;
                // we're responsible for initializing subsequent RecordReaders.
                curReader.initialize(split, context);
            }
        } catch (Exception e) {
            throw new RuntimeException(e);
        }
        idx++;
        return true;
    }
From source file:nl.cwi.kba.apps.FeatureExtractor.java
License:Apache License
    @Override
    public int run(String[] args) throws Exception {
        String in = null;
        String out = null;
        String queryfile = null;
        String contextFile = null;
        String systemdescription = null;
        String corpus_id = null;
        String runtag = null;
        String teamname = null;
        String annoFile = null;
        String gcldFile = null;
        String labelsFile = null;
        String pprFile = null;
        String myverFile = null;
        String wikiFile = null;
        HashMap<String, Object> run_info = new HashMap<String, Object>();
        List<String> other_args = new ArrayList<String>();
        for (int i = 0; i < args.length; ++i) {
            try {
                if ("-i".equals(args[i])) { in = args[++i]; }
                else if ("-o".equals(args[i])) { out = args[++i]; }
                else if ("-q".equals(args[i])) { queryfile = args[++i]; }
                else if ("-r".equals(args[i])) { runtag = args[++i]; }
                else if ("-l".equals(args[i])) { labelsFile = args[++i]; }
                else if ("-a".equals(args[i])) { annoFile = args[++i]; }
                else if ("-t".equals(args[i])) { teamname = args[++i]; }
                else if ("-d".equals(args[i])) { systemdescription = args[++i]; }
                else if ("-p".equals(args[i])) { pprFile = args[++i]; }
                else if ("-g".equals(args[i])) { gcldFile = args[++i]; }
                else if ("-s".equals(args[i])) { myverFile = args[++i]; }
                else if ("-c".equals(args[i])) { contextFile = args[++i]; }
                else if ("-w".equals(args[i])) { wikiFile = args[++i]; }
                else if ("-h".equals(args[i]) || "--help".equals(args[i])) { return printUsage(); }
                else { other_args.add(args[i]); }
            } catch (ArrayIndexOutOfBoundsException except) {
                System.out.println("ERROR: Required parameter missing from " + args[i - 1]);
                return printUsage();
            }
        }
        if (other_args.size() > 0 || in == null || out == null || queryfile == null)
            return printUsage();
        if (runtag == null)
            runtag = "toy_1";
        if (teamname == null)
            teamname = "CompInsights";
        if (corpus_id == null)
            corpus_id = "kba-stream-corpus-2012-cleansed-only";
        if (systemdescription == null)
            systemdescription = "Description intentionally left blank.";
        LOG.info("Tool: " + this.getClass().getName());
        LOG.info(" - input path: " + in);
        LOG.info(" - output path: " + out);
        LOG.info(" - runtag: " + runtag);
        LOG.info(" - teamname: " + teamname);
        LOG.info(" - corpus_id: " + corpus_id);
        LOG.info(" - run description: " + systemdescription);
        Filter_run fr = new Filter_run.Factory().create(TEAMNAME, RUNTAG, systemdescription, corpus_id);
        Map<String, String> Attr = new LinkedHashMap<String, String>();
        // Attr.put("trec-kba", "");
        /*
        Attr.put("LengthTitle", ""); Attr.put("LengthBody", ""); Attr.put("LengthAnchor", "");
        Attr.put("Source", ""); Attr.put("English", ""); Attr.put("MentionsTitle", "");
        Attr.put("MentionsBody", ""); Attr.put("MentionsAnchor", ""); Attr.put("FirstPos", "");
        Attr.put("LastPos", ""); Attr.put("Spread", ""); Attr.put("FirstPosNorm", "");
        Attr.put("LastPosNorm", ""); Attr.put("SpreadNorm", "");
        // Attr.put("Related", "");
        Attr.put("Relatedtitle", ""); Attr.put("RelatedBody", ""); Attr.put("RelatedAnchor", "");
        Attr.put("ppr", ""); Attr.put("gcld", ""); Attr.put("partial", ""); Attr.put("s_form", "");
        Attr.put("contxL", "0"); Attr.put("contxR", "0"); Attr.put("cos", "0"); Attr.put("kl", "0");
        Attr.put("jac", "0"); Attr.put("Class", "");
        */
        Attr.put("gcld", "0");
        Attr.put("jac", "0");
        Attr.put("cos", "0");
        Attr.put("kl", "0");
        Attr.put("ppr", "0");
        Attr.put("s_form", "0");
        Attr.put("contxR", "0");
        Attr.put("contxL", "0");
        Attr.put("FirstPos", "0");
        Attr.put("LastPos", "0");
        Attr.put("LengthBody", "0");
        Attr.put("FirstPosNorm", "0");
        Attr.put("MentionsBody", "0");
        Attr.put("RelatedBody", "0");
        Attr.put("Spread", "0");
        Attr.put("LastPosNorm", "0");
        Attr.put("SpreadNorm", "0");
        Attr.put("LengthAnchor", "0");
        Attr.put("Source", "0");
        Attr.put("LengthTitle", "0");
        Attr.put("partial", "0");
        Attr.put("MentionsAnchor", "0");
        Attr.put("Relatedtitle", "0");
        Attr.put("English", "0");
        Attr.put("RelatedAnchor", "0");
        Attr.put("MentionsTitle", "0");
        Attr.put("Class", "0");
        Configuration conf = getConf();
        conf.set(QUERYFILEPATH_HDFS, new Path(queryfile).toUri().toString());
        conf.set(LABELSFILEPATH_HDFS, new Path(labelsFile).toUri().toString());
        conf.set(ANNOFILEPATH_HDFS, new Path(annoFile).toUri().toString());
        conf.set(PPR_HDFS, new Path(pprFile).toUri().toString());
        //conf.set(MYVER, new Path(myverFile).toUri().toString());
        conf.set(GCLD_HDFS, new Path(gcldFile).toUri().toString());
        conf.set(CONTEXT_HDFS, new Path(contextFile).toUri().toString());
        conf.set(WIKI_HDFS, new Path(contextFile).toUri().toString());
        conf.set(RUNTAG, runtag);
        conf.set(TEAMNAME, teamname);
        // set time
        conf.setLong("mapred.task.timeout", 40 * 600000);
        FileSystem fs = FileSystem.get(conf);
        // Lookup required data from the topic file
        loadTopicData(queryfile, fr, fs, run_info);
        Job job = new Job(conf, "Feature Extractor");
        job.setJarByClass(FeatureExtractor.class);
        // some weird issues with Thrift classes in the Hadoop distro.
        job.setUserClassesTakesPrecedence(true);
        // make the query file available to each mapper.
        DistributedCache.addCacheFile(new URI(new Path(queryfile) + "#" + QUERYFILEPATH_HDFS),
                job.getConfiguration());
        DistributedCache.addCacheFile(new URI(new Path(labelsFile) + "#" + LABELSFILEPATH_HDFS),
                job.getConfiguration());
        DistributedCache.addCacheFile(new URI(new Path(annoFile) + "#" + ANNOFILEPATH_HDFS),
                job.getConfiguration());
        DistributedCache.addCacheFile(new URI(new Path(pprFile) + "#" + PPR_HDFS), job.getConfiguration());
        DistributedCache.addCacheFile(new URI(new Path(gcldFile) + "#" + GCLD_HDFS), job.getConfiguration());
        DistributedCache.addCacheFile(new URI(new Path(contextFile) + "#" + CONTEXT_HDFS),
                job.getConfiguration());
        DistributedCache.addCacheFile(new URI(new Path(wikiFile) + "#" + WIKI_HDFS), job.getConfiguration());
        // DistributedCache.addCacheFile(new URI(new Path(myverFile) + "#" +
        // MYVER), job.getConfiguration());
        DistributedCache.createSymlink(job.getConfiguration());
        job.setInputFormatClass(ThriftFileInputFormat.class);
        job.setMapperClass(MyMapper.class);
        FileInputFormat.addInputPath(job, new Path(in));
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(Text.class);
        // job.setCombinerClass(MyReducer.class);
        // job.setReducerClass(MyReducer.class);
        job.setNumReduceTasks(1);
        FileSystem.get(conf).delete(new Path(out), true);
        TextOutputFormat.setOutputPath(job, new Path(out));
        job.setOutputFormatClass(TextOutputFormat.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);
        // Let's go
        int status = job.waitForCompletion(true) ? 0 : 1;
        Counters c = job.getCounters();
        long cputime = c.findCounter(org.apache.hadoop.mapred.Task.Counter.CPU_MILLISECONDS).getValue();
        run_info.put("elapsed_time_secs", ((double) cputime / 1000d));
        long num_filter_results = c.findCounter(org.apache.hadoop.mapred.Task.Counter.MAP_OUTPUT_RECORDS)
                .getValue();
        run_info.put("num_filter_results", num_filter_results);
        long num_entity_doc_compares = c.findCounter(org.apache.hadoop.mapred.Task.Counter.MAP_INPUT_RECORDS)
                .getValue();
        run_info.put("num_entity_doc_compares", num_entity_doc_compares);
        long hours = c.findCounter(org.apache.hadoop.mapred.Task.Counter.REDUCE_INPUT_GROUPS).getValue();
        run_info.put("num_stream_hours", hours);
        fr.setAdditionalProperties("run_info", run_info);
        // System.out.println("#" + new Filter_run.Factory().toJSON(fr));
        System.out.println("@RELATION" + " trec-kba" + " ");
        for (String key : Attr.keySet()) {
            if (key.equalsIgnoreCase("English")) {
                System.out.println("@ATTRIBUTE " + key + " " + "{0,1,2}");
            } else if (key.equalsIgnoreCase("Class")) {
                System.out.println("@ATTRIBUTE " + key + " " + "{0,1}");
            } else {
                System.out.println("@ATTRIBUTE " + key + " " + "NUMERIC");
            }
        }
        System.out.println("\n@DATA");
        Text line = new Text();
        LineReader reader = new LineReader(fs.open(new Path(out + "/part-r-00000")));
        for (int i = 0; i < num_filter_results; i++) {
            reader.readLine(line);
            System.out.println(line.toString().split("\t\t")[1]);
        }
        /*
         * System.out.println("#" + new
         * Filter_run.Factory().toPrettyJSON(fr).replaceAll("\\n", "\n#"));
         */
        return status;
    }
From source file:nl.cwi.kba.apps.FeatureExtractor_filterer.java
License:Apache License
    @Override
    public int run(String[] args) throws Exception {
        String in = null;
        String out = null;
        String queryfile = null;
        String contextFile = null;
        String systemdescription = null;
        String corpus_id = null;
        String runtag = null;
        String teamname = null;
        String annoFile = null;
        String gcldFile = null;
        String labelsFile = null;
        String pprFile = null;
        String myverFile = null;
        HashMap<String, Object> run_info = new HashMap<String, Object>();
        List<String> other_args = new ArrayList<String>();
        for (int i = 0; i < args.length; ++i) {
            try {
                if ("-i".equals(args[i])) { in = args[++i]; }
                else if ("-o".equals(args[i])) { out = args[++i]; }
                else if ("-q".equals(args[i])) { queryfile = args[++i]; }
                else if ("-r".equals(args[i])) { runtag = args[++i]; }
                else if ("-l".equals(args[i])) { labelsFile = args[++i]; }
                else if ("-a".equals(args[i])) { annoFile = args[++i]; }
                else if ("-t".equals(args[i])) { teamname = args[++i]; }
                else if ("-d".equals(args[i])) { systemdescription = args[++i]; }
                else if ("-h".equals(args[i]) || "--help".equals(args[i])) { return printUsage(); }
                else { other_args.add(args[i]); }
            } catch (ArrayIndexOutOfBoundsException except) {
                System.out.println("ERROR: Required parameter missing from " + args[i - 1]);
                return printUsage();
            }
        }
        if (other_args.size() > 0 || in == null || out == null || queryfile == null)
            return printUsage();
        if (runtag == null)
            runtag = "toy_1";
        if (teamname == null)
            teamname = "CompInsights";
        if (corpus_id == null)
            corpus_id = "kba-stream-corpus-2012-cleansed-only";
        if (systemdescription == null)
            systemdescription = "Description intentionally left blank.";
        LOG.info("Tool: " + this.getClass().getName());
        LOG.info(" - input path: " + in);
        LOG.info(" - output path: " + out);
        LOG.info(" - runtag: " + runtag);
        LOG.info(" - teamname: " + teamname);
        LOG.info(" - corpus_id: " + corpus_id);
        LOG.info(" - run description: " + systemdescription);
        Filter_run fr = new Filter_run.Factory().create(TEAMNAME, RUNTAG, systemdescription, corpus_id);
        Map<String, String> Attr = new LinkedHashMap<String, String>();
        // Attr.put("trec-kba", "");
        Attr.put("LengthTitle", "");
        Attr.put("LengthBody", "");
        Attr.put("LengthAnchor", "");
        Attr.put("Source", "");
        Attr.put("English", "");
        Attr.put("MentionsTitle", "");
        Attr.put("MentionsBody", "");
        Attr.put("MentionsAnchor", "");
        Attr.put("FirstPos", "");
        Attr.put("LastPos", "");
        Attr.put("Spread", "");
        Attr.put("FirstPosNorm", "");
        Attr.put("LastPosNorm", "");
        Attr.put("SpreadNorm", "");
        // Attr.put("Related", "");
        Attr.put("Relatedtitle", "");
        Attr.put("RelatedBody", "");
        Attr.put("RelatedAnchor", "");
        //Attr.put("contxL", "0");
        //Attr.put("contxR", "0");
        Attr.put("Class", "");
        Configuration conf = getConf();
        conf.set(QUERYFILEPATH_HDFS, new Path(queryfile).toUri().toString());
        conf.set(LABELSFILEPATH_HDFS, new Path(labelsFile).toUri().toString());
        conf.set(ANNOFILEPATH_HDFS, new Path(annoFile).toUri().toString());
        conf.set(RUNTAG, runtag);
        conf.set(TEAMNAME, teamname);
        // set time
        conf.setLong("mapred.task.timeout", 40 * 600000);
        FileSystem fs = FileSystem.get(conf);
        // Lookup required data from the topic file
        loadTopicData(queryfile, fr, fs, run_info);
        Job job = new Job(conf, "Toy KBA system");
        job.setJarByClass(FeatureExtractor_filterer.class);
        // some weird issues with Thrift classes in the Hadoop distro.
        job.setUserClassesTakesPrecedence(true);
        // make the query file available to each mapper.
        DistributedCache.addCacheFile(new URI(new Path(queryfile) + "#" + QUERYFILEPATH_HDFS),
                job.getConfiguration());
        DistributedCache.addCacheFile(new URI(new Path(labelsFile) + "#" + LABELSFILEPATH_HDFS),
                job.getConfiguration());
        DistributedCache.addCacheFile(new URI(new Path(annoFile) + "#" + ANNOFILEPATH_HDFS),
                job.getConfiguration());
        DistributedCache.createSymlink(job.getConfiguration());
        job.setInputFormatClass(ThriftFileInputFormat.class);
        job.setMapperClass(MyMapper.class);
        FileInputFormat.addInputPath(job, new Path(in));
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(Text.class);
        // job.setCombinerClass(MyReducer.class);
        // job.setReducerClass(MyReducer.class);
        job.setNumReduceTasks(1);
        FileSystem.get(conf).delete(new Path(out), true);
        TextOutputFormat.setOutputPath(job, new Path(out));
        job.setOutputFormatClass(TextOutputFormat.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);
        // Let's go
        int status = job.waitForCompletion(true) ? 0 : 1;
        Counters c = job.getCounters();
        long cputime = c.findCounter(org.apache.hadoop.mapred.Task.Counter.CPU_MILLISECONDS).getValue();
        run_info.put("elapsed_time_secs", ((double) cputime / 1000d));
        long num_filter_results = c.findCounter(org.apache.hadoop.mapred.Task.Counter.MAP_OUTPUT_RECORDS)
                .getValue();
        run_info.put("num_filter_results", num_filter_results);
        long num_entity_doc_compares = c.findCounter(org.apache.hadoop.mapred.Task.Counter.MAP_INPUT_RECORDS)
                .getValue();
        run_info.put("num_entity_doc_compares", num_entity_doc_compares);
        long hours = c.findCounter(org.apache.hadoop.mapred.Task.Counter.REDUCE_INPUT_GROUPS).getValue();
        run_info.put("num_stream_hours", hours);
        fr.setAdditionalProperties("run_info", run_info);
        // System.out.println("#" + new Filter_run.Factory().toJSON(fr));
        System.out.println("@RELATION" + " trec-kba" + " ");
        for (String key : Attr.keySet()) {
            if (key.equalsIgnoreCase("English")) {
                System.out.println("@ATTRIBUTE " + key + " " + "{0,1,2}");
            } else if (key.equalsIgnoreCase("Class")) {
                System.out.println("@ATTRIBUTE " + key + " " + "{0,1}");
            } else {
                System.out.println("@ATTRIBUTE " + key + " " + "NUMERIC");
            }
        }
        System.out.println("\n@DATA");
        Text line = new Text();
        LineReader reader = new LineReader(fs.open(new Path(out + "/part-r-00000")));
        for (int i = 0; i < num_filter_results; i++) {
            reader.readLine(line);
            System.out.println(line.toString().split("\t\t")[1]);
        }
        return status;
    }
From source file:nl.cwi.kba2013.apps.AnnotationExtractor.java
License:Apache License
    @Override
    public int run(String[] args) throws Exception {
        String in = null;
        String out = null;
        String annoFile = null;
        List<String> other_args = new ArrayList<String>();
        for (int i = 0; i < args.length; ++i) {
            try {
                if ("-i".equals(args[i])) { in = args[++i]; }
                else if ("-o".equals(args[i])) { out = args[++i]; }
                else if ("-q".equals(args[i])) { }
                else if ("-a".equals(args[i])) { annoFile = args[++i]; }
                else if ("-h".equals(args[i]) || "--help".equals(args[i])) { return printUsage(); }
                else { other_args.add(args[i]); }
            } catch (ArrayIndexOutOfBoundsException except) {
                System.out.println("ERROR: Required parameter missing from " + args[i - 1]);
                return printUsage();
            }
        }
        if (other_args.size() > 0 || in == null || out == null)
            return printUsage();
        LOG.info("Tool: " + this.getClass().getName());
        LOG.info(" - input path: " + in);
        LOG.info(" - output path: " + out);
        Configuration conf = getConf();
        conf.set(ANNOFILEPATH_HDFS, new Path(annoFile).toUri().toString());
        // set time
        conf.setLong("mapred.task.timeout", 40 * 600000);
        conf.set("mapred.map.child.java.opts", "-Xmx4g -XX:-UseGCOverheadLimit");
        FileSystem fs = FileSystem.get(conf);
        // Lookup required data from the topic file
        Job job = new Job(conf, "Annotation Extractor");
        job.setJarByClass(AnnotationExtractor.class);
        // some weird issues with Thrift classes in the Hadoop distro.
        //job.setUserClassesTakesPrecedence(true);
        // make the query file available to each mapper.
        DistributedCache.addCacheFile(new URI(new Path(annoFile) + "#" + ANNOFILEPATH_HDFS),
                job.getConfiguration());
        DistributedCache.createSymlink(job.getConfiguration());
        job.setInputFormatClass(TextInputFormat.class);
        //job.setMapperClass(MyMapper.class);
        FileInputFormat.addInputPath(job, new Path(in));
        //job.setMapOutputKeyClass(Text.class);
        //job.setMapOutputValueClass(Text.class);
        job.setCombinerClass(MyReducer.class);
        //job.setReducerClass(MyReducer.class);
        job.setNumReduceTasks(1);
        FileSystem.get(conf).delete(new Path(out), true);
        TextOutputFormat.setOutputPath(job, new Path(out));
        job.setOutputFormatClass(TextOutputFormat.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);
        // Let's go
        int status = job.waitForCompletion(true) ? 0 : 1;
        return status;
    }