List of usage examples for the org.apache.hadoop.io.Text method toString()
@Override
public String toString()
From source file:crimeScoreMapper.java
/**
 * Scores one comma-separated crime record and emits {@code (area key, weight)}.
 *
 * <p>The record is split on commas (empty fields are skipped by {@link StringTokenizer}).
 * Field 2 is treated as the crime type and mapped to a weight; field 3 is used as the
 * output key when it starts with {@code "60"}, otherwise field 4 is used.
 * NOTE(review): the "60" prefix presumably identifies a ZIP-like code - confirm with the data owner.
 *
 * <p>Records that are empty or too short to contain the required fields are emitted under
 * the sentinel key with a count of 1 instead of failing the task with an
 * {@link ArrayIndexOutOfBoundsException}.
 *
 * @param key      input offset supplied by the framework (unused)
 * @param value    one line of comma-separated input
 * @param output   collector receiving the (area key, weight) pair
 * @param reporter progress reporter (unused)
 * @throws IOException if the collector fails
 */
public void map(LongWritable key, Text value, OutputCollector<Text, IntWritable> output, Reporter reporter)
        throws IOException {
    StringTokenizer tokenizer = new StringTokenizer(value.toString(), ",");
    String[] data = new String[tokenizer.countTokens()];
    for (int i = 0; i < data.length; i++) {
        data[i] = tokenizer.nextToken();
    }

    // Pick the output key, checking the field count before every index access.
    String areaKey = null;
    if (data.length > 3 && data[3].trim().startsWith("60")) {
        areaKey = data[3].trim();
    } else if (data.length > 4) {
        areaKey = data[4].trim();
    }

    if (areaKey == null) {
        // Empty or truncated record: surface it under the sentinel key so it can be counted.
        output.collect(new Text(MALFORMED_RECORD_KEY), new IntWritable(1));
        return;
    }

    output.collect(new Text(areaKey), new IntWritable(crimeWeight(data[2].trim())));
}

/** Key under which empty or truncated records are reported. */
private static final String MALFORMED_RECORD_KEY = "ProBLEMMMMMMMMMMMMMMMMMMMMM";

/** Crime types scored with weight 4. */
private static final String[] WEIGHT_4_TYPES = { "battery", "homicide", "crim sexual assault" };

/** Crime types scored with weight 3. */
private static final String[] WEIGHT_3_TYPES = { "theft", "robbery", "burglary", "kidnapping", "sex offense",
        "human trafficking", "offense involving children" };

/** Crime types scored with weight 2. */
private static final String[] WEIGHT_2_TYPES = { "arson", "assault", "narcotics", "intimidation", "prostitution",
        "criminal damage", "public indecency", "criminal trespass", "weapons violation", "deceptive practice",
        "motor vehicle theft", "concealed carry license violation" };

/** Crime types scored with weight 1. */
private static final String[] WEIGHT_1_TYPES = { "gambling", "stalking", "obscenity", "non - criminal",
        "other offense", "non-criminal", "liquor law violation", "public peace violation",
        "other narcotic violation", "interference with public officer" };

/** Returns the weight for a (trimmed) crime type, compared case-insensitively; unknown types score 0. */
private static int crimeWeight(String type) {
    if (matchesAny(type, WEIGHT_4_TYPES)) {
        return 4;
    }
    if (matchesAny(type, WEIGHT_3_TYPES)) {
        return 3;
    }
    if (matchesAny(type, WEIGHT_2_TYPES)) {
        return 2;
    }
    if (matchesAny(type, WEIGHT_1_TYPES)) {
        return 1;
    }
    return 0;
}

/** Returns true when {@code type} equals one of {@code names}, ignoring case. */
private static boolean matchesAny(String type, String[] names) {
    for (String name : names) {
        if (type.equalsIgnoreCase(name)) {
            return true;
        }
    }
    return false;
}
From source file:FriendsReducer.java
License:Apache License
public void reduce(Text anagramKey, Iterator<Text> anagramValues, OutputCollector<Text, Text> results, Reporter reporter) throws IOException { String output = ""; List<String> values1 = Arrays.asList(anagramValues.next().toString().split(",")); String[] values2 = anagramValues.next().toString().split(","); List<String> r = new ArrayList<String>(); for (String v2 : values2) { if (values1.contains(v2)) r.add(v2);//from ww w. j av a2 s .c o m } //StringTokenizer outputTokenizer = new StringTokenizer(output,"~"); /* if(outputTokenizer.countTokens()>=2) {*/ // output = output.replace("~", ","); outputKey.set(anagramKey.toString()); outputValue.set(String.join(",", r)); results.collect(outputKey, outputValue); //} }
From source file:MinMaxCountReducer.java
@Override protected void reduce(Text key, Iterable<MinMaxCountTupple> values, Context context) throws IOException, InterruptedException { //Initialize result result.setMax(-100);/*from ww w .j av a 2s. c om*/ result.setMin(100); int sum = 0; for (MinMaxCountTupple val : values) { if (val.getMin() < result.getMin()) { result.setMin(val.getMin()); } if (val.getMax() > result.getMax()) { result.setMax(val.getMax()); } sum += val.getCount(); } result.setCount(sum); Put put = new Put(toBytes(key.toString())); put.add(Bytes.toBytes("cf"), toBytes("min"), toBytes(result.getMin())); put.add(Bytes.toBytes("cf"), toBytes("max"), toBytes(result.getMax())); put.add(Bytes.toBytes("cf"), toBytes("count"), toBytes(result.getCount())); System.out.println( "R-> " + key.toString() + " " + result.getMin() + " " + result.getMax() + " " + result.getCount()); context.write(null, put); }
From source file:WikipediaForwardIndexBuilder.java
License:Apache License
@SuppressWarnings("static-access") @Override// w w w. j a va2 s .c om public int run(String[] args) throws Exception { Options options = new Options(); options.addOption(OptionBuilder.withArgName("path").hasArg().withDescription("input").create(INPUT_OPTION)); options.addOption( OptionBuilder.withArgName("path").hasArg().withDescription("index file").create(INDEX_FILE_OPTION)); options.addOption(OptionBuilder.withArgName("en|sv|de|cs|es|zh|ar|tr").hasArg() .withDescription("two-letter language code").create(LANGUAGE_OPTION)); CommandLine cmdline; CommandLineParser parser = new GnuParser(); try { cmdline = parser.parse(options, args); } catch (ParseException exp) { System.err.println("Error parsing command line: " + exp.getMessage()); return -1; } if (!cmdline.hasOption(INPUT_OPTION) || !cmdline.hasOption(INDEX_FILE_OPTION)) { HelpFormatter formatter = new HelpFormatter(); formatter.printHelp(this.getClass().getName(), options); ToolRunner.printGenericCommandUsage(System.out); return -1; } Path inputPath = new Path(cmdline.getOptionValue(INPUT_OPTION)); String indexFile = cmdline.getOptionValue(INDEX_FILE_OPTION); String tmpPath = "tmp-" + WikipediaForwardIndexBuilder.class.getSimpleName() + "-" + RANDOM.nextInt(10000); if (!inputPath.isAbsolute()) { System.err.println("Error: " + INPUT_OPTION + " must be an absolute path!"); return -1; } String language = null; if (cmdline.hasOption(LANGUAGE_OPTION)) { language = cmdline.getOptionValue(LANGUAGE_OPTION); if (language.length() != 2) { System.err.println("Error: \"" + language + "\" unknown language!"); return -1; } } JobConf conf = new JobConf(getConf(), WikipediaForwardIndexBuilder.class); FileSystem fs = FileSystem.get(conf); LOG.info("Tool name: " + this.getClass().getName()); LOG.info(" - input path: " + inputPath); LOG.info(" - index file: " + indexFile); LOG.info(" - language: " + language); LOG.info("Note: This tool only works on block-compressed SequenceFiles!"); 
conf.setJobName(String.format("BuildWikipediaForwardIndex[%s: %s, %s: %s, %s: %s]", INPUT_OPTION, inputPath, INDEX_FILE_OPTION, indexFile, LANGUAGE_OPTION, language)); conf.setNumReduceTasks(1); FileInputFormat.setInputPaths(conf, inputPath); FileOutputFormat.setOutputPath(conf, new Path(tmpPath)); FileOutputFormat.setCompressOutput(conf, false); if (language != null) { conf.set("wiki.language", language); } conf.setInputFormat(NoSplitSequenceFileInputFormat.class); conf.setOutputKeyClass(IntWritable.class); conf.setOutputValueClass(Text.class); conf.setMapRunnerClass(MyMapRunner.class); conf.setReducerClass(IdentityReducer.class); // Delete the output directory if it exists already. fs.delete(new Path(tmpPath), true); RunningJob job = JobClient.runJob(conf); Counters counters = job.getCounters(); int blocks = (int) counters.getCounter(Blocks.Total); LOG.info("number of blocks: " + blocks); LOG.info("Writing index file..."); LineReader reader = new LineReader(fs.open(new Path(tmpPath + "/part-00000"))); FSDataOutputStream out = fs.create(new Path(indexFile), true); out.writeUTF(edu.umd.cloud9.collection.wikipedia.WikipediaForwardIndex.class.getCanonicalName()); out.writeUTF(inputPath.toString()); out.writeInt(blocks); int cnt = 0; Text line = new Text(); while (reader.readLine(line) > 0) { String[] arr = line.toString().split("\\s+"); int docno = Integer.parseInt(arr[0]); int offset = Integer.parseInt(arr[1]); short fileno = Short.parseShort(arr[2]); out.writeInt(docno); out.writeInt(offset); out.writeShort(fileno); cnt++; if (cnt % 100000 == 0) { LOG.info(cnt + " blocks written"); } } reader.close(); out.close(); if (cnt != blocks) { throw new RuntimeException("Error: mismatch in block count!"); } // Clean up. fs.delete(new Path(tmpPath), true); return 0; }
From source file:DistribCountingMapper.java
License:Apache License
/**
 * Emits every non-empty subset of the whitespace-separated items on a line, each with a
 * count of one.
 *
 * <p>Duplicate items on the line are collapsed. Each subset is written as its items in
 * sorted order, every item followed by a single space (so the key ends with a space).
 * NOTE(review): the number of subsets doubles with each distinct item, and Guava's
 * {@code Sets.powerSet} is documented to reject large inputs - confirm the maximum line width.
 *
 * @param lineNum  input offset supplied by the framework (unused)
 * @param value    one transaction: items separated by whitespace
 * @param output   collector receiving the (itemset, 1) pairs
 * @param reporter progress reporter (unused)
 * @throws IOException if the collector fails
 */
@Override
public void map(LongWritable lineNum, Text value, OutputCollector<Text, IntWritable> output, Reporter reporter)
        throws IOException {
    IntWritable one = new IntWritable(1);

    HashSet<String> items = new HashSet<String>();
    for (StringTokenizer tokens = new StringTokenizer(value.toString()); tokens.hasMoreTokens();) {
        items.add(tokens.nextToken());
    }

    for (Set<String> subset : Sets.powerSet(items)) {
        if (subset.isEmpty()) {
            continue;
        }

        String[] sortedItems = subset.toArray(new String[subset.size()]);
        Arrays.sort(sortedItems);

        StringBuilder itemsetKey = new StringBuilder();
        for (String item : sortedItems) {
            itemsetKey.append(item).append(" ");
        }

        output.collect(new Text(itemsetKey.toString()), one);
    }
}
From source file:Text2FormatStorageMR.java
License:Open Source License
/**
 * Prints the keys of the leading records of a format-storage input to standard output.
 *
 * <p>Only the first input split is read. Reading stops once {@code lineNum} records have
 * been printed or the split is exhausted; at least one record is printed when the split is
 * non-empty, even if {@code lineNum} is zero or negative. The record reader is always
 * closed, including when reading fails.
 * NOTE(review): the record <em>key</em> is cast to {@code Text} and printed, not the value -
 * confirm this is intended.
 *
 * @param conf      job configuration; its {@code mapred.input.dir} entry is overwritten
 * @param inputPath path to read from
 * @param lineNum   maximum number of records to print
 * @return 0 after reading, or -1 when the input produces no splits
 * @throws Exception if split computation or reading fails
 */
@SuppressWarnings("unchecked")
public static int readFormatFile(JobConf conf, String inputPath, int lineNum) throws Exception {
    conf.set("mapred.input.dir", inputPath);

    InputFormat inputFormat = new FormatStorageInputFormat();
    InputSplit[] inputSplits = inputFormat.getSplits(conf, 1);
    if (inputSplits.length == 0) {
        System.out.println("inputSplits is empty");
        return -1;
    }

    RecordReader<WritableComparable, Writable> currRecReader =
            inputFormat.getRecordReader(inputSplits[0], conf, Reporter.NULL);
    try {
        WritableComparable key = currRecReader.createKey();
        Writable value = currRecReader.createValue();

        int num = 0;
        while (currRecReader.next(key, value)) {
            Text line = (Text) key;
            System.out.println(line.toString());
            num++;
            if (num >= lineNum) {
                break;
            }
        }
    } finally {
        // Release the underlying input stream even if next() throws.
        currRecReader.close();
    }
    return 0;
}
From source file:AnagramMapper.java
License:Apache License
/**
 * Emits the input word keyed by its characters in sorted order, so that words made of the
 * same characters share a key.
 *
 * @param key             input offset supplied by the framework (unused)
 * @param value           the word to process (the whole input value is used as-is)
 * @param outputCollector collector receiving the (sorted characters, original word) pair
 * @param reporter        progress reporter (unused)
 * @throws IOException if the collector fails
 */
public void map(LongWritable key, Text value, OutputCollector<Text, Text> outputCollector, Reporter reporter)
        throws IOException {
    String original = value.toString();

    char[] letters = original.toCharArray();
    Arrays.sort(letters);

    orginalText.set(original);
    sortedText.set(String.valueOf(letters));

    outputCollector.collect(sortedText, orginalText);
}
From source file:MedianMaper.java
@Override public void map(LongWritable Key, Text Value, Context context) throws IOException, InterruptedException { int YearWeek; double Price; Configuration conf = context.getConfiguration(); String Cheapest_Carrier = conf.get("Cheapest_Carrier"); Calendar Cal = Calendar.getInstance(); FlightPriceParser FParser = new FlightPriceParser(); if (!FParser.map(Value.toString())) { return;//w w w. j a v a 2 s .co m } if (FParser.Carrier != Cheapest_Carrier) return; Cal.set(FParser.Year, FParser.Month - 1, FParser.DayOfMonth); YearWeek = FParser.Year * 100 + Cal.get(Calendar.WEEK_OF_YEAR); context.write(new IntWritable(YearWeek), new IntWritable((int) Math.round(FParser.Price * 100))); }
From source file:FriendsMapper.java
License:Apache License
public void map(LongWritable key, Text value, OutputCollector<Text, Text> outputCollector, Reporter reporter) throws IOException { // value => A:B,C,D,E String[] kv = value.toString().split(":"); // kv[0] = A / kv[1] = B,C,D,E String k = kv[0];/*from w w w. j a v a2 s. c om*/ // k = A; List<String> vs = Arrays.asList(kv[1].split(",")); // vs = {B,C,D,E} String values = kv[1]; orginalText.set(values); for (String v : vs) { // v = {B,C,D,E} List<String> ks = new ArrayList<String>(); // ks = {} ks.add(k); // ks = {A} ks.add(v); // ks = {A,v} Collections.sort(ks); sortedText.set(String.join(",", ks)); // A,v outputCollector.collect(sortedText, orginalText); } }
From source file:TestString.java
License:Apache License
@Test public void testStringSubstring() throws Exception { Text text = new Text("string"); Text text1 = new Text(); Text text2 = new Text(); long start = System.nanoTime(); for (int i = 0; i < 100000000; i++) { String str = text.toString(); String str1 = str.substring(0, 2); String str2 = str.substring(3, str.length()); text1.set(str1);//from w w w . ja v a 2 s . c o m text2.set(str2); } long end = System.nanoTime(); System.out.println("TextStringSubString"); System.out.println("text1: " + text1.toString()); System.out.println("text2: " + text2.toString()); System.out.println("Elapsed Time: " + (end - start) / 1000000000f + " seconds."); }