List of usage examples for org.apache.hadoop.io Text toString
@Override
public String toString()
From source file:br.com.lassal.nqueens.grid.mapreduce.NQueenIncrementalCounterResultReducer.java
/**
 * Aggregates the values received for one key into a single total and writes it out.
 *
 * <p>Each value is either the partial-solution marker
 * ({@code NQueenIncrementalCounterResultMapper.PARTIAL_SOLUTION_ID}) or the decimal
 * representation of a solution count. Markers are counted, counts are summed with
 * {@link BigInteger} arithmetic. One {@code key=total;} record is always written; a second
 * record is written when at least one marker was seen.
 *
 * @param key     identifier the counts belong to; echoed into the output records
 * @param values  marker strings and decimal counts for {@code key}
 * @param context sink for the output records
 * @throws NumberFormatException if a non-marker value is not a valid integer
 */
protected void reduce(Text key, Iterable<Text> values, Context context)
        throws java.io.IOException, InterruptedException {
    BigInteger partialCount = BigInteger.ZERO;
    long numPartialSolutions = 0;

    for (Text item : values) {
        String value = item.toString();
        if (NQueenIncrementalCounterResultMapper.PARTIAL_SOLUTION_ID.equals(value)) {
            // Saturate instead of overflowing. The saturation test is kept separate from the
            // marker test so that a marker is never handed to the BigInteger parser below.
            if (numPartialSolutions < Long.MAX_VALUE) {
                numPartialSolutions++;
            }
        } else {
            partialCount = partialCount.add(new BigInteger(value));
        }
    }

    context.write(NullWritable.get(), new Text(key.toString() + "=" + partialCount.toString() + ";"));

    if (numPartialSolutions > 0) {
        context.write(NullWritable.get(), new Text(
                key.toString() + " NO FINALIZADO - EXISTEM " + numPartialSolutions + " SOLUES ABERTAS."));
    }
}
From source file:br.com.lassal.nqueens.grid.mapreduce.NQueenPartialShotMapper.java
protected void map(LongWritable key, Text value, Context context) throws java.io.IOException, InterruptedException { NQueenPartialShot record = new NQueenPartialShot(value.toString()); // String rowSize = context.getConfiguration().get(NQueenPartialShotMapper.NQueenRowSize_PROP); String predicado = record.getNewPrefix(); // predicado = predicado!= null ? predicado + "," : (rowSize + ":"); int numIter = record.getNumColToRun(SliceSize); boolean isFinalRun = record.isFinalRun(SliceSize); if (!isFinalRun && record.hasPrefix() && record.getPrefix().length >= (SliceSize * 2)) { if (this.isPositionInvalid(record.getPrefix())) { return; // ignora esta linha }/*from w ww . j ava 2s.c o m*/ } for (int a = 0; a < record.getColumnSize(); a++) { if (numIter > 1) { for (int b = 0; b < record.getColumnSize(); b++) { if (numIter > 2) { for (int c = 0; c < record.getColumnSize(); c++) { if (numIter > 3) { for (int d = 0; d < record.getColumnSize(); d++) { if (!(a == b || a == c || a == d || b == c || b == d || c == d || a == (b + 1) || a == (b - 1) || a == (c + 2) || a == (c - 2) || a == (d + 3) || a == (d - 3) || b == (c + 1) || b == (c - 1) || b == (d + 2) || b == (d - 2) || c == (d + 1) || c == (d - 1) || (isFinalRun && this.isPositionInvalid(record.getPrefix(), a, b, c, d)))) { nqueenShot.set(String.format("%s%s,%s,%s,%s", predicado, a, b, c, d)); context.write(nqueenShot, nullValue); } } } else { if (!(a == b || a == c || b == c || a == (b + 1) || a == (b - 1) || a == (c + 2) || a == (c - 2) || b == (c + 1) || b == (c - 1) || isFinalRun && this.isPositionInvalid(record.getPrefix(), a, b, c))) { nqueenShot.set(String.format("%s%s,%s,%s", predicado, a, b, c)); context.write(nqueenShot, nullValue); } } } } else { if (!(a == b || a == (b - 1) || a == (b + 1) || isFinalRun && this.isPositionInvalid(record.getPrefix(), a, b))) { nqueenShot.set(String.format("%s%s,%s", predicado, a, b)); context.write(nqueenShot, nullValue); } } } } else { if (isFinalRun && 
this.isPositionInvalid(record.getPrefix(), a)) { nqueenShot.set(predicado + a); context.write(nqueenShot, nullValue); } } } }
From source file:br.ufrj.nce.recureco.distributedindex.indexer.IndexerMap.java
License:Open Source License
public void map(LongWritable key, Text value, OutputCollector<Text, Text> output, Reporter reporter) throws IOException { FileSplit fileSplit = (FileSplit) reporter.getInputSplit(); String filename = fileSplit.getPath().getName(); List<String> tokenizedLine = lineTokenizer.tokenize(value.toString()); for (String auxWord : tokenizedLine) { output.collect(new Text(auxWord), new Text(filename)); }//from w w w . jav a 2 s.c o m }
From source file:brickhouse.udf.sketch.Md5.java
License:Apache License
public Text evaluate(final Text s) { if (s == null) { return null; }//from w ww .java 2 s.c om try { MessageDigest md = MessageDigest.getInstance("MD5"); md.update(s.toString().getBytes()); byte[] md5hash = md.digest(); StringBuilder builder = new StringBuilder(); for (byte b : md5hash) { builder.append(Integer.toString((b & 0xff) + 0x100, 16).substring(1)); } return new Text(builder.toString()); } catch (NoSuchAlgorithmException nsae) { System.out.println("Cannot find digest algorithm"); System.exit(1); } return null; }
From source file:BU.MET.CS755.SpeciesGraphBuilderMapper.java
public void map(Text key, Text value, OutputCollector output, Reporter reporter) throws IOException { String title = null;// w w w. j a v a2 s . c o m String inputString; ArrayList<String> outlinks = null; //Get the DOM Builder Factory DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance(); try { //Get the DOM Builder DocumentBuilder builder = factory.newDocumentBuilder(); //Load and Parse the XML document //document contains the complete XML as a Tree. inputString = key.toString(); InputSource is = new InputSource(new StringReader(key.toString())); Document document = builder.parse(is); // Look for taxonavigation marker. if ((inputString.indexOf("== Taxonavigation ==") == -1) && (inputString.indexOf("==Taxonavigation==") == -1)) { return; } // Get the title node NodeList nodeList = document.getDocumentElement().getChildNodes(); NodeList theTitle = document.getElementsByTagName("title"); // Parse the species name from the title node. for (int i = 0; i < theTitle.getLength(); i++) { Node theNode = theTitle.item(i); Node nodeVal = theNode.getFirstChild(); title = nodeVal.getNodeValue(); title = title.replace(":", "_"); } // Get the sub-species list from <text> NodeList theText = document.getElementsByTagName("text"); for (int i = 0; i < theText.getLength(); i++) { Node theNode = theText.item(i); Node nodeVal = theNode.getFirstChild(); if (nodeVal != null) { outlinks = GetOutlinks(nodeVal.getNodeValue()); } } } catch (Exception e) { e.printStackTrace(); } if (title != null && title.length() > 0) { reporter.setStatus(title); } else { return; } StringBuilder builder = new StringBuilder(); if (outlinks != null) { for (String link : outlinks) { link = link.replace(" ", "_"); link = link.replace("\n", ""); builder.append(" "); builder.append(link); } } // remove any newlines if (builder.toString().contains("\n")) { builder.toString().replace("\n", ""); } output.collect(new Text(title), new Text(builder.toString())); }
From source file:BU.MET.CS755.SpeciesGraphBuilderReducer.java
public void reduce(Text key, Iterator<Text> values, OutputCollector<Text, Text> output, Reporter reporter) throws IOException { reporter.setStatus(key.toString()); String toWrite = ""; int count = 0; while (values.hasNext()) { String page = ((Text) values.next()).toString(); page.replaceAll(" ", "_"); toWrite += " " + page; count += 1;// w ww. j av a 2 s . c om } IntWritable i = new IntWritable(count); String num = (i).toString(); toWrite = num + ":" + toWrite; output.collect(key, new Text(toWrite)); }
From source file:cascading.tap.hadoop.ZipInputFormatTest.java
License:Open Source License
public void testSplits() throws Exception { JobConf job = new JobConf(); FileSystem currentFs = FileSystem.get(job); Path file = new Path(workDir, "test.zip"); Reporter reporter = Reporter.NULL;/*from w ww .j av a 2 s.c om*/ int seed = new Random().nextInt(); LOG.info("seed = " + seed); Random random = new Random(seed); FileInputFormat.setInputPaths(job, file); for (int entries = 1; entries < MAX_ENTRIES; entries += random.nextInt(MAX_ENTRIES / 10) + 1) { ByteArrayOutputStream byteArrayOutputStream = new ByteArrayOutputStream(); ZipOutputStream zos = new ZipOutputStream(byteArrayOutputStream); long length = 0; LOG.debug("creating; zip file with entries = " + entries); // for each entry in the zip file for (int entryCounter = 0; entryCounter < entries; entryCounter++) { // construct zip entries splitting MAX_LENGTH between entries long entryLength = MAX_LENGTH / entries; ZipEntry zipEntry = new ZipEntry("/entry" + entryCounter + ".txt"); zipEntry.setMethod(ZipEntry.DEFLATED); zos.putNextEntry(zipEntry); for (length = entryCounter * entryLength; length < (entryCounter + 1) * entryLength; length++) { zos.write(Long.toString(length).getBytes()); zos.write("\n".getBytes()); } zos.flush(); zos.closeEntry(); } zos.flush(); zos.close(); currentFs.delete(file, true); OutputStream outputStream = currentFs.create(file); byteArrayOutputStream.writeTo(outputStream); outputStream.close(); ZipInputFormat format = new ZipInputFormat(); format.configure(job); LongWritable key = new LongWritable(); Text value = new Text(); InputSplit[] splits = format.getSplits(job, 100); BitSet bits = new BitSet((int) length); for (int j = 0; j < splits.length; j++) { LOG.debug("split[" + j + "]= " + splits[j]); RecordReader<LongWritable, Text> reader = format.getRecordReader(splits[j], job, reporter); try { int count = 0; while (reader.next(key, value)) { int v = Integer.parseInt(value.toString()); LOG.debug("read " + v); if (bits.get(v)) LOG.warn("conflict with " + v + " in split " + j + " at 
position " + reader.getPos()); assertFalse("key in multiple partitions.", bits.get(v)); bits.set(v); count++; } LOG.debug("splits[" + j + "]=" + splits[j] + " count=" + count); } finally { reader.close(); } } assertEquals("some keys in no partition.", length, bits.cardinality()); } }
From source file:cc.slda.AnnotateDocuments.java
License:Apache License
public static Map<Integer, String> importParameter(SequenceFile.Reader sequenceFileReader) throws IOException { Map<Integer, String> hashMap = new HashMap<Integer, String>(); IntWritable intWritable = new IntWritable(); Text text = new Text(); while (sequenceFileReader.next(intWritable, text)) { hashMap.put(intWritable.get(), text.toString()); }//from ww w .j a va 2 s . com return hashMap; }
From source file:cc.slda.DisplayTopic.java
License:Apache License
@SuppressWarnings("unchecked") public int run(String[] args) throws Exception { Options options = new Options(); options.addOption(Settings.HELP_OPTION, false, "print the help message"); options.addOption(OptionBuilder.withArgName(Settings.PATH_INDICATOR).hasArg() .withDescription("input beta file").create(Settings.INPUT_OPTION)); options.addOption(OptionBuilder.withArgName(Settings.PATH_INDICATOR).hasArg() .withDescription("term index file").create(ParseCorpus.INDEX)); options.addOption(OptionBuilder.withArgName(Settings.INTEGER_INDICATOR).hasArg() .withDescription("display top terms only (default - 10)").create(TOP_DISPLAY_OPTION)); String betaString = null;/*from w w w .j av a2s. c o m*/ String indexString = null; int topDisplay = TOP_DISPLAY; CommandLineParser parser = new GnuParser(); HelpFormatter formatter = new HelpFormatter(); try { CommandLine line = parser.parse(options, args); if (line.hasOption(Settings.HELP_OPTION)) { formatter.printHelp(ParseCorpus.class.getName(), options); System.exit(0); } if (line.hasOption(Settings.INPUT_OPTION)) { betaString = line.getOptionValue(Settings.INPUT_OPTION); } else { throw new ParseException("Parsing failed due to " + Settings.INPUT_OPTION + " not initialized..."); } if (line.hasOption(ParseCorpus.INDEX)) { indexString = line.getOptionValue(ParseCorpus.INDEX); } else { throw new ParseException("Parsing failed due to " + ParseCorpus.INDEX + " not initialized..."); } if (line.hasOption(TOP_DISPLAY_OPTION)) { topDisplay = Integer.parseInt(line.getOptionValue(TOP_DISPLAY_OPTION)); } } catch (ParseException pe) { System.err.println(pe.getMessage()); formatter.printHelp(ParseCorpus.class.getName(), options); System.exit(0); } catch (NumberFormatException nfe) { System.err.println(nfe.getMessage()); System.exit(0); } JobConf conf = new JobConf(DisplayTopic.class); FileSystem fs = FileSystem.get(conf); Path indexPath = new Path(indexString); Preconditions.checkArgument(fs.exists(indexPath) && fs.isFile(indexPath), "Invalid 
index path..."); Path betaPath = new Path(betaString); Preconditions.checkArgument(fs.exists(betaPath) && fs.isFile(betaPath), "Invalid beta path..."); SequenceFile.Reader sequenceFileReader = null; try { IntWritable intWritable = new IntWritable(); Text text = new Text(); Map<Integer, String> termIndex = new HashMap<Integer, String>(); sequenceFileReader = new SequenceFile.Reader(fs, indexPath, conf); while (sequenceFileReader.next(intWritable, text)) { termIndex.put(intWritable.get(), text.toString()); } PairOfIntFloat pairOfIntFloat = new PairOfIntFloat(); // HMapIFW hmap = new HMapIFW(); HMapIDW hmap = new HMapIDW(); TreeMap<Double, Integer> treeMap = new TreeMap<Double, Integer>(); sequenceFileReader = new SequenceFile.Reader(fs, betaPath, conf); while (sequenceFileReader.next(pairOfIntFloat, hmap)) { treeMap.clear(); System.out.println("=============================="); System.out.println( "Top ranked " + topDisplay + " terms for Topic " + pairOfIntFloat.getLeftElement()); System.out.println("=============================="); Iterator<Integer> itr1 = hmap.keySet().iterator(); int temp1 = 0; while (itr1.hasNext()) { temp1 = itr1.next(); treeMap.put(-hmap.get(temp1), temp1); if (treeMap.size() > topDisplay) { treeMap.remove(treeMap.lastKey()); } } Iterator<Double> itr2 = treeMap.keySet().iterator(); double temp2 = 0; while (itr2.hasNext()) { temp2 = itr2.next(); if (termIndex.containsKey(treeMap.get(temp2))) { System.out.println(termIndex.get(treeMap.get(temp2)) + "\t\t" + -temp2); } else { System.out.println("How embarrassing! Term index not found..."); } } } } finally { IOUtils.closeStream(sequenceFileReader); } return 0; }
From source file:cereal.impl.ProtobufMessageMapping.java
License:Apache License
@Override public void update(Iterable<Entry<Key, Value>> iter, InstanceOrBuilder<T> obj) { checkNotNull(iter, "Iterable was null"); checkNotNull(obj, "InstanceOrBuilder was null"); checkArgument(Type.BUILDER == obj.getType(), "Expected argument to be a builder"); final GeneratedMessage.Builder<?> builder = (GeneratedMessage.Builder<?>) obj.get(); final List<Entry<Key, Value>> leftoverFields = new LinkedList<>(); for (Entry<Key, Value> entry : iter) { String fieldName = entry.getKey().getColumnQualifier().toString(); int index = fieldName.indexOf(PERIOD); if (0 <= index) { leftoverFields.add(entry);// w w w . j a v a 2 s.co m continue; } // Find the FieldDescriptor from the Key for (FieldDescriptor fieldDesc : builder.getDescriptorForType().getFields()) { if (fieldDesc.isRepeated()) { int offset = fieldName.lastIndexOf(DOLLAR); if (offset < 0) { throw new RuntimeException( "Could not find offset of separator for repeated field count in " + fieldName); } fieldName = fieldName.substring(0, offset); } if (fieldName.equals(fieldDesc.getName())) { Value value = entry.getValue(); switch (fieldDesc.getJavaType()) { case INT: Integer intVal = Integer.parseInt(value.toString()); if (fieldDesc.isRepeated()) { builder.addRepeatedField(fieldDesc, intVal); } else { builder.setField(fieldDesc, intVal); } break; case LONG: Long longVal = Long.parseLong(value.toString()); if (fieldDesc.isRepeated()) { builder.addRepeatedField(fieldDesc, longVal); } else { builder.setField(fieldDesc, longVal); } break; case FLOAT: Float floatVal = Float.parseFloat(value.toString()); if (fieldDesc.isRepeated()) { builder.addRepeatedField(fieldDesc, floatVal); } else { builder.setField(fieldDesc, floatVal); } break; case DOUBLE: Double doubleVal = Double.parseDouble(value.toString()); if (fieldDesc.isRepeated()) { builder.addRepeatedField(fieldDesc, doubleVal); } else { builder.setField(fieldDesc, doubleVal); } break; case BOOLEAN: Boolean booleanVal = Boolean.parseBoolean(value.toString()); if 
(fieldDesc.isRepeated()) { builder.addRepeatedField(fieldDesc, booleanVal); } else { builder.setField(fieldDesc, booleanVal); } break; case STRING: String strVal = value.toString(); if (fieldDesc.isRepeated()) { builder.addRepeatedField(fieldDesc, strVal); } else { builder.setField(fieldDesc, strVal); } break; case BYTE_STRING: ByteString byteStrVal = ByteString.copyFrom(entry.getValue().get()); if (fieldDesc.isRepeated()) { builder.addRepeatedField(fieldDesc, byteStrVal); } else { builder.setField(fieldDesc, byteStrVal); } break; default: log.warn("Ignoring unknown serialized type {}", fieldDesc.getJavaType()); break; } break; } } } // All primitives in object should be filled out. // Make sure nested messages get filled out too. if (!leftoverFields.isEmpty()) { for (FieldDescriptor fieldDesc : builder.getDescriptorForType().getFields()) { if (JavaType.MESSAGE == fieldDesc.getJavaType()) { // For each Key-Value pair which have this prefix as the fieldname (column qualifier) final String fieldName = fieldDesc.getName(); final String singularPrefix = fieldName + PERIOD, repeatedPrefix = fieldName + DOLLAR; log.debug("Extracting Key-Value pairs for {}", fieldDesc.getName()); // Use a TreeMap to ensure the correct repetition order is preserved Map<Integer, List<Entry<Key, Value>>> fieldsForNestedMessage = new TreeMap<>(); final Text _holder = new Text(); Iterator<Entry<Key, Value>> leftoverFieldsIter = leftoverFields.iterator(); while (leftoverFieldsIter.hasNext()) { final Entry<Key, Value> entry = leftoverFieldsIter.next(); final Key key = entry.getKey(); entry.getKey().getColumnQualifier(_holder); String colqual = _holder.toString(); if (colqual.startsWith(singularPrefix)) { // Make a copy of the original Key, stripping the prefix off of the qualifier Key copy = new Key(key.getRow(), key.getColumnFamily(), new Text(colqual.substring(singularPrefix.length())), key.getColumnVisibility(), key.getTimestamp()); List<Entry<Key, Value>> kvPairs = 
fieldsForNestedMessage.get(-1); if (null == kvPairs) { kvPairs = new LinkedList<>(); fieldsForNestedMessage.put(-1, kvPairs); } kvPairs.add(Maps.immutableEntry(copy, entry.getValue())); // Remove it from the list as we should never have to reread this one again leftoverFieldsIter.remove(); } else if (colqual.startsWith(repeatedPrefix)) { // Make a copy of the original Key, stripping the prefix off of the qualifier int index = colqual.indexOf(PERIOD, repeatedPrefix.length()); if (0 > index) { throw new RuntimeException("Could not find period after dollar sign: " + colqual); } Integer repetition = Integer .parseInt(colqual.substring(repeatedPrefix.length(), index)); Key copy = new Key(key.getRow(), key.getColumnFamily(), new Text(colqual.substring(index + 1)), key.getColumnVisibility(), key.getTimestamp()); List<Entry<Key, Value>> kvPairs = fieldsForNestedMessage.get(repetition); if (null == kvPairs) { kvPairs = new LinkedList<>(); fieldsForNestedMessage.put(repetition, kvPairs); } kvPairs.add(Maps.immutableEntry(copy, entry.getValue())); // Remove it from the list as we should never have to reread this one again leftoverFieldsIter.remove(); } } if (!fieldsForNestedMessage.isEmpty()) { // We have keys, pass them down to the nested message String nestedMsgClzName = getClassName(fieldDesc); log.debug("Found {} Key-Value pairs for {}. 
Reconstituting the message.", fieldsForNestedMessage.size(), nestedMsgClzName); try { @SuppressWarnings("unchecked") // Get the class, builder and InstanceOrBuilder for the nested message Class<GeneratedMessage> msgClz = (Class<GeneratedMessage>) Class .forName(nestedMsgClzName); Method newBuilderMethod = msgClz.getMethod("newBuilder"); for (Entry<Integer, List<Entry<Key, Value>>> pairsPerRepetition : fieldsForNestedMessage .entrySet()) { Message.Builder subBuilder = (Message.Builder) newBuilderMethod.invoke(null); InstanceOrBuilder<GeneratedMessage> subIob = new InstanceOrBuilderImpl<>(subBuilder, msgClz); // Get the mapping from the registry ProtobufMessageMapping<GeneratedMessage> subMapping = (ProtobufMessageMapping<GeneratedMessage>) registry .get(subIob); // Invoke update on the mapping with the subset of Key-Values subMapping.update(pairsPerRepetition.getValue(), subIob); // Set the result on the top-level obj if (fieldDesc.isRepeated()) { builder.addRepeatedField(fieldDesc, subBuilder.build()); } else { builder.setField(fieldDesc, subBuilder.build()); } } } catch (Exception e) { throw new RuntimeException(e); } } // No fields for the sub message, therefore it's empty log.debug("Found no Key-Value pairs for {}", fieldName); } // Not a message, so we can ignore it } if (!leftoverFields.isEmpty()) { log.warn("Found {} leftover Key-Value pairs that were not consumed", leftoverFields.size()); } } }