Example usage for org.apache.hadoop.io Text toString

Introduction

On this page you can find example usages of org.apache.hadoop.io.Text.toString().

Prototype

@Override
public String toString()

Source Link

Document

Convert text back to string

Usage

From source file:br.com.lassal.nqueens.grid.mapreduce.NQueenIncrementalCounterResultReducer.java

/**
 * Folds all values received for one key into a single total and writes it out.
 *
 * <p>Values equal to {@code NQueenIncrementalCounterResultMapper.PARTIAL_SOLUTION_ID} are
 * counted (as long as the counter is below {@code Long.MAX_VALUE}); every other value is
 * parsed as a {@link BigInteger} and added to the running total. One line of the form
 * {@code key=total;} is always written; a second line is written when at least one
 * marker value was counted.
 *
 * @param key     the key whose values are being aggregated
 * @param values  the values grouped under {@code key}
 * @param context the sink the result lines are written to
 * @throws java.io.IOException  if writing to the context fails
 * @throws InterruptedException if writing to the context is interrupted
 */
protected void reduce(Text key, Iterable<Text> values, Context context)
        throws java.io.IOException, InterruptedException {
    BigInteger total = BigInteger.ZERO;
    long openSolutions = 0;

    for (Text entry : values) {
        final String raw = entry.toString();
        final boolean isOpenMarker = NQueenIncrementalCounterResultMapper.PARTIAL_SOLUTION_ID.equals(raw);

        if (isOpenMarker && openSolutions < Long.MAX_VALUE) {
            openSolutions++;
            continue;
        }

        // Anything that was not counted as a marker is treated as a numeric count.
        total = total.add(new BigInteger(raw));
    }

    final String keyText = key.toString();
    context.write(NullWritable.get(), new Text(keyText + "=" + total.toString() + ";"));

    if (openSolutions > 0) {
        final String warning = keyText + " NO FINALIZADO - EXISTEM " + openSolutions + " SOLUES ABERTAS.";
        context.write(NullWritable.get(), new Text(warning));
    }
}

From source file:br.com.lassal.nqueens.grid.mapreduce.NQueenPartialShotMapper.java

/**
 * Expands one partial-shot record into candidate column combinations and emits every
 * combination that survives the conflict checks, appended to the record's new prefix.
 *
 * <p>Between one and four nested loops are used, depending on
 * {@code record.getNumColToRun(SliceSize)}. A combination is rejected when two of its
 * values are equal, or when two values differ by exactly their positional distance
 * (1, 2 or 3). On a final run the combination is additionally rejected when
 * {@code isPositionInvalid} reports it invalid against the record's prefix.
 *
 * <p>Fix: the single-column branch used to emit a value only when it was a final run
 * AND the position was invalid, which is the inverse of every other branch. It now
 * emits unless the final-run validity check rejects the value.
 *
 * @param key     input key; not read
 * @param value   the serialized partial-shot record
 * @param context the sink the expanded records are written to
 * @throws java.io.IOException  if writing to the context fails
 * @throws InterruptedException if writing to the context is interrupted
 */
protected void map(LongWritable key, Text value, Context context)
        throws java.io.IOException, InterruptedException {

    NQueenPartialShot record = new NQueenPartialShot(value.toString());

    String predicado = record.getNewPrefix();

    int numIter = record.getNumColToRun(SliceSize);

    boolean isFinalRun = record.isFinalRun(SliceSize);

    // On intermediate runs with a long enough prefix, drop the record early when the
    // prefix itself is already invalid.
    if (!isFinalRun && record.hasPrefix() && record.getPrefix().length >= (SliceSize * 2)) {
        if (this.isPositionInvalid(record.getPrefix())) {
            return; // skip this line
        }
    }

    for (int a = 0; a < record.getColumnSize(); a++) {
        if (numIter > 1) {
            for (int b = 0; b < record.getColumnSize(); b++) {
                if (numIter > 2) {
                    for (int c = 0; c < record.getColumnSize(); c++) {
                        if (numIter > 3) {
                            for (int d = 0; d < record.getColumnSize(); d++) {
                                // Four values: all pairwise equality and distance checks.
                                if (!(a == b || a == c || a == d || b == c || b == d || c == d || a == (b + 1)
                                        || a == (b - 1) || a == (c + 2) || a == (c - 2) || a == (d + 3)
                                        || a == (d - 3) || b == (c + 1) || b == (c - 1) || b == (d + 2)
                                        || b == (d - 2) || c == (d + 1) || c == (d - 1) || (isFinalRun
                                                && this.isPositionInvalid(record.getPrefix(), a, b, c, d)))) {
                                    nqueenShot.set(String.format("%s%s,%s,%s,%s", predicado, a, b, c, d));
                                    context.write(nqueenShot, nullValue);
                                }
                            }
                        } else {
                            // Three values.
                            if (!(a == b || a == c || b == c || a == (b + 1) || a == (b - 1) || a == (c + 2)
                                    || a == (c - 2) || b == (c + 1) || b == (c - 1)
                                    || isFinalRun && this.isPositionInvalid(record.getPrefix(), a, b, c))) {
                                nqueenShot.set(String.format("%s%s,%s,%s", predicado, a, b, c));
                                context.write(nqueenShot, nullValue);
                            }
                        }
                    }
                } else {
                    // Two values.
                    if (!(a == b || a == (b - 1) || a == (b + 1)
                            || isFinalRun && this.isPositionInvalid(record.getPrefix(), a, b))) {
                        nqueenShot.set(String.format("%s%s,%s", predicado, a, b));
                        context.write(nqueenShot, nullValue);
                    }
                }
            }
        } else {
            // Single value: there is nothing to compare it with inside the slice, so only
            // the final-run check against the prefix can reject it.
            if (!(isFinalRun && this.isPositionInvalid(record.getPrefix(), a))) {
                nqueenShot.set(predicado + a);
                context.write(nqueenShot, nullValue);
            }
        }
    }

}

From source file:br.ufrj.nce.recureco.distributedindex.indexer.IndexerMap.java

License:Open Source License

/**
 * Emits one (token, file name) pair for every token that {@code lineTokenizer}
 * produces from the input line.
 *
 * @param key      input key; not read
 * @param value    the line of text to tokenize
 * @param output   collector receiving the (token, file name) pairs
 * @param reporter used to obtain the input split, which must be a {@code FileSplit}
 * @throws IOException if the collector fails
 */
public void map(LongWritable key, Text value, OutputCollector<Text, Text> output, Reporter reporter)
        throws IOException {

    // The name taken from the split's path identifies where each token came from.
    final FileSplit split = (FileSplit) reporter.getInputSplit();
    final String sourceName = split.getPath().getName();

    final List<String> tokens = lineTokenizer.tokenize(value.toString());
    for (String token : tokens) {
        final Text word = new Text(token);
        final Text origin = new Text(sourceName);
        output.collect(word, origin);
    }
}

From source file:brickhouse.udf.sketch.Md5.java

License:Apache License

/**
 * Computes the MD5 digest of the given text and returns it as a hexadecimal string.
 *
 * <p>The text is encoded as UTF-8 before hashing so the result does not depend on the
 * platform default charset.
 *
 * @param s the text to hash; may be {@code null}
 * @return the digest rendered as two lowercase hex digits per byte, or {@code null}
 *         when {@code s} is {@code null}
 * @throws IllegalStateException if no MD5 implementation is available (previously this
 *         case terminated the whole JVM via {@code System.exit(1)})
 */
public Text evaluate(final Text s) {
    if (s == null) {
        return null;
    }
    try {
        MessageDigest md = MessageDigest.getInstance("MD5");
        // Explicit charset: getBytes() without one uses the platform default.
        md.update(s.toString().getBytes(java.nio.charset.StandardCharsets.UTF_8));
        byte[] md5hash = md.digest();
        StringBuilder builder = new StringBuilder(md5hash.length * 2);
        for (byte b : md5hash) {
            // Adding 0x100 forces three hex digits; dropping the first leaves a
            // zero-padded two-digit value for the byte.
            builder.append(Integer.toString((b & 0xff) + 0x100, 16).substring(1));
        }
        return new Text(builder.toString());
    } catch (NoSuchAlgorithmException nsae) {
        // Fail this call only, keeping the cause, instead of killing the host process.
        throw new IllegalStateException("Cannot find digest algorithm", nsae);
    }
}

From source file:BU.MET.CS755.SpeciesGraphBuilderMapper.java

/**
 * Parses the XML carried in {@code key}, extracts the page title and the outlinks of
 * its {@code <text>} element, and emits {@code (title, " link1 link2 ...")}.
 *
 * <p>Records that contain neither {@code "== Taxonavigation =="} nor
 * {@code "==Taxonavigation=="} are skipped, as are records for which no non-empty
 * title could be extracted. In the title, {@code ':'} is replaced by {@code '_'}; in
 * each link, spaces are replaced by {@code '_'} and newlines are removed.
 *
 * <p>Changes from the previous version: the marker check now runs before the XML parse,
 * the no-op "remove newlines" step (whose result was discarded) and an unused local are
 * gone, and an empty title element is skipped instead of raising a swallowed
 * {@code NullPointerException}.
 *
 * @param key      the XML record to parse
 * @param value    not read
 * @param output   collector receiving the (title, links) pair
 * @param reporter receives the title as status text
 * @throws IOException if the collector fails
 */
public void map(Text key, Text value, OutputCollector output, Reporter reporter) throws IOException {
    String title = null;
    ArrayList<String> outlinks = null;

    final String inputString = key.toString();

    // Cheap textual filter first: no marker means no output, so skip the XML parse.
    if ((inputString.indexOf("== Taxonavigation ==") == -1)
            && (inputString.indexOf("==Taxonavigation==") == -1)) {
        return;
    }

    // NOTE(review): the parser is created with default settings. If records can come from
    // untrusted sources, consider disabling DTDs / external entities on the factory.
    DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();

    try {
        DocumentBuilder domBuilder = factory.newDocumentBuilder();

        // The document holds the complete XML record as a tree.
        Document document = domBuilder.parse(new InputSource(new StringReader(inputString)));

        // Take the title from the <title> element(s); the last usable one wins.
        NodeList titleNodes = document.getElementsByTagName("title");
        for (int i = 0; i < titleNodes.getLength(); i++) {
            Node nodeVal = titleNodes.item(i).getFirstChild();
            if (nodeVal != null && nodeVal.getNodeValue() != null) {
                title = nodeVal.getNodeValue().replace(":", "_");
            }
        }

        // Take the outlinks from the <text> element(s); the last non-empty one wins.
        NodeList textNodes = document.getElementsByTagName("text");
        for (int i = 0; i < textNodes.getLength(); i++) {
            Node nodeVal = textNodes.item(i).getFirstChild();
            if (nodeVal != null) {
                outlinks = GetOutlinks(nodeVal.getNodeValue());
            }
        }
    } catch (Exception e) {
        // Best effort: a malformed record is reported and whatever was extracted so far
        // (possibly nothing) is used below.
        e.printStackTrace();
    }

    if (title == null || title.length() == 0) {
        return;
    }
    reporter.setStatus(title);

    StringBuilder links = new StringBuilder();
    if (outlinks != null) {
        for (String link : outlinks) {
            // Newlines are stripped per link, so the joined string never contains any.
            links.append(" ");
            links.append(link.replace(" ", "_").replace("\n", ""));
        }
    }

    output.collect(new Text(title), new Text(links.toString()));
}

From source file:BU.MET.CS755.SpeciesGraphBuilderReducer.java

/**
 * Concatenates all values of a key and emits {@code "<count>:<values>"}, where each
 * value is preceded by a single space and {@code count} is the number of values.
 *
 * <p>The previous version called {@code page.replaceAll(" ", "_")} and discarded the
 * result, so it never altered the output. That dead call is removed rather than
 * "fixed": assigning its result would change the emitted text. The output is
 * byte-identical to before; it is now built with a {@link StringBuilder} instead of
 * repeated string concatenation.
 *
 * @param key      the key being reduced; also reported as status text
 * @param values   the values grouped under {@code key}
 * @param output   collector receiving the single result pair
 * @param reporter receives the key as status text
 * @throws IOException if the collector fails
 */
public void reduce(Text key, Iterator<Text> values, OutputCollector<Text, Text> output, Reporter reporter)
        throws IOException {
    reporter.setStatus(key.toString());

    StringBuilder pages = new StringBuilder();
    int count = 0;

    while (values.hasNext()) {
        pages.append(' ').append(values.next().toString());
        count++;
    }

    // Layout: "<count>:" followed by " value" for each value.
    output.collect(key, new Text(count + ":" + pages));
}

From source file:cascading.tap.hadoop.ZipInputFormatTest.java

License:Open Source License

/**
 * Writes zip files with a varying number of entries, each holding consecutive integers
 * one per line, then reads them back through {@code ZipInputFormat} splits and checks
 * that every written integer is read exactly once.
 *
 * <p>The step between entry counts is random; the seed is logged so a run can be
 * reproduced.
 *
 * @throws Exception if file system access, zip writing or record reading fails
 */
public void testSplits() throws Exception {
    JobConf job = new JobConf();
    FileSystem currentFs = FileSystem.get(job);
    Path zipPath = new Path(workDir, "test.zip");
    Reporter reporter = Reporter.NULL;

    int seed = new Random().nextInt();
    LOG.info("seed = " + seed);
    Random random = new Random(seed);
    FileInputFormat.setInputPaths(job, zipPath);

    int entries = 1;
    while (entries < MAX_ENTRIES) {
        ByteArrayOutputStream zipBytes = new ByteArrayOutputStream();
        ZipOutputStream zos = new ZipOutputStream(zipBytes);
        // After the entry loop this holds the total number of values written.
        long nextValue = 0;

        LOG.debug("creating; zip file with entries = " + entries);

        // One zip entry per iteration, MAX_LENGTH split evenly between the entries.
        for (int entryCounter = 0; entryCounter < entries; entryCounter++) {
            long entryLength = MAX_LENGTH / entries;
            ZipEntry zipEntry = new ZipEntry("/entry" + entryCounter + ".txt");
            zipEntry.setMethod(ZipEntry.DEFLATED);
            zos.putNextEntry(zipEntry);

            long entryEnd = (entryCounter + 1) * entryLength;
            for (nextValue = entryCounter * entryLength; nextValue < entryEnd; nextValue++) {
                zos.write(Long.toString(nextValue).getBytes());
                zos.write("\n".getBytes());
            }

            zos.flush();
            zos.closeEntry();
        }

        zos.flush();
        zos.close();

        // Replace any previous archive with the freshly built one.
        currentFs.delete(zipPath, true);
        OutputStream fileOut = currentFs.create(zipPath);
        zipBytes.writeTo(fileOut);
        fileOut.close();

        ZipInputFormat format = new ZipInputFormat();
        format.configure(job);
        LongWritable key = new LongWritable();
        Text value = new Text();
        InputSplit[] splits = format.getSplits(job, 100);

        // One bit per written value; a bit that is already set means a duplicate read.
        BitSet seen = new BitSet((int) nextValue);
        for (int j = 0; j < splits.length; j++) {
            LOG.debug("split[" + j + "]= " + splits[j]);
            RecordReader<LongWritable, Text> reader = format.getRecordReader(splits[j], job, reporter);

            try {
                int count = 0;

                while (reader.next(key, value)) {
                    int v = Integer.parseInt(value.toString());
                    LOG.debug("read " + v);

                    if (seen.get(v)) {
                        LOG.warn("conflict with " + v + " in split " + j + " at position " + reader.getPos());
                    }

                    assertFalse("key in multiple partitions.", seen.get(v));
                    seen.set(v);
                    count++;
                }

                LOG.debug("splits[" + j + "]=" + splits[j] + " count=" + count);
            } finally {
                reader.close();
            }
        }

        assertEquals("some keys in no partition.", nextValue, seen.cardinality());

        entries += random.nextInt(MAX_ENTRIES / 10) + 1;
    }
}

From source file:cc.slda.AnnotateDocuments.java

License:Apache License

/**
 * Reads every (int, text) record from the given reader into a map.
 *
 * @param sequenceFileReader the reader to drain; it is not closed here
 * @return a map from each record's int key to the string form of its text value
 * @throws IOException if reading a record fails
 */
public static Map<Integer, String> importParameter(SequenceFile.Reader sequenceFileReader) throws IOException {
    final Map<Integer, String> parameters = new HashMap<Integer, String>();

    // Holders handed to the reader on every call to next().
    final IntWritable indexHolder = new IntWritable();
    final Text valueHolder = new Text();

    while (true) {
        if (!sequenceFileReader.next(indexHolder, valueHolder)) {
            break;
        }
        parameters.put(indexHolder.get(), valueHolder.toString());
    }

    return parameters;
}

From source file:cc.slda.DisplayTopic.java

License:Apache License

/**
 * Command-line entry point: prints, for every topic in the beta file, the
 * highest-weighted terms, resolved to strings through the term index file.
 *
 * <p>Options: the help flag, the input (beta) path, the term index path, and an optional
 * number of terms to display (defaults to {@code TOP_DISPLAY}). A missing mandatory
 * option or a non-numeric display count prints a message and exits the JVM.
 *
 * <p>Fix: the reader opened on the index file used to leak, because the variable was
 * reassigned to the beta-file reader and only that one was closed. The index reader is
 * now closed as soon as the index has been loaded.
 *
 * @param args the command-line arguments
 * @return {@code 0} when the listing completes
 * @throws Exception if file system access or reading either sequence file fails
 */
@SuppressWarnings("unchecked")
public int run(String[] args) throws Exception {
    Options options = new Options();

    options.addOption(Settings.HELP_OPTION, false, "print the help message");
    options.addOption(OptionBuilder.withArgName(Settings.PATH_INDICATOR).hasArg()
            .withDescription("input beta file").create(Settings.INPUT_OPTION));
    options.addOption(OptionBuilder.withArgName(Settings.PATH_INDICATOR).hasArg()
            .withDescription("term index file").create(ParseCorpus.INDEX));
    options.addOption(OptionBuilder.withArgName(Settings.INTEGER_INDICATOR).hasArg()
            .withDescription("display top terms only (default - 10)").create(TOP_DISPLAY_OPTION));

    String betaString = null;
    String indexString = null;
    int topDisplay = TOP_DISPLAY;

    CommandLineParser parser = new GnuParser();
    HelpFormatter formatter = new HelpFormatter();
    try {
        CommandLine line = parser.parse(options, args);

        if (line.hasOption(Settings.HELP_OPTION)) {
            formatter.printHelp(ParseCorpus.class.getName(), options);
            System.exit(0);
        }

        if (line.hasOption(Settings.INPUT_OPTION)) {
            betaString = line.getOptionValue(Settings.INPUT_OPTION);
        } else {
            throw new ParseException("Parsing failed due to " + Settings.INPUT_OPTION + " not initialized...");
        }

        if (line.hasOption(ParseCorpus.INDEX)) {
            indexString = line.getOptionValue(ParseCorpus.INDEX);
        } else {
            throw new ParseException("Parsing failed due to " + ParseCorpus.INDEX + " not initialized...");
        }

        if (line.hasOption(TOP_DISPLAY_OPTION)) {
            topDisplay = Integer.parseInt(line.getOptionValue(TOP_DISPLAY_OPTION));
        }
    } catch (ParseException pe) {
        // NOTE(review): exits with status 0 even though parsing failed; kept as-is so
        // existing callers and scripts see the same exit code.
        System.err.println(pe.getMessage());
        formatter.printHelp(ParseCorpus.class.getName(), options);
        System.exit(0);
    } catch (NumberFormatException nfe) {
        System.err.println(nfe.getMessage());
        System.exit(0);
    }

    JobConf conf = new JobConf(DisplayTopic.class);
    FileSystem fs = FileSystem.get(conf);

    Path indexPath = new Path(indexString);
    Preconditions.checkArgument(fs.exists(indexPath) && fs.isFile(indexPath), "Invalid index path...");

    Path betaPath = new Path(betaString);
    Preconditions.checkArgument(fs.exists(betaPath) && fs.isFile(betaPath), "Invalid beta path...");

    SequenceFile.Reader sequenceFileReader = null;
    try {
        // Pass 1: load the term id -> term string index.
        IntWritable intWritable = new IntWritable();
        Text text = new Text();
        Map<Integer, String> termIndex = new HashMap<Integer, String>();
        sequenceFileReader = new SequenceFile.Reader(fs, indexPath, conf);
        while (sequenceFileReader.next(intWritable, text)) {
            termIndex.put(intWritable.get(), text.toString());
        }

        // Release the index reader before the variable is reused. Nulling it keeps the
        // finally block from closing it a second time if opening the beta file fails.
        IOUtils.closeStream(sequenceFileReader);
        sequenceFileReader = null;

        // Pass 2: one record per topic, mapping term id -> weight.
        PairOfIntFloat pairOfIntFloat = new PairOfIntFloat();
        HMapIDW hmap = new HMapIDW();
        // Keyed by negated weight so ascending key order lists the largest weight first.
        // Terms with exactly equal weight share a key, so only one of them is kept.
        TreeMap<Double, Integer> treeMap = new TreeMap<Double, Integer>();
        sequenceFileReader = new SequenceFile.Reader(fs, betaPath, conf);
        while (sequenceFileReader.next(pairOfIntFloat, hmap)) {
            treeMap.clear();

            System.out.println("==============================");
            System.out.println(
                    "Top ranked " + topDisplay + " terms for Topic " + pairOfIntFloat.getLeftElement());
            System.out.println("==============================");

            for (int termId : hmap.keySet()) {
                treeMap.put(-hmap.get(termId), termId);
                // Keep only the topDisplay best: drop the entry with the largest key,
                // i.e. the smallest weight.
                if (treeMap.size() > topDisplay) {
                    treeMap.remove(treeMap.lastKey());
                }
            }

            for (Map.Entry<Double, Integer> ranked : treeMap.entrySet()) {
                double negatedWeight = ranked.getKey();
                if (termIndex.containsKey(ranked.getValue())) {
                    System.out.println(termIndex.get(ranked.getValue()) + "\t\t" + -negatedWeight);
                } else {
                    System.out.println("How embarrassing! Term index not found...");
                }
            }
        }
    } finally {
        IOUtils.closeStream(sequenceFileReader);
    }

    return 0;
}

From source file:cereal.impl.ProtobufMessageMapping.java

License:Apache License

/**
 * Populates the builder wrapped by {@code obj} from the given Key-Value pairs.
 *
 * <p>The column qualifier of each key names the target field. Qualifiers without
 * {@code PERIOD} are matched against the builder's own field descriptors; for a repeated
 * field the part before the last {@code DOLLAR} is used as the field name. The value is
 * parsed according to the field's Java type and then set, or appended when the field is
 * repeated. Qualifiers containing {@code PERIOD} are collected, grouped per
 * message-typed field (and per repetition number for {@code name DOLLAR n PERIOD rest}),
 * and handed recursively to the mapping registered for the nested message type.
 *
 * <p>Fixes:
 * <ul>
 * <li>The qualifier is no longer modified while scanning the descriptors. Previously the
 * first repeated descriptor encountered stripped the repetition suffix from the shared
 * variable, or threw when there was none. That broke singular fields declared after a
 * repeated field, and any message with more than one repeated field.</li>
 * <li>"Found no Key-Value pairs" is now logged only when none were found.</li>
 * </ul>
 *
 * @param iter the Key-Value pairs to apply; must not be {@code null}
 * @param obj  wrapper that must hold a builder; must not be {@code null}
 * @throws RuntimeException if a repeated nested qualifier has no {@code PERIOD} after its
 *         {@code DOLLAR}, or if the nested message builder cannot be created or filled
 */
@Override
public void update(Iterable<Entry<Key, Value>> iter, InstanceOrBuilder<T> obj) {
    checkNotNull(iter, "Iterable was null");
    checkNotNull(obj, "InstanceOrBuilder was null");
    checkArgument(Type.BUILDER == obj.getType(), "Expected argument to be a builder");

    final GeneratedMessage.Builder<?> builder = (GeneratedMessage.Builder<?>) obj.get();
    final List<Entry<Key, Value>> leftoverFields = new LinkedList<>();

    for (Entry<Key, Value> entry : iter) {
        final String qualifier = entry.getKey().getColumnQualifier().toString();

        // A PERIOD marks a field of a nested message; those are handled in the second pass.
        if (0 <= qualifier.indexOf(PERIOD)) {
            leftoverFields.add(entry);
            continue;
        }

        // Find the FieldDescriptor from the Key
        for (FieldDescriptor fieldDesc : builder.getDescriptorForType().getFields()) {
            // Derive the name to compare per descriptor; never modify the qualifier itself.
            String candidate = qualifier;
            if (fieldDesc.isRepeated()) {
                int offset = qualifier.lastIndexOf(DOLLAR);
                if (offset < 0) {
                    // No repetition suffix, so this key cannot belong to a repeated field.
                    continue;
                }
                candidate = qualifier.substring(0, offset);
            }
            if (!candidate.equals(fieldDesc.getName())) {
                continue;
            }

            Value value = entry.getValue();
            switch (fieldDesc.getJavaType()) {
            case INT:
                setOrAddField(builder, fieldDesc, Integer.parseInt(value.toString()));
                break;
            case LONG:
                setOrAddField(builder, fieldDesc, Long.parseLong(value.toString()));
                break;
            case FLOAT:
                setOrAddField(builder, fieldDesc, Float.parseFloat(value.toString()));
                break;
            case DOUBLE:
                setOrAddField(builder, fieldDesc, Double.parseDouble(value.toString()));
                break;
            case BOOLEAN:
                setOrAddField(builder, fieldDesc, Boolean.parseBoolean(value.toString()));
                break;
            case STRING:
                setOrAddField(builder, fieldDesc, value.toString());
                break;
            case BYTE_STRING:
                setOrAddField(builder, fieldDesc, ByteString.copyFrom(entry.getValue().get()));
                break;
            default:
                log.warn("Ignoring unknown serialized type {}", fieldDesc.getJavaType());
                break;
            }
            // The descriptor for this key was found; stop scanning.
            break;
        }
    }

    // All primitives in object should be filled out.
    // Make sure nested messages get filled out too.

    if (!leftoverFields.isEmpty()) {
        for (FieldDescriptor fieldDesc : builder.getDescriptorForType().getFields()) {
            if (JavaType.MESSAGE == fieldDesc.getJavaType()) {
                // For each Key-Value pair which have this prefix as the fieldname (column qualifier)
                final String fieldName = fieldDesc.getName();
                final String singularPrefix = fieldName + PERIOD, repeatedPrefix = fieldName + DOLLAR;

                log.debug("Extracting Key-Value pairs for {}", fieldDesc.getName());

                // Use a TreeMap to ensure the correct repetition order is preserved.
                // Pairs of a non-repeated nested message are stored under the key -1.
                Map<Integer, List<Entry<Key, Value>>> fieldsForNestedMessage = new TreeMap<>();

                final Text _holder = new Text();
                Iterator<Entry<Key, Value>> leftoverFieldsIter = leftoverFields.iterator();
                while (leftoverFieldsIter.hasNext()) {
                    final Entry<Key, Value> entry = leftoverFieldsIter.next();
                    final Key key = entry.getKey();
                    entry.getKey().getColumnQualifier(_holder);

                    String colqual = _holder.toString();
                    if (colqual.startsWith(singularPrefix)) {
                        // Make a copy of the original Key, stripping the prefix off of the qualifier
                        Key copy = new Key(key.getRow(), key.getColumnFamily(),
                                new Text(colqual.substring(singularPrefix.length())), key.getColumnVisibility(),
                                key.getTimestamp());

                        List<Entry<Key, Value>> kvPairs = fieldsForNestedMessage.get(-1);
                        if (null == kvPairs) {
                            kvPairs = new LinkedList<>();
                            fieldsForNestedMessage.put(-1, kvPairs);
                        }
                        kvPairs.add(Maps.immutableEntry(copy, entry.getValue()));

                        // Remove it from the list as we should never have to reread this one again
                        leftoverFieldsIter.remove();
                    } else if (colqual.startsWith(repeatedPrefix)) {
                        // Make a copy of the original Key, stripping the prefix off of the qualifier
                        int index = colqual.indexOf(PERIOD, repeatedPrefix.length());
                        if (0 > index) {
                            throw new RuntimeException("Could not find period after dollar sign: " + colqual);
                        }

                        // The text between the prefix and the PERIOD is the repetition number.
                        Integer repetition = Integer
                                .parseInt(colqual.substring(repeatedPrefix.length(), index));

                        Key copy = new Key(key.getRow(), key.getColumnFamily(),
                                new Text(colqual.substring(index + 1)), key.getColumnVisibility(),
                                key.getTimestamp());

                        List<Entry<Key, Value>> kvPairs = fieldsForNestedMessage.get(repetition);
                        if (null == kvPairs) {
                            kvPairs = new LinkedList<>();
                            fieldsForNestedMessage.put(repetition, kvPairs);
                        }
                        kvPairs.add(Maps.immutableEntry(copy, entry.getValue()));

                        // Remove it from the list as we should never have to reread this one again
                        leftoverFieldsIter.remove();
                    }
                }

                if (!fieldsForNestedMessage.isEmpty()) {
                    // We have keys, pass them down to the nested message
                    String nestedMsgClzName = getClassName(fieldDesc);

                    log.debug("Found {} Key-Value pairs for {}. Reconstituting the message.",
                            fieldsForNestedMessage.size(), nestedMsgClzName);

                    try {
                        @SuppressWarnings("unchecked")
                        // Get the class, builder and InstanceOrBuilder for the nested message
                        Class<GeneratedMessage> msgClz = (Class<GeneratedMessage>) Class
                                .forName(nestedMsgClzName);
                        Method newBuilderMethod = msgClz.getMethod("newBuilder");

                        for (Entry<Integer, List<Entry<Key, Value>>> pairsPerRepetition : fieldsForNestedMessage
                                .entrySet()) {
                            Message.Builder subBuilder = (Message.Builder) newBuilderMethod.invoke(null);
                            InstanceOrBuilder<GeneratedMessage> subIob = new InstanceOrBuilderImpl<>(subBuilder,
                                    msgClz);

                            // Get the mapping from the registry
                            ProtobufMessageMapping<GeneratedMessage> subMapping = (ProtobufMessageMapping<GeneratedMessage>) registry
                                    .get(subIob);

                            // Invoke update on the mapping with the subset of Key-Values
                            subMapping.update(pairsPerRepetition.getValue(), subIob);

                            // Set the result on the top-level obj
                            setOrAddField(builder, fieldDesc, subBuilder.build());
                        }
                    } catch (Exception e) {
                        throw new RuntimeException(e);
                    }
                } else {
                    // No fields for the sub message, therefore it's empty
                    log.debug("Found no Key-Value pairs for {}", fieldName);
                }
            }
            // Not a message, so we can ignore it
        }

        if (!leftoverFields.isEmpty()) {
            log.warn("Found {} leftover Key-Value pairs that were not consumed", leftoverFields.size());
        }
    }
}

/**
 * Stores {@code value} in {@code builder}: appended when the field is repeated, set otherwise.
 */
private void setOrAddField(GeneratedMessage.Builder<?> builder, FieldDescriptor fieldDesc, Object value) {
    if (fieldDesc.isRepeated()) {
        builder.addRepeatedField(fieldDesc, value);
    } else {
        builder.setField(fieldDesc, value);
    }
}