List of usage examples for org.apache.hadoop.io Text set
public void set(Text other)
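Before the project-specific examples, here is a minimal, self-contained sketch of the common Text.set(...) overloads (set(Text), set(String), and set(byte[], int, int)). The class name TextSetDemo and the variable names are illustrative only and are not taken from any of the source files listed below.

import java.nio.charset.StandardCharsets;
import org.apache.hadoop.io.Text;

public class TextSetDemo {
    public static void main(String[] args) {
        Text reused = new Text();

        // set(String): replace the contents with the UTF-8 encoding of a String
        reused.set("first value");

        // set(Text other): copy the bytes of another Text into this instance
        Text other = new Text("second value");
        reused.set(other);

        // set(byte[] utf8, int start, int len): copy a slice of a UTF-8 byte array
        byte[] utf8 = "third value".getBytes(StandardCharsets.UTF_8);
        reused.set(utf8, 0, utf8.length);

        System.out.println(reused); // prints "third value"
    }
}

As the examples below show, set(...) overwrites the instance's backing buffer in place, which is why MapReduce and SerDe code typically allocates one Text and keeps calling set(...) on it rather than creating a new object per record.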
From source file:com.ibm.spss.hive.serde2.xml.objectinspector.ObjectInspectorTest.java
License:Open Source License
@SuppressWarnings("rawtypes") public void testSimpleXmlNotMap() throws SerDeException { XmlSerDe xmlSerDe = new XmlSerDe(); Configuration configuration = new Configuration(); Properties properties = new Properties(); properties.put(LIST_COLUMNS, "test"); properties.put(LIST_COLUMN_TYPES, "map<string,string>"); properties.setProperty("column.xpath.test", "//*[contains(name(),'test')]/text()"); xmlSerDe.initialize(configuration, properties); Text text = new Text(); text.set("<root><test1>string1</test1><test2>string2</test2></root>"); Object o = xmlSerDe.deserialize(text); XmlStructObjectInspector structInspector = ((XmlStructObjectInspector) xmlSerDe.getObjectInspector()); StructField structField = structInspector.getStructFieldRef("test"); Object data = structInspector.getStructFieldData(o, structField); XmlMapObjectInspector fieldInspector = (XmlMapObjectInspector) structField.getFieldObjectInspector(); Map map = fieldInspector.getMap(data); assertEquals(0, map.size());/*from w w w. ja va2 s.com*/ }
From source file:com.ikanow.aleph2.analytics.hadoop.assets.ObjectNodeWritableComparable.java
License:Apache License
@Override
public void write(DataOutput out) throws IOException {
    final Text text = new Text();
    text.set(_object_node.toString());
    text.write(out);
}
From source file:com.inmobi.conduit.distcp.tools.CopyListing.java
License:Apache License
/**
 * Validate the final resulting path listing to see if there are any duplicate entries
 *
 * @param pathToListFile - path listing built by doBuildListing
 * @throws IOException - Any issues while checking for duplicates and throws
 * @throws DuplicateFileException - if there are duplicates
 */
protected void checkForDuplicates(Path pathToListFile) throws DuplicateFileException, IOException {
    Configuration config = getConf();
    FileSystem fs = pathToListFile.getFileSystem(config);
    Path sortedList = DistCpUtils.sortListing(fs, config, pathToListFile);
    SequenceFile.Reader reader = new SequenceFile.Reader(fs, sortedList, config);
    try {
        Text lastKey = new Text("*"); // source relative path can never hold *
        FileStatus lastFileStatus = new FileStatus();

        Text currentKey = new Text();
        while (reader.next(currentKey)) {
            if (currentKey.equals(lastKey)) {
                FileStatus currentFileStatus = new FileStatus();
                reader.getCurrentValue(currentFileStatus);
                throw new DuplicateFileException("File " + lastFileStatus.getPath() + " and "
                        + currentFileStatus.getPath() + " would cause duplicates. Aborting");
            }
            reader.getCurrentValue(lastFileStatus);
            lastKey.set(currentKey);
        }
    } finally {
        IOUtils.closeStream(reader);
    }
}
From source file:com.jeffy.fbds.SequenceFileWriter.java
License:Apache License
public static void main(String[] args) throws IOException {
    String uri = args[0];
    Configuration conf = new Configuration();
    Path path = new Path(uri);
    IntWritable key = new IntWritable();
    Text value = new Text();
    try (SequenceFile.Writer writer = SequenceFile.createWriter(conf, Writer.file(path),
            Writer.keyClass(key.getClass()), Writer.valueClass(value.getClass()))) {
        for (int i = 0; i < 100; i++) {
            key.set(100 - i);
            value.set(DATA[i % DATA.length]);
            System.out.printf("[%s]\t%s\t%s\n", writer.getLength(), key, value);
            writer.append(key, value);
        }
    }
}
From source file:com.jfolson.hive.serde.RBaseSerDe.java
License:Apache License
protected void serializeField(Object o, ObjectInspector oi, Object reuse) throws IOException {
    //LOG.info("Serializing hive type: "+oi.getTypeName());
    //LOG.info("Serializing category: "+oi.getCategory().toString());
    if (o == null) {
        tbOut.writeNull();
        return;
    }
    switch (oi.getCategory()) {
    case PRIMITIVE: {
        PrimitiveObjectInspector poi = (PrimitiveObjectInspector) oi;
        //LOG.info("Serializing primitive: "+poi.getPrimitiveCategory().toString());
        switch (poi.getPrimitiveCategory()) {
        case VOID: {
            return;
        }
        case BINARY: {
            BinaryObjectInspector boi = (BinaryObjectInspector) poi;
            TypedBytesWritable bytes = reuse == null ? new TypedBytesWritable() : (TypedBytesWritable) reuse;
            BytesWritable bytesWrite = boi.getPrimitiveWritableObject(o);
            if (bytesWrite != null) {
                bytes.set(bytesWrite);
                if (!RType.isValid(bytes)) {
                    LOG.error("Invalid typedbytes detected with type: " + RType.getType(bytes).code);
                    bytes.setValue(new Buffer(bytesWrite.getBytes(), 0, bytesWrite.getLength()));
                }
                //LOG.info("Writing binary primitive with class: "+bytes.getClass().getName());
                tbOut.write(bytes);
            }
            return;
        }
        case BOOLEAN: {
            BooleanObjectInspector boi = (BooleanObjectInspector) poi;
            BooleanWritable r = reuse == null ? new BooleanWritable() : (BooleanWritable) reuse;
            r.set(boi.get(o));
            tbOut.write(r);
            return;
        }
        case BYTE: {
            ByteObjectInspector boi = (ByteObjectInspector) poi;
            ByteWritable r = reuse == null ? new ByteWritable() : (ByteWritable) reuse;
            r.set(boi.get(o));
            tbOut.write(r);
            return;
        }
        case SHORT: {
            ShortObjectInspector spoi = (ShortObjectInspector) poi;
            ShortWritable r = reuse == null ? new ShortWritable() : (ShortWritable) reuse;
            r.set(spoi.get(o));
            tbOut.write(r);
            return;
        }
        case INT: {
            IntObjectInspector ioi = (IntObjectInspector) poi;
            IntWritable r = reuse == null ? new IntWritable() : (IntWritable) reuse;
            r.set(ioi.get(o));
            tbOut.write(r);
            return;
        }
        case LONG: {
            LongObjectInspector loi = (LongObjectInspector) poi;
            LongWritable r = reuse == null ? new LongWritable() : (LongWritable) reuse;
            r.set(loi.get(o));
            tbOut.write(r);
            return;
        }
        case FLOAT: {
            FloatObjectInspector foi = (FloatObjectInspector) poi;
            FloatWritable r = reuse == null ? new FloatWritable() : (FloatWritable) reuse;
            r.set(foi.get(o));
            tbOut.write(r);
            return;
        }
        case DOUBLE:
            DoubleObjectInspector doi = (DoubleObjectInspector) poi;
            DoubleWritable r = reuse == null ? new DoubleWritable() : (DoubleWritable) reuse;
            r.set(doi.get(o));
            tbOut.write(r);
            return;
        case STRING: {
            StringObjectInspector soi = (StringObjectInspector) poi;
            Text t = soi.getPrimitiveWritableObject(o);
            tbOut.write(t);
            return;
        }
        default: {
            throw new RuntimeException("Unrecognized type: " + poi.getPrimitiveCategory());
        }
        }
    }
    case LIST: {
        ListObjectInspector loi = (ListObjectInspector) oi;
        ObjectInspector elemOI = loi.getListElementObjectInspector();
        List l = loi.getList(o);
        // Don't use array (typecode: 144) until everything supports NA values in typedbytes
        if (false) { //(elemOI.getCategory()==ObjectInspector.Category.PRIMITIVE){
            tbOut.writeArray(l, (PrimitiveObjectInspector) elemOI);
        } else {
            tbOut.writeVector(l, (PrimitiveObjectInspector) elemOI);
        }
        return;
    }
    case MAP:
    case STRUCT: {
        // For complex object, serialize to JSON format
        String s = SerDeUtils.getJSONString(o, oi);
        Text t = reuse == null ? new Text() : (Text) reuse;
        // convert to Text and write it
        t.set(s);
        tbOut.write(t);
        return;
    }
    default: {
        throw new RuntimeException("Unrecognized type: " + oi.getCategory());
    }
    }
}
From source file:com.jfolson.hive.serde.RTypedBytesSerDe.java
License:Apache License
private void serializeField(Object o, ObjectInspector oi, Object reuse) throws IOException {
    //LOG.info("Serializing hive type: "+oi.getTypeName());
    //LOG.info("Serializing category: "+oi.getCategory().toString());
    if (o == null) {
        tbOut.writeNull();
        return;
    }
    switch (oi.getCategory()) {
    case PRIMITIVE: {
        PrimitiveObjectInspector poi = (PrimitiveObjectInspector) oi;
        //LOG.info("Serializing primitive: "+poi.getPrimitiveCategory().toString());
        switch (poi.getPrimitiveCategory()) {
        case VOID: {
            return;
        }
        case BINARY: {
            BinaryObjectInspector boi = (BinaryObjectInspector) poi;
            TypedBytesWritable bytes = reuse == null ? new TypedBytesWritable() : (TypedBytesWritable) reuse;
            BytesWritable bytesWrite = boi.getPrimitiveWritableObject(o);
            if (bytesWrite != null) {
                bytes.set(bytesWrite);
                if (!RType.isValid(bytes)) {
                    LOG.error("Invalid typedbytes detected with type: " + RType.getType(bytes).code);
                    bytes.setValue(new Buffer(bytesWrite.getBytes(), 0, bytesWrite.getLength()));
                }
                //LOG.info("Writing binary primitive with class: "+bytes.getClass().getName());
                tbOut.write(bytes);
            }
            return;
        }
        case BOOLEAN: {
            BooleanObjectInspector boi = (BooleanObjectInspector) poi;
            BooleanWritable r = reuse == null ? new BooleanWritable() : (BooleanWritable) reuse;
            r.set(boi.get(o));
            tbOut.write(r);
            return;
        }
        case BYTE: {
            ByteObjectInspector boi = (ByteObjectInspector) poi;
            ByteWritable r = reuse == null ? new ByteWritable() : (ByteWritable) reuse;
            r.set(boi.get(o));
            tbOut.write(r);
            return;
        }
        case SHORT: {
            ShortObjectInspector spoi = (ShortObjectInspector) poi;
            ShortWritable r = reuse == null ? new ShortWritable() : (ShortWritable) reuse;
            r.set(spoi.get(o));
            tbOut.write(r);
            return;
        }
        case INT: {
            IntObjectInspector ioi = (IntObjectInspector) poi;
            IntWritable r = reuse == null ? new IntWritable() : (IntWritable) reuse;
            r.set(ioi.get(o));
            tbOut.write(r);
            return;
        }
        case LONG: {
            LongObjectInspector loi = (LongObjectInspector) poi;
            LongWritable r = reuse == null ? new LongWritable() : (LongWritable) reuse;
            r.set(loi.get(o));
            tbOut.write(r);
            return;
        }
        case FLOAT: {
            FloatObjectInspector foi = (FloatObjectInspector) poi;
            FloatWritable r = reuse == null ? new FloatWritable() : (FloatWritable) reuse;
            r.set(foi.get(o));
            tbOut.write(r);
            return;
        }
        case DOUBLE:
            DoubleObjectInspector doi = (DoubleObjectInspector) poi;
            DoubleWritable r = reuse == null ? new DoubleWritable() : (DoubleWritable) reuse;
            r.set(doi.get(o));
            tbOut.write(r);
            return;
        case STRING: {
            StringObjectInspector soi = (StringObjectInspector) poi;
            Text t = soi.getPrimitiveWritableObject(o);
            tbOut.write(t);
            return;
        }
        default: {
            throw new RuntimeException("Unrecognized type: " + poi.getPrimitiveCategory());
        }
        }
    }
    case LIST: {
        ListObjectInspector loi = (ListObjectInspector) oi;
        ObjectInspector elemOI = loi.getListElementObjectInspector();
        List l = loi.getList(o);
        if (false) { //(elemOI.getCategory()==ObjectInspector.Category.PRIMITIVE){
            tbOut.writeArray(l, (PrimitiveObjectInspector) elemOI);
        } else {
            tbOut.writeVector(l, (PrimitiveObjectInspector) elemOI);
        }
        return;
    }
    case MAP:
    case STRUCT: {
        // For complex object, serialize to JSON format
        String s = SerDeUtils.getJSONString(o, oi);
        Text t = reuse == null ? new Text() : (Text) reuse;
        // convert to Text and write it
        t.set(s);
        tbOut.write(t);
        return;
    }
    default: {
        throw new RuntimeException("Unrecognized type: " + oi.getCategory());
    }
    }
}
From source file:com.jfolson.hive.serde.RTypedBytesWritableInput.java
License:Apache License
public Text readText(Text t) throws IOException {
    if (t == null) {
        t = new Text();
    }
    t.set(in.readString());
    return t;
}
From source file:com.lucidworks.hadoop.utils.ZipFileRecordReader.java
License:Apache License
/**
 * Each ZipEntry is decompressed and readied for the Mapper. If the
 * ZipFileInputFormat has been set to Lenient (not the default), certain
 * exceptions will be gracefully ignored to prevent a larger job from
 * failing.
 */
@Override
public boolean next(Text key, BytesWritable value) throws IOException {
    {
        ZipEntry entry = null;
        try {
            entry = zip.getNextEntry();
        } catch (Throwable e) {
            if (!ZipFileInputFormat.getLenient()) {
                throw new RuntimeException(e);
            }
        }
        // Sanity check
        if (entry == null) {
            processed = true;
            return false;
        }
        // Filename
        key.set(new Text(entry.getName()));
        byte[] bufferOut = null;
        int cummulativeBytesRead = 0;
        while (true) {
            int bytesRead = 0;
            byte[] bufferIn = new byte[8192];
            try {
                bytesRead = zip.read(bufferIn, 0, bufferIn.length);
            } catch (Throwable e) {
                if (!ZipFileInputFormat.getLenient()) {
                    throw new RuntimeException(e);
                }
                return false;
            }
            if (bytesRead > 0) {
                byte[] tmp = head(bufferIn, bytesRead);
                if (cummulativeBytesRead == 0) {
                    bufferOut = tmp;
                } else {
                    bufferOut = add(bufferOut, tmp);
                }
                cummulativeBytesRead += bytesRead;
            } else {
                break;
            }
        }
        try {
            zip.closeEntry();
        } catch (IOException e) {
            if (!ZipFileInputFormat.getLenient()) {
                throw new RuntimeException(e);
            }
        }
        // Uncompressed contents
        if (bufferOut != null) {
            value.setCapacity(bufferOut.length);
            value.set(bufferOut, 0, bufferOut.length);
        } else {
            // should we return false here? I don't think so, since I think that would mean we can't process any more records
            log.warn("bufferOut is null for " + key);
        }
        return true;
    }
}
From source file:com.m6d.filecrush.crush.Crush.java
License:Apache License
private void cloneOutput() throws IOException {
    List<FileStatus> listStatus = getOutputMappings();

    /*
     * Initialize to empty list, in which case swap() will be a no-op. The reference is then replaced with a real list,
     * which is used in the subsequent iterations.
     */
    List<Path> crushInput = emptyList();

    Text srcFile = new Text();
    Text crushOut = new Text();
    Text prevCrushOut = new Text();

    for (FileStatus partFile : listStatus) {
        Path path = partFile.getPath();
        Reader reader = new Reader(fs, path, fs.getConf());
        try {
            while (reader.next(srcFile, crushOut)) {
                if (!crushOut.equals(prevCrushOut)) {
                    swap(crushInput, prevCrushOut.toString());
                    prevCrushOut.set(crushOut);
                    crushInput = new LinkedList<Path>();
                }
                crushInput.add(new Path(srcFile.toString()));
            }
        } finally {
            try {
                reader.close();
            } catch (IOException e) {
                LOG.warn("Trapped exception when closing " + path, e);
            }
        }
        swap(crushInput, prevCrushOut.toString());
    }

    /*
     * Don't forget to move the files that were not crushed to the output dir so that the output dir has all the data
     * that was in the input dir, the difference being there are fewer files in the output dir.
     */
    if (removableFiles.size() > 0) {
        String srcDirName = fs.makeQualified(srcDir).toUri().getPath();
        String destName = fs.makeQualified(dest).toUri().getPath();

        print(Verbosity.INFO, "\n\nMoving removed files to " + destName);

        for (String name : removableFiles) {
            Path srcPath = new Path(name);
            Path destPath = new Path(destName + name).getParent();

            print(Verbosity.INFO, "\n Moving " + srcPath + " to " + destPath);
            rename(srcPath, destPath, null);
        }
    }
}
From source file:com.m6d.filecrush.crush.Crush.java
License:Apache License
void writeDirs() throws IOException {
    print(Verbosity.INFO, "\nUsing temporary directory " + tmpDir.toUri().getPath() + "\n");

    FileStatus status = fs.getFileStatus(srcDir);

    Path tmpIn = new Path(tmpDir, "in");

    bucketFiles = new Path(tmpIn, "dirs");
    partitionMap = new Path(tmpIn, "partition-map");
    counters = new Path(tmpIn, "counters");

    skippedFiles = new HashSet<String>();
    removableFiles = new HashSet<String>();

    /*
     * Prefer the path returned by the status because it is always fully qualified.
     */
    List<Path> dirs = asList(status.getPath());

    Text key = new Text();
    Text value = new Text();

    Bucketer partitionBucketer = new Bucketer(maxTasks, 0, false);
    partitionBucketer.reset("partition-map");

    jobCounters = new Counters();
    int fileCount = 0;

    //Path bucketFile = new Path(tmpIn, "dirs_" + fileCount++);
    Writer writer = SequenceFile.createWriter(fs, job, bucketFiles, Text.class, Text.class, CompressionType.BLOCK);

    try {
        while (!dirs.isEmpty()) {
            List<Path> nextLevel = new LinkedList<Path>();

            for (Path dir : dirs) {
                String dirPath = dir.toUri().getPath();
                print(Verbosity.INFO, "\n\n[" + dirPath + "]");

                jobCounters.incrCounter(MapperCounter.DIRS_FOUND, 1);

                FileStatus[] contents = fs.listStatus(dir, new PathFilter() {
                    @Override
                    public boolean accept(Path testPath) {
                        if (ignoredFilesMatcher == null)
                            return true;
                        ignoredFilesMatcher.reset(testPath.toUri().getPath());
                        boolean ignores = ignoredFilesMatcher.matches();
                        if (ignores)
                            LOG.info("Ignoring file " + testPath);
                        return !ignores;
                    }
                });

                if (contents == null || contents.length == 0) {
                    print(Verbosity.INFO, "\n Directory is empty");

                    jobCounters.incrCounter(MapperCounter.DIRS_SKIPPED, 1);
                } else {
                    List<FileStatus> crushables = new ArrayList<FileStatus>(contents.length);
                    Set<String> uncrushedFiles = new HashSet<String>(contents.length);

                    long crushableBytes = 0;

                    /*
                     * Queue sub directories for subsequent inspection and examine the files in this directory.
                     */
                    for (FileStatus content : contents) {
                        Path path = content.getPath();

                        if (content.isDir()) {
                            nextLevel.add(path);
                        } else {
                            String filePath = path.toUri().getPath();
                            boolean skipFile = false;
                            if (skippedFilesMatcher != null) {
                                skippedFilesMatcher.reset(filePath);
                                if (skippedFilesMatcher.matches()) {
                                    skipFile = true;
                                }
                            }

                            boolean changed = uncrushedFiles.add(filePath);
                            assert changed : path.toUri().getPath();
                            long fileLength = content.getLen();

                            if (!skipFile && fileLength <= maxEligibleSize) {
                                if (removeEmptyFiles && fileLength == 0)
                                    removableFiles.add(filePath);
                                else {
                                    crushables.add(content);
                                    crushableBytes += fileLength;
                                }
                            }
                        }
                    }

                    /*
                     * We found a directory with data in it. Make sure we know how to name the crush output file and
                     * then increment the number of files we found.
                     */
                    if (!uncrushedFiles.isEmpty()) {
                        if (-1 == findMatcher(dir)) {
                            throw new IllegalArgumentException(
                                    "Could not find matching regex for directory: " + dir);
                        }

                        jobCounters.incrCounter(MapperCounter.FILES_FOUND, uncrushedFiles.size());
                    }

                    if (0 == crushableBytes) {
                        print(Verbosity.INFO, "\n Directory has no crushable files");

                        jobCounters.incrCounter(MapperCounter.DIRS_SKIPPED, 1);
                    } else {
                        /*
                         * We found files to consider for crushing.
                         */
                        long nBlocks = crushableBytes / dfsBlockSize;

                        if (nBlocks * dfsBlockSize != crushableBytes) {
                            nBlocks++;
                        }

                        /*
                         * maxFileBlocks will be huge in v1 mode, which will lead to one bucket per directory.
                         */
                        long dirBuckets = nBlocks / maxFileBlocks;

                        if (dirBuckets * maxFileBlocks != nBlocks) {
                            dirBuckets++;
                        }

                        if (dirBuckets > Integer.MAX_VALUE) {
                            throw new AssertionError("Too many buckets: " + dirBuckets);
                        }

                        Bucketer directoryBucketer = new Bucketer((int) dirBuckets, excludeSingleFileDirs);

                        directoryBucketer.reset(getPathPart(dir));

                        for (FileStatus file : crushables) {
                            directoryBucketer.add(new FileStatusHasSize(file));
                        }

                        List<Bucket> crushFiles = directoryBucketer.createBuckets();

                        if (crushFiles.isEmpty()) {
                            jobCounters.incrCounter(MapperCounter.DIRS_SKIPPED, 1);
                            print(Verbosity.INFO, "\n Directory skipped");
                        } else {
                            nBuckets += crushFiles.size();

                            jobCounters.incrCounter(MapperCounter.DIRS_ELIGIBLE, 1);

                            print(Verbosity.INFO, "\n Generating " + crushFiles.size() + " output files");

                            /*
                             * Write out the mapping between a bucket and a file.
                             */
                            for (Bucket crushFile : crushFiles) {
                                String bucketId = crushFile.name();

                                List<String> filesInBucket = crushFile.contents();

                                print(Verbosity.INFO,
                                        format("\n Output %s will include %,d input bytes from %,d files",
                                                bucketId, crushFile.size(), filesInBucket.size()));

                                key.set(bucketId);

                                for (String f : filesInBucket) {
                                    boolean changed = uncrushedFiles.remove(f);
                                    assert changed : f;

                                    pathMatcher.reset(f);
                                    pathMatcher.matches();

                                    value.set(pathMatcher.group(5));

                                    /*
                                     * Write one row per file to maximize the number of mappers
                                     */
                                    writer.append(key, value);

                                    /*
                                     * Print the input file with four leading spaces.
                                     */
                                    print(Verbosity.VERBOSE, "\n " + f);
                                }

                                jobCounters.incrCounter(MapperCounter.FILES_ELIGIBLE, filesInBucket.size());

                                partitionBucketer.add(crushFile);
                            }
                        }
                    }

                    if (!removableFiles.isEmpty()) {
                        print(Verbosity.INFO, "\n Marked " + removableFiles.size() + " files for removal");

                        for (String removable : removableFiles) {
                            uncrushedFiles.remove(removable);
                            print(Verbosity.VERBOSE, "\n " + removable);
                        }

                        jobCounters.incrCounter(MapperCounter.FILES_REMOVED, removableFiles.size());
                    }

                    if (!uncrushedFiles.isEmpty()) {
                        print(Verbosity.INFO, "\n Skipped " + uncrushedFiles.size() + " files");

                        for (String uncrushed : uncrushedFiles) {
                            print(Verbosity.VERBOSE, "\n " + uncrushed);
                        }

                        jobCounters.incrCounter(MapperCounter.FILES_SKIPPED, uncrushedFiles.size());
                    }

                    skippedFiles.addAll(uncrushedFiles);
                }
            }

            dirs = nextLevel;
        }
    } finally {
        writer.close();
    }

    /*
     * Now that we have processed all the directories, write the partition map.
     */
    List<Bucket> partitions = partitionBucketer.createBuckets();
    assert partitions.size() <= maxTasks;

    writer = SequenceFile.createWriter(fs, job, partitionMap, Text.class, IntWritable.class);
    IntWritable partNum = new IntWritable();
    int totalReducers = 0;

    for (Bucket partition : partitions) {
        String partitionName = partition.name();

        int p = Integer.parseInt(partitionName.substring(partitionName.lastIndexOf('-') + 1));
        partNum.set(p);

        if (partition.contents().size() > 0)
            totalReducers++;

        for (String bucketId : partition.contents()) {
            key.set(bucketId);
            writer.append(key, partNum);
        }
    }

    writer.close();

    print(Verbosity.INFO, "\n\nNumber of allocated reducers = " + totalReducers);
    job.setInt("mapreduce.job.reduces", totalReducers);

    DataOutputStream countersStream = fs.create(this.counters);
    jobCounters.write(countersStream);
    countersStream.close();
}