Example usage for org.apache.hadoop.io Text set

Introduction

This page lists example usages of org.apache.hadoop.io.Text#set, collected from open-source projects.

Prototype

public void set(Text other) 

Document

Copy a text.
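
For orientation before the examples, here is a minimal, self-contained sketch (an illustration written for this page, not taken from any of the projects below) of the common set overloads; all four overloads shown are part of the standard org.apache.hadoop.io.Text API:

import java.nio.charset.StandardCharsets;

import org.apache.hadoop.io.Text;

public class TextSetDemo {
    public static void main(String[] args) {
        Text a = new Text();
        a.set("hello"); // set(String): replace the contents with the string's UTF-8 encoding

        Text b = new Text();
        b.set(a); // set(Text): copy the bytes of another Text

        byte[] raw = "hello world".getBytes(StandardCharsets.UTF_8);
        Text c = new Text();
        c.set(raw, 6, 5); // set(byte[], int, int): copy a slice of a byte array

        System.out.println(a + " | " + b + " | " + c); // prints: hello | hello | world
    }
}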

Usage

From source file: com.ibm.spss.hive.serde2.xml.objectinspector.ObjectInspectorTest.java

License: Open Source License

@SuppressWarnings("rawtypes")
public void testSimpleXmlNotMap() throws SerDeException {
    XmlSerDe xmlSerDe = new XmlSerDe();
    Configuration configuration = new Configuration();
    Properties properties = new Properties();
    properties.put(LIST_COLUMNS, "test");
    properties.put(LIST_COLUMN_TYPES, "map<string,string>");
    properties.setProperty("column.xpath.test", "//*[contains(name(),'test')]/text()");
    xmlSerDe.initialize(configuration, properties);
    Text text = new Text();
    text.set("<root><test1>string1</test1><test2>string2</test2></root>");
    Object o = xmlSerDe.deserialize(text);
    XmlStructObjectInspector structInspector = ((XmlStructObjectInspector) xmlSerDe.getObjectInspector());
    StructField structField = structInspector.getStructFieldRef("test");
    Object data = structInspector.getStructFieldData(o, structField);
    XmlMapObjectInspector fieldInspector = (XmlMapObjectInspector) structField.getFieldObjectInspector();
    Map map = fieldInspector.getMap(data);
    assertEquals(0, map.size());
}
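
Note on the assertion: the XPath //*[contains(name(),'test')]/text() selects text nodes rather than elements, so there are no child elements for the map inspector to expose as entries; an empty map appears to be the intended outcome, which is exactly what assertEquals(0, map.size()) checks.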

From source file: com.ikanow.aleph2.analytics.hadoop.assets.ObjectNodeWritableComparable.java

License: Apache License

@Override
public void write(DataOutput out) throws IOException {
    final Text text = new Text();
    text.set(_object_node.toString());

    text.write(out);
}
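
A matching read side would mirror this write path. The following is a hypothetical sketch: _object_node is the field from the snippet above, and the Jackson ObjectMapper call is an assumption about how the class rebuilds its JSON tree:

@Override
public void readFields(DataInput in) throws IOException {
    final Text text = new Text();
    text.readFields(in); // Text reads a vint length followed by that many UTF-8 bytes
    // Re-parse the JSON string that write() serialized.
    _object_node = (ObjectNode) new ObjectMapper().readTree(text.toString());
}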

From source file: com.inmobi.conduit.distcp.tools.CopyListing.java

License: Apache License

/**
 * Validate the final resulting path listing to see if there are any duplicate entries.
 *
 * @param pathToListFile - path listing built by doBuildListing
 * @throws IOException - on any failure while checking for duplicates
 * @throws DuplicateFileException - if duplicate entries are found
 */
protected void checkForDuplicates(Path pathToListFile) throws DuplicateFileException, IOException {

    Configuration config = getConf();
    FileSystem fs = pathToListFile.getFileSystem(config);

    Path sortedList = DistCpUtils.sortListing(fs, config, pathToListFile);

    SequenceFile.Reader reader = new SequenceFile.Reader(fs, sortedList, config);
    try {
        Text lastKey = new Text("*"); //source relative path can never hold *
        FileStatus lastFileStatus = new FileStatus();

        Text currentKey = new Text();
        while (reader.next(currentKey)) {
            if (currentKey.equals(lastKey)) {
                FileStatus currentFileStatus = new FileStatus();
                reader.getCurrentValue(currentFileStatus);
                throw new DuplicateFileException("File " + lastFileStatus.getPath() + " and "
                        + currentFileStatus.getPath() + " would cause duplicates. Aborting");
            }
            reader.getCurrentValue(lastFileStatus);
            lastKey.set(currentKey);
        }
    } finally {
        IOUtils.closeStream(reader);
    }
}
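
The lastKey.set(currentKey) at the bottom of the loop is the important use of set here: SequenceFile.Reader refills the Writable passed to next(), so a plain reference assignment (lastKey = currentKey) would leave both names pointing at the same object and the equality check would succeed on every iteration. set copies the bytes into lastKey's own buffer. A small illustration of the difference:

Text current = new Text("a");
Text alias = current; // aliasing: both names refer to one object
Text copy = new Text();
copy.set(current);    // copying: independent backing buffer

current.set("b");
System.out.println(alias); // b  (follows the later mutation)
System.out.println(copy);  // a  (unaffected)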

From source file: com.jeffy.fbds.SequenceFileWriter.java

License: Apache License

public static void main(String[] args) throws IOException {
    // Write 100 key/value records to the SequenceFile path given as args[0].
    String uri = args[0];
    Configuration conf = new Configuration();
    Path path = new Path(uri);
    IntWritable key = new IntWritable();
    Text value = new Text();
    try (SequenceFile.Writer writer = SequenceFile.createWriter(conf, Writer.file(path),
            Writer.keyClass(key.getClass()), Writer.valueClass(value.getClass()))) {
        for (int i = 0; i < 100; i++) {
            key.set(100 - i);
            value.set(DATA[i % DATA.length]);
            System.out.printf("[%s]\t%s\t%s\n", writer.getLength(), key, value);
            writer.append(key, value);
        }
    }
}
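
For completeness, a sketch of reading those records back with the matching option-based reader API (a hypothetical helper; conf and path would be built as in the example above):

public static void dump(Configuration conf, Path path) throws IOException {
    try (SequenceFile.Reader reader = new SequenceFile.Reader(conf, SequenceFile.Reader.file(path))) {
        IntWritable key = new IntWritable();
        Text value = new Text();
        while (reader.next(key, value)) { // both Writables are refilled in place on each call
            System.out.printf("%s\t%s%n", key, value);
        }
    }
}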

From source file: com.jfolson.hive.serde.RBaseSerDe.java

License: Apache License

protected void serializeField(Object o, ObjectInspector oi, Object reuse) throws IOException {
    //LOG.info("Serializing hive type: "+oi.getTypeName());
    //LOG.info("Serializing category: "+oi.getCategory().toString());
    if (o == null) {
        tbOut.writeNull();
        return;
    }
    switch (oi.getCategory()) {
    case PRIMITIVE: {
        PrimitiveObjectInspector poi = (PrimitiveObjectInspector) oi;
        //LOG.info("Serializing primitive: "+poi.getPrimitiveCategory().toString());
        switch (poi.getPrimitiveCategory()) {
        case VOID: {
            return;
        }
        case BINARY: {
            BinaryObjectInspector boi = (BinaryObjectInspector) poi;
            TypedBytesWritable bytes = reuse == null ? new TypedBytesWritable() : (TypedBytesWritable) reuse;
            BytesWritable bytesWrite = boi.getPrimitiveWritableObject(o);
            if (bytesWrite != null) {
                bytes.set(bytesWrite);
                if (!RType.isValid(bytes)) {
                    LOG.error("Invalid typedbytes detected with type: " + RType.getType(bytes).code);
                    bytes.setValue(new Buffer(bytesWrite.getBytes(), 0, bytesWrite.getLength()));
                }
                //LOG.info("Writing binary primitive with class: "+bytes.getClass().getName());
                tbOut.write(bytes);
            }

            return;
        }
        case BOOLEAN: {
            BooleanObjectInspector boi = (BooleanObjectInspector) poi;
            BooleanWritable r = reuse == null ? new BooleanWritable() : (BooleanWritable) reuse;
            r.set(boi.get(o));
            tbOut.write(r);
            return;
        }
        case BYTE: {
            ByteObjectInspector boi = (ByteObjectInspector) poi;
            ByteWritable r = reuse == null ? new ByteWritable() : (ByteWritable) reuse;
            r.set(boi.get(o));
            tbOut.write(r);
            return;
        }
        case SHORT: {
            ShortObjectInspector spoi = (ShortObjectInspector) poi;
            ShortWritable r = reuse == null ? new ShortWritable() : (ShortWritable) reuse;
            r.set(spoi.get(o));
            tbOut.write(r);
            return;
        }
        case INT: {
            IntObjectInspector ioi = (IntObjectInspector) poi;
            IntWritable r = reuse == null ? new IntWritable() : (IntWritable) reuse;
            r.set(ioi.get(o));
            tbOut.write(r);
            return;
        }
        case LONG: {
            LongObjectInspector loi = (LongObjectInspector) poi;
            LongWritable r = reuse == null ? new LongWritable() : (LongWritable) reuse;
            r.set(loi.get(o));
            tbOut.write(r);
            return;
        }
        case FLOAT: {
            FloatObjectInspector foi = (FloatObjectInspector) poi;
            FloatWritable r = reuse == null ? new FloatWritable() : (FloatWritable) reuse;
            r.set(foi.get(o));
            tbOut.write(r);
            return;
        }
        case DOUBLE: {
            DoubleObjectInspector doi = (DoubleObjectInspector) poi;
            DoubleWritable r = reuse == null ? new DoubleWritable() : (DoubleWritable) reuse;
            r.set(doi.get(o));
            tbOut.write(r);
            return;
        }
        case STRING: {
            StringObjectInspector soi = (StringObjectInspector) poi;
            Text t = soi.getPrimitiveWritableObject(o);
            tbOut.write(t);
            return;
        }
        default: {
            throw new RuntimeException("Unrecognized type: " + poi.getPrimitiveCategory());
        }
        }
    }
    case LIST: {
        ListObjectInspector loi = (ListObjectInspector) oi;
        ObjectInspector elemOI = loi.getListElementObjectInspector();
        List l = loi.getList(o);
        // Don't use array (typecode: 144) until everything supports NA values in typedbytes
        if (false) { // disabled: (elemOI.getCategory() == ObjectInspector.Category.PRIMITIVE)
            tbOut.writeArray(l, (PrimitiveObjectInspector) elemOI);
        } else {
            tbOut.writeVector(l, (PrimitiveObjectInspector) elemOI);
        }
        return;
    }
    case MAP:
    case STRUCT: {
        // For complex object, serialize to JSON format
        String s = SerDeUtils.getJSONString(o, oi);
        Text t = reuse == null ? new Text() : (Text) reuse;

        // convert to Text and write it
        t.set(s);
        tbOut.write(t);
        return;
    }
    default: {
        throw new RuntimeException("Unrecognized type: " + oi.getCategory());
    }
    }
}

From source file: com.jfolson.hive.serde.RTypedBytesSerDe.java

License: Apache License

private void serializeField(Object o, ObjectInspector oi, Object reuse) throws IOException {
    //LOG.info("Serializing hive type: "+oi.getTypeName());
    //LOG.info("Serializing category: "+oi.getCategory().toString());
    if (o == null) {
        tbOut.writeNull();
        return;
    }
    switch (oi.getCategory()) {
    case PRIMITIVE: {
        PrimitiveObjectInspector poi = (PrimitiveObjectInspector) oi;
        //LOG.info("Serializing primitive: "+poi.getPrimitiveCategory().toString());
        switch (poi.getPrimitiveCategory()) {
        case VOID: {
            return;
        }
        case BINARY: {
            BinaryObjectInspector boi = (BinaryObjectInspector) poi;
            TypedBytesWritable bytes = reuse == null ? new TypedBytesWritable() : (TypedBytesWritable) reuse;
            BytesWritable bytesWrite = boi.getPrimitiveWritableObject(o);
            if (bytesWrite != null) {
                bytes.set(bytesWrite);
                if (!RType.isValid(bytes)) {
                    LOG.error("Invalid typedbytes detected with type: " + RType.getType(bytes).code);
                    bytes.setValue(new Buffer(bytesWrite.getBytes(), 0, bytesWrite.getLength()));
                }
                //LOG.info("Writing binary primitive with class: "+bytes.getClass().getName());
                tbOut.write(bytes);
            }

            return;
        }
        case BOOLEAN: {
            BooleanObjectInspector boi = (BooleanObjectInspector) poi;
            BooleanWritable r = reuse == null ? new BooleanWritable() : (BooleanWritable) reuse;
            r.set(boi.get(o));
            tbOut.write(r);
            return;
        }
        case BYTE: {
            ByteObjectInspector boi = (ByteObjectInspector) poi;
            ByteWritable r = reuse == null ? new ByteWritable() : (ByteWritable) reuse;
            r.set(boi.get(o));
            tbOut.write(r);
            return;
        }
        case SHORT: {
            ShortObjectInspector spoi = (ShortObjectInspector) poi;
            ShortWritable r = reuse == null ? new ShortWritable() : (ShortWritable) reuse;
            r.set(spoi.get(o));
            tbOut.write(r);
            return;
        }
        case INT: {
            IntObjectInspector ioi = (IntObjectInspector) poi;
            IntWritable r = reuse == null ? new IntWritable() : (IntWritable) reuse;
            r.set(ioi.get(o));
            tbOut.write(r);
            return;
        }
        case LONG: {
            LongObjectInspector loi = (LongObjectInspector) poi;
            LongWritable r = reuse == null ? new LongWritable() : (LongWritable) reuse;
            r.set(loi.get(o));
            tbOut.write(r);
            return;
        }
        case FLOAT: {
            FloatObjectInspector foi = (FloatObjectInspector) poi;
            FloatWritable r = reuse == null ? new FloatWritable() : (FloatWritable) reuse;
            r.set(foi.get(o));
            tbOut.write(r);
            return;
        }
        case DOUBLE: {
            DoubleObjectInspector doi = (DoubleObjectInspector) poi;
            DoubleWritable r = reuse == null ? new DoubleWritable() : (DoubleWritable) reuse;
            r.set(doi.get(o));
            tbOut.write(r);
            return;
        }
        case STRING: {
            StringObjectInspector soi = (StringObjectInspector) poi;
            Text t = soi.getPrimitiveWritableObject(o);
            tbOut.write(t);
            return;
        }
        default: {
            throw new RuntimeException("Unrecognized type: " + poi.getPrimitiveCategory());
        }
        }
    }
    case LIST: {
        ListObjectInspector loi = (ListObjectInspector) oi;
        ObjectInspector elemOI = loi.getListElementObjectInspector();
        List l = loi.getList(o);
        if (false) { // disabled: (elemOI.getCategory() == ObjectInspector.Category.PRIMITIVE)
            tbOut.writeArray(l, (PrimitiveObjectInspector) elemOI);
        } else {
            tbOut.writeVector(l, (PrimitiveObjectInspector) elemOI);
        }
        return;
    }
    case MAP:
    case STRUCT: {
        // For complex object, serialize to JSON format
        String s = SerDeUtils.getJSONString(o, oi);
        Text t = reuse == null ? new Text() : (Text) reuse;

        // convert to Text and write it
        t.set(s);
        tbOut.write(t);
        return;
    }
    default: {
        throw new RuntimeException("Unrecognized type: " + oi.getCategory());
    }
    }
}

From source file: com.jfolson.hive.serde.RTypedBytesWritableInput.java

License: Apache License

public Text readText(Text t) throws IOException {
    if (t == null) {
        t = new Text();
    }
    t.set(in.readString());
    return t;
}
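
Because the caller may pass a Text to reuse, a read loop can avoid allocating a new object per record. A hypothetical call site (input, recordCount, and process are stand-ins, not names from the project):

Text buffer = new Text();
for (int i = 0; i < recordCount; i++) {
    buffer = input.readText(buffer); // refills the same backing object each time
    process(buffer.toString());      // toString() materializes a String only when needed
}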

From source file: com.lucidworks.hadoop.utils.ZipFileRecordReader.java

License: Apache License

/**
 * Each ZipEntry is decompressed and readied for the Mapper. If the
 * ZipFileInputFormat has been set to Lenient (not the default), certain
 * exceptions will be gracefully ignored to prevent a larger job from
 * failing.
 */

@Override
public boolean next(Text key, BytesWritable value) throws IOException {
    {
        ZipEntry entry = null;
        try {
            entry = zip.getNextEntry();
        } catch (Throwable e) {
            if (!ZipFileInputFormat.getLenient()) {
                throw new RuntimeException(e);
            }
        }

        // Sanity check
        if (entry == null) {
            processed = true;
            return false;
        }

        // Filename
        key.set(new Text(entry.getName()));

        byte[] bufferOut = null;
        int cumulativeBytesRead = 0;
        while (true) {
            int bytesRead = 0;
            byte[] bufferIn = new byte[8192];
            try {
                bytesRead = zip.read(bufferIn, 0, bufferIn.length);
            } catch (Throwable e) {
                if (!ZipFileInputFormat.getLenient()) {
                    throw new RuntimeException(e);
                }
                return false;
            }
            if (bytesRead > 0) {
                byte[] tmp = head(bufferIn, bytesRead);
                if (cumulativeBytesRead == 0) {
                    bufferOut = tmp;
                } else {
                    bufferOut = add(bufferOut, tmp);
                }
                cumulativeBytesRead += bytesRead;
            } else {
                break;
            }
        }
        try {
            zip.closeEntry();
        } catch (IOException e) {
            if (!ZipFileInputFormat.getLenient()) {
                throw new RuntimeException(e);
            }
        }
        // Uncompressed contents
        if (bufferOut != null) {
            value.setCapacity(bufferOut.length);
            value.set(bufferOut, 0, bufferOut.length);
        } else {
            log.warn("bufferOut is null for " + key);//should we return false here?  I don't think so, since I think that would mean we can't process any more records
        }
        return true;
    }
}
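
One detail worth noting: key.set(new Text(entry.getName())) reaches the set(Text) overload through a temporary Text. Since set(String) copies the UTF-8 bytes directly, key.set(entry.getName()) would yield the same key contents without the intermediate allocation.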

From source file: com.m6d.filecrush.crush.Crush.java

License: Apache License

private void cloneOutput() throws IOException {

    List<FileStatus> listStatus = getOutputMappings();

    /*
     * Initialize to empty list, in which case swap() will be a no-op. The reference is then replaced with a real list, which is
     * used in the subsequent iterations.
     */
    List<Path> crushInput = emptyList();

    Text srcFile = new Text();
    Text crushOut = new Text();
    Text prevCrushOut = new Text();

    for (FileStatus partFile : listStatus) {
        Path path = partFile.getPath();

        Reader reader = new Reader(fs, path, fs.getConf());

        try {
            while (reader.next(srcFile, crushOut)) {
                if (!crushOut.equals(prevCrushOut)) {
                    swap(crushInput, prevCrushOut.toString());

                    prevCrushOut.set(crushOut);
                    crushInput = new LinkedList<Path>();
                }

                crushInput.add(new Path(srcFile.toString()));
            }
        } finally {
            try {
                reader.close();
            } catch (IOException e) {
                LOG.warn("Trapped exception when closing " + path, e);
            }
        }

        swap(crushInput, prevCrushOut.toString());
    }

    /*
     * Don't forget to move the files that were not crushed to the output dir so that the output dir has all the data that was in
     * the input dir, the difference being there are fewer files in the output dir.
     */
    if (removableFiles.size() > 0) {
        String srcDirName = fs.makeQualified(srcDir).toUri().getPath();
        String destName = fs.makeQualified(dest).toUri().getPath();
        print(Verbosity.INFO, "\n\nMoving removed files to " + destName);
        for (String name : removableFiles) {
            Path srcPath = new Path(name);
            Path destPath = new Path(destName + name).getParent();

            print(Verbosity.INFO, "\n  Moving " + srcPath + " to " + destPath);
            rename(srcPath, destPath, null);
        }
    }
}

From source file: com.m6d.filecrush.crush.Crush.java

License: Apache License

void writeDirs() throws IOException {

    print(Verbosity.INFO, "\nUsing temporary directory " + tmpDir.toUri().getPath() + "\n");

    FileStatus status = fs.getFileStatus(srcDir);

    Path tmpIn = new Path(tmpDir, "in");

    bucketFiles = new Path(tmpIn, "dirs");
    partitionMap = new Path(tmpIn, "partition-map");
    counters = new Path(tmpIn, "counters");

    skippedFiles = new HashSet<String>();
    removableFiles = new HashSet<String>();

    /*
     * Prefer the path returned by the status because it is always fully qualified.
     */
    List<Path> dirs = asList(status.getPath());

    Text key = new Text();
    Text value = new Text();

    Bucketer partitionBucketer = new Bucketer(maxTasks, 0, false);
    partitionBucketer.reset("partition-map");

    jobCounters = new Counters();
    int fileCount = 0;

    //Path bucketFile = new Path(tmpIn, "dirs_" + fileCount++);
    Writer writer = SequenceFile.createWriter(fs, job, bucketFiles, Text.class, Text.class,
            CompressionType.BLOCK);

    try {
        while (!dirs.isEmpty()) {
            List<Path> nextLevel = new LinkedList<Path>();

            for (Path dir : dirs) {
                String dirPath = dir.toUri().getPath();
                print(Verbosity.INFO, "\n\n[" + dirPath + "]");

                jobCounters.incrCounter(MapperCounter.DIRS_FOUND, 1);

                FileStatus[] contents = fs.listStatus(dir, new PathFilter() {
                    @Override
                    public boolean accept(Path testPath) {
                        if (ignoredFilesMatcher == null)
                            return true;
                        ignoredFilesMatcher.reset(testPath.toUri().getPath());
                        boolean ignores = ignoredFilesMatcher.matches();
                        if (ignores)
                            LOG.info("Ignoring file " + testPath);
                        return !ignores;
                    }

                });

                if (contents == null || contents.length == 0) {
                    print(Verbosity.INFO, "\n  Directory is empty");

                    jobCounters.incrCounter(MapperCounter.DIRS_SKIPPED, 1);
                } else {
                    List<FileStatus> crushables = new ArrayList<FileStatus>(contents.length);
                    Set<String> uncrushedFiles = new HashSet<String>(contents.length);

                    long crushableBytes = 0;

                    /*
                     * Queue sub directories for subsequent inspection and examine the files in this directory.
                     */
                    for (FileStatus content : contents) {
                        Path path = content.getPath();

                        if (content.isDir()) {
                            nextLevel.add(path);
                        } else {
                            String filePath = path.toUri().getPath();
                            boolean skipFile = false;
                            if (skippedFilesMatcher != null) {
                                skippedFilesMatcher.reset(filePath);
                                if (skippedFilesMatcher.matches()) {
                                    skipFile = true;
                                }
                            }

                            boolean changed = uncrushedFiles.add(filePath);
                            assert changed : path.toUri().getPath();
                            long fileLength = content.getLen();

                            if (!skipFile && fileLength <= maxEligibleSize) {
                                if (removeEmptyFiles && fileLength == 0)
                                    removableFiles.add(filePath);
                                else {
                                    crushables.add(content);
                                    crushableBytes += fileLength;
                                }
                            }
                        }
                    }

                    /*
                     * We found a directory with data in it. Make sure we know how to name the crush output file and then increment the
                     * number of files we found.
                     */
                    if (!uncrushedFiles.isEmpty()) {
                        if (-1 == findMatcher(dir)) {
                            throw new IllegalArgumentException(
                                    "Could not find matching regex for directory: " + dir);
                        }

                        jobCounters.incrCounter(MapperCounter.FILES_FOUND, uncrushedFiles.size());
                    }

                    if (0 == crushableBytes) {
                        print(Verbosity.INFO, "\n  Directory has no crushable files");

                        jobCounters.incrCounter(MapperCounter.DIRS_SKIPPED, 1);
                    } else {
                        /*
                         * We found files to consider for crushing.
                         */
                        long nBlocks = crushableBytes / dfsBlockSize;

                        if (nBlocks * dfsBlockSize != crushableBytes) {
                            nBlocks++;
                        }

                        /*
                         * maxFileBlocks will be huge in v1 mode, which will lead to one bucket per directory.
                         */
                        long dirBuckets = nBlocks / maxFileBlocks;
                        if (dirBuckets * maxFileBlocks != nBlocks) {
                            dirBuckets++;
                        }

                        if (dirBuckets > Integer.MAX_VALUE) {
                            throw new AssertionError("Too many buckets: " + dirBuckets);
                        }

                        Bucketer directoryBucketer = new Bucketer((int) dirBuckets, excludeSingleFileDirs);
                        directoryBucketer.reset(getPathPart(dir));

                        for (FileStatus file : crushables) {
                            directoryBucketer.add(new FileStatusHasSize(file));
                        }

                        List<Bucket> crushFiles = directoryBucketer.createBuckets();
                        if (crushFiles.isEmpty()) {
                            jobCounters.incrCounter(MapperCounter.DIRS_SKIPPED, 1);
                            print(Verbosity.INFO, "\n  Directory skipped");
                        } else {
                            nBuckets += crushFiles.size();
                            jobCounters.incrCounter(MapperCounter.DIRS_ELIGIBLE, 1);
                            print(Verbosity.INFO, "\n  Generating " + crushFiles.size() + " output files");

                            /*
                             * Write out the mapping between a bucket and a file.
                             */
                            for (Bucket crushFile : crushFiles) {
                                String bucketId = crushFile.name();

                                List<String> filesInBucket = crushFile.contents();

                                print(Verbosity.INFO,
                                        format("\n  Output %s will include %,d input bytes from %,d files",
                                                bucketId, crushFile.size(), filesInBucket.size()));

                                key.set(bucketId);

                                for (String f : filesInBucket) {
                                    boolean changed = uncrushedFiles.remove(f);
                                    assert changed : f;

                                    pathMatcher.reset(f);
                                    pathMatcher.matches();

                                    value.set(pathMatcher.group(5));

                                    /*
                                     * Write one row per file to maximize the number of mappers
                                     */
                                    writer.append(key, value);

                                    /*
                                     * Print the input file with four leading spaces.
                                     */
                                    print(Verbosity.VERBOSE, "\n    " + f);
                                }

                                jobCounters.incrCounter(MapperCounter.FILES_ELIGIBLE, filesInBucket.size());

                                partitionBucketer.add(crushFile);
                            }
                        }
                    }

                    if (!removableFiles.isEmpty()) {
                        print(Verbosity.INFO, "\n  Marked " + removableFiles.size() + " files for removal");

                        for (String removable : removableFiles) {
                            uncrushedFiles.remove(removable);
                            print(Verbosity.VERBOSE, "\n    " + removable);
                        }

                        jobCounters.incrCounter(MapperCounter.FILES_REMOVED, removableFiles.size());
                    }

                    if (!uncrushedFiles.isEmpty()) {
                        print(Verbosity.INFO, "\n  Skipped " + uncrushedFiles.size() + " files");

                        for (String uncrushed : uncrushedFiles) {
                            print(Verbosity.VERBOSE, "\n    " + uncrushed);
                        }

                        jobCounters.incrCounter(MapperCounter.FILES_SKIPPED, uncrushedFiles.size());
                    }

                    skippedFiles.addAll(uncrushedFiles);
                }
            }

            dirs = nextLevel;
        }
    } finally {
        writer.close();
    }

    /*
     * Now that we have processed all the directories, write the partition map.
     */
    List<Bucket> partitions = partitionBucketer.createBuckets();
    assert partitions.size() <= maxTasks;

    writer = SequenceFile.createWriter(fs, job, partitionMap, Text.class, IntWritable.class);
    IntWritable partNum = new IntWritable();
    int totalReducers = 0;
    for (Bucket partition : partitions) {
        String partitionName = partition.name();

        int p = Integer.parseInt(partitionName.substring(partitionName.lastIndexOf('-') + 1));
        partNum.set(p);

        if (partition.contents().size() > 0)
            totalReducers++;

        for (String bucketId : partition.contents()) {
            key.set(bucketId);
            writer.append(key, partNum);
        }
    }
    writer.close();

    print(Verbosity.INFO, "\n\nNumber of allocated reducers = " + totalReducers);
    job.setInt("mapreduce.job.reduces", totalReducers);

    DataOutputStream countersStream = fs.create(this.counters);
    jobCounters.write(countersStream);
    countersStream.close();
}