Example usage for org.apache.hadoop.io Text append

Introduction

On this page you can find example usages of the append method of org.apache.hadoop.io.Text.

Prototype

public void append(byte[] utf8, int start, int len)

Source Link

Document

Append a range of bytes to the end of the given text

Usage

From source file:edu.umn.cs.spatialHadoop.delaunay.DelaunayTriangulation.java

License:Open Source License

/**
 * Computes the Delaunay triangulation of the input points on the local machine.
 * <p>
 * The input is divided into splits, the splits are read in parallel into
 * memory, all points are merged into one array (optionally de-duplicated) and
 * the triangulation is computed on that array. If requested, the resulting
 * triangles are written to {@code outPath}, one triangle per line with its
 * three corners separated by tabs.
 * <p>
 * Parameters read from {@code params}: {@code mem} (start memory reporting),
 * {@code parallel} (parallelism used for reading, defaults to the number of
 * available processors), {@code dedup} and {@code threshold} (point
 * de-duplication, on by default with threshold 1E-5), and {@code output}
 * (whether to write the result, on by default).
 *
 * @param inPaths the input paths to read points from
 * @param outPath the file to write the triangles to; nothing is written if
 *   this is {@code null}
 * @param params the job configuration and user-provided options
 * @throws IOException if the splits cannot be computed or the output cannot
 *   be written
 * @throws InterruptedException if the operation is interrupted
 * @throws RuntimeException if a worker fails while reading its input splits;
 *   the original exception is attached as the cause
 */
public static void delaunayLocal(Path[] inPaths, Path outPath, final OperationsParams params)
        throws IOException, InterruptedException {
    if (params.getBoolean("mem", false))
        MemoryReporter.startReporting();
    // 1- Split the input path/file to get splits that can be processed
    // independently
    final SpatialInputFormat3<Rectangle, Point> inputFormat = new SpatialInputFormat3<Rectangle, Point>();
    Job job = Job.getInstance(params);
    SpatialInputFormat3.setInputPaths(job, inPaths);
    final List<InputSplit> splits = inputFormat.getSplits(job);
    final Point[][] allLists = new Point[splits.size()][];

    // 2- Read all input points in memory
    LOG.info("Reading points from " + splits.size() + " splits");
    List<Integer> numsPoints = Parallel.forEach(splits.size(), new RunnableRange<Integer>() {
        @Override
        public Integer run(int i1, int i2) {
            try {
                int numPoints = 0;
                for (int i = i1; i < i2; i++) {
                    List<Point> points = new ArrayList<Point>();
                    FileSplit fsplit = (FileSplit) splits.get(i);
                    final RecordReader<Rectangle, Iterable<Point>> reader = inputFormat
                            .createRecordReader(fsplit, null);
                    try {
                        if (reader instanceof SpatialRecordReader3) {
                            ((SpatialRecordReader3) reader).initialize(fsplit, params);
                        } else if (reader instanceof RTreeRecordReader3) {
                            ((RTreeRecordReader3) reader).initialize(fsplit, params);
                        } else if (reader instanceof HDFRecordReader) {
                            ((HDFRecordReader) reader).initialize(fsplit, params);
                        } else {
                            throw new RuntimeException("Unknown record reader");
                        }
                        while (reader.nextKeyValue()) {
                            Iterable<Point> pts = reader.getCurrentValue();
                            for (Point p : pts) {
                                // Clone each point before storing it
                                // NOTE(review): presumably the reader reuses its
                                // Point objects - confirm
                                points.add(p.clone());
                            }
                        }
                    } finally {
                        // Release the reader even if reading this split failed
                        reader.close();
                    }
                    numPoints += points.size();
                    allLists[i] = points.toArray(new Point[points.size()]);
                }
                return numPoints;
            } catch (IOException e) {
                // Fail loudly. Returning null here would only surface later as
                // a NullPointerException while summing the counts or merging
                // the lists, hiding the real cause.
                throw new RuntimeException("Error reading input splits [" + i1 + ", " + i2 + ")", e);
            } catch (InterruptedException e) {
                Thread.currentThread().interrupt();
                throw new RuntimeException("Interrupted while reading input splits [" + i1 + ", " + i2 + ")",
                        e);
            }
        }
    }, params.getInt("parallel", Runtime.getRuntime().availableProcessors()));

    int totalNumPoints = 0;
    for (int numPoints : numsPoints)
        totalNumPoints += numPoints;

    LOG.info("Read " + totalNumPoints + " points and merging into one list");
    Point[] allPoints = new Point[totalNumPoints];
    int pointer = 0;

    for (int iList = 0; iList < allLists.length; iList++) {
        System.arraycopy(allLists[iList], 0, allPoints, pointer, allLists[iList].length);
        pointer += allLists[iList].length;
        allLists[iList] = null; // To let the GC collect it
    }

    if (params.getBoolean("dedup", true)) {
        float threshold = params.getFloat("threshold", 1E-5f);
        allPoints = SpatialAlgorithms.deduplicatePoints(allPoints, threshold);
    }

    LOG.info("Computing DT for " + allPoints.length + " points");
    GSDTAlgorithm dtAlgorithm = new GSImprovedAlgorithm(allPoints, null);
    LOG.info("DT computed");

    // NOTE(review): bigMBR is not used anywhere below. The computation is kept
    // because FileMBR.fileMBR may have side effects that callers rely on -
    // confirm and remove if it does not.
    Rectangle mbr = FileMBR.fileMBR(inPaths, params);
    double buffer = Math.max(mbr.getWidth(), mbr.getHeight()) / 10;
    Rectangle bigMBR = mbr.buffer(buffer, buffer);
    if (outPath != null && params.getBoolean("output", true)) {
        LOG.info("Writing the output as a soup of triangles");
        Triangulation answer = dtAlgorithm.getFinalTriangulation();
        FileSystem outFS = outPath.getFileSystem(params);
        PrintStream out = new PrintStream(outFS.create(outPath));
        try {
            Text text = new Text2();
            byte[] tab = "\t".getBytes();
            for (Point[] triangle : answer.iterateTriangles()) {
                // One triangle per line: corner0 <tab> corner1 <tab> corner2
                text.clear();
                triangle[0].toText(text);
                text.append(tab, 0, tab.length);
                triangle[1].toText(text);
                text.append(tab, 0, tab.length);
                triangle[2].toText(text);
                out.println(text);
            }
            // PrintStream never throws on write errors; it only records them
            if (out.checkError())
                throw new IOException("Error writing triangulation to " + outPath);
        } finally {
            out.close();
        }
    }
}

From source file:edu.umn.cs.spatialHadoop.io.TextSerializerHelper.java

License:Open Source License

/**
 * Appends the hexadecimal form of the binary representation of a geometry to
 * the given text, optionally followed by one terminating character.
 *
 * @param text the text to append to
 * @param geom the geometry to serialize
 * @param toAppend the character to append after the geometry, or '\0' to
 *   append nothing
 */
public static void serializeGeometry(Text text, OGCGeometry geom, char toAppend) {
    final byte[] hexBytes = bytesToHex(geom.asBinary().array()).getBytes();
    text.append(hexBytes, 0, hexBytes.length);
    if (toAppend == '\0') {
        // No terminator requested
        return;
    }
    final byte[] terminator = { (byte) toAppend };
    text.append(terminator, 0, terminator.length);
}

From source file:edu.umn.cs.spatialHadoop.io.TextSerializerHelper.java

License:Open Source License

/**
 * Appends the text form of a geometry (as returned by its {@code toText()})
 * to the given text, optionally followed by one terminating character. A
 * {@code null} geometry is written as an empty string.
 *
 * @param text the text to append to
 * @param geom the geometry to serialize; may be {@code null}
 * @param toAppend the character to append after the geometry, or '\0' to
 *   append nothing
 */
public static void serializeGeometry(Text text, Geometry geom, char toAppend) {
    final byte[] wktBytes = (geom != null ? geom.toText() : "").getBytes();
    text.append(wktBytes, 0, wktBytes.length);
    if (toAppend == '\0') {
        // No terminator requested
        return;
    }
    text.append(new byte[] { (byte) toAppend }, 0, 1);
}

From source file:edu.umn.cs.spatialHadoop.mapred.SpatialRecordReader.java

License:Open Source License

/**
 * Reads the next line from input and return true if a line was read.
 * If no more lines are available in this split, a false is returned.
 * @param value/*from   w  w w  . jav a 2s.c  om*/
 * @return
 * @throws IOException
 */
protected boolean nextLine(Text value) throws IOException {
    if (blockType == BlockType.RTREE && pos == 8) {
        // File is positioned at the RTree header
        // Skip the header and go to first data object in file
        pos += RTree.skipHeader(in);
        LOG.info("Skipped R-tree to position: " + pos);
        // Reinitialize record reader at the new position
        lineReader = new LineReader(in);
    }
    while (getFilePosition() <= end) {
        value.clear();
        int b = 0;
        if (buffer != null) {
            // Read the first line encountered in buffer
            int eol = RTree.skipToEOL(buffer, 0);
            b += eol;
            value.append(buffer, 0, eol);
            if (eol < buffer.length) {
                // There are still some bytes remaining in buffer
                byte[] tmp = new byte[buffer.length - eol];
                System.arraycopy(buffer, eol, tmp, 0, tmp.length);
                buffer = tmp;
            } else {
                buffer = null;
            }
            // Check if a complete line has been read from the buffer
            byte last_byte = value.getBytes()[value.getLength() - 1];
            if (last_byte == '\n' || last_byte == '\r')
                return true;
        }

        // Read the first line from stream
        Text temp = new Text();
        b += lineReader.readLine(temp);
        if (b == 0) {
            // Indicates an end of stream
            return false;
        }
        pos += b;

        // Append the part read from stream to the part extracted from buffer
        value.append(temp.getBytes(), 0, temp.getLength());

        if (value.getLength() > 1) {
            // Read a non-empty line. Note that end-of-line character is included
            return true;
        }
    }
    // Reached end of file
    return false;
}

From source file:edu.umn.cs.spatialHadoop.nasa.NASAPoint.java

License:Open Source License

/**
 * Appends the text form of this point to the given text: the representation
 * written by the parent class, then {@code Separator}, then {@code value}
 * followed by a comma, then {@code timestamp}.
 *
 * @param text the text to append to
 * @return the same {@code text} instance that was passed in
 */
@Override
public Text toText(Text text) {
    super.toText(text);
    final byte[] fieldSeparator = Separator;
    text.append(fieldSeparator, 0, fieldSeparator.length);
    TextSerializerHelper.serializeInt(value, text, ',');
    // '\0' presumably means "no trailing separator" - confirm in TextSerializerHelper
    TextSerializerHelper.serializeLong(timestamp, text, '\0');
    return text;
}

From source file:edu.umn.cs.spatialHadoop.nasa.NASARectangle.java

License:Open Source License

/**
 * Appends the text form of this rectangle to the given text: the
 * representation written by the parent class, then {@code Separator}, then
 * {@code value} followed by a comma, then {@code timestamp}.
 *
 * @param text the text to append to
 * @return the same {@code text} instance that was passed in
 */
@Override
public Text toText(Text text) {
    super.toText(text);
    final byte[] fieldSeparator = Separator;
    text.append(fieldSeparator, 0, fieldSeparator.length);
    TextSerializerHelper.serializeInt(value, text, ',');
    // '\0' presumably means "no trailing separator" - confirm in TextSerializerHelper
    TextSerializerHelper.serializeLong(timestamp, text, '\0');
    return text;
}

From source file:edu.umn.cs.spatialHadoop.operations.LocalSampler.java

License:Open Source License

/**
 * Read from the given stream until end-of-line is reached.
 * @param in - the input stream from where to read the line
 * @param line - the line that has been read from file not including EOL
 * @return - number of bytes read including EOL characters
 * @throws IOException /*from w  w  w . j  a va2s  . c om*/
 */
public static int readUntilEOL(InputStream in, Text line) throws IOException {
    final byte[] bufferBytes = new byte[1024];
    int bufferLength = 0; // Length of the buffer
    do {
        if (bufferLength == bufferBytes.length) {
            // Buffer full. Copy to the output text
            line.append(bufferBytes, 0, bufferLength);
            bufferLength = 0;
        }
        if (bufferLength == 0) {
            // Read and skip any initial EOL characters
            do {
                bufferBytes[0] = (byte) in.read();
            } while (bufferBytes[0] != -1 && (bufferBytes[0] == '\n' || bufferBytes[0] == '\r'));
            if (bufferBytes[0] != -1)
                bufferLength++;
        } else {
            bufferBytes[bufferLength++] = (byte) in.read();
        }
    } while (bufferLength > 0 && bufferBytes[bufferLength - 1] != -1 && bufferBytes[bufferLength - 1] != '\n'
            && bufferBytes[bufferLength - 1] != '\r');
    if (bufferLength > 0) {
        bufferLength--;
        line.append(bufferBytes, 0, bufferLength);
    }
    return line.getLength();
}

From source file:edu.umn.cs.spatialHadoop.osm.OSMEdge.java

License:Open Source License

/**
 * Appends the text form of this edge to the given text as comma-terminated
 * fields in this order: edge ID, first node ID, lat1, lon1, second node ID,
 * lat2, lon2, way ID. The tags, when present, are appended last.
 *
 * @param text the text to append to
 * @return the same {@code text} instance that was passed in
 */
@Override
public Text toText(Text text) {
    // Identifier of the edge
    TextSerializerHelper.serializeLong(edgeId, text, ',');

    // First end point
    TextSerializerHelper.serializeLong(nodeId1, text, ',');
    TextSerializerHelper.serializeDouble(lat1, text, ',');
    TextSerializerHelper.serializeDouble(lon1, text, ',');

    // Second end point
    TextSerializerHelper.serializeLong(nodeId2, text, ',');
    TextSerializerHelper.serializeDouble(lat2, text, ',');
    TextSerializerHelper.serializeDouble(lon2, text, ',');

    TextSerializerHelper.serializeLong(wayId, text, ',');

    if (tags == null) {
        return text;
    }
    // NOTE(review): if tags is a String, getBytes() uses the platform default
    // charset - confirm that this is the intended encoding
    final byte[] encodedTags = tags.getBytes();
    text.append(encodedTags, 0, encodedTags.length);
    return text;
}

From source file:edu.umn.cs.spatialHadoop.util.JspSpatialHelper.java

License:Open Source License

/**
 * Runs the given process and returns the result code. Feeds the given string
 * to the stdin of the run process. If stdout or stderr is non-null, they are
 * filled with the stdout or stderr of the run process, respectively.
 * If wait is set to true, the process is run in synchronous mode where we
 * wait until it is finished. Otherwise, this function call returns
 * immediately and leaves the process running in the background. In the later
 * case, stdout, stderr and the return value are not valid.
 * /* ww  w . j a  v a2 s. c  om*/
 * @param workingDir - The working directory to run the script. Set null for
 *   default.
 * @param cmd - The command line to run including all parameters
 * @param stdin - The string to feed to the stdin of the run process.
 * @param stdout - If non-null, the stdout of the process is fed here.
 * @param stderr - If non-null, the stderr of the process is fed here.
 * @param wait - Set to true to wait until the process exits.
 * @return
 * @throws IOException
 */
public static int runProcess(File workingDir, String cmd, String stdin, Text stdout, Text stderr, boolean wait)
        throws IOException {
    new File("asdf").list(new FilenameFilter() {
        @Override
        public boolean accept(File dir, String name) {
            return false;
        }
    });
    Process process;
    if (workingDir == null)
        process = Runtime.getRuntime().exec(cmd);
    else
        process = Runtime.getRuntime().exec(cmd, null, workingDir);
    if (stdin != null) {
        PrintStream ps = new PrintStream(process.getOutputStream());
        ps.print(stdin);
        ps.close();
    }

    if (!wait)
        return 0;

    try {
        int exitCode = process.waitFor();
        byte[] buffer = new byte[4096];
        if (stdout != null) {
            stdout.clear();
            InputStream in = process.getInputStream();
            while (in.available() > 0) {
                int bytesRead = in.read(buffer);
                stdout.append(buffer, 0, bytesRead);
            }
            in.close();
        }
        if (stderr != null) {
            stderr.clear();
            InputStream err = process.getErrorStream();
            while (err.available() > 0) {
                int bytesRead = err.read(buffer);
                stderr.append(buffer, 0, bytesRead);
            }
            err.close();
        }
        return exitCode;
    } catch (InterruptedException e) {
        e.printStackTrace();
        return 1;
    }
}

From source file:edu.umn.cs.sthadoop.core.STRectangle.java

License:Open Source License

/**
 * Appends the text form of this rectangle to the given text: the bytes of
 * {@code time}, then a comma, then the representation written by the parent
 * class.
 *
 * @param text the text to append to
 * @return whatever the parent class's {@code toText} returns for {@code text}
 */
@Override
public Text toText(Text text) {
    // Encode each piece once instead of calling getBytes() for every use
    byte[] timeBytes = time.getBytes();
    byte[] separator = ",".getBytes();
    text.append(timeBytes, 0, timeBytes.length);
    text.append(separator, 0, separator.length);
    return super.toText(text);
}