Example usage for org.apache.hadoop.io Text getBytes

Introduction

On this page you can find example usages of the getBytes() method of org.apache.hadoop.io.Text.

Prototype

@Override
public byte[] getBytes()

Source Link

Document

Returns the raw bytes; however, only the data up to getLength() is valid.

Usage

From source file:edu.umn.cs.spatialHadoop.core.GridRecordWriter.java

License:Open Source License

/**
 * Finishes a cell on a background thread. Once a cell has been closed no
 * further data should be written to it.
 *
 * <p>The closing work is wrapped in a new thread that is appended to
 * {@code closingThreads}. Threads at the head of that list which have already
 * terminated are then discarded, and the thread left at the head is started
 * if it has not been started yet.
 *
 * @param intermediateCellPath passed to {@code flushAllEntries}
 * @param finalCellPath passed to {@code flushAllEntries}
 * @param intermediateCellStream passed to {@code flushAllEntries}
 * @param masterFile stream that receives one line describing the closed
 *        cell; when {@code null} no line is written
 * @param cellMbr cell information stored in the master-file entry
 * @param recordCount record count stored in the master-file entry
 * @param cellSize size stored in the master-file entry
 * @throws IOException declared for callers; an {@code IOException} raised
 *         inside the background thread is rethrown there as a
 *         {@code RuntimeException}
 */
protected void closeCellBackground(final Path intermediateCellPath, final Path finalCellPath,
        final OutputStream intermediateCellStream, final OutputStream masterFile, final CellInfo cellMbr,
        final long recordCount, final long cellSize) throws IOException {

    Thread closer = new Thread() {
        @Override
        public void run() {
            try {
                Path writtenPath = flushAllEntries(intermediateCellPath, intermediateCellStream,
                        finalCellPath);
                if (masterFile == null) {
                    // Nothing to record for this cell
                    return;
                }
                // One master-file line per cell: file name plus cell info
                Partition entry = new Partition(writtenPath.getName(), cellMbr);
                entry.recordCount = recordCount;
                entry.size = cellSize;
                Text serialized = entry.toText(new Text());
                // Only the first getLength() bytes of the backing array are valid
                masterFile.write(serialized.getBytes(), 0, serialized.getLength());
                masterFile.write(NEW_LINE);
            } catch (IOException e) {
                throw new RuntimeException("Error closing thread", e);
            }
        }
    };

    closingThreads.add(closer);

    // Discard threads at the head of the list that have already finished
    while (!closingThreads.isEmpty()) {
        if (closingThreads.get(0).getState() != Thread.State.TERMINATED) {
            break;
        }
        closingThreads.remove(0);
    }

    // Start the thread at the head of the list if it has not run yet
    if (!closingThreads.isEmpty()) {
        Thread head = closingThreads.get(0);
        if (head.getState() == Thread.State.NEW) {
            head.start();
        }
    }
}

From source file:edu.umn.cs.spatialHadoop.core.JTSShape.java

License:Apache License

/**
 * Parses this shape from the beginning of {@code text}.
 *
 * <p>The shape text runs from the start of {@code text} up to the first
 * occurrence of {@code Separator[0]}. If the text starts with a single or
 * double quote, that quote character is used as the terminator instead and
 * the opening quote is not part of the shape text. The shape text is handed
 * to {@code parseText} and the result stored in {@code geom}. Whatever
 * follows the terminator is stored in {@code extra}, or {@code null} when
 * nothing follows.
 *
 * <p>If {@code parseText} throws a {@code ParseException} the error is
 * logged and {@code geom} and {@code extra} keep their previous values.
 *
 * @param text the text to parse; it is only read, never modified
 * @throws RuntimeException rethrown after logging when parsing fails with
 *         an unchecked exception
 */
@Override
public void fromText(Text text) {
    try {
        // getBytes() exposes the backing array; only the first getLength()
        // bytes are valid, so every access below is bounded by length.
        byte[] bytes = text.getBytes();
        int length = text.getLength();

        // Look for the terminator of the shape text
        byte terminator = Separator[0];
        int i1 = 0;
        // Guard on length so an empty text never inspects a stale byte
        if (length > 0 && (bytes[0] == '\'' || bytes[0] == '\"')) {
            terminator = bytes[i1++];
        }
        int i2 = i1;
        while (i2 < length && bytes[i2] != terminator)
            i2++;

        // NOTE(review): decoding uses the platform default charset while Text
        // holds UTF-8; confirm against the matching toText before changing.
        String str = new String(bytes, i1, i2 - i1);
        geom = parseText(str);

        // Skip the terminator itself; the rest of the line is kept verbatim
        if (++i2 < length) {
            extra = new String(bytes, i2, length - i2);
        } else {
            extra = null;
        }
    } catch (RuntimeException e) {
        LOG.error("Error parsing: " + text);
        throw e;
    } catch (ParseException e) {
        // Log the cause through the logger instead of printing to stderr
        LOG.error("Error parsing: " + text, e);
    }
}

From source file:edu.umn.cs.spatialHadoop.core.OGCShape.java

License:Apache License

/**
 * Parses this shape from the beginning of {@code text}.
 *
 * <p>The shape text runs from the start of {@code text} up to the first
 * occurrence of {@code Separator[0]}. If the text starts with a single or
 * double quote, that quote character is used as the terminator instead and
 * the opening quote is not part of the shape text. The shape text is handed
 * to {@code parseText} and the result stored in {@code geom}. Whatever
 * follows the terminator is stored in {@code extra}, or {@code null} when
 * nothing follows.
 *
 * @param text the text to parse; it is only read, never modified
 * @throws RuntimeException rethrown after logging when parsing fails
 */
@Override
public void fromText(Text text) {
    try {
        // getBytes() exposes the backing array; only the first getLength()
        // bytes are valid, so every access below is bounded by length.
        byte[] bytes = text.getBytes();
        int length = text.getLength();

        // Look for the terminator of the shape text
        byte terminator = Separator[0];
        int i1 = 0;
        // Guard on length so an empty text never inspects a stale byte
        if (length > 0 && (bytes[0] == '\'' || bytes[0] == '\"')) {
            terminator = bytes[i1++];
        }
        int i2 = i1;
        while (i2 < length && bytes[i2] != terminator)
            i2++;

        // NOTE(review): decoding uses the platform default charset while Text
        // holds UTF-8; confirm against the matching toText before changing.
        String str = new String(bytes, i1, i2 - i1);
        geom = parseText(str);

        // Skip the terminator itself; the rest of the line is kept verbatim
        if (++i2 < length) {
            extra = new String(bytes, i2, length - i2);
        } else {
            extra = null;
        }
    } catch (RuntimeException e) {
        LOG.error("Error parsing: " + text);
        throw e;
    }
}

From source file:edu.umn.cs.spatialHadoop.core.Partition.java

License:Open Source License

/**
 * Reads this partition from {@code text}: the parent's fields first, then
 * the record count, the size and finally the file name.
 *
 * @param text the text to parse; consumed fields are removed from it
 */
@Override
public void fromText(Text text) {
    super.fromText(text);

    // Skip comma: drop the first remaining byte
    final byte[] raw = text.getBytes();
    final int remaining = text.getLength() - 1;
    text.set(raw, 1, remaining);

    this.recordCount = TextSerializerHelper.consumeLong(text, ',');
    this.size = TextSerializerHelper.consumeLong(text, ',');
    // Whatever is left after the two numbers is the file name
    filename = text.toString();
}

From source file:edu.umn.cs.spatialHadoop.io.TextSerializerHelper.java

License:Open Source License

/**
 * Deserializes and consumes a double from the start of the given text.
 * Consuming means that every byte read for deserialization is removed from
 * the text.
 *
 * <p>The number is the longest leading run of digits, {@code '+'},
 * {@code '-'}, {@code '.'}, {@code 'e'} and {@code 'E'}. If the byte right
 * after that run equals {@code separator}, it is consumed as well.
 *
 * @param text the text to read from; shortened in place
 * @param separator the separator to consume after the number, if present
 * @return the parsed value
 * @throws NumberFormatException if the leading run is not a valid double
 */
public static double consumeDouble(Text text, char separator) {
    final byte[] bytes = text.getBytes();
    final int length = text.getLength();

    // Find the end of the leading run of number characters
    int end = 0;
    for (; end < length; end++) {
        final byte b = bytes[end];
        final boolean digit = b >= '0' && b <= '9';
        final boolean numberSymbol = b == 'e' || b == 'E' || b == '-' || b == '+' || b == '.';
        if (!digit && !numberSymbol) {
            break;
        }
    }
    final double parsed = Double.parseDouble(new String(bytes, 0, end));

    // Also swallow the separator when it directly follows the number
    int consumed = end;
    if (consumed < length && bytes[consumed] == separator) {
        consumed++;
    }

    // Shift the unread tail to the front and shrink the text to match
    final int remaining = length - consumed;
    System.arraycopy(bytes, consumed, bytes, 0, remaining);
    text.set(bytes, 0, remaining);
    return parsed;
}

From source file:edu.umn.cs.spatialHadoop.io.TextSerializerHelper.java

License:Open Source License

/**
 * Reads key/value pairs from the start of {@code text} into {@code tags}.
 *
 * <p>{@code tags} is always cleared first. Nothing else happens unless the
 * text is non-empty and starts with {@code Separators[MapStart]}. Pairs are
 * then read until {@code Separators[MapEnd]} or the end of the text: a key
 * ends at {@code Separators[KeyValueSeparator]} and a value ends at
 * {@code Separators[FieldSeparator]} or {@code Separators[MapEnd]}. If
 * scanning stops before the end of the text, the text is replaced by the
 * bytes from the stopping position onwards.
 *
 * @param text the text to read from
 * @param tags the map that receives the parsed pairs
 */
public static void consumeMap(Text text, Map<String, String> tags) {
    tags.clear();

    final int length = text.getLength();
    if (length <= 0) {
        return;
    }
    final byte[] raw = text.getBytes();
    if (raw[0] != Separators[MapStart]) {
        return;
    }

    int pos = 1;
    while (pos < length && raw[pos] != Separators[MapEnd]) {
        // Key: from pos up to the key/value separator
        int keyEnd = pos + 1;
        while (keyEnd < length && raw[keyEnd] != Separators[KeyValueSeparator]) {
            keyEnd++;
        }
        String key = new String(raw, pos, keyEnd - pos);

        // Value: from just past the key/value separator up to a field
        // separator or the end of the map
        int valueStart = keyEnd + 1;
        int valueEnd = valueStart + 1;
        while (valueEnd < length && raw[valueEnd] != Separators[FieldSeparator]
                && raw[valueEnd] != Separators[MapEnd]) {
            valueEnd++;
        }
        String value = new String(raw, valueStart, valueEnd - valueStart);
        tags.put(key, value);

        // Continue after the value, stepping over a field separator
        pos = valueEnd;
        if (pos < length && raw[pos] == Separators[FieldSeparator]) {
            pos++;
        }
    }

    if (pos < length) {
        text.set(raw, pos, length - pos);
    }
}

From source file:edu.umn.cs.spatialHadoop.io.TextSerializerHelper.java

License:Open Source License

/**
 * Parses a geometry from the start of {@code text} and removes the parsed
 * bytes from it.
 *
 * <p>The geometry text runs up to the first occurrence of {@code separator}.
 * If the text starts with a single or double quote, that quote character is
 * used as the terminator instead and the opening quote is not part of the
 * geometry text. The terminator itself is not consumed: afterwards
 * {@code text} starts at the terminator, or is empty if none was found.
 *
 * @param text the text to read from; shortened in place
 * @param separator the byte that ends the geometry text when it is unquoted
 * @return the geometry produced by {@code parseText}
 */
public static OGCGeometry consumeGeometryESRI(Text text, char separator) {
    // getBytes() exposes the backing array; only the first getLength()
    // bytes are valid, so every access below is bounded by length.
    byte[] bytes = text.getBytes();
    int length = text.getLength();

    // Look for the terminator of the shape text
    int i1 = 0;
    // Guard on length so an empty text never inspects a stale byte
    if (length > 0 && (bytes[0] == '\'' || bytes[0] == '\"')) {
        separator = (char) bytes[i1++];
    }
    int i2 = i1;
    while (i2 < length && bytes[i2] != separator)
        i2++;

    // NOTE(review): decoding uses the platform default charset while Text
    // holds UTF-8; confirm against the matching serializer before changing.
    String str = new String(bytes, i1, i2 - i1);

    // Remove consumed bytes from the text
    text.set(bytes, i2, length - i2);

    return parseText(str);
}

From source file:edu.umn.cs.spatialHadoop.io.TextSerializerHelper.java

License:Open Source License

/**
 * Parses a geometry from the start of {@code text} and removes the parsed
 * bytes, plus one directly following {@code separator}, from it.
 *
 * <p>Three encodings are recognized, in this order:
 * <ul>
 * <li>a string wrapped in single or double quotes, whose content is read as
 *     WKT;</li>
 * <li>unquoted WKT, recognized by a leading entry of {@code ShapeNames} and
 *     ending at the parenthesis that closes the first opening one;</li>
 * <li>a run of hex characters longer than one byte, read as hex-encoded
 *     WKB.</li>
 * </ul>
 * If none matches, no geometry is parsed.
 *
 * @param text the text to read from; shortened in place
 * @param separator the field separator that may follow the geometry
 * @return the parsed geometry, or {@code null} if no encoding was recognized
 * @throws RuntimeException if a quoted string is not terminated or the
 *         geometry text cannot be parsed
 */
public static synchronized Geometry consumeGeometryJTS(Text text, char separator) {
    // getBytes() exposes the backing array; only the first getLength()
    // bytes are valid, so every access below is bounded by length.
    byte[] bytes = text.getBytes();
    int length = text.getLength();
    Geometry geom;
    int start, end; // Geometry text is bytes[start, end)
    int i_next; // Beginning of the next field
    boolean isWKT = false;
    boolean isHex = false;
    if (length > 0 && (bytes[0] == '\'' || bytes[0] == '\"')) {
        // A quoted string. Find the terminating quote and trim both quotes.
        // The search starts at index 2, so an empty quoted string is
        // reported as unterminated.
        start = 1;
        end = 2;
        while (end < length && bytes[end] != bytes[0])
            end++;
        // >= rather than ==: a text shorter than two bytes must not slip
        // through and decode bytes beyond the valid length
        if (end >= length)
            throw new RuntimeException("Unterminated quoted string");
        i_next = end + 1;
        isWKT = true; // Assume any quoted string to be WKT
    } else {
        // Not a quoted string, check whether it starts with a WKT shape name
        boolean startsWithShapeName = false;
        for (int i_shape = 0; i_shape < ShapeNames.length && !startsWithShapeName; i_shape++) {
            byte[] shapeName = ShapeNames[i_shape];
            if (length > shapeName.length) {
                int i = 0;
                while (i < shapeName.length && shapeName[i] == bytes[i])
                    i++;
                startsWithShapeName = i == shapeName.length;
            }
        }

        start = 0;
        if (startsWithShapeName) {
            isWKT = true;
            end = 1;
            // Search for the first open parenthesis
            while (end < length && bytes[end] != '(')
                end++;
            if (end < length)
                end++; // Skip the open parenthesis itself
            // Advance to just past the matching closing parenthesis
            int nesting = 1;
            while (end < length && nesting > 0) {
                if (bytes[end] == '(')
                    nesting++;
                else if (bytes[end] == ')')
                    nesting--;
                end++;
            }
            // The next field starts right after the geometry. The separator
            // is skipped once, below, exactly as in the other branches.
            i_next = end;
        } else {
            // Check if the type is hex-encoded WKB
            end = 0;
            // Bytes are signed: reject negative (non-ASCII) values before
            // using them as an index into the lookup table
            while (end < length && bytes[end] >= 0 && IsHex[bytes[end]])
                end++;
            isHex = end > 1;
            i_next = end;
        }
    }

    String geom_text = new String(bytes, start, end - start);

    try {
        if (isWKT) {
            geom = wktReader.read(geom_text);
        } else if (isHex) {
            byte[] binary = hexToBytes(geom_text);
            geom = wkbReader.read(binary);
        } else {
            geom = null;
        }
    } catch (ParseException e) {
        throw new RuntimeException(String.format("Error parsing '%s'", geom_text), e);
    }

    // Remove consumed bytes from the text
    if (i_next >= length)
        text.clear();
    else {
        if (bytes[i_next] == separator)
            i_next++;
        text.set(bytes, i_next, length - i_next);
    }

    return geom;
}

From source file:edu.umn.cs.spatialHadoop.mapred.SpatialRecordReader.java

License:Open Source License

/**
 * Reads the next line from the input into {@code value}.
 *
 * <p>A line may be assembled from two sources: bytes left over in
 * {@code buffer} and bytes read from {@code lineReader}. If the part taken
 * from {@code buffer} already ends in a line terminator it is returned on
 * its own; otherwise the next line read from the stream is appended to it.
 *
 * @param value receives the line; it is cleared before each read attempt
 * @return {@code true} if a line was read; {@code false} if the stream is
 *         exhausted or the file position has moved past {@code end}
 * @throws IOException if reading from the underlying stream fails
 */
protected boolean nextLine(Text value) throws IOException {
    if (blockType == BlockType.RTREE && pos == 8) {
        // File is positioned at the RTree header
        // Skip the header and go to first data object in file
        pos += RTree.skipHeader(in);
        LOG.info("Skipped R-tree to position: " + pos);
        // Reinitialize record reader at the new position
        lineReader = new LineReader(in);
    }
    while (getFilePosition() <= end) {
        value.clear();
        // Number of bytes consumed for this line: the part taken from buffer
        // plus the count returned by lineReader.readLine
        int b = 0;
        if (buffer != null) {
            // Read the first line encountered in buffer
            int eol = RTree.skipToEOL(buffer, 0);
            b += eol;
            value.append(buffer, 0, eol);
            if (eol < buffer.length) {
                // There are still some bytes remaining in buffer
                byte[] tmp = new byte[buffer.length - eol];
                System.arraycopy(buffer, eol, tmp, 0, tmp.length);
                buffer = tmp;
            } else {
                buffer = null;
            }
            // Check if a complete line has been read from the buffer
            // getBytes() returns the backing array, so the last valid byte
            // is at getLength() - 1
            // NOTE(review): if skipToEOL can return 0, value is empty here and
            // this index is -1 -- confirm against RTree.skipToEOL
            byte last_byte = value.getBytes()[value.getLength() - 1];
            if (last_byte == '\n' || last_byte == '\r')
                return true;
        }

        // Read the first line from stream
        Text temp = new Text();
        b += lineReader.readLine(temp);
        if (b == 0) {
            // Indicates an end of stream
            return false;
        }
        pos += b;

        // Append the part read from stream to the part extracted from buffer
        value.append(temp.getBytes(), 0, temp.getLength());

        if (value.getLength() > 1) {
            // Read a non-empty line. Note that end-of-line character is included
            return true;
        }
        // Otherwise the line holds at most one byte: loop and read the next one
    }
    // Reached end of file
    return false;
}

From source file:edu.umn.cs.spatialHadoop.nasa.NASAPoint.java

License:Open Source License

/**
 * Reads this point from {@code text}: the parent's fields first, then the
 * value and the timestamp.
 *
 * @param text the text to parse; consumed fields are removed from it
 */
@Override
public void fromText(Text text) {
    super.fromText(text);

    // Drop the first remaining byte (presumably the separator left behind by
    // the parent parser -- confirm). Only the first getLength() bytes of the
    // backing array are valid.
    final int remaining = text.getLength() - 1;
    text.set(text.getBytes(), 1, remaining);

    value = TextSerializerHelper.consumeInt(text, ',');
    timestamp = TextSerializerHelper.consumeLong(text, '\0');
}