List of usage examples for org.apache.hadoop.io Text append
public void append(byte[] utf8, int start, int len)
From source file:edu.umn.cs.spatialHadoop.delaunay.DelaunayTriangulation.java
License:Open Source License
/** * Compute the Deluanay triangulation in the local machine * @param inPaths//from w ww. j a v a 2s . com * @param outPath * @param params * @throws IOException * @throws InterruptedException */ public static void delaunayLocal(Path[] inPaths, Path outPath, final OperationsParams params) throws IOException, InterruptedException { if (params.getBoolean("mem", false)) MemoryReporter.startReporting(); // 1- Split the input path/file to get splits that can be processed // independently final SpatialInputFormat3<Rectangle, Point> inputFormat = new SpatialInputFormat3<Rectangle, Point>(); Job job = Job.getInstance(params); SpatialInputFormat3.setInputPaths(job, inPaths); final List<InputSplit> splits = inputFormat.getSplits(job); final Point[][] allLists = new Point[splits.size()][]; // 2- Read all input points in memory LOG.info("Reading points from " + splits.size() + " splits"); List<Integer> numsPoints = Parallel.forEach(splits.size(), new RunnableRange<Integer>() { @Override public Integer run(int i1, int i2) { try { int numPoints = 0; for (int i = i1; i < i2; i++) { List<Point> points = new ArrayList<Point>(); FileSplit fsplit = (FileSplit) splits.get(i); final RecordReader<Rectangle, Iterable<Point>> reader = inputFormat .createRecordReader(fsplit, null); if (reader instanceof SpatialRecordReader3) { ((SpatialRecordReader3) reader).initialize(fsplit, params); } else if (reader instanceof RTreeRecordReader3) { ((RTreeRecordReader3) reader).initialize(fsplit, params); } else if (reader instanceof HDFRecordReader) { ((HDFRecordReader) reader).initialize(fsplit, params); } else { throw new RuntimeException("Unknown record reader"); } while (reader.nextKeyValue()) { Iterable<Point> pts = reader.getCurrentValue(); for (Point p : pts) { points.add(p.clone()); } } reader.close(); numPoints += points.size(); allLists[i] = points.toArray(new Point[points.size()]); } return numPoints; } catch (IOException e) { e.printStackTrace(); } catch (InterruptedException e) { 
e.printStackTrace(); } return null; } }, params.getInt("parallel", Runtime.getRuntime().availableProcessors())); int totalNumPoints = 0; for (int numPoints : numsPoints) totalNumPoints += numPoints; LOG.info("Read " + totalNumPoints + " points and merging into one list"); Point[] allPoints = new Point[totalNumPoints]; int pointer = 0; for (int iList = 0; iList < allLists.length; iList++) { System.arraycopy(allLists[iList], 0, allPoints, pointer, allLists[iList].length); pointer += allLists[iList].length; allLists[iList] = null; // To let the GC collect it } if (params.getBoolean("dedup", true)) { float threshold = params.getFloat("threshold", 1E-5f); allPoints = SpatialAlgorithms.deduplicatePoints(allPoints, threshold); } LOG.info("Computing DT for " + allPoints.length + " points"); GSDTAlgorithm dtAlgorithm = new GSImprovedAlgorithm(allPoints, null); LOG.info("DT computed"); Rectangle mbr = FileMBR.fileMBR(inPaths, params); double buffer = Math.max(mbr.getWidth(), mbr.getHeight()) / 10; Rectangle bigMBR = mbr.buffer(buffer, buffer); if (outPath != null && params.getBoolean("output", true)) { LOG.info("Writing the output as a soup of triangles"); Triangulation answer = dtAlgorithm.getFinalTriangulation(); FileSystem outFS = outPath.getFileSystem(params); PrintStream out = new PrintStream(outFS.create(outPath)); Text text = new Text2(); byte[] tab = "\t".getBytes(); for (Point[] triangle : answer.iterateTriangles()) { text.clear(); triangle[0].toText(text); text.append(tab, 0, tab.length); triangle[1].toText(text); text.append(tab, 0, tab.length); triangle[2].toText(text); out.println(text); } out.close(); } // dtAlgorithm.getFinalTriangulation().draw(); //Triangulation finalPart = new Triangulation(); //Triangulation nonfinalPart = new Triangulation(); //dtAlgorithm.splitIntoFinalAndNonFinalParts(new Rectangle(-180, -90, 180, 90), finalPart, nonfinalPart); }
From source file:edu.umn.cs.spatialHadoop.io.TextSerializerHelper.java
License:Open Source License
/**
 * Appends the hexadecimal form of the geometry's binary representation to
 * {@code text}, optionally followed by a single terminator character.
 *
 * @param text the text to append to
 * @param geom the geometry to serialize
 * @param toAppend a character to append after the geometry; {@code '\0'} appends nothing
 */
public static void serializeGeometry(Text text, OGCGeometry geom, char toAppend) {
  final byte[] hexBytes = bytesToHex(geom.asBinary().array()).getBytes();
  text.append(hexBytes, 0, hexBytes.length);
  if (toAppend == '\0') {
    return;
  }
  // The terminator is narrowed to a single byte
  final byte[] terminator = { (byte) toAppend };
  text.append(terminator, 0, terminator.length);
}
From source file:edu.umn.cs.spatialHadoop.io.TextSerializerHelper.java
License:Open Source License
/**
 * Appends the text form of the geometry (as returned by {@code geom.toText()})
 * to {@code text}, optionally followed by a single terminator character.
 * A {@code null} geometry contributes an empty string.
 *
 * @param text the text to append to
 * @param geom the geometry to serialize; may be {@code null}
 * @param toAppend a character to append after the geometry; {@code '\0'} appends nothing
 */
public static void serializeGeometry(Text text, Geometry geom, char toAppend) {
  final String wkt;
  if (geom == null) {
    wkt = "";
  } else {
    wkt = geom.toText();
  }
  final byte[] encoded = wkt.getBytes();
  text.append(encoded, 0, encoded.length);
  if (toAppend == '\0') {
    return;
  }
  // The terminator is narrowed to a single byte
  final byte[] terminator = { (byte) toAppend };
  text.append(terminator, 0, terminator.length);
}
From source file:edu.umn.cs.spatialHadoop.mapred.SpatialRecordReader.java
License:Open Source License
/** * Reads the next line from input and return true if a line was read. * If no more lines are available in this split, a false is returned. * @param value/*from w w w . jav a 2s.c om*/ * @return * @throws IOException */ protected boolean nextLine(Text value) throws IOException { if (blockType == BlockType.RTREE && pos == 8) { // File is positioned at the RTree header // Skip the header and go to first data object in file pos += RTree.skipHeader(in); LOG.info("Skipped R-tree to position: " + pos); // Reinitialize record reader at the new position lineReader = new LineReader(in); } while (getFilePosition() <= end) { value.clear(); int b = 0; if (buffer != null) { // Read the first line encountered in buffer int eol = RTree.skipToEOL(buffer, 0); b += eol; value.append(buffer, 0, eol); if (eol < buffer.length) { // There are still some bytes remaining in buffer byte[] tmp = new byte[buffer.length - eol]; System.arraycopy(buffer, eol, tmp, 0, tmp.length); buffer = tmp; } else { buffer = null; } // Check if a complete line has been read from the buffer byte last_byte = value.getBytes()[value.getLength() - 1]; if (last_byte == '\n' || last_byte == '\r') return true; } // Read the first line from stream Text temp = new Text(); b += lineReader.readLine(temp); if (b == 0) { // Indicates an end of stream return false; } pos += b; // Append the part read from stream to the part extracted from buffer value.append(temp.getBytes(), 0, temp.getLength()); if (value.getLength() > 1) { // Read a non-empty line. Note that end-of-line character is included return true; } } // Reached end of file return false; }
From source file:edu.umn.cs.spatialHadoop.nasa.NASAPoint.java
License:Open Source License
@Override public Text toText(Text text) { super.toText(text); text.append(Separator, 0, Separator.length); TextSerializerHelper.serializeInt(value, text, ','); TextSerializerHelper.serializeLong(timestamp, text, '\0'); return text;//from ww w . jav a2 s.c o m }
From source file:edu.umn.cs.spatialHadoop.nasa.NASARectangle.java
License:Open Source License
/**
 * Appends this rectangle to {@code text}: the parent's text form, then
 * {@code Separator}, then {@code value} followed by a comma, then
 * {@code timestamp}.
 *
 * @param text the text to append to
 * @return the same {@code text} instance that was passed in
 */
@Override
public Text toText(Text text) {
  super.toText(text);
  final byte[] sep = Separator;
  text.append(sep, 0, sep.length);
  TextSerializerHelper.serializeInt(value, text, ',');
  // presumably '\0' means "no trailing delimiter" — verify against TextSerializerHelper
  TextSerializerHelper.serializeLong(timestamp, text, '\0');
  return text;
}
From source file:edu.umn.cs.spatialHadoop.operations.LocalSampler.java
License:Open Source License
/** * Read from the given stream until end-of-line is reached. * @param in - the input stream from where to read the line * @param line - the line that has been read from file not including EOL * @return - number of bytes read including EOL characters * @throws IOException /*from w w w . j a va2s . c om*/ */ public static int readUntilEOL(InputStream in, Text line) throws IOException { final byte[] bufferBytes = new byte[1024]; int bufferLength = 0; // Length of the buffer do { if (bufferLength == bufferBytes.length) { // Buffer full. Copy to the output text line.append(bufferBytes, 0, bufferLength); bufferLength = 0; } if (bufferLength == 0) { // Read and skip any initial EOL characters do { bufferBytes[0] = (byte) in.read(); } while (bufferBytes[0] != -1 && (bufferBytes[0] == '\n' || bufferBytes[0] == '\r')); if (bufferBytes[0] != -1) bufferLength++; } else { bufferBytes[bufferLength++] = (byte) in.read(); } } while (bufferLength > 0 && bufferBytes[bufferLength - 1] != -1 && bufferBytes[bufferLength - 1] != '\n' && bufferBytes[bufferLength - 1] != '\r'); if (bufferLength > 0) { bufferLength--; line.append(bufferBytes, 0, bufferLength); } return line.getLength(); }
From source file:edu.umn.cs.spatialHadoop.osm.OSMEdge.java
License:Open Source License
@Override public Text toText(Text text) { TextSerializerHelper.serializeLong(edgeId, text, ','); TextSerializerHelper.serializeLong(nodeId1, text, ','); TextSerializerHelper.serializeDouble(lat1, text, ','); TextSerializerHelper.serializeDouble(lon1, text, ','); TextSerializerHelper.serializeLong(nodeId2, text, ','); TextSerializerHelper.serializeDouble(lat2, text, ','); TextSerializerHelper.serializeDouble(lon2, text, ','); TextSerializerHelper.serializeLong(wayId, text, ','); if (tags != null) { byte[] tagsBytes = tags.getBytes(); text.append(tagsBytes, 0, tagsBytes.length); }// www. ja v a 2 s .c o m return text; }
From source file:edu.umn.cs.spatialHadoop.util.JspSpatialHelper.java
License:Open Source License
/** * Runs the given process and returns the result code. Feeds the given string * to the stdin of the run process. If stdout or stderr is non-null, they are * filled with the stdout or stderr of the run process, respectively. * If wait is set to true, the process is run in synchronous mode where we * wait until it is finished. Otherwise, this function call returns * immediately and leaves the process running in the background. In the later * case, stdout, stderr and the return value are not valid. * /* ww w . j a v a2 s. c om*/ * @param workingDir - The working directory to run the script. Set null for * default. * @param cmd - The command line to run including all parameters * @param stdin - The string to feed to the stdin of the run process. * @param stdout - If non-null, the stdout of the process is fed here. * @param stderr - If non-null, the stderr of the process is fed here. * @param wait - Set to true to wait until the process exits. * @return * @throws IOException */ public static int runProcess(File workingDir, String cmd, String stdin, Text stdout, Text stderr, boolean wait) throws IOException { new File("asdf").list(new FilenameFilter() { @Override public boolean accept(File dir, String name) { return false; } }); Process process; if (workingDir == null) process = Runtime.getRuntime().exec(cmd); else process = Runtime.getRuntime().exec(cmd, null, workingDir); if (stdin != null) { PrintStream ps = new PrintStream(process.getOutputStream()); ps.print(stdin); ps.close(); } if (!wait) return 0; try { int exitCode = process.waitFor(); byte[] buffer = new byte[4096]; if (stdout != null) { stdout.clear(); InputStream in = process.getInputStream(); while (in.available() > 0) { int bytesRead = in.read(buffer); stdout.append(buffer, 0, bytesRead); } in.close(); } if (stderr != null) { stderr.clear(); InputStream err = process.getErrorStream(); while (err.available() > 0) { int bytesRead = err.read(buffer); stderr.append(buffer, 0, bytesRead); } err.close(); } 
return exitCode; } catch (InterruptedException e) { e.printStackTrace(); return 1; } }
From source file:edu.umn.cs.sthadoop.core.STRectangle.java
License:Open Source License
/**
 * Appends this rectangle to {@code text}: the bytes of {@code time}, a comma,
 * and then whatever the parent's {@code toText} appends.
 *
 * @param text the text to append to
 * @return the value returned by the parent's {@code toText}
 */
@Override
public Text toText(Text text) {
  // Encode the time once rather than once for the data and again for the length
  // NOTE(review): assumes time is a String; if it is a Hadoop Text, the backing
  // array may be longer than its valid length — confirm
  final byte[] timeBytes = time.getBytes();
  text.append(timeBytes, 0, timeBytes.length);
  // A literal comma byte avoids the extra String allocation and charset lookup
  final byte[] separator = { ',' };
  text.append(separator, 0, separator.length);
  return super.toText(text);
}