List of usage examples for org.apache.hadoop.fs FileSystem getFileStatus
public abstract FileStatus getFileStatus(Path f) throws IOException;
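The signature above is declared on the abstract FileSystem class; concrete file systems such as HDFS provide the implementation. Before the project examples below, here is a minimal self-contained sketch of a typical call (the default path is hypothetical). Note that getFileStatus reports a missing path by throwing FileNotFoundException, a subclass of IOException:

import java.io.FileNotFoundException;
import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class GetFileStatusDemo {
    public static void main(String[] args) throws IOException {
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(conf);
        Path p = new Path(args.length > 0 ? args[0] : "/tmp/example.txt"); // hypothetical default path
        try {
            FileStatus status = fs.getFileStatus(p);
            System.out.println("Path:        " + status.getPath());        // fully-qualified path
            System.out.println("Directory:   " + status.isDirectory());
            System.out.println("Length:      " + status.getLen() + " bytes");
            System.out.println("Owner:       " + status.getOwner());
            System.out.println("Modified at: " + status.getModificationTime()); // epoch millis
        } catch (FileNotFoundException e) {
            // getFileStatus throws FileNotFoundException when the path does not exist
            System.err.println("No such path: " + p);
        }
    }
}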
From source file:edu.umn.cs.spatialHadoop.util.FileUtil.java
License:Open Source License
/**
 * Get the actual size of all data in the given directory. If the input is
 * a single file, its size is returned immediately. If the input is a
 * directory, the total size of all data in that directory is returned.
 * If there is a global index, the size is retrieved from that global index.
 * Otherwise, we add up the sizes of all single files.
 * @param fs the file system that contains the path
 * @param path the path that contains the data
 * @return the total size of the data in bytes
 * @throws IOException
 */
public static long getPathSize(FileSystem fs, Path path) throws IOException {
    FileStatus fileStatus = fs.getFileStatus(path);
    // 1- Check if the path points to a file
    if (!fileStatus.isDir())
        return fileStatus.getLen();
    // 2- Check if the input is indexed and get the cached size
    GlobalIndex<Partition> gIndex = SpatialSite.getGlobalIndex(fs, path);
    if (gIndex != null) {
        long totalSize = 0;
        for (Partition partition : gIndex)
            totalSize += partition.size;
        return totalSize;
    }
    // 3- Get the total size of all non-hidden files
    long totalSize = 0;
    FileStatus[] allFiles = fs.listStatus(path, SpatialSite.NonHiddenFileFilter);
    for (FileStatus subFile : allFiles) {
        if (!subFile.isDir())
            totalSize += subFile.getLen();
    }
    return totalSize;
}
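One caveat when reusing this example: FileStatus.isDir() has long been deprecated in favor of isDirectory() and isFile(). Below is a simplified sketch of the same file-versus-directory dispatch with the non-deprecated accessors, without the SpatialHadoop global-index shortcut (class and method names are illustrative):

import java.io.IOException;

import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public final class PathSizes {
    /** Sum the lengths of all non-hidden files directly under a path (no index lookup). */
    public static long simplePathSize(FileSystem fs, Path path) throws IOException {
        FileStatus status = fs.getFileStatus(path);
        if (status.isFile())            // non-deprecated replacement for !isDir()
            return status.getLen();
        long total = 0;
        for (FileStatus child : fs.listStatus(path)) {
            String name = child.getPath().getName();
            // Skip hidden files, mirroring SpatialSite.NonHiddenFileFilter's convention
            if (child.isFile() && !name.startsWith("_") && !name.startsWith("."))
                total += child.getLen();
        }
        return total;
    }
}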
From source file:edu.umn.cs.spatialHadoop.visualization.GeometricPlot.java
License:Open Source License
/**
 * Combines images of different datasets into one image that is displayed
 * to users.
 * This method is called from the web interface to display one image for
 * multiple selected datasets.
 * @param conf
 * @param files Paths to directories which contain the datasets
 * @param includeBoundaries Also plot the indexing boundaries of datasets
 * @param width
 * @param height
 * @return An image that is the combination of all datasets' images
 * @throws IOException
 * @throws InterruptedException
 */
public static BufferedImage combineImages(Configuration conf, Path[] files, boolean includeBoundaries,
        int width, int height) throws IOException, InterruptedException {
    BufferedImage result;
    // Retrieve the MBRs of all datasets
    Rectangle allMbr = new Rectangle(Double.MAX_VALUE, Double.MAX_VALUE, -Double.MAX_VALUE,
            -Double.MAX_VALUE);
    for (Path file : files) {
        Rectangle mbr = FileMBR.fileMBR(file, new OperationsParams(conf));
        allMbr.expand(mbr);
    }

    // Adjust width and height to maintain aspect ratio
    if ((allMbr.x2 - allMbr.x1) / (allMbr.y2 - allMbr.y1) > (double) width / height) {
        // Fix width and change height
        height = (int) ((allMbr.y2 - allMbr.y1) * width / (allMbr.x2 - allMbr.x1));
    } else {
        width = (int) ((allMbr.x2 - allMbr.x1) * height / (allMbr.y2 - allMbr.y1));
    }
    result = new BufferedImage(width, height, BufferedImage.TYPE_INT_ARGB);

    for (Path file : files) {
        FileSystem fs = file.getFileSystem(conf);
        if (fs.getFileStatus(file).isDir()) {
            // Retrieve the MBR of this dataset
            Rectangle mbr = FileMBR.fileMBR(file, new OperationsParams(conf));
            // Compute the coordinates of this image in the whole picture
            mbr.x1 = (mbr.x1 - allMbr.x1) * width / allMbr.getWidth();
            mbr.x2 = (mbr.x2 - allMbr.x1) * width / allMbr.getWidth();
            mbr.y1 = (mbr.y1 - allMbr.y1) * height / allMbr.getHeight();
            mbr.y2 = (mbr.y2 - allMbr.y1) * height / allMbr.getHeight();
            // Retrieve the image of this dataset
            Path imagePath = new Path(file, "_data.png");
            if (!fs.exists(imagePath))
                throw new RuntimeException("Image " + imagePath + " not ready");
            FSDataInputStream imageFile = fs.open(imagePath);
            BufferedImage image = ImageIO.read(imageFile);
            imageFile.close();
            // Draw the image
            Graphics graphics = result.getGraphics();
            graphics.drawImage(image, (int) mbr.x1, (int) mbr.y1, (int) mbr.getWidth(),
                    (int) mbr.getHeight(), null);
            graphics.dispose();

            if (includeBoundaries) {
                // Plot also the image of the boundaries
                // Retrieve the image of the dataset boundaries
                imagePath = new Path(file, "_partitions.png");
                if (fs.exists(imagePath)) {
                    imageFile = fs.open(imagePath);
                    image = ImageIO.read(imageFile);
                    imageFile.close();
                    // Draw the image
                    graphics = result.getGraphics();
                    graphics.drawImage(image, (int) mbr.x1, (int) mbr.y1, (int) mbr.getWidth(),
                            (int) mbr.getHeight(), null);
                    graphics.dispose();
                }
            }
        }
    }
    return result;
}
From source file:edu.umn.cs.spatialHadoop.visualization.HadoopvizServer.java
License:Open Source License
/**
 * Visualizes a dataset.
 * @param request
 * @param response
 */
private void handleVisualize(HttpServletRequest request, HttpServletResponse response) {
    try {
        String pathStr = request.getParameter("path");
        final Path path = new Path(pathStr);
        FileSystem fs = path.getFileSystem(commonParams);
        // Check if the input is already visualized
        final Path imagePath = new Path(path, "_data.png");
        if (fs.exists(imagePath)) {
            // Image is already visualized
            response.setStatus(HttpServletResponse.SC_MOVED_PERMANENTLY);
            response.setHeader("Location", "/hdfs" + imagePath);
        } else {
            // This dataset has never been visualized before
            String shapeName = request.getParameter("shape");
            final OperationsParams vizParams = new OperationsParams(commonParams);
            vizParams.set("shape", shapeName);
            vizParams.setBoolean("background", true);
            vizParams.setInt("width", 2000);
            vizParams.setInt("height", 2000);
            // Retrieve the owner of the data directory
            String owner = fs.getFileStatus(path).getOwner();
            UserGroupInformation ugi = UserGroupInformation.createRemoteUser(owner);
            Job vizJob = ugi.doAs(new PrivilegedExceptionAction<Job>() {
                public Job run() throws Exception {
                    return GeometricPlot.plot(new Path[] { path }, imagePath, vizParams);
                }
            });
            // Write the response
            response.setStatus(HttpServletResponse.SC_OK);
            response.setContentType("application/json;charset=utf-8");
            PrintWriter out = response.getWriter();
            out.printf("{\"JobID\":\"%s\", \"TrackURL\": \"%s\"}", vizJob.getJobID().toString(),
                    vizJob.getTrackingURL());
            out.close();
        }
    } catch (Exception e) {
        System.out.println("error happened");
        e.printStackTrace();
        try {
            e.printStackTrace(response.getWriter());
        } catch (IOException ioe) {
            ioe.printStackTrace();
            e.printStackTrace();
        }
        response.setContentType("text/plain;charset=utf-8");
        response.setStatus(HttpServletResponse.SC_INTERNAL_SERVER_ERROR);
    }
}
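The getFileStatus call here serves an access-control purpose: it retrieves the owner of the data directory so that the plot job can be submitted under that user's identity. A minimal sketch of that impersonation idiom, assuming the server process has the proxy-user privileges this requires (class and method names are illustrative):

import java.io.IOException;
import java.security.PrivilegedExceptionAction;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.security.UserGroupInformation;

public final class RunAsOwner {
    /** Run an action as the owner of the given path. */
    public static <T> T runAsOwner(Configuration conf, Path path, PrivilegedExceptionAction<T> action)
            throws IOException, InterruptedException {
        FileSystem fs = path.getFileSystem(conf);
        String owner = fs.getFileStatus(path).getOwner(); // user name of the path's owner
        UserGroupInformation ugi = UserGroupInformation.createRemoteUser(owner);
        return ugi.doAs(action);
    }
}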
From source file:edu.umn.cs.spatialHadoop.visualization.MultilevelPlot.java
License:Open Source License
private static void plotLocal(Path[] inFiles, final Path outPath, final Class<? extends Plotter> plotterClass,
        final OperationsParams params) throws IOException, InterruptedException, ClassNotFoundException {
    final boolean vflip = params.getBoolean("vflip", true);

    OperationsParams mbrParams = new OperationsParams(params);
    mbrParams.setBoolean("background", false);
    final Rectangle inputMBR = params.get("mbr") != null ? params.getShape("mbr").getMBR()
            : FileMBR.fileMBR(inFiles, mbrParams);
    OperationsParams.setShape(params, InputMBR, inputMBR);

    // Retrieve desired output image size and keep aspect ratio if needed
    int tileWidth = params.getInt("tilewidth", 256);
    int tileHeight = params.getInt("tileheight", 256);
    // Adjust width and height if aspect ratio is to be kept
    if (params.getBoolean("keepratio", true)) {
        // Expand input file to a rectangle for compatibility with the pyramid structure
        if (inputMBR.getWidth() > inputMBR.getHeight()) {
            inputMBR.y1 -= (inputMBR.getWidth() - inputMBR.getHeight()) / 2;
            inputMBR.y2 = inputMBR.y1 + inputMBR.getWidth();
        } else {
            inputMBR.x1 -= (inputMBR.getHeight() - inputMBR.getWidth()) / 2;
            inputMBR.x2 = inputMBR.x1 + inputMBR.getHeight();
        }
    }

    String outFName = outPath.getName();
    int extensionStart = outFName.lastIndexOf('.');
    final String extension = extensionStart == -1 ? ".png" : outFName.substring(extensionStart);

    // Start reading input file
    Vector<InputSplit> splits = new Vector<InputSplit>();
    final SpatialInputFormat3<Rectangle, Shape> inputFormat = new SpatialInputFormat3<Rectangle, Shape>();
    for (Path inFile : inFiles) {
        FileSystem inFs = inFile.getFileSystem(params);
        if (!OperationsParams.isWildcard(inFile) && inFs.exists(inFile) && !inFs.isDirectory(inFile)) {
            if (SpatialSite.NonHiddenFileFilter.accept(inFile)) {
                // Use the normal input format splitter to add this non-hidden file
                Job job = Job.getInstance(params);
                SpatialInputFormat3.addInputPath(job, inFile);
                splits.addAll(inputFormat.getSplits(job));
            } else {
                // A hidden file, add it immediately as one split
                // This is useful if the input is a hidden file which is automatically
                // skipped by FileInputFormat. We need to plot a hidden file for the case
                // of plotting partition boundaries of a spatial index
                splits.add(new FileSplit(inFile, 0, inFs.getFileStatus(inFile).getLen(), new String[0]));
            }
        } else {
            Job job = Job.getInstance(params);
            SpatialInputFormat3.addInputPath(job, inFile);
            splits.addAll(inputFormat.getSplits(job));
        }
    }

    try {
        Plotter plotter = plotterClass.newInstance();
        plotter.configure(params);

        String[] strLevels = params.get("levels", "7").split("\\.\\.");
        int minLevel, maxLevel;
        if (strLevels.length == 1) {
            minLevel = 0;
            maxLevel = Integer.parseInt(strLevels[0]);
        } else {
            minLevel = Integer.parseInt(strLevels[0]);
            maxLevel = Integer.parseInt(strLevels[1]);
        }

        GridInfo bottomGrid = new GridInfo(inputMBR.x1, inputMBR.y1, inputMBR.x2, inputMBR.y2);
        bottomGrid.rows = bottomGrid.columns = 1 << maxLevel;

        TileIndex key = new TileIndex();

        // All canvases in the pyramid, one per tile
        Map<TileIndex, Canvas> canvases = new HashMap<TileIndex, Canvas>();

        for (InputSplit split : splits) {
            FileSplit fsplit = (FileSplit) split;
            RecordReader<Rectangle, Iterable<Shape>> reader = inputFormat.createRecordReader(fsplit, null);
            if (reader instanceof SpatialRecordReader3) {
                ((SpatialRecordReader3) reader).initialize(fsplit, params);
            } else if (reader instanceof RTreeRecordReader3) {
                ((RTreeRecordReader3) reader).initialize(fsplit, params);
            } else if (reader instanceof HDFRecordReader) {
                ((HDFRecordReader) reader).initialize(fsplit, params);
            } else {
                throw new RuntimeException("Unknown record reader");
            }

            while (reader.nextKeyValue()) {
                Rectangle partition = reader.getCurrentKey();
                if (!partition.isValid())
                    partition.set(inputMBR);

                Iterable<Shape> shapes = reader.getCurrentValue();

                for (Shape shape : shapes) {
                    Rectangle shapeMBR = shape.getMBR();
                    if (shapeMBR == null)
                        continue;
                    java.awt.Rectangle overlappingCells = bottomGrid.getOverlappingCells(shapeMBR);
                    // Iterate over levels from bottom up
                    for (key.level = maxLevel; key.level >= minLevel; key.level--) {
                        for (key.x = overlappingCells.x; key.x < overlappingCells.x
                                + overlappingCells.width; key.x++) {
                            for (key.y = overlappingCells.y; key.y < overlappingCells.y
                                    + overlappingCells.height; key.y++) {
                                Canvas canvas = canvases.get(key);
                                if (canvas == null) {
                                    Rectangle tileMBR = new Rectangle();
                                    int gridSize = 1 << key.level;
                                    tileMBR.x1 = (inputMBR.x1 * (gridSize - key.x)
                                            + inputMBR.x2 * key.x) / gridSize;
                                    tileMBR.x2 = (inputMBR.x1 * (gridSize - (key.x + 1))
                                            + inputMBR.x2 * (key.x + 1)) / gridSize;
                                    tileMBR.y1 = (inputMBR.y1 * (gridSize - key.y)
                                            + inputMBR.y2 * key.y) / gridSize;
                                    tileMBR.y2 = (inputMBR.y1 * (gridSize - (key.y + 1))
                                            + inputMBR.y2 * (key.y + 1)) / gridSize;
                                    canvas = plotter.createCanvas(tileWidth, tileHeight, tileMBR);
                                    canvases.put(key.clone(), canvas);
                                }
                                plotter.plot(canvas, shape);
                            }
                        }
                        // Update overlappingCells for the higher level
                        int updatedX1 = overlappingCells.x / 2;
                        int updatedY1 = overlappingCells.y / 2;
                        int updatedX2 = (overlappingCells.x + overlappingCells.width - 1) / 2;
                        int updatedY2 = (overlappingCells.y + overlappingCells.height - 1) / 2;
                        overlappingCells.x = updatedX1;
                        overlappingCells.y = updatedY1;
                        overlappingCells.width = updatedX2 - updatedX1 + 1;
                        overlappingCells.height = updatedY2 - updatedY1 + 1;
                    }
                }
            }
            reader.close();
        }

        // Done with all splits. Write output to disk
        LOG.info("Done with plotting. Now writing the output");
        final FileSystem outFS = outPath.getFileSystem(params);

        LOG.info("Writing default empty image");
        // Write a default empty image to be displayed for non-generated tiles
        BufferedImage emptyImg = new BufferedImage(tileWidth, tileHeight, BufferedImage.TYPE_INT_ARGB);
        Graphics2D g = new SimpleGraphics(emptyImg);
        g.setBackground(new Color(0, 0, 0, 0));
        g.clearRect(0, 0, tileWidth, tileHeight);
        g.dispose();

        // Write HTML file to browse the multilevel image
        OutputStream out = outFS.create(new Path(outPath, "default.png"));
        ImageIO.write(emptyImg, "png", out);
        out.close();

        // Add an HTML file that visualizes the result using Google Maps
        LOG.info("Writing the HTML viewer file");
        LineReader templateFileReader = new LineReader(
                MultilevelPlot.class.getResourceAsStream("/zoom_view.html"));
        PrintStream htmlOut = new PrintStream(outFS.create(new Path(outPath, "index.html")));
        Text line = new Text();
        while (templateFileReader.readLine(line) > 0) {
            String lineStr = line.toString();
            lineStr = lineStr.replace("#{TILE_WIDTH}", Integer.toString(tileWidth));
            lineStr = lineStr.replace("#{TILE_HEIGHT}", Integer.toString(tileHeight));
            lineStr = lineStr.replace("#{MAX_ZOOM}", Integer.toString(maxLevel));
            lineStr = lineStr.replace("#{MIN_ZOOM}", Integer.toString(minLevel));
            lineStr = lineStr.replace("#{TILE_URL}",
                    "'tile-' + zoom + '-' + coord.x + '-' + coord.y + '" + extension + "'");
            htmlOut.println(lineStr);
        }
        templateFileReader.close();
        htmlOut.close();

        // Write the tiles
        final Entry<TileIndex, Canvas>[] entries = canvases.entrySet()
                .toArray(new Map.Entry[canvases.size()]);
        // Clear the hash map to save memory as it is no longer needed
        canvases.clear();
        int parallelism = params.getInt("parallel", Runtime.getRuntime().availableProcessors());
        Parallel.forEach(entries.length, new RunnableRange<Object>() {
            @Override
            public Object run(int i1, int i2) {
                boolean output = params.getBoolean("output", true);
                try {
                    Plotter plotter = plotterClass.newInstance();
                    plotter.configure(params);
                    for (int i = i1; i < i2; i++) {
                        Map.Entry<TileIndex, Canvas> entry = entries[i];
                        TileIndex key = entry.getKey();
                        if (vflip)
                            key.y = ((1 << key.level) - 1) - key.y;
                        Path imagePath = new Path(outPath, key.getImageFileName() + extension);
                        // Write this tile to an image
                        DataOutputStream outFile = output ? outFS.create(imagePath)
                                : new DataOutputStream(new NullOutputStream());
                        plotter.writeImage(entry.getValue(), outFile, vflip);
                        outFile.close();
                        // Remove the entry to allow the GC to collect it
                        entries[i] = null;
                    }
                    return null;
                } catch (InstantiationException e) {
                    e.printStackTrace();
                } catch (IllegalAccessException e) {
                    e.printStackTrace();
                } catch (IOException e) {
                    e.printStackTrace();
                }
                return null;
            }
        }, parallelism);
    } catch (InstantiationException e) {
        throw new RuntimeException("Error creating rasterizer", e);
    } catch (IllegalAccessException e) {
        throw new RuntimeException("Error creating rasterizer", e);
    }
}
From source file:edu.umn.cs.spatialHadoop.visualization.SingleLevelPlot.java
License:Open Source License
public static void plotLocal(Path[] inFiles, Path outFile, final Class<? extends Plotter> plotterClass,
        final OperationsParams params) throws IOException, InterruptedException {
    OperationsParams mbrParams = new OperationsParams(params);
    mbrParams.setBoolean("background", false);
    final Rectangle inputMBR = params.get(InputMBR) != null ? params.getShape("mbr").getMBR()
            : FileMBR.fileMBR(inFiles, mbrParams);
    if (params.get(InputMBR) == null)
        OperationsParams.setShape(params, InputMBR, inputMBR);

    // Retrieve desired output image size and keep aspect ratio if needed
    int width = params.getInt("width", 1000);
    int height = params.getInt("height", 1000);
    if (params.getBoolean("keepratio", true)) {
        // Adjust width and height to maintain aspect ratio and store the adjusted
        // values back in params in case the caller needs to retrieve them
        if (inputMBR.getWidth() / inputMBR.getHeight() > (double) width / height)
            params.setInt("height", height = (int) (inputMBR.getHeight() * width / inputMBR.getWidth()));
        else
            params.setInt("width", width = (int) (inputMBR.getWidth() * height / inputMBR.getHeight()));
    }
    // Store width and height in final variables to make them accessible in parallel
    final int fwidth = width, fheight = height;

    // Start reading input file
    List<InputSplit> splits = new ArrayList<InputSplit>();
    final SpatialInputFormat3<Rectangle, Shape> inputFormat = new SpatialInputFormat3<Rectangle, Shape>();
    for (Path inFile : inFiles) {
        FileSystem inFs = inFile.getFileSystem(params);
        if (!OperationsParams.isWildcard(inFile) && inFs.exists(inFile) && !inFs.isDirectory(inFile)) {
            if (SpatialSite.NonHiddenFileFilter.accept(inFile)) {
                // Use the normal input format splitter to add this non-hidden file
                Job job = Job.getInstance(params);
                SpatialInputFormat3.addInputPath(job, inFile);
                splits.addAll(inputFormat.getSplits(job));
            } else {
                // A hidden file, add it immediately as one split
                // This is useful if the input is a hidden file which is automatically
                // skipped by FileInputFormat. We need to plot a hidden file for the case
                // of plotting partition boundaries of a spatial index
                splits.add(new FileSplit(inFile, 0, inFs.getFileStatus(inFile).getLen(), new String[0]));
            }
        } else {
            // Use the normal input format splitter to add this non-hidden file
            Job job = Job.getInstance(params);
            SpatialInputFormat3.addInputPath(job, inFile);
            splits.addAll(inputFormat.getSplits(job));
        }
    }

    // Copy splits to a final array to be used in parallel
    final FileSplit[] fsplits = splits.toArray(new FileSplit[splits.size()]);
    int parallelism = params.getInt("parallel", Runtime.getRuntime().availableProcessors());

    List<Canvas> partialCanvases = Parallel.forEach(fsplits.length, new RunnableRange<Canvas>() {
        @Override
        public Canvas run(int i1, int i2) {
            Plotter plotter;
            try {
                plotter = plotterClass.newInstance();
            } catch (InstantiationException e) {
                throw new RuntimeException("Error creating rasterizer", e);
            } catch (IllegalAccessException e) {
                throw new RuntimeException("Error creating rasterizer", e);
            }
            plotter.configure(params);
            // Create the partial layer that will contain the plot of the assigned partitions
            Canvas partialCanvas = plotter.createCanvas(fwidth, fheight, inputMBR);

            for (int i = i1; i < i2; i++) {
                try {
                    RecordReader<Rectangle, Iterable<Shape>> reader = inputFormat
                            .createRecordReader(fsplits[i], null);
                    if (reader instanceof SpatialRecordReader3) {
                        ((SpatialRecordReader3) reader).initialize(fsplits[i], params);
                    } else if (reader instanceof RTreeRecordReader3) {
                        ((RTreeRecordReader3) reader).initialize(fsplits[i], params);
                    } else if (reader instanceof HDFRecordReader) {
                        ((HDFRecordReader) reader).initialize(fsplits[i], params);
                    } else {
                        throw new RuntimeException("Unknown record reader");
                    }

                    while (reader.nextKeyValue()) {
                        Rectangle partition = reader.getCurrentKey();
                        if (!partition.isValid())
                            partition.set(inputMBR);

                        Iterable<Shape> shapes = reader.getCurrentValue();
                        // Run the plot step
                        plotter.plot(partialCanvas, plotter.isSmooth() ? plotter.smooth(shapes) : shapes);
                    }
                    reader.close();
                } catch (IOException e) {
                    throw new RuntimeException("Error reading the file ", e);
                } catch (InterruptedException e) {
                    throw new RuntimeException("Interrupt error ", e);
                }
            }
            return partialCanvas;
        }
    }, parallelism);

    boolean merge = params.getBoolean("merge", true);
    Plotter plotter;
    try {
        plotter = plotterClass.newInstance();
        plotter.configure(params);
    } catch (InstantiationException e) {
        throw new RuntimeException("Error creating plotter", e);
    } catch (IllegalAccessException e) {
        throw new RuntimeException("Error creating plotter", e);
    }

    // Whether we should vertically flip the final image or not
    boolean vflip = params.getBoolean("vflip", true);

    if (merge) {
        LOG.info("Merging " + partialCanvases.size() + " partial canvases");
        // Create the final canvas that will contain the final image
        Canvas finalCanvas = plotter.createCanvas(fwidth, fheight, inputMBR);
        for (Canvas partialCanvas : partialCanvases)
            plotter.merge(finalCanvas, partialCanvas);

        // Finally, write the resulting image to the given output path
        LOG.info("Writing final image");
        FileSystem outFs = outFile.getFileSystem(params);
        FSDataOutputStream outputFile = outFs.create(outFile);
        plotter.writeImage(finalCanvas, outputFile, vflip);
        outputFile.close();
    } else {
        // No merge
        LOG.info("Writing partial images");
        FileSystem outFs = outFile.getFileSystem(params);
        for (int i = 0; i < partialCanvases.size(); i++) {
            Path filename = new Path(outFile, String.format("part-%05d.png", i));
            FSDataOutputStream outputFile = outFs.create(filename);
            plotter.writeImage(partialCanvases.get(i), outputFile, vflip);
            outputFile.close();
        }
    }
}
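Both plotLocal methods above rely on the same idiom for hidden files that FileInputFormat would silently skip: getFileStatus(...).getLen() supplies the byte length needed to build a single FileSplit covering the whole file. A sketch of that idiom in isolation (class and method names are illustrative):

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapreduce.lib.input.FileSplit;

public final class WholeFileSplit {
    /** Build one split spanning an entire file, e.g. a hidden partition-boundaries file. */
    public static FileSplit wholeFile(Configuration conf, Path file) throws IOException {
        FileSystem fs = file.getFileSystem(conf);
        long length = fs.getFileStatus(file).getLen(); // total file length in bytes
        return new FileSplit(file, 0, length, new String[0]); // no preferred hosts
    }
}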
From source file:edu.umn.cs.sthadoop.operations.HSPKNNQ.java
License:Open Source License
private static <S extends Shape> long knnLocal(Path inFile, Path outPath, OperationsParams params)
        throws IOException, InterruptedException {
    int iterations = 0;
    FileSystem fs = inFile.getFileSystem(params);
    Point queryPoint = (Point) OperationsParams.getShape(params, "point");
    int k = params.getInt("k", 1);
    // Top-k objects are retained in this object
    PriorityQueue<ShapeWithDistance<S>> knn = new KNNObjects<ShapeWithDistance<S>>(k);

    SpatialInputFormat3<Rectangle, Shape> inputFormat = new SpatialInputFormat3<Rectangle, Shape>();

    final GlobalIndex<Partition> gIndex = SpatialSite.getGlobalIndex(fs, inFile);
    double kthDistance = Double.MAX_VALUE;
    if (gIndex != null) {
        // There is a global index, use it
        PriorityQueue<ShapeWithDistance<Partition>> partitionsToProcess = new PriorityQueue<HSPKNNQ.ShapeWithDistance<Partition>>() {
            {
                initialize(gIndex.size());
            }

            @Override
            protected boolean lessThan(Object a, Object b) {
                return ((ShapeWithDistance<Partition>) a).distance < ((ShapeWithDistance<Partition>) b).distance;
            }
        };
        for (Partition p : gIndex) {
            double distance = p.getMinDistanceTo(queryPoint.x, queryPoint.y);
            partitionsToProcess.insert(new ShapeWithDistance<Partition>(p.clone(), distance));
        }

        while (partitionsToProcess.size() > 0 && partitionsToProcess.top().distance <= kthDistance) {
            ShapeWithDistance<Partition> partitionToProcess = partitionsToProcess.pop();
            // Process this partition
            Path partitionPath = new Path(inFile, partitionToProcess.shape.filename);
            long length = fs.getFileStatus(partitionPath).getLen();
            FileSplit fsplit = new FileSplit(partitionPath, 0, length, new String[0]);
            RecordReader<Rectangle, Iterable<Shape>> reader = inputFormat.createRecordReader(fsplit, null);
            if (reader instanceof SpatialRecordReader3) {
                ((SpatialRecordReader3) reader).initialize(fsplit, params);
            } else if (reader instanceof RTreeRecordReader3) {
                ((RTreeRecordReader3) reader).initialize(fsplit, params);
            } else if (reader instanceof HDFRecordReader) {
                ((HDFRecordReader) reader).initialize(fsplit, params);
            } else {
                throw new RuntimeException("Unknown record reader");
            }
            iterations++;

            while (reader.nextKeyValue()) {
                Iterable<Shape> shapes = reader.getCurrentValue();
                for (Shape shape : shapes) {
                    double distance = shape.distanceTo(queryPoint.x, queryPoint.y);
                    if (distance <= kthDistance)
                        knn.insert(new ShapeWithDistance<S>((S) shape.clone(), distance));
                }
            }
            reader.close();

            if (knn.size() >= k)
                kthDistance = knn.top().distance;
        }
    } else {
        // No global index, have to scan the whole file
        Job job = new Job(params);
        SpatialInputFormat3.addInputPath(job, inFile);
        List<InputSplit> splits = inputFormat.getSplits(job);

        for (InputSplit split : splits) {
            RecordReader<Rectangle, Iterable<Shape>> reader = inputFormat.createRecordReader(split, null);
            if (reader instanceof SpatialRecordReader3) {
                ((SpatialRecordReader3) reader).initialize(split, params);
            } else if (reader instanceof RTreeRecordReader3) {
                ((RTreeRecordReader3) reader).initialize(split, params);
            } else if (reader instanceof HDFRecordReader) {
                ((HDFRecordReader) reader).initialize(split, params);
            } else {
                throw new RuntimeException("Unknown record reader");
            }
            iterations++;

            while (reader.nextKeyValue()) {
                Iterable<Shape> shapes = reader.getCurrentValue();
                for (Shape shape : shapes) {
                    double distance = shape.distanceTo(queryPoint.x, queryPoint.y);
                    knn.insert(new ShapeWithDistance<S>((S) shape.clone(), distance));
                }
            }
            reader.close();
        }
        if (knn.size() >= k)
            kthDistance = knn.top().distance;
    }
    long resultCount = knn.size();
    if (outPath != null && params.getBoolean("output", true)) {
        FileSystem outFS = outPath.getFileSystem(params);
        PrintStream ps = new PrintStream(outFS.create(outPath));
        Vector<ShapeWithDistance<S>> resultsOrdered = new Vector<ShapeWithDistance<S>>((int) resultCount);
        resultsOrdered.setSize((int) resultCount);
        while (knn.size() > 0) {
            ShapeWithDistance<S> nextAnswer = knn.pop();
            resultsOrdered.set(knn.size(), nextAnswer);
        }

        Text text = new Text();
        for (ShapeWithDistance<S> answer : resultsOrdered) {
            text.clear();
            TextSerializerHelper.serializeDouble(answer.distance, text, ',');
            answer.shape.toText(text);
            ps.println(text);
        }
        ps.close();
    }
    TotalIterations.addAndGet(iterations);
    return resultCount;
}
From source file:edu.umn.cs.sthadoop.trajectory.KNNDTW.java
License:Open Source License
private static <S extends Shape> long knnLocal(Path inFile, Path outPath, OperationsParams params)
        throws IOException, InterruptedException {
    int iterations = 0;
    FileSystem fs = inFile.getFileSystem(params);
    Point queryPoint = (Point) OperationsParams.getShape(params, "point");
    int k = params.getInt("k", 1);
    // Top-k objects are retained in this object
    PriorityQueue<ShapeWithDistance<S>> knn = new KNNObjects<ShapeWithDistance<S>>(k);

    SpatialInputFormat3<Rectangle, Shape> inputFormat = new SpatialInputFormat3<Rectangle, Shape>();

    final GlobalIndex<Partition> gIndex = SpatialSite.getGlobalIndex(fs, inFile);
    double kthDistance = Double.MAX_VALUE;
    if (gIndex != null) {
        // There is a global index, use it
        PriorityQueue<ShapeWithDistance<Partition>> partitionsToProcess = new PriorityQueue<KNNDTW.ShapeWithDistance<Partition>>() {
            {
                initialize(gIndex.size());
            }

            @Override
            protected boolean lessThan(Object a, Object b) {
                return ((ShapeWithDistance<Partition>) a).distance < ((ShapeWithDistance<Partition>) b).distance;
            }
        };
        for (Partition p : gIndex) {
            double distance = p.getMinDistanceTo(queryPoint.x, queryPoint.y);
            partitionsToProcess.insert(new ShapeWithDistance<Partition>(p.clone(), distance));
        }

        while (partitionsToProcess.size() > 0 && partitionsToProcess.top().distance <= kthDistance) {
            ShapeWithDistance<Partition> partitionToProcess = partitionsToProcess.pop();
            // Process this partition
            Path partitionPath = new Path(inFile, partitionToProcess.shape.filename);
            long length = fs.getFileStatus(partitionPath).getLen();
            FileSplit fsplit = new FileSplit(partitionPath, 0, length, new String[0]);
            RecordReader<Rectangle, Iterable<Shape>> reader = inputFormat.createRecordReader(fsplit, null);
            if (reader instanceof SpatialRecordReader3) {
                ((SpatialRecordReader3) reader).initialize(fsplit, params);
            } else if (reader instanceof RTreeRecordReader3) {
                ((RTreeRecordReader3) reader).initialize(fsplit, params);
            } else if (reader instanceof HDFRecordReader) {
                ((HDFRecordReader) reader).initialize(fsplit, params);
            } else {
                throw new RuntimeException("Unknown record reader");
            }
            iterations++;

            while (reader.nextKeyValue()) {
                Iterable<Shape> shapes = reader.getCurrentValue();
                for (Shape shape : shapes) {
                    double distance = shape.distanceTo(queryPoint.x, queryPoint.y);
                    if (distance <= kthDistance)
                        knn.insert(new ShapeWithDistance<S>((S) shape.clone(), distance));
                }
            }
            reader.close();

            if (knn.size() >= k)
                kthDistance = knn.top().distance;
        }
    } else {
        // No global index, have to scan the whole file
        Job job = new Job(params);
        SpatialInputFormat3.addInputPath(job, inFile);
        List<InputSplit> splits = inputFormat.getSplits(job);

        for (InputSplit split : splits) {
            RecordReader<Rectangle, Iterable<Shape>> reader = inputFormat.createRecordReader(split, null);
            if (reader instanceof SpatialRecordReader3) {
                ((SpatialRecordReader3) reader).initialize(split, params);
            } else if (reader instanceof RTreeRecordReader3) {
                ((RTreeRecordReader3) reader).initialize(split, params);
            } else if (reader instanceof HDFRecordReader) {
                ((HDFRecordReader) reader).initialize(split, params);
            } else {
                throw new RuntimeException("Unknown record reader");
            }
            iterations++;

            while (reader.nextKeyValue()) {
                Iterable<Shape> shapes = reader.getCurrentValue();
                for (Shape shape : shapes) {
                    double distance = shape.distanceTo(queryPoint.x, queryPoint.y);
                    knn.insert(new ShapeWithDistance<S>((S) shape.clone(), distance));
                }
            }
            reader.close();
        }
        if (knn.size() >= k)
            kthDistance = knn.top().distance;
    }
    long resultCount = knn.size();
    if (outPath != null && params.getBoolean("output", true)) {
        FileSystem outFS = outPath.getFileSystem(params);
        PrintStream ps = new PrintStream(outFS.create(outPath));
        Vector<ShapeWithDistance<S>> resultsOrdered = new Vector<ShapeWithDistance<S>>((int) resultCount);
        resultsOrdered.setSize((int) resultCount);
        while (knn.size() > 0) {
            ShapeWithDistance<S> nextAnswer = knn.pop();
            resultsOrdered.set(knn.size(), nextAnswer);
        }

        Text text = new Text();
        for (ShapeWithDistance<S> answer : resultsOrdered) {
            text.clear();
            TextSerializerHelper.serializeDouble(answer.distance, text, ',');
            answer.shape.toText(text);
            ps.println(text);
        }
        ps.close();
    }
    TotalIterations.addAndGet(iterations);
    return resultCount;
}
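Both kNN examples implement the same best-first pruning: partitions are visited in increasing order of their minimum possible distance to the query point (computed from the global index, with getFileStatus supplying each partition file's length for the split), and scanning stops as soon as the nearest unvisited partition cannot contain anything closer than the current k-th answer. A self-contained sketch of that loop using plain java.util heaps, where bare distances stand in for the SpatialHadoop shapes and partitions:

import java.util.Comparator;
import java.util.PriorityQueue;

public final class BestFirstKnnSketch {
    public static void main(String[] args) {
        int k = 2;
        // Each row: the partition's minimum distance to the query, then its members' distances
        double[][] partitions = { { 0.0, 1.0, 5.0 }, { 2.0, 2.5, 3.0 }, { 6.0, 6.5 } };
        // Max-heap of the k best distances found so far
        PriorityQueue<Double> knn = new PriorityQueue<Double>(k, Comparator.reverseOrder());
        // Min-heap of partitions ordered by their minimum possible distance
        PriorityQueue<double[]> toProcess = new PriorityQueue<double[]>(
                Comparator.comparingDouble(p -> p[0]));
        for (double[] p : partitions)
            toProcess.add(p);
        double kth = Double.MAX_VALUE;
        while (!toProcess.isEmpty() && toProcess.peek()[0] <= kth) {
            double[] part = toProcess.poll();
            for (int i = 1; i < part.length; i++) { // scan this partition's members
                if (part[i] <= kth) {
                    knn.add(part[i]);
                    if (knn.size() > k)
                        knn.poll(); // drop the current worst answer
                }
            }
            if (knn.size() >= k)
                kth = knn.peek(); // tighten the pruning bound
        }
        System.out.println("k-th distance: " + kth); // prints 2.5 for this data
    }
}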
From source file:etl.cmd.test.XFsTestCase.java
License:Apache License
private void setAllPermissions(FileSystem fileSystem, Path path) throws IOException {
    FsPermission fsPermission = new FsPermission(FsAction.ALL, FsAction.NONE, FsAction.NONE);
    try {
        fileSystem.setPermission(path, fsPermission);
    } catch (IOException ex) {
        // NOP
    }
    FileStatus fileStatus = fileSystem.getFileStatus(path);
    if (fileStatus.isDir()) {
        for (FileStatus status : fileSystem.listStatus(path)) {
            setAllPermissions(fileSystem, status.getPath());
        }
    }
}
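Note that only the root of the tree needs an explicit getFileStatus call; listStatus already returns FileStatus objects for the children, so the recursion incurs no extra per-child status lookups. A sketch of the same walk against the non-deprecated API, using listStatusIterator so large directories are not materialized all at once (class name is illustrative):

import java.io.IOException;

import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.RemoteIterator;
import org.apache.hadoop.fs.permission.FsAction;
import org.apache.hadoop.fs.permission.FsPermission;

public final class PermissionWalker {
    /** Recursively set rwx------ on a tree, best-effort as in the original. */
    public static void setAllPermissions(FileSystem fs, Path path) throws IOException {
        FsPermission perm = new FsPermission(FsAction.ALL, FsAction.NONE, FsAction.NONE);
        try {
            fs.setPermission(path, perm);
        } catch (IOException ignored) {
            // NOP, as in the original
        }
        if (fs.getFileStatus(path).isDirectory()) { // non-deprecated replacement for isDir()
            RemoteIterator<FileStatus> children = fs.listStatusIterator(path);
            while (children.hasNext()) {
                setAllPermissions(fs, children.next().getPath());
            }
        }
    }
}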
From source file:eu.edisonproject.classification.tfidf.mapreduce.TermWordFrequency.java
License:Apache License
@Override
public int run(String[] args) throws Exception {
    Configuration jobconf = getConf();
    Job job = Job.getInstance(jobconf);
    FileSystem fs = FileSystem.get(jobconf);
    fs.delete(new Path(args[1]), true);
    Path dictionary = new Path(args[0]);
    Path dictionaryHdfs = dictionary;

    Path localDocs = new Path(args[2]);
    Path hdfsDocs = localDocs;

    Path stopwordsLocal = new Path(args[3]);
    Path stopwordsHDFS = stopwordsLocal;

    if (!jobconf.get(FileSystem.FS_DEFAULT_NAME_KEY).startsWith("file")) {
        dictionaryHdfs = new Path(dictionary.getName());
        if (!fs.exists(dictionaryHdfs)) {
            fs.copyFromLocalFile(dictionary, dictionaryHdfs);
        }
        hdfsDocs = new Path(localDocs.getName());
        fs.mkdirs(hdfsDocs);
        fs.deleteOnExit(hdfsDocs);

        File[] stats = new File(localDocs.toString()).listFiles();
        for (File stat : stats) {
            Path filePath = new Path(stat.getAbsolutePath());
            if (FilenameUtils.getExtension(filePath.getName()).endsWith("txt")) {
                Path dest = new Path(hdfsDocs.toUri() + "/" + filePath.getName());
                fs.copyFromLocalFile(filePath, dest);
            }
        }

        stopwordsHDFS = new Path(stopwordsLocal.getName());
        if (!fs.exists(stopwordsHDFS)) {
            fs.copyFromLocalFile(stopwordsLocal, stopwordsHDFS);
        }
    }

    FileStatus stopwordsStatus = fs.getFileStatus(stopwordsHDFS);
    stopwordsHDFS = stopwordsStatus.getPath();
    job.addCacheFile(stopwordsHDFS.toUri());
    job.addCacheFile(hdfsDocs.toUri());

    job.setJarByClass(TermWordFrequency.class);
    job.setJobName("Word Frequency Term Driver");

    FileInputFormat.setInputPaths(job, dictionaryHdfs);
    FileOutputFormat.setOutputPath(job, new Path(args[1]));

    // job.setInputFormatClass(TextInputFormat.class);
    job.setInputFormatClass(NLineInputFormat.class);
    NLineInputFormat.addInputPath(job, dictionaryHdfs);
    NLineInputFormat.setNumLinesPerSplit(job, Integer.valueOf(args[4]));
    NLineInputFormat.setMaxInputSplitSize(job, 500);

    job.setMapperClass(TermWordFrequencyMapper.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(IntWritable.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Integer.class);

    job.setReducerClass(TermWordFrequencyReducer.class);

    return (job.waitForCompletion(true) ? 0 : 1);
}
From source file:eu.edisonproject.classification.tfidf.mapreduce.WordFrequencyInDocDriver.java
License:Apache License
@Override
public int run(String[] args) throws Exception {
    // itemset = new LinkedList<String>();
    // BufferedReader br = new BufferedReader(new InputStreamReader(new FileInputStream(args[2])));
    // String line;
    // while ((line = br.readLine()) != null) {
    //     String[] components = line.split("/");
    //     itemset.add(components[0]);
    // }
    Configuration conf = getConf();
    Job job = Job.getInstance(conf);
    job.setJarByClass(WordFrequencyInDocDriver.class);
    job.setJobName("Word Frequency In Doc Driver");

    FileSystem fs = FileSystem.get(conf);
    fs.delete(new Path(args[1]), true);
    Path in = new Path(args[0]);
    Path inHdfs = in;

    Path dictionaryLocal = new Path(args[2]);
    Path dictionaryHDFS = dictionaryLocal;

    Path stopwordsLocal = new Path(args[3]);
    Path stopwordsHDFS = stopwordsLocal;

    if (!conf.get(FileSystem.FS_DEFAULT_NAME_KEY).startsWith("file")) {
        inHdfs = new Path(in.getName());
        fs.delete(inHdfs, true);
        fs.copyFromLocalFile(in, inHdfs);
        fs.deleteOnExit(inHdfs);

        dictionaryHDFS = new Path(dictionaryLocal.getName());
        if (!fs.exists(dictionaryHDFS)) {
            fs.copyFromLocalFile(dictionaryLocal, dictionaryHDFS);
        }
        stopwordsHDFS = new Path(stopwordsLocal.getName());
        if (!fs.exists(stopwordsHDFS)) {
            fs.copyFromLocalFile(stopwordsLocal, stopwordsHDFS);
        }
    }

    FileStatus dictionaryStatus = fs.getFileStatus(dictionaryHDFS);
    dictionaryHDFS = dictionaryStatus.getPath();
    job.addCacheFile(dictionaryHDFS.toUri());

    FileStatus stopwordsStatus = fs.getFileStatus(stopwordsHDFS);
    stopwordsHDFS = stopwordsStatus.getPath();
    job.addCacheFile(stopwordsHDFS.toUri());

    FileInputFormat.setInputPaths(job, inHdfs);
    FileOutputFormat.setOutputPath(job, new Path(args[1]));

    job.setInputFormatClass(AvroKeyInputFormat.class);
    job.setMapperClass(WordFrequencyInDocMapper.class);
    AvroJob.setInputKeySchema(job, Document.getClassSchema());
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(IntWritable.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Integer.class);

    job.setReducerClass(WordFrequencyInDocReducer.class);

    return (job.waitForCompletion(true) ? 0 : 1);
}
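In both TF-IDF drivers, getFileStatus also serves to qualify a path: the FileStatus returned by the file system carries the fully-qualified path (scheme and authority included), which is the form addCacheFile needs as a URI. A sketch of that qualification step in isolation; fs.makeQualified(path) would be a more direct alternative, and the class and method names here are illustrative:

import java.io.IOException;
import java.net.URI;

import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapreduce.Job;

public final class CacheFileHelper {
    /** Resolve a (possibly relative) path to a fully-qualified URI and cache it. */
    public static void addQualifiedCacheFile(Job job, FileSystem fs, Path p) throws IOException {
        // getFileStatus(p).getPath() is fully qualified, e.g. a hdfs://host:port/... URI (example host)
        URI qualified = fs.getFileStatus(p).getPath().toUri();
        job.addCacheFile(qualified);
    }
}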