Java tutorial
/* * Copyright 2009-2016 DigitalGlobe, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and limitations under the License. * */ package org.mrgeo.hdfs.vector.shp; import edu.umd.cs.findbugs.annotations.SuppressFBWarnings; import org.apache.commons.io.FilenameUtils; import org.apache.hadoop.fs.FSDataInputStream; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.mrgeo.geometry.*; import org.mrgeo.hdfs.utils.HadoopFileUtils; import org.mrgeo.hdfs.vector.GeometryInputStream; import org.mrgeo.hdfs.vector.ShapefileGeometryCollection; import org.mrgeo.hdfs.vector.shp.dbase.DbaseException; import org.mrgeo.hdfs.vector.shp.esri.ESRILayer; import org.mrgeo.hdfs.vector.shp.esri.FormatException; import org.mrgeo.hdfs.vector.shp.esri.geom.*; import java.io.*; import java.util.Iterator; import java.util.List; import java.util.NoSuchElementException; /** * Reads a Shapefile as an InputStream (no seeking). This implementation does * not read the DBF at this time. * * Limitations: The Shapefile may not contain "skipped" records as a result of * editing. The Shapefile may not have polygons with holes. There are probably * other scenarios that this Shapefile reader will not handle properly. * * @author jason.surratt * */ @SuppressFBWarnings(value = "DESERIALIZATION_GADGET", justification = "verified read/writeObject") public class ShapefileReader implements GeometryInputStream, ShapefileGeometryCollection, Serializable { static class LocalIterator implements Iterator<WritableGeometry> { private int currentIndex = 0; private ShapefileReader parent; public LocalIterator(ShapefileReader parent) { this.parent = parent; } @Override public boolean hasNext() { return currentIndex < parent.size(); } @Override public WritableGeometry next() { if (currentIndex >= parent.size()) { throw new NoSuchElementException("End of iterator"); } return parent.get(currentIndex++); } @Override public void remove() { throw new UnsupportedOperationException(); } } static class ReadOnlyLocalIterator implements Iterator<Geometry> { private int currentIndex = 0; private ShapefileReader parent; public ReadOnlyLocalIterator(ShapefileReader parent) { this.parent = parent; } @Override public boolean hasNext() { return currentIndex < parent.size(); } @Override public Geometry next() { if (currentIndex >= parent.size()) { throw new NoSuchElementException("End of iterator"); } return parent.get(currentIndex++); } @Override public void remove() { throw new UnsupportedOperationException(); } } private enum Source { FILE, HDFS, INVALID } private static final long serialVersionUID = 1L; private static WritableGeometry convertToGeometry(JShape shape) { WritableGeometry result; switch (shape.getType()) { case JShape.POINT: JPoint p = (JPoint) shape; result = GeometryFactory.createPoint(p.getX(), p.getY()); break; case JShape.POINTZ: JPointZ pz = (JPointZ) shape; result = GeometryFactory.createPoint(pz.getX(), pz.getY(), pz.getZ()); break; case JShape.POLYGON: JPolygon poly = (JPolygon) shape; result = convertToPolygon(poly); break; case JShape.POLYGONZ: JPolygonZ polyZ = (JPolygonZ) shape; result = convertToPolygon(polyZ); break; case JShape.POLYLINE: JPolyLine line = (JPolyLine) shape; result = convertToLineString(line); break; case JShape.POLYLINEZ: JPolyLineZ lineZ = (JPolyLineZ) shape; result = convertToLineString(lineZ); break; default: throw new IllegalArgumentException("Unsupported geometry type."); } return result; } private static WritableLineString convertToLineString(JPolyLine line) { WritableLineString result = GeometryFactory.createLineString(); for (int i = 0; i < line.getPointCount(); i++) { Coord c = line.getPoint(i); result.addPoint(GeometryFactory.createPoint(c.x, c.y)); } return result; } private static WritableLineString convertToLineString(JPolyLineZ line) { WritableLineString result = GeometryFactory.createLineString(); for (int i = 0; i < line.getPointCount(); i++) { Coord c = line.getPoint(i); result.addPoint(GeometryFactory.createPoint(c.x, c.y)); } return result; } private static WritableGeometry convertToPolygon(JPolygon poly) { WritableGeometry result = null; WritableGeometryCollection gc = null; if (poly.getPartCount() > 1) { gc = GeometryFactory.createGeometryCollection(); result = gc; } for (int p = 0; p < poly.getPartCount(); p++) { WritablePolygon r = GeometryFactory.createPolygon(); int part = poly.getPart(p); int end; if (p == poly.getPartCount() - 1) { end = poly.getPointCount(); } else { end = poly.getPart(p + 1); } WritableLinearRing exteriorRing = GeometryFactory.createLinearRing(); for (int i = part; i < end; i++) { Coord c = poly.getPoint(i); exteriorRing.addPoint(GeometryFactory.createPoint(c.x, c.y)); } r.setExteriorRing(exteriorRing); // if there is only one exterior ring if (gc == null) { result = r; } // else if there is more than one exterior ring else { gc.addGeometry(r); } } return result; } private static WritablePolygon convertToPolygon(JPolygonZ poly) { if (poly.getPartCount() == 0) { return null; } WritablePolygon result = GeometryFactory.createPolygon(); for (int part = 0; part < poly.getPartCount(); part++) { WritableLinearRing ring = GeometryFactory.createLinearRing(); int maxPointIndex = (part == poly.getPartCount() - 1) ? poly.getPointCount() : poly.getPart(part + 1); for (int i = poly.getPart(part); i < maxPointIndex; i++) { Coord c = poly.getPoint(i); ring.addPoint(GeometryFactory.createPoint(c.x, c.y, poly.getZ(i))); } if (part == 0) { result.setExteriorRing(ring); } else { result.addInteriorRing(ring); } } return result; } transient int currentIndex = 0; // only one of these should be set at any time. If path is set, use Hadoop, // otherwise use // standard files. String fileName = null; transient ESRILayer shpFile; Source source = Source.INVALID; public ShapefileReader(Path path) throws IOException { load(path); } /** * Unfortunately Shapefiles require random access to multiple files. At this * point we will assume that the files are on the local filesystem, not HDFS. * * @param shpFilename * @throws IOException */ public ShapefileReader(String shpFilename) throws IOException { load(shpFilename); } @Override public WritableGeometry get(int index) { if (index >= shpFile.getNumRecords() || index < 0) { throw new IllegalArgumentException( String.format("Index out of range. (%d, %d)", index, shpFile.getNumRecords())); } WritableGeometry g = convertToGeometry(shpFile.getShape(index)); List<?> attributes; try { attributes = shpFile.getTable().getRow(index); } catch (Exception e) { e.printStackTrace(); return null; } String[] columns = shpFile.getDataColumns(); for (int i = 0; i < attributes.size(); i++) { Object a = attributes.get(i); if (a != null) { g.setAttribute(columns[i], a.toString()); } else { g.setAttribute(columns[i], null); } } return g; } @Override public String getProjection() { return shpFile.getProjection(); } /* * (non-Javadoc) * * @see com.spadac.Geometry.GeometryInputStream#hasNext() */ @Override public boolean hasNext() { return (currentIndex < shpFile.getNumRecords()); } @Override public Iterator<WritableGeometry> iterator() { return new LocalIterator(this); } public Iterator<Geometry> readOnlyIterator() { return new ReadOnlyLocalIterator(this); } private void load(Path path) throws IOException { fileName = path.toString(); source = Source.HDFS; currentIndex = 0; try { String baseName = ESRILayer.getBaseName(path.toString()); FileSystem fs = HadoopFileUtils.getFileSystem(path); SeekableHdfsInput shp = new SeekableHdfsInput(path); SeekableHdfsInput shx = new SeekableHdfsInput(new Path(baseName + ".shx")); SeekableHdfsInput dbf = new SeekableHdfsInput(new Path(baseName + ".dbf")); FSDataInputStream prj = fs.open(new Path(baseName + ".prj")); try { shpFile = ESRILayer.open(shp, shx, dbf, prj); } finally { prj.close(); } } catch (FormatException e) { throw new IOException("Shapefile format error", e); } catch (DbaseException e) { throw new IOException("Error reading shapefile", e); } reset(); } private void load(String shpFilename) throws IOException { fileName = shpFilename; source = Source.FILE; currentIndex = 0; try { shpFile = ESRILayer.open(shpFilename); } catch (FormatException e) { throw new IOException("Shapefile format error", e); } catch (DbaseException e) { throw new IOException("Error reading shapefile", e); } reset(); } /* * (non-Javadoc) * * @see com.spadac.Geometry.GeometryInputStream#next() */ @Override public WritableGeometry next() { if (currentIndex < shpFile.getNumRecords()) { return get(currentIndex++); } throw new NoSuchElementException("End of iterator"); } private void writeObject(ObjectOutputStream out) throws IOException { out.writeUTF(fileName); out.writeInt(source.ordinal()); } @SuppressFBWarnings(value = { "WEAK_FILENAMEUTILS", "PATH_TRAVERSAL_IN" }, justification = "Correctly filtered parameters") private void readObject(ObjectInputStream in) throws ClassNotFoundException, IOException { fileName = in.readUTF(); source = Source.values()[in.readInt()]; if (source == Source.FILE) { File f = new File(FilenameUtils.getFullPath(fileName), FilenameUtils.getName(fileName)); if (f.exists()) { load(fileName); } } else { if (HadoopFileUtils.exists(fileName)) { load(new Path(fileName)); } } } /* * (non-Javadoc) * * @see java.util.Iterator#remove() */ @Override public void remove() { throw new UnsupportedOperationException(); } public void reset() { currentIndex = 0; } @Override public int size() { return shpFile.getCount(); } @Override public void close() { if (shpFile != null) { try { shpFile.close(); } catch (IOException e) { e.printStackTrace(); } shpFile = null; } } @Override protected void finalize() { if (shpFile != null) { try { shpFile.close(); } catch (IOException e) { } } } }