List of usage examples for java.util Vector add
public synchronized boolean add(E e)
From source file:mzmatch.ipeak.normalisation.VanDeSompele.java
public static void main(String args[]) { try {/*from w w w . j ava 2 s .co m*/ Tool.init(); // parse the commandline options Options options = new Options(); CmdLineParser cmdline = new CmdLineParser(options); // check whether we need to show the help cmdline.parse(args); if (options.help) { Tool.printHeader(System.out, application, version); cmdline.printUsage(System.out, ""); return; } if (options.verbose) { Tool.printHeader(System.out, application, version); cmdline.printOptions(); } // check the command-line parameters { // if the output directories do not exist, create them if (options.output != null) Tool.createFilePath(options.output, true); } // load the data if (options.verbose) System.out.println("Loading data"); ParseResult result = PeakMLParser.parse(new FileInputStream(options.input), true); Header header = result.header; IPeakSet<IPeakSet<? extends IPeak>> peaksets = (IPeakSet<IPeakSet<? extends IPeak>>) result.measurement; int nrmeasurements = header.getNrMeasurementInfos(); // remove the stability factor annotation for (IPeak peak : peaksets) peak.removeAnnotation("stability factor"); // load the database if (options.verbose) System.out.println("Loading the molecule database"); HashMap<String, Molecule> database = MoleculeIO.parseXml(new FileInputStream(options.database)); // filter the set to include only identifiable metabolites if (options.verbose) System.out.println("Creating selection"); Vector<IPeakSet<? extends IPeak>> selection = new Vector<IPeakSet<? extends IPeak>>(); for (Molecule molecule : database.values()) { double mass = molecule.getMass(Mass.MONOISOTOPIC); double delta = PeriodicTable.PPM(mass, options.ppm); // get the most intense peak containing all the measurements Vector<IPeakSet<? extends IPeak>> neighbourhoud = peaksets.getPeaksInMassRange(mass - delta, mass + delta); Collections.sort(neighbourhoud, IPeak.sort_intensity_descending); for (IPeakSet<? extends IPeak> neighbour : neighbourhoud) if (count(neighbour) == nrmeasurements) { selection.add(neighbour); break; } } // calculate the stability factor for each peak in the selection if (options.verbose) System.out.println("Calculating stability factors"); for (int peakid1 = 0; peakid1 < selection.size(); ++peakid1) { double stddeviations[] = new double[selection.size()]; IPeakSet<? extends IPeak> peakset1 = selection.get(peakid1); for (int peakid2 = 0; peakid2 < selection.size(); ++peakid2) { IPeakSet<? extends IPeak> peakset2 = selection.get(peakid2); double values[] = new double[nrmeasurements]; for (int measurementid = 0; measurementid < nrmeasurements; ++measurementid) { int measurementid1 = peakset1.get(measurementid).getMeasurementID(); int setid1 = header.indexOfSetInfo(header.getSetInfoForMeasurementID(measurementid1)); int measurementid2 = peakset2.get(measurementid).getMeasurementID(); int setid2 = header.indexOfSetInfo(header.getSetInfoForMeasurementID(measurementid2)); if (setid1 != setid2 || measurementid1 != measurementid2) System.err.println("[WARNING]: differing setid or spectrumid for comparison"); values[measurementid] = Math.log(peakset1.get(measurementid).getIntensity() / peakset2.get(measurementid).getIntensity()) / Math.log(2); } stddeviations[peakid2] = Statistical.stddev(values); } peakset1.addAnnotation("stability factor", Statistical.mean(stddeviations)); } // sort on the stability factor Collections.sort(selection, new IPeak.AnnotationAscending("stability factor")); // take the top 10% and calculate the geometric mean if (options.verbose) System.out.println("Calculating normalisation factors"); int nrselected = (int) (0.1 * selection.size()); if (nrselected < 10) nrselected = (10 < selection.size() ? 10 : selection.size()); double normalization_factors[] = new double[nrmeasurements]; for (int measurementid = 0; measurementid < nrmeasurements; ++measurementid) { double values[] = new double[nrselected]; for (int i = 0; i < nrselected; ++i) { IPeak peak = selection.get(i).get(measurementid); values[i] = peak.getIntensity(); } normalization_factors[measurementid] = Statistical.geomean(values); } // scale the found normalization factors double maxnf = Statistical.max(normalization_factors); for (int sampleid = 0; sampleid < nrmeasurements; ++sampleid) normalization_factors[sampleid] /= maxnf; // write the selection if needed if (options.selection != null) { if (options.verbose) System.out.println("Writing original selection data"); PeakMLWriter.write(result.header, selection, null, new GZIPOutputStream(new FileOutputStream(options.selection)), null); } // normalize all the peaks if (options.verbose) System.out.println("Normalizing all the entries"); for (IPeakSet<? extends IPeak> peakset : peaksets) { for (int measurementid = 0; measurementid < nrmeasurements; ++measurementid) { // TODO why did I do this again ? int id = 0; int setid = 0; int spectrumid = 0; for (int i = 0; i < header.getNrSetInfos(); ++i) { SetInfo set = header.getSetInfos().get(i); if (id + set.getNrMeasurementIDs() > measurementid) { setid = i; spectrumid = measurementid - id; break; } else id += set.getNrMeasurementIDs(); } MassChromatogram<Peak> masschromatogram = null; for (IPeak p : peakset) { int mymeasurementid = p.getMeasurementID(); int mysetid = header.indexOfSetInfo(header.getSetInfoForMeasurementID(mymeasurementid)); if (mysetid == setid && mymeasurementid == spectrumid) { masschromatogram = (MassChromatogram<Peak>) p; break; } } if (masschromatogram == null) continue; for (IPeak peak : masschromatogram.getPeaks()) peak.setIntensity(peak.getIntensity() / normalization_factors[measurementid]); } } // write the selection if needed if (options.selection_normalized != null) { if (options.verbose) System.out.println("Writing the normalized selection data"); PeakMLWriter.write(result.header, selection, null, new GZIPOutputStream(new FileOutputStream(options.selection_normalized)), null); } // write the factors if needed if (options.factors != null) { if (options.verbose) System.out.println("Writing the normalization factors"); PrintStream out = new PrintStream(options.factors); for (int measurementid = 0; measurementid < nrmeasurements; ++measurementid) out.println(header.getMeasurementInfo(measurementid).getLabel() + "\t" + normalization_factors[measurementid]); } // write the plot if needed if (options.img != null) { if (options.verbose) System.out.println("Writing the graph"); DefaultCategoryDataset dataset = new DefaultCategoryDataset(); JFreeChart linechart = ChartFactory.createLineChart(null, "measurement", "normalization factor", dataset, PlotOrientation.VERTICAL, false, // legend false, // tooltips false // urls ); CategoryPlot plot = (CategoryPlot) linechart.getPlot(); CategoryAxis axis = (CategoryAxis) plot.getDomainAxis(); axis.setCategoryLabelPositions(CategoryLabelPositions.UP_45); LineAndShapeRenderer renderer = (LineAndShapeRenderer) plot.getRenderer(); renderer.setSeriesShapesFilled(0, true); renderer.setSeriesShapesVisible(0, true); linechart.setBackgroundPaint(Color.WHITE); linechart.setBorderVisible(false); linechart.setAntiAlias(true); plot.setBackgroundPaint(Color.WHITE); plot.setDomainGridlinesVisible(true); plot.setRangeGridlinesVisible(true); // create the datasets for (int measurementid = 0; measurementid < nrmeasurements; ++measurementid) dataset.addValue(normalization_factors[measurementid], "", header.getMeasurementInfo(measurementid).getLabel()); JFreeChartTools.writeAsPDF(new FileOutputStream(options.img), linechart, 800, 500); } // write the normalized values if (options.verbose) System.out.println("Writing the normalized data"); PeakMLWriter.write(result.header, peaksets.getPeaks(), null, new GZIPOutputStream(new FileOutputStream(options.output)), null); } catch (Exception e) { Tool.unexpectedError(e, application); } }
From source file:edu.ku.brc.specify.tools.datamodelgenerator.DataDict.java
/** * @param args/*from w w w. j av a 2s. com*/ */ @SuppressWarnings("unchecked") public static void main(String[] args) { try { DataDict dd = new DataDict(); dd.processSpecify5Schema(); dd.compareSchemas(); //dd.writeSp5Tables(); Vector<String> names = new Vector<String>(); Element doc = XMLHelper .readFileToDOM4J(new File("src/edu/ku/brc/specify/tools/datamodelgenerator/datadict.xml")); Element div = (Element) doc.selectSingleNode("//div[@class='plain']"); if (div != null) { for (Object linkObj : div.selectNodes("p/a")) { //System.out.println(linkObj); String href = XMLHelper.getAttr((Element) linkObj, "href", null); if (StringUtils.isNotEmpty(href)) { //System.out.println("["+href+"]"); names.add(href); } } } for (Element table : (List<Element>) div.selectNodes("table")) { dd.processTable(table); break; } //dd.writeTables(); } catch (Exception ex) { edu.ku.brc.af.core.UsageTracker.incrHandledUsageCount(); edu.ku.brc.exceptions.ExceptionTracker.getInstance().capture(DataDict.class, ex); ex.printStackTrace(); } }
From source file:edu.umn.cs.spatialHadoop.operations.KNN.java
public static void main(String[] args) throws IOException { final OperationsParams params = new OperationsParams(new GenericOptionsParser(args)); Path[] paths = params.getPaths(); if (paths.length <= 1 && !params.checkInput()) { printUsage();//from ww w. j a v a 2s. co m System.exit(1); } if (paths.length > 1 && !params.checkInputOutput()) { printUsage(); System.exit(1); } final Path inputFile = params.getInputPath(); int count = params.getInt("count", 1); double closeness = params.getFloat("closeness", -1.0f); final Point[] queryPoints = closeness < 0 ? params.getShapes("point", new Point()) : new Point[count]; final FileSystem fs = inputFile.getFileSystem(params); final int k = params.getInt("k", 1); int concurrency = params.getInt("concurrency", 100); if (k == 0) { LOG.warn("k = 0"); } if (queryPoints.length == 0) { printUsage(); throw new RuntimeException("Illegal arguments"); } final Path outputPath = paths.length > 1 ? paths[1] : null; if (closeness >= 0) { // Get query points according to its closeness to grid intersections GlobalIndex<Partition> gindex = SpatialSite.getGlobalIndex(fs, inputFile); long seed = params.getLong("seed", System.currentTimeMillis()); Random random = new Random(seed); for (int i = 0; i < count; i++) { int i_block = random.nextInt(gindex.size()); int direction = random.nextInt(4); // Generate a point in the given direction // Get center point (x, y) Iterator<Partition> iterator = gindex.iterator(); while (i_block-- >= 0) iterator.next(); Partition partition = iterator.next(); double cx = (partition.x1 + partition.x2) / 2; double cy = (partition.y1 + partition.y2) / 2; double cw = partition.x2 - partition.x1; double ch = partition.y2 - partition.y1; int signx = ((direction & 1) == 0) ? 1 : -1; int signy = ((direction & 2) == 1) ? 1 : -1; double x = cx + cw * closeness / 2 * signx; double y = cy + ch * closeness / 2 * signy; queryPoints[i] = new Point(x, y); } } final BooleanWritable exceptionHappened = new BooleanWritable(); Thread.UncaughtExceptionHandler h = new Thread.UncaughtExceptionHandler() { public void uncaughtException(Thread th, Throwable ex) { ex.printStackTrace(); exceptionHappened.set(true); } }; // Run each query in a separate thread final Vector<Thread> threads = new Vector<Thread>(); for (int i = 0; i < queryPoints.length; i++) { Thread thread = new Thread() { @Override public void run() { try { Point query_point = queryPoints[threads.indexOf(this)]; OperationsParams newParams = new OperationsParams(params); OperationsParams.setShape(newParams, "point", query_point); Job job = knn(inputFile, outputPath, params); } catch (IOException e) { e.printStackTrace(); } catch (InterruptedException e) { e.printStackTrace(); } catch (ClassNotFoundException e) { e.printStackTrace(); } } }; thread.setUncaughtExceptionHandler(h); threads.add(thread); } long t1 = System.currentTimeMillis(); do { // Ensure that there is at least MaxConcurrentThreads running int i = 0; while (i < concurrency && i < threads.size()) { Thread.State state = threads.elementAt(i).getState(); if (state == Thread.State.TERMINATED) { // Thread already terminated, remove from the queue threads.remove(i); } else if (state == Thread.State.NEW) { // Start the thread and move to next one threads.elementAt(i++).start(); } else { // Thread is still running, skip over it i++; } } if (!threads.isEmpty()) { try { // Sleep for 10 seconds or until the first thread terminates threads.firstElement().join(10000); } catch (InterruptedException e) { e.printStackTrace(); } } } while (!threads.isEmpty()); long t2 = System.currentTimeMillis(); if (exceptionHappened.get()) throw new RuntimeException("Not all jobs finished correctly"); System.out.println("Time for " + queryPoints.length + " jobs is " + (t2 - t1) + " millis"); System.out.println("Total iterations: " + TotalIterations); }
From source file:edu.umn.cs.sthadoop.trajectory.KNNDTW.java
public static void main(String[] args) throws IOException { args = new String[10]; args[0] = "/export/scratch/mntgData/geolifeGPS/geolife_Trajectories_1.3/HDFS/index_geolife/yyyy-MM/2008-05"; args[1] = "/export/scratch/mntgData/geolifeGPS/geolife_Trajectories_1.3/HDFS/knn-dis-result"; args[2] = "shape:edu.umn.cs.sthadoop.trajectory.GeolifeTrajectory"; args[3] = "interval:2008-05-01,2008-05-30"; args[4] = "time:month"; args[5] = "k:1"; args[6] = "traj:39.9119983,116.606835;39.9119783,116.6065483;39.9119599,116.6062649;39.9119416,116.6059899;39.9119233,116.6057282;39.9118999,116.6054783;39.9118849,116.6052366;39.9118666,116.6050099;39.91185,116.604775;39.9118299,116.604525;39.9118049,116.6042649;39.91177,116.6040166;39.9117516,116.6037583;39.9117349,116.6035066;39.9117199,116.6032666;39.9117083,116.6030232;39.9117,116.6027566;39.91128,116.5969383;39.9112583,116.5966766;39.9112383,116.5964232;39.9112149,116.5961699;39.9111933,116.5959249;39.9111716,116.5956883"; args[7] = "-overwrite"; args[8] = "-local";// "-no-local"; args[9] = "point:39.9119983,116.606835"; final OperationsParams params = new OperationsParams(new GenericOptionsParser(args)); Path[] paths = params.getPaths(); if (paths.length <= 1 && !params.checkInput()) { printUsage();/* w w w . j a va 2 s . com*/ System.exit(1); } if (paths.length > 1 && !params.checkInputOutput()) { printUsage(); System.exit(1); } final Path inputFile = params.getInputPath(); int count = params.getInt("count", 1); double closeness = params.getFloat("closeness", -1.0f); final Point[] queryPoints = closeness < 0 ? params.getShapes("point", new Point()) : new Point[count]; final FileSystem fs = inputFile.getFileSystem(params); final int k = params.getInt("k", 1); int concurrency = params.getInt("concurrency", 100); if (k == 0) { LOG.warn("k = 0"); } if (queryPoints.length == 0) { printUsage(); throw new RuntimeException("Illegal arguments"); } final Path outputPath = paths.length > 1 ? paths[1] : null; if (closeness >= 0) { // Get query points according to its closeness to grid intersections GlobalIndex<Partition> gindex = SpatialSite.getGlobalIndex(fs, inputFile); long seed = params.getLong("seed", System.currentTimeMillis()); Random random = new Random(seed); for (int i = 0; i < count; i++) { int i_block = random.nextInt(gindex.size()); int direction = random.nextInt(4); // Generate a point in the given direction // Get center point (x, y) Iterator<Partition> iterator = gindex.iterator(); while (i_block-- >= 0) iterator.next(); Partition partition = iterator.next(); double cx = (partition.x1 + partition.x2) / 2; double cy = (partition.y1 + partition.y2) / 2; double cw = partition.x2 - partition.x1; double ch = partition.y2 - partition.y1; int signx = ((direction & 1) == 0) ? 1 : -1; int signy = ((direction & 2) == 1) ? 1 : -1; double x = cx + cw * closeness / 2 * signx; double y = cy + ch * closeness / 2 * signy; queryPoints[i] = new Point(x, y); } } final BooleanWritable exceptionHappened = new BooleanWritable(); Thread.UncaughtExceptionHandler h = new Thread.UncaughtExceptionHandler() { public void uncaughtException(Thread th, Throwable ex) { ex.printStackTrace(); exceptionHappened.set(true); } }; // Run each query in a separate thread final Vector<Thread> threads = new Vector<Thread>(); for (int i = 0; i < queryPoints.length; i++) { Thread thread = new Thread() { @Override public void run() { try { Point query_point = queryPoints[threads.indexOf(this)]; OperationsParams newParams = new OperationsParams(params); OperationsParams.setShape(newParams, "point", query_point); Job job = knn(inputFile, outputPath, params); } catch (IOException e) { e.printStackTrace(); } catch (InterruptedException e) { e.printStackTrace(); } catch (ClassNotFoundException e) { e.printStackTrace(); } } }; thread.setUncaughtExceptionHandler(h); threads.add(thread); } long t1 = System.currentTimeMillis(); do { // Ensure that there is at least MaxConcurrentThreads running int i = 0; while (i < concurrency && i < threads.size()) { Thread.State state = threads.elementAt(i).getState(); if (state == Thread.State.TERMINATED) { // Thread already terminated, remove from the queue threads.remove(i); } else if (state == Thread.State.NEW) { // Start the thread and move to next one threads.elementAt(i++).start(); } else { // Thread is still running, skip over it i++; } } if (!threads.isEmpty()) { try { // Sleep for 10 seconds or until the first thread terminates threads.firstElement().join(10000); } catch (InterruptedException e) { e.printStackTrace(); } } } while (!threads.isEmpty()); long t2 = System.currentTimeMillis(); if (exceptionHappened.get()) throw new RuntimeException("Not all jobs finished correctly"); System.out.println("Time for " + queryPoints.length + " jobs is " + (t2 - t1) + " millis"); System.out.println("Total iterations: " + TotalIterations); }
From source file:com.ricemap.spateDB.operations.RangeQuery.java
public static void main(String[] args) throws IOException { CommandLineArguments cla = new CommandLineArguments(args); final QueryInput query = cla.getQuery(); final Path[] paths = cla.getPaths(); if (paths.length == 0 || (cla.getPrism() == null && cla.getSelectionRatio() < 0.0f)) { printUsage();//from w ww . ja v a 2 s . c om throw new RuntimeException("Illegal parameters"); } JobConf conf = new JobConf(FileMBR.class); final Path inputFile = paths[0]; final FileSystem fs = inputFile.getFileSystem(conf); if (!fs.exists(inputFile)) { printUsage(); throw new RuntimeException("Input file does not exist"); } final Path outputPath = paths.length > 1 ? paths[1] : null; final Prism[] queryRanges = cla.getPrisms(); int concurrency = cla.getConcurrency(); final Shape stockShape = cla.getShape(true); final boolean overwrite = cla.isOverwrite(); final long[] results = new long[queryRanges.length]; final Vector<Thread> threads = new Vector<Thread>(); final BooleanWritable exceptionHappened = new BooleanWritable(); Thread.UncaughtExceptionHandler h = new Thread.UncaughtExceptionHandler() { public void uncaughtException(Thread th, Throwable ex) { ex.printStackTrace(); exceptionHappened.set(true); } }; for (int i = 0; i < queryRanges.length; i++) { Thread t = new Thread() { @Override public void run() { try { int thread_i = threads.indexOf(this); long result_count = rangeQueryMapReduce(fs, inputFile, outputPath, queryRanges[thread_i], stockShape, overwrite, false, query); results[thread_i] = result_count; } catch (IOException e) { throw new RuntimeException(e); } } }; t.setUncaughtExceptionHandler(h); threads.add(t); } long t1 = System.currentTimeMillis(); do { // Ensure that there is at least MaxConcurrentThreads running int i = 0; while (i < concurrency && i < threads.size()) { Thread.State state = threads.elementAt(i).getState(); if (state == Thread.State.TERMINATED) { // Thread already terminated, remove from the queue threads.remove(i); } else if (state == Thread.State.NEW) { // Start the thread and move to next one threads.elementAt(i++).start(); } else { // Thread is still running, skip over it i++; } } if (!threads.isEmpty()) { try { // Sleep for 10 seconds or until the first thread terminates threads.firstElement().join(10000); } catch (InterruptedException e) { e.printStackTrace(); } } } while (!threads.isEmpty()); long t2 = System.currentTimeMillis(); if (exceptionHappened.get()) throw new RuntimeException("Not all jobs finished correctly"); System.out.println("Time for " + queryRanges.length + " jobs is " + (t2 - t1) + " millis"); System.out.print("Result size: ["); for (long result : results) { System.out.print(result + ", "); } System.out.println("]"); }
From source file:edu.umn.cs.sthadoop.operations.HSPKNNQ.java
public static void main(String[] args) throws IOException { //./hadoop jar /export/scratch/louai/idea-stHadoop/st-hadoop-uber.jar pknn /mntgIndex/yyyy-MM-dd/2017-08-03 /pknn k:2 point:-78.9659,35.7998 shape:edu.umn.cs.sthadoop.mntg.STPointMntg -overwrite // args = new String[8]; // args[0] = "/export/scratch/mntgData/mntgIndex"; // args[1] = "/export/scratch/mntgData/pknn"; // args[2] = "-overwrite"; // args[3] = "k:10"; // args[4] = "point:-78.9659063204100,35.7903907684998"; // args[5] = "shape:edu.umn.cs.sthadoop.trajectory.STPointTrajectory"; // args[6] = "interval:2017-08-03,2017-08-04"; // args[7] = "-overwrite"; final OperationsParams params = new OperationsParams(new GenericOptionsParser(args)); Path[] paths = params.getPaths(); if (paths.length <= 1 && !params.checkInput()) { printUsage();/*from w w w.j a va2 s . c o m*/ System.exit(1); } if (paths.length > 1 && !params.checkInputOutput()) { printUsage(); System.exit(1); } if (params.get("interval") == null) { System.err.println("Temporal range missing"); printUsage(); System.exit(1); } TextSerializable inObj = params.getShape("shape"); if (!(inObj instanceof STPoint)) { if (!(inObj instanceof STRectangle)) { LOG.error("Shape is not instance of STPoint or instance of STRectangle"); printUsage(); System.exit(1); } } // path to the spatio-temporal index. List<Path> STPaths = new ArrayList<Path>(); try { STPaths = STRangeQuery.getIndexedSlices(params); } catch (Exception e1) { // TODO Auto-generated catch block e1.printStackTrace(); } for (Path input : STPaths) { final Path inputFile = input; int count = params.getInt("count", 1); double closeness = params.getFloat("closeness", -1.0f); final Point[] queryPoints = closeness < 0 ? params.getShapes("point", new Point()) : new Point[count]; final FileSystem fs = inputFile.getFileSystem(params); final int k = params.getInt("k", 1); int concurrency = params.getInt("concurrency", 100); if (k == 0) { LOG.warn("k = 0"); } if (queryPoints.length == 0) { printUsage(); throw new RuntimeException("Illegal arguments"); } final Path outputPath = paths.length > 1 ? new Path(paths[1].toUri() + "-" + input.getName()) : null; if (closeness >= 0) { // Get query points according to its closeness to grid intersections GlobalIndex<Partition> gindex = SpatialSite.getGlobalIndex(fs, inputFile); long seed = params.getLong("seed", System.currentTimeMillis()); Random random = new Random(seed); for (int i = 0; i < count; i++) { int i_block = random.nextInt(gindex.size()); int direction = random.nextInt(4); // Generate a point in the given direction // Get center point (x, y) Iterator<Partition> iterator = gindex.iterator(); while (i_block-- >= 0) iterator.next(); Partition partition = iterator.next(); double cx = (partition.x1 + partition.x2) / 2; double cy = (partition.y1 + partition.y2) / 2; double cw = partition.x2 - partition.x1; double ch = partition.y2 - partition.y1; int signx = ((direction & 1) == 0) ? 1 : -1; int signy = ((direction & 2) == 1) ? 1 : -1; double x = cx + cw * closeness / 2 * signx; double y = cy + ch * closeness / 2 * signy; queryPoints[i] = new Point(x, y); } } final BooleanWritable exceptionHappened = new BooleanWritable(); Thread.UncaughtExceptionHandler h = new Thread.UncaughtExceptionHandler() { public void uncaughtException(Thread th, Throwable ex) { ex.printStackTrace(); exceptionHappened.set(true); } }; // Run each query in a separate thread final Vector<Thread> threads = new Vector<Thread>(); for (int i = 0; i < queryPoints.length; i++) { Thread thread = new Thread() { @Override public void run() { try { Point query_point = queryPoints[threads.indexOf(this)]; OperationsParams newParams = new OperationsParams(params); OperationsParams.setShape(newParams, "point", query_point); Job job = knn(inputFile, outputPath, params); } catch (IOException e) { e.printStackTrace(); } catch (InterruptedException e) { e.printStackTrace(); } catch (ClassNotFoundException e) { e.printStackTrace(); } } }; thread.setUncaughtExceptionHandler(h); threads.add(thread); } long t1 = System.currentTimeMillis(); do { // Ensure that there is at least MaxConcurrentThreads running int i = 0; while (i < concurrency && i < threads.size()) { Thread.State state = threads.elementAt(i).getState(); if (state == Thread.State.TERMINATED) { // Thread already terminated, remove from the queue threads.remove(i); } else if (state == Thread.State.NEW) { // Start the thread and move to next one threads.elementAt(i++).start(); } else { // Thread is still running, skip over it i++; } } if (!threads.isEmpty()) { try { // Sleep for 10 seconds or until the first thread terminates threads.firstElement().join(10000); } catch (InterruptedException e) { e.printStackTrace(); } } } while (!threads.isEmpty()); long t2 = System.currentTimeMillis(); if (exceptionHappened.get()) throw new RuntimeException("Not all jobs finished correctly"); System.out.println("Time for " + queryPoints.length + " jobs is " + (t2 - t1) + " millis"); System.out.println("Total iterations: " + TotalIterations); } }
From source file:net.java.sen.tools.MkSenDic.java
/** * Build sen dictionary.//w w w .j a v a2s . co m * * @param args * custom dictionary files. see dic/build.xml. */ public static void main(String args[]) { ResourceBundle rb = ResourceBundle.getBundle("dictionary"); DictionaryMaker dm1 = new DictionaryMaker(); DictionaryMaker dm2 = new DictionaryMaker(); DictionaryMaker dm3 = new DictionaryMaker(); // 1st field information of connect file. Vector rule1 = new Vector(); // 2nd field information of connect file. Vector rule2 = new Vector(); // 3rd field information of connect file. Vector rule3 = new Vector(); // 4th field information of connect file. // this field shows cost of morpheme connection // [size3*(x3*size2+x2)+x1] // [size3*(Attr1*size2+Attr2)+Attl] short score[] = new short[20131]; long start = System.currentTimeMillis(); // ///////////////////////////////////////// // // Step1. Loading connetion file. // log.info("(1/7): reading connection matrix ... "); try { log.info("connection file = " + rb.getString("text_connection_file")); log.info("charset = " + rb.getString("dic.charset")); CSVParser csvparser = new CSVParser(new FileInputStream(rb.getString("text_connection_file")), rb.getString("dic.charset")); String t[]; int line = 0; while ((t = csvparser.nextTokens()) != null) { if (t.length < 4) { log.warn("invalid line in " + rb.getString("text_connection_file") + ":" + line); log.warn(rb.getString("text_connection_file") + "may be broken."); break; } dm1.add(t[0]); rule1.add(t[0]); dm2.add(t[1]); rule2.add(t[1]); dm3.add(t[2]); rule3.add(t[2]); if (line == score.length) { score = resize(score); } score[line++] = (short) Integer.parseInt(t[3]); } // ///////////////////////////////////////// // // Step2. Building internal dictionary // log.info("(2/7): building type dictionary ... "); dm1.build(); dm2.build(); dm3.build(); // if you want check specified morpheme, you uncomment and modify // following line: /* * System.out.print("22="); dm3.getById(22); * System.out.print("368="); dm3.getById(368); * * System.out.println(dm3.getDicId("?????*,*,*,*,?")); * DictionaryMaker.debug = true; * System.out.println(dm3.getDicId("?????*,*,*,*,?")); * System.out.println(dm3.getDicIdNoCache("?????*,*,*,*,?")); */ } catch (IOException e) { e.printStackTrace(); System.exit(0); } // ------------------------------------------------- int size1 = dm1.size(); int size2 = dm2.size(); int size3 = dm3.size(); int ruleSize = rule1.size(); short matrix[] = new short[size1 * size2 * size3]; short default_cost = (short) Integer.parseInt(rb.getString("default_connection_cost")); // ///////////////////////////////////////// // // Step3. Writing Connection Matrix // log.info("(3/7): writing conection matrix (" + size1 + " x " + size2 + " x " + size3 + " = " + size1 * size2 * size3 + ") ..."); for (int i = 0; i < (int) (size1 * size2 * size3); i++) matrix[i] = default_cost; for (int i = 0; i < ruleSize; i++) { Vector r1 = dm1.getRuleIdList((String) rule1.get(i)); Vector r2 = dm2.getRuleIdList((String) rule2.get(i)); Vector r3 = dm3.getRuleIdList((String) rule3.get(i)); for (Iterator i1 = r1.iterator(); i1.hasNext();) { int ii1 = ((Integer) i1.next()).intValue(); for (Iterator i2 = r2.iterator(); i2.hasNext();) { int ii2 = ((Integer) i2.next()).intValue(); for (Iterator i3 = r3.iterator(); i3.hasNext();) { int ii3 = ((Integer) i3.next()).intValue(); int pos = size3 * (size2 * ii1 + ii2) + ii3; matrix[pos] = score[i]; } } } } try { DataOutputStream out = new DataOutputStream( new BufferedOutputStream(new FileOutputStream(rb.getString("matrix_file")))); out.writeShort(size1); out.writeShort(size2); out.writeShort(size3); for (int i1 = 0; i1 < size1; i1++) for (int i2 = 0; i2 < size2; i2++) for (int i3 = 0; i3 < size3; i3++) { out.writeShort(matrix[size3 * (size2 * i1 + i2) + i3]); // if (matrix[size3 * (size2 * i1 + i2) + i3] != // default_cost) { // } } out.close(); } catch (IOException e) { e.printStackTrace(); System.exit(0); } matrix = null; score = null; // ------------------------------------------------- int pos_start = Integer.parseInt(rb.getString("pos_start")); int pos_size = Integer.parseInt(rb.getString("pos_size")); int di = 0; int offset = 0; ArrayList dicList = new ArrayList(); // ///////////////////////////////////////// // // Step4. Reading Morpheme Information // log.info("(4/7): reading morpheme information ... "); String t = null; String[] csv = null; try { // writer for feature file. BufferedWriter bw = new BufferedWriter(new OutputStreamWriter( new FileOutputStream(rb.getString("pos_file")), rb.getString("sen.charset"))); log.info("load dic: " + rb.getString("text_dic_file")); BufferedReader dicStream = null; int custom_dic = -1; if (args.length == 0) { dicStream = new BufferedReader(new InputStreamReader( new FileInputStream(rb.getString("text_dic_file")), rb.getString("dic.charset"))); } else { custom_dic = 0; dicStream = new BufferedReader( new InputStreamReader(new FileInputStream(args[custom_dic]), rb.getString("dic.charset"))); } int line = 0; CSVData key_b = new CSVData(); CSVData pos_b = new CSVData(); while (true) { t = dicStream.readLine(); if (t == null) { dicStream.close(); custom_dic++; if (args.length == custom_dic) { break; } else { // read custum dictionary log.info("load dic: " + "args[custum_dic]"); dicStream = new BufferedReader(new InputStreamReader(new FileInputStream(args[custom_dic]), rb.getString("dic.charset"))); } continue; } CSVParser parser = new CSVParser(t); csv = parser.nextTokens(); if (csv.length < (pos_size + pos_start)) { throw new RuntimeException("format error:" + t); } key_b.clear(); pos_b.clear(); for (int i = pos_start; i < (pos_start + pos_size - 1); i++) { key_b.append(csv[i]); pos_b.append(csv[i]); } key_b.append(csv[pos_start + pos_size - 1]); pos_b.append(csv[pos_start + pos_size - 1]); for (int i = pos_start + pos_size; i < (csv.length - 1); i++) { pos_b.append(csv[i]); } pos_b.append(csv[csv.length - 1]); CToken token = new CToken(); token.rcAttr2 = (short) dm1.getDicId(key_b.toString()); token.rcAttr1 = (short) dm2.getDicId(key_b.toString()); token.lcAttr = (short) dm3.getDicId(key_b.toString()); token.posid = 0; token.posID = offset; token.length = (short) csv[0].length(); token.cost = (short) Integer.parseInt(csv[1]); dicList.add(new PairObject(csv[0], token)); byte b[] = pos_b.toString().getBytes(rb.getString("sen.charset")); offset += (b.length + 1); String pos_b_str = pos_b.toString(); bw.write(pos_b_str, 0, pos_b_str.length()); // bw.write(b, 0, b.length); bw.write(0); if (++di % 50000 == 0) log.info("" + di + "... "); } bw.close(); // ----end of writing feature.cha ---- } catch (Exception e) { log.error("Error: " + t); e.printStackTrace(); System.exit(1); } rule1 = null; rule2 = null; rule3 = null; // ///////////////////////////////////////// // // Step5. Sort lexs and write to file // log.info("(5/7): sorting lex... "); int value[] = new int[dicList.size()]; char key[][] = new char[dicList.size()][]; int spos = 0; int dsize = 0; int bsize = 0; String prev = ""; Collections.sort(dicList); // ///////////////////////////////////////// // // Step6. Writing Token Information // log.info("(6/7): writing token... "); try { // writer for token file. DataOutputStream out = new DataOutputStream( new BufferedOutputStream(new FileOutputStream(rb.getString("token_file")))); // writing 'bos' and 'eos' and 'unknown' token. CToken token = new CToken(); token.rcAttr2 = (short) dm1.getDicId(rb.getString("bos_pos")); token.rcAttr1 = (short) dm2.getDicId(rb.getString("bos_pos")); token.lcAttr = (short) dm3.getDicId(rb.getString("bos_pos")); token.write(out); token.rcAttr2 = (short) dm1.getDicId(rb.getString("eos_pos")); token.rcAttr1 = (short) dm2.getDicId(rb.getString("eos_pos")); token.lcAttr = (short) dm3.getDicId(rb.getString("eos_pos")); token.write(out); token.rcAttr2 = (short) dm1.getDicId(rb.getString("unknown_pos")); token.rcAttr1 = (short) dm2.getDicId(rb.getString("unknown_pos")); token.lcAttr = (short) dm3.getDicId(rb.getString("unknown_pos")); token.posID = -1; token.write(out); log.info("key size = " + key.length); for (int i = 0; i < key.length; i++) { String k = (String) ((PairObject) dicList.get(i)).key; if (!prev.equals(k) && i != 0) { key[dsize] = ((String) ((PairObject) dicList.get(spos)).key).toCharArray(); value[dsize] = bsize + (spos << 8); dsize++; bsize = 1; spos = i; } else { bsize++; } prev = (String) ((PairObject) dicList.get(i)).key; ((CToken) (((PairObject) dicList.get(i)).value)).write(out); } out.flush(); out.close(); } catch (Exception e) { e.printStackTrace(); System.exit(1); } key[dsize] = ((String) ((PairObject) dicList.get(spos)).key).toCharArray(); value[dsize] = bsize + (spos << 8); dsize++; dm1 = null; dm2 = null; dm3 = null; dicList = null; // ///////////////////////////////////////// // // Step7. Build Double Array // log.info("(7/7): building Double-Array (size = " + dsize + ") ..."); DoubleArrayTrie da = new DoubleArrayTrie(); da.build(key, null, value, dsize); try { da.save(rb.getString("double_array_file")); } catch (Exception e) { e.printStackTrace(); } log.info("total time = " + (System.currentTimeMillis() - start) / 1000 + "[ms]"); }
From source file:it.acubelab.smaph.learn.TuneModel.java
public static void main(String[] args) throws Exception { Locale.setDefault(Locale.US); String freebKey = "<FREEBASE_KEY>"; String bingKey = "<BING_KEY>"; WikipediaApiInterface wikiApi = new WikipediaApiInterface("benchmark/cache/wid.cache", "benchmark/cache/redirect.cache"); FreebaseApi freebApi = new FreebaseApi(freebKey, "freeb.cache"); Vector<ModelConfigurationResult> bestEQFModels = new Vector<>(); Vector<ModelConfigurationResult> bestEFModels = new Vector<>(); int wikiSearchTopK = 5; // <======== mind this double gamma = 1.0; double C = 1.0; for (double editDistanceThr = 0.7; editDistanceThr <= 0.7; editDistanceThr += 0.1) { WikipediaToFreebase wikiToFreebase = new WikipediaToFreebase("mapdb"); SmaphAnnotator bingAnnotator = GenerateTrainingAndTest.getDefaultBingAnnotator(wikiApi, wikiToFreebase, editDistanceThr, wikiSearchTopK, bingKey); SmaphAnnotator.setCache("bing.cache.full"); BinaryExampleGatherer trainEntityFilterGatherer = new BinaryExampleGatherer(); BinaryExampleGatherer develEntityFilterGatherer = new BinaryExampleGatherer(); GenerateTrainingAndTest.gatherExamplesTrainingAndDevel(bingAnnotator, trainEntityFilterGatherer, develEntityFilterGatherer, wikiApi, wikiToFreebase, freebApi); SmaphAnnotator.unSetCache();// www . java 2 s .co m Pair<Vector<ModelConfigurationResult>, ModelConfigurationResult> modelAndStatsEF = trainIterative( trainEntityFilterGatherer, develEntityFilterGatherer, editDistanceThr, OptimizaionProfiles.MAXIMIZE_MACRO_F1, -1.0, gamma, C); /* * Pair<Vector<ModelConfigurationResult>, ModelConfigurationResult> * modelAndStatsEF = trainIterative( trainEmptyQueryGatherer, * develEmptyQueryGatherer, editDistanceThr, * OptimizaionProfiles.MAXIMIZE_TN, 0.02); */ /* * for (ModelConfigurationResult res : modelAndStatsEQF.first) * System.out.println(res.getReadable()); */ for (ModelConfigurationResult res : modelAndStatsEF.first) System.out.println(res.getReadable()); /* bestEQFModels.add(modelAndStatsEQF.second); */ bestEFModels.add(modelAndStatsEF.second); System.gc(); } for (ModelConfigurationResult modelAndStatsEQF : bestEQFModels) System.out.println("Best EQF:" + modelAndStatsEQF.getReadable()); for (ModelConfigurationResult modelAndStatsEF : bestEFModels) System.out.println("Best EF:" + modelAndStatsEF.getReadable()); System.out.println("Flushing Bing API..."); SmaphAnnotator.flush(); wikiApi.flush(); }
From source file:net.iiit.siel.analysis.lang.LanguageIdentifier.java
/** * The main method.//from ww w . jav a 2 s . c o m * * @param args the arguments */ public static void main(String args[]) { String usage = "Usage: LanguageIdentifier " + "[-identifyrows filename maxlines] " + "[-identifyfile charset filename] " + "[-identifyfileset charset files] " + "[-identifytext text] " + "[-identifyurl url]"; int command = 0; final int IDFILE = 1; final int IDTEXT = 2; final int IDURL = 3; final int IDFILESET = 4; final int IDROWS = 5; Vector fileset = new Vector(); String filename = ""; String charset = ""; String url = ""; String text = ""; int max = 0; // TODO niket writing test args here.. /* args = new String[2]; args[0] = "-identifyurl"; args[1] = "file:/home1/niket/TamilSamplePage.html"; //args[2] = "/home1/niket/nutch-clia/input.txt"; */ // TODO niket end here if (args.length == 0) { System.err.println(usage); System.exit(-1); } for (int i = 0; i < args.length; i++) { // parse command line if (args[i].equals("-identifyfile")) { command = IDFILE; charset = args[++i]; filename = args[++i]; } if (args[i].equals("-identifyurl")) { command = IDURL; filename = args[++i]; } if (args[i].equals("-identifyrows")) { command = IDROWS; filename = args[++i]; max = Integer.parseInt(args[++i]); } if (args[i].equals("-identifytext")) { command = IDTEXT; for (i++; i < args.length - 1; i++) text += args[i] + " "; } if (args[i].equals("-identifyfileset")) { command = IDFILESET; charset = args[++i]; for (i++; i < args.length; i++) { File[] files = null; File f = new File(args[i]); if (f.isDirectory()) { files = f.listFiles(); } else { files = new File[] { f }; } for (int j = 0; j < files.length; j++) { fileset.add(files[j].getAbsolutePath()); } } } } Configuration conf = NutchConfiguration.create(); String lang = null; LanguageIdentifier idfr = new LanguageIdentifier(conf); File f; FileInputStream fis; try { switch (command) { case IDTEXT: lang = idfr.identify(text); System.out.println("Lang :" + lang); break; case IDFILE: f = new File(filename); fis = new FileInputStream(f); lang = idfr.identify(fis, charset); fis.close(); break; case IDURL: lang = LangIdentifierUtility.IdentifyLangFromURLDirectly(filename); /* * our url identifier is confused or couldn't identify lang from * URL */ if (lang == null || lang.equalsIgnoreCase("en")) { System.out.println("Ambuguity in identifying language from URL"); } else { System.out.println("Lang was identified(using URL) as: " + lang); } break; case IDROWS: f = new File(filename); BufferedReader br = new BufferedReader(new InputStreamReader(new FileInputStream(f))); String line; while (max > 0 && (line = br.readLine()) != null) { line = line.trim(); if (line.length() > 2) { max--; lang = idfr.identify(line); System.out.println("R=" + lang + ":" + line); } } br.close(); System.exit(0); break; case IDFILESET: /* * used for benchs for (int j=128; j<=524288; j*=2) { long start * = System.currentTimeMillis(); idfr.analyzeLength = j; */ System.out.println("FILESET"); Iterator i = fileset.iterator(); while (i.hasNext()) { try { filename = (String) i.next(); f = new File(filename); fis = new FileInputStream(f); lang = idfr.identify(fis, charset); fis.close(); } catch (Exception e) { System.out.println(e); } System.out.println(filename + " was identified as " + lang); } /* * used for benchs System.out.println(j + "/" + * (System.currentTimeMillis()-start)); } */ System.exit(0); break; } } catch (Exception e) { System.out.println(e); System.out.println("lang could not be identified properly"); e.printStackTrace(); } System.out.println("text was identified as " + lang); /* * DONOT delete the next few lines, they should be enabled, when a lang. * mapping map needs to be generated. TODO this is for printing * the hashMapRangeLangIDTable only * * idfr.langMarkerObject.printHashmapTableWithFormatting(); * * System.out * .println("\n\n\n Printing english text contents in this file:\n"); * System.out.println(idfr.langMarkerObject.getLangCharacters( * LanguageIdentifierConstants.LangShortNames.ENGLISH * .langShortName()).toString()); * * System.out * .println("\n\n\n Printing telugu text contents in this file:\n"); * System.out.println(idfr.langMarkerObject.getLangCharacters( * LanguageIdentifierConstants.LangShortNames.TELUGU * .langShortName()).toString()); * * System.out * .println("\n\n\n Printing unknown text contents in this file:\n"); * System.out.println(idfr.langMarkerObject.getLangCharacters( * LanguageIdentifierConstants.LangShortNames.UNKNOWN_LANG * .langShortName()).toString()); */ }
From source file:edu.umn.cs.spatialHadoop.operations.RangeQuery.java
public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException { final OperationsParams params = new OperationsParams(new GenericOptionsParser(args)); final Path[] paths = params.getPaths(); if (paths.length <= 1 && !params.checkInput()) { printUsage();//from w w w . jav a 2 s .co m System.exit(1); } if (paths.length >= 2 && !params.checkInputOutput()) { printUsage(); System.exit(1); } if (params.get("rect") == null) { System.err.println("You must provide a query range"); printUsage(); System.exit(1); } final Path inPath = params.getInputPath(); final Path outPath = params.getOutputPath(); final Rectangle[] queryRanges = params.getShapes("rect", new Rectangle()); // All running jobs final Vector<Long> resultsCounts = new Vector<Long>(); Vector<Job> jobs = new Vector<Job>(); Vector<Thread> threads = new Vector<Thread>(); long t1 = System.currentTimeMillis(); for (int i = 0; i < queryRanges.length; i++) { final OperationsParams queryParams = new OperationsParams(params); OperationsParams.setShape(queryParams, "rect", queryRanges[i]); if (OperationsParams.isLocal(new JobConf(queryParams), inPath)) { // Run in local mode final Rectangle queryRange = queryRanges[i]; final Shape shape = queryParams.getShape("shape"); final Path output = outPath == null ? null : (queryRanges.length == 1 ? outPath : new Path(outPath, String.format("%05d", i))); Thread thread = new Thread() { @Override public void run() { FSDataOutputStream outFile = null; final byte[] newLine = System.getProperty("line.separator", "\n").getBytes(); try { ResultCollector<Shape> collector = null; if (output != null) { FileSystem outFS = output.getFileSystem(queryParams); final FSDataOutputStream foutFile = outFile = outFS.create(output); collector = new ResultCollector<Shape>() { final Text tempText = new Text2(); @Override public synchronized void collect(Shape r) { try { tempText.clear(); r.toText(tempText); foutFile.write(tempText.getBytes(), 0, tempText.getLength()); foutFile.write(newLine); } catch (IOException e) { e.printStackTrace(); } } }; } else { outFile = null; } long resultCount = rangeQueryLocal(inPath, queryRange, shape, queryParams, collector); resultsCounts.add(resultCount); } catch (IOException e) { e.printStackTrace(); } catch (InterruptedException e) { e.printStackTrace(); } finally { try { if (outFile != null) outFile.close(); } catch (IOException e) { e.printStackTrace(); } } } }; thread.start(); threads.add(thread); } else { // Run in MapReduce mode queryParams.setBoolean("background", true); Job job = rangeQueryMapReduce(inPath, outPath, queryParams); jobs.add(job); } } while (!jobs.isEmpty()) { Job firstJob = jobs.firstElement(); firstJob.waitForCompletion(false); if (!firstJob.isSuccessful()) { System.err.println("Error running job " + firstJob); System.err.println("Killing all remaining jobs"); for (int j = 1; j < jobs.size(); j++) jobs.get(j).killJob(); System.exit(1); } Counters counters = firstJob.getCounters(); Counter outputRecordCounter = counters.findCounter(Task.Counter.MAP_OUTPUT_RECORDS); resultsCounts.add(outputRecordCounter.getValue()); jobs.remove(0); } while (!threads.isEmpty()) { try { Thread thread = threads.firstElement(); thread.join(); threads.remove(0); } catch (InterruptedException e) { e.printStackTrace(); } } long t2 = System.currentTimeMillis(); System.out.println("Time for " + queryRanges.length + " jobs is " + (t2 - t1) + " millis"); System.out.println("Results counts: " + resultsCounts); }