List of usage examples for java.util.LinkedList.addAll
public boolean addAll(Collection<? extends E> c)
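Before the real-world examples, a minimal, self-contained sketch of the method in isolation (class name and values are illustrative, not taken from the examples below): addAll appends every element of the given collection in its iterator order, returns true if the list changed, and throws NullPointerException for a null collection; the addAll(int, Collection) overload inserts at a position.

import java.util.Arrays;
import java.util.Collections;
import java.util.LinkedList;

public class AddAllDemo {
    public static void main(String[] args) {
        LinkedList<String> list = new LinkedList<>(Arrays.asList("a", "b"));
        boolean changed = list.addAll(Arrays.asList("c", "d"));   // appends in iterator order
        System.out.println(changed + " " + list);                 // true [a, b, c, d]
        list.addAll(1, Arrays.asList("x"));                       // positional overload
        System.out.println(list);                                 // [a, x, b, c, d]
        System.out.println(list.addAll(Collections.emptyList())); // false: list unchanged
    }
}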
From source file:com.yahoo.egads.models.adm.AdaptiveKernelDensityChangePointDetector.java
protected float[] computeKLScore(float residual, int preWindowSize, int postWindowSize, float confidence) {
    float dKL = 0;
    float levelThreshold = 0;
    int len = buffer.size();
    // Filling the pre-window
    if (len < preWindowSize) {
        buffer.addLast(residual);
        postKernelSum.addLast(0F);
        if (len == (preWindowSize - 1)) {
            int n = preWindowSize;
            sumBuffer = ListUtils.sumQ(buffer);
            sqrSumBuffer = ListUtils.sum2Q(buffer);
            float temp = (float) Math.max(1e-5,
                    Math.sqrt(2 * (n * sqrSumBuffer - sumBuffer * sumBuffer) / (n * (n - 1))));
            ListUtils.repQ(sdBuffer, temp, n);
            for (float x : buffer) {
                preKernelSum.addLast(ListUtils.kernelSum(x, buffer, sdBuffer));
            }
        }
    } else if (len < (preWindowSize + postWindowSize)) { // Filling the post-window
        sumBuffer = sumBuffer + residual;
        sqrSumBuffer = sqrSumBuffer + residual * residual;
        int n = len + 1;
        float temp = (float) Math.max(1e-5,
                Math.sqrt(2 * (n * sqrSumBuffer - sumBuffer * sumBuffer) / (n * (n - 1))));
        sdBuffer.addLast(temp);
        LinkedList<Float> tempQ1 = new LinkedList<Float>();
        tempQ1.add(residual);
        LinkedList<Float> tempQ2 = new LinkedList<Float>();
        tempQ2.add(temp);
        ListUtils.addQ(postKernelSum, ListUtils.kernelQ(buffer, tempQ1, tempQ2));
        buffer.addLast(residual);
        preKernelSum.addLast(ListUtils.kernelSubSum(residual, buffer, sdBuffer, 0, preWindowSize - 1));
        postKernelSum
                .addLast(ListUtils.kernelSubSum(residual, buffer, sdBuffer, preWindowSize, buffer.size() - 1));
    } else {
        // updating the pre-stats
        LinkedList<Float> preRemovedValues = ListUtils.kernelQ(buffer, buffer.subList(0, 1),
                sdBuffer.subList(0, 1));
        ListUtils.subtractQ(preKernelSum, preRemovedValues);
        LinkedList<Float> midExchangedValues = ListUtils.kernelQ(buffer,
                buffer.subList(preWindowSize, preWindowSize + 1),
                sdBuffer.subList(preWindowSize, preWindowSize + 1));
        ListUtils.addQ(preKernelSum, midExchangedValues);
        // Computing the new sd
        int n = len;
        sumBuffer += (residual - buffer.getFirst());
        sqrSumBuffer += (residual * residual - Math.pow(buffer.getFirst(), 2));
        float temp = (float) Math.max(1e-5,
                Math.sqrt(2 * (n * sqrSumBuffer - sumBuffer * sumBuffer) / (n * (n - 1))));
        // updating the post-stats
        LinkedList<Float> tempQ1 = new LinkedList<Float>();
        tempQ1.add(residual);
        LinkedList<Float> tempQ2 = new LinkedList<Float>();
        tempQ2.add(temp);
        ListUtils.subtractQ(postKernelSum, midExchangedValues);
        LinkedList<Float> postAddedValues = ListUtils.kernelQ(buffer, tempQ1, tempQ2);
        ListUtils.addQ(postKernelSum, postAddedValues);
        // updating the window
        buffer.addLast(residual);
        buffer.removeFirst();
        sdBuffer.addLast(temp);
        sdBuffer.removeFirst();
        preKernelSum.addLast(ListUtils.kernelSubSum(residual, buffer, sdBuffer, 0, preWindowSize - 1));
        postKernelSum.addLast(ListUtils.kernelSubSum(residual, buffer, sdBuffer, preWindowSize,
                preWindowSize + postWindowSize - 1));
        preKernelSum.removeFirst();
        postKernelSum.removeFirst();
        float eps = 1e-10F;
        LinkedList<Float> preDensity = ListUtils
                .maxQ(preKernelSum.subList(preWindowSize, preWindowSize + postWindowSize), eps);
        LinkedList<Float> postDensity = ListUtils
                .maxQ(postKernelSum.subList(preWindowSize, preWindowSize + postWindowSize), eps);
        tempQ1.clear();
        tempQ1.addAll(preKernelSum.subList(0, preWindowSize));
        tempQ2.clear();
        tempQ2.add(1.0F / preWindowSize);
        ListUtils.multiplyQ(tempQ1, tempQ2);
        float levelSet = ListUtils.quantile(tempQ1, 1 - confidence);
        levelThreshold = (float) (-Math.log(levelSet) - Math.log(2 * Math.PI) / 2
                - ListUtils.sumLog(sdBuffer.subList(preWindowSize, preWindowSize + postWindowSize))
                        / postWindowSize);
        // computing the KL-divergence
        dKL = (float) ((ListUtils.sumLog(postDensity) - ListUtils.sumLog(preDensity)
                + Math.log(preWindowSize / postWindowSize)) / postWindowSize
                + Math.log(levelSet * Math.sqrt(2 * Math.PI))
                + ListUtils.sumLog(sdBuffer.subList(preWindowSize, preWindowSize + postWindowSize))
                        / postWindowSize);
    }
    return new float[] { dKL, levelThreshold };
}
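The tempQ1.clear() followed by tempQ1.addAll(subList(...)) above is a common way to refill a reusable LinkedList from a window of another list; a minimal sketch of that pattern (names and values illustrative):

import java.util.Arrays;
import java.util.LinkedList;

public class WindowCopyDemo {
    public static void main(String[] args) {
        LinkedList<Float> buffer = new LinkedList<>(Arrays.asList(1f, 2f, 3f, 4f, 5f));
        LinkedList<Float> window = new LinkedList<>();
        int preWindowSize = 3;
        // Refill the scratch list with the first preWindowSize elements;
        // addAll copies the elements of the subList view, so later changes
        // to buffer do not alias window.
        window.clear();
        window.addAll(buffer.subList(0, preWindowSize));
        System.out.println(window); // [1.0, 2.0, 3.0]
    }
}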
From source file:com.redsqirl.workflow.server.Workflow.java
protected List<DataFlowElement> subsetElementToRun(List<String> dataFlowElements) throws Exception {
    LinkedList<DataFlowElement> elsIn = new LinkedList<DataFlowElement>();
    if (!isSchedule() && dataFlowElements.size() < element.size()) {
        Iterator<DataFlowElement> itIn = getEls(dataFlowElements).iterator();
        while (itIn.hasNext()) {
            DataFlowElement cur = itIn.next();
            elsIn = getAllWithoutDuplicate(elsIn, getItAndAllElementsNeeded(cur));
        }
    } else {
        elsIn.addAll(getEls(dataFlowElements));
    }
    // Run only what has not been calculated in the workflow.
    List<DataFlowElement> toRun = new LinkedList<DataFlowElement>();
    Iterator<DataFlowElement> itE = elsIn.iterator();
    while (itE.hasNext()) {
        DataFlowElement cur = itE.next();
        // Never run an element that has no action
        if (cur.getOozieAction() != null && !toRun.contains(cur)) {
            boolean haveTobeRun = false;
            List<DataFlowElement> outAllComp = cur.getAllOutputComponent();
            Collection<DFEOutput> outData = cur.getDFEOutput().values();
            Map<String, List<DataFlowElement>> outComp = cur.getOutputComponent();
            boolean lastElement = outAllComp.size() == 0;
            // If the current element has outputs, check whether those have to run
            Iterator<DataFlowElement> itE2 = outAllComp.iterator();
            while (itE2.hasNext() && !lastElement) {
                lastElement = !elsIn.contains(itE2.next());
            }
            if (lastElement) {
                // Element at the end of what needs to run.
                // Check whether at least one buffered/recorded output exists;
                // if all outputs are temporary and none exist, calculate the element.
                Iterator<DFEOutput> itOutData = outData.iterator();
                int nbTemporary = 0;
                while (itOutData.hasNext() && !haveTobeRun) {
                    DFEOutput outC = itOutData.next();
                    if ((!SavingState.TEMPORARY.equals(outC.getSavingState())) && !outC.isPathExist()) {
                        haveTobeRun = true;
                    } else if (SavingState.TEMPORARY.equals(outC.getSavingState()) && !outC.isPathExist()) {
                        ++nbTemporary;
                    }
                }
                if (nbTemporary == outData.size()) {
                    haveTobeRun = true;
                }
            } else {
                // Check whether any recorded/buffered output does not exist
                Iterator<DFEOutput> itOutData = outData.iterator();
                while (itOutData.hasNext() && !haveTobeRun) {
                    DFEOutput outC = itOutData.next();
                    if ((!SavingState.TEMPORARY.equals(outC.getSavingState())) && !outC.isPathExist()) {
                        haveTobeRun = true;
                    }
                }
                if (!haveTobeRun) {
                    // Check whether any of the output components are in the run list,
                    // and if so whether the corresponding output is saved or not
                    Iterator<String> searchOutIt = outComp.keySet().iterator();
                    while (searchOutIt.hasNext() && !haveTobeRun) {
                        boolean foundOne = false;
                        String searchOut = searchOutIt.next();
                        Iterator<DataFlowElement> outCIt = outComp.get(searchOut).iterator();
                        while (outCIt.hasNext() && !foundOne) {
                            foundOne = elsIn.contains(outCIt.next());
                        }
                        if (foundOne) {
                            haveTobeRun = !cur.getDFEOutput().get(searchOut).isPathExist();
                        }
                    }
                }
            }
            if (haveTobeRun) {
                // This element has to be run; if one output exists and a
                // recorded/buffered one does not, send an error
                cur.cleanDataOut();
                toRun.add(cur);
            }
        }
    }
    List<DataFlowElement> toRunSort = null;
    if (toRun != null) {
        toRunSort = new LinkedList<DataFlowElement>();
        Iterator<DataFlowElement> it = element.iterator();
        while (it.hasNext()) {
            DataFlowElement cur = it.next();
            if (toRun.contains(cur)) {
                toRunSort.add(cur);
            }
        }
    }
    return toRunSort;
}
From source file:org.trnltk.experiment.morphology.ambiguity.DataDiffUtil.java
/**
 * Find the differences between two texts. Assumes that the texts do not
 * have any common prefix or suffix.
 *
 * @param list1 Old list to be diffed.
 * @param list2 New list to be diffed.
 * @param checklines Speedup flag. If false, then don't run a
 *                   line-level diff first to identify the changed areas.
 *                   If true, then run a faster slightly less optimal diff.
 * @param deadline Time when the diff should be complete by.
 * @return Linked List of Diff objects.
 */
private LinkedList<Diff<T>> diff_compute(List<T> list1, List<T> list2, boolean checklines, long deadline) {
    LinkedList<Diff<T>> diffs = new LinkedList<Diff<T>>();
    if (list1.size() == 0) {
        // Just add some text (speedup).
        diffs.add(new Diff(Operation.INSERT, list2));
        return diffs;
    }
    if (list2.size() == 0) {
        // Just delete some text (speedup).
        diffs.add(new Diff(Operation.DELETE, list1));
        return diffs;
    }
    List<T> longtext = list1.size() > list2.size() ? list1 : list2;
    List<T> shorttext = list1.size() > list2.size() ? list2 : list1;
    int i = longtext.indexOf(shorttext); // TODO
    if (i != -1) {
        // Shorter text is inside the longer text (speedup).
        Operation op = (list1.size() > list2.size()) ? Operation.DELETE : Operation.INSERT;
        diffs.add(new Diff(op, longtext.subList(0, i)));
        diffs.add(new Diff(Operation.EQUAL, shorttext));
        diffs.add(new Diff(op, longtext.subList(i + shorttext.size(), longtext.size())));
        return diffs;
    }
    if (shorttext.size() == 1) {
        // Single character string.
        // After the previous speedup, the character can't be an equality.
        diffs.add(new Diff(Operation.DELETE, list1));
        diffs.add(new Diff(Operation.INSERT, list2));
        return diffs;
    }
    // Check to see if the problem can be split in two.
    List<List<T>> hm = diff_halfMatch(list1, list2);
    if (hm != null) {
        // A half-match was found, sort out the return data.
        List<T> text1_a = hm.get(0);
        List<T> text1_b = hm.get(1);
        List<T> text2_a = hm.get(2);
        List<T> text2_b = hm.get(3);
        List<T> mid_common = hm.get(4);
        // Send both pairs off for separate processing.
        LinkedList<Diff<T>> diffs_a = diff_main(text1_a, text2_a, checklines, deadline);
        LinkedList<Diff<T>> diffs_b = diff_main(text1_b, text2_b, checklines, deadline);
        // Merge the results.
        diffs = diffs_a;
        diffs.add(new Diff<T>(Operation.EQUAL, mid_common));
        diffs.addAll(diffs_b);
        return diffs;
    }
    return diff_bisect(list1, list2, deadline);
}
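The merge step above (diffs.addAll(diffs_b)) concatenates the two half-results in place by reusing the left list as the output; a minimal sketch of that pattern (values illustrative):

import java.util.Arrays;
import java.util.LinkedList;

public class ConcatDemo {
    public static void main(String[] args) {
        LinkedList<String> left = new LinkedList<>(Arrays.asList("del:a", "eq:b"));
        LinkedList<String> right = new LinkedList<>(Arrays.asList("ins:c"));
        LinkedList<String> merged = left;   // reuse the left list as the result
        merged.add("eq:mid");               // common middle element
        merged.addAll(right);               // append the right-hand results
        System.out.println(merged);         // [del:a, eq:b, eq:mid, ins:c]
    }
}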
From source file:com.ikanow.aleph2.management_db.services.DataBucketCrudService.java
/** Validates whether the new or updated bucket is valid: both in terms of authorization and in terms of format
 * @param bucket
 * @return
 * @throws ExecutionException
 * @throws InterruptedException
 */
protected Tuple2<DataBucketBean, Collection<BasicMessageBean>> validateBucket(final DataBucketBean bucket,
        final Optional<DataBucketBean> old_version, boolean do_full_checks, final boolean allow_system_names)
        throws InterruptedException, ExecutionException {
    // (will live with this being mutable)
    final LinkedList<BasicMessageBean> errors = new LinkedList<BasicMessageBean>();
    final JsonNode bucket_json = BeanTemplateUtils.toJson(bucket);

    /////////////////
    // PHASE 1

    // Check for missing fields
    ManagementDbErrorUtils.NEW_BUCKET_ERROR_MAP.keySet().stream()
            .filter(s -> !bucket_json.has(s)
                    || (bucket_json.get(s).isTextual() && bucket_json.get(s).asText().isEmpty()))
            .forEach(s -> errors.add(MgmtCrudUtils
                    .createValidationError(ErrorUtils.get(ManagementDbErrorUtils.NEW_BUCKET_ERROR_MAP.get(s),
                            Optional.ofNullable(bucket.full_name()).orElse("(unknown)")))));

    // We have a full name if we're here, so no check for uniqueness

    // Check for some bucket path restrictions
    if (null != bucket.full_name()) {
        if (!BucketValidationUtils.bucketPathFormatValidationCheck(bucket.full_name())) {
            errors.add(MgmtCrudUtils
                    .createValidationError(ErrorUtils.get(ManagementDbErrorUtils.BUCKET_FULL_NAME_FORMAT_ERROR,
                            Optional.ofNullable(bucket.full_name()).orElse("(unknown)"))));
            return Tuples._2T(bucket, errors); // (this is catastrophic obviously)
        }
        if (!old_version.isPresent()) { // (create not update)
            if (do_full_checks) {
                if (this._underlying_data_bucket_db.get().countObjectsBySpec(CrudUtils
                        .allOf(DataBucketBean.class).when(DataBucketBean::full_name, bucket.full_name()))
                        .get() > 0) {
                    errors.add(MgmtCrudUtils.createValidationError(
                            ErrorUtils.get(ManagementDbErrorUtils.BUCKET_FULL_NAME_UNIQUENESS,
                                    Optional.ofNullable(bucket.full_name()).orElse("(unknown)"))));
                    return Tuples._2T(bucket, errors); // (this is catastrophic obviously)
                }
            }
        }
    } else
        return Tuples._2T(bucket, errors); // (this is catastrophic obviously)

    // Some static validation moved into a separate function for testability
    errors.addAll(BucketValidationUtils.staticValidation(bucket, allow_system_names));

    // OK before I do any more stateful checking, going to stop if we have logic errors first
    if (!errors.isEmpty()) {
        return Tuples._2T(bucket, errors);
    }

    /////////////////
    // PHASE 2

    //TODO (ALEPH-19): multi buckets - authorization; other - authorization

    if (do_full_checks) {
        final CompletableFuture<Collection<BasicMessageBean>> bucket_path_errors_future = validateOtherBucketsInPathChain(
                bucket);

        errors.addAll(bucket_path_errors_future.join());

        // OK before I do any more stateful checking, going to stop if we have logic errors first
        if (!errors.isEmpty()) {
            return Tuples._2T(bucket, errors);
        }
    }

    /////////////////
    // PHASE 3

    // Finally check whether I am allowed to update the various fields if old_version.isPresent()
    if (old_version.isPresent()) {
        final DataBucketBean old_bucket = old_version.get();
        if (!bucket.full_name().equals(old_bucket.full_name())) {
            errors.add(MgmtCrudUtils
                    .createValidationError(ErrorUtils.get(ManagementDbErrorUtils.BUCKET_UPDATE_FULLNAME_CHANGED,
                            bucket.full_name(), old_bucket.full_name())));
        }
        if (!bucket.owner_id().equals(old_bucket.owner_id())) {
            errors.add(MgmtCrudUtils
                    .createValidationError(ErrorUtils.get(ManagementDbErrorUtils.BUCKET_UPDATE_OWNERID_CHANGED,
                            bucket.full_name(), old_bucket.owner_id())));
        }
    }

    /////////////////
    // PHASE 4 - DATA SCHEMA

    // NOTE: MESSAGES AT THIS POINT CAN BE INFO, YOU NEED TO CHECK THE SUCCESS()
    Tuple2<Map<String, String>, List<BasicMessageBean>> schema_validation = BucketValidationUtils
            .validateSchema(bucket, _service_context);

    errors.addAll(schema_validation._2());

    return Tuples._2T(
            BeanTemplateUtils.clone(bucket).with(DataBucketBean::data_locations, schema_validation._1()).done(),
            errors);
}
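validateBucket accumulates BasicMessageBean errors across phases with addAll and bails out early when the list is non-empty; a minimal sketch of that accumulate-and-short-circuit pattern (the phase methods are hypothetical stand-ins):

import java.util.Arrays;
import java.util.LinkedList;
import java.util.List;

public class ValidationAccumulatorDemo {
    // Hypothetical stand-ins for the per-phase checks in the example above.
    static List<String> phase1() { return Arrays.asList(); }
    static List<String> phase2() { return Arrays.asList("owner_id changed"); }

    public static void main(String[] args) {
        LinkedList<String> errors = new LinkedList<>();
        errors.addAll(phase1());     // accumulate one phase's messages
        if (!errors.isEmpty()) {     // stop early on logic errors
            System.out.println(errors);
            return;
        }
        errors.addAll(phase2());
        System.out.println(errors);  // [owner_id changed]
    }
}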
From source file:de.innovationgate.wgpublisher.lucene.LuceneManager.java
/**
 * @param searchDBKeys
 * @return list with all languages for searchDBKeys indexed by lucene
 * @throws WGAPIException
 */
private List<WGLanguage> getLanguagesForSearchDBKeys(List searchDBKeys) throws WGAPIException {
    LinkedList<WGLanguage> languages = new LinkedList<WGLanguage>();
    Iterator it = searchDBKeys.iterator();
    while (it.hasNext()) {
        String dbKey = (String) it.next();
        WGDatabase db = (WGDatabase) _core.getContentdbs().get(dbKey);
        if (db != null) {
            if (_indexedDbs.containsKey(dbKey)) {
                languages.addAll(db.getLanguages().values());
            }
        }
    }
    return languages;
}
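The languages.addAll(db.getLanguages().values()) call works because addAll accepts any Collection, including a map's values() view; a minimal sketch of flattening several maps into one list (names and values illustrative):

import java.util.Arrays;
import java.util.HashMap;
import java.util.LinkedList;
import java.util.Map;

public class CollectValuesDemo {
    public static void main(String[] args) {
        Map<String, String> en = new HashMap<>();
        en.put("en", "English");
        Map<String, String> de = new HashMap<>();
        de.put("de", "German");
        LinkedList<String> all = new LinkedList<>();
        // A map's values() view is a Collection, so it can be appended directly
        for (Map<String, String> langs : Arrays.asList(en, de)) {
            all.addAll(langs.values());
        }
        System.out.println(all); // [English, German]
    }
}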
From source file:org.openscience.cdk.applications.taverna.weka.regression.EvaluateRegressionResultsAsPDFActivity.java
@Override
public void work() throws Exception {
    // Get input
    String[] options = ((String) this.getConfiguration()
            .getAdditionalProperty(CDKTavernaConstants.PROPERTY_SCATTER_PLOT_OPTIONS)).split(";");
    List<File> modelFiles = this.getInputAsFileList(this.INPUT_PORTS[0]);
    List<Instances> trainDatasets = this.getInputAsList(this.INPUT_PORTS[1], Instances.class);
    List<Instances> testDatasets = null;
    if (options[0].equals("" + TEST_TRAININGSET_PORT)) {
        testDatasets = this.getInputAsList(this.INPUT_PORTS[2], Instances.class);
    } else {
        testDatasets = null;
    }
    String directory = modelFiles.get(0).getParent();
    // Do work
    ArrayList<String> resultFiles = new ArrayList<String>();
    HashMap<UUID, Double> orgClassMap = new HashMap<UUID, Double>();
    HashMap<UUID, Double> calcClassMap = new HashMap<UUID, Double>();
    WekaTools tools = new WekaTools();
    ChartTool chartTool = new ChartTool();
    List<Object> rmseCharts = new ArrayList<Object>();
    List<Double> trainMeanRMSE = new ArrayList<Double>();
    List<Double> testMeanRMSE = new ArrayList<Double>();
    List<Double> cvMeanRMSE = new ArrayList<Double>();
    DefaultCategoryDataset[] ratioRMSESet = new DefaultCategoryDataset[trainDatasets.size()];
    for (int i = 0; i < trainDatasets.size(); i++) {
        ratioRMSESet[i] = new DefaultCategoryDataset();
    }
    List<Double> trainingSetRatios = null;
    int fileIDX = 1;
    while (!modelFiles.isEmpty()) {
        trainingSetRatios = new ArrayList<Double>();
        List<Double> trainRMSE = new ArrayList<Double>();
        HashSet<Integer> trainSkippedRMSE = new HashSet<Integer>();
        List<Double> testRMSE = new ArrayList<Double>();
        HashSet<Integer> testSkippedRMSE = new HashSet<Integer>();
        List<Double> cvRMSE = new ArrayList<Double>();
        HashSet<Integer> cvSkippedRMSE = new HashSet<Integer>();
        List<Object> chartsObjects = new LinkedList<Object>();
        File modelFile = null;
        Classifier classifier = null;
        String name = "";
        for (int j = 0; j < trainDatasets.size(); j++) {
            LinkedList<Double> predictedValues = new LinkedList<Double>();
            LinkedList<Double> orgValues = new LinkedList<Double>();
            LinkedList<Double[]> yResidueValues = new LinkedList<Double[]>();
            LinkedList<String> yResidueNames = new LinkedList<String>();
            if (modelFiles.isEmpty()) {
                break;
            }
            calcClassMap.clear();
            modelFile = modelFiles.remove(0);
            classifier = (Classifier) SerializationHelper.read(modelFile.getPath());
            Instances testset = null;
            if (testDatasets != null) {
                testset = testDatasets.get(j);
            }
            name = classifier.getClass().getSimpleName();
            String sum = "Method: " + name + " " + tools.getOptionsFromFile(modelFile, name) + "\n\n";
            // Produce training set data
            Instances trainset = trainDatasets.get(j);
            Instances trainUUIDSet = Filter.useFilter(trainset, tools.getIDGetter(trainset));
            trainset = Filter.useFilter(trainset, tools.getIDRemover(trainset));
            double trainingSetRatio = 1.0;
            if (testset != null) {
                trainingSetRatio = trainset.numInstances()
                        / (double) (trainset.numInstances() + testset.numInstances());
            }
            trainingSetRatios.add(trainingSetRatio * 100);
            // Predict
            for (int k = 0; k < trainset.numInstances(); k++) {
                UUID uuid = UUID.fromString(trainUUIDSet.instance(k).stringValue(0));
                orgClassMap.put(uuid, trainset.instance(k).classValue());
                calcClassMap.put(uuid, classifier.classifyInstance(trainset.instance(k)));
            }
            // Evaluate
            Evaluation trainEval = new Evaluation(trainset);
            trainEval.evaluateModel(classifier, trainset);
            // Chart data
            DefaultXYDataset xyDataSet = new DefaultXYDataset();
            String trainSeries = "Training Set (RMSE: "
                    + String.format("%.2f", trainEval.rootMeanSquaredError()) + ")";
            XYSeries series = new XYSeries(trainSeries);
            Double[] yTrainResidues = new Double[trainUUIDSet.numInstances()];
            Double[] orgTrain = new Double[trainUUIDSet.numInstances()];
            Double[] calc = new Double[trainUUIDSet.numInstances()];
            for (int k = 0; k < trainUUIDSet.numInstances(); k++) {
                UUID uuid = UUID.fromString(trainUUIDSet.instance(k).stringValue(0));
                orgTrain[k] = orgClassMap.get(uuid);
                calc[k] = calcClassMap.get(uuid);
                if (calc[k] != null && orgTrain[k] != null) {
                    series.add(orgTrain[k].doubleValue(), calc[k]);
                    yTrainResidues[k] = calc[k].doubleValue() - orgTrain[k].doubleValue();
                } else {
                    ErrorLogger.getInstance().writeError("Can't find value for UUID: " + uuid.toString(),
                            this.getActivityName());
                    throw new CDKTavernaException(this.getActivityName(),
                            "Can't find value for UUID: " + uuid.toString());
                }
            }
            orgValues.addAll(Arrays.asList(orgTrain));
            predictedValues.addAll(Arrays.asList(calc));
            CollectionUtilities.sortTwoArrays(orgTrain, yTrainResidues);
            yResidueValues.add(yTrainResidues);
            yResidueNames.add(trainSeries);
            xyDataSet.addSeries(trainSeries, series.toArray());
            // Summary
            sum += "Training Set:\n";
            if (trainEval.rootRelativeSquaredError() > 300) {
                trainSkippedRMSE.add(j);
            }
            trainRMSE.add(trainEval.rootMeanSquaredError());
            sum += trainEval.toSummaryString(true);
            // Produce test set data
            if (testset != null) {
                Instances testUUIDSet = Filter.useFilter(testset, tools.getIDGetter(testset));
                testset = Filter.useFilter(testset, tools.getIDRemover(testset));
                // Predict
                for (int k = 0; k < testset.numInstances(); k++) {
                    UUID uuid = UUID.fromString(testUUIDSet.instance(k).stringValue(0));
                    orgClassMap.put(uuid, testset.instance(k).classValue());
                    calcClassMap.put(uuid, classifier.classifyInstance(testset.instance(k)));
                }
                // Evaluate
                Evaluation testEval = new Evaluation(testset);
                testEval.evaluateModel(classifier, testset);
                // Chart data
                String testSeries = "Test Set (RMSE: "
                        + String.format("%.2f", testEval.rootMeanSquaredError()) + ")";
                series = new XYSeries(testSeries);
                Double[] yTestResidues = new Double[testUUIDSet.numInstances()];
                Double[] orgTest = new Double[testUUIDSet.numInstances()];
                calc = new Double[testUUIDSet.numInstances()];
                for (int k = 0; k < testUUIDSet.numInstances(); k++) {
                    UUID uuid = UUID.fromString(testUUIDSet.instance(k).stringValue(0));
                    orgTest[k] = orgClassMap.get(uuid);
                    calc[k] = calcClassMap.get(uuid);
                    if (calc[k] != null && orgTest[k] != null) {
                        series.add(orgTest[k].doubleValue(), calc[k].doubleValue());
                        yTestResidues[k] = calc[k].doubleValue() - orgTest[k].doubleValue();
                    } else {
                        ErrorLogger.getInstance().writeError("Can't find value for UUID: " + uuid.toString(),
                                this.getActivityName());
                        throw new CDKTavernaException(this.getActivityName(),
                                "Can't find value for UUID: " + uuid.toString());
                    }
                }
                orgValues.addAll(Arrays.asList(orgTest));
                predictedValues.addAll(Arrays.asList(calc));
                CollectionUtilities.sortTwoArrays(orgTest, yTestResidues);
                yResidueValues.add(yTestResidues);
                yResidueNames.add(testSeries);
                xyDataSet.addSeries(testSeries, series.toArray());
                // Create summary
                sum += "\nTest Set:\n";
                if (testEval.rootRelativeSquaredError() > 300) {
                    testSkippedRMSE.add(j);
                }
                testRMSE.add(testEval.rootMeanSquaredError());
                sum += testEval.toSummaryString(true);
            }
            // Produce cross validation data
            if (Boolean.parseBoolean(options[1])) {
                Evaluation cvEval = new Evaluation(trainset);
                if (testset != null) {
                    Instances fullSet = tools.getFullSet(trainset, testset);
                    cvEval.crossValidateModel(classifier, fullSet, 10, new Random(1));
                } else {
                    cvEval.crossValidateModel(classifier, trainset, 10, new Random(1));
                }
                sum += "\n10-fold cross-validation:\n";
                if (cvEval.rootRelativeSquaredError() > 300) {
                    cvSkippedRMSE.add(j);
                }
                cvRMSE.add(cvEval.rootMeanSquaredError());
                sum += cvEval.toSummaryString(true);
            }
            // Create scatter plot
            String header = classifier.getClass().getSimpleName() + "\n Training set ratio: "
                    + String.format("%.2f", trainingSetRatios.get(j)) + "%" + "\n Model name: "
                    + modelFile.getName();
            chartsObjects
                    .add(chartTool.createScatterPlot(xyDataSet, header, "Original values", "Predicted values"));
            // Create residue plot
            chartsObjects.add(chartTool.createResiduePlot(yResidueValues, header, "Index",
                    "(Predicted - Original)", yResidueNames));
            // Create curve
            Double[] tmpOrg = new Double[orgValues.size()];
            tmpOrg = orgValues.toArray(tmpOrg);
            Double[] tmpPred = new Double[predictedValues.size()];
            tmpPred = predictedValues.toArray(tmpPred);
            CollectionUtilities.sortTwoArrays(tmpOrg, tmpPred);
            DefaultXYDataset dataSet = new DefaultXYDataset();
            String orgName = "Original";
            XYSeries orgSeries = new XYSeries(orgName);
            String predName = "Predicted";
            XYSeries predSeries = new XYSeries(predName);
            for (int k = 0; k < tmpOrg.length; k++) {
                orgSeries.add((k + 1), tmpOrg[k]);
                predSeries.add((k + 1), tmpPred[k]);
            }
            dataSet.addSeries(orgName, orgSeries.toArray());
            dataSet.addSeries(predName, predSeries.toArray());
            chartsObjects.add(chartTool.createXYLineChart(header, "Index", "Value", dataSet, true, false));
            // Add summary
            chartsObjects.add(sum);
        }
        // Create RMSE Plot
        DefaultCategoryDataset dataSet = new DefaultCategoryDataset();
        double meanRMSE = 0;
        for (int i = 0; i < trainRMSE.size(); i++) {
            if (!trainSkippedRMSE.contains(i)) {
                dataSet.addValue(trainRMSE.get(i), "Training Set",
                        "(" + String.format("%.2f", trainingSetRatios.get(i)) + "%/" + (i + 1) + ")");
                ratioRMSESet[i].addValue(trainRMSE.get(i), "Training Set",
                        "(" + String.format("%.2f", trainingSetRatios.get(i)) + "%/" + (i + 1) + "/" + fileIDX + ")");
            }
            meanRMSE += trainRMSE.get(i);
        }
        trainMeanRMSE.add(meanRMSE / trainRMSE.size());
        meanRMSE = 0;
        if (!testRMSE.isEmpty()) {
            for (int i = 0; i < testRMSE.size(); i++) {
                if (!testSkippedRMSE.contains(i)) {
                    dataSet.addValue(testRMSE.get(i), "Test Set",
                            "(" + String.format("%.2f", trainingSetRatios.get(i)) + "%/" + (i + 1) + ")");
                    ratioRMSESet[i].addValue(testRMSE.get(i), "Test Set",
                            "(" + String.format("%.2f", trainingSetRatios.get(i)) + "%/" + (i + 1) + "/" + fileIDX + ")");
                }
                meanRMSE += testRMSE.get(i);
            }
            testMeanRMSE.add(meanRMSE / testRMSE.size());
        }
        meanRMSE = 0;
        if (!cvRMSE.isEmpty()) {
            for (int i = 0; i < cvRMSE.size(); i++) {
                if (!cvSkippedRMSE.contains(i)) {
                    dataSet.addValue(cvRMSE.get(i), "10-fold Cross-validation",
                            "(" + String.format("%.2f", trainingSetRatios.get(i)) + "%/" + (i + 1) + ")");
                    ratioRMSESet[i].addValue(cvRMSE.get(i), "10-fold Cross-validation",
                            "(" + String.format("%.2f", trainingSetRatios.get(i)) + "%/" + (i + 1) + "/" + fileIDX + ")");
                }
                meanRMSE += cvRMSE.get(i);
            }
            cvMeanRMSE.add(meanRMSE / cvRMSE.size());
        }
        JFreeChart rmseChart = chartTool.createLineChart(
                "RMSE Plot\n Classifier:" + name + " " + tools.getOptionsFromFile(modelFile, name),
                "(Training set ratio/Set Index/File index)", "RMSE", dataSet, false, true);
        chartsObjects.add(rmseChart);
        rmseCharts.add(rmseChart);
        // Write PDF
        File file = FileNameGenerator.getNewFile(directory, ".pdf", "ScatterPlot");
        chartTool.writeChartAsPDF(file, chartsObjects);
        resultFiles.add(file.getPath());
        fileIDX++;
    }
    // Create set ratio RMSE plots
    for (int i = 0; i < ratioRMSESet.length; i++) {
        JFreeChart rmseChart = chartTool.createLineChart(
                "Set RMSE plot\n" + "(" + String.format("%.2f", trainingSetRatios.get(i)) + "%/" + (i + 1) + ")",
                "(Training set ratio/Index)", "RMSE", ratioRMSESet[i], false, true);
        rmseCharts.add(rmseChart);
    }
    // Create mean RMSE plot
    DefaultCategoryDataset dataSet = new DefaultCategoryDataset();
    for (int i = 0; i < trainMeanRMSE.size(); i++) {
        dataSet.addValue(trainMeanRMSE.get(i), "Training Set", "" + (i + 1));
    }
    for (int i = 0; i < testMeanRMSE.size(); i++) {
        dataSet.addValue(testMeanRMSE.get(i), "Test Set", "" + (i + 1));
    }
    for (int i = 0; i < cvMeanRMSE.size(); i++) {
        dataSet.addValue(cvMeanRMSE.get(i), "10-fold Cross-validation", "" + (i + 1));
    }
    JFreeChart rmseChart = chartTool.createLineChart("RMSE Mean Plot", "Dataset number", "Mean RMSE", dataSet);
    rmseCharts.add(rmseChart);
    File file = FileNameGenerator.getNewFile(directory, ".pdf", "RMSE-Sum");
    chartTool.writeChartAsPDF(file, rmseCharts);
    resultFiles.add(file.getPath());
    // Set output
    this.setOutputAsStringList(resultFiles, this.OUTPUT_PORTS[0]);
}
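Several steps above append whole boxed arrays with list.addAll(Arrays.asList(array)); a minimal sketch of that idiom (values illustrative):

import java.util.Arrays;
import java.util.LinkedList;

public class ArrayAppendDemo {
    public static void main(String[] args) {
        LinkedList<Double> predicted = new LinkedList<>();
        // The array must be boxed: Arrays.asList on a primitive double[]
        // would produce a single-element List<double[]> instead.
        Double[] batch = { 1.5, 2.0, 2.5 };
        predicted.addAll(Arrays.asList(batch));
        System.out.println(predicted); // [1.5, 2.0, 2.5]
    }
}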
From source file:eu.stratosphere.nephele.multicast.MulticastManager.java
/**
 * Returns a list of (physical) Nodes (=hosts) within the multicast tree. Each node contains the local ChannelIDs,
 * records must be forwarded to. The first node in the List is the only multicast sender.
 *
 * @param sourceChannelID
 * @return
 */
private LinkedList<TreeNode> extractTreeNodes(final InstanceConnectionInfo source, final JobID jobID,
        final ChannelID sourceChannelID, final boolean randomize) {
    final ExecutionGraph eg = this.scheduler.getExecutionGraphByID(jobID);
    final ExecutionEdge outputChannel = eg.getEdgeByID(sourceChannelID);
    final ExecutionGate broadcastGate = outputChannel.getOutputGate();
    final LinkedList<ExecutionEdge> outputChannels = new LinkedList<ExecutionEdge>();
    // Get all broadcast output channels
    final int numberOfOutputChannels = broadcastGate.getNumberOfEdges();
    for (int i = 0; i < numberOfOutputChannels; ++i) {
        final ExecutionEdge c = broadcastGate.getEdge(i);
        if (c.isBroadcast()) {
            outputChannels.add(c);
        }
    }
    final LinkedList<TreeNode> treeNodes = new LinkedList<TreeNode>();
    LinkedList<ChannelID> actualLocalTargets = new LinkedList<ChannelID>();
    int firstConnectionID = 0;
    // search for local targets for the tree node
    for (Iterator<ExecutionEdge> iter = outputChannels.iterator(); iter.hasNext();) {
        final ExecutionEdge actualOutputChannel = iter.next();
        // the connection ID should not be needed for the root node (as it is not set as remote receiver)
        // but in order to maintain consistency, it also gets the connectionID of the first channel pointing to it
        firstConnectionID = actualOutputChannel.getConnectionID();
        final ExecutionVertex targetVertex = actualOutputChannel.getInputGate().getVertex();
        // is the target vertex running on the same instance?
        if (targetVertex.getAllocatedResource().getInstance().getInstanceConnectionInfo().equals(source)) {
            actualLocalTargets.add(actualOutputChannel.getInputChannelID());
            iter.remove();
        }
    }
    // create sender node (root) with source instance
    TreeNode actualNode = new TreeNode(
            eg.getVertexByChannelID(sourceChannelID).getAllocatedResource().getInstance(), source,
            firstConnectionID, actualLocalTargets);
    treeNodes.add(actualNode);
    // now we have the root node... let's extract all other nodes
    LinkedList<TreeNode> receiverNodes = new LinkedList<TreeNode>();
    while (outputChannels.size() > 0) {
        final ExecutionEdge firstChannel = outputChannels.pollFirst();
        // each receiver node's endpoint is associated with the connection ID
        // of the first channel pointing to this node.
        final int connectionID = firstChannel.getConnectionID();
        final ExecutionVertex firstTarget = firstChannel.getInputGate().getVertex();
        final InstanceConnectionInfo actualInstance = firstTarget.getAllocatedResource().getInstance()
                .getInstanceConnectionInfo();
        actualLocalTargets = new LinkedList<ChannelID>();
        // add first local target
        actualLocalTargets.add(firstChannel.getInputChannelID());
        // now we iterate through the remaining channels to find other local targets...
        for (Iterator<ExecutionEdge> iter = outputChannels.iterator(); iter.hasNext();) {
            final ExecutionEdge actualOutputChannel = iter.next();
            final ExecutionVertex actualTarget = actualOutputChannel.getInputGate().getVertex();
            // is the target vertex running on the same instance?
            if (actualTarget.getAllocatedResource().getInstance().getInstanceConnectionInfo()
                    .equals(actualInstance)) {
                actualLocalTargets.add(actualOutputChannel.getInputChannelID());
                iter.remove();
            }
        } // end for
        // create tree node for current instance
        actualNode = new TreeNode(firstTarget.getAllocatedResource().getInstance(), actualInstance, connectionID,
                actualLocalTargets);
        receiverNodes.add(actualNode);
    } // end while
    // Do we want to shuffle the receiver nodes?
    // Only randomize the receivers, as the sender (the first one) has to stay the same
    if (randomize) {
        Collections.shuffle(receiverNodes);
    } else {
        // Sort tree nodes according to host name
        Collections.sort(receiverNodes);
    }
    treeNodes.addAll(receiverNodes);
    return treeNodes;
}
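The tail of the tree is built separately, shuffled or sorted, and only then appended with addAll, so the sender always stays first; a minimal sketch of that keep-head-fixed pattern (host names illustrative):

import java.util.Arrays;
import java.util.Collections;
import java.util.LinkedList;

public class RootPlusReceiversDemo {
    public static void main(String[] args) {
        LinkedList<String> tree = new LinkedList<>();
        tree.add("sender"); // root must stay in front
        LinkedList<String> receivers = new LinkedList<>(Arrays.asList("host-c", "host-a", "host-b"));
        Collections.sort(receivers); // or Collections.shuffle(receivers)
        tree.addAll(receivers);      // append the ordered receivers after the root
        System.out.println(tree);    // [sender, host-a, host-b, host-c]
    }
}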
From source file:org.apache.axis2.context.MessageContext.java
/**
 * Using meta data for phases/handlers, create a linked list of actual
 * phase/handler objects. The created list is composed of the objects
 * from the base list at the top of the created list followed by the
 * restored objects.
 *
 * @param base Linked list of phase/handler objects
 * @param metaDataEntries Linked list of MetaDataEntry objects
 * @return LinkedList of objects or NULL if none available
 */
private LinkedList<Handler> restoreExecutedList(LinkedList<Handler> base,
        LinkedList<MetaDataEntry> metaDataEntries) {
    if (metaDataEntries == null) {
        return base;
    }
    // get a list of existing handler/phase objects for the restored objects
    ArrayList<MetaDataEntry> tmpMetaDataList = new ArrayList<MetaDataEntry>(metaDataEntries);
    ArrayList<Handler> existingList = restoreHandlerList(tmpMetaDataList);
    if ((existingList == null) || (existingList.isEmpty())) {
        return base;
    }
    // set up a list to return
    LinkedList<Handler> returnedList = new LinkedList<Handler>();
    if (base != null) {
        returnedList.addAll(base);
    }
    returnedList.addAll(existingList);
    return returnedList;
}
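restoreExecutedList merges two lists into a fresh LinkedList with two addAll calls instead of mutating either input; a minimal sketch of that non-destructive merge (names illustrative):

import java.util.Arrays;
import java.util.LinkedList;
import java.util.List;

public class MergeListsDemo {
    public static void main(String[] args) {
        List<String> base = Arrays.asList("phase-1", "phase-2");
        List<String> restored = Arrays.asList("handler-a");
        LinkedList<String> merged = new LinkedList<>();
        merged.addAll(base);        // base objects first
        merged.addAll(restored);    // restored objects appended; neither input is mutated
        System.out.println(merged); // [phase-1, phase-2, handler-a]
    }
}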
From source file:hudson.plugins.project_inheritance.projects.InheritanceProject.java
public Deque<Version> getVersions() {
    Object obj = onSelfChangeBuffer.get(this, "getVersions()");
    if (obj != null && obj instanceof Deque) {
        return (Deque) obj;
    }
    LinkedList<Version> lst = new LinkedList<Version>();
    if (this.versionStore == null) {
        return lst;
    }
    lst.addAll(this.versionStore.getAllVersions());
    onSelfChangeBuffer.set(this, "getVersions()", lst);
    return lst;
}
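getVersions copies the store's collection into a LinkedList and can hand it back as a Deque because LinkedList implements both List and Deque; a minimal sketch of that snapshot pattern (the backing store here is a hypothetical stand-in):

import java.util.Arrays;
import java.util.Deque;
import java.util.LinkedList;
import java.util.List;

public class SnapshotDequeDemo {
    static List<String> store = Arrays.asList("v1", "v2", "v3"); // stand-in for versionStore

    static Deque<String> getVersions() {
        LinkedList<String> lst = new LinkedList<>();
        lst.addAll(store); // snapshot the backing collection; LinkedList is also a Deque
        return lst;
    }

    public static void main(String[] args) {
        System.out.println(getVersions().peekLast()); // v3
    }
}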
From source file:org.alfresco.solr.query.Solr4QueryParser.java
@SuppressWarnings("unchecked")
protected Query getFieldQueryImpl(String field, String queryText, AnalysisMode analysisMode,
        LuceneFunction luceneFunction) throws ParseException, IOException {
    // make sure the field exists or return a dummy query so we have no error ....ACE-3231
    SchemaField schemaField = schema.getFieldOrNull(field);
    boolean isNumeric = false;
    if (schemaField == null) {
        return new TermQuery(new Term("_dummy_", "_miss_"));
    } else {
        isNumeric = (schemaField.getType().getNumericType() != null);
    }
    // Use the analyzer to get all the tokens, and then build a TermQuery,
    // PhraseQuery, or nothing
    // TODO: Untokenised columns with functions require special handling
    if (luceneFunction != LuceneFunction.FIELD) {
        throw new UnsupportedOperationException(
                "Field queries are not supported on lucene functions (UPPER, LOWER, etc)");
    }
    // if the incoming string already has a language identifier we strip it off and add it back on again
    String localePrefix = "";
    String toTokenise = queryText;
    if (queryText.startsWith("{")) {
        int position = queryText.indexOf("}");
        if (position > 0) {
            String language = queryText.substring(0, position + 1);
            Locale locale = new Locale(queryText.substring(1, position));
            String token = queryText.substring(position + 1);
            boolean found = false;
            for (Locale current : Locale.getAvailableLocales()) {
                if (current.toString().equalsIgnoreCase(locale.toString())) {
                    found = true;
                    break;
                }
            }
            if (found) {
                localePrefix = language;
                toTokenise = token;
            } else {
                //toTokenise = token;
            }
        }
    }
    String testText = toTokenise;
    boolean requiresMLTokenDuplication = false;
    String localeString = null;
    if (isPropertyField(field) && (localePrefix.length() == 0)) {
        if ((queryText.length() > 0) && (queryText.charAt(0) == '\u0000')) {
            int position = queryText.indexOf("\u0000", 1);
            testText = queryText.substring(position + 1);
            requiresMLTokenDuplication = true;
            localeString = queryText.substring(1, position);
        }
    }
    // find the positions of any escaped * and ? and ignore them
    Set<Integer> wildcardPoistions = getWildcardPositions(testText);
    TokenStream source = null;
    ArrayList<org.apache.lucene.analysis.Token> list = new ArrayList<org.apache.lucene.analysis.Token>();
    boolean severalTokensAtSamePosition = false;
    org.apache.lucene.analysis.Token nextToken;
    int positionCount = 0;
    try {
        org.apache.lucene.analysis.Token reusableToken = new org.apache.lucene.analysis.Token();
        source = getAnalyzer().tokenStream(field, new StringReader(toTokenise));
        source.reset();
        while (source.incrementToken()) {
            CharTermAttribute cta = source.getAttribute(CharTermAttribute.class);
            OffsetAttribute offsetAtt = source.getAttribute(OffsetAttribute.class);
            TypeAttribute typeAtt = null;
            if (source.hasAttribute(TypeAttribute.class)) {
                typeAtt = source.getAttribute(TypeAttribute.class);
            }
            PositionIncrementAttribute posIncAtt = null;
            if (source.hasAttribute(PositionIncrementAttribute.class)) {
                posIncAtt = source.getAttribute(PositionIncrementAttribute.class);
            }
            nextToken = new Token(cta.buffer(), 0, cta.length(), offsetAtt.startOffset(),
                    offsetAtt.endOffset());
            if (typeAtt != null) {
                nextToken.setType(typeAtt.type());
            }
            if (posIncAtt != null) {
                nextToken.setPositionIncrement(posIncAtt.getPositionIncrement());
            }
            list.add(nextToken);
            if (nextToken.getPositionIncrement() != 0)
                positionCount += nextToken.getPositionIncrement();
            else
                severalTokensAtSamePosition = true;
        }
    } catch (SolrException e) {
        // MNT-15336
        // Text against a numeric field should fail silently rather than tell you it is not possible.
        if (isNumeric && e.getMessage() != null && e.getMessage().startsWith("Invalid Number:")) {
            // Generate a query that does not match any document - rather than nothing
            return createNoMatchQuery();
        } else {
            throw e;
        }
    } finally {
        try {
            if (source != null) {
                source.close();
            }
        } catch (IOException e) {
            // ignore
        }
    }
    // add any alpha numeric wildcards that have been missed
    // Fixes most stop word and wild card issues
    for (int index = 0; index < testText.length(); index++) {
        char current = testText.charAt(index);
        if (((current == '*') || (current == '?')) && wildcardPoistions.contains(index)) {
            StringBuilder pre = new StringBuilder(10);
            if (index == 0) {
                // "*" and "?" at the start
                boolean found = false;
                for (int j = 0; j < list.size(); j++) {
                    org.apache.lucene.analysis.Token test = list.get(j);
                    if ((test.startOffset() <= 0) && (0 < test.endOffset())) {
                        found = true;
                        break;
                    }
                }
                if (!found && (list.size() == 0)) {
                    // Add new token followed by * not given by the tokeniser
                    org.apache.lucene.analysis.Token newToken = new org.apache.lucene.analysis.Token("", 0, 0);
                    newToken.setType("ALPHANUM");
                    if (requiresMLTokenDuplication) {
                        Locale locale = I18NUtil.parseLocale(localeString);
                        MLTokenDuplicator duplicator = new MLTokenDuplicator(locale,
                                MLAnalysisMode.EXACT_LANGUAGE);
                        Iterator<org.apache.lucene.analysis.Token> it = duplicator.buildIterator(newToken);
                        if (it != null) {
                            int count = 0;
                            while (it.hasNext()) {
                                list.add(it.next());
                                count++;
                                if (count > 1) {
                                    severalTokensAtSamePosition = true;
                                }
                            }
                        }
                    }
                    // content
                    else {
                        list.add(newToken);
                    }
                }
            } else if (index > 0) {
                // Add * and ? back into any tokens from which it has been removed
                boolean tokenFound = false;
                for (int j = 0; j < list.size(); j++) {
                    org.apache.lucene.analysis.Token test = list.get(j);
                    if ((test.startOffset() <= index) && (index < test.endOffset())) {
                        if (requiresMLTokenDuplication) {
                            String termText = test.toString();
                            int position = termText.indexOf("}");
                            String language = termText.substring(0, position + 1);
                            String token = termText.substring(position + 1);
                            if (index >= test.startOffset() + token.length()) {
                                test.setEmpty();
                                test.append(language + token + current);
                            }
                        } else {
                            if (index >= test.startOffset() + test.length()) {
                                test.setEmpty();
                                test.append(test.toString() + current);
                            }
                        }
                        tokenFound = true;
                        break;
                    }
                }
                if (!tokenFound) {
                    for (int i = index - 1; i >= 0; i--) {
                        char c = testText.charAt(i);
                        if (Character.isLetterOrDigit(c)) {
                            boolean found = false;
                            for (int j = 0; j < list.size(); j++) {
                                org.apache.lucene.analysis.Token test = list.get(j);
                                if ((test.startOffset() <= i) && (i < test.endOffset())) {
                                    found = true;
                                    break;
                                }
                            }
                            if (found) {
                                break;
                            } else {
                                pre.insert(0, c);
                            }
                        } else {
                            break;
                        }
                    }
                    if (pre.length() > 0) {
                        // Add new token followed by * not given by the tokeniser
                        org.apache.lucene.analysis.Token newToken = new org.apache.lucene.analysis.Token(
                                pre.toString(), index - pre.length(), index);
                        newToken.setType("ALPHANUM");
                        if (requiresMLTokenDuplication) {
                            Locale locale = I18NUtil.parseLocale(localeString);
                            MLTokenDuplicator duplicator = new MLTokenDuplicator(locale,
                                    MLAnalysisMode.EXACT_LANGUAGE);
                            Iterator<org.apache.lucene.analysis.Token> it = duplicator.buildIterator(newToken);
                            if (it != null) {
                                int count = 0;
                                while (it.hasNext()) {
                                    list.add(it.next());
                                    count++;
                                    if (count > 1) {
                                        severalTokensAtSamePosition = true;
                                    }
                                }
                            }
                        }
                        // content
                        else {
                            list.add(newToken);
                        }
                    }
                }
            }
            StringBuilder post = new StringBuilder(10);
            if (index > 0) {
                for (int i = index + 1; i < testText.length(); i++) {
                    char c = testText.charAt(i);
                    if (Character.isLetterOrDigit(c)) {
                        boolean found = false;
                        for (int j = 0; j < list.size(); j++) {
                            org.apache.lucene.analysis.Token test = list.get(j);
                            if ((test.startOffset() <= i) && (i < test.endOffset())) {
                                found = true;
                                break;
                            }
                        }
                        if (found) {
                            break;
                        } else {
                            post.append(c);
                        }
                    } else {
                        break;
                    }
                }
                if (post.length() > 0) {
                    // Add new token followed by * not given by the tokeniser
                    org.apache.lucene.analysis.Token newToken = new org.apache.lucene.analysis.Token(
                            post.toString(), index + 1, index + 1 + post.length());
                    newToken.setType("ALPHANUM");
                    if (requiresMLTokenDuplication) {
                        Locale locale = I18NUtil.parseLocale(localeString);
                        MLTokenDuplicator duplicator = new MLTokenDuplicator(locale,
                                MLAnalysisMode.EXACT_LANGUAGE);
                        Iterator<org.apache.lucene.analysis.Token> it = duplicator.buildIterator(newToken);
                        if (it != null) {
                            int count = 0;
                            while (it.hasNext()) {
                                list.add(it.next());
                                count++;
                                if (count > 1) {
                                    severalTokensAtSamePosition = true;
                                }
                            }
                        }
                    }
                    // content
                    else {
                        list.add(newToken);
                    }
                }
            }
        }
    }
    // Put in real position increments as we treat them correctly
    int curentIncrement = -1;
    for (org.apache.lucene.analysis.Token c : list) {
        if (curentIncrement == -1) {
            curentIncrement = c.getPositionIncrement();
        } else if (c.getPositionIncrement() > 0) {
            curentIncrement = c.getPositionIncrement();
        } else {
            c.setPositionIncrement(curentIncrement);
        }
    }
    // Remove small bits already covered in larger fragments
    list = getNonContained(list);
    Collections.sort(list, new Comparator<org.apache.lucene.analysis.Token>() {
        public int compare(Token o1, Token o2) {
            int dif = o1.startOffset() - o2.startOffset();
            return dif;
        }
    });
    // Combined * and ? based strings - should redo the tokeniser
    // Build tokens by position
    LinkedList<LinkedList<org.apache.lucene.analysis.Token>> tokensByPosition = new LinkedList<LinkedList<org.apache.lucene.analysis.Token>>();
    LinkedList<org.apache.lucene.analysis.Token> currentList = null;
    int lastStart = 0;
    for (org.apache.lucene.analysis.Token c : list) {
        if (c.startOffset() == lastStart) {
            if (currentList == null) {
                currentList = new LinkedList<org.apache.lucene.analysis.Token>();
                tokensByPosition.add(currentList);
            }
            currentList.add(c);
        } else {
            currentList = new LinkedList<org.apache.lucene.analysis.Token>();
            tokensByPosition.add(currentList);
            currentList.add(c);
        }
        lastStart = c.startOffset();
    }
    // Build all the token sequences and see which ones get strung together
    OrderedHashSet<LinkedList<org.apache.lucene.analysis.Token>> allTokenSequencesSet = new OrderedHashSet<LinkedList<org.apache.lucene.analysis.Token>>();
    for (LinkedList<org.apache.lucene.analysis.Token> tokensAtPosition : tokensByPosition) {
        OrderedHashSet<LinkedList<org.apache.lucene.analysis.Token>> positionalSynonymSequencesSet = new OrderedHashSet<LinkedList<org.apache.lucene.analysis.Token>>();
        OrderedHashSet<LinkedList<org.apache.lucene.analysis.Token>> newAllTokenSequencesSet = new OrderedHashSet<LinkedList<org.apache.lucene.analysis.Token>>();
        FOR_FIRST_TOKEN_AT_POSITION_ONLY: for (org.apache.lucene.analysis.Token t : tokensAtPosition) {
            org.apache.lucene.analysis.Token replace = new org.apache.lucene.analysis.Token(t,
                    t.startOffset(), t.endOffset());
            replace.setType(t.type());
            replace.setPositionIncrement(t.getPositionIncrement());
            boolean tokenFoundSequence = false;
            for (LinkedList<org.apache.lucene.analysis.Token> tokenSequence : allTokenSequencesSet) {
                LinkedList<org.apache.lucene.analysis.Token> newEntry = new LinkedList<org.apache.lucene.analysis.Token>();
                newEntry.addAll(tokenSequence);
                if ((newEntry.getLast().endOffset() == replace.endOffset())
                        && replace.type().equals(SynonymFilter.TYPE_SYNONYM)) {
                    if ((newEntry.getLast().startOffset() == replace.startOffset())
                            && newEntry.getLast().type().equals(SynonymFilter.TYPE_SYNONYM)) {
                        positionalSynonymSequencesSet.add(tokenSequence);
                        newEntry.add(replace);
                        tokenFoundSequence = true;
                    } else if (newEntry.getLast().type().equals(CommonGramsFilter.GRAM_TYPE)) {
                        if (newEntry.toString().endsWith(replace.toString())) {
                            // already in the gram
                            positionalSynonymSequencesSet.add(tokenSequence);
                            tokenFoundSequence = true;
                        } else {
                            // need to replace the synonym in the current gram
                            tokenFoundSequence = true;
                            StringBuffer old = new StringBuffer(newEntry.getLast().toString());
                            old.replace(replace.startOffset() - newEntry.getLast().startOffset(),
                                    replace.endOffset() - newEntry.getLast().startOffset(),
                                    replace.toString());
                            Token newToken = new org.apache.lucene.analysis.Token(old.toString(),
                                    newEntry.getLast().startOffset(), newEntry.getLast().endOffset());
                            newEntry.removeLast();
                            newEntry.add(newToken);
                        }
                    }
                } else if ((newEntry.getLast().startOffset() < replace.startOffset())
                        && (newEntry.getLast().endOffset() < replace.endOffset())) {
                    if (newEntry.getLast().type().equals(SynonymFilter.TYPE_SYNONYM)
                            && replace.type().equals(SynonymFilter.TYPE_SYNONYM)) {
                        positionalSynonymSequencesSet.add(tokenSequence);
                    }
                    newEntry.add(replace);
                    tokenFoundSequence = true;
                }
                newAllTokenSequencesSet.add(newEntry);
            }
            if (false == tokenFoundSequence) {
                for (LinkedList<org.apache.lucene.analysis.Token> tokenSequence : newAllTokenSequencesSet) {
                    LinkedList<org.apache.lucene.analysis.Token> newEntry = new LinkedList<org.apache.lucene.analysis.Token>();
                    newEntry.addAll(tokenSequence);
                    if ((newEntry.getLast().endOffset() == replace.endOffset())
                            && replace.type().equals(SynonymFilter.TYPE_SYNONYM)) {
                        if ((newEntry.getLast().startOffset() == replace.startOffset())
                                && newEntry.getLast().type().equals(SynonymFilter.TYPE_SYNONYM)) {
                            positionalSynonymSequencesSet.add(tokenSequence);
                            newEntry.add(replace);
                            tokenFoundSequence = true;
                        } else if (newEntry.getLast().type().equals(CommonGramsFilter.GRAM_TYPE)) {
                            if (newEntry.toString().endsWith(replace.toString())) {
                                // already in the gram
                                positionalSynonymSequencesSet.add(tokenSequence);
                                tokenFoundSequence = true;
                            } else {
                                // need to replace the synonym in the current gram
                                tokenFoundSequence = true;
                                StringBuffer old = new StringBuffer(newEntry.getLast().toString());
                                old.replace(replace.startOffset() - newEntry.getLast().startOffset(),
                                        replace.endOffset() - newEntry.getLast().startOffset(),
                                        replace.toString());
                                Token newToken = new org.apache.lucene.analysis.Token(old.toString(),
                                        newEntry.getLast().startOffset(), newEntry.getLast().endOffset());
                                newEntry.removeLast();
                                newEntry.add(newToken);
                                positionalSynonymSequencesSet.add(newEntry);
                            }
                        }
                    } else if ((newEntry.getLast().startOffset() < replace.startOffset())
                            && (newEntry.getLast().endOffset() < replace.endOffset())) {
                        if (newEntry.getLast().type().equals(SynonymFilter.TYPE_SYNONYM)
                                && replace.type().equals(SynonymFilter.TYPE_SYNONYM)) {
                            positionalSynonymSequencesSet.add(tokenSequence);
                            newEntry.add(replace);
                            tokenFoundSequence = true;
                        }
                    }
                }
            }
            if (false == tokenFoundSequence) {
                LinkedList<org.apache.lucene.analysis.Token> newEntry = new LinkedList<org.apache.lucene.analysis.Token>();
                newEntry.add(replace);
                newAllTokenSequencesSet.add(newEntry);
            }
            // Limit the max number of permutations we consider
            if (newAllTokenSequencesSet.size() > 64) {
                break FOR_FIRST_TOKEN_AT_POSITION_ONLY;
            }
        }
        allTokenSequencesSet = newAllTokenSequencesSet;
        allTokenSequencesSet.addAll(positionalSynonymSequencesSet);
    }
    LinkedList<LinkedList<org.apache.lucene.analysis.Token>> allTokenSequences = new LinkedList<LinkedList<org.apache.lucene.analysis.Token>>(
            allTokenSequencesSet);
    // build the unique
    LinkedList<LinkedList<org.apache.lucene.analysis.Token>> fixedTokenSequences = new LinkedList<LinkedList<org.apache.lucene.analysis.Token>>();
    for (LinkedList<org.apache.lucene.analysis.Token> tokenSequence : allTokenSequences) {
        LinkedList<org.apache.lucene.analysis.Token> fixedTokenSequence = new LinkedList<org.apache.lucene.analysis.Token>();
        fixedTokenSequences.add(fixedTokenSequence);
        org.apache.lucene.analysis.Token replace = null;
        for (org.apache.lucene.analysis.Token c : tokenSequence) {
            if (replace == null) {
                StringBuilder prefix = new StringBuilder();
                for (int i = c.startOffset() - 1; i >= 0; i--) {
                    char test = testText.charAt(i);
                    if (((test == '*') || (test == '?')) && wildcardPoistions.contains(i)) {
                        prefix.insert(0, test);
                    } else {
                        break;
                    }
                }
                String pre = prefix.toString();
                if (requiresMLTokenDuplication) {
                    String termText = c.toString();
                    int position = termText.indexOf("}");
                    String language = termText.substring(0, position + 1);
                    String token = termText.substring(position + 1);
                    replace = new org.apache.lucene.analysis.Token(language + pre + token,
                            c.startOffset() - pre.length(), c.endOffset());
                    replace.setType(c.type());
                    replace.setPositionIncrement(c.getPositionIncrement());
                } else {
                    String termText = c.toString();
                    replace = new org.apache.lucene.analysis.Token(pre + termText,
                            c.startOffset() - pre.length(), c.endOffset());
                    replace.setType(c.type());
                    replace.setPositionIncrement(c.getPositionIncrement());
                }
            } else {
                StringBuilder prefix = new StringBuilder();
                StringBuilder postfix = new StringBuilder();
                StringBuilder builder = prefix;
                for (int i = c.startOffset() - 1; i >= replace.endOffset(); i--) {
                    char test = testText.charAt(i);
                    if (((test == '*') || (test == '?')) && wildcardPoistions.contains(i)) {
                        builder.insert(0, test);
                    } else {
                        builder = postfix;
                        postfix.setLength(0);
                    }
                }
                String pre = prefix.toString();
                String post = postfix.toString();
                // Does it bridge?
                if ((pre.length() > 0) && (replace.endOffset() + pre.length()) == c.startOffset()) {
                    String termText = c.toString();
                    if (requiresMLTokenDuplication) {
                        int position = termText.indexOf("}");
                        @SuppressWarnings("unused")
                        String language = termText.substring(0, position + 1);
                        String token = termText.substring(position + 1);
                        int oldPositionIncrement = replace.getPositionIncrement();
                        String replaceTermText = replace.toString();
                        replace = new org.apache.lucene.analysis.Token(replaceTermText + pre + token,
                                replace.startOffset(), c.endOffset());
                        replace.setType(replace.type());
                        replace.setPositionIncrement(oldPositionIncrement);
                    } else {
                        int oldPositionIncrement = replace.getPositionIncrement();
                        String replaceTermText = replace.toString();
                        replace = new org.apache.lucene.analysis.Token(replaceTermText + pre + termText,
                                replace.startOffset(), c.endOffset());
                        replace.setType(replace.type());
                        replace.setPositionIncrement(oldPositionIncrement);
                    }
                } else {
                    String termText = c.toString();
                    if (requiresMLTokenDuplication) {
                        int position = termText.indexOf("}");
                        String language = termText.substring(0, position + 1);
                        String token = termText.substring(position + 1);
                        String replaceTermText = replace.toString();
                        org.apache.lucene.analysis.Token last = new org.apache.lucene.analysis.Token(
                                replaceTermText + post, replace.startOffset(),
                                replace.endOffset() + post.length());
                        last.setType(replace.type());
                        last.setPositionIncrement(replace.getPositionIncrement());
                        fixedTokenSequence.add(last);
                        replace = new org.apache.lucene.analysis.Token(language + pre + token,
                                c.startOffset() - pre.length(), c.endOffset());
                        replace.setType(c.type());
                        replace.setPositionIncrement(c.getPositionIncrement());
                    } else {
                        String replaceTermText = replace.toString();
                        org.apache.lucene.analysis.Token last = new org.apache.lucene.analysis.Token(
                                replaceTermText + post, replace.startOffset(),
                                replace.endOffset() + post.length());
                        last.setType(replace.type());
                        last.setPositionIncrement(replace.getPositionIncrement());
                        fixedTokenSequence.add(last);
                        replace = new org.apache.lucene.analysis.Token(pre + termText,
                                c.startOffset() - pre.length(), c.endOffset());
                        replace.setType(c.type());
                        replace.setPositionIncrement(c.getPositionIncrement());
                    }
                }
            }
        }
        // finish last
        if (replace != null) {
            StringBuilder postfix = new StringBuilder();
            if ((replace.endOffset() >= 0) && (replace.endOffset() < testText.length())) {
                for (int i = replace.endOffset(); i < testText.length(); i++) {
                    char test = testText.charAt(i);
                    if (((test == '*') || (test == '?')) && wildcardPoistions.contains(i)) {
                        postfix.append(test);
                    } else {
                        break;
                    }
                }
            }
            String post = postfix.toString();
            int oldPositionIncrement = replace.getPositionIncrement();
            String replaceTermText = replace.toString();
            replace = new org.apache.lucene.analysis.Token(replaceTermText + post, replace.startOffset(),
                    replace.endOffset() + post.length());
            replace.setType(replace.type());
            replace.setPositionIncrement(oldPositionIncrement);
            fixedTokenSequence.add(replace);
        }
    }
    // rebuild fixed list
    ArrayList<org.apache.lucene.analysis.Token> fixed = new ArrayList<org.apache.lucene.analysis.Token>();
    for (LinkedList<org.apache.lucene.analysis.Token> tokenSequence : fixedTokenSequences) {
        for (org.apache.lucene.analysis.Token token : tokenSequence) {
            fixed.add(token);
        }
    }
    // reorder by start position and increment
    Collections.sort(fixed, new Comparator<org.apache.lucene.analysis.Token>() {
        public int compare(Token o1, Token o2) {
            int dif = o1.startOffset() - o2.startOffset();
            if (dif != 0) {
                return dif;
            } else {
                return o1.getPositionIncrement() - o2.getPositionIncrement();
            }
        }
    });
    // make sure we remove any tokens we have duplicated
    @SuppressWarnings("rawtypes")
    OrderedHashSet unique = new OrderedHashSet();
    unique.addAll(fixed);
    fixed = new ArrayList<org.apache.lucene.analysis.Token>(unique);
    list = fixed;
    // add any missing locales back to the tokens
    if (localePrefix.length() > 0) {
        for (int j = 0; j < list.size(); j++) {
            org.apache.lucene.analysis.Token currentToken = list.get(j);
            String termText = currentToken.toString();
            currentToken.setEmpty();
            currentToken.append(localePrefix + termText);
        }
    }
    SchemaField sf = schema.getField(field);
    TokenizerChain tokenizerChain = (sf.getType().getQueryAnalyzer() instanceof TokenizerChain)
            ? ((TokenizerChain) sf.getType().getQueryAnalyzer())
            : null;
    boolean isShingled = false;
    if (tokenizerChain != null) {
        for (TokenFilterFactory factory : tokenizerChain.getTokenFilterFactories()) {
            if (factory instanceof ShingleFilterFactory) {
                isShingled = true;
                break;
            }
        }
    }
    AlfrescoAnalyzerWrapper analyzerWrapper = (sf.getType()
            .getQueryAnalyzer() instanceof AlfrescoAnalyzerWrapper)
                    ? ((AlfrescoAnalyzerWrapper) sf.getType().getQueryAnalyzer())
                    : null;
    if (analyzerWrapper != null) {
        // assume if there are no term positions it is shingled ....
        isShingled = true;
    }
    boolean forceConjuncion = rerankPhase == RerankPhase.QUERY_PHASE;
    if (list.size() == 0)
        return null;
    else if (list.size() == 1) {
        nextToken = list.get(0);
        String termText = nextToken.toString();
        if (!isNumeric && (termText.contains("*") || termText.contains("?"))) {
            return newWildcardQuery(new Term(field, termText));
        } else {
            return newTermQuery(new Term(field, termText));
        }
    } else {
        if (severalTokensAtSamePosition) {
            if (positionCount == 1) {
                // no phrase query:
                BooleanQuery q = newBooleanQuery(true);
                for (int i = 0; i < list.size(); i++) {
                    Query currentQuery;
                    nextToken = list.get(i);
                    String termText = nextToken.toString();
                    if (termText.contains("*") || termText.contains("?")) {
                        currentQuery = newWildcardQuery(new Term(field, termText));
                    } else {
                        currentQuery = newTermQuery(new Term(field, termText));
                    }
                    q.add(currentQuery, BooleanClause.Occur.SHOULD);
                }
                return q;
            } else if (forceConjuncion) {
                BooleanQuery or = new BooleanQuery();
                for (LinkedList<org.apache.lucene.analysis.Token> tokenSequence : fixedTokenSequences) {
                    BooleanQuery and = new BooleanQuery();
                    for (int i = 0; i < tokenSequence.size(); i++) {
                        nextToken = (org.apache.lucene.analysis.Token) tokenSequence.get(i);
                        String termText = nextToken.toString();
                        Term term = new Term(field, termText);
                        if ((termText != null) && (termText.contains("*") || termText.contains("?"))) {
                            org.apache.lucene.search.WildcardQuery wildQuery = new org.apache.lucene.search.WildcardQuery(
                                    term);
                            and.add(wildQuery, Occur.MUST);
                        } else {
                            TermQuery termQuery = new TermQuery(term);
                            and.add(termQuery, Occur.MUST);
                        }
                    }
                    if (and.clauses().size() > 0) {
                        or.add(and, Occur.SHOULD);
                    }
                }
                return or;
            }
            // shingle
            else if (sf.omitPositions() && isShingled) {
                ArrayList<org.apache.lucene.analysis.Token> nonContained = getNonContained(list);
                Query currentQuery;
                BooleanQuery weakPhrase = new BooleanQuery();
                for (org.apache.lucene.analysis.Token shingleToken : nonContained) {
                    String termText = shingleToken.toString();
                    Term term = new Term(field, termText);
                    if ((termText != null) && (termText.contains("*") || termText.contains("?"))) {
                        currentQuery = new org.apache.lucene.search.WildcardQuery(term);
                    } else {
                        currentQuery = new TermQuery(term);
                    }
                    weakPhrase.add(currentQuery, Occur.MUST);
                }
                return weakPhrase;
            }
            // Consider if we can use a multi-phrase query (e.g. for synonym use rather than WordDelimiterFilterFactory)
            else if (canUseMultiPhraseQuery(fixedTokenSequences)) {
                // phrase query:
                MultiPhraseQuery mpq = newMultiPhraseQuery();
                mpq.setSlop(internalSlop);
                ArrayList<Term> multiTerms = new ArrayList<Term>();
                int position = 0;
                for (int i = 0; i < list.size(); i++) {
                    nextToken = list.get(i);
                    String termText = nextToken.toString();
                    Term term = new Term(field, termText);
                    if ((termText != null) && (termText.contains("*") || termText.contains("?"))) {
                        throw new IllegalStateException("Wildcards are not allowed in multi phrase anymore");
                    } else {
                        multiTerms.add(term);
                    }
                    if (nextToken.getPositionIncrement() > 0 && multiTerms.size() > 0) {
                        if (getEnablePositionIncrements()) {
                            mpq.add(multiTerms.toArray(new Term[0]), position);
                        } else {
                            mpq.add(multiTerms.toArray(new Term[0]));
                        }
                        checkTermCount(field, queryText, mpq);
                        multiTerms.clear();
                    }
                    position += nextToken.getPositionIncrement();
                }
                if (getEnablePositionIncrements()) {
                    if (multiTerms.size() > 0) {
                        mpq.add(multiTerms.toArray(new Term[0]), position);
                    }
                    // else
                    // {
                    //     mpq.add(new Term[] { new Term(field, "\u0000") }, position);
                    // }
                } else {
                    if (multiTerms.size() > 0) {
                        mpq.add(multiTerms.toArray(new Term[0]));
                    }
                    // else
                    // {
                    //     mpq.add(new Term[] { new Term(field, "\u0000") });
                    // }
                }
                checkTermCount(field, queryText, mpq);
                return mpq;
            }
            // Word delimiter factory and other odd things generate complex token patterns
            // Smart skip token sequences with small tokens that generate too many wildcards
            // Fall back to the larger pattern
            // e.g. Site1* will not do (S ite 1*) or (Site 1*) if 1* matches too much; (S ite1*) and (Site1*) will still be OK
            // If we skip all (for just 1* in the input) this is still an issue.
            else {
                return generateSpanOrQuery(field, fixedTokenSequences);
            }
        } else {
            if (forceConjuncion) {
                BooleanQuery or = new BooleanQuery();
                for (LinkedList<org.apache.lucene.analysis.Token> tokenSequence : fixedTokenSequences) {
                    BooleanQuery and = new BooleanQuery();
                    for (int i = 0; i < tokenSequence.size(); i++) {
                        nextToken = (org.apache.lucene.analysis.Token) tokenSequence.get(i);
                        String termText = nextToken.toString();
                        Term term = new Term(field, termText);
                        if ((termText != null) && (termText.contains("*") || termText.contains("?"))) {
                            org.apache.lucene.search.WildcardQuery wildQuery = new org.apache.lucene.search.WildcardQuery(
                                    term);
                            and.add(wildQuery, Occur.MUST);
                        } else {
                            TermQuery termQuery = new TermQuery(term);
                            and.add(termQuery, Occur.MUST);
                        }
                    }
                    if (and.clauses().size() > 0) {
                        or.add(and, Occur.SHOULD);
                    }
                }
                return or;
            } else {
                SpanQuery spanQuery = null;
                SpanOrQuery atSamePosition = new SpanOrQuery();
                int gap = 0;
                for (int i = 0; i < list.size(); i++) {
                    nextToken = list.get(i);
                    String termText = nextToken.toString();
                    Term term = new Term(field, termText);
                    if (getEnablePositionIncrements()) {
                        SpanQuery nextSpanQuery;
                        if ((termText != null) && (termText.contains("*") || termText.contains("?"))) {
                            org.apache.lucene.search.WildcardQuery wildQuery = new org.apache.lucene.search.WildcardQuery(
                                    term);
                            SpanMultiTermQueryWrapper wrapper = new SpanMultiTermQueryWrapper<>(wildQuery);
                            wrapper.setRewriteMethod(
                                    new TopTermsSpanBooleanQueryRewrite(topTermSpanRewriteLimit));
                            nextSpanQuery = wrapper;
                        } else {
                            nextSpanQuery = new SpanTermQuery(term);
                        }
                        if (gap == 0) {
                            atSamePosition.addClause(nextSpanQuery);
                        } else {
                            if (atSamePosition.getClauses().length == 0) {
                                if (spanQuery == null) {
                                    spanQuery = nextSpanQuery;
                                } else {
                                    spanQuery = new SpanNearQuery(new SpanQuery[] { spanQuery, nextSpanQuery },
                                            (gap - 1) + internalSlop, internalSlop < 2);
                                }
                                atSamePosition = new SpanOrQuery();
                            } else if (atSamePosition.getClauses().length == 1) {
                                if (spanQuery == null) {
                                    spanQuery = atSamePosition.getClauses()[0];
                                } else {
                                    spanQuery = new SpanNearQuery(
                                            new SpanQuery[] { spanQuery, atSamePosition.getClauses()[0] },
                                            (gap - 1) + internalSlop, internalSlop < 2);
                                }
                                atSamePosition = new SpanOrQuery();
                                atSamePosition.addClause(nextSpanQuery);
                            } else {
                                if (spanQuery == null) {
                                    spanQuery = atSamePosition;
                                } else {
                                    spanQuery = new SpanNearQuery(new SpanQuery[] { spanQuery, atSamePosition },
                                            (gap - 1) + internalSlop, internalSlop < 2);
                                }
                                atSamePosition = new SpanOrQuery();
                                atSamePosition.addClause(nextSpanQuery);
                            }
                        }
                        gap = nextToken.getPositionIncrement();
                    } else {
                        SpanQuery nextSpanQuery;
                        if ((termText != null) && (termText.contains("*") || termText.contains("?"))) {
                            org.apache.lucene.search.WildcardQuery wildQuery = new org.apache.lucene.search.WildcardQuery(
                                    term);
                            SpanMultiTermQueryWrapper wrapper = new SpanMultiTermQueryWrapper<>(wildQuery);
                            wrapper.setRewriteMethod(
                                    new TopTermsSpanBooleanQueryRewrite(topTermSpanRewriteLimit));
                            nextSpanQuery = wrapper;
                        } else {
                            nextSpanQuery = new SpanTermQuery(term);
                        }
                        if (spanQuery == null) {
                            spanQuery = new SpanOrQuery();
                            ((SpanOrQuery) spanQuery).addClause(nextSpanQuery);
                        } else {
                            ((SpanOrQuery) spanQuery).addClause(nextSpanQuery);
                        }
                    }
                }
                if (atSamePosition.getClauses().length == 0) {
                    return spanQuery;
                } else if (atSamePosition.getClauses().length == 1) {
                    if (spanQuery == null) {
                        spanQuery = atSamePosition.getClauses()[0];
                    } else {
                        spanQuery = new SpanNearQuery(
                                new SpanQuery[] { spanQuery, atSamePosition.getClauses()[0] },
                                (gap - 1) + internalSlop, internalSlop < 2);
                    }
                    return spanQuery;
                } else {
                    if (spanQuery == null) {
                        spanQuery = atSamePosition;
                    } else {
                        spanQuery = new SpanNearQuery(new SpanQuery[] { spanQuery, atSamePosition },
                                (gap - 1) + internalSlop, internalSlop < 2);
                    }
                    return spanQuery;
                }
            }
        }
    }
}
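The newEntry.addAll(tokenSequence) calls above clone a candidate token sequence before extending it, so the sequence already stored in the set is left untouched; a minimal sketch of that copy-then-extend pattern (token values illustrative):

import java.util.Arrays;
import java.util.LinkedList;

public class CopyThenExtendDemo {
    public static void main(String[] args) {
        LinkedList<String> tokenSequence = new LinkedList<>(Arrays.asList("tok1", "tok2"));
        LinkedList<String> newEntry = new LinkedList<>();
        newEntry.addAll(tokenSequence);    // shallow copy of the existing sequence
        newEntry.add("tok3");              // extend the copy only
        System.out.println(tokenSequence); // [tok1, tok2]
        System.out.println(newEntry);      // [tok1, tok2, tok3]
    }
}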