List of usage examples for java.util LinkedList toString
public String toString()
From source file: com.ucuenca.pentaho.plugin.step.FusekiLoaderDialog.java
/** * Called when the user confirms the dialog *///from w w w. j a va 2s .com private void ok() { // The "stepname" variable will be the return value for the open() // method. //-----------obtener valores combo meta.getConfigStack().clear(); LinkedList listaPropiedades = new LinkedList<String>(); LinkedList listaValores = new LinkedList<String>(); TableItem[] items = table.getItems(); for (int i = 0; i < items.length; i++) { TableItem fila = table.getItem(i); CCombo cmb = (CCombo) combos.get(i); //System.out.println( cmb.getText()); String propiedad = cmb.getText(); // propiedad String valor = fila.getText(1); // valor if (!valor.isEmpty() && propiedad.compareTo("Propiedades") != 0) { listaPropiedades.add(propiedad); listaValores.add(valor); } } meta.setFuGraph(listaPropiedades.toString()); meta.setFuQuery(listaValores.toString()); meta.setListaPropiedades(listaPropiedades.toString()); meta.setListaValores(listaValores.toString()); //--------------- boolean validado = true; // Setting to step name from the dialog control stepname = wStepname.getText(); // Setting the settings to the meta object if (wHelloFieldName.getText().trim().isEmpty()) { MessageBox dialog = new MessageBox(shell, SWT.ICON_ERROR | SWT.OK); dialog.setText("ERROR"); dialog.setMessage(BaseMessages.getString(PKG, "FusekiLoader.input.empty")); dialog.open(); wHelloFieldName.setFocus(); validado = false; } if (wChooseOutput.getText().trim().isEmpty()) { MessageBox dialog = new MessageBox(shell, SWT.ICON_ERROR | SWT.OK); dialog.setText("ERROR"); dialog.setMessage(BaseMessages.getString(PKG, "FusekiLoader.output.empty")); dialog.open(); wChooseOutput.setFocus(); validado = false; } if (wTextServName.getText().trim().isEmpty()) { MessageBox dialog = new MessageBox(shell, SWT.ICON_ERROR | SWT.OK); dialog.setText("ERROR"); dialog.setMessage(BaseMessages.getString(PKG, "FusekiLoader.output.empty")); dialog.open(); wTextServName.setFocus(); validado = false; } if (wTextServPort.getText().trim().isEmpty()) { 
MessageBox dialog = new MessageBox(shell, SWT.ICON_ERROR | SWT.OK); dialog.setText("ERROR"); dialog.setMessage(BaseMessages.getString(PKG, "FusekiLoader.output.empty")); dialog.open(); wTextServPort.setFocus(); validado = false; } if (wTextBaseUri.getText().trim().isEmpty()) { MessageBox dialog = new MessageBox(shell, SWT.ICON_ERROR | SWT.OK); dialog.setText("ERROR"); dialog.setMessage(BaseMessages.getString(PKG, "FusekiLoader.baseUri.empty")); dialog.open(); wTextBaseUri.setFocus(); validado = false; } // close the SWT dialog window meta.setOutputField(wHelloFieldName.getText()); meta.setDirectory(wChooseOutput.getText()); meta.setServiceName(wTextServName.getText()); meta.setPortName(wTextServPort.getText()); meta.setFubaseURI(wTextBaseUri.getText()); // TableItem miti = table.getItem(0); // meta.setFuDataset(miti.getText(1)); // // TableItem miti2 = table.getItem(1); // meta.setFuGraph(miti2.getText(1)); // // TableItem miti3 = table.getItem(2); // meta.setFuQuery(miti3.getText(1)); if (validado) { meta.setValidate("true"); } else { meta.setValidate("false"); } meta.setChanged(); dispose(); }
From source file: org.kuali.rice.krms.impl.provider.repository.SimplePropositionTypeService.java
/**
 * Translates the parameters on the given proposition definition to create an expression for evaluation.
 * The proposition parameters are defined in a reverse-polish notation so a stack is used for
 * evaluation purposes.
 *
 * @param propositionDefinition the proposition definition to translate
 *
 * @return the translated expression for the given proposition, this
 * expression, when evaluated, will return a Boolean.
 * @throws RepositoryDataException if a referenced function or term cannot be
 * loaded, if the stack underflows while applying a function or operator, or
 * if the stack does not reduce to exactly one expression at the end
 */
protected Expression<Boolean> translateToExpression(PropositionDefinition propositionDefinition) {
    // Evaluation stack for the reverse-polish parameter sequence; operands are
    // pushed with addFirst and consumed with removeFirst (head = top of stack).
    LinkedList<Expression<? extends Object>> stack = new LinkedList<Expression<? extends Object>>();
    for (PropositionParameter parameter : propositionDefinition.getParameters()) {
        PropositionParameterType parameterType = PropositionParameterType
                .fromCode(parameter.getParameterType());
        if (parameterType == PropositionParameterType.CONSTANT) {
            // CONSTANT: push the literal parameter value as a constant expression.
            // TODO - need some way to define data type on the prop parameter as well? Not all constants will actually be String values!!!
            stack.addFirst(new ConstantExpression<String>(parameter.getValue()));
        } else if (parameterType == PropositionParameterType.FUNCTION) {
            // FUNCTION: the parameter value is a function id; resolve and load the
            // custom function, then pop its arguments off the stack.
            String functionId = parameter.getValue();
            FunctionDefinition functionDefinition = functionRepositoryService.getFunction(functionId);
            if (functionDefinition == null) {
                throw new RepositoryDataException("Unable to locate function with the given id: " + functionId);
            }
            FunctionTypeService functionTypeService = typeResolver.getFunctionTypeService(functionDefinition);
            Function function = functionTypeService.loadFunction(functionDefinition);
            // TODO throw an exception if function is null?
            List<FunctionParameterDefinition> parameters = functionDefinition.getParameters();
            // Underflow guard: every declared parameter needs an operand on the stack.
            if (stack.size() < parameters.size()) {
                throw new RepositoryDataException(
                        "Failed to initialize custom function '" + functionDefinition.getNamespace() + " "
                                + functionDefinition.getName() + "'. There were only " + stack.size()
                                + " values on the stack but function requires at least " + parameters.size());
            }
            List<Expression<? extends Object>> arguments = new ArrayList<Expression<? extends Object>>();
            // work backward through the list to match params to the stack
            for (int index = parameters.size() - 1; index >= 0; index--) {
                FunctionParameterDefinition parameterDefinition = parameters.get(index);
                // TODO need to check types here? expression object probably needs a getType on it so that we can confirm that the types will be compatible?
                parameterDefinition.getParameterType();
                Expression<? extends Object> argument = stack.removeFirst();
                arguments.add(argument);
            }
            String[] parameterTypes = getFunctionParameterTypes(functionDefinition);
            // Push the applied function back as a single expression.
            stack.addFirst(new FunctionExpression(function, parameterTypes, arguments,
                    getComparisonOperatorService()));
        } else if (parameterType == PropositionParameterType.OPERATOR) {
            // OPERATOR: binary comparison; pops two operands (rhs first, since the
            // stack head is the most recently pushed value) and pushes the result.
            ComparisonOperator operator = ComparisonOperator.fromCode(parameter.getValue());
            if (stack.size() < 2) {
                throw new RepositoryDataException(
                        "Failed to initialize expression for comparison operator " + operator
                                + " because a sufficient number of arguments was not available on the stack. "
                                + "Current contents of stack: " + stack.toString());
            }
            Expression<? extends Object> rhs = stack.removeFirst();
            Expression<? extends Object> lhs = stack.removeFirst();
            stack.addFirst(new BinaryOperatorExpression(operator, lhs, rhs));
        } else if (parameterType == PropositionParameterType.TERM) {
            // TERM: the parameter value is a term id; load and translate it, then
            // push it as an operand.
            String termId = parameter.getValue();
            TermDefinition termDefinition = getTermRepositoryService().getTerm(termId);
            if (termDefinition == null) {
                throw new RepositoryDataException("unable to load term with id " + termId);
            }
            Term term = translateTermDefinition(termDefinition);
            stack.addFirst(new TermExpression(term));
        }
    }
    // A well-formed RPN sequence reduces to exactly one expression.
    if (stack.size() != 1) {
        throw new RepositoryDataException(
                "Final contents of expression stack are incorrect, there should only be one entry but was "
                        + stack.size() + ". Current contents of stack: " + stack.toString());
    }
    return new BooleanValidatingExpression(stack.removeFirst());
}
From source file: com.ucuenca.pentaho.plugin.step.EldaPDIStepDialog.java
/** * Called when the user confirms the dialog *///from w w w . jav a 2 s.c om private void ok() { // The "stepname" variable will be the return value for the open() // method. boolean validado = true; // Setting to step name from the dialog control stepname = wStepname.getText(); // Setting the settings to the meta object if (wHelloFieldName.getText().trim().isEmpty()) { //Validacion servicio MessageBox dialog = new MessageBox(shell, SWT.ICON_ERROR | SWT.OK); dialog.setText("ERROR"); dialog.setMessage(BaseMessages.getString(PKG, "FusekiLoader.input.empty")); dialog.open(); wHelloFieldName.setFocus(); validado = false; } if (wChooseOutput.getText().trim().isEmpty()) { //validacion Directorio de Salida MessageBox dialog = new MessageBox(shell, SWT.ICON_ERROR | SWT.OK); dialog.setText("ERROR"); dialog.setMessage(BaseMessages.getString(PKG, "FusekiLoader.output.empty")); dialog.open(); wChooseOutput.setFocus(); validado = false; } if (wTextBaseUri.getText().trim().isEmpty()) { //validacion Base URI MessageBox dialog = new MessageBox(shell, SWT.ICON_ERROR | SWT.OK); dialog.setText("ERROR"); dialog.setMessage(BaseMessages.getString(PKG, "Elda.BaseUri.empty")); dialog.open(); wTextBaseUri.setFocus(); validado = false; } LinkedList listEntidades = new LinkedList<String>(); //todas las cargadas // pasar la lista seleccionada ListSource.clear(); ListSourceNames.clear(); for (int k = 0; k < table.getItemCount(); k++) { TableItem fila = table.getItem(k); //System.out.println(fila.getChecked()+" "+fila.getText()); fila.getText(1); if (fila.getChecked()) { //si se ha seleccionada se pasara a la lista ListSource.add(fila.getText()); ListSourceNames.add(fila.getText(1)); //nombre puesto x el usuario } listEntidades.add(fila.getText()); //entidades cargadas /* String propiedad = ""; if (!combos.isEmpty() && combos.size()>k){ CCombo cmb = (CCombo)combos.get(k); propiedad = cmb.getText(); // propiedad System.out.println("LABEL ENTIDAD "+propiedad); } ListLabelEntidad.add(propiedad); 
* */ } //propiedad sort //label por entidad agrego el item seleccionado del combo for (int i = 0; i < combos.size(); i++) { String propiedad = ""; if (!combos.isEmpty() && combos.size() > i) { CCombo cmb = (CCombo) combos.get(i); propiedad = cmb.getText(); // propiedad System.out.println("LABEL ENTIDAD " + propiedad); } ListLabelEntidad.add(propiedad); } // //--------------- recorrertabla2();//guardo los valores de la tabla 2. meta.setListSelect(ListSource.toString()); meta.setListSelectNames(ListSourceNames.toString()); meta.setListProper(PropiedadesSeleccionadasUsuario.toString()); meta.setListProperNames(PropiedadesSeleccionadasNamesUsuario.toString()); System.out.println("##### ok proper1 " + PropiedadesSeleccionadasUsuario.size()); System.out.println("##### ok proper2 " + PropiedadesSeleccionadasNamesUsuario.size()); // close the SWT dialog window meta.setOutputField(wHelloFieldName.getText()); meta.setServicio(wHelloFieldName.getText()); meta.setDirectory(wChooseOutput.getText()); meta.setEbaseUri(wTextBaseUri.getText().trim()); meta.setListEntidadesCargadas(listEntidades.toString()); meta.setChanged(); if (validado) { // si todas las validacion son correctas se iniciara el proceso meta.setValidate("true"); dispose(); // solo cuando esta todo correcto dejo cerrar la ventana } else { meta.setValidate("false"); } meta.setListLabelxEntidad(ListLabelEntidad.toString()); System.out.println("LABEL ENTIDAD " + ListLabelEntidad.toString()); meta.setPropiedadesTodasCargadas(arrayPropiedades.toString()); System.out.println(" PropiedadesTodasCargadas " + arrayPropiedades.toString()); System.out.println(" Combos " + combos.toString()); }
From source file: org.alfresco.solr.query.Solr4QueryParser.java
@SuppressWarnings("unchecked") protected Query getFieldQueryImpl(String field, String queryText, AnalysisMode analysisMode, LuceneFunction luceneFunction) throws ParseException, IOException { // make sure the field exists or return a dummy query so we have no error ....ACE-3231 SchemaField schemaField = schema.getFieldOrNull(field); boolean isNumeric = false; if (schemaField == null) { return new TermQuery(new Term("_dummy_", "_miss_")); } else {/*from ww w .j av a 2s . c o m*/ isNumeric = (schemaField.getType().getNumericType() != null); } // Use the analyzer to get all the tokens, and then build a TermQuery, // PhraseQuery, or noth // TODO: Untokenised columns with functions require special handling if (luceneFunction != LuceneFunction.FIELD) { throw new UnsupportedOperationException( "Field queries are not supported on lucene functions (UPPER, LOWER, etc)"); } // if the incoming string already has a language identifier we strip it iff and addit back on again String localePrefix = ""; String toTokenise = queryText; if (queryText.startsWith("{")) { int position = queryText.indexOf("}"); if (position > 0) { String language = queryText.substring(0, position + 1); Locale locale = new Locale(queryText.substring(1, position)); String token = queryText.substring(position + 1); boolean found = false; for (Locale current : Locale.getAvailableLocales()) { if (current.toString().equalsIgnoreCase(locale.toString())) { found = true; break; } } if (found) { localePrefix = language; toTokenise = token; } else { //toTokenise = token; } } } String testText = toTokenise; boolean requiresMLTokenDuplication = false; String localeString = null; if (isPropertyField(field) && (localePrefix.length() == 0)) { if ((queryText.length() > 0) && (queryText.charAt(0) == '\u0000')) { int position = queryText.indexOf("\u0000", 1); testText = queryText.substring(position + 1); requiresMLTokenDuplication = true; localeString = queryText.substring(1, position); } } // find the positions of any 
escaped * and ? and ignore them Set<Integer> wildcardPoistions = getWildcardPositions(testText); TokenStream source = null; ArrayList<org.apache.lucene.analysis.Token> list = new ArrayList<org.apache.lucene.analysis.Token>(); boolean severalTokensAtSamePosition = false; org.apache.lucene.analysis.Token nextToken; int positionCount = 0; try { org.apache.lucene.analysis.Token reusableToken = new org.apache.lucene.analysis.Token(); source = getAnalyzer().tokenStream(field, new StringReader(toTokenise)); source.reset(); while (source.incrementToken()) { CharTermAttribute cta = source.getAttribute(CharTermAttribute.class); OffsetAttribute offsetAtt = source.getAttribute(OffsetAttribute.class); TypeAttribute typeAtt = null; if (source.hasAttribute(TypeAttribute.class)) { typeAtt = source.getAttribute(TypeAttribute.class); } PositionIncrementAttribute posIncAtt = null; if (source.hasAttribute(PositionIncrementAttribute.class)) { posIncAtt = source.getAttribute(PositionIncrementAttribute.class); } nextToken = new Token(cta.buffer(), 0, cta.length(), offsetAtt.startOffset(), offsetAtt.endOffset()); if (typeAtt != null) { nextToken.setType(typeAtt.type()); } if (posIncAtt != null) { nextToken.setPositionIncrement(posIncAtt.getPositionIncrement()); } list.add(nextToken); if (nextToken.getPositionIncrement() != 0) positionCount += nextToken.getPositionIncrement(); else severalTokensAtSamePosition = true; } } catch (SolrException e) { // MNT-15336 // Text against a numeric field should fail silently rather then tell you it is not possible. 
if (isNumeric && e.getMessage() != null && e.getMessage().startsWith("Invalid Number:")) { // Generate a query that does not match any document - rather than nothing return createNoMatchQuery(); } else { throw e; } } finally { try { if (source != null) { source.close(); } } catch (IOException e) { // ignore } } // add any alpha numeric wildcards that have been missed // Fixes most stop word and wild card issues for (int index = 0; index < testText.length(); index++) { char current = testText.charAt(index); if (((current == '*') || (current == '?')) && wildcardPoistions.contains(index)) { StringBuilder pre = new StringBuilder(10); if (index == 0) { // "*" and "?" at the start boolean found = false; for (int j = 0; j < list.size(); j++) { org.apache.lucene.analysis.Token test = list.get(j); if ((test.startOffset() <= 0) && (0 < test.endOffset())) { found = true; break; } } if (!found && (list.size() == 0)) { // Add new token followed by * not given by the tokeniser org.apache.lucene.analysis.Token newToken = new org.apache.lucene.analysis.Token("", 0, 0); newToken.setType("ALPHANUM"); if (requiresMLTokenDuplication) { Locale locale = I18NUtil.parseLocale(localeString); MLTokenDuplicator duplicator = new MLTokenDuplicator(locale, MLAnalysisMode.EXACT_LANGUAGE); Iterator<org.apache.lucene.analysis.Token> it = duplicator.buildIterator(newToken); if (it != null) { int count = 0; while (it.hasNext()) { list.add(it.next()); count++; if (count > 1) { severalTokensAtSamePosition = true; } } } } // content else { list.add(newToken); } } } else if (index > 0) { // Add * and ? 
back into any tokens from which it has been removed boolean tokenFound = false; for (int j = 0; j < list.size(); j++) { org.apache.lucene.analysis.Token test = list.get(j); if ((test.startOffset() <= index) && (index < test.endOffset())) { if (requiresMLTokenDuplication) { String termText = test.toString(); int position = termText.indexOf("}"); String language = termText.substring(0, position + 1); String token = termText.substring(position + 1); if (index >= test.startOffset() + token.length()) { test.setEmpty(); test.append(language + token + current); } } else { if (index >= test.startOffset() + test.length()) { test.setEmpty(); test.append(test.toString() + current); } } tokenFound = true; break; } } if (!tokenFound) { for (int i = index - 1; i >= 0; i--) { char c = testText.charAt(i); if (Character.isLetterOrDigit(c)) { boolean found = false; for (int j = 0; j < list.size(); j++) { org.apache.lucene.analysis.Token test = list.get(j); if ((test.startOffset() <= i) && (i < test.endOffset())) { found = true; break; } } if (found) { break; } else { pre.insert(0, c); } } else { break; } } if (pre.length() > 0) { // Add new token followed by * not given by the tokeniser org.apache.lucene.analysis.Token newToken = new org.apache.lucene.analysis.Token( pre.toString(), index - pre.length(), index); newToken.setType("ALPHANUM"); if (requiresMLTokenDuplication) { Locale locale = I18NUtil.parseLocale(localeString); MLTokenDuplicator duplicator = new MLTokenDuplicator(locale, MLAnalysisMode.EXACT_LANGUAGE); Iterator<org.apache.lucene.analysis.Token> it = duplicator.buildIterator(newToken); if (it != null) { int count = 0; while (it.hasNext()) { list.add(it.next()); count++; if (count > 1) { severalTokensAtSamePosition = true; } } } } // content else { list.add(newToken); } } } } StringBuilder post = new StringBuilder(10); if (index > 0) { for (int i = index + 1; i < testText.length(); i++) { char c = testText.charAt(i); if (Character.isLetterOrDigit(c)) { boolean found = 
false; for (int j = 0; j < list.size(); j++) { org.apache.lucene.analysis.Token test = list.get(j); if ((test.startOffset() <= i) && (i < test.endOffset())) { found = true; break; } } if (found) { break; } else { post.append(c); } } else { break; } } if (post.length() > 0) { // Add new token followed by * not given by the tokeniser org.apache.lucene.analysis.Token newToken = new org.apache.lucene.analysis.Token( post.toString(), index + 1, index + 1 + post.length()); newToken.setType("ALPHANUM"); if (requiresMLTokenDuplication) { Locale locale = I18NUtil.parseLocale(localeString); MLTokenDuplicator duplicator = new MLTokenDuplicator(locale, MLAnalysisMode.EXACT_LANGUAGE); Iterator<org.apache.lucene.analysis.Token> it = duplicator.buildIterator(newToken); if (it != null) { int count = 0; while (it.hasNext()) { list.add(it.next()); count++; if (count > 1) { severalTokensAtSamePosition = true; } } } } // content else { list.add(newToken); } } } } } // Put in real position increments as we treat them correctly int curentIncrement = -1; for (org.apache.lucene.analysis.Token c : list) { if (curentIncrement == -1) { curentIncrement = c.getPositionIncrement(); } else if (c.getPositionIncrement() > 0) { curentIncrement = c.getPositionIncrement(); } else { c.setPositionIncrement(curentIncrement); } } // Remove small bits already covered in larger fragments list = getNonContained(list); Collections.sort(list, new Comparator<org.apache.lucene.analysis.Token>() { public int compare(Token o1, Token o2) { int dif = o1.startOffset() - o2.startOffset(); return dif; } }); // Combined * and ? 
based strings - should redo the tokeniser // Build tokens by position LinkedList<LinkedList<org.apache.lucene.analysis.Token>> tokensByPosition = new LinkedList<LinkedList<org.apache.lucene.analysis.Token>>(); LinkedList<org.apache.lucene.analysis.Token> currentList = null; int lastStart = 0; for (org.apache.lucene.analysis.Token c : list) { if (c.startOffset() == lastStart) { if (currentList == null) { currentList = new LinkedList<org.apache.lucene.analysis.Token>(); tokensByPosition.add(currentList); } currentList.add(c); } else { currentList = new LinkedList<org.apache.lucene.analysis.Token>(); tokensByPosition.add(currentList); currentList.add(c); } lastStart = c.startOffset(); } // Build all the token sequences and see which ones get strung together OrderedHashSet<LinkedList<org.apache.lucene.analysis.Token>> allTokenSequencesSet = new OrderedHashSet<LinkedList<org.apache.lucene.analysis.Token>>(); for (LinkedList<org.apache.lucene.analysis.Token> tokensAtPosition : tokensByPosition) { OrderedHashSet<LinkedList<org.apache.lucene.analysis.Token>> positionalSynonymSequencesSet = new OrderedHashSet<LinkedList<org.apache.lucene.analysis.Token>>(); OrderedHashSet<LinkedList<org.apache.lucene.analysis.Token>> newAllTokenSequencesSet = new OrderedHashSet<LinkedList<org.apache.lucene.analysis.Token>>(); FOR_FIRST_TOKEN_AT_POSITION_ONLY: for (org.apache.lucene.analysis.Token t : tokensAtPosition) { org.apache.lucene.analysis.Token replace = new org.apache.lucene.analysis.Token(t, t.startOffset(), t.endOffset()); replace.setType(t.type()); replace.setPositionIncrement(t.getPositionIncrement()); boolean tokenFoundSequence = false; for (LinkedList<org.apache.lucene.analysis.Token> tokenSequence : allTokenSequencesSet) { LinkedList<org.apache.lucene.analysis.Token> newEntry = new LinkedList<org.apache.lucene.analysis.Token>(); newEntry.addAll(tokenSequence); if ((newEntry.getLast().endOffset() == replace.endOffset()) && replace.type().equals(SynonymFilter.TYPE_SYNONYM)) { 
if ((newEntry.getLast().startOffset() == replace.startOffset()) && newEntry.getLast().type().equals(SynonymFilter.TYPE_SYNONYM)) { positionalSynonymSequencesSet.add(tokenSequence); newEntry.add(replace); tokenFoundSequence = true; } else if (newEntry.getLast().type().equals(CommonGramsFilter.GRAM_TYPE)) { if (newEntry.toString().endsWith(replace.toString())) { // already in the gram positionalSynonymSequencesSet.add(tokenSequence); tokenFoundSequence = true; } else { // need to replace the synonym in the current gram tokenFoundSequence = true; StringBuffer old = new StringBuffer(newEntry.getLast().toString()); old.replace(replace.startOffset() - newEntry.getLast().startOffset(), replace.endOffset() - newEntry.getLast().startOffset(), replace.toString()); Token newToken = new org.apache.lucene.analysis.Token(old.toString(), newEntry.getLast().startOffset(), newEntry.getLast().endOffset()); newEntry.removeLast(); newEntry.add(newToken); } } } else if ((newEntry.getLast().startOffset() < replace.startOffset()) && (newEntry.getLast().endOffset() < replace.endOffset())) { if (newEntry.getLast().type().equals(SynonymFilter.TYPE_SYNONYM) && replace.type().equals(SynonymFilter.TYPE_SYNONYM)) { positionalSynonymSequencesSet.add(tokenSequence); } newEntry.add(replace); tokenFoundSequence = true; } newAllTokenSequencesSet.add(newEntry); } if (false == tokenFoundSequence) { for (LinkedList<org.apache.lucene.analysis.Token> tokenSequence : newAllTokenSequencesSet) { LinkedList<org.apache.lucene.analysis.Token> newEntry = new LinkedList<org.apache.lucene.analysis.Token>(); newEntry.addAll(tokenSequence); if ((newEntry.getLast().endOffset() == replace.endOffset()) && replace.type().equals(SynonymFilter.TYPE_SYNONYM)) { if ((newEntry.getLast().startOffset() == replace.startOffset()) && newEntry.getLast().type().equals(SynonymFilter.TYPE_SYNONYM)) { positionalSynonymSequencesSet.add(tokenSequence); newEntry.add(replace); tokenFoundSequence = true; } else if 
(newEntry.getLast().type().equals(CommonGramsFilter.GRAM_TYPE)) { if (newEntry.toString().endsWith(replace.toString())) { // already in the gram positionalSynonymSequencesSet.add(tokenSequence); tokenFoundSequence = true; } else { // need to replace the synonym in the current gram tokenFoundSequence = true; StringBuffer old = new StringBuffer(newEntry.getLast().toString()); old.replace(replace.startOffset() - newEntry.getLast().startOffset(), replace.endOffset() - newEntry.getLast().startOffset(), replace.toString()); Token newToken = new org.apache.lucene.analysis.Token(old.toString(), newEntry.getLast().startOffset(), newEntry.getLast().endOffset()); newEntry.removeLast(); newEntry.add(newToken); positionalSynonymSequencesSet.add(newEntry); } } } else if ((newEntry.getLast().startOffset() < replace.startOffset()) && (newEntry.getLast().endOffset() < replace.endOffset())) { if (newEntry.getLast().type().equals(SynonymFilter.TYPE_SYNONYM) && replace.type().equals(SynonymFilter.TYPE_SYNONYM)) { positionalSynonymSequencesSet.add(tokenSequence); newEntry.add(replace); tokenFoundSequence = true; } } } } if (false == tokenFoundSequence) { LinkedList<org.apache.lucene.analysis.Token> newEntry = new LinkedList<org.apache.lucene.analysis.Token>(); newEntry.add(replace); newAllTokenSequencesSet.add(newEntry); } // Limit the max number of permutations we consider if (newAllTokenSequencesSet.size() > 64) { break FOR_FIRST_TOKEN_AT_POSITION_ONLY; } } allTokenSequencesSet = newAllTokenSequencesSet; allTokenSequencesSet.addAll(positionalSynonymSequencesSet); } LinkedList<LinkedList<org.apache.lucene.analysis.Token>> allTokenSequences = new LinkedList<LinkedList<org.apache.lucene.analysis.Token>>( allTokenSequencesSet); // build the unique LinkedList<LinkedList<org.apache.lucene.analysis.Token>> fixedTokenSequences = new LinkedList<LinkedList<org.apache.lucene.analysis.Token>>(); for (LinkedList<org.apache.lucene.analysis.Token> tokenSequence : allTokenSequences) { 
LinkedList<org.apache.lucene.analysis.Token> fixedTokenSequence = new LinkedList<org.apache.lucene.analysis.Token>(); fixedTokenSequences.add(fixedTokenSequence); org.apache.lucene.analysis.Token replace = null; for (org.apache.lucene.analysis.Token c : tokenSequence) { if (replace == null) { StringBuilder prefix = new StringBuilder(); for (int i = c.startOffset() - 1; i >= 0; i--) { char test = testText.charAt(i); if (((test == '*') || (test == '?')) && wildcardPoistions.contains(i)) { prefix.insert(0, test); } else { break; } } String pre = prefix.toString(); if (requiresMLTokenDuplication) { String termText = c.toString(); int position = termText.indexOf("}"); String language = termText.substring(0, position + 1); String token = termText.substring(position + 1); replace = new org.apache.lucene.analysis.Token(language + pre + token, c.startOffset() - pre.length(), c.endOffset()); replace.setType(c.type()); replace.setPositionIncrement(c.getPositionIncrement()); } else { String termText = c.toString(); replace = new org.apache.lucene.analysis.Token(pre + termText, c.startOffset() - pre.length(), c.endOffset()); replace.setType(c.type()); replace.setPositionIncrement(c.getPositionIncrement()); } } else { StringBuilder prefix = new StringBuilder(); StringBuilder postfix = new StringBuilder(); StringBuilder builder = prefix; for (int i = c.startOffset() - 1; i >= replace.endOffset(); i--) { char test = testText.charAt(i); if (((test == '*') || (test == '?')) && wildcardPoistions.contains(i)) { builder.insert(0, test); } else { builder = postfix; postfix.setLength(0); } } String pre = prefix.toString(); String post = postfix.toString(); // Does it bridge? 
if ((pre.length() > 0) && (replace.endOffset() + pre.length()) == c.startOffset()) { String termText = c.toString(); if (requiresMLTokenDuplication) { int position = termText.indexOf("}"); @SuppressWarnings("unused") String language = termText.substring(0, position + 1); String token = termText.substring(position + 1); int oldPositionIncrement = replace.getPositionIncrement(); String replaceTermText = replace.toString(); replace = new org.apache.lucene.analysis.Token(replaceTermText + pre + token, replace.startOffset(), c.endOffset()); replace.setType(replace.type()); replace.setPositionIncrement(oldPositionIncrement); } else { int oldPositionIncrement = replace.getPositionIncrement(); String replaceTermText = replace.toString(); replace = new org.apache.lucene.analysis.Token(replaceTermText + pre + termText, replace.startOffset(), c.endOffset()); replace.setType(replace.type()); replace.setPositionIncrement(oldPositionIncrement); } } else { String termText = c.toString(); if (requiresMLTokenDuplication) { int position = termText.indexOf("}"); String language = termText.substring(0, position + 1); String token = termText.substring(position + 1); String replaceTermText = replace.toString(); org.apache.lucene.analysis.Token last = new org.apache.lucene.analysis.Token( replaceTermText + post, replace.startOffset(), replace.endOffset() + post.length()); last.setType(replace.type()); last.setPositionIncrement(replace.getPositionIncrement()); fixedTokenSequence.add(last); replace = new org.apache.lucene.analysis.Token(language + pre + token, c.startOffset() - pre.length(), c.endOffset()); replace.setType(c.type()); replace.setPositionIncrement(c.getPositionIncrement()); } else { String replaceTermText = replace.toString(); org.apache.lucene.analysis.Token last = new org.apache.lucene.analysis.Token( replaceTermText + post, replace.startOffset(), replace.endOffset() + post.length()); last.setType(replace.type()); last.setPositionIncrement(replace.getPositionIncrement()); 
// NOTE(review): this chunk is the interior of a larger query-building method; its
// signature and opening logic are outside this view. Original line breaks were
// collapsed during extraction (see the embedded "// finish last" marker below),
// so comments here are inserted only at the surviving physical-line boundaries.
//
// Segment 1: finish re-attaching wildcard characters ('*'/'?') that the analyzer
// stripped — a prefix is glued onto the current token, then after the loop a
// postfix is scanned from testText (only positions recorded in wildcardPoistions
// [sic] count) and appended to the final "replace" token, preserving its type and
// position increment. The per-sequence fixed tokens are then flattened into one
// list, sorted by startOffset (ties broken by position increment), de-duplicated
// via OrderedHashSet (raw type in original), and assigned to "list". Finally, if
// a locale prefix is in effect it is re-prepended to every token's text.
fixedTokenSequence.add(last); replace = new org.apache.lucene.analysis.Token(pre + termText, c.startOffset() - pre.length(), c.endOffset()); replace.setType(c.type()); replace.setPositionIncrement(c.getPositionIncrement()); } } } } // finish last if (replace != null) { StringBuilder postfix = new StringBuilder(); if ((replace.endOffset() >= 0) && (replace.endOffset() < testText.length())) { for (int i = replace.endOffset(); i < testText.length(); i++) { char test = testText.charAt(i); if (((test == '*') || (test == '?')) && wildcardPoistions.contains(i)) { postfix.append(test); } else { break; } } } String post = postfix.toString(); int oldPositionIncrement = replace.getPositionIncrement(); String replaceTermText = replace.toString(); replace = new org.apache.lucene.analysis.Token(replaceTermText + post, replace.startOffset(), replace.endOffset() + post.length()); replace.setType(replace.type()); replace.setPositionIncrement(oldPositionIncrement); fixedTokenSequence.add(replace); } } // rebuild fixed list ArrayList<org.apache.lucene.analysis.Token> fixed = new ArrayList<org.apache.lucene.analysis.Token>(); for (LinkedList<org.apache.lucene.analysis.Token> tokenSequence : fixedTokenSequences) { for (org.apache.lucene.analysis.Token token : tokenSequence) { fixed.add(token); } } // reorder by start position and increment Collections.sort(fixed, new Comparator<org.apache.lucene.analysis.Token>() { public int compare(Token o1, Token o2) { int dif = o1.startOffset() - o2.startOffset(); if (dif != 0) { return dif; } else { return o1.getPositionIncrement() - o2.getPositionIncrement(); } } }); // make sure we remove any tokens we have duplicated @SuppressWarnings("rawtypes") OrderedHashSet unique = new OrderedHashSet(); unique.addAll(fixed); fixed = new ArrayList<org.apache.lucene.analysis.Token>(unique); list = fixed; // add any missing locales back to the tokens if (localePrefix.length() > 0) { for (int j = 0; j < list.size(); j++) { org.apache.lucene.analysis.Token
// Segment 2: query-analyzer introspection. The field's query analyzer is probed
// for a TokenizerChain containing a ShingleFilterFactory, or for an
// AlfrescoAnalyzerWrapper (which is treated as shingled unconditionally — the
// original comment says "assume if there are no term positions it is shingled").
// forceConjuncion [sic] is true only in the rerank QUERY_PHASE. Base cases:
// empty token list -> null; exactly one token -> a WildcardQuery when the text
// contains '*'/'?' (and the field is not numeric), else a plain TermQuery.
// Multiple tokens at the same position with positionCount == 1 -> a SHOULD-joined
// BooleanQuery of term/wildcard queries (no phrase).
currentToken = list.get(j); String termText = currentToken.toString(); currentToken.setEmpty(); currentToken.append(localePrefix + termText); } } SchemaField sf = schema.getField(field); TokenizerChain tokenizerChain = (sf.getType().getQueryAnalyzer() instanceof TokenizerChain) ? ((TokenizerChain) sf.getType().getQueryAnalyzer()) : null; boolean isShingled = false; if (tokenizerChain != null) { for (TokenFilterFactory factory : tokenizerChain.getTokenFilterFactories()) { if (factory instanceof ShingleFilterFactory) { isShingled = true; break; } } } AlfrescoAnalyzerWrapper analyzerWrapper = (sf.getType() .getQueryAnalyzer() instanceof AlfrescoAnalyzerWrapper) ? ((AlfrescoAnalyzerWrapper) sf.getType().getQueryAnalyzer()) : null; if (analyzerWrapper != null) { // assume if there are no term positions it is shingled .... isShingled = true; } boolean forceConjuncion = rerankPhase == RerankPhase.QUERY_PHASE; if (list.size() == 0) return null; else if (list.size() == 1) { nextToken = list.get(0); String termText = nextToken.toString(); if (!isNumeric && (termText.contains("*") || termText.contains("?"))) { return newWildcardQuery(new Term(field, termText)); } else { return newTermQuery(new Term(field, termText)); } } else { if (severalTokensAtSamePosition) { if (positionCount == 1) { // no phrase query: BooleanQuery q = newBooleanQuery(true); for (int i = 0; i < list.size(); i++) { Query currentQuery; nextToken = list.get(i); String termText = nextToken.toString(); if (termText.contains("*") || termText.contains("?")) { currentQuery = newWildcardQuery(new Term(field, termText)); } else { currentQuery = newTermQuery(new Term(field, termText)); } q.add(currentQuery, BooleanClause.Occur.SHOULD); } return q; } else if (forceConjuncion) { BooleanQuery or = new BooleanQuery(); for (LinkedList<org.apache.lucene.analysis.Token> tokenSequence : fixedTokenSequences) { BooleanQuery and = new BooleanQuery(); for (int i = 0; i < tokenSequence.size(); i++) { nextToken =
// Segment 3: multi-position handling. With forceConjuncion: each fixed token
// sequence becomes an AND (MUST) of term/wildcard queries, and the sequences are
// OR'd (SHOULD) together. For shingled fields that omit positions: a "weak
// phrase" — a MUST-joined BooleanQuery over the non-contained shingle tokens.
// Otherwise, when canUseMultiPhraseQuery(...) holds, a MultiPhraseQuery is built:
// terms sharing a position are accumulated in multiTerms and flushed as one
// position slot whenever a token with a positive increment arrives; wildcards in
// this path throw IllegalStateException.
(org.apache.lucene.analysis.Token) tokenSequence.get(i); String termText = nextToken.toString(); Term term = new Term(field, termText); if ((termText != null) && (termText.contains("*") || termText.contains("?"))) { org.apache.lucene.search.WildcardQuery wildQuery = new org.apache.lucene.search.WildcardQuery( term); and.add(wildQuery, Occur.MUST); } else { TermQuery termQuery = new TermQuery(term); and.add(termQuery, Occur.MUST); } } if (and.clauses().size() > 0) { or.add(and, Occur.SHOULD); } } return or; } // shingle else if (sf.omitPositions() && isShingled) { ArrayList<org.apache.lucene.analysis.Token> nonContained = getNonContained(list); Query currentQuery; BooleanQuery weakPhrase = new BooleanQuery(); for (org.apache.lucene.analysis.Token shingleToken : nonContained) { String termText = shingleToken.toString(); Term term = new Term(field, termText); if ((termText != null) && (termText.contains("*") || termText.contains("?"))) { currentQuery = new org.apache.lucene.search.WildcardQuery(term); } else { currentQuery = new TermQuery(term); } weakPhrase.add(currentQuery, Occur.MUST); } return weakPhrase; } // Consider if we can use a multi-phrase query (e.g for synonym use rather then WordDelimiterFilterFactory) else if (canUseMultiPhraseQuery(fixedTokenSequences)) { // phrase query: MultiPhraseQuery mpq = newMultiPhraseQuery(); mpq.setSlop(internalSlop); ArrayList<Term> multiTerms = new ArrayList<Term>(); int position = 0; for (int i = 0; i < list.size(); i++) { nextToken = list.get(i); String termText = nextToken.toString(); Term term = new Term(field, termText); if ((termText != null) && (termText.contains("*") || termText.contains("?"))) { throw new IllegalStateException("Wildcards are not allowed in multi phrase anymore"); } else { multiTerms.add(term); } if (nextToken.getPositionIncrement() > 0 && multiTerms.size() > 0) { if (getEnablePositionIncrements()) { mpq.add(multiTerms.toArray(new Term[0]), position); } else { mpq.add(multiTerms.toArray(new
// Segment 4: MultiPhraseQuery epilogue — flush any trailing multiTerms (with or
// without the position argument depending on getEnablePositionIncrements()),
// enforce the term-count limit via checkTermCount, and return. If neither the
// multi-phrase nor the earlier paths applied, fall back to
// generateSpanOrQuery(...) (the original comment explains this skips token
// sequences whose small fragments would expand into too many wildcards).
// The !severalTokensAtSamePosition branch then repeats the forceConjuncion
// OR-of-ANDs construction, else begins the span-query build.
Term[0])); } checkTermCount(field, queryText, mpq); multiTerms.clear(); } position += nextToken.getPositionIncrement(); } if (getEnablePositionIncrements()) { if (multiTerms.size() > 0) { mpq.add(multiTerms.toArray(new Term[0]), position); } // else // { // mpq.add(new Term[] { new Term(field, "\u0000") }, position); // } } else { if (multiTerms.size() > 0) { mpq.add(multiTerms.toArray(new Term[0])); } // else // { // mpq.add(new Term[] { new Term(field, "\u0000") }); // } } checkTermCount(field, queryText, mpq); return mpq; } // Word delimiter factory and other odd things generate complex token patterns // Smart skip token sequences with small tokens that generate toomany wildcards // Fall back to the larger pattern // e.g Site1* will not do (S ite 1*) or (Site 1*) if 1* matches too much (S ite1*) and (Site1*) will still be OK // If we skip all (for just 1* in the input) this is still an issue. else { return generateSpanOrQuery(field, fixedTokenSequences); } } else { if (forceConjuncion) { BooleanQuery or = new BooleanQuery(); for (LinkedList<org.apache.lucene.analysis.Token> tokenSequence : fixedTokenSequences) { BooleanQuery and = new BooleanQuery(); for (int i = 0; i < tokenSequence.size(); i++) { nextToken = (org.apache.lucene.analysis.Token) tokenSequence.get(i); String termText = nextToken.toString(); Term term = new Term(field, termText); if ((termText != null) && (termText.contains("*") || termText.contains("?"))) { org.apache.lucene.search.WildcardQuery wildQuery = new org.apache.lucene.search.WildcardQuery( term); and.add(wildQuery, Occur.MUST); } else { TermQuery termQuery = new TermQuery(term); and.add(termQuery, Occur.MUST); } } if (and.clauses().size() > 0) { or.add(and, Occur.SHOULD); } } return or; } else { SpanQuery spanQuery = null; SpanOrQuery atSamePosition = new SpanOrQuery(); int gap = 0; for (int i = 0; i < list.size(); i++) { nextToken = list.get(i); String termText = nextToken.toString(); Term term = new Term(field, termText); if
// Segment 5: span-query build with position increments enabled. Each token
// becomes a SpanTermQuery, or for wildcard text a SpanMultiTermQueryWrapper
// around a WildcardQuery rewritten via TopTermsSpanBooleanQueryRewrite
// (limit topTermSpanRewriteLimit). Tokens with gap == 0 are OR'd into
// atSamePosition; when a positive gap arrives, the accumulated same-position
// group (single clause used directly, multi-clause kept as SpanOrQuery) is
// chained onto spanQuery via SpanNearQuery with slop (gap - 1) + internalSlop,
// in-order when internalSlop < 2. Without position increments, all tokens are
// simply OR'd into one SpanOrQuery.
(getEnablePositionIncrements()) { SpanQuery nextSpanQuery; if ((termText != null) && (termText.contains("*") || termText.contains("?"))) { org.apache.lucene.search.WildcardQuery wildQuery = new org.apache.lucene.search.WildcardQuery( term); SpanMultiTermQueryWrapper wrapper = new SpanMultiTermQueryWrapper<>(wildQuery); wrapper.setRewriteMethod( new TopTermsSpanBooleanQueryRewrite(topTermSpanRewriteLimit)); nextSpanQuery = wrapper; } else { nextSpanQuery = new SpanTermQuery(term); } if (gap == 0) { atSamePosition.addClause(nextSpanQuery); } else { if (atSamePosition.getClauses().length == 0) { if (spanQuery == null) { spanQuery = nextSpanQuery; } else { spanQuery = new SpanNearQuery(new SpanQuery[] { spanQuery, nextSpanQuery }, (gap - 1) + internalSlop, internalSlop < 2); } atSamePosition = new SpanOrQuery(); } else if (atSamePosition.getClauses().length == 1) { if (spanQuery == null) { spanQuery = atSamePosition.getClauses()[0]; } else { spanQuery = new SpanNearQuery( new SpanQuery[] { spanQuery, atSamePosition.getClauses()[0] }, (gap - 1) + internalSlop, internalSlop < 2); } atSamePosition = new SpanOrQuery(); atSamePosition.addClause(nextSpanQuery); } else { if (spanQuery == null) { spanQuery = atSamePosition; } else { spanQuery = new SpanNearQuery(new SpanQuery[] { spanQuery, atSamePosition }, (gap - 1) + internalSlop, internalSlop < 2); } atSamePosition = new SpanOrQuery(); atSamePosition.addClause(nextSpanQuery); } } gap = nextToken.getPositionIncrement(); } else { SpanQuery nextSpanQuery; if ((termText != null) && (termText.contains("*") || termText.contains("?"))) { org.apache.lucene.search.WildcardQuery wildQuery = new org.apache.lucene.search.WildcardQuery( term); SpanMultiTermQueryWrapper wrapper = new SpanMultiTermQueryWrapper<>(wildQuery); wrapper.setRewriteMethod( new TopTermsSpanBooleanQueryRewrite(topTermSpanRewriteLimit)); nextSpanQuery = wrapper; } else { nextSpanQuery = new SpanTermQuery(term); } if (spanQuery == null) { spanQuery = new
// Segment 6: span-query epilogue — flush the final atSamePosition group using
// the same single/multi-clause chaining as above and return the result. The
// trailing braces close enclosing scopes whose openings are outside this chunk.
SpanOrQuery(); ((SpanOrQuery) spanQuery).addClause(nextSpanQuery); } else { ((SpanOrQuery) spanQuery).addClause(nextSpanQuery); } } } if (atSamePosition.getClauses().length == 0) { return spanQuery; } else if (atSamePosition.getClauses().length == 1) { if (spanQuery == null) { spanQuery = atSamePosition.getClauses()[0]; } else { spanQuery = new SpanNearQuery( new SpanQuery[] { spanQuery, atSamePosition.getClauses()[0] }, (gap - 1) + internalSlop, internalSlop < 2); } return spanQuery; } else { if (spanQuery == null) { spanQuery = atSamePosition; } else { spanQuery = new SpanNearQuery(new SpanQuery[] { spanQuery, atSamePosition }, (gap - 1) + internalSlop, internalSlop < 2); } return spanQuery; } } } } }