List of usage examples for java.util.TreeMap.get
public V get(Object key)
From source file:com.lp.server.fertigung.ejbfac.FertigungFacBean.java
private TreeMap<String, Object[]> add2TreeMap(TreeMap<String, Object[]> tm, String key, Object[] zeile) { if (tm.containsKey(key)) { Object[] zeileVorhanden = tm.get(key); BigDecimal bdMenge = (BigDecimal) zeileVorhanden[StuecklisteReportFac.REPORT_STUECKLISTE_LOSEAKTUALISIERT_KORREKTUR_AUSGABEMENGE]; bdMenge = bdMenge.add(//from w ww. j a v a 2s. c om (BigDecimal) zeile[StuecklisteReportFac.REPORT_STUECKLISTE_LOSEAKTUALISIERT_KORREKTUR_AUSGABEMENGE]); zeileVorhanden[StuecklisteReportFac.REPORT_STUECKLISTE_LOSEAKTUALISIERT_KORREKTUR_AUSGABEMENGE] = bdMenge; BigDecimal bdSollMenge = (BigDecimal) zeileVorhanden[StuecklisteReportFac.REPORT_STUECKLISTE_LOSEAKTUALISIERT_KORREKTUR_SOLLMENGE]; bdSollMenge = bdSollMenge.add( (BigDecimal) zeile[StuecklisteReportFac.REPORT_STUECKLISTE_LOSEAKTUALISIERT_KORREKTUR_SOLLMENGE]); zeileVorhanden[StuecklisteReportFac.REPORT_STUECKLISTE_LOSEAKTUALISIERT_KORREKTUR_SOLLMENGE] = bdSollMenge; if (bdMenge.doubleValue() == 0 && bdSollMenge.doubleValue() == 0) { tm.remove(key); } else { tm.put(key, zeileVorhanden); } } else { tm.put(key, zeile); } return tm; }
From source file:com.joliciel.talismane.posTagger.PosTaggerImpl.java
@Override public List<PosTagSequence> tagSentence(List<TokenSequence> tokenSequences) { MONITOR.startTask("tagSentence"); try {//from ww w . j a v a2 s .c o m MONITOR.startTask("apply filters"); try { for (TokenSequence tokenSequence : tokenSequences) { for (TokenSequenceFilter tokenFilter : this.preProcessingFilters) { tokenFilter.apply(tokenSequence); } } } finally { MONITOR.endTask("apply filters"); } int sentenceLength = tokenSequences.get(0).getText().length(); TreeMap<Double, PriorityQueue<PosTagSequence>> heaps = new TreeMap<Double, PriorityQueue<PosTagSequence>>(); PriorityQueue<PosTagSequence> heap0 = new PriorityQueue<PosTagSequence>(); for (TokenSequence tokenSequence : tokenSequences) { // add an empty PosTagSequence for each token sequence PosTagSequence emptySequence = this.getPosTaggerService().getPosTagSequence(tokenSequence, 0); emptySequence.setScoringStrategy(decisionMaker.getDefaultScoringStrategy()); heap0.add(emptySequence); } heaps.put(0.0, heap0); PriorityQueue<PosTagSequence> finalHeap = null; while (heaps.size() > 0) { Entry<Double, PriorityQueue<PosTagSequence>> heapEntry = heaps.pollFirstEntry(); if (LOG.isTraceEnabled()) { LOG.trace("heap key: " + heapEntry.getKey() + ", sentence length: " + sentenceLength); } if (heapEntry.getKey() == sentenceLength) { finalHeap = heapEntry.getValue(); break; } PriorityQueue<PosTagSequence> previousHeap = heapEntry.getValue(); // limit the breadth to K int maxSequences = previousHeap.size() > this.beamWidth ? 
this.beamWidth : previousHeap.size(); for (int j = 0; j < maxSequences; j++) { PosTagSequence history = previousHeap.poll(); Token token = history.getNextToken(); if (LOG.isTraceEnabled()) { LOG.trace("#### Next history ( " + heapEntry.getKey() + "): " + history.toString()); LOG.trace("Prob: " + df.format(history.getScore())); LOG.trace("Token: " + token.getText()); StringBuilder sb = new StringBuilder(); for (Token oneToken : history.getTokenSequence().listWithWhiteSpace()) { if (oneToken.equals(token)) sb.append("[" + oneToken + "]"); else sb.append(oneToken); } LOG.trace(sb.toString()); } PosTaggerContext context = this.getPosTaggerFeatureService().getContext(token, history); List<Decision<PosTag>> decisions = new ArrayList<Decision<PosTag>>(); // test the positive rules on the current token boolean ruleApplied = false; if (posTaggerPositiveRules != null) { MONITOR.startTask("check rules"); try { for (PosTaggerRule rule : posTaggerPositiveRules) { if (LOG.isTraceEnabled()) { LOG.trace("Checking rule: " + rule.getCondition().getName()); } RuntimeEnvironment env = this.featureService.getRuntimeEnvironment(); FeatureResult<Boolean> ruleResult = rule.getCondition().check(context, env); if (ruleResult != null && ruleResult.getOutcome()) { Decision<PosTag> positiveRuleDecision = TalismaneSession.getPosTagSet() .createDefaultDecision(rule.getTag()); decisions.add(positiveRuleDecision); positiveRuleDecision.addAuthority(rule.getCondition().getName()); ruleApplied = true; if (LOG.isTraceEnabled()) { LOG.trace("Rule applies. 
Setting posTag to: " + rule.getTag().getCode()); } break; } } } finally { MONITOR.endTask("check rules"); } } if (!ruleApplied) { // test the features on the current token List<FeatureResult<?>> featureResults = new ArrayList<FeatureResult<?>>(); MONITOR.startTask("analyse features"); try { for (PosTaggerFeature<?> posTaggerFeature : posTaggerFeatures) { MONITOR.startTask(posTaggerFeature.getCollectionName()); try { RuntimeEnvironment env = this.featureService.getRuntimeEnvironment(); FeatureResult<?> featureResult = posTaggerFeature.check(context, env); if (featureResult != null) featureResults.add(featureResult); } finally { MONITOR.endTask(posTaggerFeature.getCollectionName()); } } if (LOG.isTraceEnabled()) { for (FeatureResult<?> result : featureResults) { LOG.trace(result.toString()); } } } finally { MONITOR.endTask("analyse features"); } // evaluate the feature results using the maxent model MONITOR.startTask("make decision"); decisions = this.decisionMaker.decide(featureResults); MONITOR.endTask("make decision"); for (ClassificationObserver<PosTag> observer : this.observers) { observer.onAnalyse(token, featureResults, decisions); } // apply the negative rules Set<PosTag> eliminatedPosTags = new TreeSet<PosTag>(); if (posTaggerNegativeRules != null) { MONITOR.startTask("check negative rules"); try { for (PosTaggerRule rule : posTaggerNegativeRules) { if (LOG.isTraceEnabled()) { LOG.trace("Checking negative rule: " + rule.getCondition().getName()); } RuntimeEnvironment env = this.featureService.getRuntimeEnvironment(); FeatureResult<Boolean> ruleResult = rule.getCondition().check(context, env); if (ruleResult != null && ruleResult.getOutcome()) { eliminatedPosTags.add(rule.getTag()); if (LOG.isTraceEnabled()) { LOG.trace( "Rule applies. 
Eliminating posTag: " + rule.getTag().getCode()); } } } if (eliminatedPosTags.size() > 0) { List<Decision<PosTag>> decisionShortList = new ArrayList<Decision<PosTag>>(); for (Decision<PosTag> decision : decisions) { if (!eliminatedPosTags.contains(decision.getOutcome())) { decisionShortList.add(decision); } else { LOG.trace("Eliminating decision: " + decision.toString()); } } if (decisionShortList.size() > 0) { decisions = decisionShortList; } else { LOG.debug("All decisions eliminated! Restoring original decisions."); } } } finally { MONITOR.endTask("check negative rules"); } } // is this a known word in the lexicon? MONITOR.startTask("apply constraints"); try { if (LOG.isTraceEnabled()) { String posTags = ""; for (PosTag onePosTag : token.getPossiblePosTags()) { posTags += onePosTag.getCode() + ","; } LOG.trace("Token: " + token.getText() + ". PosTags: " + posTags); } List<Decision<PosTag>> decisionShortList = new ArrayList<Decision<PosTag>>(); for (Decision<PosTag> decision : decisions) { if (decision.getProbability() >= MIN_PROB_TO_STORE) { decisionShortList.add(decision); } } if (decisionShortList.size() > 0) { decisions = decisionShortList; } } finally { MONITOR.endTask("apply constraints"); } } // has a rule been applied? 
// add new TaggedTokenSequences to the heap, one for each outcome provided by MaxEnt MONITOR.startTask("heap sort"); for (Decision<PosTag> decision : decisions) { if (LOG.isTraceEnabled()) LOG.trace("Outcome: " + decision.getOutcome() + ", " + decision.getProbability()); PosTaggedToken posTaggedToken = this.getPosTaggerService().getPosTaggedToken(token, decision); PosTagSequence sequence = this.getPosTaggerService().getPosTagSequence(history); sequence.addPosTaggedToken(posTaggedToken); if (decision.isStatistical()) sequence.addDecision(decision); double heapIndex = token.getEndIndex(); // add another half for an empty token, to differentiate it from regular ones if (token.getStartIndex() == token.getEndIndex()) heapIndex += 0.5; // if it's the last token, make sure we end if (token.getIndex() == sequence.getTokenSequence().size() - 1) heapIndex = sentenceLength; if (LOG.isTraceEnabled()) LOG.trace("Heap index: " + heapIndex); PriorityQueue<PosTagSequence> heap = heaps.get(heapIndex); if (heap == null) { heap = new PriorityQueue<PosTagSequence>(); heaps.put(heapIndex, heap); } heap.add(sequence); } // next outcome for this token MONITOR.endTask("heap sort"); } // next history } // next atomic index // return the best sequence on the heap List<PosTagSequence> sequences = new ArrayList<PosTagSequence>(); int i = 0; while (!finalHeap.isEmpty()) { sequences.add(finalHeap.poll()); i++; if (i >= this.getBeamWidth()) break; } // apply post-processing filters LOG.debug("####Final postag sequences:"); int j = 1; for (PosTagSequence sequence : sequences) { if (LOG.isDebugEnabled()) { LOG.debug("Sequence " + (j++) + ", score=" + df.format(sequence.getScore())); LOG.debug("Sequence before filters: " + sequence); } for (PosTagSequenceFilter filter : this.postProcessingFilters) filter.apply(sequence); if (LOG.isDebugEnabled()) { LOG.debug("Sequence after filters: " + sequence); } } return sequences; } finally { MONITOR.endTask("tagSentence"); } }
From source file:gov.vha.isaac.ochre.impl.sememe.DynamicSememeUsageDescription.java
/** * Read the RefexUsageDescription data from the database for a given nid. * /*from w ww.j av a 2s .co m*/ * Note that most users should call {@link #read(int)} instead, as that utilizes a cache. * This always reads directly from the DB. * * @param refexUsageDescriptorSequence * @throws IOException * @throws ContradictionException */ @SuppressWarnings("unchecked") public DynamicSememeUsageDescription(int refexUsageDescriptorSequence) { refexUsageDescriptorSequence_ = refexUsageDescriptorSequence; TreeMap<Integer, DynamicSememeColumnInfo> allowedColumnInfo = new TreeMap<>(); ConceptChronology<?> assemblageConcept = Get.conceptService().getConcept(refexUsageDescriptorSequence_); for (SememeChronology<? extends DescriptionSememe<?>> descriptionSememe : assemblageConcept .getConceptDescriptionList()) { @SuppressWarnings("rawtypes") Optional<LatestVersion<DescriptionSememe<?>>> descriptionVersion = ((SememeChronology) descriptionSememe) .getLatestVersion(DescriptionSememe.class, StampCoordinates.getDevelopmentLatestActiveOnly()); if (descriptionVersion.isPresent()) { @SuppressWarnings("rawtypes") DescriptionSememe ds = descriptionVersion.get().value(); if (ds.getDescriptionTypeConceptSequence() == IsaacMetadataAuxiliaryBinding.DEFINITION_DESCRIPTION_TYPE .getConceptSequence()) { Optional<SememeChronology<? 
extends SememeVersion<?>>> nestesdSememe = Get.sememeService() .getSememesForComponentFromAssemblage(ds.getNid(), IsaacMetadataConstants.DYNAMIC_SEMEME_DEFINITION_DESCRIPTION.getSequence()) .findAny(); if (nestesdSememe.isPresent()) { sememeUsageDescription_ = ds.getText(); } ; } if (ds.getDescriptionTypeConceptSequence() == IsaacMetadataAuxiliaryBinding.FULLY_SPECIFIED_NAME .getConceptSequence()) { name_ = ds.getText(); } if (sememeUsageDescription_ != null && name_ != null) { break; } } } if (StringUtils.isEmpty(sememeUsageDescription_)) { throw new RuntimeException("The Assemblage concept: " + assemblageConcept + " is not correctly assembled for use as an Assemblage for " + "a DynamicSememeData Refex Type. It must contain a description of type Definition with an annotation of type " + "DynamicSememe.DYNAMIC_SEMEME_DEFINITION_DESCRIPTION"); } Get.sememeService().getSememesForComponent(assemblageConcept.getNid()).forEach(sememe -> { if (sememe.getSememeType() == SememeType.DYNAMIC) { @SuppressWarnings("rawtypes") Optional<LatestVersion<? extends DynamicSememe>> sememeVersion = ((SememeChronology) sememe) .getLatestVersion(DynamicSememe.class, StampCoordinates.getDevelopmentLatestActiveOnly()); if (sememeVersion.isPresent()) { @SuppressWarnings("rawtypes") DynamicSememe ds = sememeVersion.get().value(); DynamicSememeDataBI[] refexDefinitionData = ds.getData(); if (sememe.getAssemblageSequence() == IsaacMetadataConstants.DYNAMIC_SEMEME_EXTENSION_DEFINITION .getSequence()) { if (refexDefinitionData == null || refexDefinitionData.length < 3 || refexDefinitionData.length > 7) { throw new RuntimeException("The Assemblage concept: " + assemblageConcept + " is not correctly assembled for use as an Assemblage for " + "a DynamicSememeData Refex Type. It must contain at least 3 columns in the DynamicSememeDataBI attachment, and no more than 7."); } //col 0 is the column number, //col 1 is the concept with col name //col 2 is the column data type, stored as a string. 
//col 3 (if present) is the default column data, stored as a subtype of DynamicSememeDataBI //col 4 (if present) is a boolean field noting whether the column is required (true) or optional (false or null) //col 5 (if present) is the validator {@link DynamicSememeValidatorType}, stored as a string array. //col 6 (if present) is the validatorData for the validator in column 5, stored as a subtype of DynamicSememeDataBI try { int column = (Integer) refexDefinitionData[0].getDataObject(); UUID descriptionUUID = (UUID) refexDefinitionData[1].getDataObject(); DynamicSememeDataType type = DynamicSememeDataType .valueOf((String) refexDefinitionData[2].getDataObject()); DynamicSememeDataBI defaultData = null; if (refexDefinitionData.length > 3) { defaultData = (refexDefinitionData[3] == null ? null : refexDefinitionData[3]); } if (defaultData != null && type.getDynamicSememeMemberClass() != refexDefinitionData[3] .getDynamicSememeDataType().getDynamicSememeMemberClass()) { throw new IOException("The Assemblage concept: " + assemblageConcept + " is not correctly assembled for use as an Assemblage for " + "a DynamicSememeData Refex Type. The type of the column (column 3) must match the type of the defaultData (column 4)"); } Boolean columnRequired = null; if (refexDefinitionData.length > 4) { columnRequired = (refexDefinitionData[4] == null ? 
null : (Boolean) refexDefinitionData[4].getDataObject()); } DynamicSememeValidatorType[] validators = null; DynamicSememeDataBI[] validatorsData = null; if (refexDefinitionData.length > 5) { if (refexDefinitionData[5] != null && ((DynamicSememeArrayBI<DynamicSememeStringBI>) refexDefinitionData[5]) .getDataArray().length > 0) { DynamicSememeArrayBI<DynamicSememeStringBI> readValidators = (DynamicSememeArrayBI<DynamicSememeStringBI>) refexDefinitionData[5]; validators = new DynamicSememeValidatorType[readValidators .getDataArray().length]; for (int i = 0; i < validators.length; i++) { validators[i] = DynamicSememeValidatorType .valueOf((String) readValidators.getDataArray()[i].getDataObject()); } } if (refexDefinitionData.length > 6) { if (refexDefinitionData[6] != null && ((DynamicSememeArrayBI<? extends DynamicSememeDataBI>) refexDefinitionData[6]) .getDataArray().length > 0) { DynamicSememeArrayBI<? extends DynamicSememeDataBI> readValidatorsData = (DynamicSememeArrayBI<? extends DynamicSememeDataBI>) refexDefinitionData[6]; validatorsData = new DynamicSememeDataBI[readValidatorsData .getDataArray().length]; for (int i = 0; i < validators.length; i++) { if (readValidatorsData.getDataArray()[i] != null) { validatorsData[i] = readValidatorsData.getDataArray()[i]; } else { validatorsData[i] = null; } } } } } allowedColumnInfo.put(column, new DynamicSememeColumnInfo(assemblageConcept.getPrimordialUuid(), column, descriptionUUID, type, defaultData, columnRequired, validators, validatorsData)); } catch (Exception e) { throw new RuntimeException("The Assemblage concept: " + assemblageConcept + " is not correctly assembled for use as an Assemblage for " + "a DynamicSememeData Refex Type. 
The first column must have a data type of integer, and the third column must be a string " + "that is parseable as a DynamicSememeDataType"); } } else if (sememe .getAssemblageSequence() == IsaacMetadataConstants.DYNAMIC_SEMEME_REFERENCED_COMPONENT_RESTRICTION .getSequence()) { if (refexDefinitionData == null || refexDefinitionData.length < 1) { throw new RuntimeException("The Assemblage concept: " + assemblageConcept + " is not correctly assembled for use as an Assemblage for " + "a DynamicSememeData Refex Type. If it contains a " + IsaacMetadataConstants.DYNAMIC_SEMEME_REFERENCED_COMPONENT_RESTRICTION .getFSN() + " then it must contain a single column of data, of type string, parseable as a " + ObjectChronologyType.class.getName()); } //col 0 is Referenced component restriction information - as a string. try { ObjectChronologyType type = ObjectChronologyType .parse(refexDefinitionData[0].getDataObject().toString()); if (type == ObjectChronologyType.UNKNOWN_NID) { //just ignore - it shouldn't have been saved that way anyway. } else { referencedComponentTypeRestriction_ = type; } } catch (Exception e) { throw new RuntimeException("The Assemblage concept: " + assemblageConcept + " is not correctly assembled for use as an Assemblage for " + "a DynamicSememeData Refex Type. The component type restriction annotation has an invalid value"); } //col 1 is an optional Referenced component sub-restriction information - as a string. if (refexDefinitionData.length > 1 && refexDefinitionData[1] != null) { try { SememeType type = SememeType .parse(refexDefinitionData[1].getDataObject().toString()); if (type == SememeType.UNKNOWN) { //just ignore - it shouldn't have been saved that way anyway. } else { referencedComponentTypeSubRestriction_ = type; } } catch (Exception e) { throw new RuntimeException("The Assemblage concept: " + assemblageConcept + " is not correctly assembled for use as an Assemblage for " + "a DynamicSememeData Refex Type. 
The component type restriction annotation has an invalid value"); } } else { referencedComponentTypeSubRestriction_ = null; } } } } }); refexColumnInfo_ = new DynamicSememeColumnInfo[allowedColumnInfo.size()]; int i = 0; for (int key : allowedColumnInfo.keySet()) { if (key != i) { throw new RuntimeException("The Assemblage concept: " + assemblageConcept + " is not correctly assembled for use as an Assemblage for " + "a DynamicSememeData Refex Type. It must contain sequential column numbers, with no gaps, which start at 0."); } refexColumnInfo_[i++] = allowedColumnInfo.get(key); } }
From source file:net.spfbl.http.ServerHTTP.java
private static String getControlPanel(Locale locale, User user, Long begin, String filter) { StringBuilder builder = new StringBuilder(); if (begin == null && filter == null) { // builder.append("<!DOCTYPE html>\n"); builder.append("<html lang=\""); builder.append(locale.getLanguage()); builder.append("\">\n"); builder.append(" <head>\n"); builder.append(" <meta charset=\"UTF-8\">\n"); if (locale.getLanguage().toLowerCase().equals("pt")) { builder.append(" <title>Painel de controle do SPFBL</title>\n"); } else {//w w w.j ava 2 s .co m builder.append(" <title>SPFBL control panel</title>\n"); } // Styled page. builder.append(" <style type=\"text/css\">\n"); builder.append(" body {\n"); builder.append(" margin:180px 0px 30px 0px;\n"); builder.append(" background:lightgray;\n"); builder.append(" }\n"); builder.append(" iframe {\n"); builder.append(" border-width: 0px 0px 0px 0px;\n"); builder.append(" width:100%;\n"); builder.append(" height:150px;\n"); builder.append(" }\n"); builder.append(" .header {\n"); builder.append(" background-color:lightgray;\n"); builder.append(" border-width: 0px 0px 0px 0px;\n"); builder.append(" position:fixed;\n"); builder.append(" top:0px;\n"); builder.append(" margin:auto;\n"); builder.append(" z-index:1;\n"); builder.append(" width:100%;\n"); builder.append(" height:180px;\n"); builder.append(" }\n"); builder.append(" .bottom {\n"); builder.append(" background-color:lightgray;\n"); builder.append(" border-width: 0px 0px 0px 0px;\n"); builder.append(" position:fixed;\n"); builder.append(" bottom:0px;\n"); builder.append(" margin:auto;\n"); builder.append(" z-index:1;\n"); builder.append(" width:100%;\n"); builder.append(" height:30px;\n"); builder.append(" }\n"); builder.append(" .button {\n"); builder.append(" background-color: #4CAF50;\n"); builder.append(" border: none;\n"); builder.append(" color: white;\n"); builder.append(" padding: 16px 32px;\n"); builder.append(" text-align: center;\n"); builder.append(" text-decoration: 
none;\n"); builder.append(" display: inline-block;\n"); builder.append(" font-size: 16px;\n"); builder.append(" margin: 4px 2px;\n"); builder.append(" -webkit-transition-duration: 0.4s;\n"); builder.append(" transition-duration: 0.4s;\n"); builder.append(" cursor: pointer;\n"); builder.append(" }\n"); builder.append(" .sender {\n"); builder.append(" background-color: white; \n"); builder.append(" color: black; \n"); builder.append(" border: 2px solid #008CBA;\n"); builder.append(" width: 100%;\n"); builder.append(" word-wrap: break-word;\n"); builder.append(" }\n"); builder.append(" .sender:hover {\n"); builder.append(" background-color: #008CBA;\n"); builder.append(" color: white;\n"); builder.append(" }\n"); builder.append(" .highlight {\n"); builder.append(" background: #b4b9d2;\n"); builder.append(" color:black;\n"); builder.append(" border-top: 1px solid #22262e;\n"); builder.append(" border-bottom: 1px solid #22262e;\n"); builder.append(" }\n"); builder.append(" .highlight:nth-child(odd) td {\n"); builder.append(" background: #b4b9d2;\n"); builder.append(" }\n"); builder.append(" .click {\n"); builder.append(" cursor:pointer;\n"); builder.append(" cursor:hand;\n"); builder.append(" }\n"); builder.append(" table {\n"); builder.append(" background: white;\n"); builder.append(" table-layout:fixed;\n"); builder.append(" border-collapse: collapse;\n"); builder.append(" word-wrap:break-word;\n"); builder.append(" border-radius:3px;\n"); builder.append(" border-collapse: collapse;\n"); builder.append(" margin: auto;\n"); builder.append(" padding:2px;\n"); builder.append(" width: 100%;\n"); builder.append(" box-shadow: 0 5px 10px rgba(0, 0, 0, 0.1);\n"); builder.append(" animation: float 5s infinite;\n"); builder.append(" }\n"); builder.append(" th {\n"); builder.append(" color:#FFFFFF;;\n"); builder.append(" background:#1b1e24;\n"); builder.append(" border-bottom:4px solid #9ea7af;\n"); builder.append(" border-right: 0px;\n"); builder.append(" font-size:16px;\n"); 
builder.append(" font-weight: bold;\n"); builder.append(" padding:4px;\n"); builder.append(" text-align:left;\n"); builder.append(" text-shadow: 0 1px 1px rgba(0, 0, 0, 0.1);\n"); builder.append(" vertical-align:middle;\n"); builder.append(" height:30px;\n"); builder.append(" }\n"); builder.append(" tr {\n"); builder.append(" border-top: 1px solid #C1C3D1;\n"); builder.append(" border-bottom-: 1px solid #C1C3D1;\n"); builder.append(" font-size:16px;\n"); builder.append(" font-weight:normal;\n"); builder.append(" text-shadow: 0 1px 1px rgba(256, 256, 256, 0.1);\n"); builder.append(" }\n"); builder.append(" tr:nth-child(odd) td {\n"); builder.append(" background:#EBEBEB;\n"); builder.append(" }\n"); builder.append(" td {\n"); builder.append(" padding:2px;\n"); builder.append(" vertical-align:middle;\n"); builder.append(" font-size:16px;\n"); builder.append(" text-shadow: -1px -1px 1px rgba(0, 0, 0, 0.1);\n"); builder.append(" border-right: 1px solid #C1C3D1;\n"); builder.append(" }\n"); builder.append(" input[type=text], select {\n"); builder.append(" width: 400px;\n"); builder.append(" padding: 0px 4px;\n"); builder.append(" margin: 1px 0;\n"); builder.append(" display: inline-block;\n"); builder.append(" background: #b4b9d2;\n"); builder.append(" border: 1px solid #ccc;\n"); builder.append(" border-radius: 4px;\n"); builder.append(" box-sizing: border-box;\n"); builder.append(" }\n"); builder.append(" </style>\n"); // JavaScript functions. 
TreeMap<Long, Query> queryMap = user.getQueryMap(null, null); builder.append( " <script type=\"text/javascript\" src=\"https://ajax.googleapis.com/ajax/libs/jquery/2.1.3/jquery.min.js\"></script>\n"); builder.append(" <script type=\"text/javascript\">\n"); builder.append(" window.onbeforeunload = function () {\n"); builder.append(" document.getElementById('filterField').value = '';\n"); builder.append(" window.scrollTo(0, 0);\n"); builder.append(" }\n"); builder.append(" var last = "); if (queryMap.isEmpty()) { builder.append(0); } else { builder.append(queryMap.lastKey()); } builder.append(";\n"); builder.append(" var filterText = '';\n"); builder.append(" function view(query) {\n"); builder.append(" if (query == undefined || query == 0) {\n"); builder.append(" var viewer = document.getElementById('viewer');\n"); builder.append(" viewer.src = 'about:blank';\n"); builder.append(" last = 0;\n"); builder.append(" } else if (last != query) {\n"); builder.append(" var viewer = document.getElementById('viewer');\n"); builder.append(" viewer.addEventListener('load', function() {\n"); builder.append(" if (document.getElementById(last)) {\n"); builder.append(" document.getElementById(last).className = 'tr';\n"); builder.append(" document.getElementById(last).className = 'click';\n"); builder.append(" }\n"); builder.append(" document.getElementById(query).className = 'highlight';\n"); builder.append(" last = query;\n"); builder.append(" });\n"); builder.append(" viewer.src = '"); builder.append(Core.getURL()); builder.append("' + query;\n"); builder.append(" }\n"); builder.append(" }\n"); builder.append(" function more(query) {\n"); builder.append(" var rowMore = document.getElementById('rowMore');\n"); builder.append(" rowMore.onclick = '';\n"); builder.append(" rowMore.className = 'tr';\n"); builder.append(" var columnMore = document.getElementById('columnMore');\n"); if (locale.getLanguage().toLowerCase().equals("pt")) { builder.append(" columnMore.innerHTML = 
'carregando mais registros';\n"); } else { builder.append(" columnMore.innerHTML = 'loading more records';\n"); } builder.append(" $.post(\n"); builder.append(" '"); builder.append(Core.getURL()); builder.append(user.getEmail()); builder.append("',\n"); builder.append(" {filter:filterText,begin:query},\n"); builder.append(" function(data, status) {\n"); builder.append(" if (status == 'success') {\n"); builder.append(" rowMore.parentNode.removeChild(rowMore);\n"); builder.append(" $('#tableBody').append(data);\n"); builder.append(" } else {\n"); if (locale.getLanguage().toLowerCase().equals("pt")) { builder.append( " alert('Houve uma falha de sistema ao tentar realizar esta operao.');\n"); } else { builder.append( " alert('There was a system crash while trying to perform this operation.');\n"); } builder.append(" }\n"); builder.append(" }\n"); builder.append(" );\n"); builder.append(" }\n"); builder.append(" function refresh() {\n"); builder.append(" filterText = document.getElementById('filterField').value;\n"); builder.append(" $.post(\n"); builder.append(" '"); builder.append(Core.getURL()); builder.append(user.getEmail()); builder.append("',\n"); builder.append(" {filter:filterText},\n"); builder.append(" function(data, status) {\n"); builder.append(" if (status == 'success') {\n"); builder.append(" $('#tableBody').html(data);\n"); builder.append(" view($('#tableBody tr').attr('id'));\n"); builder.append(" } else {\n"); if (locale.getLanguage().toLowerCase().equals("pt")) { builder.append( " alert('Houve uma falha de sistema ao tentar realizar esta operao.');\n"); } else { builder.append( " alert('There was a system crash while trying to perform this operation.');\n"); } builder.append(" }\n"); builder.append(" }\n"); builder.append(" );\n"); builder.append(" }\n"); builder.append(" </script>\n"); builder.append(" </head>\n"); // Body. 
builder.append(" <body>\n"); builder.append(" <div class=\"header\">\n"); if (queryMap.isEmpty()) { builder.append(" <iframe id=\"viewer\" src=\"about:blank\"></iframe>\n"); } else { builder.append(" <iframe id=\"viewer\" src=\""); builder.append(Core.getURL()); builder.append(queryMap.lastKey()); builder.append("\"></iframe>\n"); } // Construo da tabela de consultas. builder.append(" <table>\n"); builder.append(" <thead>\n"); builder.append(" <tr>\n"); if (locale.getLanguage().toLowerCase().equals("pt")) { builder.append(" <th style=\"width:120px;\">Recepo</th>\n"); builder.append(" <th>Origem</th>\n"); builder.append(" <th>Remetente</th>\n"); builder.append(" <th>Contedo</th>\n"); builder.append(" <th>Entrega</th>\n"); } else { builder.append(" <th style=\"width:160px;\">Reception</th>\n"); builder.append(" <th style=\"width:auto;\">Source</th>\n"); builder.append(" <th style=\"width:auto;\">Sender</th>\n"); builder.append(" <th style=\"width:auto;\">Content</th>\n"); builder.append(" <th style=\"width:auto;\">Delivery</th>\n"); } builder.append(" </tr>\n"); builder.append(" </thead>\n"); builder.append(" </table>\n"); builder.append(" </div>\n"); if (queryMap.isEmpty()) { builder.append(" <table>\n"); builder.append(" <tbody>\n"); builder.append(" <tr>\n"); if (locale.getLanguage().toLowerCase().equals("pt")) { builder.append( " <td colspan=\"5\" align=\"center\">nenhum registro encontrado</td>\n"); } else { builder.append(" <td colspan=\"5\" align=\"center\">no records found</td>\n"); } builder.append(" </tr>\n"); builder.append(" </tbody>\n"); builder.append(" </table>\n"); } else { DateFormat dateFormat = DateFormat.getDateTimeInstance(DateFormat.SHORT, DateFormat.MEDIUM, locale); GregorianCalendar calendar = new GregorianCalendar(); Long nextQuery = null; while (queryMap.size() > User.QUERY_MAX_ROWS) { nextQuery = queryMap.pollFirstEntry().getKey(); } builder.append(" <table>\n"); builder.append(" <tbody id=\"tableBody\">\n"); for (Long time : 
queryMap.descendingKeySet()) { User.Query query = queryMap.get(time); boolean highlight = time.equals(queryMap.lastKey()); buildQueryRow(locale, builder, dateFormat, calendar, time, query, highlight); } if (nextQuery == null) { builder.append(" <tr>\n"); if (locale.getLanguage().toLowerCase().equals("pt")) { builder.append( " <td colspan=\"5\" align=\"center\">no foram encontrados outros registros</td>\n"); } else { builder.append(" <td colspan=\"5\" align=\"center\">no more records found</td>\n"); } builder.append(" </tr>\n"); } else { builder.append(" <tr id=\"rowMore\" class=\"click\" onclick=\"more('"); builder.append(nextQuery); builder.append("')\">\n"); if (locale.getLanguage().toLowerCase().equals("pt")) { builder.append( " <td id=\"columnMore\" colspan=\"5\" align=\"center\">clique para ver mais registros</td>\n"); } else { builder.append( " <td id=\"columnMore\" colspan=\"5\" align=\"center\">click to see more records</td>\n"); } builder.append(" </tr>\n"); } builder.append(" </tbody>\n"); builder.append(" </table>\n"); } builder.append(" <div class=\"bottom\">\n"); builder.append(" <table>\n"); builder.append(" <tr>\n"); if (locale.getLanguage().toLowerCase().equals("pt")) { builder.append( " <th>Pesquisar <input type=\"text\" id=\"filterField\" name=\"filterField\" onkeydown=\"if (event.keyCode == 13) refresh();\" autofocus></th>\n"); } else { builder.append( " <th>Search <input type=\"text\" id=\"filterField\" name=\"filterField\" onkeydown=\"if (event.keyCode == 13) refresh();\" autofocus></th>\n"); } builder.append(" <th style=\"text-align:right;\"><small>"); builder.append( "Powered by <a target=\"_blank\" href=\"http://spfbl.net/\" style=\"color: #b4b9d2;\">SPFBL.net</a></small>"); builder.append("</th>\n"); builder.append(" </tr>\n"); builder.append(" <table>\n"); builder.append(" </div>\n"); builder.append(" </body>\n"); builder.append("</html>\n"); } else { TreeMap<Long, Query> queryMap = user.getQueryMap(begin, filter); if (queryMap.isEmpty()) 
{ builder.append(" <tr>\n"); if (locale.getLanguage().toLowerCase().equals("pt")) { builder.append( " <td colspan=\"5\" align=\"center\">nenhum registro encontrado</td>\n"); } else { builder.append(" <td colspan=\"5\" align=\"center\">no records found</td>\n"); } builder.append(" </tr>\n"); } else { DateFormat dateFormat = DateFormat.getDateTimeInstance(DateFormat.SHORT, DateFormat.MEDIUM, locale); GregorianCalendar calendar = new GregorianCalendar(); Long nextQuery = null; while (queryMap.size() > User.QUERY_MAX_ROWS) { nextQuery = queryMap.pollFirstEntry().getKey(); } for (Long time : queryMap.descendingKeySet()) { User.Query query = queryMap.get(time); buildQueryRow(locale, builder, dateFormat, calendar, time, query, false); } if (nextQuery == null) { builder.append(" <tr>\n"); if (locale.getLanguage().toLowerCase().equals("pt")) { builder.append( " <td colspan=\"5\" align=\"center\">no foram encontrados outros registros</td>\n"); } else { builder.append(" <td colspan=\"5\" align=\"center\">no more records found</td>\n"); } builder.append(" </tr>\n"); } else { builder.append(" <tr id=\"rowMore\" class=\"click\" onclick=\"more('"); builder.append(nextQuery); builder.append("')\">\n"); if (locale.getLanguage().toLowerCase().equals("pt")) { builder.append( " <td id=\"columnMore\" colspan=\"5\" align=\"center\">clique para ver mais registros</td>\n"); } else { builder.append( " <td id=\"columnMore\" colspan=\"5\" align=\"center\">click to see more records</td>\n"); } builder.append(" </tr>\n"); } } } return builder.toString(); }
From source file:com.joliciel.talismane.parser.TransitionBasedParserImpl.java
@Override public List<ParseConfiguration> parseSentence(List<PosTagSequence> posTagSequences) { MONITOR.startTask("parseSentence"); try {//www . j a v a 2 s . c o m long startTime = (new Date()).getTime(); int maxAnalysisTimeMilliseconds = maxAnalysisTimePerSentence * 1000; int minFreeMemoryBytes = minFreeMemory * KILOBYTE; TokenSequence tokenSequence = posTagSequences.get(0).getTokenSequence(); TreeMap<Integer, PriorityQueue<ParseConfiguration>> heaps = new TreeMap<Integer, PriorityQueue<ParseConfiguration>>(); PriorityQueue<ParseConfiguration> heap0 = new PriorityQueue<ParseConfiguration>(); for (PosTagSequence posTagSequence : posTagSequences) { // add an initial ParseConfiguration for each postag sequence ParseConfiguration initialConfiguration = this.getParserServiceInternal() .getInitialConfiguration(posTagSequence); initialConfiguration.setScoringStrategy(decisionMaker.getDefaultScoringStrategy()); heap0.add(initialConfiguration); if (LOG.isDebugEnabled()) { LOG.debug("Adding initial posTagSequence: " + posTagSequence); } } heaps.put(0, heap0); PriorityQueue<ParseConfiguration> backupHeap = null; PriorityQueue<ParseConfiguration> finalHeap = null; PriorityQueue<ParseConfiguration> terminalHeap = new PriorityQueue<ParseConfiguration>(); while (heaps.size() > 0) { Entry<Integer, PriorityQueue<ParseConfiguration>> heapEntry = heaps.pollFirstEntry(); PriorityQueue<ParseConfiguration> currentHeap = heapEntry.getValue(); int currentHeapIndex = heapEntry.getKey(); if (LOG.isTraceEnabled()) { LOG.trace("##### Polling next heap: " + heapEntry.getKey() + ", size: " + heapEntry.getValue().size()); } boolean finished = false; // systematically set the final heap here, just in case we exit "naturally" with no more heaps finalHeap = heapEntry.getValue(); backupHeap = new PriorityQueue<ParseConfiguration>(); // we jump out when either (a) all tokens have been attached or (b) we go over the max alloted time ParseConfiguration topConf = currentHeap.peek(); if 
(topConf.isTerminal()) { LOG.trace("Exiting with terminal heap: " + heapEntry.getKey() + ", size: " + heapEntry.getValue().size()); finished = true; } if (earlyStop && terminalHeap.size() >= beamWidth) { LOG.debug( "Early stop activated and terminal heap contains " + beamWidth + " entries. Exiting."); finalHeap = terminalHeap; finished = true; } long analysisTime = (new Date()).getTime() - startTime; if (maxAnalysisTimePerSentence > 0 && analysisTime > maxAnalysisTimeMilliseconds) { LOG.info("Parse tree analysis took too long for sentence: " + tokenSequence.getText()); LOG.info("Breaking out after " + maxAnalysisTimePerSentence + " seconds."); finished = true; } if (minFreeMemory > 0) { long freeMemory = Runtime.getRuntime().freeMemory(); if (freeMemory < minFreeMemoryBytes) { LOG.info("Not enough memory left to parse sentence: " + tokenSequence.getText()); LOG.info("Min free memory (bytes):" + minFreeMemoryBytes); LOG.info("Current free memory (bytes): " + freeMemory); finished = true; } } if (finished) { break; } // limit the breadth to K int maxSequences = currentHeap.size() > this.beamWidth ? 
this.beamWidth : currentHeap.size(); int j = 0; while (currentHeap.size() > 0) { ParseConfiguration history = currentHeap.poll(); if (LOG.isTraceEnabled()) { LOG.trace("### Next configuration on heap " + heapEntry.getKey() + ":"); LOG.trace(history.toString()); LOG.trace("Score: " + df.format(history.getScore())); LOG.trace(history.getPosTagSequence()); } List<Decision<Transition>> decisions = new ArrayList<Decision<Transition>>(); // test the positive rules on the current configuration boolean ruleApplied = false; if (parserPositiveRules != null) { MONITOR.startTask("check rules"); try { for (ParserRule rule : parserPositiveRules) { if (LOG.isTraceEnabled()) { LOG.trace("Checking rule: " + rule.toString()); } RuntimeEnvironment env = this.featureService.getRuntimeEnvironment(); FeatureResult<Boolean> ruleResult = rule.getCondition().check(history, env); if (ruleResult != null && ruleResult.getOutcome()) { Decision<Transition> positiveRuleDecision = TalismaneSession .getTransitionSystem().createDefaultDecision(rule.getTransition()); decisions.add(positiveRuleDecision); positiveRuleDecision.addAuthority(rule.getCondition().getName()); ruleApplied = true; if (LOG.isTraceEnabled()) { LOG.trace("Rule applies. 
Setting transition to: " + rule.getTransition().getCode()); } break; } } } finally { MONITOR.endTask("check rules"); } } if (!ruleApplied) { // test the features on the current configuration List<FeatureResult<?>> parseFeatureResults = new ArrayList<FeatureResult<?>>(); MONITOR.startTask("feature analyse"); try { for (ParseConfigurationFeature<?> feature : this.parseFeatures) { MONITOR.startTask(feature.getName()); try { RuntimeEnvironment env = this.featureService.getRuntimeEnvironment(); FeatureResult<?> featureResult = feature.check(history, env); if (featureResult != null) parseFeatureResults.add(featureResult); } finally { MONITOR.endTask(feature.getName()); } } if (LOG_FEATURES.isTraceEnabled()) { for (FeatureResult<?> featureResult : parseFeatureResults) { LOG_FEATURES.trace(featureResult.toString()); } } } finally { MONITOR.endTask("feature analyse"); } // evaluate the feature results using the decision maker MONITOR.startTask("make decision"); try { decisions = this.decisionMaker.decide(parseFeatureResults); for (ClassificationObserver<Transition> observer : this.observers) { observer.onAnalyse(history, parseFeatureResults, decisions); } List<Decision<Transition>> decisionShortList = new ArrayList<Decision<Transition>>( decisions.size()); for (Decision<Transition> decision : decisions) { if (decision.getProbability() > MIN_PROB_TO_STORE) decisionShortList.add(decision); } decisions = decisionShortList; } finally { MONITOR.endTask("make decision"); } // apply the negative rules Set<Transition> eliminatedTransitions = new HashSet<Transition>(); if (parserNegativeRules != null) { MONITOR.startTask("check negative rules"); try { for (ParserRule rule : parserNegativeRules) { if (LOG.isTraceEnabled()) { LOG.trace("Checking negative rule: " + rule.toString()); } RuntimeEnvironment env = this.featureService.getRuntimeEnvironment(); FeatureResult<Boolean> ruleResult = rule.getCondition().check(history, env); if (ruleResult != null && ruleResult.getOutcome()) { 
eliminatedTransitions.addAll(rule.getTransitions()); if (LOG.isTraceEnabled()) { for (Transition eliminatedTransition : rule.getTransitions()) LOG.trace("Rule applies. Eliminating transition: " + eliminatedTransition.getCode()); } } } if (eliminatedTransitions.size() > 0) { List<Decision<Transition>> decisionShortList = new ArrayList<Decision<Transition>>(); for (Decision<Transition> decision : decisions) { if (!eliminatedTransitions.contains(decision.getOutcome())) { decisionShortList.add(decision); } else { LOG.trace("Eliminating decision: " + decision.toString()); } } if (decisionShortList.size() > 0) { decisions = decisionShortList; } else { LOG.debug("All decisions eliminated! Restoring original decisions."); } } } finally { MONITOR.endTask("check negative rules"); } } } // has a positive rule been applied? boolean transitionApplied = false; // add new configuration to the heap, one for each valid transition MONITOR.startTask("heap sort"); try { // Why apply all decisions here? Why not just the top N (where N = beamwidth)? 
// Answer: because we're not always adding solutions to the same heap // And yet: a decision here can only do one of two things: process a token (heap+1000), or add a non-processing transition (heap+1) // So, if we've already applied N decisions of each type, we should be able to stop for (Decision<Transition> decision : decisions) { Transition transition = decision.getOutcome(); if (LOG.isTraceEnabled()) LOG.trace("Outcome: " + transition.getCode() + ", " + decision.getProbability()); if (transition.checkPreconditions(history)) { transitionApplied = true; ParseConfiguration configuration = this.parserServiceInternal .getConfiguration(history); if (decision.isStatistical()) configuration.addDecision(decision); transition.apply(configuration); int nextHeapIndex = parseComparisonStrategy.getComparisonIndex(configuration) * 1000; if (configuration.isTerminal()) { nextHeapIndex = Integer.MAX_VALUE; } else { while (nextHeapIndex <= currentHeapIndex) nextHeapIndex++; } PriorityQueue<ParseConfiguration> nextHeap = heaps.get(nextHeapIndex); if (nextHeap == null) { if (configuration.isTerminal()) nextHeap = terminalHeap; else nextHeap = new PriorityQueue<ParseConfiguration>(); heaps.put(nextHeapIndex, nextHeap); if (LOG.isTraceEnabled()) LOG.trace("Created heap with index: " + nextHeapIndex); } nextHeap.add(configuration); if (LOG.isTraceEnabled()) { LOG.trace("Added configuration with score " + configuration.getScore() + " to heap: " + nextHeapIndex + ", total size: " + nextHeap.size()); } configuration.clearMemory(); } else { if (LOG.isTraceEnabled()) LOG.trace("Cannot apply transition: doesn't meet pre-conditions"); // just in case the we run out of both heaps and analyses, we build this backup heap backupHeap.add(history); } // does transition meet pre-conditions? 
} // next transition } finally { MONITOR.endTask("heap sort"); } if (transitionApplied) { j++; } else { LOG.trace("No transitions could be applied: not counting this history as part of the beam"); } // beam width test if (j == maxSequences) break; } // next history } // next atomic index // return the best sequences on the heap List<ParseConfiguration> bestConfigurations = new ArrayList<ParseConfiguration>(); int i = 0; if (finalHeap.isEmpty()) finalHeap = backupHeap; while (!finalHeap.isEmpty()) { bestConfigurations.add(finalHeap.poll()); i++; if (i >= this.getBeamWidth()) break; } if (LOG.isDebugEnabled()) { for (ParseConfiguration finalConfiguration : bestConfigurations) { LOG.debug(df.format(finalConfiguration.getScore()) + ": " + finalConfiguration.toString()); LOG.debug("Pos tag sequence: " + finalConfiguration.getPosTagSequence()); LOG.debug("Transitions: " + finalConfiguration.getTransitions()); LOG.debug("Decisions: " + finalConfiguration.getDecisions()); if (LOG.isTraceEnabled()) { StringBuilder sb = new StringBuilder(); for (Decision<Transition> decision : finalConfiguration.getDecisions()) { sb.append(" * "); sb.append(df.format(decision.getProbability())); } sb.append(" root "); sb.append(finalConfiguration.getTransitions().size()); LOG.trace(sb.toString()); sb = new StringBuilder(); sb.append(" * PosTag sequence score "); sb.append(df.format(finalConfiguration.getPosTagSequence().getScore())); sb.append(" = "); for (PosTaggedToken posTaggedToken : finalConfiguration.getPosTagSequence()) { sb.append(" * "); sb.append(df.format(posTaggedToken.getDecision().getProbability())); } sb.append(" root "); sb.append(finalConfiguration.getPosTagSequence().size()); LOG.trace(sb.toString()); sb = new StringBuilder(); sb.append(" * Token sequence score = "); sb.append(df.format(finalConfiguration.getPosTagSequence().getTokenSequence().getScore())); LOG.trace(sb.toString()); } } } return bestConfigurations; } finally { MONITOR.endTask("parseSentence"); } }
From source file:com.joliciel.talismane.parser.TransitionBasedGlobalLearningParser.java
public List<ParseConfiguration> parseSentence(List<PosTagSequence> posTagSequences, FeatureWeightVector weightVector, RankingSolution correctSolution) { MONITOR.startTask("parseSentence"); try {/*from w w w. jav a2 s .c o m*/ long startTime = (new Date()).getTime(); int maxAnalysisTimeMilliseconds = maxAnalysisTimePerSentence * 1000; int minFreeMemoryBytes = minFreeMemory * KILOBYTE; TokenSequence tokenSequence = posTagSequences.get(0).getTokenSequence(); TreeMap<Integer, TreeSet<ParseConfiguration>> heaps = new TreeMap<Integer, TreeSet<ParseConfiguration>>(); TreeSet<ParseConfiguration> heap0 = new TreeSet<ParseConfiguration>(); for (PosTagSequence posTagSequence : posTagSequences) { // add an initial ParseConfiguration for each postag sequence ParseConfiguration initialConfiguration = this.getParserServiceInternal() .getInitialConfiguration(posTagSequence); initialConfiguration.setScoringStrategy(new SimpleRankingScoringStrategy()); initialConfiguration.setRankingScore(0.0); heap0.add(initialConfiguration); if (LOG.isDebugEnabled()) { LOG.debug("Adding initial posTagSequence: " + posTagSequence); } } heaps.put(0, heap0); TreeSet<ParseConfiguration> backupHeap = null; TreeSet<ParseConfiguration> finalHeap = null; while (heaps.size() > 0) { Entry<Integer, TreeSet<ParseConfiguration>> heapEntry = heaps.firstEntry(); TreeSet<ParseConfiguration> currentHeap = heapEntry.getValue(); int currentHeapIndex = heapEntry.getKey(); if (LOG.isTraceEnabled()) { LOG.trace("##### Polling next heap: " + heapEntry.getKey() + ", size: " + heapEntry.getValue().size()); } boolean finished = false; // systematically set the final heap here, just in case we exit "naturally" with no more heaps finalHeap = heapEntry.getValue(); backupHeap = new TreeSet<ParseConfiguration>(); // we jump out when either (a) all tokens have been attached or (b) we go over the max alloted time ParseConfiguration topConf = currentHeap.first(); if (topConf.isTerminal()) { LOG.trace("Exiting with terminal heap: " 
+ heapEntry.getKey() + ", size: " + heapEntry.getValue().size()); finished = true; } // check if we've gone over alloted time for this sentence long analysisTime = (new Date()).getTime() - startTime; if (maxAnalysisTimePerSentence > 0 && analysisTime > maxAnalysisTimeMilliseconds) { LOG.info("Parse tree analysis took too long for sentence: " + tokenSequence.getText()); LOG.info("Breaking out after " + maxAnalysisTimePerSentence + " seconds."); finished = true; } // check if we've enough memory to process this sentence if (minFreeMemory > 0) { long freeMemory = Runtime.getRuntime().freeMemory(); if (freeMemory < minFreeMemoryBytes) { LOG.info("Not enough memory left to parse sentence: " + tokenSequence.getText()); LOG.info("Min free memory (bytes):" + minFreeMemoryBytes); LOG.info("Current free memory (bytes): " + freeMemory); finished = true; } } // check if any of the remaining top-N solutions on any heap can lead to the correct solution if (correctSolution != null) { boolean canReachCorrectSolution = false; for (TreeSet<ParseConfiguration> heap : heaps.values()) { int j = 1; for (ParseConfiguration solution : heap) { if (j > beamWidth) break; if (solution.canReach(correctSolution)) { canReachCorrectSolution = true; break; } j++; } if (canReachCorrectSolution) break; } if (!canReachCorrectSolution) { LOG.debug("None of the solutions on the heap can reach the gold solution. Exiting."); finished = true; } } if (finished) { // combine any remaining heaps for (TreeSet<ParseConfiguration> heap : heaps.values()) { if (finalHeap != heap) { finalHeap.addAll(heap); } } break; } // remove heap from set of heaps heapEntry = heaps.pollFirstEntry(); // limit the breadth to K int maxSolutions = currentHeap.size() > this.beamWidth ? 
this.beamWidth : currentHeap.size(); int j = 0; while (currentHeap.size() > 0) { ParseConfiguration history = currentHeap.pollFirst(); backupHeap.add(history); if (LOG.isTraceEnabled()) { LOG.trace("### Next configuration on heap " + heapEntry.getKey() + ":"); LOG.trace(history.toString()); LOG.trace("Score: " + df.format(history.getScore())); LOG.trace(history.getPosTagSequence()); } Set<Transition> transitions = new HashSet<Transition>(); // test the positive rules on the current configuration boolean ruleApplied = false; if (parserPositiveRules != null) { MONITOR.startTask("check rules"); try { for (ParserRule rule : parserPositiveRules) { if (LOG.isTraceEnabled()) { LOG.trace("Checking rule: " + rule.getCondition().getName()); } RuntimeEnvironment env = this.featureService.getRuntimeEnvironment(); FeatureResult<Boolean> ruleResult = rule.getCondition().check(history, env); if (ruleResult != null && ruleResult.getOutcome()) { transitions.add(rule.getTransition()); ruleApplied = true; if (LOG.isTraceEnabled()) { LOG.trace("Rule applies. 
Setting transition to: " + rule.getTransition().getCode()); } if (!rule.getTransition().checkPreconditions(history)) { LOG.error("Cannot apply rule, preconditions not met."); ruleApplied = false; } break; } } } finally { MONITOR.endTask("check rules"); } } if (!ruleApplied) { transitions = parsingConstrainer.getPossibleTransitions(history); Set<Transition> eliminatedTransitions = new HashSet<Transition>(); for (Transition transition : transitions) { if (!transition.checkPreconditions(history)) { eliminatedTransitions.add(transition); } } transitions.removeAll(eliminatedTransitions); // apply the negative rules eliminatedTransitions = new HashSet<Transition>(); if (parserNegativeRules != null) { MONITOR.startTask("check negative rules"); try { for (ParserRule rule : parserNegativeRules) { if (LOG.isTraceEnabled()) { LOG.trace("Checking negative rule: " + rule.getCondition().getName()); } RuntimeEnvironment env = this.featureService.getRuntimeEnvironment(); FeatureResult<Boolean> ruleResult = rule.getCondition().check(history, env); if (ruleResult != null && ruleResult.getOutcome()) { eliminatedTransitions.add(rule.getTransition()); if (LOG.isTraceEnabled()) { LOG.debug("Rule applies. Eliminating transition: " + rule.getTransition().getCode()); } } } if (eliminatedTransitions.size() == transitions.size()) { LOG.debug("All transitions eliminated! Restoring original transitions."); } else { transitions.removeAll(eliminatedTransitions); } } finally { MONITOR.endTask("check negative rules"); } } } // has a positive rule been applied? 
if (transitions.size() == 0) { // just in case the we run out of both heaps and analyses, we build this backup heap backupHeap.add(history); if (LOG.isTraceEnabled()) LOG.trace( "No transitions could be applied: not counting this solution as part of the beam"); } else { // up the counter, since we will count this solution towards the heap j++; // add solutions to the heap, one per valid transition MONITOR.startTask("heap sort"); try { Map<Transition, Double> deltaScorePerTransition = new HashMap<Transition, Double>(); double absoluteMax = 1; for (Transition transition : transitions) { if (LOG.isTraceEnabled()) { LOG.trace("Applying transition: " + transition.getCode()); } ParseConfiguration configuration = this.parserServiceInternal .getConfiguration(history); transition.apply(configuration); configuration.setRankingScore(history.getRankingScore()); configuration.getIncrementalFeatureResults() .addAll(history.getIncrementalFeatureResults()); // test the features on the new configuration double scoreDelta = 0.0; MONITOR.startTask("feature analyse"); List<FeatureResult<?>> featureResults = new ArrayList<FeatureResult<?>>(); try { for (ParseConfigurationFeature<?> feature : this.parseFeatures) { MONITOR.startTask(feature.getName()); try { RuntimeEnvironment env = this.featureService.getRuntimeEnvironment(); FeatureResult<?> featureResult = feature.check(configuration, env); if (featureResult != null) { featureResults.add(featureResult); double weight = weightVector.getWeight(featureResult); scoreDelta += weight; if (LOG.isTraceEnabled()) { LOG.trace(featureResult.toString() + " = " + weight); } } } finally { MONITOR.endTask(feature.getName()); } } configuration.getIncrementalFeatureResults().add(featureResults); if (LOG.isTraceEnabled()) { LOG.trace("Score = " + configuration.getRankingScore() + " + " + scoreDelta + " = " + (configuration.getRankingScore() + scoreDelta)); } configuration.setRankingScore(configuration.getRankingScore() + scoreDelta); 
deltaScorePerTransition.put(transition, scoreDelta); if (Math.abs(scoreDelta) > absoluteMax) absoluteMax = Math.abs(scoreDelta); } finally { MONITOR.endTask("feature analyse"); } int nextHeapIndex = parseComparisonStrategy.getComparisonIndex(configuration) * 1000; while (nextHeapIndex <= currentHeapIndex) nextHeapIndex++; TreeSet<ParseConfiguration> nextHeap = heaps.get(nextHeapIndex); if (nextHeap == null) { nextHeap = new TreeSet<ParseConfiguration>(); heaps.put(nextHeapIndex, nextHeap); if (LOG.isTraceEnabled()) LOG.trace("Created heap with index: " + nextHeapIndex); } nextHeap.add(configuration); if (LOG.isTraceEnabled()) { LOG.trace("Added configuration with score " + configuration.getScore() + " to heap: " + nextHeapIndex + ", total size: " + nextHeap.size()); } configuration.clearMemory(); } // next transition // Create a probability distribution of transitions // normalise probabilities for each transition via normalised exponential // e^(x/absmax)/sum(e^(x/absmax)) // where x/absmax is in [-1,1] // e^(x/absmax) is in [1/e,e] double total = 0.0; for (Transition transition : deltaScorePerTransition.keySet()) { double deltaScore = deltaScorePerTransition.get(transition); deltaScore = Math.exp(deltaScore / absoluteMax); deltaScorePerTransition.put(transition, deltaScore); total += deltaScore; } for (Transition transition : deltaScorePerTransition.keySet()) { double probability = deltaScorePerTransition.get(transition); probability /= total; Decision<Transition> decision = machineLearningService.createDecision(transition, probability); transition.setDecision(decision); if (LOG.isTraceEnabled()) { LOG.trace("Transition: " + transition.getCode() + ", Prob: " + probability); } } } finally { MONITOR.endTask("heap sort"); } } // have we any transitions? 
// beam width test if (j == maxSolutions) break; } // next history } // next atomic index // return the best sequences on the heap List<ParseConfiguration> bestConfigurations = new ArrayList<ParseConfiguration>(); int i = 0; if (finalHeap.isEmpty()) finalHeap = backupHeap; while (!finalHeap.isEmpty()) { bestConfigurations.add(finalHeap.pollFirst()); i++; if (i >= this.getBeamWidth()) break; } if (LOG.isDebugEnabled()) { if (correctSolution != null) { LOG.debug("Gold transitions: " + correctSolution.getIncrementalOutcomes()); } for (ParseConfiguration finalConfiguration : bestConfigurations) { LOG.debug(df.format(finalConfiguration.getScore()) + ": " + finalConfiguration.toString()); LOG.debug("Pos tag sequence: " + finalConfiguration.getPosTagSequence()); LOG.debug("Transitions: " + finalConfiguration.getTransitions()); if (LOG.isTraceEnabled()) { StringBuilder sb = new StringBuilder(); sb.append(" * PosTag sequence score "); sb.append(df.format(finalConfiguration.getPosTagSequence().getScore())); sb.append(" = "); for (PosTaggedToken posTaggedToken : finalConfiguration.getPosTagSequence()) { sb.append(" * "); sb.append(df.format(posTaggedToken.getDecision().getProbability())); } sb.append(" root "); sb.append(finalConfiguration.getPosTagSequence().size()); LOG.trace(sb.toString()); sb = new StringBuilder(); sb.append(" * Token sequence score = "); sb.append(df.format(finalConfiguration.getPosTagSequence().getTokenSequence().getScore())); LOG.trace(sb.toString()); } } } return bestConfigurations; } finally { MONITOR.endTask("parseSentence"); } }