List of usage examples for java.util.SortedMap.entrySet()
Set<Map.Entry<K, V>> entrySet();
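Before the project-level examples below, a minimal self-contained sketch of the call (the map contents are made up for illustration): entrySet() on a SortedMap such as TreeMap yields its entries in ascending key order, so the loop below prints "apple", "banana", "cherry" in that sequence.

import java.util.Map;
import java.util.SortedMap;
import java.util.TreeMap;

public class SortedMapEntrySetExample {
    public static void main(String[] args) {
        // Hypothetical word counts; TreeMap keeps keys sorted by natural order.
        SortedMap<String, Integer> wordCounts = new TreeMap<>();
        wordCounts.put("banana", 2);
        wordCounts.put("apple", 5);
        wordCounts.put("cherry", 1);

        // entrySet() reflects the map's ordering: apple, banana, cherry.
        for (Map.Entry<String, Integer> entry : wordCounts.entrySet()) {
            System.out.println(entry.getKey() + " -> " + entry.getValue());
        }
    }
}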
From source file:org.ambraproject.service.user.UserServiceImpl.java
@Override
@SuppressWarnings("unchecked")
public List<UserAlert> getAvailableAlerts() {
    List<UserAlert> alerts = new ArrayList<UserAlert>();
    final SortedMap<Integer, Pair> categoryNames = new ConcurrentSkipListMap<Integer, Pair>();

    HierarchicalConfiguration hc = (HierarchicalConfiguration) configuration;
    List<HierarchicalConfiguration> categories = hc.configurationsAt(ALERTS_CATEGORIES_CATEGORY);
    for (HierarchicalConfiguration c : categories) {
        String key = c.getString("[@key]");
        int order = c.getInt("[@displayOrder]", categoryNames.size());
        String value = c.getString("");
        categoryNames.put(order, new Pair<String, String>(key, value));
    }

    final String[] weeklyCategories = hc.getStringArray(ALERTS_WEEKLY);
    final String[] monthlyCategories = hc.getStringArray(ALERTS_MONTHLY);
    final String[] subjectFilters = hc.getStringArray(SUBJECT_FILTER);

    final Set<Map.Entry<Integer, Pair>> categoryNamesSet = categoryNames.entrySet();
    for (final Map.Entry<Integer, Pair> category : categoryNamesSet) {
        final String key = (String) category.getValue().getFirst();
        boolean weeklyCategoryKey = false;
        boolean monthlyCategoryKey = false;
        boolean subjectFilter = false;

        if (ArrayUtils.contains(weeklyCategories, key)) {
            weeklyCategoryKey = true;
        }
        if (ArrayUtils.contains(monthlyCategories, key)) {
            monthlyCategoryKey = true;
        }
        if (ArrayUtils.contains(subjectFilters, key)) {
            subjectFilter = true;
        }

        alerts.add(new UserAlert((String) category.getValue().getFirst(),
                (String) category.getValue().getSecond(),
                weeklyCategoryKey, monthlyCategoryKey, subjectFilter));
    }
    return alerts;
}
From source file:com.github.pffy.chinese.freq.ChineseFrequency.java
private void analyze() {
    int inputCount = 0;
    int removedCount = 0;
    int hanziCount = 0;
    int uniqueHanziCount = 0;
    int processedCount = 0;
    int freq = 0;

    String csvOutput = this.HEADER_ROW_CSV;
    String tsvOutput = this.HEADER_ROW_TSV;
    String txtOutput = this.HEADER_ROW_TXT;

    String csv, tsv, txt;
    String str, input, pinyin, hanzi;

    Scanner sc;
    List<String> hanziList;
    Map<String, Integer> freqMap;
    JSONObject hpdx;
    String[] arr;
    Set<String> unmappedCharacters;

    hpdx = this.hpdx;

    input = this.input;
    inputCount = input.length();

    input = retainHanzi(input);
    removedCount = inputCount - input.length();

    hanziCount = input.length();

    sc = new Scanner(input);
    sc.useDelimiter("");

    hanziList = new ArrayList<String>();
    freqMap = new HashMap<String, Integer>();

    // counts occurrences
    while (sc.hasNext()) {
        str = sc.next();
        hanziList.add(str);
        if (freqMap.containsKey(str)) {
            freqMap.put(str, (Integer) freqMap.get(str).intValue() + 1);
        } else {
            freqMap.put(str, 1);
        }
    }

    // done with Scanner
    sc.close();

    uniqueHanziCount = freqMap.keySet().size();

    SortedMap<String, String> freqTreeMap = new TreeMap<String, String>(Collections.reverseOrder());

    unmappedCharacters = new HashSet<String>();

    for (Entry<String, Integer> counts : freqMap.entrySet()) {
        try {
            hanzi = counts.getKey();
            pinyin = hpdx.getString(hanzi);
        } catch (JSONException je) {
            // add this unmapped character to the list
            unmappedCharacters.add(counts.getKey());
            // not idx mapped yet. that's ok. move on.
            continue;
        }

        if (pinyin.isEmpty()) {
            // if character is unmapped in idx, do not process.
            continue;
        }

        freq = counts.getValue();
        freqTreeMap.put(
                String.format("%" + this.PADSIZE_FREQ + "s", freq).replace(' ', '0') + "-" + hanzi + "-" + pinyin,
                hanzi + "," + pinyin + "," + freq);
        processedCount++;
    }

    // outputs
    for (Entry<String, String> outputs : freqTreeMap.entrySet()) {
        csv = this.CRLF + outputs.getValue();
        csvOutput += csv;

        tsv = csv.replaceAll(",", "\t");
        tsvOutput += tsv;

        arr = csv.split(",");
        // arr[0] is hanzi. arr[1] is pinyin. arr[2] is freq.
        txt = padSummary(arr[0] + " [" + arr[1] + "]", this.PADSIZE_SUMMARY + 1) + arr[2];
        txtOutput += txt;
    }

    // cleanup
    csvOutput = csvOutput.trim();
    tsvOutput = tsvOutput.trim();
    txtOutput = txtOutput.trim();

    // post-process
    this.csvOutput = csvOutput;
    this.tsvOutput = tsvOutput;
    this.txtOutput = txtOutput;

    // counts
    this.inputCount = inputCount;
    this.removedCount = removedCount;
    this.hanziCount = hanziCount;
    this.uniqueHanziCount = uniqueHanziCount;
    this.processedCount = processedCount;
    this.unmappedCharacters = unmappedCharacters;

    // summary
    String summaryString = "";
    summaryString += padSummary(this.MSG_TOTAL_COUNT, this.PADSIZE_SUMMARY) + inputCount;
    summaryString += this.CRLF + padSummary(this.MSG_REMOVED_COUNT, this.PADSIZE_SUMMARY) + removedCount;
    summaryString += this.CRLF + padSummary(this.MSG_HANZI_COUNT, this.PADSIZE_SUMMARY) + hanziCount;
    summaryString += this.CRLF + padSummary(this.MSG_UNIQUE_COUNT, this.PADSIZE_SUMMARY) + uniqueHanziCount;
    summaryString += this.CRLF + padSummary(this.MSG_PROCESSED_COUNT, this.PADSIZE_SUMMARY) + processedCount;

    this.summary = summaryString;
}
From source file:ml.shifu.shifu.core.processor.StatsModelProcessor.java
/**
 * Compute correlation value according to correlation statistics from correlation MR job.
 *
 * @param corrMap
 *            CorrelationWritable map read from MR job output file
 * @throws IOException
 *             any IOException to write correlation value to csv file.
 */
private void computeCorrValue(SortedMap<Integer, CorrelationWritable> corrMap) throws IOException {
    boolean hasCandidates = CommonUtils.hasCandidateColumns(this.columnConfigList);
    String localCorrelationCsv = super.pathFinder.getLocalCorrelationCsvPath();
    ShifuFileUtils.createFileIfNotExists(localCorrelationCsv, SourceType.LOCAL);
    BufferedWriter writer = null;
    Map<Integer, double[]> finalCorrMap = new HashMap<>();
    try {
        writer = ShifuFileUtils.getWriter(localCorrelationCsv, SourceType.LOCAL);
        writer.write(getColumnIndexes());
        writer.newLine();
        writer.write(getColumnNames());
        writer.newLine();

        for (Entry<Integer, CorrelationWritable> entry : corrMap.entrySet()) {
            ColumnConfig xColumnConfig = this.columnConfigList.get(entry.getKey());
            if (xColumnConfig.getColumnFlag() == ColumnFlag.Meta
                    || (hasCandidates && !ColumnFlag.Candidate.equals(xColumnConfig.getColumnFlag()))) {
                continue;
            }
            CorrelationWritable xCw = entry.getValue();
            double[] corrArray = new double[this.columnConfigList.size()];
            for (int i = 0; i < corrArray.length; i++) {
                ColumnConfig yColumnConfig = this.columnConfigList.get(i);
                if (yColumnConfig.getColumnFlag() == ColumnFlag.Meta) {
                    continue;
                }
                if (entry.getKey() > i) {
                    double[] reverseDoubleArray = finalCorrMap.get(i);
                    if (reverseDoubleArray != null) {
                        corrArray[i] = reverseDoubleArray[entry.getKey()];
                    } else {
                        corrArray[i] = 0d;
                    }
                    // not compute all, only up-right matrix are computed, such case, just get [i, j] from [j, i]
                    continue;
                }

                double numerator = xCw.getAdjustCount()[i] * xCw.getXySum()[i]
                        - xCw.getAdjustSumX()[i] * xCw.getAdjustSumY()[i];
                double denominator1 = Math.sqrt(xCw.getAdjustCount()[i] * xCw.getXxSum()[i]
                        - xCw.getAdjustSumX()[i] * xCw.getAdjustSumX()[i]);
                double denominator2 = Math.sqrt(xCw.getAdjustCount()[i] * xCw.getYySum()[i]
                        - xCw.getAdjustSumY()[i] * xCw.getAdjustSumY()[i]);
                if (Double.compare(denominator1, Double.valueOf(0d)) == 0
                        || Double.compare(denominator2, Double.valueOf(0d)) == 0) {
                    corrArray[i] = 0d;
                } else {
                    corrArray[i] = numerator / (denominator1 * denominator2);
                }

                // if(corrArray[i] > 1.0005d || (entry.getKey() == 54 && i == 2124)) {
                if (corrArray[i] > 1.0005d) {
                    log.warn("Correlation value for columns {} {} > 1, below is debug info.", entry.getKey(), i);
                    log.warn("DEBUG: corr {}, value > 1d, numerator " + numerator + " denominator1 " + denominator1
                            + " denominator2 " + denominator2 + " {}, {}",
                            numerator / (denominator1 * denominator2), entry.getKey(), i);
                    log.warn("DEBUG: xCw.getAdjustCount()[i] * xCw.getXySum()[i] - xCw.getAdjustSumX()[i] * xCw.getAdjustSumY()[i] : {} * {} - {} * {} ",
                            xCw.getAdjustCount()[i], xCw.getXySum()[i], xCw.getAdjustSumX()[i], xCw.getAdjustSumY()[i]);
                    log.warn("DEBUG: xCw.getAdjustCount()[i] * xCw.getXxSum()[i] - xCw.getAdjustSumX()[i] * xCw.getAdjustSumX()[i] : {} * {} - {} * {} ",
                            xCw.getAdjustCount()[i], xCw.getXxSum()[i], xCw.getAdjustSumX()[i], xCw.getAdjustSumX()[i]);
                    log.warn("DEBUG: xCw.getAdjustCount()[i] * xCw.getYySum()[i] - xCw.getAdjustSumY()[i] * xCw.getAdjustSumY()[i] : {} * {} - {} * {} ",
                            xCw.getAdjustCount()[i], xCw.getYySum()[i], xCw.getAdjustSumY()[i], xCw.getAdjustSumY()[i]);
                }
            }
            // put to current map
            finalCorrMap.put(entry.getKey(), corrArray);

            // write to csv
            String corrStr = Arrays.toString(corrArray);
            String adjustCorrStr = corrStr.substring(1, corrStr.length() - 1);
            writer.write(entry.getKey() + "," + this.columnConfigList.get(entry.getKey()).getColumnName() + ","
                    + adjustCorrStr);
            writer.newLine();
        }
    } finally {
        IOUtils.closeQuietly(writer);
    }
}
From source file:com.oneops.metrics.es.ElasticsearchReporter.java
@Override
public void report(SortedMap<String, Gauge> gauges, SortedMap<String, Counter> counters,
        SortedMap<String, Histogram> histograms, SortedMap<String, Meter> meters,
        SortedMap<String, Timer> timers) {
    if (!checkedForIndexTemplate) {
        checkForIndexTemplate();
    }
    final long timestamp = clock.getTime() / 1000;

    currentIndexName = index;
    if (indexDateFormat != null) {
        currentIndexName += "-" + indexDateFormat.format(new Date(timestamp * 1000));
    }

    try {
        HttpURLConnection connection = openConnection("/_bulk", "POST");
        if (connection == null) {
            LOGGER.error("Could not connect to any configured elasticsearch instances: {}", Arrays.asList(hosts));
            return;
        }

        List<JsonMetric> percolationMetrics = new ArrayList<JsonMetric>();
        AtomicInteger entriesWritten = new AtomicInteger(0);

        for (Map.Entry<String, Gauge> entry : gauges.entrySet()) {
            if (entry.getValue().getValue() != null) {
                JsonMetric jsonMetric = new JsonGauge(name(prefix, entry.getKey()), timestamp, entry.getValue());
                connection = writeJsonMetricAndRecreateConnectionIfNeeded(jsonMetric, connection, entriesWritten);
                addJsonMetricToPercolationIfMatching(jsonMetric, percolationMetrics);
            }
        }

        for (Map.Entry<String, Counter> entry : counters.entrySet()) {
            JsonCounter jsonMetric = new JsonCounter(name(prefix, entry.getKey()), timestamp, entry.getValue());
            connection = writeJsonMetricAndRecreateConnectionIfNeeded(jsonMetric, connection, entriesWritten);
            addJsonMetricToPercolationIfMatching(jsonMetric, percolationMetrics);
        }

        for (Map.Entry<String, Histogram> entry : histograms.entrySet()) {
            JsonHistogram jsonMetric = new JsonHistogram(name(prefix, entry.getKey()), timestamp, entry.getValue());
            connection = writeJsonMetricAndRecreateConnectionIfNeeded(jsonMetric, connection, entriesWritten);
            addJsonMetricToPercolationIfMatching(jsonMetric, percolationMetrics);
        }

        for (Map.Entry<String, Meter> entry : meters.entrySet()) {
            JsonMeter jsonMetric = new JsonMeter(name(prefix, entry.getKey()), timestamp, entry.getValue());
            connection = writeJsonMetricAndRecreateConnectionIfNeeded(jsonMetric, connection, entriesWritten);
            addJsonMetricToPercolationIfMatching(jsonMetric, percolationMetrics);
        }

        for (Map.Entry<String, Timer> entry : timers.entrySet()) {
            JsonTimer jsonMetric = new JsonTimer(name(prefix, entry.getKey()), timestamp, entry.getValue());
            connection = writeJsonMetricAndRecreateConnectionIfNeeded(jsonMetric, connection, entriesWritten);
            addJsonMetricToPercolationIfMatching(jsonMetric, percolationMetrics);
        }

        closeConnection(connection);

        // execute the notifier impl, in case percolation found matches
        if (percolationMetrics.size() > 0 && notifier != null) {
            for (JsonMetric jsonMetric : percolationMetrics) {
                List<String> matches = getPercolationMatches(jsonMetric);
                for (String match : matches) {
                    notifier.notify(jsonMetric, match);
                }
            }
        }
        // catch the exception to make sure we do not interrupt the live application
    } catch (IOException e) {
        LOGGER.error("Couldnt report to elasticsearch server", e);
    }
}
From source file:org.jets3t.service.utils.RestUtils.java
/**
 * Calculate the canonical string for a REST/HTTP request to a storage service.
 *
 * When expires is non-null, it will be used instead of the Date header.
 * @throws UnsupportedEncodingException
 */
public static String makeServiceCanonicalString(String method, String resource, Map<String, Object> headersMap,
        String expires, String headerPrefix, List<String> serviceResourceParameterNames)
        throws UnsupportedEncodingException {
    StringBuilder canonicalStringBuf = new StringBuilder();
    canonicalStringBuf.append(method).append("\n");

    // Add all interesting headers to a list, then sort them. "Interesting"
    // is defined as Content-MD5, Content-Type, Date, and x-amz-
    SortedMap<String, Object> interestingHeaders = new TreeMap<String, Object>();
    if (headersMap != null && headersMap.size() > 0) {
        for (Map.Entry<String, Object> entry : headersMap.entrySet()) {
            Object key = entry.getKey();
            Object value = entry.getValue();
            if (key == null) {
                continue;
            }
            String lk = key.toString().toLowerCase(Locale.getDefault());

            // Ignore any headers that are not particularly interesting.
            if (lk.equals("content-type") || lk.equals("content-md5") || lk.equals("date")
                    || lk.startsWith(headerPrefix)) {
                interestingHeaders.put(lk, value);
            }
        }
    }

    // Remove default date timestamp if "x-amz-date" or "x-goog-date" is set.
    if (interestingHeaders.containsKey(Constants.REST_METADATA_ALTERNATE_DATE_AMZ)
            || interestingHeaders.containsKey(Constants.REST_METADATA_ALTERNATE_DATE_GOOG)) {
        interestingHeaders.put("date", "");
    }

    // Use the expires value as the timestamp if it is available. This trumps both the default
    // "date" timestamp, and the "x-amz-date" header.
    if (expires != null) {
        interestingHeaders.put("date", expires);
    }

    // these headers require that we still put a new line in after them,
    // even if they don't exist.
    if (!interestingHeaders.containsKey("content-type")) {
        interestingHeaders.put("content-type", "");
    }
    if (!interestingHeaders.containsKey("content-md5")) {
        interestingHeaders.put("content-md5", "");
    }

    // Finally, add all the interesting headers (i.e.: all that start with x-amz- ;-))
    for (Map.Entry<String, Object> entry : interestingHeaders.entrySet()) {
        String key = entry.getKey();
        Object value = entry.getValue();
        if (key.startsWith(headerPrefix)) {
            canonicalStringBuf.append(key).append(':').append(value);
        } else {
            canonicalStringBuf.append(value);
        }
        canonicalStringBuf.append("\n");
    }

    // don't include the query parameters...
    int queryIndex = resource.indexOf('?');
    if (queryIndex == -1) {
        canonicalStringBuf.append(resource);
    } else {
        canonicalStringBuf.append(resource.substring(0, queryIndex));
    }

    // ...unless the parameter(s) are in the set of special params
    // that actually identify a service resource.
    if (queryIndex >= 0) {
        SortedMap<String, String> sortedResourceParams = new TreeMap<String, String>();

        // Parse parameters from resource string
        String query = resource.substring(queryIndex + 1);
        for (String paramPair : query.split("&")) {
            String[] paramNameValue = paramPair.split("=");
            String name = URLDecoder.decode(paramNameValue[0], "UTF-8");
            String value = null;
            if (paramNameValue.length > 1) {
                value = URLDecoder.decode(paramNameValue[1], "UTF-8");
            }
            // Only include parameter (and its value if present) in canonical
            // string if it is a resource-identifying parameter
            if (serviceResourceParameterNames.contains(name)) {
                sortedResourceParams.put(name, value);
            }
        }

        // Add resource parameters
        if (sortedResourceParams.size() > 0) {
            canonicalStringBuf.append("?");
        }
        boolean addedParam = false;
        for (Map.Entry<String, String> entry : sortedResourceParams.entrySet()) {
            if (addedParam) {
                canonicalStringBuf.append("&");
            }
            canonicalStringBuf.append(entry.getKey());
            if (entry.getValue() != null) {
                canonicalStringBuf.append("=").append(entry.getValue());
            }
            addedParam = true;
        }
    }

    return canonicalStringBuf.toString();
}
From source file:tajo.master.GlobalPlanner.java
@VisibleForTesting
public static Map<String, Map<ScanNode, List<URI>>> hashFetches(Map<ScanNode, List<URI>> uriMap) {
    SortedMap<String, Map<ScanNode, List<URI>>> hashed = new TreeMap<String, Map<ScanNode, List<URI>>>();
    String uriPath, key;
    Map<ScanNode, List<URI>> m = null;
    List<URI> uriList = null;
    for (Entry<ScanNode, List<URI>> e : uriMap.entrySet()) {
        for (URI uri : e.getValue()) {
            uriPath = uri.toString();
            key = uriPath.substring(uriPath.lastIndexOf("=") + 1);
            if (hashed.containsKey(key)) {
                m = hashed.get(key);
            } else {
                m = new HashMap<ScanNode, List<URI>>();
            }
            if (m.containsKey(e.getKey())) {
                uriList = m.get(e.getKey());
            } else {
                uriList = new ArrayList<URI>();
            }
            uriList.add(uri);
            m.put(e.getKey(), uriList);
            hashed.put(key, m);
        }
    }

    SortedMap<String, Map<ScanNode, List<URI>>> finalHashed = new TreeMap<String, Map<ScanNode, List<URI>>>();
    for (Entry<String, Map<ScanNode, List<URI>>> entry : hashed.entrySet()) {
        finalHashed.put(entry.getKey(), combineURIByHostForBinary(entry.getValue()));
    }
    return finalHashed;
}
From source file:se.mithlond.services.shared.test.entity.PlainJaxbContextRule.java
/**
 * Acquires a JAXB Schema from the provided JAXBContext.
 *
 * @param ctx The context for which an XSD should be constructed.
 * @return A tuple holding the constructed XSD from the provided JAXBContext, and
 *         the LSResourceResolver synthesized along the way.
 * @throws NullPointerException if ctx was {@code null}.
 * @throws IllegalArgumentException if a JAXB-related exception occurred while extracting the schema.
 */
public static Tuple<Schema, LSResourceResolver> generateTransientXSD(final JAXBContext ctx)
        throws NullPointerException, IllegalArgumentException {

    // Check sanity
    org.apache.commons.lang3.Validate.notNull(ctx, "Cannot handle null ctx argument.");

    final SortedMap<String, ByteArrayOutputStream> namespace2SchemaMap = new TreeMap<>();

    try {
        ctx.generateSchema(new SchemaOutputResolver() {

            /**
             * {@inheritDoc}
             */
            @Override
            public Result createOutput(final String namespaceUri, final String suggestedFileName)
                    throws IOException {

                // The types should really be annotated with @XmlType(namespace = "... something ...")
                // to avoid using the default ("") namespace.
                if (namespaceUri.isEmpty()) {
                    log.warn("Received empty namespaceUri while resolving a generated schema. "
                            + "Did you forget to add a @XmlType(namespace = \"... something ...\") annotation "
                            + "to your class?");
                }

                // Create the result ByteArrayOutputStream
                final ByteArrayOutputStream out = new ByteArrayOutputStream();
                final StreamResult toReturn = new StreamResult(out);
                toReturn.setSystemId("");

                // Map the namespaceUri to the schemaResult.
                namespace2SchemaMap.put(namespaceUri, out);

                // All done.
                return toReturn;
            }
        });
    } catch (IOException e) {
        throw new IllegalArgumentException("Could not acquire Schema snippets.", e);
    }

    // Convert to an array of StreamSource.
    final MappedSchemaResourceResolver resourceResolver = new MappedSchemaResourceResolver();
    final StreamSource[] schemaSources = new StreamSource[namespace2SchemaMap.size()];
    int counter = 0;
    for (Map.Entry<String, ByteArrayOutputStream> current : namespace2SchemaMap.entrySet()) {

        final byte[] schemaSnippetAsBytes = current.getValue().toByteArray();
        resourceResolver.addNamespace2SchemaEntry(current.getKey(), new String(schemaSnippetAsBytes));

        if (log.isDebugEnabled()) {
            log.info("Generated schema [" + (counter + 1) + "/" + schemaSources.length + "]:\n "
                    + new String(schemaSnippetAsBytes));
        }

        // Copy the schema source to the schemaSources array.
        schemaSources[counter] = new StreamSource(new ByteArrayInputStream(schemaSnippetAsBytes), "");

        // Increase the counter
        counter++;
    }

    try {
        // All done.
        final SchemaFactory schemaFactory = SchemaFactory.newInstance(XMLConstants.W3C_XML_SCHEMA_NS_URI);
        schemaFactory.setResourceResolver(resourceResolver);
        final Schema transientSchema = schemaFactory.newSchema(schemaSources);

        // All done.
        return new Tuple<>(transientSchema, resourceResolver);
    } catch (final SAXException e) {
        throw new IllegalArgumentException("Could not create Schema from snippets.", e);
    }
}
From source file:lisong_mechlab.view.graphs.DamageGraph.java
private TableXYDataset getSeries() {
    final Collection<Modifier> modifiers = loadout.getModifiers();
    SortedMap<Weapon, List<Pair<Double, Double>>> data = new TreeMap<Weapon, List<Pair<Double, Double>>>(
            new Comparator<Weapon>() {
                @Override
                public int compare(Weapon aO1, Weapon aO2) {
                    int comp = Double.compare(aO2.getRangeMax(modifiers), aO1.getRangeMax(modifiers));
                    if (comp == 0)
                        return aO1.compareTo(aO2);
                    return comp;
                }
            });

    Double[] ranges = WeaponRanges.getRanges(loadout);
    for (double range : ranges) {
        Set<Entry<Weapon, Double>> damageDistributio = maxSustainedDPS.getWeaponRatios(range).entrySet();
        for (Map.Entry<Weapon, Double> entry : damageDistributio) {
            final Weapon weapon = entry.getKey();
            final double ratio = entry.getValue();
            final double dps = weapon.getStat("d/s", modifiers);
            final double rangeEff = weapon.getRangeEffectivity(range, modifiers);

            if (!data.containsKey(weapon)) {
                data.put(weapon, new ArrayList<Pair<Double, Double>>());
            }
            data.get(weapon).add(new Pair<Double, Double>(range, dps * ratio * rangeEff));
        }
    }

    DefaultTableXYDataset dataset = new DefaultTableXYDataset();
    for (Map.Entry<Weapon, List<Pair<Double, Double>>> entry : data.entrySet()) {
        XYSeries series = new XYSeries(entry.getKey().getName(), true, false);
        for (Pair<Double, Double> pair : entry.getValue()) {
            series.add(pair.first, pair.second);
        }
        dataset.addSeries(series);
    }
    return dataset;
}
From source file:de.faustedition.tei.TeiValidator.java
@Override
public void run() {
    try {
        final SortedSet<FaustURI> xmlErrors = new TreeSet<FaustURI>();
        final SortedMap<FaustURI, String> teiErrors = new TreeMap<FaustURI, String>();
        for (FaustURI source : xml.iterate(new FaustURI(FaustAuthority.XML, "/transcript"))) {
            try {
                final List<String> errors = validate(source);
                if (!errors.isEmpty()) {
                    teiErrors.put(source, Joiner.on("\n").join(errors));
                }
            } catch (SAXException e) {
                logger.debug("XML error while validating transcript: " + source, e);
                xmlErrors.add(source);
            } catch (IOException e) {
                logger.warn("I/O error while validating transcript: " + source, e);
            }
        }

        if (xmlErrors.isEmpty() && teiErrors.isEmpty()) {
            return;
        }

        reporter.send("TEI validation report", new ReportCreator() {

            public void create(PrintWriter body) {
                if (!xmlErrors.isEmpty()) {
                    body.println(Strings.padStart(" XML errors", 79, '='));
                    body.println();
                    body.println(Joiner.on("\n").join(xmlErrors));
                    body.println();
                }
                if (!teiErrors.isEmpty()) {
                    body.println(Strings.padStart(" TEI errors", 79, '='));
                    body.println();
                    for (Map.Entry<FaustURI, String> teiError : teiErrors.entrySet()) {
                        body.println(Strings.padStart(" " + teiError.getKey(), 79, '-'));
                        body.println();
                        body.println(teiError.getValue());
                        body.println();
                        body.println();
                    }
                }
            }
        });
    } catch (EmailException e) {
        e.printStackTrace();
    }
}
From source file:org.languagetool.rules.spelling.suggestions.XGBoostSuggestionsOrderer.java
@Override
public List<SuggestedReplacement> orderSuggestions(List<String> suggestions, String word,
        AnalyzedSentence sentence, int startPos) {
    if (!isMlAvailable()) {
        throw new IllegalStateException("Illegal call to orderSuggestions() - isMlAvailable() returned false.");
    }
    long featureStartTime = System.currentTimeMillis();

    String langCode = language.getShortCodeWithCountryAndVariant();

    Pair<List<SuggestedReplacement>, SortedMap<String, Float>> candidatesAndFeatures = computeFeatures(
            suggestions, word, sentence, startPos);
    //System.out.printf("Computing %d features took %d ms.%n", suggestions.size(), System.currentTimeMillis() - featureStartTime);
    List<SuggestedReplacement> candidates = candidatesAndFeatures.getLeft();
    SortedMap<String, Float> matchFeatures = candidatesAndFeatures.getRight();
    List<SortedMap<String, Float>> suggestionFeatures = candidates.stream()
            .map(SuggestedReplacement::getFeatures).collect(Collectors.toList());
    if (candidates.isEmpty()) {
        return Collections.emptyList();
    }
    if (candidates.size() != suggestionFeatures.size()) {
        throw new RuntimeException(
                String.format("Mismatch between candidates and corresponding feature list: length %d / %d",
                        candidates.size(), suggestionFeatures.size()));
    }

    int numFeatures = matchFeatures.size() + topN * suggestionFeatures.get(0).size(); // padding with zeros
    float[] data = new float[numFeatures];

    int featureIndex = 0;
    //System.out.printf("Features for match on '%s': %n", word);
    int expectedMatchFeatures = matchFeatureCount.getOrDefault(langCode, -1);
    int expectedCandidateFeatures = candidateFeatureCount.getOrDefault(langCode, -1);
    if (matchFeatures.size() != expectedMatchFeatures) {
        logger.warn(String.format("Match features '%s' do not have expected size %d.",
                matchFeatures, expectedMatchFeatures));
    }
    for (Map.Entry<String, Float> feature : matchFeatures.entrySet()) {
        //System.out.printf("%s = %f%n", feature.getKey(), feature.getValue());
        data[featureIndex++] = feature.getValue();
    }
    //int suggestionIndex = 0;
    for (SortedMap<String, Float> candidateFeatures : suggestionFeatures) {
        if (candidateFeatures.size() != expectedCandidateFeatures) {
            logger.warn(String.format("Candidate features '%s' do not have expected size %d.",
                    candidateFeatures, expectedCandidateFeatures));
        }
        //System.out.printf("Features for candidate '%s': %n", candidates.get(suggestionIndex++).getReplacement());
        for (Map.Entry<String, Float> feature : candidateFeatures.entrySet()) {
            //System.out.printf("%s = %f%n", feature.getKey(), feature.getValue());
            data[featureIndex++] = feature.getValue();
        }
    }

    List<Integer> labels = modelClasses.get(langCode);

    Booster model = null;
    try {
        long modelStartTime = System.currentTimeMillis();
        model = modelPool.borrowObject(language);
        //System.out.printf("Loading model took %d ms.%n", System.currentTimeMillis() - modelStartTime);
        DMatrix matrix = new DMatrix(data, 1, numFeatures);
        long predictStartTime = System.currentTimeMillis();
        float[][] output = model.predict(matrix);
        //System.out.printf("Prediction took %d ms.%n", System.currentTimeMillis() - predictStartTime);
        if (output.length != 1) {
            throw new XGBoostError(String.format(
                    "XGBoost returned array with first dimension of length %d, expected 1.", output.length));
        }
        float[] probabilities = output[0];
        if (probabilities.length != labels.size()) {
            throw new XGBoostError(
                    String.format("XGBoost returned array with second dimension of length %d, expected %d.",
                            probabilities.length, labels.size()));
        }
        // TODO: could react to label -1 (not in list) by e.g. evaluating more candidates
        //if (labels.get(0) != -1) {
        //  throw new IllegalStateException(String.format(
        //    "Expected first label of ML ranking model to be -1 (= suggestion not in list), was %d", labels.get(0)));
        //}
        //float notInListProbabilily = probabilites[0];
        for (int candidateIndex = 0; candidateIndex < candidates.size(); candidateIndex++) {
            int labelIndex = labels.indexOf(candidateIndex);
            float prob = 0.0f;
            if (labelIndex != -1) {
                prob = probabilities[labelIndex];
            }
            candidates.get(candidateIndex).setConfidence(prob);
        }
    } catch (XGBoostError xgBoostError) {
        logger.error("Error while applying XGBoost model to spelling suggestions", xgBoostError);
        return candidates;
    } catch (Exception e) {
        logger.error("Error while loading XGBoost model for spelling suggestions", e);
        return candidates;
    } finally {
        if (model != null) {
            try {
                modelPool.returnObject(language, model);
            } catch (Exception e) {
                throw new RuntimeException(e);
            }
        }
    }

    candidates.sort(Collections.reverseOrder(Comparator.comparing(SuggestedReplacement::getConfidence)));
    return candidates;
}