List of usage examples for java.util.TreeSet add
public boolean add(E e)
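TreeSet.add inserts the element in sorted order (natural ordering, or the Comparator supplied at construction) and returns true only if the element was not already present; an equal element leaves the set unchanged. Before the real-world excerpts below, here is a minimal standalone sketch of that behavior (not taken from any of the source files listed):

    import java.util.Comparator;
    import java.util.TreeSet;

    public class TreeSetAddDemo {
        public static void main(String[] args) {
            // Natural ordering: elements come back sorted, duplicates are rejected
            TreeSet<Integer> ends = new TreeSet<>();
            System.out.println(ends.add(42));   // true  - newly inserted
            System.out.println(ends.add(7));    // true
            System.out.println(ends.add(42));   // false - already present, set unchanged
            System.out.println(ends);           // [7, 42]

            // Custom Comparator: ordering and duplicate detection follow the comparator
            TreeSet<String> names = new TreeSet<>(Comparator.comparing(String::toLowerCase));
            names.add("Alice");
            System.out.println(names.add("ALICE")); // false - equal under the comparator
        }
    }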
From source file:de.tudarmstadt.ukp.dariah.IO.DARIAHWriter.java
private void convert(JCas aJCas, PrintWriter aOut) {
    int paragraphId = 0, sentenceId = 0, tokenId = 0;

    Map<Token, Collection<NamedEntity>> neCoveringMap = JCasUtil.indexCovering(aJCas, Token.class, NamedEntity.class);
    Map<Token, Collection<Chunk>> chunksCoveringMap = JCasUtil.indexCovering(aJCas, Token.class, Chunk.class);
    Map<Token, Collection<Section>> sectionCoveringMap = JCasUtil.indexCovering(aJCas, Token.class, Section.class);
    Map<Token, Collection<DirectSpeech>> directSpeechCoveringMap = JCasUtil.indexCovering(aJCas, Token.class, DirectSpeech.class);

    Map<Token, Collection<SemanticPredicate>> predIdx = JCasUtil.indexCovered(aJCas, Token.class, SemanticPredicate.class);
    Map<SemanticPredicate, Collection<Token>> pred2TokenIdx = JCasUtil.indexCovering(aJCas, SemanticPredicate.class, Token.class);
    Map<SemanticArgument, Collection<Token>> argIdx = JCasUtil.indexCovered(aJCas, SemanticArgument.class, Token.class);

    // Coreference
    Map<Token, Collection<CoreferenceLink>> corefLinksCoveringMap = JCasUtil.indexCovering(aJCas, Token.class, CoreferenceLink.class);
    HashMap<CoreferenceLink, CoreferenceChain> linkToChainMap = new HashMap<>();
    HashMap<CoreferenceChain, Integer> corefChainToIntMap = new HashMap<>();
    int corefChainId = 0;
    for (CoreferenceChain chain : JCasUtil.select(aJCas, CoreferenceChain.class)) {
        CoreferenceLink link = chain.getFirst();
        int count = 0;
        while (link != null) {
            linkToChainMap.put(link, chain);
            link = link.getNext();
            count++;
        }
        if (count > 0) {
            corefChainToIntMap.put(chain, corefChainId);
            corefChainId++;
        }
    }

    HashMap<Token, Row> ctokens = new LinkedHashMap<Token, Row>();

    Collection<Paragraph> paragraphs = select(aJCas, Paragraph.class);
    Collection<Sentence> sentences = select(aJCas, Sentence.class);

    // TreeSet keeps the sentence/paragraph end offsets sorted and de-duplicated
    TreeSet<Integer> sentenceEnds = new TreeSet<>();
    for (Sentence sentence : sentences) {
        sentenceEnds.add(sentence.getEnd());
    }
    for (Paragraph paragraph : paragraphs) {
        sentenceEnds.add(paragraph.getEnd());
    }

    for (Paragraph para : select(aJCas, Paragraph.class)) {
        for (Sentence sentence : selectCovered(Sentence.class, para)) {
            // Tokens
            List<Token> tokens = selectCovered(Token.class, sentence);

            // Check if we should try to include the morphology in output
            List<Morpheme> morphologies = selectCovered(Morpheme.class, sentence);
            boolean useMorphology = tokens.size() == morphologies.size();

            // Check if we should try to include the hyphenation in output
            List<Hyphenation> hyphenations = selectCovered(Hyphenation.class, sentence);
            boolean useHyphenation = tokens.size() == hyphenations.size();

            // Parsing information
            String[] parseFragments = null;
            List<ROOT> root = selectCovered(ROOT.class, sentence);
            if (root.size() == 1) {
                PennTreeNode rootNode = PennTreeUtils.convertPennTree(root.get(0));
                if ("ROOT".equals(rootNode.getLabel())) {
                    rootNode.setLabel("TOP");
                }
                parseFragments = toPrettyPennTree(rootNode);
            }
            boolean useParseFragements = (parseFragments != null && parseFragments.length == tokens.size());

            List<SemanticPredicate> preds = selectCovered(SemanticPredicate.class, sentence);

            for (int i = 0; i < tokens.size(); i++) {
                Row row = new Row();
                row.paragraphId = paragraphId;
                row.sentenceId = sentenceId;
                row.tokenId = tokenId;
                row.token = tokens.get(i);
                row.args = new SemanticArgument[preds.size()];

                if (useParseFragements) {
                    row.parseFragment = parseFragments[i];
                }
                if (useMorphology) {
                    row.morphology = morphologies.get(i);
                }
                if (useHyphenation) {
                    row.hyphenation = hyphenations.get(i);
                }

                // Section ID
                Collection<Section> section = sectionCoveringMap.get(row.token);
                if (section.size() > 0)
                    row.sectionId = section.toArray(new Section[0])[0].getValue();

                // Named entities
                Collection<NamedEntity> ne = neCoveringMap.get(row.token);
                if (ne.size() > 0)
                    row.ne = ne.toArray(new NamedEntity[0])[0];

                // Chunk
                Collection<Chunk> chunks = chunksCoveringMap.get(row.token);
                if (chunks.size() > 0)
                    row.chunk = chunks.toArray(new Chunk[0])[0];

                // Quote annotation
                Collection<DirectSpeech> ds = directSpeechCoveringMap.get(row.token);
                if (ds.size() > 0)
                    row.directSpeech = ds.toArray(new DirectSpeech[0])[0];

                // Coref
                Collection<CoreferenceLink> corefLinks = corefLinksCoveringMap.get(row.token);
                row.corefChains = UNUSED;
                if (corefLinks.size() > 0) {
                    String[] chainIds = new String[corefLinks.size()];
                    int k = 0;
                    for (CoreferenceLink link : corefLinks) {
                        CoreferenceChain chain = linkToChainMap.get(link);
                        int chainId = corefChainToIntMap.get(chain);
                        //chainIds[k++] = chainId;
                        String BIOMarker = "I";
                        if (link.getCoveredText().substring(0, row.token.getCoveredText().length())
                                .equals(row.token.getCoveredText())) {
                            BIOMarker = "B";
                        }
                        chainIds[k++] = BIOMarker + "-" + chainId;
                    }

                    // Sort without the BIO marker
                    Arrays.sort(chainIds, new Comparator<String>() {
                        public int compare(String idx1, String idx2) {
                            Integer id1 = new Integer(idx1.substring(2));
                            Integer id2 = new Integer(idx2.substring(2));
                            return Integer.compare(id1, id2);
                        }
                    });

                    StringBuilder chainIdsStr = new StringBuilder();
                    for (String chainId : chainIds) {
                        chainIdsStr.append(chainId + ",");
                    }
                    row.corefChains = chainIdsStr.substring(0, chainIdsStr.length() - 1);
                }

                // Predicate
                Collection<SemanticPredicate> predsForToken = predIdx.get(row.token);
                if (predsForToken != null && !predsForToken.isEmpty()) {
                    row.pred = predsForToken.iterator().next();
                }

                ctokens.put(row.token, row);
                tokenId++;
            }

            // Dependencies
            for (Dependency rel : selectCovered(Dependency.class, sentence)) {
                ctokens.get(rel.getDependent()).deprel = rel;
            }

            // Semantic arguments
            for (int p = 0; p < preds.size(); p++) {
                FSArray args = preds.get(p).getArguments();

                // Set the column position info
                Collection<Token> tokensOfPredicate = pred2TokenIdx.get(preds.get(p));
                for (Token t : tokensOfPredicate) {
                    Row row = ctokens.get(t);
                    row.semanticArgIndex = p;
                }

                // Set the arguments information
                for (SemanticArgument arg : select(args, SemanticArgument.class)) {
                    for (Token t : argIdx.get(arg)) {
                        Row row = ctokens.get(t);
                        row.args[p] = arg;
                    }
                }
            }

            sentenceId++;
        }
        paragraphId++;
    }

    // Write to output file
    int maxPredArguments = 0;
    for (Row row : ctokens.values()) {
        maxPredArguments = Math.max(maxPredArguments, row.args.length);
    }
    aOut.printf("%s\n", StringUtils.join(getHeader(maxPredArguments), "\t").trim());
    for (Row row : ctokens.values()) {
        String[] output = getData(ctokens, maxPredArguments, row);
        aOut.printf("%s\n", StringUtils.join(output, "\t").trim());
    }
}
From source file:org.opendatakit.security.server.SecurityServiceUtil.java
/**
 * Get the complete set of granted authorities (ROLE and RUN_AS grants) this user possesses.
 *
 * @param cc
 * @return
 * @throws ODKDatastoreException
 */
public static TreeSet<GrantedAuthorityName> getCurrentUserSecurityInfo(CallingContext cc)
        throws ODKDatastoreException {
    User user = cc.getCurrentUser();
    TreeSet<GrantedAuthorityName> authorities = new TreeSet<GrantedAuthorityName>();
    if (user.isAnonymous()) {
        RoleHierarchy hierarchy = cc.getHierarchicalRoleRelationships();
        Set<GrantedAuthority> badGrants = new TreeSet<GrantedAuthority>();
        // The assigned groups are the specialGroup that this user defines
        // (i.e., anonymous or daemon) plus all directly-assigned assignable
        // permissions.
        GrantedAuthority specialAuth = new SimpleGrantedAuthority(
                GrantedAuthorityName.USER_IS_ANONYMOUS.name());
        Collection<? extends GrantedAuthority> auths = hierarchy
                .getReachableGrantedAuthorities(Collections.singletonList(specialAuth));
        for (GrantedAuthority auth : auths) {
            GrantedAuthorityName name = mapName(auth, badGrants);
            if (name != null && !GrantedAuthorityName.permissionsCanBeAssigned(auth.getAuthority())) {
                authorities.add(name);
            }
        }
        removeBadGrantedAuthorities(badGrants, cc);
    } else {
        RegisteredUsersTable t;
        t = RegisteredUsersTable.getUserByUri(user.getUriUser(), cc.getDatastore(), user);
        Datastore ds = cc.getDatastore();
        RoleHierarchy hierarchy = (RoleHierarchy) cc.getHierarchicalRoleRelationships();
        Set<GrantedAuthority> grants = UserGrantedAuthority.getGrantedAuthorities(user.getUriUser(), ds, user);
        Set<GrantedAuthority> badGrants = new TreeSet<GrantedAuthority>();
        TreeSet<GrantedAuthorityName> groups = new TreeSet<GrantedAuthorityName>();
        for (GrantedAuthority grant : grants) {
            GrantedAuthorityName name = mapName(grant, badGrants);
            if (name != null) {
                if (GrantedAuthorityName.permissionsCanBeAssigned(grant.getAuthority())) {
                    groups.add(name);
                } else {
                    authorities.add(name);
                }
            }
        }
        Collection<? extends GrantedAuthority> auths = hierarchy.getReachableGrantedAuthorities(grants);
        for (GrantedAuthority auth : auths) {
            GrantedAuthorityName name = mapName(auth, badGrants);
            if (name != null && !GrantedAuthorityName.permissionsCanBeAssigned(auth.getAuthority())) {
                authorities.add(name);
            }
        }
        removeBadGrantedAuthorities(badGrants, cc);
    }
    return authorities;
}
From source file:net.sourceforge.sqlexplorer.sessiontree.model.utility.Dictionary.java
/**
 * Load dictionary data for catalog
 *
 * @param iNode catalog node to load
 * @param monitor ProgressMonitor displayed whilst loading
 * @throws InterruptedException If user cancelled loading
 */
private void loadSchemaCatalog(INode iNode, IProgressMonitor monitor) throws InterruptedException {
    if (_logger.isDebugEnabled()) {
        _logger.debug("Loading dictionary: " + iNode.getName());
    }

    // check for cancellation by user
    if (monitor.isCanceled()) {
        throw new InterruptedException(Messages.getString("Progress.Dictionary.Cancelled"));
    }

    putCatalogSchemaName(iNode.toString(), iNode);
    monitor.subTask(iNode.getName());

    INode[] children = iNode.getChildNodes();
    if (children != null) {

        // check for cancellation by user
        if (monitor.isCanceled()) {
            throw new InterruptedException(Messages.getString("Progress.Dictionary.Cancelled"));
        }

        // divide work equally between type nodes
        int typeNodeWorkUnit = ROOT_WORK_UNIT / SUPPORTED_CONTENT_ASSIST_TYPES.length;
        int typeNodeWorkCompleted = 0;

        for (int i = 0; i < children.length; i++) {
            INode typeNode = children[i];

            if (_logger.isDebugEnabled()) {
                _logger.debug("Loading dictionary: " + typeNode.getName());
            }

            // only load a few types like tables and view nodes into the
            // dictionary
            boolean isIncludedInContentAssist = false;
            for (int j = 0; j < SUPPORTED_CONTENT_ASSIST_TYPES.length; j++) {
                if (typeNode.getType().equalsIgnoreCase(SUPPORTED_CONTENT_ASSIST_TYPES[j])) {
                    isIncludedInContentAssist = true;
                }
            }
            if (!isIncludedInContentAssist) {
                continue;
            }

            monitor.subTask(typeNode.getName());

            // check for cancellation by user
            if (monitor.isCanceled()) {
                throw new InterruptedException(Messages.getString("Progress.Dictionary.Cancelled"));
            }

            INode tableNodes[] = typeNode.getChildNodes();
            if (tableNodes != null) {

                // check for cancellation by user
                if (monitor.isCanceled()) {
                    throw new InterruptedException(Messages.getString("Progress.Dictionary.Cancelled"));
                }

                int tableNodeWorkUnit = typeNodeWorkUnit / tableNodes.length;

                for (int j = 0; j < tableNodes.length; j++) {
                    INode tableNode = tableNodes[j];

                    if (_logger.isDebugEnabled()) {
                        _logger.debug("Loading dictionary: " + tableNode.getName());
                    }

                    if (monitor != null) {
                        monitor.worked(tableNodeWorkUnit);
                        typeNodeWorkCompleted = typeNodeWorkCompleted + tableNodeWorkUnit;
                        if (_logger.isDebugEnabled()) {
                            _logger.debug("worked table: " + tableNodeWorkUnit + ", total type work: "
                                    + typeNodeWorkCompleted);
                        }
                        monitor.subTask(tableNode.getQualifiedName());

                        // check for cancellation by user
                        if (monitor.isCanceled()) {
                            throw new InterruptedException(Messages.getString("Progress.Dictionary.Cancelled"));
                        }
                    }

                    // add table name
                    ArrayList tableDetails = (ArrayList) getByTableName(tableNode.getName());
                    if (tableDetails == null) {
                        tableDetails = new ArrayList();
                        putTableName(tableNode.getName(), tableDetails);
                    }
                    tableDetails.add(tableNode);

                    // add column names
                    if (tableNode instanceof TableNode) {
                        TreeSet columnNames = new TreeSet();
                        List columns = ((TableNode) tableNode).getColumnNames();
                        if (columns != null) {
                            Iterator it = columns.iterator();
                            while (it.hasNext()) {
                                columnNames.add(it.next());
                            }
                        }
                        putColumnsByTableName(tableNode.getName(), columnNames);
                    }
                }
            }

            if (typeNodeWorkCompleted < typeNodeWorkUnit) {
                // consume remainder of work for this type node
                if (_logger.isDebugEnabled()) {
                    _logger.debug("consuming remainder: " + (typeNodeWorkUnit - typeNodeWorkCompleted));
                }
                monitor.worked(typeNodeWorkUnit - typeNodeWorkCompleted);
            }
            typeNodeWorkCompleted = 0;
        }
    }
}
From source file:com.linuxbox.enkive.web.StatsServlet.java
private void consolidateMapsHelper(Map<String, Object> templateData, Map<String, Object> consolidatedMap,
        LinkedList<String> path, List<ConsolidationKeyHandler> statKeys, List<Map<String, Object>> serviceData) {
    for (String key : templateData.keySet()) {
        path.addLast(key);
        ConsolidationKeyHandler matchingConsolidationDefinition = findMatchingPath(path, statKeys);
        if (matchingConsolidationDefinition != null) {
            TreeSet<Map<String, Object>> dataSet = new TreeSet<Map<String, Object>>(new NumComparator());
            for (Map<String, Object> dataMap : serviceData) {
                Map<String, Object> dataVal = createMap(getDataVal(dataMap, path));
                if (dataVal != null && !dataVal.isEmpty()) {
                    dataVal.put(STAT_TIMESTAMP, dataMap.get(STAT_TIMESTAMP));
                    dataSet.add(dataVal);
                }
            }
            putOnPath(path, consolidatedMap, dataSet);
        } else {
            if (templateData.get(key) instanceof Map) {
                consolidateMapsHelper((Map<String, Object>) templateData.get(key), consolidatedMap, path,
                        statKeys, serviceData);
            }
        }
        path.removeLast();
    }
}
From source file:com.clust4j.algo.MeanShiftTests.java
@Test
public void testAutoEstimation() {
    Array2DRowRealMatrix iris = data_;
    final double[][] X = iris.getData();

    // MS estimates bw at 1.2032034114912584
    final double bandwidth = 1.2032034114912584;
    assertTrue(MeanShift.autoEstimateBW(iris, 0.3, Distance.EUCLIDEAN, GlobalState.DEFAULT_RANDOM_STATE,
            false) == bandwidth);

    // Asserting fit works without breaking things...
    RadiusNeighbors r = new RadiusNeighbors(iris, new RadiusNeighborsParameters(bandwidth)).fit();

    TreeSet<MeanShiftSeed> centers = new TreeSet<>();
    for (double[] seed : X)
        centers.add(MeanShift.singleSeed(seed, r, X, 300));

    assertTrue(centers.size() == 7);

    double[][] expected_dists = new double[][] {
            new double[] { 6.2114285714285691, 2.8928571428571428, 4.8528571428571423, 1.6728571428571426 },
            new double[] { 6.1927536231884037, 2.8768115942028984, 4.8188405797101437, 1.6463768115942023 },
            new double[] { 6.1521739130434767, 2.850724637681159, 4.7405797101449272, 1.6072463768115937 },
            new double[] { 6.1852941176470564, 2.8705882352941177, 4.8058823529411754, 1.6397058823529407 },
            new double[] { 6.1727272727272711, 2.874242424242424, 4.7757575757575745, 1.6287878787878785 },
            new double[] { 5.0163265306122451, 3.440816326530614, 1.46734693877551, 0.24285714285714283 },
            new double[] { 5.0020833333333341, 3.4208333333333356, 1.4666666666666668, 0.23958333333333334 } };

    int[] expected_centers = new int[] { 70, 69, 69, 68, 66, 49, 48 };

    int idx = 0;
    for (MeanShiftSeed seed : centers) {
        assertTrue(VecUtils.equalsWithTolerance(seed.dists, expected_dists[idx], 1e-1));
        assertTrue(seed.count == expected_centers[idx]);
        idx++;
    }
}
From source file:ldbc.snb.datagen.generator.CommentGenerator.java
public long createComments(RandomGeneratorFarm randomFarm, final Forum forum, final Post post,
        long numComments, long startId, PersonActivityExporter exporter) throws IOException {
    long nextId = startId;

    ArrayList<Message> replyCandidates = new ArrayList<Message>();
    replyCandidates.add(post);

    Properties prop = new Properties();
    prop.setProperty("type", "comment");

    for (int i = 0; i < numComments; ++i) {
        int replyIndex = randomFarm.get(RandomGeneratorFarm.Aspect.REPLY_TO).nextInt(replyCandidates.size());
        Message replyTo = replyCandidates.get(replyIndex);

        ArrayList<ForumMembership> validMemberships = new ArrayList<ForumMembership>();
        for (ForumMembership fM : forum.memberships()) {
            if (fM.creationDate() + DatagenParams.deltaTime <= replyTo.creationDate()) {
                validMemberships.add(fM);
            }
        }
        if (validMemberships.size() == 0) {
            return nextId;
        }
        ForumMembership member = validMemberships.get(
                randomFarm.get(RandomGeneratorFarm.Aspect.MEMBERSHIP_INDEX).nextInt(validMemberships.size()));

        TreeSet<Integer> tags = new TreeSet<Integer>();
        String content = "";
        String gif = "";
        boolean isShort = false;
        if (randomFarm.get(RandomGeneratorFarm.Aspect.REDUCED_TEXT).nextDouble() > 0.6666) {
            ArrayList<Integer> currentTags = new ArrayList<Integer>();
            Iterator<Integer> it = replyTo.tags().iterator();
            while (it.hasNext()) {
                Integer tag = it.next();
                if (randomFarm.get(RandomGeneratorFarm.Aspect.TAG).nextDouble() > 0.5) {
                    tags.add(tag);
                }
                currentTags.add(tag);
            }
            for (int j = 0; j < (int) Math.ceil(replyTo.tags().size() / 2.0); ++j) {
                int randomTag = currentTags
                        .get(randomFarm.get(RandomGeneratorFarm.Aspect.TAG).nextInt(currentTags.size()));
                tags.add(Dictionaries.tagMatrix
                        .getRandomRelated(randomFarm.get(RandomGeneratorFarm.Aspect.TOPIC), randomTag));
            }
            content = this.generator.generateText(member.person(), tags, prop);
        } else {
            isShort = true;
            if (!richRdf || randomFarm.get(RandomGeneratorFarm.Aspect.COMMENT_ISGIF).nextDouble() > 0.8) {
                int index = randomFarm.get(RandomGeneratorFarm.Aspect.TEXT_SIZE).nextInt(shortComments_.length);
                content = shortComments_[index];
            } else {
                int index = randomFarm.get(RandomGeneratorFarm.Aspect.COMMENT_GIF).nextInt(gifs_.length);
                gif = gifs_[index];
            }
        }

        long creationDate = Dictionaries.dates.powerlawCommDateDay(
                randomFarm.get(RandomGeneratorFarm.Aspect.DATE),
                replyTo.creationDate() + DatagenParams.deltaTime);

        /*if( creationDate <= Dictionaries.dates.getEndDateTime() )*/ {
            Comment comment = new Comment(SN.formId(SN.composeId(nextId++, creationDate)), creationDate,
                    member.person(), forum.id(), content, tags,
                    Dictionaries.ips.getIP(randomFarm.get(RandomGeneratorFarm.Aspect.IP),
                            randomFarm.get(RandomGeneratorFarm.Aspect.DIFF_IP),
                            randomFarm.get(RandomGeneratorFarm.Aspect.DIFF_IP_FOR_TRAVELER),
                            member.person().ipAddress(), creationDate),
                    Dictionaries.browsers.getPostBrowserId(
                            randomFarm.get(RandomGeneratorFarm.Aspect.DIFF_BROWSER),
                            randomFarm.get(RandomGeneratorFarm.Aspect.BROWSER), member.person().browserId()),
                    post.messageId(), replyTo.messageId(), gif);

            if (richRdf) {
                comment.richRdf(true);
                if (randomFarm.get(RandomGeneratorFarm.Aspect.COMMENT_MENTIONED).nextDouble() > 0.6) {
                    TreeSet<Long> t = new TreeSet<Long>();
                    // The user mentions one or more (up to 4) members of the forum
                    t.add(validMemberships
                            .get(randomFarm.get(RandomGeneratorFarm.Aspect.MEMBERSHIP_INDEX_COMMENT_MENTIONED)
                                    .nextInt(validMemberships.size()))
                            .person().accountId());
                    double probabilityForNumberOfMentions = randomFarm
                            .get(RandomGeneratorFarm.Aspect.COMMENT_MENTIONED_NUM).nextDouble();
                    if (probabilityForNumberOfMentions > 0.5)
                        t.add(validMemberships.get(
                                randomFarm.get(RandomGeneratorFarm.Aspect.MEMBERSHIP_INDEX_COMMENT_MENTIONED)
                                        .nextInt(validMemberships.size()))
                                .person().accountId());
                    if (probabilityForNumberOfMentions > 0.75)
                        t.add(validMemberships.get(
                                randomFarm.get(RandomGeneratorFarm.Aspect.MEMBERSHIP_INDEX_COMMENT_MENTIONED)
                                        .nextInt(validMemberships.size()))
                                .person().accountId());
                    if (probabilityForNumberOfMentions > 0.95)
                        t.add(validMemberships.get(
                                randomFarm.get(RandomGeneratorFarm.Aspect.MEMBERSHIP_INDEX_COMMENT_MENTIONED)
                                        .nextInt(validMemberships.size()))
                                .person().accountId());
                    comment.mentioned(t);
                }
                if (randomFarm.get(RandomGeneratorFarm.Aspect.COMMENT_VISIBILITY).nextDouble() > 0.95) {
                    if (comment.mentioned() == null || randomFarm
                            .get(RandomGeneratorFarm.Aspect.COMMENT_VISIBILITY_TF).nextDouble() > 0.5)
                        comment.setPublic(true);
                    else
                        comment.setPublic(false);
                }
                if (randomFarm.get(RandomGeneratorFarm.Aspect.COMMENT_LINK).nextDouble() > 0.57) {
                    comment.link("http://ld.bc/" + RandomStringUtils.random(6, true, false));
                }
            }
            if (richRdf && randomFarm.get(RandomGeneratorFarm.Aspect.COMMENT_COUNTRY).nextDouble() > 0.02)
                comment.countryKnown(false);

            if (!isShort)
                replyCandidates.add(new Comment(comment));
            exporter.export(comment);

            if (comment.content().length() > 10
                    && randomFarm.get(RandomGeneratorFarm.Aspect.NUM_LIKE).nextDouble() <= 0.1) {
                likeGenerator_.generateLikes(randomFarm.get(RandomGeneratorFarm.Aspect.NUM_LIKE), forum,
                        comment, Like.LikeType.COMMENT, exporter);
            }
        }
    }
    replyCandidates.clear();
    return nextId;
}
From source file:com.joliciel.jochre.boundaries.RecursiveShapeSplitter.java
List<ShapeSequence> split(Shape shape, int depth, Shape originalShape, boolean leftToRight) {
    String padding = "-";
    for (int i = 0; i < depth; i++)
        padding += "-";
    padding += " ";

    LOG.trace(padding + "Splitting shape: " + shape.getLeft() + " , " + shape.getRight());
    LOG.trace(padding + "depth: " + depth);

    List<ShapeSequence> shapeSequences = new ArrayList<ShapeSequence>();

    // check if shape is wide enough to bother with
    double widthRatio = (double) shape.getWidth() / (double) shape.getXHeight();
    LOG.trace(padding + "widthRatio: " + widthRatio);

    if (widthRatio < minWidthRatio || depth >= maxDepth) {
        LOG.trace(padding + "too narrow or too deep");
        ShapeSequence shapeSequence = this.boundaryServiceInternal.getEmptyShapeSequence();
        shapeSequence.addShape(shape, originalShape);
        shapeSequences.add(shapeSequence);
    } else {
        List<Split> splitCandidates = this.splitCandidateFinder.findSplitCandidates(shape);
        TreeSet<ShapeSequence> myShapeSequences = new TreeSet<ShapeSequence>();

        TreeSet<WeightedOutcome<Split>> weightedSplits = new TreeSet<WeightedOutcome<Split>>();
        for (Split splitCandidate : splitCandidates) {
            double splitProb = this.shouldSplit(splitCandidate);
            WeightedOutcome<Split> weightedSplit = new WeightedOutcome<Split>(splitCandidate, splitProb);
            weightedSplits.add(weightedSplit);
        }

        double maxSplitProb = 0.0;
        if (weightedSplits.size() > 0)
            maxSplitProb = weightedSplits.first().getWeight();

        double noSplitProb = 1 - maxSplitProb;
        if (noSplitProb > maxSplitProb)
            maxSplitProb = noSplitProb;

        Split noSplit = boundaryServiceInternal.getEmptySplit(shape);
        noSplit.setPosition(-1);
        WeightedOutcome<Split> weightedNoSplit = new WeightedOutcome<Split>(noSplit, noSplitProb);
        weightedSplits.add(weightedNoSplit);

        boolean topCandidate = true;
        double topCandidateWeight = 1.0;
        for (WeightedOutcome<Split> weightedSplit : weightedSplits) {
            Split splitCandidate = weightedSplit.getOutcome();
            double splitProb = weightedSplit.getWeight();
            LOG.trace(padding + "splitCandidate: left=" + splitCandidate.getShape().getLeft() + ", pos="
                    + splitCandidate.getPosition() + ", initial prob: " + splitProb);

            if (topCandidate) {
                LOG.trace(padding + "topCandidate");
            }

            if (splitCandidate.getPosition() < 0) {
                // This is the no-split candidate
                if (topCandidate)
                    topCandidateWeight = 1.0;

                ShapeSequence shapeSequence = boundaryServiceInternal.getEmptyShapeSequence();
                shapeSequence.addShape(shape, originalShape);
                double prob = (splitProb / maxSplitProb) * topCandidateWeight;
                LOG.trace(padding + "noSplit prob=(" + splitProb + " / " + maxSplitProb + ") * "
                        + topCandidateWeight + " = " + prob);

                Decision<SplitMergeOutcome> decision = boundaryDecisionFactory
                        .createDecision(SplitOutcome.DO_NOT_SPLIT.getCode(), prob);
                shapeSequence.addDecision(decision);
                myShapeSequences.add(shapeSequence);
            } else {
                // a proper split
                Shape leftShape = shape.getJochreImage().getShape(shape.getLeft(), shape.getTop(),
                        shape.getLeft() + splitCandidate.getPosition(), shape.getBottom());
                Shape rightShape = shape.getJochreImage().getShape(
                        shape.getLeft() + splitCandidate.getPosition() + 1, shape.getTop(), shape.getRight(),
                        shape.getBottom());

                // for each split recursively try to split it again up to depth of m
                // Note: m=2 is probably enough, since we're not expecting more than 4 letters per shape (3 splits)
                List<ShapeSequence> leftShapeSequences = this.split(leftShape, depth + 1, originalShape,
                        leftToRight);
                List<ShapeSequence> rightShapeSequences = this.split(rightShape, depth + 1, originalShape,
                        leftToRight);

                if (topCandidate) {
                    // find the no-split sequence in each sub-sequence
                    ShapeSequence noSplitLeft = null;
                    for (ShapeSequence leftShapeSequence : leftShapeSequences) {
                        if (leftShapeSequence.size() == 1) {
                            noSplitLeft = leftShapeSequence;
                            break;
                        }
                    }

                    ShapeSequence noSplitRight = null;
                    for (ShapeSequence rightShapeSequence : rightShapeSequences) {
                        if (rightShapeSequence.size() == 1) {
                            noSplitRight = rightShapeSequence;
                            break;
                        }
                    }

                    // we should be guaranteed to find a noSplitLeft and noSplitRight
                    // since a no-split candidate is always returned
                    topCandidateWeight = noSplitLeft.getScore() * noSplitRight.getScore();
                    LOG.trace(padding + "topCandidateWeight=" + noSplitLeft.getScore() + " *"
                            + noSplitRight.getScore() + " = " + topCandidateWeight);
                }

                for (ShapeSequence leftShapeSequence : leftShapeSequences) {
                    for (ShapeSequence rightShapeSequence : rightShapeSequences) {
                        ShapeSequence newSequence = null;
                        if (leftToRight)
                            newSequence = boundaryServiceInternal.getShapeSequence(leftShapeSequence,
                                    rightShapeSequence);
                        else
                            newSequence = boundaryServiceInternal.getShapeSequence(rightShapeSequence,
                                    leftShapeSequence);

                        if (LOG.isTraceEnabled()) {
                            StringBuilder sb = new StringBuilder();
                            for (ShapeInSequence splitShape : newSequence) {
                                sb.append("(" + splitShape.getShape().getLeft() + ","
                                        + splitShape.getShape().getRight() + ") ");
                            }
                            LOG.trace(padding + sb.toString());
                        }

                        double totalProb = 1.0;
                        for (Decision<SplitMergeOutcome> decision : newSequence.getDecisions()) {
                            totalProb = totalProb * decision.getProbability();
                        }
                        newSequence.getDecisions().clear();

                        double prob = 0.0;
                        if (topCandidate) {
                            prob = totalProb * (splitProb / maxSplitProb);
                            LOG.trace(padding + "prob=" + totalProb + " * (" + splitProb + " / " + maxSplitProb
                                    + ") = " + prob);
                        } else {
                            prob = totalProb * (splitProb / maxSplitProb) * topCandidateWeight;
                            LOG.trace(padding + "prob=" + totalProb + " * (" + splitProb + " / " + maxSplitProb
                                    + ") * " + topCandidateWeight + " = " + prob);
                        }

                        Decision<SplitMergeOutcome> decision = this.boundaryDecisionFactory
                                .createDecision(SplitOutcome.DO_SPLIT.getCode(), prob);
                        newSequence.addDecision(decision);
                        myShapeSequences.add(newSequence);
                    }
                }
            }
            topCandidate = false;
        }

        int i = 0;
        for (ShapeSequence shapeSequence : myShapeSequences) {
            // Note: we always return the no-split option, even if it's very low probability
            if (shapeSequence.size() == 1 || i < beamWidth) {
                shapeSequences.add(shapeSequence);
            }
            i++;
        }
    }
    return shapeSequences;
}
From source file:net.dv8tion.jda.core.entities.impl.MessageImpl.java
@Override
public synchronized String getStrippedContent() {
    if (strippedContent == null) {
        String tmp = getContent();
        // all the formatting keys to keep track of
        String[] keys = new String[] { "*", "_", "`", "~~" };

        // find all tokens (formatting strings described above)
        TreeSet<FormatToken> tokens = new TreeSet<>((t1, t2) -> Integer.compare(t1.start, t2.start));
        for (String key : keys) {
            Matcher matcher = Pattern.compile(Pattern.quote(key)).matcher(tmp);
            while (matcher.find()) {
                tokens.add(new FormatToken(key, matcher.start()));
            }
        }

        // iterate over all tokens, find all matching pairs, and add them to the list toRemove
        Stack<FormatToken> stack = new Stack<>();
        List<FormatToken> toRemove = new ArrayList<>();
        boolean inBlock = false;
        for (FormatToken token : tokens) {
            if (stack.empty() || !stack.peek().format.equals(token.format)
                    || stack.peek().start + token.format.length() == token.start) {
                // we are at opening tag
                if (!inBlock) {
                    // we are outside of block -> handle normally
                    if (token.format.equals("`")) {
                        // block start... invalidate all previous tags
                        stack.clear();
                        inBlock = true;
                    }
                    stack.push(token);
                } else if (token.format.equals("`")) {
                    // we are inside of a block -> handle only block tag
                    stack.push(token);
                }
            } else if (!stack.empty()) {
                // we found a matching close-tag
                toRemove.add(stack.pop());
                toRemove.add(token);
                if (token.format.equals("`") && stack.empty()) {
                    // close tag closed the block
                    inBlock = false;
                }
            }
        }

        // sort tags to remove by their start-index and iteratively build the remaining string
        Collections.sort(toRemove, (t1, t2) -> Integer.compare(t1.start, t2.start));
        StringBuilder out = new StringBuilder();
        int currIndex = 0;
        for (FormatToken formatToken : toRemove) {
            if (currIndex < formatToken.start) {
                out.append(tmp.substring(currIndex, formatToken.start));
            }
            currIndex = formatToken.start + formatToken.format.length();
        }
        if (currIndex < tmp.length()) {
            out.append(tmp.substring(currIndex));
        }

        // return the stripped text, escape all remaining formatting characters
        // (did not have matching open/close before or were left/right of block)
        strippedContent = out.toString().replace("*", "\\*").replace("_", "\\_").replace("~", "\\~");
    }
    return strippedContent;
}
From source file:mvm.rya.indexing.accumulo.freetext.AccumuloFreeTextIndexer.java
private void init() throws AccumuloException, AccumuloSecurityException, TableNotFoundException,
        TableExistsException {
    String doctable = ConfigUtils.getFreeTextDocTablename(conf);
    String termtable = ConfigUtils.getFreeTextTermTablename(conf);

    docTableNumPartitions = ConfigUtils.getFreeTextDocNumPartitions(conf);
    int termTableNumPartitions = ConfigUtils.getFreeTextTermNumPartitions(conf);

    TableOperations tableOps = ConfigUtils.getConnector(conf).tableOperations();

    // Create term table partitions
    boolean createdTermTable = ConfigUtils.createTableIfNotExists(conf, termtable);
    if (createdTermTable && !ConfigUtils.useMockInstance(conf) && termTableNumPartitions > 0) {
        TreeSet<Text> splits = new TreeSet<Text>();

        // split on the "Term List" and "Reverse Term list" boundary
        splits.add(new Text(ColumnPrefixes.getRevTermListColFam("")));

        // Symmetrically split the "Term List" and "Reverse Term list"
        int numSubpartitions = ((termTableNumPartitions - 1) / 2);
        if (numSubpartitions > 0) {
            int step = (26 / numSubpartitions);
            for (int i = 0; i < numSubpartitions; i++) {
                String nextChar = String.valueOf((char) ('a' + (step * i)));
                splits.add(new Text(ColumnPrefixes.getTermListColFam(nextChar)));
                splits.add(new Text(ColumnPrefixes.getRevTermListColFam(nextChar)));
            }
        }
        tableOps.addSplits(termtable, splits);
    }

    // Create document (text) table partitions
    boolean createdDocTable = ConfigUtils.createTableIfNotExists(conf, doctable);
    if (createdDocTable && !ConfigUtils.useMockInstance(conf)) {
        TreeSet<Text> splits = new TreeSet<Text>();
        for (int i = 0; i < docTableNumPartitions; i++) {
            splits.add(genPartition(i, docTableNumPartitions));
        }
        tableOps.addSplits(doctable, splits);

        // Add a tablet level Bloom filter for the Column Family.
        // This will allow us to quickly determine if a term is contained in a tablet.
        tableOps.setProperty(doctable, "table.bloom.key.functor", ColumnFamilyFunctor.class.getCanonicalName());
        tableOps.setProperty(doctable, "table.bloom.enabled", Boolean.TRUE.toString());
    }

    mtbw = ConfigUtils.createMultitableBatchWriter(conf);
    docTableBw = mtbw.getBatchWriter(doctable);
    termTableBw = mtbw.getBatchWriter(termtable);

    tokenizer = ConfigUtils.getFreeTextTokenizer(conf);
    validPredicates = ConfigUtils.getFreeTextPredicates(conf);

    queryTermLimit = ConfigUtils.getFreeTextTermLimit(conf);
}
From source file:fr.dutra.confluence2wordpress.action.SyncAction.java
private void initSessionElements() throws WordpressXmlRpcException {
    if (getWordpressUsers() == null) {
        WordpressClient client = pluginSettingsManager.getWordpressClient();
        Future<List<WordpressUser>> futureUsers = client.getUsers();
        Future<List<WordpressCategory>> futureCategories = client.getCategories();
        Future<List<WordpressTag>> futureTags = client.getTags();

        Set<WordpressUser> users = new TreeSet<WordpressUser>(new Comparator<WordpressUser>() {
            @Override
            public int compare(WordpressUser o1, WordpressUser o2) {
                return new CompareToBuilder()
                        .append(StringUtils.lowerCase(o1.getNiceUsername()),
                                StringUtils.lowerCase(o2.getNiceUsername()))
                        .append(o1.getId(), o2.getId()).toComparison();
            }
        });

        Set<WordpressCategory> categories = new TreeSet<WordpressCategory>(new Comparator<WordpressCategory>() {
            @Override
            public int compare(WordpressCategory o1, WordpressCategory o2) {
                return new CompareToBuilder().append(StringUtils.lowerCase(o1.getCategoryName()),
                        StringUtils.lowerCase(o2.getCategoryName())).toComparison();
            }
        });

        Set<WordpressTag> tags = new TreeSet<WordpressTag>(new Comparator<WordpressTag>() {
            @Override
            public int compare(WordpressTag o1, WordpressTag o2) {
                return new CompareToBuilder()
                        .append(StringUtils.lowerCase(o1.getName()), StringUtils.lowerCase(o2.getName()))
                        .toComparison();
            }
        });

        try {
            users.addAll(futureUsers.get(30, TimeUnit.SECONDS));
            categories.addAll(futureCategories.get(30, TimeUnit.SECONDS));
            tags.addAll(futureTags.get(30, TimeUnit.SECONDS));
        } catch (InterruptedException e) {
            throw new WordpressXmlRpcException("Error contacting Wordpress server", e);
        } catch (ExecutionException e) {
            if (e.getCause() instanceof WordpressXmlRpcException) {
                throw (WordpressXmlRpcException) e.getCause();
            }
            throw new WordpressXmlRpcException("Error contacting Wordpress server", e.getCause());
        } catch (TimeoutException e) {
            throw new WordpressXmlRpcException("Connection to Wordpress timed out", e.getCause());
        }

        setWordpressUsers(users);
        setWordpressCategories(categories);
        setWordpressTags(tags);
    }

    if (getAvailableMacros() == null) {
        Set<MacroMetadata> allMacroMetadata = macroMetadataManager.getAllMacroMetadata();
        TreeSet<String> macros = new TreeSet<String>();
        for (MacroMetadata macroMetadata : allMacroMetadata) {
            macros.add(macroMetadata.getMacroName());
        }
        setAvailableMacros(macros);
    }
}