List of usage examples for java.util.Map.getOrDefault
default V getOrDefault(Object key, V defaultValue)
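The examples below are taken from real projects. As a quick orientation first, a minimal self-contained sketch of the method's contract (class and variable names here are illustrative only): getOrDefault returns the value mapped to the key, or the supplied default if the map contains no mapping for it, and it never modifies the map.

import java.util.HashMap;
import java.util.Map;

public class GetOrDefaultBasics {
    public static void main(String[] args) {
        Map<String, Integer> ages = new HashMap<>();
        ages.put("alice", 31);

        // Key present: the stored value is returned.
        System.out.println(ages.getOrDefault("alice", -1)); // 31
        // Key absent: the default is returned and the map is NOT modified.
        System.out.println(ages.getOrDefault("bob", -1));   // -1
        System.out.println(ages.containsKey("bob"));        // false
    }
}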
From source file:org.opencb.opencga.storage.core.variant.adaptors.VariantDBAdaptorTest.java
@Test
public void testGetAllVariants_polyphenSift() {
    // POLYPHEN
    // SIFT
    Query query;
    Map<Double, Integer> sift = new HashMap<>();
    Map<String, Integer> siftDesc = new HashMap<>();
    Map<Double, Integer> polyphen = new HashMap<>();
    Map<Double, Integer> maxPolyphen = new HashMap<>();
    Map<String, Integer> polyphenDesc = new HashMap<>();
    for (Variant variant : allVariants.getResult()) {
        Set<Double> siftInVariant = new HashSet<>();
        Set<Double> polyphenInVariant = new HashSet<>();
        Set<String> siftDescInVariant = new HashSet<>();
        Set<String> polyphenDescInVariant = new HashSet<>();
        if (variant.getAnnotation().getConsequenceTypes() != null) {
            for (ConsequenceType consequenceType : variant.getAnnotation().getConsequenceTypes()) {
                if (consequenceType.getProteinVariantAnnotation() != null) {
                    if (consequenceType.getProteinVariantAnnotation().getSubstitutionScores() != null) {
                        for (Score score : consequenceType.getProteinVariantAnnotation().getSubstitutionScores()) {
                            if (score.getSource().equals("sift")) {
                                siftInVariant.add(score.getScore());
                                siftDescInVariant.add(score.getDescription());
                            } else if (score.getSource().equals("polyphen")) {
                                polyphenInVariant.add(score.getScore());
                                polyphenDescInVariant.add(score.getDescription());
                            }
                        }
                    }
                }
            }
        }
        for (Double value : siftInVariant) {
            sift.put(value, sift.getOrDefault(value, 0) + 1);
        }
        for (String value : siftDescInVariant) {
            siftDesc.put(value, siftDesc.getOrDefault(value, 0) + 1);
        }
        for (Double value : polyphenInVariant) {
            polyphen.put(value, polyphen.getOrDefault(value, 0) + 1);
        }
        Optional<Double> max = polyphenInVariant.stream().max(Double::compareTo);
        if (max.isPresent()) {
            maxPolyphen.put(max.get(), maxPolyphen.getOrDefault(max.get(), 0) + 1);
        }
        for (String value : polyphenDescInVariant) {
            polyphenDesc.put(value, polyphenDesc.getOrDefault(value, 0) + 1);
        }
    }
    for (Map.Entry<String, Integer> entry : siftDesc.entrySet()) {
        query = new Query(ANNOT_SIFT.key(), entry.getKey());
        queryResult = dbAdaptor.get(query, null);
        assertEquals(entry.getKey(), entry.getValue().intValue(), queryResult.getNumResults());
        System.out.println("queryResult.getDbTime() = " + queryResult.getDbTime());
    }
    for (Map.Entry<String, Integer> entry : polyphenDesc.entrySet()) {
        query = new Query(ANNOT_POLYPHEN.key(), entry.getKey());
        queryResult = dbAdaptor.get(query, null);
        assertEquals(entry.getKey(), entry.getValue().intValue(), queryResult.getNumResults());
        System.out.println("queryResult.getDbTime() = " + queryResult.getDbTime());
    }
    query = new Query(ANNOT_POLYPHEN.key(), ">0.5");
    queryResult = dbAdaptor.get(query, null);
    Integer expected = maxPolyphen.entrySet().stream().filter(entry -> entry.getKey() > 0.5)
            .map(Map.Entry::getValue).reduce((i, j) -> i + j).orElse(0);
    assertEquals(expected.intValue(), queryResult.getNumResults());
    query = new Query(ANNOT_POLYPHEN.key(), "sift>0.5");
    thrown.expect(VariantQueryException.class);
    dbAdaptor.get(query, null);
    // for (Map.Entry<Double, Integer> entry : polyphen.entrySet()) {
    //     query = new Query(VariantDBAdaptor.VariantQueryParams.SIFT.key(), entry.getKey());
    //     queryResult = dbAdaptor.get(query, null);
    //     assertEquals(entry.getKey(), entry.getValue(), queryResult.getNumResults());
    // }
}
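The dominant getOrDefault pattern in the test above is frequency counting: read the current count with a default of 0, add one, and put the result back. A minimal sketch of that idiom, with the OpenCGA-specific variant/score types replaced by plain strings:

import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

public class CountWithGetOrDefault {
    public static void main(String[] args) {
        List<String> descriptions = Arrays.asList("tolerated", "deleterious", "tolerated");

        Map<String, Integer> counts = new HashMap<>();
        for (String description : descriptions) {
            // Absent keys read as 0, so the first occurrence becomes 1.
            counts.put(description, counts.getOrDefault(description, 0) + 1);
        }
        // e.g. {tolerated=2, deleterious=1} (HashMap iteration order is not guaranteed)
        System.out.println(counts);

        // Map.merge expresses the same read-modify-write in a single call:
        // counts.merge(description, 1, Integer::sum);
    }
}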
From source file:org.apache.druid.indexing.kafka.supervisor.KafkaSupervisor.java
/** * This method does two things -//from w w w . jav a 2s .com * 1. Makes sure the checkpoints information in the taskGroup is consistent with that of the tasks, if not kill * inconsistent tasks. * 2. truncates the checkpoints in the taskGroup corresponding to which segments have been published, so that any newly * created tasks for the taskGroup start indexing from after the latest published offsets. */ private void verifyAndMergeCheckpoints(final TaskGroup taskGroup) { final int groupId = taskGroup.groupId; final List<Pair<String, TreeMap<Integer, Map<Integer, Long>>>> taskSequences = new ArrayList<>(); final List<ListenableFuture<TreeMap<Integer, Map<Integer, Long>>>> futures = new ArrayList<>(); final List<String> taskIds = new ArrayList<>(); for (String taskId : taskGroup.taskIds()) { final ListenableFuture<TreeMap<Integer, Map<Integer, Long>>> checkpointsFuture = taskClient .getCheckpointsAsync(taskId, true); taskIds.add(taskId); futures.add(checkpointsFuture); } try { List<TreeMap<Integer, Map<Integer, Long>>> futuresResult = Futures.successfulAsList(futures) .get(futureTimeoutInSeconds, TimeUnit.SECONDS); for (int i = 0; i < futuresResult.size(); i++) { final TreeMap<Integer, Map<Integer, Long>> checkpoints = futuresResult.get(i); final String taskId = taskIds.get(i); if (checkpoints == null) { try { // catch the exception in failed futures futures.get(i).get(); } catch (Exception e) { log.error(e, "Problem while getting checkpoints for task [%s], killing the task", taskId); killTask(taskId); taskGroup.tasks.remove(taskId); } } else if (checkpoints.isEmpty()) { log.warn("Ignoring task [%s], as probably it is not started running yet", taskId); } else { taskSequences.add(new Pair<>(taskId, checkpoints)); } } } catch (Exception e) { throw new RuntimeException(e); } final KafkaDataSourceMetadata latestDataSourceMetadata = (KafkaDataSourceMetadata) indexerMetadataStorageCoordinator .getDataSourceMetadata(dataSource); final boolean hasValidOffsetsFromDb = latestDataSourceMetadata != null && latestDataSourceMetadata.getKafkaPartitions() != null && ioConfig.getTopic().equals(latestDataSourceMetadata.getKafkaPartitions().getTopic()); final Map<Integer, Long> latestOffsetsFromDb; if (hasValidOffsetsFromDb) { latestOffsetsFromDb = latestDataSourceMetadata.getKafkaPartitions().getPartitionOffsetMap(); } else { latestOffsetsFromDb = null; } // order tasks of this taskGroup by the latest sequenceId taskSequences.sort((o1, o2) -> o2.rhs.firstKey().compareTo(o1.rhs.firstKey())); final Set<String> tasksToKill = new HashSet<>(); final AtomicInteger earliestConsistentSequenceId = new AtomicInteger(-1); int taskIndex = 0; while (taskIndex < taskSequences.size()) { TreeMap<Integer, Map<Integer, Long>> taskCheckpoints = taskSequences.get(taskIndex).rhs; String taskId = taskSequences.get(taskIndex).lhs; if (earliestConsistentSequenceId.get() == -1) { // find the first replica task with earliest sequenceId consistent with datasource metadata in the metadata // store if (taskCheckpoints.entrySet().stream() .anyMatch(sequenceCheckpoint -> sequenceCheckpoint.getValue().entrySet().stream() .allMatch(partitionOffset -> Longs.compare(partitionOffset.getValue(), latestOffsetsFromDb == null ? 
partitionOffset.getValue() : latestOffsetsFromDb.getOrDefault(partitionOffset.getKey(), partitionOffset.getValue())) == 0) && earliestConsistentSequenceId.compareAndSet(-1, sequenceCheckpoint.getKey())) || (pendingCompletionTaskGroups.getOrDefault(groupId, EMPTY_LIST).size() > 0 && earliestConsistentSequenceId.compareAndSet(-1, taskCheckpoints.firstKey()))) { final SortedMap<Integer, Map<Integer, Long>> latestCheckpoints = new TreeMap<>( taskCheckpoints.tailMap(earliestConsistentSequenceId.get())); log.info("Setting taskGroup sequences to [%s] for group [%d]", latestCheckpoints, groupId); taskGroup.sequenceOffsets.clear(); taskGroup.sequenceOffsets.putAll(latestCheckpoints); } else { log.debug("Adding task [%s] to kill list, checkpoints[%s], latestoffsets from DB [%s]", taskId, taskCheckpoints, latestOffsetsFromDb); tasksToKill.add(taskId); } } else { // check consistency with taskGroup sequences if (taskCheckpoints.get(taskGroup.sequenceOffsets.firstKey()) == null || !(taskCheckpoints.get(taskGroup.sequenceOffsets.firstKey()) .equals(taskGroup.sequenceOffsets.firstEntry().getValue())) || taskCheckpoints.tailMap(taskGroup.sequenceOffsets.firstKey()) .size() != taskGroup.sequenceOffsets.size()) { log.debug("Adding task [%s] to kill list, checkpoints[%s], taskgroup checkpoints [%s]", taskId, taskCheckpoints, taskGroup.sequenceOffsets); tasksToKill.add(taskId); } } taskIndex++; } if ((tasksToKill.size() > 0 && tasksToKill.size() == taskGroup.tasks.size()) || (taskGroup.tasks.size() == 0 && pendingCompletionTaskGroups.getOrDefault(groupId, EMPTY_LIST).size() == 0)) { // killing all tasks or no task left in the group ? // clear state about the taskgroup so that get latest offset information is fetched from metadata store log.warn("Clearing task group [%d] information as no valid tasks left the group", groupId); taskGroups.remove(groupId); partitionGroups.get(groupId).replaceAll((partition, offset) -> NOT_SET); } taskSequences.stream().filter(taskIdSequences -> tasksToKill.contains(taskIdSequences.lhs)) .forEach(sequenceCheckpoint -> { log.warn( "Killing task [%s], as its checkpoints [%s] are not consistent with group checkpoints[%s] or latest " + "persisted offsets in metadata store [%s]", sequenceCheckpoint.lhs, sequenceCheckpoint.rhs, taskGroup.sequenceOffsets, latestOffsetsFromDb); killTask(sequenceCheckpoint.lhs); taskGroup.tasks.remove(sequenceCheckpoint.lhs); }); }
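Two getOrDefault uses appear in the supervisor code above: comparing a task's partition offset against the stored offset while treating a missing partition as equal by definition (latestOffsetsFromDb.getOrDefault(partitionOffset.getKey(), partitionOffset.getValue())), and treating an absent group entry as an empty list (pendingCompletionTaskGroups.getOrDefault(groupId, EMPTY_LIST)). A minimal sketch of the empty-collection default, with hypothetical names; it avoids both a null check and mutating the map:

import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

public class EmptyCollectionDefault {
    public static void main(String[] args) {
        Map<Integer, List<String>> pendingTasksByGroup = new HashMap<>();
        pendingTasksByGroup.put(1, Collections.singletonList("task-a"));

        // Group 2 has no entry; an immutable empty list is returned instead of null,
        // so size() and iteration work without a null check and the map stays unchanged.
        List<String> pending = pendingTasksByGroup.getOrDefault(2, Collections.emptyList());
        System.out.println(pending.size());                    // 0
        System.out.println(pendingTasksByGroup.containsKey(2)); // false
    }
}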
From source file:org.apache.rya.rdftriplestore.inference.InferenceEngine.java
/** * Queries domain and range information, then populates the inference engine with direct * domain/range relations and any that can be inferred from the subclass graph, subproperty * graph, and inverse property map. Should be called after that class and property information * has been refreshed.//from w ww. j a v a 2 s. com * * Computes indirect domain/range: * - If p1 has domain c, and p2 is a subproperty of p1, then p2 also has domain c. * - If p1 has range c, and p2 is a subproperty of p1, then p2 also has range c. * - If p1 has domain c, and p2 is the inverse of p1, then p2 has range c. * - If p1 has range c, and p2 is the inverse of p1, then p2 has domain c. * - If p has domain c1, and c1 is a subclass of c2, then p also has domain c2. * - If p has range c1, and c1 is a subclass of c2, then p also has range c2. * @throws QueryEvaluationException */ private void refreshDomainRange() throws QueryEvaluationException { final Map<URI, Set<URI>> domainByTypePartial = new ConcurrentHashMap<>(); final Map<URI, Set<URI>> rangeByTypePartial = new ConcurrentHashMap<>(); // First, populate domain and range based on direct domain/range triples. CloseableIteration<Statement, QueryEvaluationException> iter = RyaDAOHelper.query(ryaDAO, null, RDFS.DOMAIN, null, conf); try { while (iter.hasNext()) { final Statement st = iter.next(); final Resource property = st.getSubject(); final Value domainType = st.getObject(); if (domainType instanceof URI && property instanceof URI) { if (!domainByTypePartial.containsKey(domainType)) { domainByTypePartial.put((URI) domainType, new HashSet<>()); } domainByTypePartial.get(domainType).add((URI) property); } } } finally { if (iter != null) { iter.close(); } } iter = RyaDAOHelper.query(ryaDAO, null, RDFS.RANGE, null, conf); try { while (iter.hasNext()) { final Statement st = iter.next(); final Resource property = st.getSubject(); final Value rangeType = st.getObject(); if (rangeType instanceof URI && property instanceof URI) { if (!rangeByTypePartial.containsKey(rangeType)) { rangeByTypePartial.put((URI) rangeType, new HashSet<>()); } rangeByTypePartial.get(rangeType).add((URI) property); } } } finally { if (iter != null) { iter.close(); } } // Then combine with the subclass/subproperty graphs and the inverse property map to compute // the closure of domain and range per class. final Set<URI> domainRangeTypeSet = new HashSet<>(domainByTypePartial.keySet()); domainRangeTypeSet.addAll(rangeByTypePartial.keySet()); // Extend to subproperties: make sure that using a more specific form of a property // still triggers its domain/range inferences. // Mirror for inverse properties: make sure that using the inverse form of a property // triggers the inverse domain/range inferences. // These two rules can recursively trigger one another. for (final URI domainRangeType : domainRangeTypeSet) { final Set<URI> propertiesWithDomain = domainByTypePartial.getOrDefault(domainRangeType, new HashSet<>()); final Set<URI> propertiesWithRange = rangeByTypePartial.getOrDefault(domainRangeType, new HashSet<>()); // Since findParents will traverse the subproperty graph and find all indirect // subproperties, the subproperty rule does not need to trigger itself directly. // And since no more than one inverseOf relationship is stored for any property, the // inverse property rule does not need to trigger itself directly. 
However, each rule // can trigger the other, so keep track of how the inferred domains/ranges were // discovered so we can apply only those rules that might yield new information. final Stack<URI> domainViaSuperProperty = new Stack<>(); final Stack<URI> rangeViaSuperProperty = new Stack<>(); final Stack<URI> domainViaInverseProperty = new Stack<>(); final Stack<URI> rangeViaInverseProperty = new Stack<>(); // Start with the direct domain/range assertions, which can trigger any rule. domainViaSuperProperty.addAll(propertiesWithDomain); domainViaInverseProperty.addAll(propertiesWithDomain); rangeViaSuperProperty.addAll(propertiesWithRange); rangeViaInverseProperty.addAll(propertiesWithRange); // Repeatedly infer domain/range from subproperties/inverse properties until no new // information can be generated. while (!(domainViaSuperProperty.isEmpty() && rangeViaSuperProperty.isEmpty() && domainViaInverseProperty.isEmpty() && rangeViaInverseProperty.isEmpty())) { // For a type c and property p, if c is a domain of p, then c is the range of any // inverse of p. Would be redundant for properties discovered via inverseOf. while (!domainViaSuperProperty.isEmpty()) { final URI property = domainViaSuperProperty.pop(); final URI inverseProperty = findInverseOf(property); if (inverseProperty != null && propertiesWithRange.add(inverseProperty)) { rangeViaInverseProperty.push(inverseProperty); } } // For a type c and property p, if c is a range of p, then c is the domain of any // inverse of p. Would be redundant for properties discovered via inverseOf. while (!rangeViaSuperProperty.isEmpty()) { final URI property = rangeViaSuperProperty.pop(); final URI inverseProperty = findInverseOf(property); if (inverseProperty != null && propertiesWithDomain.add(inverseProperty)) { domainViaInverseProperty.push(inverseProperty); } } // For a type c and property p, if c is a domain of p, then c is also a domain of // p's subproperties. Would be redundant for properties discovered via this rule. while (!domainViaInverseProperty.isEmpty()) { final URI property = domainViaInverseProperty.pop(); final Set<URI> subProperties = getSubProperties(property); subProperties.removeAll(propertiesWithDomain); propertiesWithDomain.addAll(subProperties); domainViaSuperProperty.addAll(subProperties); } // For a type c and property p, if c is a range of p, then c is also a range of // p's subproperties. Would be redundant for properties discovered via this rule. while (!rangeViaInverseProperty.isEmpty()) { final URI property = rangeViaInverseProperty.pop(); final Set<URI> subProperties = getSubProperties(property); subProperties.removeAll(propertiesWithRange); propertiesWithRange.addAll(subProperties); rangeViaSuperProperty.addAll(subProperties); } } if (!propertiesWithDomain.isEmpty()) { domainByTypePartial.put(domainRangeType, propertiesWithDomain); } if (!propertiesWithRange.isEmpty()) { rangeByTypePartial.put(domainRangeType, propertiesWithRange); } } // Once all properties have been found for each domain/range class, extend to superclasses: // make sure that the consequent of a domain/range inference goes on to apply any more // general classes as well. 
for (final URI subtype : domainRangeTypeSet) { final Set<URI> supertypes = getSuperClasses(subtype); final Set<URI> propertiesWithDomain = domainByTypePartial.getOrDefault(subtype, new HashSet<>()); final Set<URI> propertiesWithRange = rangeByTypePartial.getOrDefault(subtype, new HashSet<>()); for (final URI supertype : supertypes) { // For a property p and its domain c: all of c's superclasses are also domains of p. if (!propertiesWithDomain.isEmpty() && !domainByTypePartial.containsKey(supertype)) { domainByTypePartial.put(supertype, new HashSet<>()); } for (final URI property : propertiesWithDomain) { domainByTypePartial.get(supertype).add(property); } // For a property p and its range c: all of c's superclasses are also ranges of p. if (!propertiesWithRange.isEmpty() && !rangeByTypePartial.containsKey(supertype)) { rangeByTypePartial.put(supertype, new HashSet<>()); } for (final URI property : propertiesWithRange) { rangeByTypePartial.get(supertype).add(property); } } } domainByType = domainByTypePartial; rangeByType = rangeByTypePartial; }
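In the inference engine above, getOrDefault(domainRangeType, new HashSet<>()) hands back a fresh mutable set when a type has no entry yet. Unlike computeIfAbsent, that fallback set is not linked to the map, which is why the code explicitly calls domainByTypePartial.put(...) once the set is non-empty. A small sketch of that distinction, with hypothetical names:

import java.util.HashMap;
import java.util.HashSet;
import java.util.Map;
import java.util.Set;

public class GetOrDefaultVsComputeIfAbsent {
    public static void main(String[] args) {
        Map<String, Set<String>> propertiesByType = new HashMap<>();

        // getOrDefault: the fallback HashSet is NOT stored in the map...
        Set<String> detached = propertiesByType.getOrDefault("Person", new HashSet<>());
        detached.add("hasName");
        System.out.println(propertiesByType.containsKey("Person")); // false
        // ...so it must be put back explicitly if it should survive.
        propertiesByType.put("Person", detached);

        // computeIfAbsent: the created set is inserted into the map immediately.
        propertiesByType.computeIfAbsent("Place", k -> new HashSet<>()).add("hasLocation");
        System.out.println(propertiesByType.get("Place")); // [hasLocation]
    }
}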
From source file:org.jamocha.dn.compiler.pathblocks.PathBlocks.java
protected static List<PathRule> createOutput(final List<Either<Rule, ExistentialProxy>> rules, final PathBlockSet resultBlockSet) { final Function<? super Block, ? extends Integer> characteristicNumber = block -> block .getFlatFilterInstances().size() / block.getRulesOrProxies().size(); final TreeMap<Integer, CursorableLinkedList<Block>> blockMap = resultBlockSet.getBlocks().stream() .collect(groupingBy(characteristicNumber, TreeMap::new, toCollection(CursorableLinkedList::new))); // iterate over all the filter proxies ever used for (final FilterProxy filterProxy : FilterProxy.getFilterProxies()) { final Set<ExistentialProxy> existentialProxies = filterProxy.getProxies(); // determine the largest characteristic number of the blocks containing filter instances // of one of the existential proxies (choice is arbitrary, since the filters and the // conflicts are identical if they belong to the same filter). final OptionalInt optMax = resultBlockSet.getRuleInstanceToBlocks() .computeIfAbsent(Either.right(existentialProxies.iterator().next()), newHashSet()).stream() .mapToInt(composeToInt(characteristicNumber, Integer::intValue)).max(); if (!optMax.isPresent()) continue; final int eCN = optMax.getAsInt(); // get the list to append the blocks using the existential closure filter INSTANCE to final CursorableLinkedList<Block> targetList = blockMap.get(eCN); // for every existential part for (final ExistentialProxy existentialProxy : existentialProxies) { final FilterInstance exClosure = existentialProxy.getExistentialClosure(); // create a list storing the blocks to move final List<Block> toMove = new ArrayList<>(); for (final CursorableLinkedList<Block> blockList : blockMap.headMap(eCN, true).values()) { // iterate over the blocks in the current list for (final ListIterator<Block> iterator = blockList.listIterator(); iterator.hasNext();) { final Block current = iterator.next(); // if the current block uses the current existential closure filter // INSTANCE, it has to be moved if (current.getFlatFilterInstances().contains(exClosure)) { iterator.remove(); toMove.add(current); }/*from ww w . j a va2 s . 
c om*/ } } // append the blocks to be moved (they were only removed so far) targetList.addAll(toMove); } } final Set<FilterInstance> constructedFIs = new HashSet<>(); final Map<Either<Rule, ExistentialProxy>, Map<FilterInstance, Set<FilterInstance>>> ruleToJoinedWith = new HashMap<>(); final Map<Set<FilterInstance>, PathFilterList> joinedWithToComponent = new HashMap<>(); // at this point, the network can be constructed for (final CursorableLinkedList<Block> blockList : blockMap.values()) { for (final Block block : blockList) { final List<Either<Rule, ExistentialProxy>> blockRules = Lists .newArrayList(block.getRulesOrProxies()); final Set<List<FilterInstance>> filterInstanceColumns = Block .getFilterInstanceColumns(block.getFilters(), block.getRuleToFilterToRow(), blockRules); // since we are considering blocks, it is either the case that all filter // instances of the column have been constructed or none of them have final PathSharedListWrapper sharedListWrapper = new PathSharedListWrapper(blockRules.size()); final Map<Either<Rule, ExistentialProxy>, PathSharedList> ruleToSharedList = IntStream .range(0, blockRules.size()).boxed() .collect(toMap(blockRules::get, sharedListWrapper.getSharedSiblings()::get)); final List<List<FilterInstance>> columnsToConstruct, columnsAlreadyConstructed; { final Map<Boolean, List<List<FilterInstance>>> partition = filterInstanceColumns.stream() .collect(partitioningBy(column -> Collections.disjoint(column, constructedFIs))); columnsAlreadyConstructed = partition.get(Boolean.FALSE); columnsToConstruct = partition.get(Boolean.TRUE); } if (!columnsAlreadyConstructed.isEmpty()) { final Map<PathSharedList, LinkedHashSet<PathFilterList>> sharedPart = new HashMap<>(); for (final List<FilterInstance> column : columnsAlreadyConstructed) { for (final FilterInstance fi : column) { sharedPart .computeIfAbsent(ruleToSharedList.get(fi.getRuleOrProxy()), newLinkedHashSet()) .add(joinedWithToComponent .get(ruleToJoinedWith.get(fi.getRuleOrProxy()).get(fi))); } } sharedListWrapper.addSharedColumns(sharedPart); } for (final List<FilterInstance> column : columnsToConstruct) { sharedListWrapper.addSharedColumn(column.stream().collect( toMap(fi -> ruleToSharedList.get(fi.getRuleOrProxy()), FilterInstance::convert))); } constructedFIs.addAll(block.getFlatFilterInstances()); for (final Entry<Either<Rule, ExistentialProxy>, Map<Filter, FilterInstancesSideBySide>> entry : block .getRuleToFilterToRow().entrySet()) { final Either<Rule, ExistentialProxy> rule = entry.getKey(); final Set<FilterInstance> joined = entry.getValue().values().stream() .flatMap(sbs -> sbs.getInstances().stream()).collect(toSet()); final Map<FilterInstance, Set<FilterInstance>> joinedWithMapForThisRule = ruleToJoinedWith .computeIfAbsent(rule, newHashMap()); joined.forEach(fi -> joinedWithMapForThisRule.put(fi, joined)); joinedWithToComponent.put(joined, ruleToSharedList.get(rule)); } } } final List<PathRule> pathRules = new ArrayList<>(); for (final Either<Rule, ExistentialProxy> either : rules) { if (either.isRight()) { continue; } final List<PathFilterList> pathFilterLists = Stream .concat(either.left().get().existentialProxies.values().stream().map(p -> Either.right(p)), Stream.of(either)) .flatMap(e -> ruleToJoinedWith.getOrDefault(e, Collections.emptyMap()).values().stream() .distinct()) .map(joinedWithToComponent::get).collect(toList()); pathRules.add(either.left().get().getOriginal().toPathRule(PathFilterList.toSimpleList(pathFilterLists), pathFilterLists.size() > 1 ? 
InitialFactPathsFinder.gather(pathFilterLists) : Collections.emptySet())); } return pathRules; }
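Here ruleToJoinedWith.getOrDefault(e, Collections.emptyMap()) lets the stream pipeline run unchanged when a rule has no joined filter instances: the absent case simply contributes no elements. A stripped-down sketch of streaming over a possibly missing nested map, with placeholder names:

import java.util.Arrays;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.stream.Collectors;

public class StreamOverMissingEntry {
    public static void main(String[] args) {
        Map<String, Map<String, String>> componentsByRule = new HashMap<>();
        componentsByRule.put("rule-1", Collections.singletonMap("filter-a", "component-a"));

        for (String rule : Arrays.asList("rule-1", "rule-2")) {
            // An absent rule maps to an empty map, so the stream is simply empty
            // and no null check or special casing is needed.
            List<String> components = componentsByRule
                    .getOrDefault(rule, Collections.emptyMap())
                    .values().stream()
                    .distinct()
                    .collect(Collectors.toList());
            System.out.println(rule + " -> " + components);
        }
    }
}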
From source file:com.serphacker.serposcope.db.google.GoogleSerpRescanDB.java
public void rescan(Integer specificRunId, Collection<GoogleTarget> targets, Collection<GoogleSearch> searches, boolean updateSummary) { LOG.debug("SERP rescan (bulk) : starting"); long _start = System.currentTimeMillis(); Map<Integer, Integer> searchCountByGroup = searchDB.countByGroup(); Run specPrevRun = null;/*from w w w. j av a2 s. co m*/ Map<Integer, GoogleTargetSummary> specPrevRunSummaryByTarget = new HashMap<>(); if (specificRunId != null) { specPrevRun = runDB.findPrevious(specificRunId); if (specPrevRun != null) { specPrevRunSummaryByTarget = targetSummaryDB.list(specPrevRun.getId()).stream() .collect(Collectors.toMap(GoogleTargetSummary::getTargetId, Function.identity())); } } List<GoogleRank> ranks = new ArrayList<>(); for (GoogleTarget target : targets) { Map<Integer, GoogleTargetSummary> summaryByRunId = new HashMap<>(); GoogleTargetSummary specificPreviousSummary = specPrevRunSummaryByTarget.get(target.getId()); if (specificPreviousSummary != null) { summaryByRunId.put(specPrevRun.getId(), specificPreviousSummary); } for (GoogleSearch search : searches) { final MutableInt previousRunId = new MutableInt(0); final MutableInt previousRank = new MutableInt(GoogleRank.UNRANKED); GoogleBest searchBest = new GoogleBest(target.getGroupId(), target.getId(), search.getId(), GoogleRank.UNRANKED, null, null); if (specPrevRun != null) { previousRunId.setValue(specPrevRun.getId()); previousRank.setValue( rankDB.get(specPrevRun.getId(), target.getGroupId(), target.getId(), search.getId())); GoogleBest specificBest = rankDB.getBest(target.getGroupId(), target.getId(), search.getId()); if (specificBest != null) { searchBest = specificBest; } } final GoogleBest best = searchBest; serpDB.stream(specificRunId, specificRunId, search.getId(), (GoogleSerp res) -> { int rank = GoogleRank.UNRANKED; String rankedUrl = null; for (int i = 0; i < res.getEntries().size(); i++) { if (target.match(res.getEntries().get(i).getUrl())) { rankedUrl = res.getEntries().get(i).getUrl(); rank = i + 1; break; } } // only update last run GoogleRank gRank = new GoogleRank(res.getRunId(), target.getGroupId(), target.getId(), search.getId(), rank, previousRank.shortValue(), rankedUrl); ranks.add(gRank); if (ranks.size() > 2000) { rankDB.insert(ranks); ranks.clear(); } if (updateSummary) { GoogleTargetSummary summary = summaryByRunId.get(res.getRunId()); if (summary == null) { summaryByRunId.put(res.getRunId(), summary = new GoogleTargetSummary(target.getGroupId(), target.getId(), res.getRunId(), 0)); } summary.addRankCandidat(gRank); } if (rank != GoogleRank.UNRANKED && rank <= best.getRank()) { best.setRank((short) rank); best.setUrl(rankedUrl); best.setRunDay(res.getRunDay()); } previousRunId.setValue(res.getRunId()); previousRank.setValue(rank); }); if (best.getRank() != GoogleRank.UNRANKED) { rankDB.insertBest(best); } } // fill previous summary score if (updateSummary) { TreeMap<Integer, GoogleTargetSummary> summaries = new TreeMap<>(summaryByRunId); GoogleTargetSummary previousSummary = null; for (Map.Entry<Integer, GoogleTargetSummary> entry : summaries.entrySet()) { GoogleTargetSummary summary = entry.getValue(); summary.computeScoreBP(searchCountByGroup.getOrDefault(summary.getGroupId(), 0)); if (previousSummary != null) { summary.setPreviousScoreBP(previousSummary.getScoreBP()); } previousSummary = summary; } if (specPrevRun != null) { summaries.remove(specPrevRun.getId()); } if (!summaries.isEmpty()) { targetSummaryDB.insert(summaries.values()); } } } if (!ranks.isEmpty()) { rankDB.insert(ranks); 
ranks.clear(); } LOG.debug("SERP rescan : done, duration = {}", DurationFormatUtils.formatDurationHMS(System.currentTimeMillis() - _start)); }
From source file:com.ikanow.aleph2.data_model.utils.CrudServiceUtils.java
/** CRUD service proxy that optionally adds an extra term and allows the user to modify the results after they've run (eg to apply security service settings) * @author Alex// w w w . ja v a 2 s . c om */ @SuppressWarnings("unchecked") public static <T> ICrudService<T> intercept(final Class<T> clazz, final ICrudService<T> delegate, final Optional<QueryComponent<T>> extra_query, final Optional<Function<QueryComponent<T>, QueryComponent<T>>> query_transform, final Map<String, BiFunction<Object, Object[], Object>> interceptors, final Optional<BiFunction<Object, Object[], Object>> default_interceptor) { InvocationHandler handler = new InvocationHandler() { @Override public Object invoke(Object proxy, Method method, Object[] args) throws Throwable { final Method m = delegate.getClass().getMethod(method.getName(), method.getParameterTypes()); // First off, apply the extra term to any relevant args: final Object[] args_with_extra_query_pretransform = query_transform.map(q -> { return (null != args) ? Arrays.stream(args) .map(o -> (null != o) && QueryComponent.class.isAssignableFrom(o.getClass()) ? q.apply((QueryComponent<T>) o) : o) .collect(Collectors.toList()).toArray() : args; }).orElse(args); final Object[] args_with_extra_query = extra_query.map(q -> { return (null != args_with_extra_query_pretransform) ? Arrays.stream(args_with_extra_query_pretransform) .map(o -> (null != o) && QueryComponent.class.isAssignableFrom(o.getClass()) ? CrudUtils.allOf((QueryComponent<T>) o, q) : o) .collect(Collectors.toList()).toArray() : args_with_extra_query_pretransform; }).orElse(args_with_extra_query_pretransform); // Special cases for: readOnlyVersion, getFilterdRepo / countObjects / getRawService / *byId final Object o = Lambdas.get(() -> { final SingleQueryComponent<T> base_query = JsonNode.class.equals(clazz) ? 
(SingleQueryComponent<T>) CrudUtils.allOf() : CrudUtils.allOf(clazz); try { if (extra_query.isPresent() && m.getName().equals("countObjects")) { // special case....change method and apply spec return delegate.countObjectsBySpec(extra_query.get()); } else if (extra_query.isPresent() && m.getName().equals("getObjectById")) { // convert from id to spec and append extra_query if (1 == args.length) { return delegate.getObjectBySpec(CrudUtils.allOf(extra_query.get(), base_query.when(JsonUtils._ID, args[0]))); } else { return delegate.getObjectBySpec( CrudUtils.allOf(extra_query.get(), base_query.when(JsonUtils._ID, args[0])), (List<String>) args[1], (Boolean) args[2]); } } else if (extra_query.isPresent() && m.getName().equals("deleteDatastore")) { CompletableFuture<Long> l = delegate.deleteObjectsBySpec(extra_query.get()); return l.thenApply(ll -> ll > 0); } else if (extra_query.isPresent() && m.getName().equals("deleteObjectById")) { // convert from id to spec and append extra_query return delegate.deleteObjectBySpec( CrudUtils.allOf(extra_query.get(), base_query.when(JsonUtils._ID, args[0]))); } else if (extra_query.isPresent() && m.getName().equals("updateObjectById")) { // convert from id to spec and append extra_query return delegate.updateObjectBySpec( CrudUtils.allOf(extra_query.get(), base_query.when(JsonUtils._ID, args[0])), Optional.empty(), (UpdateComponent<T>) args[1]); } else if (m.getName().equals("getRawService")) { // special case....convert the default query to JSON, if present Object o_internal = m.invoke(delegate, args_with_extra_query); Optional<QueryComponent<JsonNode>> json_extra_query = extra_query .map(qc -> qc.toJson()); return intercept(JsonNode.class, (ICrudService<JsonNode>) o_internal, json_extra_query, Optional.empty(), interceptors, default_interceptor); } else { // wrap any CrudService types Object o_internal = m.invoke(delegate, args_with_extra_query); return (null != o_internal) && ICrudService.class.isAssignableFrom(o_internal.getClass()) ? intercept(clazz, (ICrudService<T>) o_internal, extra_query, Optional.empty(), interceptors, default_interceptor) : o_internal; } } catch (IllegalAccessException ee) { throw new RuntimeException(ee); } catch (InvocationTargetException e) { throw new RuntimeException(e.getCause().getMessage(), e); } }); return interceptors .getOrDefault(m.getName(), default_interceptor.orElse(CrudServiceUtils::identityInterceptor)) .apply(o, args_with_extra_query); } }; return ICrudService.IReadOnlyCrudService.class.isAssignableFrom(delegate.getClass()) ? (ICrudService<T>) Proxy.newProxyInstance(ICrudService.IReadOnlyCrudService.class.getClassLoader(), new Class[] { ICrudService.IReadOnlyCrudService.class }, handler) : (ICrudService<T>) Proxy.newProxyInstance(ICrudService.class.getClassLoader(), new Class[] { ICrudService.class }, handler); }
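The proxy's final statement uses getOrDefault as a dispatch-table lookup: pick the interceptor registered for the method name, otherwise fall back to the default (or identity) handler. A condensed sketch of that lookup using BiFunction handlers; the registered names and behaviours are hypothetical:

import java.util.HashMap;
import java.util.Map;
import java.util.function.BiFunction;

public class InterceptorDispatch {
    public static void main(String[] args) {
        Map<String, BiFunction<Object, Object[], Object>> interceptors = new HashMap<>();
        // Hypothetical interceptor: replace the real count with 0 for unauthorised callers.
        interceptors.put("countObjects", (result, methodArgs) -> 0L);

        BiFunction<Object, Object[], Object> identity = (result, methodArgs) -> result;

        // Registered method name: the specific interceptor runs.
        Object counted = interceptors.getOrDefault("countObjects", identity).apply(42L, new Object[0]);
        // Unregistered method name: the default passes the result straight through.
        Object fetched = interceptors.getOrDefault("getObjectById", identity).apply("doc-1", new Object[0]);

        System.out.println(counted); // 0
        System.out.println(fetched); // doc-1
    }
}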
From source file:sg.ncl.MainController.java
private int getAcademicUsage(Map<String, Integer> statsAcademicMap, YearMonth m_s, YearMonth m_e,
        ProjectDetails project) {
    int nodeHours = 0;
    if (project.getOrganisationType().equals("Academic")) {
        int totalNodeHours = statsAcademicMap.getOrDefault(project.getOrganisationName(), 0);
        nodeHours = project.getProjectUsages().stream().filter(p -> p.hasUsageWithinPeriod(m_s, m_e))
                .mapToInt(ProjectUsage::getUsage).sum();
        statsAcademicMap.put(project.getOrganisationName(), totalNodeHours + nodeHours);
    }
    return nodeHours;
}
From source file:sg.ncl.MainController.java
private int getCategoryUsage(Map<String, Integer> statsCategoryMap, YearMonth m_s, YearMonth m_e,
        ProjectDetails project) {
    String key = project.getOrganisationType();
    if (key.equals("Academic")) {
        key = project.isEducation() ? "Academia (Education)" : "Academia (R&D)";
    }
    int totalNodeHours = statsCategoryMap.getOrDefault(key, 0);
    int nodeHours = project.getProjectUsages().stream().filter(p -> p.hasUsageWithinPeriod(m_s, m_e))
            .mapToInt(ProjectUsage::getUsage).sum();
    statsCategoryMap.put(key, totalNodeHours + nodeHours);
    return nodeHours;
}
From source file:edu.cmu.cs.lti.discoursedb.io.mturk.converter.MturkConverter.java
private void convert(String directory, String datasetName) throws ParseException, IOException { // xu_end_id is string: group + _ team + _ + id // username is group:username Map<String, String> xu_id2username = new HashMap<String, String>(); Map<String, String> wen_username2groupteam = new HashMap<String, String>(); Map<String, String> wen_wenHandle2discId = new HashMap<String, String>(); Map<String, String> discId2expHandle = new HashMap<String, String>(); Map<String, Long> ddb_user_ids = new HashMap<String, Long>(); Map<String, String> discId2discHandle = new HashMap<String, String>(); Map<String, String> discHandle2discId = new HashMap<String, String>(); Map<String, String> expHandle2group = new HashMap<String, String>(); Map<String, String> expHandle2team = new HashMap<String, String>(); Map<String, String> expHandle2experiment = new HashMap<String, String>(); Boolean summerSchool = true;/*from ww w . jav a 2 s. c om*/ Pattern forum_team_user0 = Pattern.compile("(\\d\\d\\d)_(\\d+)_(\\d+)"); Matcher m11 = forum_team_user0.matcher("234pre234_2_3.csv"); m11.find(); assert m11.group(1) == "234"; System.out.println("Success!"); /* * Read xu and wen's users -> write users, * keep map xu_user_id -> username, group, team, experiment; username->xu_user_id,group, team, experiment; also username -> discoursedb_userid * write group entities for all three levels and link to users * write DPs for group, team, experiment, and link them (dprs) */ /* userid,groupid,group,newuserid,username,id,trans,bazaar,teamid,rolereasoning, * chattrans,score,tradeoff,discussiontran,totalprelen,reasoning,chatlength, * numofwords,reasoning_percentage,bazaarprompt,tranpercentage,pre_epi_rt, * pre_epi_wr,pre_tf_rt,pre_tf_wr,post_epi_rt,post_epi_wr,post_tf_rt,post_tf_wr * * 1,222_1,222,222_1,Bobs,222_1_1,1,1,mturk987641,4,10,23... 
*/ //for (Map<String,String> row : csvIteratorExistingHeaders(directory + "/xustudy/individualdata_0622.csv")) { for (Map<String, String> row : csvIteratorExistingHeaders( directory + "/summerschool/individualuser_0710.csv")) { String group = row.get("id").split("_")[0]; String team = row.get("id").split("_")[1]; String groupteam = group + "_" + team; String xuHandle = group + ":" + row.get("username"); xu_id2username.put(row.get("id"), xuHandle); ddb_user_ids.put(xuHandle, mcs.mapUser(xuHandle, xuDiscourseName, datasetName, "individualdata_0622", "id", row.get("id"))); expHandle2group.put(xuHandle, group); expHandle2team.put(xuHandle, team); expHandle2experiment.put(xuHandle, xuDiscourseName); mcs.mapTeamAndGroup(xuDiscourseName, group, team, datasetName, "individualdata_0622", "id", row.get("id")); } /* discforum2wenstudy.csv * * wenHandle,discId,forum,discName c1:Shan,726,10,shan c1:StickyWicket,707,10,StickyWicket c1:WT89,701,10,WT89 c1:hjo,712,10,hjo */ if (!summerSchool) { for (Map<String, String> row : csvIteratorExistingHeaders(directory + "/discforum2wenstudy.csv")) { String groupteam = row.get("wenHandle").split(":")[0]; String group = wenGroup(groupteam); String team = wenTeam(groupteam); if (row.get("discId") == "") { System.out .println("Skipping user " + row.get("wenHandle") + ": discussionforum id is not known"); continue; } String wenHandle = row.get("wenHandle"); discId2expHandle.put(row.get("discId"), wenHandle); wen_wenHandle2discId.put(wenHandle, row.get("discId")); ddb_user_ids.put(wenHandle, mcs.mapUser(wenHandle, wenDiscourseName, datasetName, "discforum2wenstudy", "discId", row.get("discId"))); expHandle2group.put(wenHandle, group); expHandle2team.put(wenHandle, team); expHandle2experiment.put(wenHandle, wenDiscourseName); mcs.mapTeamAndGroup(wenDiscourseName, group, team, datasetName, "discforum2wenstudy", "id", row.get("discId")); } } /* * userid,assign_groupsize,groupid,totalpost,cothreadwithteammates,gotreplyfromteammate,replytoteammate,initialproposal,team_score,team_energy_requirement,team_energy_energy,team_additional,team_incorrect,score,energy_requirement,energy_energy,additional,incorrect,cntwords,cntchats,Experience,End_Result,Communication_Quality,Topic_Familiarity,Perceived_Learning,type,energy * * Amy,3,ff1,6,0,0,0,226,2,2,0,0,0,0,0,0,0,0,0,0,5,5,5,2,5,community-early,1 * for (Map<String,String> row : csvIteratorExistingHeaders(directory + "/wenstudy/exp1-ANOVA-peruser.csv")) { String group = row.get("groupid").substring(0, row.get("groupid").length()-1); String username = group + ":" + row.get("userid"); wen_username2groupteam.put(username, row.get("groupid")); username2group.put(username, group); String team = row.get("groupid"); username2team.put(username, team); username2experiment.put(username, wenDiscourseName); ddb_user_ids.put(username, mcs.mapUser(username, wenDiscourseName, datasetName, "exp1-ANOVA-peruser", "userid", row.get("userid"))); //, team, group, "xustudy")); System.out.println(row.get("userid") + " --> " + row.get("groupid")); mcs.mapTeamAndGroup(wenDiscourseName, group, team, datasetName, "individualdata_0622", "groupid", row.get("groupid")); }*/ /* Table<R, C, V> t2 = csvscan("wenfiles/exp1_sdmething.csv", new Array<String>(flds)); for (t in t2.rows()) { ddb_user_ids[t.get("user_name")] = mcs.addUser(t.get("userxxxxxid"), t.get("user_name"), group, team, experiment); // also fields here } //* Read users.csv -> keep discussion_id in memory; don't write * 
"user_uid","user_name","user_pwd","forum_uid","uuid","access_enabled","password_reset" * * "1","erose","innerpath","1","CF5725C5-B089-CC1F-509F4E3E9BE24881","1","" * "2603","Amber","Amber64","64","Amber","1","" * "173","64","64","1","64","1","" */ //for (Map<String,String> row : csvIteratorExistingHeaders(directory + "/user.csv")) { for (Map<String, String> row : csvIteratorExistingHeaders(directory + "summerschool/user0710.csv")) { discId2discHandle.put(row.get("user_uid"), row.get("forum_uid") + ":" + row.get("user_name")); discHandle2discId.put(row.get("forum_uid") + ":" + row.get("user_name"), row.get("user_uid")); } /* discussionforum.csv * post_uid,forum_uid,thread_uid,replyto_uid,user_uid,subject,content, * posted_at,uuid * * 15,1,7,0,3,"If Oil is Scarce, Why's It So Damn Cheap?","My question ...", * 4/4/15 20:20,4584DA50-EDFC-B74D-EEAAA78C8CF4F2DC */ Map<Long, Long> sourceDiscId2ddbDiscId = new HashMap<Long, Long>(); for (Map<String, String> row : csvIteratorExistingHeaders(directory + "/summerschool/forum0710.csv")) { String discHandle = discId2discHandle.getOrDefault(row.get("user_uid"), row.get("forum_uid") + ":User" + row.get("user_uid")); String expHandle = discId2expHandle.getOrDefault(row.get("user_uid"), discHandle); String thisDiscourse = expHandle2experiment.getOrDefault(expHandle, "discussionforum"); if (!ddb_user_ids.containsKey(expHandle)) { ddb_user_ids.put(expHandle, mcs.mapUser(expHandle, thisDiscourse, datasetName, "discussionforum", "post_uid(User)", row.get("post_uid"))); } Long post_uid = Long.valueOf(row.get("post_uid")); System.out.println("Mapping post " + row.get("post_uid") + " by user " + expHandle + " aka " + sourceDiscId2ddbDiscId.getOrDefault(Long.valueOf(row.get("user_uid")), 0L)); Long post_ddbid = mcs.mapDiscussionPost(row.get("subject"), row.get("content"), row.get("forum_uid"), row.get("thread_uid"), expHandle2group.get(expHandle), expHandle2team.get(expHandle), ddb_user_ids.getOrDefault(expHandle, 0L), row.get("posted_at"), Long.valueOf(row.get("replyto_uid")), thisDiscourse, datasetName, "discussionforum", "post_uid", row.get("post_uid")); sourceDiscId2ddbDiscId.put(post_uid, post_ddbid); } /* * forumid, offset, forumname * 218,10510,92722 * 222,10810,98764 * 224,11010,79865 */ Map<String, String> xu_forumname2forum = new HashMap<String, String>(); // for (Map<String,String> row : csvIteratorExistingHeaders(directory + "/xustudy/newmapping.csv")) { for (Map<String, String> row : csvIteratorExistingHeaders(directory + "/summerschool/newmapping.csv")) { xu_forumname2forum.put(row.get("forumname"), row.get("forumid")); } File[] listOfFiles = new File(directory + "/summerschool/chats/").listFiles(); // File[] listOfFiles = new File(directory + "/xustudy/chatlogs_transactivity_annotated/").listFiles(); for (File file : listOfFiles) { if (file.isFile() && file.getName().endsWith(".csv")) { //if (true) break; String n = file.getName(); String forum_id = "", team_id = ""; if (n.startsWith("mturkno")) { forum_id = n.substring(7, n.length() - 5); } else if (n.startsWith("mturk")) { forum_id = n.substring(5, n.length() - 5); } forum_id = xu_forumname2forum.getOrDefault(forum_id, "0"); team_id = n.substring(n.length() - 5, n.length() - 4); if (!forum_id.equals("0")) { int lineno = 0; if (summerSchool || n.startsWith("mturkno")) { /* ,type,username,useraddress,userid,timestamp,roomname,content,neg, * 1,presence,BazaarAgent,128.2.220.133:35582,N,6/4/16 21:24,mturkno798238,join,bazaar, */ for (Map<String, String> row : 
csvIteratorExistingHeaders(file.getAbsolutePath())) { if (row.get("type") == "presence") { //mcs.mapChatInteraction(row.get("timestamp") + ":00", forum_id + ":" + row.get("username"), forum_id, team_id, row.get("content"), // xuDiscourseName, datasetName, "chats/" + file.getName(), "lineno", lineno); } else if (row.get("username") != null && row.get("username").length() > 0) { mcs.mapChat(row.get("timestamp") + ":00", forum_id + ":" + row.get("username"), forum_id, team_id, row.get("content"), xuDiscourseName, datasetName, "chats/" + file.getName(), "lineno", Long.toString(lineno)); } lineno += 1; } } else { /* * 7/11/16,20:53:59,0,Andy,1.46828E+12,Hi,neg,neg,,, * 7/11/16,20:54:07,0,UKCats,1.46828E+12,Hi all,neg,neg,,, */ System.out.println("Trying to scan " + file.getAbsolutePath()); for (Map<String, String> row : csvIteratorNoHeaders(file.getAbsolutePath(), "date,time,zero,username,number,content,fld1,transactivity,fld3,fld4,fld5,ign1,ign2,ign3,ign4,ign5,ign6")) { if (row.get("username") != null && row.get("username").length() > 0) { mcs.mapChat(row.get("date") + " " + row.get("time"), forum_id + ":" + row.get("username"), forum_id, team_id, row.get("content"), xuDiscourseName, datasetName, "chats/" + file.getName(), "lineno", Long.toString(lineno)); } lineno += 1; } } } else { System.out.println("Chat session " + file.getName() + " can't be identified"); } } } /*HOW: * Read xu/userid-namemap -> to get username to userid-within-group * Read xu/newmapping -> to get forumid -> groupname * Read xu/chatlogs -> * add dp for each file, link to experiment, group, team * for each posting add user, text, date; link to dp * */ System.out.println("Doing pre/post tests"); Pattern forum_team_user = Pattern.compile("(\\d\\d\\d)_(\\d+)_(\\d+)"); Matcher m1 = forum_team_user.matcher("234pre234_2_3.csv"); m1.find(); assert m1.group(1) == "234"; //Iterator<File> it = FileUtils.iterateFiles(new File(directory + "/xustudy/preposttest"), null, true); Iterator<File> it = FileUtils.iterateFiles(new File(directory + "/summerschool/242_pretest"), null, true); while (it.hasNext()) { File test = it.next(); if (test.isFile() && test.getName().endsWith(".csv")) { System.out.println("Doing test " + test.getName()); String n = test.getName(); Matcher m = forum_team_user.matcher(n); if (m.find()) { String forum_id = m.group(1); String team_id = m.group(2); String user_id = m.group(3); String testtype = "Pretest"; if (n.contains("post")) { testtype = "Posttest"; } String content = FileUtils.readFileToString(test); content = content.substring(content.indexOf("\n")); // skip first line, which is a false csv header String xu_id = forum_id + "_" + team_id + "_" + user_id; String username = xu_id2username.get(xu_id); System.out.println("Scanning " + testtype + " " + n + " by " + username + " on team " + forum_id + "_" + team_id); mcs.mapFile(forum_id, team_id, username, testtype + " by " + username, testtype.equals("Posttest") ? 
ContributionTypes.POSTTEST : ContributionTypes.PRETEST, content, xuDiscourseName, datasetName, "preposttests", "for_user", xu_id); } } } System.out.println("Doing xu proposals"); //Iterable<File> it2 = () -> FileUtils.iterateFiles(new File(directory + "/xustudy/group_proposals_txt/"), null, false); Iterable<File> it2 = () -> FileUtils.iterateFiles(new File(directory + "/summerschool/proposals/"), null, false); for (File prop : it2) { //if (true) break; if (prop.isFile() && prop.getName().endsWith(".txt")) { System.out.println("Doing proposal " + prop.getName()); String n = prop.getName(); String forum_id = "", team_id = ""; forum_id = n.substring(0, n.length() - 5); team_id = n.substring(n.length() - 5, n.length() - 4); forum_id = xu_forumname2forum.getOrDefault(forum_id, "0"); System.out.println("Scanning proposal " + n + " by " + forum_id + "_" + team_id); String content = FileUtils.readFileToString(prop); mcs.mapFile(forum_id, team_id, forum_id + "_" + team_id, "Proposal by " + forum_id + "_" + team_id, ContributionTypes.PROPOSAL, content, xuDiscourseName, datasetName, "proposals", "for_team", forum_id + "_" + team_id); } } if (!summerSchool) { System.out.println("Doing wen proposals"); Iterable<File> it3 = () -> FileUtils.iterateFiles(new File(directory + "/wenstudy/proposals/"), null, false); for (File prop : it3) { //if (true) break; if (prop.isFile() && prop.getName().endsWith(".csv")) { System.out.println("Doing proposal " + prop.getName()); String n = prop.getName(); String group_id = "", team_id = ""; group_id = wenGroup(n.substring(0, n.length() - 4)); team_id = wenTeam(n.substring(0, n.length() - 4)); System.out.println("Scanning proposal " + n + " by " + group_id + "_" + team_id + ": " + prop.getAbsolutePath()); // This kludgey code handles the fact that: // * These look like csv files, but quoted strings contain unescaped quotes // * Sometimes there are multiple columns, sometimes not, but we only care about the first column // * Usually if there are extra columns, the first few are empty, so we can ignore anything after ,, // * First row is column names, but first column name is usually (not always) blank String[] content2 = FileUtils.readFileToString(prop).split("\n"); content2[0] = ""; // Skip header row String content = ""; for (String row : content2) { String keep = ""; if (row.length() >= 2 && row.startsWith("\"")) { keep += row.substring(1, row.length() - 1); } else { keep += row + "\n"; } content += keep.split(",,")[0]; // Sometimes these have multiple rows and we only care about the first column } mcs.mapFile(group_id, team_id, group_id + "_" + team_id, "Proposal by " + group_id + "_" + team_id, ContributionTypes.PROPOSAL, content, wenDiscourseName, datasetName, "proposals", "for_team", group_id + "_" + team_id); } } System.out.println("Doing wen study chats"); File[] listOfFiles2 = new File(directory + "/wenstudy/chats/").listFiles(); for (File file : listOfFiles2) { if (file.isFile() && file.getName().endsWith(".csv")) { //if (true) break; String n = file.getName(); String group_id = "", team_id = ""; group_id = wenGroup(n.substring(0, n.length() - 4)); team_id = wenTeam(n.substring(0, n.length() - 4)); int lineno = 0; /* userid,chat,, * asdf,"Plan 2 exceeds the total budget, though.",, * asdf,"Plan 1 is the only one that falls within their ""tight"" budget.",1, */ for (Map<String, String> row : csvIteratorExistingHeaders(file.getAbsolutePath())) { System.out.println(group_id + team_id + " " + row.get("userid") + " says " + row.get("chat").substring(0, 
java.lang.Math.min(30, row.get("chat").length()))); if (row.get("userid") != null && row.get("userid").length() > 0 && row.get("userid").length() < 50) { mcs.mapChat(null, group_id + ":" + row.get("userid"), group_id, team_id, row.get("chat"), wenDiscourseName, datasetName, "chats/" + file.getName(), "lineno", Long.toString(lineno)); } lineno += 1; } } } } /* * read xu/preposttest files -> * look up author from * create dp for pre/post+teamname, dpr link to team, group, experiment from filename * import as contribution/content; ignore first line * read xu/proposal files * Leave author blank; place directly as posting under team's dp * read wen/chats * make dp for each file: wen team ff1 chat, link to team, group, experiment * put contribution from each one. Number sequentially from 1972-01-01 incrementing by one minute each through the whole set of files. * read wen/proposals * leave author blank; place directly as posting under team's dp. title="wen team ff1 proposal" Mechanism: * thingy to read in a csv file with or without a fixed set of fields * thingy to coalesce a whole csv column into a single string with carriage returns * thingy to store a map * thingy to write elements and store the discoursedb-indexes to a map Map<String, String> roomIdNameMap = new HashMap<>(); List<String> messages = new ArrayList<>(); //Read input file and preprocess String lineFragment = null; for(String line:FileUtils.readLines(new File(messageFileDir))){ //line fragments occur in case we have line feeds in a column if(lineFragment!=null){ line=lineFragment+line; lineFragment=null; } if (line.endsWith("\\")||line.endsWith("\\\r\f")){ line = line.replaceAll("\\\r\f", ""); lineFragment = line; }else{ if (line.contains("\\\"We're Ready\\\"")) { line = line.replaceAll("\"We're Ready\\\\\"", "We're Ready\\\\"); } if (line.contains("\\\"ready\\\"")) { line = line.replaceAll("\\\\\"ready\\\\\"", "\\\\ready\\\\"); } if (line.contains("\\\""+agentname+"\\\"")){ line = line.replaceAll("\\\\\""+agentname+"\\\\\"", "\\\\"+agentname+"\\\\"); } messages.add(line); } } // Phase 1: read through input room file once and map all entities try (InputStream in = new FileInputStream(roomFileDir)) { CsvMapper mapper = new CsvMapper(); CsvSchema schema = mapper.schemaFor(Room.class).withColumnSeparator(','); MappingIterator<Room> rIter = mapper.readerFor(Room.class).with(schema).readValues(in); while (rIter.hasNextValue()) { Room r = rIter.next(); if (!roomIdNameMap.containsKey(r.getId())) roomIdNameMap.put(r.getId(), r.getName()); converterService.mapRoom(r, dataSetName, discourseName); } } catch (IOException e) { log.error("Error reading room file",e); } // Phase 2: read through input message file and map relationships between room and message CsvMapper mapper = new CsvMapper(); CsvSchema schema = mapper.schemaFor(Message.class).withColumnSeparator(','); for(String message:messages){ Message m = mapper.readerFor(Message.class).with(schema).readValue(message); if (m.getType().equals("text") || m.getType().equals("image") || m.getType().equals("private")){ converterService.mapMessage(m, dataSetName, discourseName, roomIdNameMap); }else{ converterService.mapInteraction(m, dataSetName, discourseName, roomIdNameMap); } } */ }
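The converter above leans on getOrDefault to supply synthetic fallbacks when a lookup table has no entry: a generated display handle (row.get("forum_uid") + ":User" + row.get("user_uid")), the sentinel forum id "0" from xu_forumname2forum, or 0L for an unknown DiscourseDB user id. Note that the default expression is an ordinary argument and is evaluated even when the key is present, so it should stay cheap and side-effect free. A small sketch of the fallback-handle idiom with placeholder data:

import java.util.HashMap;
import java.util.Map;

public class FallbackHandles {
    public static void main(String[] args) {
        Map<String, String> idToHandle = new HashMap<>();
        idToHandle.put("2603", "64:Amber");

        for (String userId : new String[] {"2603", "9999"}) {
            // Known ids resolve to their stored handle; unknown ids get a synthetic one.
            String handle = idToHandle.getOrDefault(userId, "64:User" + userId);
            System.out.println(userId + " -> " + handle);
        }
        // Prints:
        // 2603 -> 64:Amber
        // 9999 -> 64:User9999
    }
}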