List of usage examples for java.util.HashSet.size()
public int size()
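size() returns the number of elements currently held in the set; because a HashSet stores each element at most once, duplicates are counted only once. A minimal, self-contained sketch (class and variable names are illustrative, not taken from the examples below):

import java.util.HashSet;

public class HashSetSizeDemo {
    public static void main(String[] args) {
        HashSet<String> names = new HashSet<>();
        names.add("alice");
        names.add("bob");
        names.add("alice"); // duplicate, not stored again

        // size() reports the number of distinct elements
        System.out.println(names.size()); // prints 2

        names.clear();
        System.out.println(names.isEmpty()); // prints true; size() is now 0
    }
}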
From source file:expansionBlocks.ProcessCommunities.java
public static Pair<Map<Entity, Double>, Map<Entity, Double>> execute(Configuration configuration, Query query)
        throws Exception {
    Map<Set<Long>, Map<Entity, Double>> mapPathCommunities = query.getCommunities();
    HashSet<Map<Entity, Double>> initialCommunities = new HashSet<>(mapPathCommunities.values());
    Set<Map<Entity, Double>> scaledCommunities = new HashSet<>();
    AbstractCommunityScalator as = configuration.getAbstractCommunityScalator();

    for (Map<Entity, Double> community : initialCommunities) {
        Map<Entity, Double> scaledCommunity = as.scaledEmphasisArticlesInCommunity(configuration, query,
                community);
        scaledCommunities.add(scaledCommunity);
    }

    Set<Map<Entity, Double>> communitiesFusioned = getCommunitiesFromCommunitiesBasedOnSimilarity(
            scaledCommunities, configuration.getFusionThreshold());

    if (configuration.DEBUG_INFO) {
        println("Fusion communities based on similarity communities: ");
        for (Map<Entity, Double> community : communitiesFusioned) {
            println(community);
        }
    }

    println(initialCommunities.size() + " communities have been fusioned into " + communitiesFusioned.size());
    println("[[WARNING]] - Select best community algorithm seems to differ from select best path. You may want to double ckeck it.");

    Set<Map<Entity, Double>> selectBestCommunities = selectBestCommunities(configuration, communitiesFusioned,
            query.getTokenNames());

    if (configuration.DEBUG_INFO) {
        println("Selected best communities: ");
        for (Map<Entity, Double> community : selectBestCommunities) {
            println(StringUtilsQueryExpansion.MapDoubleValueToString(community));
        }
    }

    Map<Entity, Double> result = agregateCommunities(selectBestCommunities);

    if (configuration.DEBUG_INFO) {
        println("Agragated community(size: " + result.size() + "): ");
        println(StringUtilsQueryExpansion.MapDoubleValueToString(result));
    }

    Set<Entity> entitiesToRemove = new HashSet<>();
    /*for (Map.Entry<Entity, Double> e : result.entrySet()) {
        Set<Category> categories = e.getKey().getCategories();
        println("Categories of \"" + e.getKey() + "\": " + categories);
        if (categories.isEmpty())
            entitiesToRemove.add(e.getKey());
    }*/
    entitiesToRemove.addAll(removableAccordingToCategories(result));

    Map<Entity, Double> filteredCommunity = new HashMap<>(result);
    for (Entity e : entitiesToRemove) {
        filteredCommunity.remove(e);
    }

    println("Based on category analisy I would suggest to remove: " + entitiesToRemove);
    println("New Community in case of category based filtering"
            + StringUtilsQueryExpansion.MapDoubleValueToString(filteredCommunity));

    query.setCommunityAfterRemoval(filteredCommunity);
    query.setCommunity(result);
    return new Pair<>(result, filteredCommunity);
}
From source file:com.ibm.bi.dml.runtime.matrix.CMCOVMR.java
public static JobReturn runJob(MRJobInstruction inst, String[] inputs, InputInfo[] inputInfos, long[] rlens,
        long[] clens, int[] brlens, int[] bclens, String instructionsInMapper, String cmNcomInstructions,
        int numReducers, int replication, byte[] resultIndexes, String[] outputs, OutputInfo[] outputInfos)
        throws Exception {
    JobConf job = new JobConf(CMCOVMR.class);
    job.setJobName("CM-COV-MR");

    //whether use block representation or cell representation
    MRJobConfiguration.setMatrixValueClassForCM_N_COM(job, true);

    //added for handling recordreader instruction
    String[] realinputs = inputs;
    InputInfo[] realinputInfos = inputInfos;
    long[] realrlens = rlens;
    long[] realclens = clens;
    int[] realbrlens = brlens;
    int[] realbclens = bclens;
    byte[] realIndexes = new byte[inputs.length];
    for (byte b = 0; b < realIndexes.length; b++)
        realIndexes[b] = b;

    //set up the input files and their format information
    MRJobConfiguration.setUpMultipleInputs(job, realIndexes, realinputs, realinputInfos, realbrlens, realbclens,
            true, ConvertTarget.WEIGHTEDCELL);

    //set up the dimensions of input matrices
    MRJobConfiguration.setMatricesDimensions(job, realIndexes, realrlens, realclens);

    //set up the block size
    MRJobConfiguration.setBlocksSizes(job, realIndexes, realbrlens, realbclens);

    //set up unary instructions that will perform in the mapper
    MRJobConfiguration.setInstructionsInMapper(job, instructionsInMapper);

    //set up the aggregate instructions that will happen in the combiner and reducer
    MRJobConfiguration.setCM_N_COMInstructions(job, cmNcomInstructions);

    //set up the replication factor for the results
    job.setInt("dfs.replication", replication);

    //set up what matrices are needed to pass from the mapper to reducer
    HashSet<Byte> mapoutputIndexes = MRJobConfiguration.setUpOutputIndexesForMapper(job, realIndexes,
            instructionsInMapper, null, cmNcomInstructions, resultIndexes);

    //set up the multiple output files, and their format information
    MRJobConfiguration.setUpMultipleOutputs(job, resultIndexes, new byte[resultIndexes.length], outputs,
            outputInfos, false);

    // configure mapper and the mapper output key value pairs
    job.setMapperClass(CMCOVMRMapper.class);
    job.setMapOutputKeyClass(TaggedFirstSecondIndexes.class);
    job.setMapOutputValueClass(CM_N_COVCell.class);
    job.setOutputKeyComparatorClass(TaggedFirstSecondIndexes.Comparator.class);
    job.setPartitionerClass(TaggedFirstSecondIndexes.TagPartitioner.class);

    //configure reducer
    job.setReducerClass(CMCOVMRReducer.class);
    //job.setReducerClass(PassThroughReducer.class);

    MatrixCharacteristics[] stats = MRJobConfiguration.computeMatrixCharacteristics(job, realIndexes,
            instructionsInMapper, null, null, cmNcomInstructions, resultIndexes, mapoutputIndexes, false).stats;

    //set up the number of reducers
    MRJobConfiguration.setNumReducers(job, mapoutputIndexes.size(), numReducers); //each output tag is a group

    // Print the complete instruction
    if (LOG.isTraceEnabled())
        inst.printCompleteMRJobInstruction(stats);

    // By default, the job executes in "cluster" mode.
    // Determine if we can optimize and run it in "local" mode.
    MatrixCharacteristics[] inputStats = new MatrixCharacteristics[inputs.length];
    for (int i = 0; i < inputs.length; i++) {
        inputStats[i] = new MatrixCharacteristics(rlens[i], clens[i], brlens[i], bclens[i]);
    }

    //set unique working dir
    MRJobConfiguration.setUniqueWorkingDir(job);

    RunningJob runjob = JobClient.runJob(job);

    return new JobReturn(stats, outputInfos, runjob.isSuccessful());
}
From source file:gov.nih.nci.firebird.data.AnnualRegistrationPersistenceTest.java
@Test
public void testNotificationEmailAddresses() {
    AnnualRegistration registration = createNewRegistration();
    String email1 = ValueGenerator.getUniqueEmailAddress();
    String email2 = ValueGenerator.getUniqueEmailAddress();
    String email3 = ValueGenerator.getUniqueEmailAddress();
    HashSet<String> emailsAsSet = Sets.newHashSet(email1, email2, email3);
    registration.getNotificationEmailAddresses().addAll(emailsAsSet);
    saveAndFlush(registration);
    AnnualRegistration retrievedRegistration = reloadObject(registration);
    assertEquals(emailsAsSet.size(), retrievedRegistration.getNotificationEmailAddresses().size());
    assertTrue(retrievedRegistration.getNotificationEmailAddresses().containsAll(emailsAsSet));
}
From source file:opennlp.tools.parse_thicket.opinion_processor.StopList.java
public String getRandomFirstName() {
    HashSet<String> firstNames = m_stopHash.get("FIRST_NAMES");
    int indexRand = (int) (Math.random() * new Float(firstNames.size()));
    Iterator iter = firstNames.iterator();
    for (int i = 0; i < indexRand; i++) {
        iter.next();
    }
    return ((String) iter.next()).toLowerCase();
}
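Since HashSet has no positional access, the example above uses size() to bound a random offset and then advances the iterator to that position. A slightly more defensive sketch of the same idea (generic types, empty-set check; the class and method names are illustrative, not from the source project):

import java.util.Iterator;
import java.util.Set;
import java.util.concurrent.ThreadLocalRandom;

public final class RandomPick {
    /** Returns a uniformly random element of the set, or null when the set is empty. */
    public static String randomElement(Set<String> set) {
        if (set.isEmpty()) {
            return null; // nothing to pick from
        }
        // size() bounds the random offset in [0, size())
        int index = ThreadLocalRandom.current().nextInt(set.size());
        Iterator<String> it = set.iterator();
        for (int i = 0; i < index; i++) {
            it.next();
        }
        return it.next();
    }
}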
From source file:org.cloudgraph.hbase.mutation.Update.java
@Override
public void collect(DataGraph dataGraph, PlasmaDataObject dataObject, DistributedWriter graphWriter,
        TableWriter tableWriter, RowWriter rowWriter) throws IllegalAccessException, IOException {
    PlasmaType type = (PlasmaType) dataObject.getType();
    CoreNode dataNode = (CoreNode) dataObject;
    // FIXME: get rid of cast - define instance properties in 'base type'
    Timestamp snapshotDate = (Timestamp) dataNode.getValue(CoreConstants.PROPERTY_NAME_SNAPSHOT_TIMESTAMP);
    if (snapshotDate == null)
        throw new RequiredPropertyException("instance property '" + CoreConstants.PROPERTY_NAME_SNAPSHOT_TIMESTAMP
                + "' is required to update data object, " + dataObject);
    if (log.isDebugEnabled())
        log.debug(dataObject + " timestamp: " + String.valueOf(snapshotDate));

    Long sequence = (Long) dataNode.getValue(CloudGraphConstants.SEQUENCE);
    if (sequence == null)
        throw new RequiredPropertyException("instance property '" + CloudGraphConstants.SEQUENCE
                + "' is required to update data object, " + dataObject);
    if (log.isDebugEnabled())
        log.debug(dataObject + " (seq: " + sequence + ")");

    List<Setting> settingList = dataGraph.getChangeSummary().getOldValues(dataObject);
    HashSet<PlasmaProperty> properties = this.collectProperties(settingList);
    Iterator<PlasmaProperty> iter = properties.iterator();
    while (iter.hasNext()) {
        PlasmaProperty property = iter.next();
        if (property.getConcurrent() != null)
            return; // processed above

        if (property.isReadOnly())
            throw new IllegalAccessException("attempt to modify read-only property, " + property);

        Object dataValue = dataObject.get(property);
        if (dataValue != null)
            if (log.isDebugEnabled())
                log.debug("updating " + property.toString());
            else if (log.isDebugEnabled())
                log.debug("removing " + property.toString());

        if (!property.getType().isDataType()) {
            SettingCollector<PlasmaDataObject> settingCollector = new SettingCollector<>();
            HashSet<PlasmaDataObject> oldSettings = settingCollector.collect(property, settingList);
            HashSet<PlasmaDataObject> oldValues = new HashSet<>(oldSettings.size());
            for (PlasmaDataObject oldSettingObject : oldSettings) {
                if (!oldSettingObject.getDataGraph().getChangeSummary().isCreated(oldSettingObject))
                    oldValues.add(oldSettingObject);
            }

            EdgeWriter edgeWriter = rowWriter.getEdgeWriter(dataObject, property, sequence);
            if (!property.isMany()) {
                this.collectSingular(edgeWriter, dataObject, oldValues, property, dataValue);
            } else {
                this.collectMulti(edgeWriter, dataObject, oldValues, property, dataValue);
            }
            edgeWriter.write();
        } else {
            Increment increment = property.getIncrement();
            if (dataValue != null) {
                if (increment == null) {
                    byte[] valueBytes = HBaseDataConverter.INSTANCE.toBytes(property, dataValue);
                    rowWriter.writeRowData(dataObject, sequence, property, valueBytes);
                } else { // increment
                    if (type.isConcurrent())
                        throw new GraphServiceException("increment property, " + property
                                + ", found on concurrent type, " + type
                                + " - increment properties cannot coexist within a concurrent type");
                    DataType dataType = DataType.valueOf(property.getType().getName());
                    if (increment != null) { // user can increment/decrement by whatever value
                        if (dataType.ordinal() != DataType.Long.ordinal())
                            throw new GraphServiceException("property, " + property + ", must be datatype "
                                    + DataType.Long + " to support increment operations");
                        long longDataValue = DataConverter.INSTANCE.toLong(property.getType(), dataValue);
                        rowWriter.incrementRowData(dataObject, sequence, property, longDataValue);
                    }
                }
            } else {
                rowWriter.deleteRowData(dataObject, sequence, property);
            }
        }
    }
}
From source file:org.metaservice.core.AbstractDispatcher.java
public static List<Statement> getGeneratedStatements(RepositoryConnection resultConnection,
        Set<Statement> loadedStatements) throws RepositoryException {
    RepositoryResult<Statement> all = resultConnection.getStatements(null, null, null, true);
    ArrayList<Statement> allList = new ArrayList<>();
    HashSet<Resource> undefined = new HashSet<>();
    while (all.hasNext()) {
        Statement s = all.next();
        if (!loadedStatements.contains(s)) {
            if (s.getPredicate().equals(RDFS.SUBPROPERTYOF) || s.getPredicate().equals(RDFS.SUBCLASSOF)
                    || s.getPredicate().equals(RDF.TYPE) && s.getObject().equals(RDFS.RESOURCE)
                    || s.getPredicate().equals(RDF.TYPE) && s.getObject().equals(OWL.THING)
                    || s.getPredicate().equals(RDF.TYPE) && s.getObject().equals(RDF.PROPERTY)) {
                if (!s.getSubject().stringValue().startsWith("http://metaservice.org/d/")) {
                    LOGGER.debug("UNDEFINED {} {} {}", s.getSubject(), s.getPredicate(), s.getObject());
                    undefined.add(s.getSubject());
                }
            } else {
                if (s.getSubject() instanceof BNode || s.getObject() instanceof BNode) {
                    LOGGER.error("ATTENTION - BNodes are not supported by Metaservice, skipping statement");
                    continue;
                }
                allList.add(s);
            }
        }
    }
    if (undefined.size() != 0) {
        //grep for logfiles:
        //grep "not defi" *| sed -e 's/^.*WARN.*define://' | uniq | sort | uniq | sed -e 's/,/\n/g' | tr -d ' ' | sort | uniq
        LOGGER.warn("Did not define: {}", StringUtils.join(undefined, ", "));
    }
    return allList;
}
From source file:org.apache.sysml.runtime.matrix.CMCOVMR.java
public static JobReturn runJob(MRJobInstruction inst, String[] inputs, InputInfo[] inputInfos, long[] rlens,
        long[] clens, int[] brlens, int[] bclens, String instructionsInMapper, String cmNcomInstructions,
        int numReducers, int replication, byte[] resultIndexes, String[] outputs, OutputInfo[] outputInfos)
        throws Exception {
    JobConf job = new JobConf(CMCOVMR.class);
    job.setJobName("CM-COV-MR");

    //whether use block representation or cell representation
    MRJobConfiguration.setMatrixValueClassForCM_N_COM(job, true);

    //added for handling recordreader instruction
    String[] realinputs = inputs;
    InputInfo[] realinputInfos = inputInfos;
    long[] realrlens = rlens;
    long[] realclens = clens;
    int[] realbrlens = brlens;
    int[] realbclens = bclens;
    byte[] realIndexes = new byte[inputs.length];
    for (byte b = 0; b < realIndexes.length; b++)
        realIndexes[b] = b;

    //set up the input files and their format information
    MRJobConfiguration.setUpMultipleInputs(job, realIndexes, realinputs, realinputInfos, realbrlens, realbclens,
            true, ConvertTarget.WEIGHTEDCELL);

    //set up the dimensions of input matrices
    MRJobConfiguration.setMatricesDimensions(job, realIndexes, realrlens, realclens);

    //set up the block size
    MRJobConfiguration.setBlocksSizes(job, realIndexes, realbrlens, realbclens);

    //set up unary instructions that will perform in the mapper
    MRJobConfiguration.setInstructionsInMapper(job, instructionsInMapper);

    //set up the aggregate instructions that will happen in the combiner and reducer
    MRJobConfiguration.setCM_N_COMInstructions(job, cmNcomInstructions);

    //set up the replication factor for the results
    job.setInt(MRConfigurationNames.DFS_REPLICATION, replication);

    //set up custom map/reduce configurations
    DMLConfig config = ConfigurationManager.getDMLConfig();
    MRJobConfiguration.setupCustomMRConfigurations(job, config);

    //set up what matrices are needed to pass from the mapper to reducer
    HashSet<Byte> mapoutputIndexes = MRJobConfiguration.setUpOutputIndexesForMapper(job, realIndexes,
            instructionsInMapper, null, cmNcomInstructions, resultIndexes);

    //set up the multiple output files, and their format information
    MRJobConfiguration.setUpMultipleOutputs(job, resultIndexes, new byte[resultIndexes.length], outputs,
            outputInfos, false);

    // configure mapper and the mapper output key value pairs
    job.setMapperClass(CMCOVMRMapper.class);
    job.setMapOutputKeyClass(TaggedFirstSecondIndexes.class);
    job.setMapOutputValueClass(CM_N_COVCell.class);
    job.setOutputKeyComparatorClass(TaggedFirstSecondIndexes.Comparator.class);
    job.setPartitionerClass(TaggedFirstSecondIndexes.TagPartitioner.class);

    //configure reducer
    job.setReducerClass(CMCOVMRReducer.class);
    //job.setReducerClass(PassThroughReducer.class);

    MatrixCharacteristics[] stats = MRJobConfiguration.computeMatrixCharacteristics(job, realIndexes,
            instructionsInMapper, null, null, cmNcomInstructions, resultIndexes, mapoutputIndexes, false).stats;

    //set up the number of reducers
    MRJobConfiguration.setNumReducers(job, mapoutputIndexes.size(), numReducers); //each output tag is a group

    // Print the complete instruction
    if (LOG.isTraceEnabled())
        inst.printCompleteMRJobInstruction(stats);

    // By default, the job executes in "cluster" mode.
    // Determine if we can optimize and run it in "local" mode.
    MatrixCharacteristics[] inputStats = new MatrixCharacteristics[inputs.length];
    for (int i = 0; i < inputs.length; i++) {
        inputStats[i] = new MatrixCharacteristics(rlens[i], clens[i], brlens[i], bclens[i]);
    }

    //set unique working dir
    MRJobConfiguration.setUniqueWorkingDir(job);

    RunningJob runjob = JobClient.runJob(job);

    return new JobReturn(stats, outputInfos, runjob.isSuccessful());
}
From source file:com.cloudera.recordservice.tests.MiniClusterController.java
/**
 * This method checks the current state of the MiniClusterController object
 * against the actual state of the system. Returns false if some running
 * cluster nodes are not tracked by this MiniClusterController, or if some
 * nodes tracked by this MiniClusterController are not running. Returns true
 * otherwise.
 */
public boolean isClusterStateCorrect() {
    HashSet<Integer> pidSet = getRunningMiniNodePids();
    // Check the cluster list
    if (pidSet.size() > 0 && (clusterList_ == null || clusterList_.size() <= 0)) {
        printPids(pidSet, "were found but are not being tracked by the MiniClusterController");
        return false;
    } else {
        for (MiniClusterNode node : clusterList_) {
            if (!pidSet.contains(node.pid_)) {
                System.err.println("Node with pid = " + node.pid_ + " was expected but not found");
                return false;
            }
            // Two nodes cannot share the same process ID
            pidSet.remove(node.pid_);
        }
        if (pidSet.size() > 0) {
            printPids(pidSet, "were found but are not being tracked by the MiniClusterController");
            return false;
        }
    }
    return true;
}
From source file:org.apache.fluo.recipes.core.export.it.ExportTestBase.java
protected void diff(Map<String, Set<String>> fr, Map<String, Set<String>> er) {
    HashSet<String> allKeys = new HashSet<>(fr.keySet());
    allKeys.addAll(er.keySet());
    for (String k : allKeys) {
        Set<String> s1 = fr.getOrDefault(k, Collections.emptySet());
        Set<String> s2 = er.getOrDefault(k, Collections.emptySet());

        HashSet<String> sub1 = new HashSet<>(s1);
        sub1.removeAll(s2);

        HashSet<String> sub2 = new HashSet<>(s2);
        sub2.removeAll(s1);

        if (sub1.size() > 0 || sub2.size() > 0) {
            System.out.println(k + " " + sub1 + " " + sub2);
        }
    }
}
From source file:br.fapesp.myutils.MyUtils.java
public static int[] createArrayFromHashSet(HashSet<Integer> hash) {
    int[] ar = new int[hash.size()];
    int i = 0;
    for (Integer val : hash) {
        ar[i++] = val;
    }
    return ar;
}
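The example above uses size() to pre-allocate the target array before copying the elements. On Java 8+ the same conversion can be written with a stream; a minimal sketch, not from the source project:

import java.util.HashSet;

public class HashSetToIntArray {
    public static void main(String[] args) {
        HashSet<Integer> hash = new HashSet<>();
        hash.add(3);
        hash.add(7);

        // unboxes each Integer and collects into an int[] of length hash.size()
        int[] ar = hash.stream().mapToInt(Integer::intValue).toArray();
        System.out.println(ar.length); // prints 2
    }
}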