List of usage examples for java.util.HashSet.size()
public int size()
From source file:amie.keys.CombinationsExplorationNew.java
/** * Construction of the maps from properties -> id and id -> properties *///from www . ja va 2 s . c om private static HashSet<HashSet<Integer>> buildDictionaries(List<List<String>> nonKeys, List<Integer> propertiesList) { int id = 0; HashSet<HashSet<Integer>> result = new HashSet<>(); for (List<String> nonKey : nonKeys) { // System.out.println("nonkey:" + nonKey); HashSet<Integer> nonKeyInt = new HashSet<>(); for (int k = 0; k < nonKey.size(); ++k) { // Ignore String property = nonKey.get(k); // boolean supportCutProperty =false; // if (getPropertySupport(property) < support) { // System.out.println("getPropertySupport(property):"+getPropertySupport(property)); // supportCutProperty = true; // System.out.println("ee"); // continue; // } //**** Debugging code ***/ /**List<String> testingProperties = Arrays.asList("db:campus", "db:mascot", "db:officialschoolcolour", "db:athletics", "db:country"); if (getPropertySupport(property) < support || !testingProperties.contains(property)) { continue; }**/ //**** Debugging code ***/ if (!property2Id.containsKey(property)) { property2Id.put(property, id); id2Property.put(id, property); // if(!supportCutProperty){ propertiesList.add(id);//} ++id; } Integer idProperty = property2Id.get(property); nonKeyInt.add(idProperty); } //System.out.println("propertyList:"+propertiesList); if (nonKeyInt.size() != 0) { result.add(nonKeyInt); // System.out.println("nonKeyInt!:" + nonKeyInt + " " + nonKeyInt.size() + "\n"); } } // System.out.println("id2Property:"+id2Property); // System.out.println("nonKeysInt:" + result.size() + "\n"); result = simplifyHashNonKeySet(result); //System.out.println("nonKeysInt:" + result.size() + "\n"); return result; }
From source file:org.apache.sysml.hops.codegen.opt.PlanSelectionFuseCostBased.java
private void createAndAddMultiAggPlans(CPlanMemoTable memo, ArrayList<Hop> roots) { //collect full aggregations as initial set of candidates HashSet<Long> fullAggs = new HashSet<>(); Hop.resetVisitStatus(roots);/*from w w w . jav a2s . c o m*/ for (Hop hop : roots) rCollectFullAggregates(hop, fullAggs); Hop.resetVisitStatus(roots); //remove operators with assigned multi-agg plans fullAggs.removeIf(p -> memo.contains(p, TemplateType.MAGG)); //check applicability for further analysis if (fullAggs.size() <= 1) return; if (LOG.isTraceEnabled()) { LOG.trace("Found across-partition ua(RC) aggregations: " + Arrays.toString(fullAggs.toArray(new Long[0]))); } //collect information for all candidates //(subsumed aggregations, and inputs to fused operators) List<AggregateInfo> aggInfos = new ArrayList<>(); for (Long hopID : fullAggs) { Hop aggHop = memo.getHopRefs().get(hopID); AggregateInfo tmp = new AggregateInfo(aggHop); for (int i = 0; i < aggHop.getInput().size(); i++) { Hop c = HopRewriteUtils.isMatrixMultiply(aggHop) && i == 0 ? 
aggHop.getInput().get(0).getInput().get(0) : aggHop.getInput().get(i); rExtractAggregateInfo(memo, c, tmp, TemplateType.CELL); } if (tmp._fusedInputs.isEmpty()) { if (HopRewriteUtils.isMatrixMultiply(aggHop)) { tmp.addFusedInput(aggHop.getInput().get(0).getInput().get(0).getHopID()); tmp.addFusedInput(aggHop.getInput().get(1).getHopID()); } else tmp.addFusedInput(aggHop.getInput().get(0).getHopID()); } aggInfos.add(tmp); } if (LOG.isTraceEnabled()) { LOG.trace("Extracted across-partition ua(RC) aggregation info: "); for (AggregateInfo info : aggInfos) LOG.trace(info); } //sort aggregations by num dependencies to simplify merging //clusters of aggregations with parallel dependencies aggInfos = aggInfos.stream().sorted(Comparator.comparing(a -> a._inputAggs.size())) .collect(Collectors.toList()); //greedy grouping of multi-agg candidates boolean converged = false; while (!converged) { AggregateInfo merged = null; for (int i = 0; i < aggInfos.size(); i++) { AggregateInfo current = aggInfos.get(i); for (int j = i + 1; j < aggInfos.size(); j++) { AggregateInfo that = aggInfos.get(j); if (current.isMergable(that)) { merged = current.merge(that); aggInfos.remove(j); j--; } } } converged = (merged == null); } if (LOG.isTraceEnabled()) { LOG.trace("Merged across-partition ua(RC) aggregation info: "); for (AggregateInfo info : aggInfos) LOG.trace(info); } //construct and add multiagg template plans (w/ max 3 aggregations) for (AggregateInfo info : aggInfos) { if (info._aggregates.size() <= 1) continue; Long[] aggs = info._aggregates.keySet().toArray(new Long[0]); MemoTableEntry me = new MemoTableEntry(TemplateType.MAGG, aggs[0], aggs[1], (aggs.length > 2) ? aggs[2] : -1, aggs.length); for (int i = 0; i < aggs.length; i++) { memo.add(memo.getHopRefs().get(aggs[i]), me); addBestPlan(aggs[i], me); if (LOG.isTraceEnabled()) LOG.trace("Added multiagg* plan: " + aggs[i] + " " + me); } } }
From source file:edu.nyu.vida.data_polygamy.exp.NoiseExp.java
void load2DData(String aggregateFile, String graphFile, int year) { String[] s = null;/*w w w.ja va 2 s . c om*/ IntOpenHashSet nodeSet = new IntOpenHashSet(); try { BufferedReader buf = new BufferedReader(new FileReader(aggregateFile)); s = Utilities.getLine(buf, ","); //System.out.println(s[0]); while (true) { if (s == null) { break; } String attr = Utilities.splitString(s[0], ":")[1].trim(); Attribute a = attributes.get(attr); if (a == null) { a = new Attribute(); attributes.put(attr, a); } s = Utilities.getLine(buf, ":"); int sid = Integer.parseInt(s[1].trim()); nodeSet.add(sid); s = Utilities.getLine(buf, ","); while (s != null && s.length > 0) { int month = Integer.parseInt(Utilities.splitString(s[0], ":")[1].trim()); s = Utilities.getLine(buf, ","); HashSet<SpatioTemporalVal> set = new HashSet<SpatioTemporalVal>(); while (s != null && s.length == 2) { if (month / 100 == year) { int time = Integer.parseInt(s[0]); float value = Float.parseFloat(s[1]); SpatioTemporalVal val = new SpatioTemporalVal(sid, time, value); ArrayList<Float> vals = (values.get(attr) == null) ? new ArrayList<Float>() : values.get(attr); vals.add(value); values.put(attr, vals); set.add(val); } s = Utilities.getLine(buf, ","); } if (set.size() > 0) { ArrayList<SpatioTemporalVal> monthlyArr = a.data.get(month); if (monthlyArr == null) { monthlyArr = new ArrayList<>(); a.data.put(month, monthlyArr); } monthlyArr.addAll(set); } } if (dataAttributesHashSet.contains(attr)) { attributes.put(attr, a); } s = Utilities.getLine(buf, ","); } buf.close(); for (Attribute a : attributes.values()) { for (ArrayList<SpatioTemporalVal> arr : a.data.values()) { Collections.sort(arr); } a.nodeSet = nodeSet; } } catch (Exception e) { e.printStackTrace(); } }
From source file:it.iit.genomics.cru.structures.bridges.uniprot.UniprotkbUtils.java
/** * * @param refSeqs/*from w ww .j a v a2s.c o m*/ * @return * @throws BridgesRemoteAccessException */ public MapOfMap<String, MoleculeEntry> getUniprotEntriesFromRefSeqs(Collection<String> refSeqs) throws BridgesRemoteAccessException { String tool = UNIPROT_TOOL; MapOfMap<String, MoleculeEntry> refseq2uniprots = new MapOfMap<>(refSeqs); if (refSeqs.isEmpty()) { return refseq2uniprots; } HashSet<String> refs2get = new HashSet<>(); try { for (String refseq : refSeqs) { if (cache.containsKey(refseq.toUpperCase().split("\\.")[0])) { refseq2uniprots.addAll(refseq, cache.get(refseq.toUpperCase().split("\\.")[0])); } else { refs2get.add(refseq); // if size == limit, do query if (refs2get.size() == maxQueries) { String location = UNIPROT_SERVER + tool + "/?" + "query=keyword:181+AND+organism:" + URLEncoder.encode("\"" + taxid + "\"", "UTF-8") + "+AND+(database%3A(type%3Arefseq+" + URLEncoder.encode( "" + StringUtils.join(refs2get, ") OR database:(type:refseq ") + "", "UTF-8") + "))"; Collection<MoleculeEntry> uniprotEntries = getUniprotEntriesXML(location); for (MoleculeEntry entry : uniprotEntries) { for (String xref : entry.getRefseqs()) { if (xref.endsWith(".")) { xref = xref.substring(0, xref.length() - 1); } if (refseq2uniprots.containsKey(xref.trim())) { refseq2uniprots.add(xref, entry); } else if (refseq2uniprots.containsKey(xref.split("[.]")[0])) { refseq2uniprots.add(xref.split("[.]")[0], entry); } } } refs2get.clear(); } } } if (refs2get.isEmpty()) { return refseq2uniprots; } String location = UNIPROT_SERVER + tool + "/?" 
+ "query=keyword:181+AND+organism:" + URLEncoder.encode("\"" + taxid + "\"", "UTF-8") + "+AND+(database%3A(type%3Arefseq+" + URLEncoder.encode("" + StringUtils.join(refs2get, ") OR database:(type:refseq ") + "", "UTF-8") + "))"; Collection<MoleculeEntry> uniprotEntries = getUniprotEntriesXML(location); for (MoleculeEntry entry : uniprotEntries) { for (String xref : entry.getRefseqs()) { if (xref.endsWith(".")) { xref = xref.substring(0, xref.length() - 1); } if (refseq2uniprots.containsKey(xref.trim())) { refseq2uniprots.add(xref, entry); } else if (refseq2uniprots.containsKey(xref.split("[.]")[0])) { refseq2uniprots.add(xref.split("[.]")[0], entry); } } } } catch (UnsupportedEncodingException e) { logger.error("cannot get proteins for " + StringUtils.join(refSeqs, ", "), e); } return refseq2uniprots; }
From source file:hms.hwestra.interactionrebuttal.InteractionRebuttal.java
public void prepareDataForCelltypeSpecificEQTLMapping(DoubleMatrixDataset<String, String> rawExpressionDataset, String inexpraw, String outdirectory, Double correlationThreshold, String celltypeSpecificProbeFile, String mdsComponentFile, String cellCountFile, String gte, Integer threads) throws IOException { String rawExpressionDataFile = inexpraw; // 7. select Cell type specific probes System.out.println("Loading list of cell type specific probes from: " + celltypeSpecificProbeFile); HashSet<String> cellTypeSpecificProbeSet = new HashSet<String>(); TextFile cellSpecificProbeTF = new TextFile(celltypeSpecificProbeFile, TextFile.R); cellTypeSpecificProbeSet.addAll(cellSpecificProbeTF.readAsArrayList()); cellSpecificProbeTF.close();/* w ww .j av a 2s . c o m*/ if (cellTypeSpecificProbeSet.isEmpty()) { System.err.println("Error: " + celltypeSpecificProbeFile + " is empty!"); System.exit(-1); } else { System.out.println(cellTypeSpecificProbeSet.size() + " cell type specific probes loaded."); } // 1. load gene expression data System.out.println("Loading gene expression data."); double[][] rawExpressionData = rawExpressionDataset.getRawData(); // determine the number of cell type specific probes in this dataset int probeCounter = 0; List<String> probes = rawExpressionDataset.rowObjects; for (int i = 0; i < probes.size(); i++) { if (cellTypeSpecificProbeSet.contains(probes.get(i))) { probeCounter++; } } if (probeCounter == 0) { System.err .println("Error: none of the cell type specific probes defined in " + celltypeSpecificProbeFile + " are present in expression dataset: " + rawExpressionDataset.fileName); System.exit(-1); } else { System.out.println(probeCounter + " of the cell type specific probes are in your dataset."); } System.out.println("Now reloading the gene expression data for the samples that passed the QC."); // 6. Remove samples with r < 0.9 for PC1 // reload expression file, include only samples that pass QC... 
// rawExpressionDataset = new DoubleMatrixDataset<String, String>(rawExpressionDataFile); // rawExpressionData = rawExpressionDataset.getRawData(); // // quantile normalize, log2 transform again, because the number of samples might have been changed.. // QuantileNormalization.quantilenormalize(rawExpressionData); // Log2Transform.log2transform(rawExpressionData); rawExpressionData = rawExpressionDataset.rawData; // collect data for cell type specific probes double[][] probeData = new double[probeCounter][rawExpressionDataset.colObjects.size()]; probeCounter = 0; ArrayList<String> cellTypeSpecificProbeDatasetRowNames = new ArrayList<String>(); for (int i = 0; i < probes.size(); i++) { if (cellTypeSpecificProbeSet.contains(probes.get(i))) { probeData[probeCounter] = rawExpressionData[i]; cellTypeSpecificProbeDatasetRowNames.add(probes.get(i)); probeCounter++; } } // initiate cell type specific probe correlation matrix double[][] celltypeSpecificCorrelationMatrix = new double[probeCounter][probeCounter]; for (int i = 0; i < probeCounter; i++) { for (int j = i + 1; j < probeCounter; j++) { double r = Correlation.correlate(probeData[i], probeData[j]); celltypeSpecificCorrelationMatrix[i][j] = r; celltypeSpecificCorrelationMatrix[j][i] = r; } celltypeSpecificCorrelationMatrix[i][i] = 1; } // save the correlation matrix DoubleMatrixDataset<String, String> probeCorrelationMatrixOut = new DoubleMatrixDataset<String, String>(); probeCorrelationMatrixOut.colObjects = cellTypeSpecificProbeDatasetRowNames; probeCorrelationMatrixOut.rowObjects = cellTypeSpecificProbeDatasetRowNames; probeCorrelationMatrixOut.rawData = celltypeSpecificCorrelationMatrix; probeCorrelationMatrixOut.recalculateHashMaps(); // probeCorrelationMatrixOut.save(outdirectory + "CelltypeSpecificProbeCorrelationMatrix.txt.gz"); // 9. 
PCA over cell specific probe correlation matrix DoubleMatrixDataset<String, String> cellTypeSpecificDataset = new DoubleMatrixDataset<String, String>( probeData); cellTypeSpecificDataset.colObjects = rawExpressionDataset.colObjects; cellTypeSpecificDataset.rowObjects = cellTypeSpecificProbeDatasetRowNames; // cellTypeSpecificDataset.save(expressionOutputDirectory + "CellTypeSpecificProbeExpression.txt.gz"); cellTypeSpecificDataset.transposeDataset(); Normalizer n = new Normalizer(); // calculate first Principal Component over the cell type specific probe matrix... Pair<DoubleMatrixDataset<String, String>, DoubleMatrixDataset<String, String>> PCAResults = n.calculatePCA( cellTypeSpecificDataset, celltypeSpecificCorrelationMatrix, outdirectory + "CellTypeSpecificProbePCA", 1); // 10. PC1 scores: cell specific proxy -- write to file for future use... DoubleMatrixDataset<String, String> cellSpecificPCScores = PCAResults.getLeft(); //Ensure that the cellTypeSpecificPCScores correlate positively with the set of probes that we have used to determine this component: double[] pcScoresSamples = new double[cellSpecificPCScores.nrRows]; for (int i = 0; i < cellSpecificPCScores.nrRows; i++) { pcScoresSamples[i] = cellSpecificPCScores.rawData[i][0]; } cellTypeSpecificDataset.transposeDataset(); int nrProbesCorrelatingPositively = 0; for (int i = 0; i < cellTypeSpecificDataset.rawData.length; i++) { double corr = JSci.maths.ArrayMath.correlation(pcScoresSamples, cellTypeSpecificDataset.rawData[i]); if (corr >= 0) { nrProbesCorrelatingPositively++; } else { nrProbesCorrelatingPositively--; } } if (nrProbesCorrelatingPositively < 0) { for (int i = 0; i < cellSpecificPCScores.nrRows; i++) { cellSpecificPCScores.rawData[i][0] = -cellSpecificPCScores.rawData[i][0]; } } TextFile tfOutCellSpecific = new TextFile(outdirectory + "CellTypeProxyFile.txt", TextFile.W); tfOutCellSpecific.writeln("Sample\tCellCountProxyValue"); for (int i = 0; i < cellSpecificPCScores.nrRows; i++) { 
tfOutCellSpecific .writeln(cellSpecificPCScores.rowObjects.get(i) + "\t" + cellSpecificPCScores.rawData[i][0]); } tfOutCellSpecific.close(); File f = new File(outdirectory + "CellTypeSpecificProbePCA.PCAOverSamplesEigenvalues.txt.gz"); f.delete(); f = new File(outdirectory + "CellTypeSpecificProbePCA.PCAOverSamplesEigenvectors.txt.gz"); f.delete(); f = new File(outdirectory + "CellTypeSpecificProbePCA.PCAOverSamplesEigenvectorsTransposed.txt.gz"); f.delete(); f = new File(outdirectory + "CellTypeSpecificProbePCA.PCAOverSamplesPrincipalComponents.txt.gz"); f.delete(); }
From source file:org.apache.roller.weblogger.business.WeblogEntryTest.java
public void testUpdateTags() throws Exception { WeblogEntryManager mgr = WebloggerFactory.getWeblogger().getWeblogEntryManager(); // setup some test entries to use testWeblog = TestUtils.getManagedWebsite(testWeblog); testUser = TestUtils.getManagedUser(testUser); WeblogEntry entry = TestUtils.setupWeblogEntry("entry1", testWeblog.getDefaultCategory(), testWeblog, testUser);/* w ww . j a v a 2 s. c o m*/ entry.addTag("testWillStayTag"); entry.addTag("testTagWillBeRemoved"); String id = entry.getId(); mgr.saveWeblogEntry(entry); TestUtils.endSession(true); entry = mgr.getWeblogEntry(id); assertEquals(2, entry.getTags().size()); List updateTags = new ArrayList(); updateTags.add("testwillstaytag"); updateTags.add("testnewtag"); updateTags.add("testnewtag3"); entry.updateTags(updateTags); mgr.saveWeblogEntry(entry); TestUtils.endSession(true); entry = mgr.getWeblogEntry(id); HashSet tagNames = new HashSet(); for (Iterator it = entry.getTags().iterator(); it.hasNext();) { WeblogEntryTag tagData = (WeblogEntryTag) it.next(); tagNames.add(tagData.getName()); } assertEquals(3, entry.getTags().size()); assertEquals(3, tagNames.size()); assertEquals(true, tagNames.contains("testwillstaytag")); assertEquals(true, tagNames.contains("testnewtag")); assertEquals(true, tagNames.contains("testnewtag3")); // teardown our test entry TestUtils.teardownWeblogEntry(id); TestUtils.endSession(true); }
From source file:edu.nyu.vida.data_polygamy.exp.NoiseExp.java
void load1DData(String aggregatesFile, int year) { String[] s = null;// ww w.j av a2 s.c o m try { BufferedReader buf = new BufferedReader(new FileReader(aggregatesFile)); s = Utilities.getLine(buf, ","); while (true) { if (s == null) { break; } String attr = Utilities.splitString(s[0], ":")[1].trim(); //System.out.println("Attribute: " + attr); Attribute a = new Attribute(); a.nodeSet.add(0); s = Utilities.getLine(buf, ","); if (s != null && s.length > 0 && s[0].toLowerCase().startsWith("spatial")) { s = Utilities.getLine(buf, ","); } if (s == null || s.length == 0) { System.out.println("Empty: ---------------------- " + attr); } while (s != null && s.length > 0) { int month = Integer.parseInt(Utilities.splitString(s[0], ":")[1].trim()); s = Utilities.getLine(buf, ","); HashSet<SpatioTemporalVal> set = new HashSet<SpatioTemporalVal>(); while (s != null && s.length == 2) { if (month / 100 == year) { int time = Integer.parseInt(s[0]); float value = Float.parseFloat(s[1]); SpatioTemporalVal val = new SpatioTemporalVal(0, time, value); set.add(val); ArrayList<Float> vals = (values.get(attr) == null) ? new ArrayList<Float>() : values.get(attr); vals.add(value); values.put(attr, vals); set.add(val); } s = Utilities.getLine(buf, ","); } if (set.size() > 0) { ArrayList<SpatioTemporalVal> arr = new ArrayList<SpatioTemporalVal>(set); Collections.sort(arr); a.data.put(month, arr); } } if (dataAttributesHashSet.contains(attr)) { attributes.put(attr, a); } s = Utilities.getLine(buf, ","); } buf.close(); } catch (Exception e) { e.printStackTrace(); } }
From source file:org.isatools.isatab.export.pride.DraftPrideExporter.java
/** * walk across the experimental pipeline, passed to * {@link ExperimentalPipelineVisitor#visit(ProcessingVisitAction, uk.ac.ebi.bioinvindex.model.impl.processing.Node)}. *///from ww w .ja v a 2 s . c o m public boolean visit(GraphElement graphElement) { if (graphElement instanceof Processing<?, ?>) { Processing<?, ?> processing = (Processing<?, ?>) graphElement; for (ProtocolApplication protoApp : processing.getProtocolApplications()) { // TODO: filter on the protocol type // Processes the protocol // Set<CvParam> cvparams = new HashSet<CvParam>(); HashSet<UserParam> uparams = new HashSet<UserParam>(); Protocol protocol = protoApp.getProtocol(); if (protocol != null) { ProtocolType type = protocol.getType(); if (type != null) { ReferenceSource typeSrc = type.getSource(); if (typeSrc == null) { throw new TabMissingValueException(i18n.msg("ontoterm_without_source", type)); } uparams.add(new UserParamImpl("BioInvestigation Index Protocol Type", 0, type.getName())); cvparams.add( new CvParamImpl(type.getAcc(), type.getSource().getAcc(), type.getName(), 0, "")); } uparams.add(new UserParamImpl("BioInvestigation Index Protocol Name", 2 + uparams.size(), protocol.getName())); uparams.add(new UserParamImpl("BioInvestigation Index Protocol Description", 2 + uparams.size(), protocol.getDescription())); } else { log.trace("PRIDE exporting, no protocol defined for the processing " + processing.getAcc()); } // Adds up the parameters // for (ParameterValue param : protoApp.getParameterValues()) { Parameter ptype = param.getType(); Collection<Param> prideParams = exportPropertyValue(param, ptype.getOrder() + 3); if (ptype.getRole() == PropertyRole.FACTOR) { prideExperimentParams.addAll(prideParams); } for (Param prideParam : prideParams) { if (prideParam instanceof CvParam) { cvparams.add((CvParam) prideParam); } else if (prideParam instanceof UserParam) { uparams.add((UserParam) prideParam); } } } ProtocolStep prideProtoStep = new ProtocolStepImpl(prideProtoSteps.size(), 
cvparams, uparams); prideProtoSteps.add(prideProtoStep); } } else if (graphElement instanceof MaterialNode) { Material material = ((MaterialNode) graphElement).getMaterial(); // Get the factors for (CharacteristicValue characteristic : material.getCharacteristicValues()) { Characteristic ctype = characteristic.getType(); if (ctype.getRole() == PropertyRole.FACTOR) { prideExperimentParams.addAll(exportPropertyValue(characteristic, ctype.getOrder())); } } } else if (graphElement instanceof DataNode) { Data data = ((DataNode) graphElement).getData(); String dataTypeAcc = data.getType().getAcc(); if ("bii:ms_spec_raw_data".equals(dataTypeAcc)) { mzDataPath = data.getUrl(); } // Get the proteins files from the MS/SPEC run material if ("bii:ms_spec_derived_data".equals(dataTypeAcc) || "bii:ms_spec_normalized_data".equals(dataTypeAcc)) { String identificationsPath = StringUtils.trimToNull(data.getSingleAnnotationValue("proteinsFile")); if (identificationsPath != null) { this.identificationsPath = importPath + "/" + identificationsPath; } String peptidesPath = StringUtils.trimToNull(data.getSingleAnnotationValue("peptidesFile")); if (peptidesPath != null) { this.peptidesPath = importPath + "/" + peptidesPath; } String modificationsPath = StringUtils.trimToNull(data.getSingleAnnotationValue("ptmsFile")); if (modificationsPath != null) { this.modificationsPath = importPath + "/" + modificationsPath; } } } // if graphElement return true; }
From source file:amie.keys.CSAKey.java
/** * * @param ruleToExtendWith//from w w w .j a v a 2 s . c om * @param ruleToGraphFirstLevel * @param ruleToGraphLastLevel * @param kb */ private void discoverConditionalKeysPerLevel(HashMap<Rule, HashSet<String>> ruleToExtendWith, HashMap<Rule, Graph> ruleToGraphFirstLevel, HashMap<Rule, Graph> ruleToGraphLastLevel, Set<Rule> output) { //System.out.println("discoverConditionalKeysPerLevel()"); HashMap<Rule, Graph> ruleToGraphThisLevel = new HashMap<>(); for (Rule currentRule : ruleToExtendWith.keySet()) { Graph graph = ruleToGraphLastLevel.get(currentRule); //System.out.println("Current rule: " + currentRule+ " Graph:"+graph); for (String conditionProperty : ruleToExtendWith.get(currentRule)) { if (Utilities.getRelationIds(currentRule, property2Id).last() > property2Id .get(conditionProperty)) { Graph currentGraphNew = (Graph) graph.clone(); Integer propertyId = property2Id.get(conditionProperty); HashSet<Integer> propertiesSet = new HashSet<>(); propertiesSet.add(propertyId); Node node = currentGraphNew.createOrGetNode(propertiesSet); //Before it was createNode node.toExplore = false; Iterable<Rule> conditions = Utilities.getConditions(currentRule, conditionProperty, (int) support, kb); for (Rule conditionRule : conditions) { Rule complementaryRule = getComplementaryRule(conditionRule); if (!ruleToGraphFirstLevel.containsKey(complementaryRule)) { // We should never fall in this case for (Rule r : ruleToGraphFirstLevel.keySet()) { System.out.println(r.getDatalogBasicRuleString()); } System.out.println(complementaryRule.getDatalogBasicRuleString()); System.out.println(complementaryRule + " not found in the first level graph"); } Graph complementaryGraphNew = ruleToGraphFirstLevel.get(complementaryRule); //System.out.println("Complementary rule: " + complementaryRule + "\tThread " + Thread.currentThread().getId() + "\t" + complementaryGraphNew); Graph newGraphNew = (Graph) currentGraphNew.clone(); HashSet<Integer> conditionProperties = new HashSet<>(); 
conditionProperties.addAll(getRelations(conditionRule, property2Id)); conditionProperties.addAll(getRelations(currentRule, property2Id)); //System.out.println("currentGraph:"+currentGraphNew); //System.out.println("clone of currentGraph:"+newGraphNew); newGraphNew = mergeGraphs(newGraphNew, complementaryGraphNew, newGraphNew.topGraphNodes(), conditionProperties); //System.out.println("newMergeGraph:"+newGraphNew); discoverConditionalKeysForComplexConditions(newGraphNew, newGraphNew.topGraphNodes(), conditionRule, output); ruleToGraphThisLevel.put(conditionRule, newGraphNew); } } } } HashMap<Rule, HashSet<String>> newRuleToExtendWith = new HashMap<>(); for (Rule conRule : ruleToGraphThisLevel.keySet()) { Graph newGraphNew = ruleToGraphThisLevel.get(conRule); for (Node node : newGraphNew.topGraphNodes()) { HashSet<String> properties = new HashSet<>(); if (node.toExplore) { Iterator<Integer> it = node.set.iterator(); int prop = it.next(); String propertyStr = id2Property.get(prop); properties.add(propertyStr); } if (properties.size() != 0) { newRuleToExtendWith.put(conRule, properties); } } } if (newRuleToExtendWith.size() != 0) { discoverConditionalKeysPerLevel(newRuleToExtendWith, ruleToGraphFirstLevel, ruleToGraphThisLevel, output); } //System.out.println("discoverConditionalKeysPerLevel()"); }
From source file:it.iit.genomics.cru.structures.bridges.uniprot.UniprotkbUtils.java
/** * * @param ensemblGeneIDs// w w w . j ava 2 s . c o m * @return * @throws BridgesRemoteAccessException */ public MapOfMap<String, MoleculeEntry> getUniprotEntriesFromEnsembl(Collection<String> ensemblGeneIDs) throws BridgesRemoteAccessException { String tool = UNIPROT_TOOL; MapOfMap<String, MoleculeEntry> ensembl2uniprots = new MapOfMap<>(ensemblGeneIDs); if (ensemblGeneIDs.isEmpty()) { return ensembl2uniprots; } HashSet<String> refs2get = new HashSet<>(); try { for (String ensemblGeneID : ensemblGeneIDs) { if (cache.containsKey(ensemblGeneID.toUpperCase().split("\\.")[0])) { ensembl2uniprots.addAll(ensemblGeneID, cache.get(ensemblGeneID.toUpperCase().split("\\.")[0])); } else { refs2get.add(ensemblGeneID); // if size == limit, do query if (refs2get.size() == maxQueries) { String location = UNIPROT_SERVER + tool + "/?" + "query=keyword:181+AND+organism:" + URLEncoder.encode("\"" + taxid + "\"", "UTF-8") + "+AND+(database%3A(type%3Aensembl+" + URLEncoder.encode( "" + StringUtils.join(refs2get, ") OR database:(type:ensembl ") + "", "UTF-8") + "))"; Collection<MoleculeEntry> uniprotEntries = getUniprotEntriesXML(location); for (MoleculeEntry entry : uniprotEntries) { for (String xref : entry.getEnsemblGenes()) { if (xref.endsWith(".")) { xref = xref.substring(0, xref.length() - 1); } if (ensembl2uniprots.containsKey(xref.trim())) { ensembl2uniprots.add(xref, entry); } else if (ensembl2uniprots.containsKey(xref.split("[.]")[0])) { ensembl2uniprots.add(xref.split("[.]")[0], entry); } } } refs2get.clear(); } } } if (refs2get.isEmpty()) { return ensembl2uniprots; } String location = UNIPROT_SERVER + tool + "/?" 
+ "query=keyword:181+AND+organism:" + URLEncoder.encode("\"" + taxid + "\"", "UTF-8") + "+AND+(database%3A(type%3Aensembl+" + URLEncoder.encode("" + StringUtils.join(refs2get, ") OR database:(type:ensembl ") + "", "UTF-8") + "))"; Collection<MoleculeEntry> uniprotEntries = getUniprotEntriesXML(location); for (MoleculeEntry entry : uniprotEntries) { for (String xref : entry.getEnsemblGenes()) { if (xref.endsWith(".")) { xref = xref.substring(0, xref.length() - 1); } if (ensembl2uniprots.containsKey(xref.trim())) { ensembl2uniprots.add(xref, entry); } else if (ensembl2uniprots.containsKey(xref.split("[.]")[0])) { ensembl2uniprots.add(xref.split("[.]")[0], entry); } } } } catch (UnsupportedEncodingException e) { logger.error("cannot get proteins for " + StringUtils.join(ensemblGeneIDs, ", "), e); } return ensembl2uniprots; }