List of usage examples for java.util HashSet contains
public boolean contains(Object o)
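Before the collected examples below, a minimal self-contained sketch of the method's contract (the class name and element values here are illustrative, not taken from any source file below):

import java.util.HashSet;

public class HashSetContainsDemo {
    public static void main(String[] args) {
        HashSet<String> ids = new HashSet<String>();
        ids.add("alpha");
        ids.add("beta");

        // contains(Object o) returns true iff the set holds an element e
        // such that o.equals(e); lookup is O(1) on average via hashCode().
        System.out.println(ids.contains("alpha")); // true
        System.out.println(ids.contains("gamma")); // false
    }
}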
From source file:de.tudarmstadt.ukp.dkpro.wsd.si.dictionary.util.GoogleDictionary.java
public GoogleDictionary(String path, String neededMentionsPath) throws FileNotFoundException, IOException {
    HashSet<String> neededMentions = new HashSet<String>(FileUtils.readLines(new File(neededMentionsPath)));
    mentionMap = new HashMap<String, List<String[]>>();
    targetMap = new HashMap<String, Integer>();
    BufferedReader reader = new BufferedReader(
            new InputStreamReader(new BZip2CompressorInputStream(new FileInputStream(path))));
    String line;
    String[] lineArray;
    String anchor;
    String target;
    String score;
    boolean inKB;
    boolean isTranslation;
    while ((line = reader.readLine()) != null) {
        lineArray = line.split("\t");
        anchor = lineArray[0];
        // Only process anchors that appear in the needed-mentions set.
        if (neededMentions.contains(anchor)) {
            anchor = anchor.replaceAll("\"", "").trim().replaceAll(" ", "_").toLowerCase();
            score = lineArray[1].split(" ")[0].trim();
            target = lineArray[1].split(" ")[1].trim();
            inKB = false;
            isTranslation = false;
            for (String flag : lineArray[1].substring(lineArray[1].indexOf(target) + target.length())
                    .split(" ")) {
                if (flag.equals("KB")) {
                    inKB = true;
                }
                if (flag.equals("x")) {
                    isTranslation = true;
                }
            }
            // if (inKB && !isTranslation)
            if (!isTranslation) {
                // Add target to targetMap
                if (targetMap.containsKey(target)) {
                    targetMap.put(target, targetMap.get(target) + 1);
                } else {
                    targetMap.put(target, 1);
                }
                // Add targets to mentionMap
                if (!mentionMap.containsKey(anchor)) {
                    mentionMap.put(anchor, new LinkedList<String[]>());
                }
                mentionMap.get(anchor).add(new String[] { target, score });
            }
        }
    }
    reader.close();
}
From source file:edu.cens.loci.classes.LociWifiFingerprint.java
public void setRepAPs(HashSet<String> repAPs) {
    Set<String> keys = mAPs.keySet();
    Iterator<String> iter = keys.iterator();
    while (iter.hasNext()) {
        String bssid = iter.next();
        // Mark an AP as representative iff its BSSID is in the given set.
        mAPs.get(bssid).isRep = repAPs.contains(bssid);
    }
}
From source file:org.eclipse.lyo.testsuite.oslcv2.AbstractCreationAndUpdateRdfTests.java
private void fillInPropertyFromValueType(Resource toCreate, Resource propertyResource, Property requestProp,
        int depth) throws IOException {
    Model requestModel = toCreate.getModel();
    Model shapeModel = propertyResource.getModel();
    final Property valueTypeProp = shapeModel.createProperty(OSLCConstants.VALUE_TYPE);

    if (propertyResource.hasProperty(valueTypeProp)) {
        final Property rangeProp = shapeModel.createProperty(OSLCConstants.RANGE);
        final Property valueShapeProp = shapeModel.createProperty(OSLCConstants.VALUE_SHAPE_PROP);

        HashSet<String> valueTypes = new HashSet<String>();
        StmtIterator valueTypeIter = propertyResource.listProperties(valueTypeProp);
        while (valueTypeIter.hasNext()) {
            String typeUri = valueTypeIter.next().getResource().getURI();
            valueTypes.add(typeUri);
        }

        /*
         * Look at each type. Try to fill in something reasonable.
         */
        if (valueTypes.contains(OSLCConstants.STRING_TYPE)) {
            String string = generateStringValue(getMaxSize(propertyResource));
            toCreate.addProperty(requestProp, string);
        } else if (valueTypes.contains(OSLCConstants.XML_LITERAL_TYPE)) {
            String string = generateStringValue(getMaxSize(propertyResource));
            Literal literal = requestModel.createTypedLiteral(string, XMLLiteralType.theXMLLiteralType);
            toCreate.addLiteral(requestProp, literal);
        } else if (valueTypes.contains(OSLCConstants.BOOLEAN_TYPE)) {
            toCreate.addLiteral(requestProp, true);
        } else if (valueTypes.contains(OSLCConstants.INTEGER_TYPE)) {
            toCreate.addLiteral(requestProp, 1);
        } else if (valueTypes.contains(OSLCConstants.DOUBLE_TYPE)) {
            toCreate.addLiteral(requestProp, 1.0d);
        } else if (valueTypes.contains(OSLCConstants.FLOAT_TYPE)) {
            toCreate.addLiteral(requestProp, 1.0f);
        } else if (valueTypes.contains(OSLCConstants.DECIMAL_TYPE)) {
            Literal literal = requestModel.createTypedLiteral(1, OSLCConstants.DECIMAL_TYPE);
            toCreate.addLiteral(requestProp, literal);
        } else if (valueTypes.contains(OSLCConstants.DATE_TIME_TYPE)) {
            toCreate.addLiteral(requestProp, requestModel.createTypedLiteral(Calendar.getInstance()));
        } else {
            // It appears to be a resource.
            Statement valueShapeStatement = propertyResource.getProperty(valueShapeProp);
            if (valueShapeStatement == null) {
                // We have no shape, so this will likely fail. We can try, though.
                // Create an empty resource. Add an rdf:type if the property has a range.
                Resource valueResource = requestModel.createResource();
                StmtIterator rangeIter = propertyResource.listProperties(rangeProp);
                if (rangeIter.hasNext()) {
                    valueResource.addProperty(RDF.type, rangeIter.next().getResource());
                }
                toCreate.addProperty(requestProp, valueResource);
            } else {
                Resource nested = createResourceFromShape(requestModel,
                        valueShapeStatement.getResource().getURI(), depth + 1);
                toCreate.addProperty(requestProp, nested);
            }
        }
    } else {
        // We have no hints. Try to set a string value. This may fail.
        String string = generateStringValue(getMaxSize(propertyResource));
        toCreate.addProperty(requestProp, string);
    }
}
From source file:fr.ericlab.sondy.core.DataManipulation.java
public String[] getFrequentCoocurringTermsFromIndex(IndexReader r, int numTerms, String baseTerm,
        AppVariables appVariables) {
    String[] frequentTerms = new String[numTerms];
    try {
        TermEnum allTerms = r.terms();
        int minFreq = 0;
        TermInfoList termList = new TermInfoList();
        StopWords stopWords = appVariables.currentStopWords;
        HashSet<String> stopWordsSet = stopWords.getSet();
        stopWords.add(baseTerm);
        while (allTerms.next()) {
            String term = allTerms.term().text();
            // Skip single-character terms and stop words (the base term was added
            // to the stop words so it is excluded from its own co-occurrence list).
            if (term.length() > 1 && !stopWordsSet.contains(term)) {
                float cf = getTermOccurenceCount(r, term);
                if (cf > minFreq) {
                    termList.addTermInfo(new TermInfo(term, (int) cf));
                    termList.sortList();
                    if (termList.size() > numTerms) {
                        termList.removeLast();
                    }
                    minFreq = termList.get(termList.size() - 1).occurence;
                }
            }
        }
        for (int i = 0; i < termList.size(); i++) {
            frequentTerms[i] = termList.get(i).text;
        }
    } catch (Exception ex) {
        Logger.getLogger(DataManipulation.class.getName()).log(Level.SEVERE, null, ex);
    }
    return frequentTerms;
}
From source file:org.broad.igv.util.stats.KMPlotFrame.java
public XYDataset updateDataset() {
    XYSeriesCollection dataset = new XYSeriesCollection();

    final String survivalColumn = (String) survivalColumnControl.getSelectedItem();
    final String censureColumn = (String) censurColumnControl.getSelectedItem();
    final String groupByColumn = (String) groupByControl.getSelectedItem();

    if (survivalColumn != null) {
        ArrayList<DataPoint> dataPoints = new ArrayList<DataPoint>(tracks.size());
        HashSet<String> participants = new HashSet<String>();
        for (Track t : tracks) {
            try {
                // Get the participant (sample) attribute value for this track.
                String participant = t.getSample();

                if (!participants.contains(participant)) { // Don't add same participant twice.
                    participants.add(participant);

                    // Get the survival time.
                    String survivalString = t.getAttributeValue(survivalColumn);
                    int survival = Integer.parseInt(survivalString);

                    // Is the patient censured at the end of the survival period?
                    String censureString = censureColumn == null ? null : t.getAttributeValue(censureColumn);
                    boolean censured = censureString != null && censureString.equals("1");

                    String group = groupByColumn == null ? null : t.getAttributeValue(groupByColumn);
                    if (group == null)
                        group = "<No value>";

                    dataPoints.add(new DataPoint(participant, survival, censured, group));
                } else {
                    // TODO -- check consistency of participant data
                }
            } catch (NumberFormatException e) {
                // Just skip
            }
        }

        // Segregate by group
        Map<String, ArrayList<DataPoint>> map = new HashMap<String, ArrayList<DataPoint>>();
        for (DataPoint dp : dataPoints) {
            String g = dp.getGroup();
            ArrayList<DataPoint> pts = map.get(g);
            if (pts == null) {
                pts = new ArrayList<DataPoint>();
                map.put(g, pts);
            }
            pts.add(dp);
        }

        for (Map.Entry<String, ArrayList<DataPoint>> entry : map.entrySet()) {
            java.util.List<DataPoint> pts = entry.getValue();
            Collections.sort(pts);

            int[] time = new int[pts.size()];
            boolean[] censured = new boolean[pts.size()];
            for (int i = 0; i < pts.size(); i++) {
                time[i] = pts.get(i).time;
                censured[i] = pts.get(i).censured;
            }

            java.util.List<KaplanMeierEstimator.Interval> controlIntervals = KaplanMeierEstimator.compute(time,
                    censured);

            XYSeries series1 = new XYSeries(entry.getKey());
            for (KaplanMeierEstimator.Interval interval : controlIntervals) {
                series1.add(interval.getEnd(), interval.getCumulativeSurvival());
            }
            dataset.addSeries(series1);
        }
    }
    return dataset;
}
From source file:com.nextdoor.bender.ipc.es.ElasticSearchTransport.java
@Override
public void checkResponse(HttpResponse resp, String responseString) throws TransportException {
    /*
     * Check the response status code of the overall bulk call. The call can succeed but have
     * individual failures which are checked later.
     */
    if (resp.getStatusLine().getStatusCode() != HttpStatus.SC_OK) {
        throw new TransportException("es call failed because " + resp.getStatusLine().getReasonPhrase());
    }

    /*
     * Short circuit deserializing the response by just looking if there are any errors.
     */
    if (responseString.contains("\"errors\":false")) {
        return;
    }

    /*
     * Convert response text to a POJO. Only tested with ES 2.4.x and 5.x.
     */
    Gson gson = new GsonBuilder().create();
    EsResponse esResp = null;
    try {
        esResp = gson.fromJson(responseString, EsResponse.class);
    } catch (JsonSyntaxException e) {
        throw new TransportException("es call failed because " + resp.getStatusLine().getReasonPhrase(), e);
    }

    /*
     * Look for the errors per index request.
     */
    int failures = 0;
    if (esResp.items == null) {
        throw new TransportException("es call failed because " + resp.getStatusLine().getReasonPhrase());
    }

    HashSet<String> errorTypes = new HashSet<String>();
    for (Item item : esResp.items) {
        Index index = item.index;
        if (index == null || index.error == null || index.error.reason == null) {
            continue;
        }

        /*
         * For now just allow 200's and 400's. Both are considered non-fatal errors from the
         * lambda's perspective.
         */
        switch (index.status) {
        case HttpStatus.SC_OK:
        case HttpStatus.SC_BAD_REQUEST:
            continue;
        default:
            failures++;
            // Log each distinct error type only once.
            if (index.error.type != null && !errorTypes.contains(index.error.type)) {
                logger.error("Indexing Error Reason: " + index.error.reason);
                if (index.error.caused_by != null) {
                    logger.error("Indexing Error Cause: " + index.error.caused_by.reason);
                }
                errorTypes.add(index.error.type);
            }
        }
    }
    errorTypes.clear();

    if (failures != 0) {
        throw new TransportException("es index failure count is " + failures);
    }
}
From source file:fr.ericlab.sondy.core.DataManipulation.java
public String[] getFrequentCoocurringTerms(String document, int numTerms, String baseTerm,
        AppVariables appVariables) {
    String[] frequentTerms = new String[numTerms];
    try {
        StandardAnalyzer analyzer = new StandardAnalyzer(Version.LUCENE_36);
        RAMDirectory index = new RAMDirectory();
        IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_36, analyzer);
        IndexWriter w = new IndexWriter(index, config);
        Document doc = new Document();
        doc.add(new Field("content", document, Field.Store.NO, Field.Index.ANALYZED, Field.TermVector.YES));
        w.addDocument(doc);
        w.commit();

        IndexReader r = IndexReader.open(w, true);
        TermEnum allTerms = r.terms();
        int minFreq = 0;
        TermInfoList termList = new TermInfoList();
        StopWords stopWords = appVariables.currentStopWords;
        HashSet<String> stopWordsSet = stopWords.getSet();
        stopWords.add(baseTerm);
        while (allTerms.next()) {
            String term = allTerms.term().text();
            if (term.length() > 1 && !stopWordsSet.contains(term)) {
                float cf = getTermOccurenceCount(r, term);
                if (cf > minFreq) {
                    termList.addTermInfo(new TermInfo(term, (int) cf));
                    termList.sortList();
                    if (termList.size() > numTerms) {
                        termList.removeLast();
                    }
                    minFreq = termList.get(termList.size() - 1).occurence;
                }
            }
        }
        for (int i = 0; i < termList.size(); i++) {
            frequentTerms[i] = termList.get(i).text;
        }
        w.close();
        r.close();
        index.close();
    } catch (Exception ex) {
        Logger.getLogger(DataManipulation.class.getName()).log(Level.SEVERE, null, ex);
    }
    return frequentTerms;
}
From source file:edu.ku.brc.specify.conversion.MSULichensFixer.java
private void convertTaxonRecords() {
    IdMapperMgr.getInstance().setDBs(oldDBConn, newDBConn);

    txMapper = IdMapperMgr.getInstance().addTableMapper("taxonname", "TaxonNameID", false);
    txTypMapper = IdMapperMgr.getInstance().addTableMapper("TaxonomyType", "TaxonomyTypeID", false);
    txUnitTypMapper = IdMapperMgr.getInstance().addTableMapper("TaxonomicUnitType", "TaxonomicUnitTypeID",
            false);
    mappers = new IdMapperIFace[] { txMapper, txMapper, txTypMapper, txMapper, txUnitTypMapper };

    newToOldColMap.put("TaxonID", "TaxonNameID");
    newToOldColMap.put("ParentID", "ParentTaxonNameID");
    newToOldColMap.put("TaxonTreeDefID", "TaxonomyTypeID");
    newToOldColMap.put("TaxonTreeDefItemID", "TaxonomicUnitTypeID");
    newToOldColMap.put("Name", "TaxonName");
    newToOldColMap.put("FullName", "FullTaxonName");
    newToOldColMap.put("IsAccepted", "Accepted");

    oldToNewColMap.put("TaxonNameID", "TaxonID");
    oldToNewColMap.put("ParentTaxonNameID", "ParentID");
    oldToNewColMap.put("TaxonomyTypeID", "TaxonTreeDefID");
    oldToNewColMap.put("TaxonomicUnitTypeID", "TaxonTreeDefItemID");
    oldToNewColMap.put("TaxonName", "Name");
    oldToNewColMap.put("FullTaxonName", "FullName");
    oldToNewColMap.put("Accepted", "IsAccepted");

    BasicSQLUtils.setDBConnection(newDBConn);

    StringBuilder newSB = new StringBuilder();
    StringBuilder vl = new StringBuilder();
    for (int i = 0; i < cols.length; i++) {
        fieldToColHash.put(cols[i], i + 1);
        colToFieldHash.put(i + 1, cols[i]);

        if (newSB.length() > 0)
            newSB.append(", ");
        newSB.append(cols[i]);

        if (vl.length() > 0)
            vl.append(',');
        vl.append('?');
    }

    StringBuilder oldSB = new StringBuilder();
    for (int i = 0; i < oldCols.length; i++) {
        oldFieldToColHash.put(oldCols[i], i + 1);
        if (oldSB.length() > 0)
            oldSB.append(", ");
        oldSB.append("ttx.");
        oldSB.append(oldCols[i]);
    }

    rankIdOldDBInx = oldFieldToColHash.get("RankID");

    String sqlStr = String.format("SELECT %s FROM taxon ", newSB.toString());
    log.debug(sqlStr);

    String fromClause = " FROM taxonname ttx LEFT JOIN msu_lichens.taxonname_TaxonNameID ON OldID = ttx.TaxonNameID "
            + "LEFT JOIN msu_lichens_6.taxon AS ntx ON NewID = ntx.TaxonID WHERE ntx.TaxonID IS NULL";
    String sql = String.format("SELECT %s %s", oldSB.toString(), fromClause);
    log.debug(sql);

    String cntSQL = String.format("SELECT COUNT(*) %s", fromClause);
    log.debug(cntSQL);

    int txCnt = BasicSQLUtils.getCountAsInt(oldDBConn, cntSQL);
    if (frame != null) {
        frame.setProcess(0, txCnt);
    }
    log.debug(txCnt);

    String pStr = String.format("INSERT INTO taxon (%s) VALUES (%s)", newSB.toString(), vl.toString());
    log.debug(pStr);

    try {
        stmtTx = newDBConn.createStatement(ResultSet.TYPE_SCROLL_INSENSITIVE, ResultSet.CONCUR_READ_ONLY);
        ResultSet rs1 = stmtTx.executeQuery(sqlStr);
        ResultSetMetaData rsmd1 = rs1.getMetaData();
        colTypes = new int[rsmd1.getColumnCount()];
        colSizes = new int[rsmd1.getColumnCount()];
        for (int i = 0; i < colTypes.length; i++) {
            colTypes[i] = rsmd1.getColumnType(i + 1);
            colSizes[i] = rsmd1.getPrecision(i + 1);
        }
        rs1.close();
        stmtTx.close();

        missingParentTaxonCount = 0;
        lastEditedByInx = oldFieldToColHash.get("LastEditedBy");
        modifiedByAgentInx = fieldToColHash.get("ModifiedByAgentID");

        stmtTx = oldDBConn.createStatement(ResultSet.TYPE_SCROLL_INSENSITIVE, ResultSet.CONCUR_READ_ONLY);
        pStmtTx = newDBConn.prepareStatement(pStr);

        int cnt = 0;
        ResultSet rs = stmtTx.executeQuery(sql);
        ResultSetMetaData rsmd = rs.getMetaData();
        while (rs.next()) {
            processRow(rs, rsmd, null);
            cnt++;
            if (cnt % 1000 == 0) {
                log.debug(cnt);
                if (frame != null) {
                    frame.setProcess(cnt);
                }
            }
        }
        rs.close();

        if (frame != null) {
            frame.setProcess(txCnt, txCnt);
        }

        String msg = String.format("Stranded Taxon (no parent): %d", missingParentTaxonCount);
        tblWriter.log(msg);
        log.debug(msg);

        if (missingParentTaxonCount > 0) {
            if (frame != null)
                frame.setDesc("Renumbering the tree nodes, this may take a while...");

            // Renumber each distinct taxon tree definition only once.
            HashSet<Integer> ttdHash = new HashSet<Integer>();
            for (CollectionInfo colInfo : CollectionInfo.getFilteredCollectionInfoList()) {
                if (!ttdHash.contains(colInfo.getTaxonTreeDef().getId())) {
                    DataProviderSessionIFace session = null;
                    try {
                        session = DataProviderFactory.getInstance().createSession();
                        TaxonTreeDef taxonTreeDef = colInfo.getTaxonTreeDef();
                        taxonTreeDef = (TaxonTreeDef) session
                                .getData("FROM TaxonTreeDef WHERE id = " + taxonTreeDef.getId());

                        sql = "SELECT TaxonID FROM taxon WHERE RankID = 0 AND TaxonTreeDefID = "
                                + taxonTreeDef.getId();
                        log.debug(sql);
                        Integer txRootId = BasicSQLUtils.getCount(sql);
                        Taxon txRoot = (Taxon) session.getData("FROM Taxon WHERE id = " + txRootId);

                        NodeNumberer<Taxon, TaxonTreeDef, TaxonTreeDefItem> nodeNumberer = new NodeNumberer<Taxon, TaxonTreeDef, TaxonTreeDefItem>(
                                txRoot.getDefinition());
                        nodeNumberer.doInBackground();

                    } catch (Exception ex) {
                        //session.rollback();
                        ex.printStackTrace();
                    } finally {
                        if (session != null) {
                            session.close();
                        }
                    }
                    ttdHash.add(colInfo.getTaxonTreeDef().getId());
                }
            }
            if (frame != null)
                frame.setDesc("Renumbering done.");
        }
        missingParentTaxonCount = 0;

    } catch (SQLException ex) {
        ex.printStackTrace();
    } finally {
        try {
            stmtTx.close();
            pStmtTx.close();
        } catch (Exception ex) {
        }
    }
    System.out.println("Done.");
}
From source file:biomine.bmvis2.Vis.java
public void setSelectedNodes(final String[] selectedIds) {
    Logging.info("js", "setSelectedNodes called with params: " + selectedIds);
    SwingUtilities.invokeLater(new Runnable() {
        public void run() {
            GraphTab tab = (GraphTab) tabs.getSelectedComponent();
            Collection<VisualNode> nodes = tab.getVisualGraph().getNodes();
            HashSet<String> names = new HashSet<String>(Arrays.asList(selectedIds));
            tab.getVisualGraph().clearSelected();
            for (VisualNode node : nodes) {
                String id = node.getBMNode().getType() + "_" + node.getBMNode().getId();
                if (names.contains(id))
                    node.setSelected(true);
            }
            tab.getVisualGraph().selectionChanged();
        }
    });
}
From source file:fr.ericlab.sondy.core.DataManipulation.java
public String[] getFrequentCoocurringTermsFromFile(int numTerms, String baseTerm, AppVariables appVariables) {
    String[] frequentTerms = new String[numTerms];
    try {
        BufferedReader input = new BufferedReader(new FileReader("tmp.msg"));
        StandardAnalyzer analyzer = new StandardAnalyzer(Version.LUCENE_36);
        RAMDirectory index = new RAMDirectory();
        IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_36, analyzer);
        IndexWriter w = new IndexWriter(index, config);

        // Batch the input into in-memory documents of 2000 lines each.
        String line = "";
        String document = "";
        int count = 0;
        while ((line = input.readLine()) != null) {
            count++;
            document += line;
            if (count == 2000) {
                Document doc = new Document();
                doc.add(new Field("content", document, Field.Store.NO, Field.Index.ANALYZED,
                        Field.TermVector.YES));
                w.addDocument(doc);
                w.commit();
                count = 0;
                document = "";
            }
        }
        Document doc = new Document();
        doc.add(new Field("content", document, Field.Store.NO, Field.Index.ANALYZED, Field.TermVector.YES));
        w.addDocument(doc);
        w.commit();
        input.close();

        IndexReader r = IndexReader.open(w, true);
        TermEnum allTerms = r.terms();
        int minFreq = 0;
        TermInfoList termList = new TermInfoList();
        StopWords stopWords = appVariables.currentStopWords;
        HashSet<String> stopWordsSet = stopWords.getSet();
        stopWords.add(baseTerm);
        while (allTerms.next()) {
            String term = allTerms.term().text();
            if (term.length() > 1 && !stopWordsSet.contains(term)) {
                float cf = getTermOccurenceCount(r, term);
                if (cf > minFreq) {
                    termList.addTermInfo(new TermInfo(term, (int) cf));
                    termList.sortList();
                    if (termList.size() > numTerms) {
                        termList.removeLast();
                    }
                    minFreq = termList.get(termList.size() - 1).occurence;
                }
            }
        }
        for (int i = 0; i < termList.size(); i++) {
            frequentTerms[i] = termList.get(i).text;
        }
        w.close();
        r.close();
        index.close();
    } catch (Exception ex) {
        Logger.getLogger(DataManipulation.class.getName()).log(Level.SEVERE, null, ex);
    }
    return frequentTerms;
}