List of usage examples for java.util.HashMap.replace(K key, V value)
@Override
public V replace(K key, V value)
From source file:SeedGenerator.MainForm.java
private void jButton8ActionPerformed(java.awt.event.ActionEvent evt) {//GEN-FIRST:event_jButton8ActionPerformed try {/*from w w w. jav a2s .co m*/ PreparedStatement pstmtendpoints = con.prepareStatement( "select endpointid, word as w from crawler.recommender_class_label_hypernym group by endpointid,w\n" + "order by endpointid;"); ResultSet rs = pstmtendpoints.executeQuery(); HashMap<String, Integer> tfCount = new HashMap(); HashMap<String, Integer> idfCount = new HashMap(); // while (rs.next()) { // int endpointid = rs.getInt("endpointid"); // // String word = rs.getString("w").toLowerCase(); // // if (idfCount.containsKey(word)) { // idfCount.replace(word, idfCount.get(word) + 1); // } else { // idfCount.put(word, 1); // } // } // rs.close(); PreparedStatement pstmtendpoints2 = con.prepareStatement( "select endpointid, word as w,local_class_name as l from crawler.recommender_class_label_hypernym group by endpointid,w,l order by endpointid,l;"); rs = pstmtendpoints2.executeQuery(); String className = ""; while (rs.next()) { int endpointid = rs.getInt("endpointid"); String curclassName = rs.getString("l"); String word = rs.getString("w").toLowerCase(); String idword = String.valueOf(endpointid) + "-" + word; if (tfCount.containsKey(idword)) { if (!curclassName.equals(className)) { tfCount.replace(idword, tfCount.get(idword) + 1); className = curclassName; } } else { tfCount.put(idword, 1); } if (idfCount.containsKey(word)) { idfCount.replace(word, idfCount.get(word) + 1); } else { idfCount.put(word, 1); } } Iterator ittf = tfCount.entrySet().iterator(); Iterator itidf = idfCount.entrySet().iterator(); try { PreparedStatement updatestmt = con.prepareStatement( "update recommender_class_label_hypernym set tf=? where endpointid=? 
and word=?"); int count = 0; while (ittf.hasNext()) { Map.Entry pair = (Map.Entry) ittf.next(); //<> if (Integer.parseInt(pair.getValue().toString()) > 0) { updatestmt.setInt(1, Integer.parseInt(pair.getValue().toString())); updatestmt.setInt(2, Integer.parseInt(pair.getKey().toString().split("-")[0])); updatestmt.setString(3, pair.getKey().toString().split("-")[1]); updatestmt.addBatch(); count++; } } updatestmt.executeBatch(); } catch (Exception ex) { System.out.println("tf update edilemedi"); } try { PreparedStatement updatestmt = con .prepareStatement("update recommender_class_label_hypernym set idf=? where word=?"); while (itidf.hasNext()) { Map.Entry pair = (Map.Entry) itidf.next(); //<> if (Integer.parseInt(pair.getValue().toString()) > 0) { updatestmt.setInt(1, Integer.parseInt(pair.getValue().toString())); updatestmt.setString(2, pair.getKey().toString()); updatestmt.addBatch(); } } updatestmt.executeBatch(); } catch (Exception ex) { System.out.println("tf update edilemedi"); } } catch (Exception e) { } // TODO add your handling code here: }
From source file:SeedGenerator.MainForm.java
private void wordNetTFIDF(String objectName, String semanticType) { try {/* w w w . ja v a2 s .com*/ PreparedStatement pstmtReset = con.prepareStatement( "UPDATE crawler.recommender_" + objectName + "_" + semanticType + " SET `processed` = null;"); pstmtReset.execute(); PreparedStatement pstmtDrop = con.prepareStatement( " DROP TABLE IF EXISTS recommender_" + objectName + "_" + semanticType + "_stf_sidf;"); pstmtDrop.execute(); PreparedStatement pstmt = con.prepareStatement("CREATE TABLE `recommender_" + objectName + "_" + semanticType + "_stf_sidf` (\n" + " `id` int(11) NOT NULL AUTO_INCREMENT,\n" + " `termid` int(11) DEFAULT NULL,\n" + " `endpointid` int(11) DEFAULT NULL,\n" + " `stf` int(11) DEFAULT NULL,\n" + " `sidf` int(11) DEFAULT NULL,\n" + " `totaltermcount` int(11) DEFAULT NULL,\n" + " PRIMARY KEY (`id`),\n" + " KEY `endpointid` (`endpointid`),\n" + " KEY `termid` (`termid`)\n" + ") ENGINE=InnoDB AUTO_INCREMENT=1 DEFAULT CHARSET=utf8;"); pstmt.execute(); } catch (Exception ex) { System.out.println(ex.getMessage()); } try { HashMap<Integer, Integer> endpointtotaltermcount = new HashMap(); PreparedStatement pstmtHypernyms = con.prepareStatement("SELECT id FROM crawler.recommender_" + objectName + "_" + semanticType + " where processed is null;"); ResultSet rsHypernyms = pstmtHypernyms.executeQuery(); while (rsHypernyms.next()) { //int id = rsHypernyms.getInt("id"); int termid = rsHypernyms.getInt("id"); PreparedStatement pstmtTfs = con .prepareStatement("SELECT endpointid, count(*) tf FROM crawler.recommender_" + objectName + "_" + semanticType + "_word" + " as h inner join \n" + "recommender_" + objectName + "_tf as i on h.idfid = i.idfid\n" + "where termid=? 
group by endpointid;"); pstmtTfs.setInt(1, termid); ResultSet rsTfs = pstmtTfs.executeQuery(); int idf = 0; ArrayList<String> h = new ArrayList<String>(); while (rsTfs.next()) { int tf = rsTfs.getInt("tf"); int endpointid = rsTfs.getInt("endpointid"); h.add(String.valueOf(tf) + "-" + String.valueOf(endpointid)); idf++; if (endpointtotaltermcount.containsKey(endpointid)) { endpointtotaltermcount.replace(endpointid, endpointtotaltermcount.get(endpointid) + tf); } else { endpointtotaltermcount.put(endpointid, tf); } } PreparedStatement pstmtInsert = con.prepareStatement("INSERT INTO recommender_" + objectName + "_" + semanticType + "_stf_sidf " + "(termid, endpointid,stf,sidf) VALUES (?,?,?,?);"); for (String s : h) { pstmtInsert.setInt(1, termid); pstmtInsert.setInt(2, Integer.parseInt(s.split("-")[1])); pstmtInsert.setInt(3, Integer.parseInt(s.split("-")[0])); pstmtInsert.setInt(4, idf); pstmtInsert.addBatch(); } pstmtInsert.executeBatch(); PreparedStatement pstmtUpdateProcessFlag = con.prepareStatement("UPDATE recommender_" + objectName + "_" + semanticType + " SET `processed` = 1 where id=?;"); pstmtUpdateProcessFlag.setInt(1, termid); pstmtUpdateProcessFlag.execute(); } Iterator ittw = endpointtotaltermcount.entrySet().iterator(); try { while (ittw.hasNext()) { Map.Entry pair = (Map.Entry) ittw.next(); //<> PreparedStatement pstmtUpdate = con.prepareStatement("UPDATE recommender_" + objectName + "_" + semanticType + "_stf_sidf " + "SET totaltermcount=? WHERE endpointid =?;"); pstmtUpdate.setInt(1, Integer.parseInt(pair.getValue().toString())); pstmtUpdate.setInt(2, Integer.parseInt(pair.getKey().toString())); pstmtUpdate.execute(); } } catch (Exception ex) { // System.out.println("tf update edilemedi"); } } catch (Exception ex) { System.out.println(ex.getMessage()); } // TODO add your handling code here: }
From source file:SeedGenerator.MainForm.java
private void calculateEndpointTagsTF() { try {//from ww w. j ava 2s .c om try { PreparedStatement createtablepstmt = con.prepareStatement( "CREATE TABLE `recommender_endpoints_tf` (\n" + " `id` int(11) NOT NULL AUTO_INCREMENT,\n" + " `word` varchar(45) DEFAULT NULL,\n" + " `endpointid` int(11) DEFAULT NULL,\n" + " `queryid` int(11) DEFAULT NULL,\n" + " `count` int(11) DEFAULT NULL,\n" + " `idf` int(11) DEFAULT NULL,\n" + " `idfid` int(11) DEFAULT NULL,\n" + " `totalNumberofWords` int(11) DEFAULT NULL,\n" + " PRIMARY KEY (`id`),\n" + " KEY `word` (`word`),\n" + " KEY `idf` (`idfid`),\n" + " KEY `endpointid` (`endpointid`)\n" + ") ENGINE=InnoDB AUTO_INCREMENT=1 DEFAULT CHARSET=utf8;"); createtablepstmt.execute(); } catch (Exception ex) { } PreparedStatement pstmtendpoints = con.prepareStatement( "SELECT * from crawler.endpoints where sourceCodeHTML is not null and source != 'spendold' and source != 'spendnew' group by domain ORDER BY id asc;"); ResultSet rs1 = pstmtendpoints.executeQuery(); String htmlsource = ""; HashMap<String, Integer> wordCount = new HashMap(); while (rs1.next()) { htmlsource = rs1.getString("sourceCodeHTML"); HashMap<String, Integer> localwordCount = new HashMap(); org.jsoup.nodes.Document doc = Jsoup.parse(htmlsource);//.connect("http://en.wikipedia.org/").get(); //Elements newsHeadlines = doc.select("#mp-itn b a"); Elements links = doc.getElementsByTag("a"); Elements labels = doc.getElementsByTag("Label"); Elements spans = doc.getElementsByTag("span"); Elements titles = doc.getElementsByTag("title"); Elements meta = doc.getElementsByTag("meta"); Elements h2 = doc.getElementsByTag("h2"); Elements h1 = doc.getElementsByTag("h1"); Elements h3 = doc.getElementsByTag("h3"); Elements li = doc.getElementsByTag("li"); Elements dt = doc.getElementsByTag("dt"); Elements p = doc.getElementsByTag("p"); Elements option = doc.getElementsByTag("option"); links.addAll(labels); links.addAll(spans); links.addAll(titles); links.addAll(meta); links.addAll(h2); 
links.addAll(h1); links.addAll(h3); links.addAll(li); links.addAll(dt); links.addAll(p); links.addAll(option); for (Element link : links) { String word = link.toString(); if (wordCount.containsKey(word) && !localwordCount.containsKey(word)) { wordCount.replace(word, wordCount.get(word) + 1); } else if (!wordCount.containsKey(word)) { wordCount.put(word, 1); } if (localwordCount.containsKey(word)) { // wordCount.replace(word, wordCount.get(word) + 1); } else { localwordCount.put(word, 1); } String linkHref = link.attr("href"); // String linkText = link.text(); } // // String words[] = htmlsource.split("\n");//\\s+"); // for (String word : words) { // String cleanword; // // cleanword = word.replaceAll("\r", "");//"[^\\p{L}\\p{Nd}]+", ""); // if (!cleanword.equals("")) { // if (!word.equals(cleanword)) { // word = cleanword;//System.out.println(word+"--"+cleanword); // } // word = word.toLowerCase().replace("", "i"); // // if (wordCount.containsKey(word) && !localwordCount.containsKey(word)) { // wordCount.replace(word, wordCount.get(word) + 1); // } else if (!wordCount.containsKey(word)) { // wordCount.put(word, 1); // } // // if (localwordCount.containsKey(word)) { //// wordCount.replace(word, wordCount.get(word) + 1); // } else { // localwordCount.put(word, 1); // } // // } else { // } // // } } pstmtendpoints.close(); rs1.close(); Iterator it = wordCount.entrySet().iterator(); while (it.hasNext()) { Map.Entry pair = (Map.Entry) it.next(); if (Integer.parseInt(pair.getValue().toString()) > 1) { PreparedStatement insertpstmt = con .prepareStatement("insert into recommender_endpoints_tf (word,count) values(?,?);"); if (pair.getKey().toString().length() > 44) { insertpstmt.setString(1, pair.getKey().toString()); } else { insertpstmt.setString(1, pair.getKey().toString()); } insertpstmt.setInt(2, Integer.parseInt(pair.getValue().toString())); insertpstmt.executeUpdate(); insertpstmt.close(); } it.remove(); // avoids a ConcurrentModificationException } } catch 
(Exception e) { //System.err.println("Got an exception! "); System.err.println(e.getMessage()); } }
From source file:SeedGenerator.MainForm.java
private void wordNetTFIDFLevel(String objectName, String semanticType) { try {//from w ww . j av a 2 s .c o m // PreparedStatement pstmtReset = con.prepareStatement("UPDATE crawler.recommender_" + objectName + "_" + semanticType + " SET `processed` = null;"); // pstmtReset.execute(); PreparedStatement pstmtDrop = con.prepareStatement( " DROP TABLE IF EXISTS recommender_" + objectName + "_" + semanticType + "_stf_sidf_level;"); pstmtDrop.execute(); PreparedStatement pstmt = con.prepareStatement( "CREATE TABLE `recommender_" + objectName + "_" + semanticType + "_stf_sidf_level` (\n" + " `id` int(11) NOT NULL AUTO_INCREMENT,\n" + " `term` varchar(45) DEFAULT NULL,\n" + " `endpointid` int(11) DEFAULT NULL,\n" + " `stf` int(11) DEFAULT NULL,\n" + " `sidf` int(11) DEFAULT NULL,\n" + " `totaltermcount` int(11) DEFAULT NULL,\n" + " PRIMARY KEY (`id`),\n" + " KEY `endpointid` (`endpointid`),\n" + " KEY `term` (`term`)\n" + ") ENGINE=InnoDB AUTO_INCREMENT=1 DEFAULT CHARSET=utf8;"); pstmt.execute(); } catch (Exception ex) { System.out.println(ex.getMessage()); } try { HashMap<Integer, Integer> endpointtotaltermcount = new HashMap(); PreparedStatement pstmtHypernyms = con.prepareStatement("SELECT term FROM crawler.recommender_" + objectName + "_" + semanticType + "_levels group by term;"); ResultSet rsHypernyms = pstmtHypernyms.executeQuery(); while (rsHypernyms.next()) { //int id = rsHypernyms.getInt("id"); String term = rsHypernyms.getString("term"); PreparedStatement pstmtTfs = con.prepareStatement(" SELECT endpointid, count(*) tf from " + "recommender_" + objectName + "_tf as tf inner join \n" + " (select idfid,termid,mainterm from crawler.recommender_" + objectName + "_" + semanticType + "_word as word inner join \n" + "(SELECT l.term,h.term as mainterm,mainupperid FROM crawler.recommender_" + objectName + "_" + semanticType + "_levels l \n" + "inner join crawler.recommender_" + objectName + "_" + semanticType + " h on h.id = l.mainupperid where l.term = ? 
group by term,mainterm,mainupperid) as term\n" + "on term.mainupperid = word.termid) as termsidf\n" + "on termsidf.idfid = tf.idfid group by endpointid;"); pstmtTfs.setString(1, term); ResultSet rsTfs = pstmtTfs.executeQuery(); int idf = 0; ArrayList<String> h = new ArrayList<String>(); while (rsTfs.next()) { int tf = rsTfs.getInt("tf"); int endpointid = rsTfs.getInt("endpointid"); h.add(String.valueOf(tf) + "-" + String.valueOf(endpointid)); idf++; if (endpointtotaltermcount.containsKey(endpointid)) { endpointtotaltermcount.replace(endpointid, endpointtotaltermcount.get(endpointid) + tf); } else { endpointtotaltermcount.put(endpointid, tf); } } PreparedStatement pstmtInsert = con.prepareStatement("INSERT INTO recommender_" + objectName + "_" + semanticType + "_stf_sidf_level " + "(term, endpointid,stf,sidf) VALUES (?,?,?,?);"); for (String s : h) { pstmtInsert.setString(1, term); pstmtInsert.setInt(2, Integer.parseInt(s.split("-")[1])); pstmtInsert.setInt(3, Integer.parseInt(s.split("-")[0])); pstmtInsert.setInt(4, idf); pstmtInsert.addBatch(); } pstmtInsert.executeBatch(); } Iterator ittw = endpointtotaltermcount.entrySet().iterator(); try { while (ittw.hasNext()) { Map.Entry pair = (Map.Entry) ittw.next(); //<> PreparedStatement pstmtUpdate = con.prepareStatement("UPDATE recommender_" + objectName + "_" + semanticType + "_stf_sidf_level " + "SET totaltermcount=? WHERE endpointid =?;"); pstmtUpdate.setInt(1, Integer.parseInt(pair.getValue().toString())); pstmtUpdate.setInt(2, Integer.parseInt(pair.getKey().toString())); pstmtUpdate.execute(); } } catch (Exception ex) { // System.out.println("tf update edilemedi"); } } catch (Exception ex) { System.out.println(ex.getMessage()); } // TODO add your handling code here: }
From source file:SeedGenerator.MainForm.java
private void jButtonPrepareEndpointLcnWordTableActionPerformed(java.awt.event.ActionEvent evt) {//GEN-FIRST:event_jButtonPrepareEndpointLcnWordTableActionPerformed try {/* www .j a va 2s. c o m*/ PreparedStatement pstmtendpoints = con .prepareStatement("select id, endpointurl from endpoints where lastCheckedDate is null;"); ResultSet rs1 = pstmtendpoints.executeQuery(); int endpointid = 0; int queryid = 27;//class with label collector while (rs1.next()) { try { endpointid = rs1.getInt("id"); PreparedStatement pstmt = con.prepareStatement( "select s,p,o from endpointtriples where queryid=? and endpointid=?;"); pstmt.setInt(1, queryid); pstmt.setInt(2, endpointid); // execute the query, and get a java resultset ResultSet rs = pstmt.executeQuery(); PreparedStatement insertpstmt = con.prepareStatement( "insert into recommender_class_label_hypernym (endpointid,local_class_name, class_count, word, hypernym) values(?,?,?,?,?);"); // iterate through the java resultset HashMap<String, Integer> wordCount = new HashMap(); HashMap<String, Integer> lcnWordCount = new HashMap(); int totalNumberOfWords = 0; while (rs.next()) { String classURI = rs.getString("s"); int classcount = Integer.parseInt(rs.getString("o")); PreparedStatement pstmtcounts = con.prepareStatement( "select s,o from endpointtriples where queryid=? and endpointid=? 
and s =?;"); pstmtcounts.setInt(1, 26); pstmtcounts.setInt(2, endpointid); pstmtcounts.setString(3, classURI); ResultSet rs2 = pstmtcounts.executeQuery(); String className, label; if (rs2.next()) { label = rs2.getString("o"); className = limitString(classURI.split("/")[classURI.split("/").length - 1], 45); pstmtcounts.cancel(); rs2.close(); // String p = rs.getString("p"); // String o = rs.getString("o"); if (label != null) { String words[] = label.split(" "); for (String word : words) { String cleanword; cleanword = word.replaceAll("[^\\p{L}\\p{Nd}]+", ""); if (!cleanword.equals("")) { if (!word.equals(cleanword)) { word = cleanword;//System.out.println(word+"--"+cleanword); } if (wordCount.containsKey(word)) { int currentCount = wordCount.get(word); wordCount.replace(word, currentCount + 1); } else { wordCount.put(word, 1); } for (String hyp : getHypernyms(word)) { insertpstmt.setInt(1, endpointid); // if (s.split("/")[s.split("/").length - 1].length() > 45) { // insertpstmt.setString(2, s.split("/")[s.split("/").length - 1].substring(0, 44)); // } else { // insertpstmt.setString(2, s.split("/")[s.split("/").length - 1]); // } insertpstmt.setString(2, className); insertpstmt.setInt(3, classcount); insertpstmt.setString(4, word); insertpstmt.setString(5, hyp); insertpstmt.addBatch(); // insertpstmt.setInt(6,); } } totalNumberOfWords++; } } } else { // System.out.println("bos"); } } insertpstmt.executeBatch(); PreparedStatement pstmtupdateendpoint = con .prepareStatement("update endpoints set lastCheckedDate=? 
where id=?;"); pstmtupdateendpoint.setTimestamp(1, new java.sql.Timestamp(System.currentTimeMillis())); pstmtupdateendpoint.setInt(2, endpointid); pstmtupdateendpoint.execute(); pstmtupdateendpoint.close(); //String firstName = rs.getString("url"); pstmt.close(); rs.close(); // Iterator it = lcnWordCount.entrySet().iterator(); // while (it.hasNext()) { // Map.Entry pair = (Map.Entry) it.next(); // //<> // if (Integer.parseInt(pair.getValue().toString()) > 1) { // PreparedStatement insertpstmt // = con.prepareStatement("insert into recommender_class_label_hypernym (endpointid,local_class_name, class_count, word, hypernym,tf) values(?,?,?,?,?);"); // if (pair.getKey().toString().length() > 44) { // insertpstmt.setString(1, pair.getKey().toString().substring(0, 44)); // } else { // insertpstmt.setString(1, pair.getKey().toString()); // } // insertpstmt.setInt(2, Integer.parseInt(pair.getValue().toString())); // insertpstmt.setInt(3, endpointid); // insertpstmt.setInt(4, queryid); // insertpstmt.executeUpdate(); // insertpstmt.close(); // } // it.remove(); // avoids a ConcurrentModificationException // } // PreparedStatement updatepstmt // = con.prepareStatement("update endpoints set commentsWordCount=? where id=?;"); // updatepstmt.setInt(1, totalNumberOfWords); // updatepstmt.setInt(2, endpointid); // updatepstmt.executeUpdate(); // updatepstmt.close(); } catch (Exception e) { System.err.println("inner while" + e.getMessage()); } } pstmtendpoints.close(); } catch (Exception e) { //System.err.println("Got an exception! "); System.err.println(e.getMessage()); } // TODO add your handling code here: }