// Java tutorial
/* * Copyright 2015 Themistoklis Mavridis <themis.mavridis@issel.ee.auth.gr>. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package com.thesmartweb.swebrank; import java.util.*; import java.sql.PreparedStatement; import java.sql.Connection; import java.sql.DriverManager; import java.sql.SQLException; import java.util.logging.Level; import java.util.logging.Logger; import org.elasticsearch.ElasticsearchException; import org.elasticsearch.action.index.IndexRequest; import org.elasticsearch.action.index.IndexResponse; import org.elasticsearch.client.Client; import org.elasticsearch.client.transport.TransportClient; import org.elasticsearch.common.settings.ImmutableSettings; import org.elasticsearch.common.settings.Settings; import org.elasticsearch.common.transport.InetSocketTransportAddress; import org.elasticsearch.node.Node; import static org.elasticsearch.node.NodeBuilder.*; import org.json.simple.JSONArray; import org.json.simple.JSONObject; /** * Class for analysis of all the queries through Search APIs and capturing of the result statistics * @author Themistoklis Mavridis */ public class Search_analysis { /** * Method to perform the queries to the search engines, get the links and get all the webpage and semantic stats for the links * @param iteration_counter The iteration number of the algorithm (to use it in the id for elasticsearch) * @param directory_save The directory in which we are going to save several files * @param domain The domain that we are searching for (to use it in the 
id for elasticsearch) * @param enginechoice The search engines that were chosen to be used * @param quer the query we search for * @param results_number the results number that we are going to get from every search engine * @param top_visible the number of results if we use Visibility score * @param SWebRankSettings the settings for LDA and SwebRank in general (check the ReadInput Class) * @param alpha alpha value of LDA * @param mozMetrics the metrics of choice if Moz is going to be used * @param top_count_moz the amount of results if we use Moz * @param moz_threshold_option flag to show if we are going to use a threshold in Moz metrics or not * @param moz_threshold the moz threshold value * @param ContentSemantics get the choice of Content Semantic Analysis algorithm that we are going to use * @param SensebotConcepts the amount of concepts to be recognized if Sensebot is used * @param config_path the configuration path to get all the api keys * @return a list with the words recognized as important by the content semantic analysis algorithm we have chosen */ public List<String> perform(int iteration_counter, String directory_save, String domain, List<Boolean> enginechoice, String quer, int results_number, int top_visible, List<Double> SWebRankSettings, double alpha, List<Boolean> mozMetrics, int top_count_moz, boolean moz_threshold_option, double moz_threshold, List<Boolean> ContentSemantics, int SensebotConcepts, String config_path) { //=======connect to mysql========= Connection conn = null; PreparedStatement stmt = null; try { ReadInput ri = new ReadInput(); List<String> mysqlAdminSettings = ri.GetKeyFile(config_path, "mysqlAdmin"); String port = mysqlAdminSettings.get(2); String dbname = mysqlAdminSettings.get(3); String url = "jdbc:mysql://localhost:" + port + "/" + dbname + "?zeroDateTimeBehavior=convertToNull"; String user = mysqlAdminSettings.get(0); String password = mysqlAdminSettings.get(1); System.out.println("Connecting to database..."); conn = 
DriverManager.getConnection(url, user, password); LinksParseAnalysis ld = new LinksParseAnalysis(); //we create the array that are going to store the results from each search engine String[] links_google = new String[results_number]; String[] links_yahoo = new String[results_number]; String[] links_bing = new String[results_number]; //we create the array that is going to store all the results from all the search engines together String[] links_total = new String[(results_number * 3)]; //--------if we have selected to use a Moz metric, then we should set the links_total to be of size of top_count_seomoz*3 since it means that the results_number has been set to its max value (50) if (mozMetrics.get(0)) { links_total = new String[(top_count_moz) * 3]; } int[] nlinks = new int[2]; if (enginechoice.get(0)) { //get bing results BingResults br = new BingResults(); links_bing = br.Get(quer, results_number, directory_save, config_path); } if (enginechoice.get(1)) { //get google results GoogleResults gr = new GoogleResults(); links_google = gr.Get(quer, results_number, directory_save, config_path); } if (enginechoice.get(2)) { //get yahoo results YahooResults yr = new YahooResults(); links_yahoo = yr.Get(quer, results_number, directory_save, config_path); } HashMap<Integer, List<String>> EntitiesMapDBP = new HashMap<>(); HashMap<Integer, List<String>> CategoriesMapDBP = new HashMap<>(); HashMap<Integer, List<String>> EntitiesMapDand = new HashMap<>(); HashMap<Integer, List<String>> CategoriesMapDand = new HashMap<>(); HashMap<Integer, List<String>> EntitiesMapYahoo = new HashMap<>(); HashMap<Integer, List<String>> CategoriesMapYahoo = new HashMap<>(); HashMap<Integer, String> parseOutputList = new HashMap<>(); for (int i = 0; i < results_number * 3; i++) { parseOutputList.put(i, ""); } //************* boolean false_flag = true; if (false_flag) { if (mozMetrics.get(0)) { //we check if moz works Moz moz = new Moz(); boolean checkmoz = moz.check(config_path); if (checkmoz) { 
//perform if (links_yahoo.length > 0) { links_yahoo = moz.perform(links_yahoo, top_count_moz, moz_threshold, moz_threshold_option, mozMetrics, config_path); } if (links_google.length > 0) { links_google = moz.perform(links_google, top_count_moz, moz_threshold, moz_threshold_option, mozMetrics, config_path); } if (links_bing.length > 0) { links_bing = moz.perform(links_bing, top_count_moz, moz_threshold, moz_threshold_option, mozMetrics, config_path); } } } //we are creating Sindice class in order to get the number of semantic triples of a webpage Sindice striple = new Sindice(); //create htmlparser to get the number of links in a webpage if (mozMetrics.get(0)) { results_number = links_yahoo.length; } WebParser htm = new WebParser(); //create an array that contains all the links together for (int i = 0; i < 3; i++) { try { if (i == 0) { System.arraycopy(links_yahoo, 0, links_total, 0, results_number); } if (i == 1) { System.arraycopy(links_google, 0, links_total, links_yahoo.length, results_number); } if (i == 2) { System.arraycopy(links_bing, 0, links_total, ((links_yahoo.length) + (links_google.length)), results_number); } } catch (ArrayIndexOutOfBoundsException ex) { Logger.getLogger(Search_analysis.class.getName()).log(Level.SEVERE, null, ex); ArrayList<String> finalList = new ArrayList<String>(); return finalList; } } //merged true => visibility score if (enginechoice.get(3)) { VisibilityScore vb = new VisibilityScore();//we have a merged engine //erase using vb.perform all the duplicate links links_total = vb.perform(links_google, links_yahoo, links_bing); //if we have Moz option set to true we have to get the results rearranged according to the moz metric selected if (mozMetrics.get(0)) { Moz checkMoz = new Moz(); boolean check_seo = checkMoz.check(config_path); if (check_seo) { Moz MOZ = new Moz(); links_total = MOZ.perform(links_total, top_count_moz, moz_threshold, moz_threshold_option, mozMetrics, config_path); } } //here we calculate the visibility score 
links_total = vb.visibility_score(links_total, links_yahoo, links_bing, links_google, top_visible); } String[][] total_catent = new String[links_total.length][2]; for (int r = 0; r < total_catent.length; r++) { total_catent[r][0] = ""; total_catent[r][1] = ""; } for (int j = 0; j < links_total.length; j++) { if (links_total[j] != null) { String urlString = links_total[j]; if (urlString.length() > 199) { urlString = links_total[j].substring(0, 198); } int rank = -1; int engine = -1;//0 for yahoo,1 for google,2 for bing if (j < results_number) { rank = j; engine = 0; } else if (j < results_number * 2) { rank = j - results_number; engine = 1; } else if (j < results_number * 3) { rank = j - results_number * 2; engine = 2; } try { //we initialize the row in settings table conn = DriverManager.getConnection(url, user, password); stmt = conn.prepareStatement( "INSERT INTO SETTINGS (url,query,search_engine,search_engine_rank,domain) VALUES (?,?,?,?,?) ON DUPLICATE KEY UPDATE url=VALUES(url),query=VALUES(query),search_engine=VALUES(search_engine),domain=VALUES(domain)"); stmt.setString(1, urlString); stmt.setString(2, quer); stmt.setInt(3, engine); stmt.setInt(4, rank); stmt.setString(5, domain); stmt.executeUpdate(); } finally { try { if (stmt != null) stmt.close(); if (conn != null) conn.close(); } catch (SQLException ex) { Logger.getLogger(Search_analysis.class.getName()).log(Level.SEVERE, null, ex); } } try { //we initialize the row in semantic stats table conn = DriverManager.getConnection(url, user, password); stmt = conn.prepareStatement( "INSERT INTO SEMANTICSTATS (url,query,search_engine,search_engine_rank,domain) VALUES (?,?,?,?,?) 
ON DUPLICATE KEY UPDATE url=VALUES(url),query=VALUES(query),search_engine=VALUES(search_engine),domain=VALUES(domain)"); stmt.setString(1, urlString); stmt.setString(2, quer); stmt.setInt(3, engine); stmt.setInt(4, rank); stmt.setString(5, domain); stmt.executeUpdate(); } finally { try { if (stmt != null) stmt.close(); if (conn != null) conn.close(); } catch (SQLException ex) { Logger.getLogger(Search_analysis.class.getName()).log(Level.SEVERE, null, ex); } } try { //we initialize the row in namespaces stats table conn = DriverManager.getConnection(url, user, password); stmt = conn.prepareStatement( "INSERT INTO NAMESPACESSTATS (url,query,search_engine,search_engine_rank,domain) VALUES (?,?,?,?,?) ON DUPLICATE KEY UPDATE url=VALUES(url),query=VALUES(query),search_engine=VALUES(search_engine),domain=VALUES(domain)"); stmt.setString(1, urlString); stmt.setString(2, quer); stmt.setInt(3, engine); stmt.setInt(4, rank); stmt.setString(5, domain); stmt.executeUpdate(); } finally { try { if (stmt != null) stmt.close(); if (conn != null) conn.close(); } catch (SQLException ex) { Logger.getLogger(Search_analysis.class.getName()).log(Level.SEVERE, null, ex); } } try { //we put the info inside the settings conn = DriverManager.getConnection(url, user, password); StringBuilder settingsStmBuild = new StringBuilder(); settingsStmBuild.append("UPDATE SETTINGS SET "); settingsStmBuild.append("`nTopics`=? , "); settingsStmBuild.append("`alpha`=? , "); settingsStmBuild.append("`beta`=? , "); settingsStmBuild.append("`niters`=? , "); settingsStmBuild.append("`prob_threshold`=? , "); settingsStmBuild.append("`moz`=? , "); settingsStmBuild.append("`top_count_moz`=? , "); settingsStmBuild.append("`moz_threshold`=? , "); settingsStmBuild.append("`moz_threshold_option`=? , "); settingsStmBuild.append("`top_visible`=? , "); settingsStmBuild.append("`Domain_Authority`=? , "); settingsStmBuild.append("`External_MozRank`=? , "); settingsStmBuild.append("`MozRank`=? 
, "); settingsStmBuild.append("`MozTrust`=? , "); settingsStmBuild.append("`Page_Authority`=? , "); settingsStmBuild.append("`Subdomain_mozRank`=? , "); settingsStmBuild.append("`merged`=? , "); settingsStmBuild.append("`results_number`=? , "); settingsStmBuild.append("`Diffbotflag`=? , "); settingsStmBuild.append("`LDAflag`=? , "); settingsStmBuild.append("`Sensebotflag`=? , "); settingsStmBuild.append("`TFIDFflag`=? , "); settingsStmBuild.append("`SensebotConcepts`=? , "); settingsStmBuild.append("`nTopTopics`=? , "); settingsStmBuild.append("`combinelimit`=? ,"); settingsStmBuild.append("`newtermstocombine`=? ,"); settingsStmBuild.append("`newqueriesmax`=? ,"); settingsStmBuild.append("`ngdthreshold`=? ,"); settingsStmBuild.append("`entitiesconfi`=? ,"); settingsStmBuild.append("`dbpediasup`=? "); settingsStmBuild .append("WHERE `url`=? AND `query`=? AND `search_engine`=? AND `domain`=?"); stmt = conn.prepareStatement(settingsStmBuild.toString()); stmt.setInt(1, SWebRankSettings.get(1).intValue()); stmt.setDouble(2, alpha); stmt.setDouble(3, SWebRankSettings.get(0)); stmt.setInt(4, SWebRankSettings.get(2).intValue()); stmt.setDouble(5, SWebRankSettings.get(3)); stmt.setBoolean(6, mozMetrics.get(0)); stmt.setInt(7, top_count_moz); stmt.setDouble(8, moz_threshold); stmt.setBoolean(9, moz_threshold_option); stmt.setInt(10, top_visible); stmt.setBoolean(11, mozMetrics.get(1)); stmt.setBoolean(12, mozMetrics.get(2)); stmt.setBoolean(13, mozMetrics.get(3)); stmt.setBoolean(14, mozMetrics.get(4)); stmt.setBoolean(15, mozMetrics.get(5)); stmt.setBoolean(16, mozMetrics.get(6)); stmt.setBoolean(17, enginechoice.get(3)); stmt.setInt(18, results_number); stmt.setBoolean(19, ContentSemantics.get(0)); stmt.setBoolean(20, ContentSemantics.get(1)); stmt.setBoolean(21, ContentSemantics.get(2)); stmt.setBoolean(22, ContentSemantics.get(3)); stmt.setInt(23, SensebotConcepts); stmt.setInt(24, SWebRankSettings.get(11).intValue()); stmt.setInt(25, SWebRankSettings.get(7).intValue()); 
stmt.setInt(26, SWebRankSettings.get(9).intValue()); stmt.setInt(27, SWebRankSettings.get(10).intValue()); stmt.setDouble(28, SWebRankSettings.get(6)); stmt.setDouble(29, SWebRankSettings.get(12)); stmt.setDouble(30, SWebRankSettings.get(13)); stmt.setString(31, urlString); stmt.setString(32, quer); stmt.setInt(33, engine); stmt.setString(34, domain); stmt.executeUpdate(); } finally { try { if (stmt != null) stmt.close(); if (conn != null) conn.close(); } catch (SQLException ex) { Logger.getLogger(Search_analysis.class.getName()).log(Level.SEVERE, null, ex); } } if (htm.checkconn(links_total[j])) {//if we can connect to the url we continue to update semantics stats and namespaces stats tables nlinks = htm.getnlinks(links_total[j]); StringBuilder webstatsStmBuild = new StringBuilder(); try { conn = DriverManager.getConnection(url, user, password); webstatsStmBuild.append("UPDATE SEMANTICSTATS SET "); webstatsStmBuild.append("`number_links`=? , "); webstatsStmBuild.append("`redirect_links`=? , "); webstatsStmBuild.append("`internal_links`=? "); webstatsStmBuild .append("WHERE `url`=? AND `query`=? AND `search_engine`=? 
AND `domain`=?"); stmt = conn.prepareStatement(webstatsStmBuild.toString()); stmt.setInt(1, nlinks[0]);//total numbers of links stmt.setInt(2, nlinks[0] - nlinks[1]); stmt.setInt(3, nlinks[1]);//internal links stmt.setString(4, urlString); stmt.setString(5, quer); stmt.setInt(6, engine); stmt.setString(7, domain); stmt.executeUpdate(); } finally { try { if (stmt != null) stmt.close(); if (conn != null) conn.close(); } catch (SQLException ex) { Logger.getLogger(Search_analysis.class.getName()).log(Level.SEVERE, null, ex); } } try { conn = DriverManager.getConnection(url, user, password); System.out.println("I am going to get the stats from Sindice\n"); int ntriples = striple.getsindicestats(links_total[j]);//get the amount of semantic triples using Sindice API System.out.println("I am going insert the semantic triples number in the DB\n"); stmt = conn.prepareStatement( "UPDATE SEMANTICSTATS SET `total_semantic_triples`=? WHERE `url` =? AND `query`=? AND `search_engine`=? AND `domain`=?"); stmt.setInt(1, ntriples); stmt.setString(2, urlString); stmt.setString(3, quer); stmt.setInt(4, engine); stmt.setString(5, domain); stmt.executeUpdate(); System.out.println("I inserted the semantic triples number in the DB\n"); //---namespaces----- System.out.println("I am going to insert the namespaces in the DB\n"); } finally { try { if (stmt != null) stmt.close(); if (conn != null) conn.close(); } catch (SQLException ex) { Logger.getLogger(Search_analysis.class.getName()).log(Level.SEVERE, null, ex); } } boolean flagStriple = false; if (flagStriple) { if (striple.namespaces[0]) { try { conn = DriverManager.getConnection(url, user, password); stmt = conn.prepareStatement( "UPDATE NAMESPACESSTATS SET `http://purl.org/vocab/bio/0.1/` = ? WHERE `url` = ? AND `query`=? AND `search_engine`=? 
AND `domain`=?"); stmt.setBoolean(1, true); stmt.setString(2, urlString); stmt.setString(3, quer); stmt.setInt(4, engine); stmt.setString(5, domain); stmt.executeUpdate(); } finally { try { if (stmt != null) stmt.close(); if (conn != null) conn.close(); } catch (SQLException ex) { Logger.getLogger(Search_analysis.class.getName()).log(Level.SEVERE, null, ex); } } } if (striple.namespaces[1]) { try { conn = DriverManager.getConnection(url, user, password); stmt = conn.prepareStatement( "UPDATE NAMESPACESSTATS SET `http://purl.org/dc/elements/1.1/` =? WHERE `url`=? AND `query`=? AND `search_engine`=? AND `domain`=?"); stmt.setBoolean(1, true); stmt.setString(2, urlString); stmt.setString(3, quer); stmt.setInt(4, engine); stmt.setString(5, domain); stmt.executeUpdate(); } finally { try { if (stmt != null) stmt.close(); if (conn != null) conn.close(); } catch (SQLException ex) { Logger.getLogger(Search_analysis.class.getName()).log(Level.SEVERE, null, ex); } } } if (striple.namespaces[2]) { try { conn = DriverManager.getConnection(url, user, password); stmt = conn.prepareStatement( "UPDATE NAMESPACESSTATS SET `http://purl.org/coo/n` = ? WHERE `url`=? AND `query`=? AND `search_engine`=? AND `domain`=?"); stmt.setBoolean(1, true); stmt.setString(2, urlString); stmt.setString(3, quer); stmt.setInt(4, engine); stmt.setString(5, domain); stmt.executeUpdate(); } finally { try { if (stmt != null) stmt.close(); if (conn != null) conn.close(); } catch (SQLException ex) { Logger.getLogger(Search_analysis.class.getName()).log(Level.SEVERE, null, ex); } } } if (striple.namespaces[3]) { try { conn = DriverManager.getConnection(url, user, password); stmt = conn.prepareStatement( "UPDATE NAMESPACESSTATS SET `http://web.resource.org/cc/`=? WHERE `url`=? AND `query`=? AND `search_engine`=? 
AND `domain`=?"); stmt.setBoolean(1, true); stmt.setString(2, urlString); stmt.setString(3, quer); stmt.setInt(4, engine); stmt.setString(5, domain); stmt.executeUpdate(); } finally { try { if (stmt != null) stmt.close(); if (conn != null) conn.close(); } catch (SQLException ex) { Logger.getLogger(Search_analysis.class.getName()).log(Level.SEVERE, null, ex); } } } if (striple.namespaces[4]) { try { conn = DriverManager.getConnection(url, user, password); stmt = conn.prepareStatement( "UPDATE NAMESPACESSTATS SET `http://diligentarguont.ontoware.org/2005/10/arguonto`=? WHERE `url`=? AND `query`=? AND `search_engine`=? AND `domain`=?"); stmt.setBoolean(1, true); stmt.setString(2, urlString); stmt.setString(3, quer); stmt.setInt(4, engine); stmt.setString(5, domain); stmt.executeUpdate(); } finally { try { if (stmt != null) stmt.close(); if (conn != null) conn.close(); } catch (SQLException ex) { Logger.getLogger(Search_analysis.class.getName()).log(Level.SEVERE, null, ex); } } } if (striple.namespaces[5]) { try { conn = DriverManager.getConnection(url, user, password); stmt = conn.prepareStatement( "UPDATE NAMESPACESSTATS SET `http://usefulinc.com/ns/doap`=? WHERE `url`=? AND `query`=? AND `search_engine`=? AND `domain`=?"); stmt.setBoolean(1, true); stmt.setString(2, urlString); stmt.setString(3, quer); stmt.setInt(4, engine); stmt.setString(5, domain); stmt.executeUpdate(); } finally { try { if (stmt != null) stmt.close(); if (conn != null) conn.close(); } catch (SQLException ex) { Logger.getLogger(Search_analysis.class.getName()).log(Level.SEVERE, null, ex); } } } if (striple.namespaces[6]) { try { conn = DriverManager.getConnection(url, user, password); stmt = conn.prepareStatement( "UPDATE NAMESPACESSTATS SET `http://xmlns.com/foaf/0.1/`=? WHERE `url`=? AND `query`=? AND `search_engine`=? 
AND `domain`=?"); stmt.setBoolean(1, true); stmt.setString(2, urlString); stmt.setString(3, quer); stmt.setInt(4, engine); stmt.setString(5, domain); stmt.executeUpdate(); } finally { try { if (stmt != null) stmt.close(); if (conn != null) conn.close(); } catch (SQLException ex) { Logger.getLogger(Search_analysis.class.getName()).log(Level.SEVERE, null, ex); } } } if (striple.namespaces[7]) { try { conn = DriverManager.getConnection(url, user, password); stmt = conn.prepareStatement( "UPDATE NAMESPACESSTATS SET `http://purl.org/goodrelations/`=? WHERE `url`=? AND `query`=? AND `search_engine`=? AND `domain`=?"); stmt.setBoolean(1, true); stmt.setString(2, urlString); stmt.setString(3, quer); stmt.setInt(4, engine); stmt.setString(5, domain); stmt.executeUpdate(); } finally { try { if (stmt != null) stmt.close(); if (conn != null) conn.close(); } catch (SQLException ex) { Logger.getLogger(Search_analysis.class.getName()).log(Level.SEVERE, null, ex); } } } if (striple.namespaces[8]) { try { conn = DriverManager.getConnection(url, user, password); stmt = conn.prepareStatement( "UPDATE NAMESPACESSTATS SET `http://purl.org/muto/core`=? WHERE `url`=? AND `query`=? AND `search_engine`=? AND `domain`=?"); stmt.setBoolean(1, true); stmt.setString(2, urlString); stmt.setString(3, quer); stmt.setInt(4, engine); stmt.setString(5, domain); stmt.executeUpdate(); } finally { try { if (stmt != null) stmt.close(); if (conn != null) conn.close(); } catch (SQLException ex) { Logger.getLogger(Search_analysis.class.getName()).log(Level.SEVERE, null, ex); } } } if (striple.namespaces[9]) { try { conn = DriverManager.getConnection(url, user, password); stmt = conn.prepareStatement( "UPDATE NAMESPACESSTATS SET `http://webns.net/mvcb/`=? WHERE `url`=? AND `query`=? AND `search_engine`=? 
AND `domain`=?"); stmt.setBoolean(1, true); stmt.setString(2, urlString); stmt.setString(3, quer); stmt.setInt(4, engine); stmt.setString(5, domain); stmt.executeUpdate(); } finally { try { if (stmt != null) stmt.close(); if (conn != null) conn.close(); } catch (SQLException ex) { Logger.getLogger(Search_analysis.class.getName()).log(Level.SEVERE, null, ex); } } } if (striple.namespaces[10]) { try { conn = DriverManager.getConnection(url, user, password); stmt = conn.prepareStatement( "UPDATE NAMESPACESSTATS SET `http://purl.org/ontology/mo/`=? WHERE `url`=? AND `query`=? AND `search_engine`=? AND `domain`=?"); stmt.setBoolean(1, true); stmt.setString(2, urlString); stmt.setString(3, quer); stmt.setInt(4, engine); stmt.setString(5, domain); stmt.executeUpdate(); } finally { try { if (stmt != null) stmt.close(); if (conn != null) conn.close(); } catch (SQLException ex) { Logger.getLogger(Search_analysis.class.getName()).log(Level.SEVERE, null, ex); } } } if (striple.namespaces[11]) { try { conn = DriverManager.getConnection(url, user, password); stmt = conn.prepareStatement( "UPDATE NAMESPACESSTATS SET `http://purl.org/innovation/ns`=? WHERE `url`=? AND `query`=? AND `search_engine`=? AND `domain`=?"); stmt.setBoolean(1, true); stmt.setString(2, urlString); stmt.setString(3, quer); stmt.setInt(4, engine); stmt.setString(5, domain); stmt.executeUpdate(); } finally { try { if (stmt != null) stmt.close(); if (conn != null) conn.close(); } catch (SQLException ex) { Logger.getLogger(Search_analysis.class.getName()).log(Level.SEVERE, null, ex); } } } if (striple.namespaces[12]) { try { stmt = conn.prepareStatement( "UPDATE NAMESPACESSTATS SET `http://openguid.net/rdf`=? WHERE `url`=? AND `query`=? AND `search_engine`=? 
AND `domain`=?"); stmt.setBoolean(1, true); stmt.setString(2, urlString); stmt.setString(3, quer); stmt.setInt(4, engine); stmt.setString(5, domain); stmt.executeUpdate(); } finally { try { if (stmt != null) stmt.close(); if (conn != null) conn.close(); } catch (SQLException ex) { Logger.getLogger(Search_analysis.class.getName()).log(Level.SEVERE, null, ex); } } } if (striple.namespaces[13]) { try { conn = DriverManager.getConnection(url, user, password); stmt = conn.prepareStatement( "UPDATE NAMESPACESSTATS SET `http://www.slamka.cz/ontologies/diagnostika.owl`=? WHERE `url`=? AND `query`=? AND `search_engine`=? AND `domain`=?"); stmt.setBoolean(1, true); stmt.setString(2, urlString); stmt.setString(3, quer); stmt.setInt(4, engine); stmt.setString(5, domain); stmt.executeUpdate(); } finally { try { if (stmt != null) stmt.close(); if (conn != null) conn.close(); } catch (SQLException ex) { Logger.getLogger(Search_analysis.class.getName()).log(Level.SEVERE, null, ex); } } } if (striple.namespaces[14]) { try { conn = DriverManager.getConnection(url, user, password); stmt = conn.prepareStatement( "UPDATE NAMESPACESSTATS SET `http://purl.org/ontology/po/`=? WHERE `url`=? AND `query`=? AND `search_engine`=? AND `domain`=?"); stmt.setBoolean(1, true); stmt.setString(2, urlString); stmt.setString(3, quer); stmt.setInt(4, engine); stmt.setString(5, domain); stmt.executeUpdate(); } finally { try { if (stmt != null) stmt.close(); if (conn != null) conn.close(); } catch (SQLException ex) { Logger.getLogger(Search_analysis.class.getName()).log(Level.SEVERE, null, ex); } } } if (striple.namespaces[15]) { try { conn = DriverManager.getConnection(url, user, password); stmt = conn.prepareStatement( "UPDATE NAMESPACESSTATS SET `http://purl.org/net/provenance/ns`=? WHERE `url`=? AND `query`=? AND `search_engine`=? 
AND `domain`=?"); stmt.setBoolean(1, true); stmt.setString(2, urlString); stmt.setString(3, quer); stmt.setInt(4, engine); stmt.setString(5, domain); stmt.executeUpdate(); } finally { try { if (stmt != null) stmt.close(); if (conn != null) conn.close(); } catch (SQLException ex) { Logger.getLogger(Search_analysis.class.getName()).log(Level.SEVERE, null, ex); } } } if (striple.namespaces[16]) { try { conn = DriverManager.getConnection(url, user, password); stmt = conn.prepareStatement( "UPDATE NAMESPACESSTATS SET `http://purl.org/rss/1.0/modules/syndication`=? WHERE `url`=? AND `query`=? AND `search_engine`=? AND `domain`=?"); stmt.setBoolean(1, true); stmt.setString(2, urlString); stmt.setString(3, quer); stmt.setInt(4, engine); stmt.setString(5, domain); stmt.executeUpdate(); } finally { try { if (stmt != null) stmt.close(); if (conn != null) conn.close(); } catch (SQLException ex) { Logger.getLogger(Search_analysis.class.getName()).log(Level.SEVERE, null, ex); } } } if (striple.namespaces[17]) { try { conn = DriverManager.getConnection(url, user, password); stmt = conn.prepareStatement( "UPDATE NAMESPACESSTATS SET `http://rdfs.org/sioc/ns`=? WHERE `url`=? AND `query`=? AND `search_engine`=? AND `domain`=?"); stmt.setBoolean(1, true); stmt.setString(2, urlString); stmt.setString(3, quer); stmt.setInt(4, engine); stmt.setString(5, domain); stmt.executeUpdate(); } finally { try { if (stmt != null) stmt.close(); if (conn != null) conn.close(); } catch (SQLException ex) { Logger.getLogger(Search_analysis.class.getName()).log(Level.SEVERE, null, ex); } } } if (striple.namespaces[18]) { try { conn = DriverManager.getConnection(url, user, password); stmt = conn.prepareStatement( "UPDATE NAMESPACESSTATS SET `http://madskills.com/public/xml/rss/module/trackback/`=? WHERE `url`=? AND `query`=? AND `search_engine`=? 
AND `domain`=?"); stmt.setBoolean(1, true); stmt.setString(2, urlString); stmt.setString(3, quer); stmt.setInt(4, engine); stmt.setString(5, domain); stmt.executeUpdate(); } finally { try { if (stmt != null) stmt.close(); if (conn != null) conn.close(); } catch (SQLException ex) { Logger.getLogger(Search_analysis.class.getName()).log(Level.SEVERE, null, ex); } } } if (striple.namespaces[19]) { try { conn = DriverManager.getConnection(url, user, password); stmt = conn.prepareStatement( "UPDATE NAMESPACESSTATS SET `http://rdfs.org/ns/void`=? WHERE `url`=? AND `query`=? AND `search_engine`=? AND `domain`=?"); stmt.setBoolean(1, true); stmt.setString(2, urlString); stmt.setString(3, quer); stmt.setInt(4, engine); stmt.setString(5, domain); stmt.executeUpdate(); } finally { try { if (stmt != null) stmt.close(); if (conn != null) conn.close(); } catch (SQLException ex) { Logger.getLogger(Search_analysis.class.getName()).log(Level.SEVERE, null, ex); } } } if (striple.namespaces[20]) { try { conn = DriverManager.getConnection(url, user, password); stmt = conn.prepareStatement( "UPDATE NAMESPACESSTATS SET `http://www.fzi.de/2008/wise/`=? WHERE `url`=? AND `query`=? AND `search_engine`=? AND `domain`=?"); stmt.setBoolean(1, true); stmt.setString(2, urlString); stmt.setString(3, quer); stmt.setInt(4, engine); stmt.setString(5, domain); stmt.executeUpdate(); } finally { try { if (stmt != null) stmt.close(); if (conn != null) conn.close(); } catch (SQLException ex) { Logger.getLogger(Search_analysis.class.getName()).log(Level.SEVERE, null, ex); } } } if (striple.namespaces[21]) { try { conn = DriverManager.getConnection(url, user, password); stmt = conn.prepareStatement( "UPDATE NAMESPACESSTATS SET `http://xmlns.com/wot/0.1`=? WHERE `url`=? AND `query`=? AND `search_engine`=? 
AND `domain`=?"); stmt.setBoolean(1, true); stmt.setString(2, urlString); stmt.setString(3, quer); stmt.setInt(4, engine); stmt.setString(5, domain); stmt.executeUpdate(); } finally { try { if (stmt != null) stmt.close(); if (conn != null) conn.close(); } catch (SQLException ex) { Logger.getLogger(Search_analysis.class.getName()).log(Level.SEVERE, null, ex); } } } if (striple.namespaces[22]) { try { conn = DriverManager.getConnection(url, user, password); stmt = conn.prepareStatement( "UPDATE NAMESPACESSTATS SET `http://www.w3.org/1999/02/22-rdf-syntax-ns`=? WHERE `url`=? AND `query`=? AND `search_engine`=? AND `domain`=?"); stmt.setBoolean(1, true); stmt.setString(2, urlString); stmt.setString(3, quer); stmt.setInt(4, engine); stmt.setString(5, domain); stmt.executeUpdate(); } finally { try { if (stmt != null) stmt.close(); if (conn != null) conn.close(); } catch (SQLException ex) { Logger.getLogger(Search_analysis.class.getName()).log(Level.SEVERE, null, ex); } } } if (striple.namespaces[23]) { try { conn = DriverManager.getConnection(url, user, password); stmt = conn.prepareStatement( "UPDATE NAMESPACESSTATS SET `rdf-schema`=? WHERE `url`=? AND `query`=? AND `search_engine`=? AND `domain`=?"); stmt.setBoolean(1, true); stmt.setString(2, urlString); stmt.setString(3, quer); stmt.setInt(4, engine); stmt.setString(5, domain); stmt.executeUpdate(); } finally { try { if (stmt != null) stmt.close(); if (conn != null) conn.close(); } catch (SQLException ex) { Logger.getLogger(Search_analysis.class.getName()).log(Level.SEVERE, null, ex); } } } if (striple.namespaces[24]) { try { conn = DriverManager.getConnection(url, user, password); stmt = conn.prepareStatement( "UPDATE NAMESPACESSTATS SET `XMLschema`=? WHERE `url`=? AND `query`=? AND `search_engine`=? 
AND `domain`=?"); stmt.setBoolean(1, true); stmt.setString(2, urlString); stmt.setString(3, quer); stmt.setInt(4, engine); stmt.setString(5, domain); stmt.executeUpdate(); } finally { try { if (stmt != null) stmt.close(); if (conn != null) conn.close(); } catch (SQLException ex) { Logger.getLogger(Search_analysis.class.getName()).log(Level.SEVERE, null, ex); } } } if (striple.namespaces[25]) { try { conn = DriverManager.getConnection(url, user, password); stmt = conn.prepareStatement( "UPDATE NAMESPACESSTATS SET `OWL`=? WHERE `url`=? AND `query`=? AND `search_engine`=? AND `domain`=?"); stmt.setBoolean(1, true); stmt.setString(2, urlString); stmt.setString(3, quer); stmt.setInt(4, engine); stmt.setString(5, domain); stmt.executeUpdate(); } finally { try { if (stmt != null) stmt.close(); if (conn != null) conn.close(); } catch (SQLException ex) { Logger.getLogger(Search_analysis.class.getName()).log(Level.SEVERE, null, ex); } } } if (striple.namespaces[26]) { try { conn = DriverManager.getConnection(url, user, password); stmt = conn.prepareStatement( "UPDATE NAMESPACESSTATS SET `http://purl.org/dc/terms/`=? WHERE `url`=? AND `query`=? AND `search_engine`=? AND `domain`=?"); stmt.setBoolean(1, true); stmt.setString(2, urlString); stmt.setString(3, quer); stmt.setInt(4, engine); stmt.setString(5, domain); stmt.executeUpdate(); } finally { try { if (stmt != null) stmt.close(); if (conn != null) conn.close(); } catch (SQLException ex) { Logger.getLogger(Search_analysis.class.getName()).log(Level.SEVERE, null, ex); } } } if (striple.namespaces[27]) { try { conn = DriverManager.getConnection(url, user, password); stmt = conn.prepareStatement( "UPDATE NAMESPACESSTATS SET `VCARD`=? WHERE `url`=? AND `query`=? AND `search_engine`=? 
AND `domain`=?"); stmt.setBoolean(1, true); stmt.setString(2, urlString); stmt.setString(3, quer); stmt.setInt(4, engine); stmt.setString(5, domain); stmt.executeUpdate(); } finally { try { if (stmt != null) stmt.close(); if (conn != null) conn.close(); } catch (SQLException ex) { Logger.getLogger(Search_analysis.class.getName()).log(Level.SEVERE, null, ex); } } } if (striple.namespaces[28]) { try { conn = DriverManager.getConnection(url, user, password); stmt = conn.prepareStatement( "UPDATE NAMESPACESSTATS SET `http://www.geonames.org/ontology`=? WHERE `url`=? AND `query`=? AND `search_engine`=? AND `domain`=?"); stmt.setBoolean(1, true); stmt.setString(2, urlString); stmt.setString(3, quer); stmt.setInt(4, engine); stmt.setString(5, domain); stmt.executeUpdate(); } finally { try { if (stmt != null) stmt.close(); if (conn != null) conn.close(); } catch (SQLException ex) { Logger.getLogger(Search_analysis.class.getName()).log(Level.SEVERE, null, ex); } } } if (striple.namespaces[29]) { try { conn = DriverManager.getConnection(url, user, password); stmt = conn.prepareStatement( "UPDATE NAMESPACESSTATS SET `http://search.yahoo.com/searchmonkey/commerce/`=? WHERE `url`=? AND `query`=? AND `search_engine`=? AND `domain`=?"); stmt.setBoolean(1, true); stmt.setString(2, urlString); stmt.setString(3, quer); stmt.setInt(4, engine); stmt.setString(5, domain); stmt.executeUpdate(); } finally { try { if (stmt != null) stmt.close(); if (conn != null) conn.close(); } catch (SQLException ex) { Logger.getLogger(Search_analysis.class.getName()).log(Level.SEVERE, null, ex); } } } if (striple.namespaces[30]) { try { conn = DriverManager.getConnection(url, user, password); stmt = conn.prepareStatement( "UPDATE NAMESPACESSTATS SET `http://search.yahoo.com/searchmonkey/media/`=? WHERE `url`=? AND `query`=? AND `search_engine`=? 
AND `domain`=?"); stmt.setBoolean(1, true); stmt.setString(2, urlString); stmt.setString(3, quer); stmt.setInt(4, engine); stmt.setString(5, domain); stmt.executeUpdate(); } finally { try { if (stmt != null) stmt.close(); if (conn != null) conn.close(); } catch (SQLException ex) { Logger.getLogger(Search_analysis.class.getName()).log(Level.SEVERE, null, ex); } } } if (striple.namespaces[31]) { try { conn = DriverManager.getConnection(url, user, password); stmt = conn.prepareStatement( "UPDATE NAMESPACESSTATS SET `http://cb.semsol.org/ns#`=? WHERE `url`=? AND `query`=? AND `search_engine`=? AND `domain`=?"); stmt.setBoolean(1, true); stmt.setString(2, urlString); stmt.setString(3, quer); stmt.setInt(4, engine); stmt.setString(5, domain); stmt.executeUpdate(); } finally { try { if (stmt != null) stmt.close(); if (conn != null) conn.close(); } catch (SQLException ex) { Logger.getLogger(Search_analysis.class.getName()).log(Level.SEVERE, null, ex); } } } if (striple.namespaces[32]) { try { conn = DriverManager.getConnection(url, user, password); stmt = conn.prepareStatement( "UPDATE NAMESPACESSTATS SET `http://blogs.yandex.ru/schema/foaf/`=? WHERE `url`=? AND `query`=? AND `search_engine`=? AND `domain`=?"); stmt.setBoolean(1, true); stmt.setString(2, urlString); stmt.setString(3, quer); stmt.setInt(4, engine); stmt.setString(5, domain); stmt.executeUpdate(); } finally { try { if (stmt != null) stmt.close(); if (conn != null) conn.close(); } catch (SQLException ex) { Logger.getLogger(Search_analysis.class.getName()).log(Level.SEVERE, null, ex); } } } if (striple.namespaces[33]) { try { conn = DriverManager.getConnection(url, user, password); stmt = conn.prepareStatement( "UPDATE NAMESPACESSTATS SET `http://www.w3.org/2003/01/geo/wgs84_pos#`=? WHERE `url`=? AND `query`=? AND `search_engine`=? 
AND `domain`=?"); stmt.setBoolean(1, true); stmt.setString(2, urlString); stmt.setString(3, quer); stmt.setInt(4, engine); stmt.setString(5, domain); stmt.executeUpdate(); } finally { try { if (stmt != null) stmt.close(); if (conn != null) conn.close(); } catch (SQLException ex) { Logger.getLogger(Search_analysis.class.getName()).log(Level.SEVERE, null, ex); } } } if (striple.namespaces[34]) { try { conn = DriverManager.getConnection(url, user, password); stmt = conn.prepareStatement( "UPDATE NAMESPACESSTATS SET `http://rdfs.org/sioc/ns#`=? WHERE `url`=? AND `query`=? AND `search_engine`=? AND `domain`=?"); stmt.setBoolean(1, true); stmt.setString(2, urlString); stmt.setString(3, quer); stmt.setInt(4, engine); stmt.setString(5, domain); stmt.executeUpdate(); } finally { try { if (stmt != null) stmt.close(); if (conn != null) conn.close(); } catch (SQLException ex) { Logger.getLogger(Search_analysis.class.getName()).log(Level.SEVERE, null, ex); } } } if (striple.namespaces[35]) { try { conn = DriverManager.getConnection(url, user, password); stmt = conn.prepareStatement( "UPDATE NAMESPACESSTATS SET `http://rdfs.org/sioc/types#`=? WHERE `url`=? AND `query`=? AND `search_engine`=? AND `domain`=?"); stmt.setBoolean(1, true); stmt.setString(2, urlString); stmt.setString(3, quer); stmt.setInt(4, engine); stmt.setString(5, domain); stmt.executeUpdate(); } finally { try { if (stmt != null) stmt.close(); if (conn != null) conn.close(); } catch (SQLException ex) { Logger.getLogger(Search_analysis.class.getName()).log(Level.SEVERE, null, ex); } } } if (striple.namespaces[36]) { try { conn = DriverManager.getConnection(url, user, password); stmt = conn.prepareStatement( "UPDATE NAMESPACESSTATS SET `http://smw.ontoware.org/2005/smw#`=? WHERE `url`=? AND `query`=? AND `search_engine`=? 
AND `domain`=?"); stmt.setBoolean(1, true); stmt.setString(2, urlString); stmt.setString(3, quer); stmt.setInt(4, engine); stmt.setString(5, domain); stmt.executeUpdate(); } finally { try { if (stmt != null) stmt.close(); if (conn != null) conn.close(); } catch (SQLException ex) { Logger.getLogger(Search_analysis.class.getName()).log(Level.SEVERE, null, ex); } } } if (striple.namespaces[37]) { try { conn = DriverManager.getConnection(url, user, password); stmt = conn.prepareStatement( "UPDATE NAMESPACESSTATS SET `http://purl.org/rss/1.0/`= ? WHERE `url`=? AND `query`=? AND `search_engine`=? AND `domain`=?"); stmt.setBoolean(1, true); stmt.setString(2, urlString); stmt.setString(3, quer); stmt.setInt(4, engine); stmt.setString(5, domain); stmt.executeUpdate(); } finally { try { if (stmt != null) stmt.close(); if (conn != null) conn.close(); } catch (SQLException ex) { Logger.getLogger(Search_analysis.class.getName()).log(Level.SEVERE, null, ex); } } } if (striple.namespaces[38]) { try { conn = DriverManager.getConnection(url, user, password); stmt = conn.prepareStatement( "UPDATE NAMESPACESSTATS SET `http://www.w3.org/2004/12/q/contentlabel#`=? WHERE `url`=? AND `query`=? AND `search_engine`=? 
AND `domain`=?"); stmt.setBoolean(1, true); stmt.setString(2, urlString); stmt.setString(3, quer); stmt.setInt(4, engine); stmt.setString(5, domain); stmt.executeUpdate(); } finally { try { if (stmt != null) stmt.close(); if (conn != null) conn.close(); } catch (SQLException ex) { Logger.getLogger(Search_analysis.class.getName()).log(Level.SEVERE, null, ex); } } } } System.out.println("I inserted the namespaces in the DB\n"); System.out.println("I will get the semantic entities and categories\n"); //get the semantic entities and categories from Yahoo Content Analysis Service YahooEntityCategory yec = new YahooEntityCategory(); yec.connect(links_total[j], quer, false, SWebRankSettings.get(12));//without stemming EntitiesMapYahoo.put(j, yec.GetEntitiesYahoo()); CategoriesMapYahoo.put(j, yec.GetCategoriesYahoo()); double ent_avg_yahoo_score = yec.GetEntitiesYahooScore(); double cat_avg_yahoo_score = yec.GetCategoriesYahooScore(); int cat_cnt = yec.GetCatQuerCnt(); int ent_cnt = yec.GetEntQuerCnt(); int cat_cnt_whole = yec.GetCatQuerCntWhole(); int ent_cnt_whole = yec.GetEntQuerCntWhole(); yec.connect(links_total[j], quer, true, SWebRankSettings.get(12));//with stemming int cat_cnt_stem = yec.GetCatQuerCnt(); int ent_cnt_stem = yec.GetEntQuerCnt(); int cat_cnt_whole_stem = yec.GetCatQuerCntWhole(); int ent_cnt_whole_stem = yec.GetEntQuerCntWhole(); //get the semantic entities and categories from Dandelion Named entity extraction API DandelionEntities dec = new DandelionEntities(); dec.connect(links_total[j], quer, false, config_path, SWebRankSettings.get(12));//without stemming EntitiesMapDand.put(j, dec.GetEntitiesDand()); CategoriesMapDand.put(j, dec.GetCategoriesDand()); double ent_avg_d_score = dec.GetEntitiesScoreDand(); int cat_cnt_dand = dec.getCat(); int ent_cnt_dand = dec.getEnt(); int cat_cnt_dand_whole = dec.getCatWhole(); int ent_cnt_dand_whole = dec.getEntWhole(); dec.connect(links_total[j], quer, true, config_path, SWebRankSettings.get(12));//with 
stemming int cat_cnt_dand_stem = dec.getCat(); int ent_cnt_dand_stem = dec.getEnt(); int cat_cnt_dand_whole_stem = dec.getCatWhole(); int ent_cnt_dand_whole_stem = dec.getEntWhole(); //get the semantic entities and categories from dbpedia spotlight DBpediaSpotlightClient dbpspot = new DBpediaSpotlightClient(SWebRankSettings.get(12), SWebRankSettings.get(13).intValue()); dbpspot.countEntCat(links_total[j], quer, false);//false is not stemming EntitiesMapDBP.put(j, dbpspot.getEntities()); CategoriesMapDBP.put(j, dbpspot.getCategories()); double ent_avg_dbpspot_score = dbpspot.getEntitiesAvgScore(); double ent_max_dbpspot_score = dbpspot.getEntitiesMaxScore(); double ent_min_dbpspot_score = dbpspot.getEntitiesMinScore(); double ent_median_dbpspot_score = dbpspot.getEntitiesMedianScore(); double ent_std_dbpspot_score = dbpspot.getEntitiesStdScore(); double ent_avg_dbpspot_support = dbpspot.getEntitiesAvgSupport(); double ent_max_dbpspot_support = dbpspot.getEntitiesMaxSupport(); double ent_min_dbpspot_support = dbpspot.getEntitiesMinSupport(); double ent_median_dbpspot_support = dbpspot.getEntitiesMedianSupport(); double ent_std_dbpspot_support = dbpspot.getEntitiesStdSupport(); double ent_avg_dbpspot_dif = dbpspot.getEntitiesAvgDif(); double ent_max_dbpspot_dif = dbpspot.getEntitiesMaxDif(); double ent_min_dbpspot_dif = dbpspot.getEntitiesMinDif(); double ent_median_dbpspot_dif = dbpspot.getEntitiesMedianDif(); double ent_std_dbpspot_dif = dbpspot.getEntitiesStdDif(); double unique_ent_cnt_dbpspot = dbpspot.getUniqueEntCnt(); double unique_ent_scoreSum_dbpspot = dbpspot.getUniqueEntScoreSum(); int cat_cnt_dbpspot = dbpspot.getcountCat(); int ent_cnt_dbpspot = dbpspot.getcountEnt(); int cat_cnt_dbpspot_whole = dbpspot.getcountCatWhole(); int ent_cnt_dbpspot_whole = dbpspot.getcountEntWhole(); double ent_sup_cnt_dbpspot = dbpspot.getcountSupEnt(); double ent_sim_cnt_dbpspot = dbpspot.getcountSimEnt(); double ent_dif_cnt_dbpspot = dbpspot.getcountDifEnt(); double 
high_precision_content_dbpspot = dbpspot.getHighPrecEntities(); dbpspot.countEntCat(links_total[j], quer, true);//true is for stemming int cat_cnt_dbpspot_stem = dbpspot.getcountCat(); int ent_cnt_dbpspot_stem = dbpspot.getcountEnt(); int cat_cnt_dbpspot_whole_stem = dbpspot.getcountCatWhole(); int ent_cnt_dbpspot_whole_stem = dbpspot.getcountEntWhole(); double ent_sup_cnt_dbpspot_stem = dbpspot.getcountSupEnt(); double ent_sim_cnt_dbpspot_stem = dbpspot.getcountSimEnt(); double ent_dif_cnt_dbpspot_stem = dbpspot.getcountDifEnt(); System.out.println("I insert the semantic entities and categories stats in the DB\n"); StringBuilder entitiesStatementBuilder = new StringBuilder(); try { entitiesStatementBuilder.append("UPDATE SEMANTICSTATS SET "); entitiesStatementBuilder.append("`ent_avg_y_score`=?,"); entitiesStatementBuilder.append("`cat_avg_y_score`=? "); entitiesStatementBuilder .append("WHERE `url`=? AND `query`=? AND `search_engine`=? AND `domain`=?"); conn = DriverManager.getConnection(url, user, password); stmt = conn.prepareStatement(entitiesStatementBuilder.toString()); stmt.setDouble(1, ent_avg_yahoo_score); stmt.setDouble(2, cat_avg_yahoo_score); stmt.setString(3, urlString); stmt.setString(4, quer); if (j < results_number) { stmt.setInt(5, 0);//0 for yahoo } else if (j < results_number * 2) { stmt.setInt(5, 1);//1 for google } else if (j < results_number * 3) { stmt.setInt(5, 2);//2 for bing } stmt.setString(6, domain); stmt.executeUpdate(); } catch (SQLException ex) { Logger.getLogger(Search_analysis.class.getName()).log(Level.SEVERE, null, ex); } finally { try { if (stmt != null) stmt.close(); if (conn != null) conn.close(); } catch (SQLException ex) { Logger.getLogger(Search_analysis.class.getName()).log(Level.SEVERE, null, ex); } } try { entitiesStatementBuilder = new StringBuilder(); entitiesStatementBuilder.append("UPDATE SEMANTICSTATS SET "); entitiesStatementBuilder.append("`ent_avg_dand_score`=?,"); 
entitiesStatementBuilder.append("`ent_avg_dbpspot_score`=? "); entitiesStatementBuilder .append("WHERE `url`=? AND `query`=? AND `search_engine`=? AND `domain`=?"); conn = DriverManager.getConnection(url, user, password); stmt = conn.prepareStatement(entitiesStatementBuilder.toString()); stmt.setDouble(1, ent_avg_d_score); stmt.setDouble(2, ent_avg_dbpspot_score); stmt.setString(3, urlString); stmt.setString(4, quer); if (j < results_number) { stmt.setInt(5, 0);//0 for yahoo } else if (j < results_number * 2) { stmt.setInt(5, 1);//1 for google } else if (j < results_number * 3) { stmt.setInt(5, 2);//2 for bing } stmt.setString(6, domain); stmt.executeUpdate(); } catch (SQLException ex) { Logger.getLogger(Search_analysis.class.getName()).log(Level.SEVERE, null, ex); } finally { try { if (stmt != null) stmt.close(); if (conn != null) conn.close(); } catch (SQLException ex) { Logger.getLogger(Search_analysis.class.getName()).log(Level.SEVERE, null, ex); } } try { entitiesStatementBuilder = new StringBuilder(); entitiesStatementBuilder.append("UPDATE SEMANTICSTATS SET "); entitiesStatementBuilder.append("`ent_max_dbpspot_score`=?,"); entitiesStatementBuilder.append("`ent_min_dbpspot_score`=?,"); entitiesStatementBuilder.append("`ent_median_dbpspot_score`=?,"); entitiesStatementBuilder.append("`ent_std_dbpspot_score`=? "); entitiesStatementBuilder .append("WHERE `url`=? AND `query`=? AND `search_engine`=? 
AND `domain`=?"); conn = DriverManager.getConnection(url, user, password); stmt = conn.prepareStatement(entitiesStatementBuilder.toString()); stmt.setDouble(1, ent_max_dbpspot_score); stmt.setDouble(2, ent_min_dbpspot_score); stmt.setDouble(3, ent_median_dbpspot_score); stmt.setDouble(4, ent_std_dbpspot_score); stmt.setString(5, links_total[j]); stmt.setString(6, quer); if (j < results_number) { stmt.setInt(7, 0);//0 for yahoo } else if (j < results_number * 2) { stmt.setInt(7, 1);//1 for google } else if (j < results_number * 3) { stmt.setInt(7, 2);//2 for bing } stmt.setString(8, domain); stmt.executeUpdate(); } catch (SQLException ex) { Logger.getLogger(Search_analysis.class.getName()).log(Level.SEVERE, null, ex); } finally { try { if (stmt != null) stmt.close(); if (conn != null) conn.close(); } catch (SQLException ex) { Logger.getLogger(Search_analysis.class.getName()).log(Level.SEVERE, null, ex); } } try { entitiesStatementBuilder = new StringBuilder(); entitiesStatementBuilder.append("UPDATE SEMANTICSTATS SET "); entitiesStatementBuilder.append("`ent_avg_dbpspot_support`=?,"); entitiesStatementBuilder.append("`ent_max_dbpspot_support`=?,"); entitiesStatementBuilder.append("`ent_min_dbpspot_support`=?,"); entitiesStatementBuilder.append("`ent_median_dbpspot_support`=?,"); entitiesStatementBuilder.append("`ent_std_dbpspot_support`=? "); entitiesStatementBuilder .append("WHERE `url`=? AND `query`=? AND `search_engine`=? 
AND `domain`=?"); conn = DriverManager.getConnection(url, user, password); stmt = conn.prepareStatement(entitiesStatementBuilder.toString()); stmt.setDouble(1, ent_avg_dbpspot_support); stmt.setDouble(2, ent_max_dbpspot_support); stmt.setDouble(3, ent_min_dbpspot_support); stmt.setDouble(4, ent_median_dbpspot_support); stmt.setDouble(5, ent_std_dbpspot_support); stmt.setString(6, links_total[j]); stmt.setString(7, quer); if (j < results_number) { stmt.setInt(8, 0);//0 for yahoo } else if (j < results_number * 2) { stmt.setInt(8, 1);//1 for google } else if (j < results_number * 3) { stmt.setInt(8, 2);//2 for bing } stmt.setString(9, domain); System.out.println("avg db support" + ent_avg_dbpspot_support); stmt.executeUpdate(); } catch (SQLException ex) { Logger.getLogger(Search_analysis.class.getName()).log(Level.SEVERE, null, ex); } finally { try { if (stmt != null) stmt.close(); if (conn != null) conn.close(); } catch (SQLException ex) { Logger.getLogger(Search_analysis.class.getName()).log(Level.SEVERE, null, ex); } } try { entitiesStatementBuilder = new StringBuilder(); entitiesStatementBuilder.append("UPDATE SEMANTICSTATS SET "); entitiesStatementBuilder.append("`ent_avg_dbpspot_dif`=?,"); entitiesStatementBuilder.append("`ent_max_dbpspot_dif`=?,"); entitiesStatementBuilder.append("`ent_min_dbpspot_dif`=?,"); entitiesStatementBuilder.append("`ent_median_dbpspot_dif`=?,"); entitiesStatementBuilder.append("`ent_std_dbpspot_dif`=? "); entitiesStatementBuilder .append("WHERE `url`=? AND `query`=? AND `search_engine`=? 
AND `domain`=?"); conn = DriverManager.getConnection(url, user, password); stmt = conn.prepareStatement(entitiesStatementBuilder.toString()); stmt.setDouble(1, ent_avg_dbpspot_dif); stmt.setDouble(2, ent_max_dbpspot_dif); stmt.setDouble(3, ent_min_dbpspot_dif); stmt.setDouble(4, ent_median_dbpspot_dif); stmt.setDouble(5, ent_std_dbpspot_dif); stmt.setString(6, links_total[j]); stmt.setString(7, quer); if (j < results_number) { stmt.setInt(8, 0);//0 for yahoo } else if (j < results_number * 2) { stmt.setInt(8, 1);//1 for google } else if (j < results_number * 3) { stmt.setInt(8, 2);//2 for bing } stmt.setString(9, domain); stmt.executeUpdate(); } catch (SQLException ex) { Logger.getLogger(Search_analysis.class.getName()).log(Level.SEVERE, null, ex); } finally { try { if (stmt != null) stmt.close(); if (conn != null) conn.close(); } catch (SQLException ex) { Logger.getLogger(Search_analysis.class.getName()).log(Level.SEVERE, null, ex); } } try { conn = DriverManager.getConnection(url, user, password); entitiesStatementBuilder = new StringBuilder(); entitiesStatementBuilder.append("UPDATE SEMANTICSTATS SET "); entitiesStatementBuilder.append("`ent_sup_cnt_dbpspot`=?,"); entitiesStatementBuilder.append("`ent_dif_cnt_dbpspot`=?,"); entitiesStatementBuilder.append("`ent_sim_cnt_dbpspot`=? "); entitiesStatementBuilder .append("WHERE `url`=? AND `query`=? AND `search_engine`=? 
AND `domain`=?"); stmt = conn.prepareStatement(entitiesStatementBuilder.toString()); stmt.setDouble(1, ent_sup_cnt_dbpspot); stmt.setDouble(2, ent_dif_cnt_dbpspot); stmt.setDouble(3, ent_sim_cnt_dbpspot); stmt.setString(4, links_total[j]); stmt.setString(5, quer); if (j < results_number) { stmt.setInt(6, 0);//0 for yahoo } else if (j < results_number * 2) { stmt.setInt(6, 1);//1 for google } else if (j < results_number * 3) { stmt.setInt(6, 2);//2 for bing } stmt.setString(7, domain); stmt.executeUpdate(); } finally { try { if (stmt != null) stmt.close(); if (conn != null) conn.close(); } catch (SQLException ex) { Logger.getLogger(Search_analysis.class.getName()).log(Level.SEVERE, null, ex); } } try { conn = DriverManager.getConnection(url, user, password); entitiesStatementBuilder = new StringBuilder(); entitiesStatementBuilder.append("UPDATE SEMANTICSTATS SET "); entitiesStatementBuilder.append("`ent_sup_cnt_dbpspot_stem`=?,"); entitiesStatementBuilder.append("`ent_dif_cnt_dbpspot_stem`=?,"); entitiesStatementBuilder.append("`ent_sim_cnt_dbpspot_stem`=? "); entitiesStatementBuilder .append("WHERE `url`=? AND `query`=? AND `search_engine`=? 
AND `domain`=?"); stmt = conn.prepareStatement(entitiesStatementBuilder.toString()); stmt.setDouble(1, ent_sup_cnt_dbpspot_stem); stmt.setDouble(2, ent_dif_cnt_dbpspot_stem); stmt.setDouble(3, ent_sim_cnt_dbpspot_stem); stmt.setString(4, links_total[j]); stmt.setString(5, quer); if (j < results_number) { stmt.setInt(6, 0);//0 for yahoo } else if (j < results_number * 2) { stmt.setInt(6, 1);//1 for google } else if (j < results_number * 3) { stmt.setInt(6, 2);//2 for bing } stmt.setString(7, domain); stmt.executeUpdate(); } finally { try { if (stmt != null) stmt.close(); if (conn != null) conn.close(); } catch (SQLException ex) { Logger.getLogger(Search_analysis.class.getName()).log(Level.SEVERE, null, ex); } } try { conn = DriverManager.getConnection(url, user, password); entitiesStatementBuilder = new StringBuilder(); entitiesStatementBuilder.append("UPDATE SEMANTICSTATS SET "); entitiesStatementBuilder.append("`unique_ent_cnt_dbpspot`=?,"); entitiesStatementBuilder.append("`unique_ent_scoreSum_dbpspot`=?,"); entitiesStatementBuilder.append("`high_precision_content_dbpspot`=? "); entitiesStatementBuilder .append("WHERE `url`=? AND `query`=? AND `search_engine`=? 
AND `domain`=?"); stmt = conn.prepareStatement(entitiesStatementBuilder.toString()); stmt.setDouble(1, unique_ent_cnt_dbpspot); stmt.setDouble(2, unique_ent_scoreSum_dbpspot); stmt.setDouble(3, high_precision_content_dbpspot); stmt.setString(4, links_total[j]); stmt.setString(5, quer); if (j < results_number) { stmt.setInt(6, 0);//0 for yahoo } else if (j < results_number * 2) { stmt.setInt(6, 1);//1 for google } else if (j < results_number * 3) { stmt.setInt(6, 2);//2 for bing } stmt.setString(7, domain); stmt.executeUpdate(); } finally { try { if (stmt != null) stmt.close(); if (conn != null) conn.close(); } catch (SQLException ex) { Logger.getLogger(Search_analysis.class.getName()).log(Level.SEVERE, null, ex); } } try { entitiesStatementBuilder = new StringBuilder(); entitiesStatementBuilder.append("UPDATE SEMANTICSTATS SET "); entitiesStatementBuilder.append("`Categories_Contained_Query_Y`=?,"); entitiesStatementBuilder.append("`Entities_Contained_Query_Y`=?,"); entitiesStatementBuilder.append("`Categories_Contained_Query_Y_W`=? "); entitiesStatementBuilder .append("WHERE `url`=? AND `query`=? AND `search_engine`=? 
AND `domain`=?"); conn = DriverManager.getConnection(url, user, password); stmt = conn.prepareStatement(entitiesStatementBuilder.toString()); stmt.setInt(1, cat_cnt); stmt.setInt(2, ent_cnt); stmt.setInt(3, cat_cnt_whole); stmt.setString(4, urlString); stmt.setString(5, quer); if (j < results_number) { stmt.setInt(6, 0);//0 for yahoo } else if (j < results_number * 2) { stmt.setInt(6, 1);//1 for google } else if (j < results_number * 3) { stmt.setInt(6, 2);//2 for bing } stmt.setString(7, domain); stmt.executeUpdate(); } finally { try { if (stmt != null) stmt.close(); if (conn != null) conn.close(); } catch (SQLException ex) { Logger.getLogger(Search_analysis.class.getName()).log(Level.SEVERE, null, ex); } } try { conn = DriverManager.getConnection(url, user, password); entitiesStatementBuilder = new StringBuilder(); entitiesStatementBuilder.append("UPDATE SEMANTICSTATS SET "); entitiesStatementBuilder.append("`Entities_Contained_Query_Y_W`=?,"); entitiesStatementBuilder.append("`Categories_Contained_Query_D`=?,"); entitiesStatementBuilder.append("`Entities_Contained_Query_D`=? "); entitiesStatementBuilder .append("WHERE `url`=? AND `query`=? AND `search_engine`=? 
AND `domain`=?"); stmt = conn.prepareStatement(entitiesStatementBuilder.toString()); stmt.setInt(1, ent_cnt_whole); stmt.setInt(2, cat_cnt_dand); stmt.setInt(3, ent_cnt_dand); stmt.setString(4, urlString); stmt.setString(5, quer); if (j < results_number) { stmt.setInt(6, 0);//0 for yahoo } else if (j < results_number * 2) { stmt.setInt(6, 1);//1 for google } else if (j < results_number * 3) { stmt.setInt(6, 2);//2 for bing } stmt.setString(7, domain); stmt.executeUpdate(); } finally { try { if (stmt != null) stmt.close(); if (conn != null) conn.close(); } catch (SQLException ex) { Logger.getLogger(Search_analysis.class.getName()).log(Level.SEVERE, null, ex); } } try { conn = DriverManager.getConnection(url, user, password); entitiesStatementBuilder = new StringBuilder(); entitiesStatementBuilder.append("UPDATE SEMANTICSTATS SET "); entitiesStatementBuilder.append("`Categories_Contained_Query_D_W`=?,"); entitiesStatementBuilder.append("`Entities_Contained_Query_D_W`=? "); entitiesStatementBuilder .append("WHERE `url`=? AND `query`=? AND `search_engine`=? 
AND `domain`=?"); stmt = conn.prepareStatement(entitiesStatementBuilder.toString()); stmt.setInt(1, cat_cnt_dand_whole); stmt.setInt(2, ent_cnt_dand_whole); stmt.setString(3, urlString); stmt.setString(4, quer); if (j < results_number) { stmt.setInt(5, 0);//0 for yahoo } else if (j < results_number * 2) { stmt.setInt(5, 1);//1 for google } else if (j < results_number * 3) { stmt.setInt(5, 2);//2 for bing } stmt.setString(6, domain); stmt.executeUpdate(); } finally { try { if (stmt != null) stmt.close(); if (conn != null) conn.close(); } catch (SQLException ex) { Logger.getLogger(Search_analysis.class.getName()).log(Level.SEVERE, null, ex); } } try { conn = DriverManager.getConnection(url, user, password); entitiesStatementBuilder = new StringBuilder(); entitiesStatementBuilder.append("UPDATE SEMANTICSTATS SET "); entitiesStatementBuilder.append("`Categories_Contained_Query_DBPspot`=?,"); entitiesStatementBuilder.append("`Entities_Contained_Query_DBPspot`=? "); entitiesStatementBuilder .append("WHERE `url`=? AND `query`=? AND `search_engine`=? 
AND `domain`=?"); stmt = conn.prepareStatement(entitiesStatementBuilder.toString()); stmt.setInt(1, cat_cnt_dbpspot); stmt.setInt(2, ent_cnt_dbpspot); stmt.setString(3, urlString); stmt.setString(4, quer); if (j < results_number) { stmt.setInt(5, 0);//0 for yahoo } else if (j < results_number * 2) { stmt.setInt(5, 1);//1 for google } else if (j < results_number * 3) { stmt.setInt(5, 2);//2 for bing } stmt.setString(6, domain); stmt.executeUpdate(); } finally { try { if (stmt != null) stmt.close(); if (conn != null) conn.close(); } catch (SQLException ex) { Logger.getLogger(Search_analysis.class.getName()).log(Level.SEVERE, null, ex); } } try { conn = DriverManager.getConnection(url, user, password); entitiesStatementBuilder = new StringBuilder(); entitiesStatementBuilder.append("UPDATE SEMANTICSTATS SET "); entitiesStatementBuilder.append("`Categories_Contained_Query_DBPspot_W`=?,"); entitiesStatementBuilder.append("`Entities_Contained_Query_DBPspot_W`=? "); entitiesStatementBuilder .append("WHERE `url`=? AND `query`=? AND `search_engine`=? 
AND `domain`=?"); stmt = conn.prepareStatement(entitiesStatementBuilder.toString()); stmt.setInt(1, cat_cnt_dbpspot_whole); stmt.setInt(2, ent_cnt_dbpspot_whole); stmt.setString(3, urlString); stmt.setString(4, quer); if (j < results_number) { stmt.setInt(5, 0);//0 for yahoo } else if (j < results_number * 2) { stmt.setInt(5, 1);//1 for google } else if (j < results_number * 3) { stmt.setInt(5, 2);//2 for bing } stmt.setString(6, domain); stmt.executeUpdate(); } finally { try { if (stmt != null) stmt.close(); if (conn != null) conn.close(); } catch (SQLException ex) { Logger.getLogger(Search_analysis.class.getName()).log(Level.SEVERE, null, ex); } } try { conn = DriverManager.getConnection(url, user, password); entitiesStatementBuilder = new StringBuilder(); entitiesStatementBuilder.append("UPDATE SEMANTICSTATS SET "); entitiesStatementBuilder.append("`Categories_Contained_Query_Y_Stem`=?,"); entitiesStatementBuilder.append("`Entities_Contained_Query_Y_Stem`=? "); entitiesStatementBuilder .append("WHERE `url`=? AND `query`=? AND `search_engine`=? 
AND `domain`=?"); stmt = conn.prepareStatement(entitiesStatementBuilder.toString()); stmt.setInt(1, cat_cnt_stem); stmt.setInt(2, ent_cnt_stem); stmt.setString(3, urlString); stmt.setString(4, quer); if (j < results_number) { stmt.setInt(5, 0);//0 for yahoo } else if (j < results_number * 2) { stmt.setInt(5, 1);//1 for google } else if (j < results_number * 3) { stmt.setInt(5, 2);//2 for bing } stmt.setString(6, domain); stmt.executeUpdate(); } finally { try { if (stmt != null) stmt.close(); if (conn != null) conn.close(); } catch (SQLException ex) { Logger.getLogger(Search_analysis.class.getName()).log(Level.SEVERE, null, ex); } } try { conn = DriverManager.getConnection(url, user, password); entitiesStatementBuilder = new StringBuilder(); entitiesStatementBuilder.append("UPDATE SEMANTICSTATS SET "); entitiesStatementBuilder.append("`Categories_Contained_Query_Y_W_Stem`=?,"); entitiesStatementBuilder.append("`Entities_Contained_Query_Y_W_Stem`=? "); entitiesStatementBuilder .append("WHERE `url`=? AND `query`=? AND `search_engine`=? 
AND `domain`=?"); stmt = conn.prepareStatement(entitiesStatementBuilder.toString()); stmt.setInt(1, cat_cnt_whole_stem); stmt.setInt(2, ent_cnt_whole_stem); stmt.setString(3, urlString); stmt.setString(4, quer); if (j < results_number) { stmt.setInt(5, 0);//0 for yahoo } else if (j < results_number * 2) { stmt.setInt(5, 1);//1 for google } else if (j < results_number * 3) { stmt.setInt(5, 2);//2 for bing } stmt.setString(6, domain); stmt.executeUpdate(); } finally { try { if (stmt != null) stmt.close(); if (conn != null) conn.close(); } catch (SQLException ex) { Logger.getLogger(Search_analysis.class.getName()).log(Level.SEVERE, null, ex); } } try { conn = DriverManager.getConnection(url, user, password); entitiesStatementBuilder = new StringBuilder(); entitiesStatementBuilder.append("UPDATE SEMANTICSTATS SET "); entitiesStatementBuilder.append("`Categories_Contained_Query_D_Stem`=?,"); entitiesStatementBuilder.append("`Entities_Contained_Query_D_Stem`=? "); entitiesStatementBuilder .append("WHERE `url`=? AND `query`=? AND `search_engine`=? 
AND `domain`=?"); stmt = conn.prepareStatement(entitiesStatementBuilder.toString()); stmt.setInt(1, cat_cnt_dand_stem); stmt.setInt(2, ent_cnt_dand_stem); stmt.setString(3, urlString); stmt.setString(4, quer); if (j < results_number) { stmt.setInt(5, 0);//0 for yahoo } else if (j < results_number * 2) { stmt.setInt(5, 1);//1 for google } else if (j < results_number * 3) { stmt.setInt(5, 2);//2 for bing } stmt.setString(6, domain); stmt.executeUpdate(); } finally { try { if (stmt != null) stmt.close(); if (conn != null) conn.close(); } catch (SQLException ex) { Logger.getLogger(Search_analysis.class.getName()).log(Level.SEVERE, null, ex); } } try { conn = DriverManager.getConnection(url, user, password); entitiesStatementBuilder = new StringBuilder(); entitiesStatementBuilder.append("UPDATE SEMANTICSTATS SET "); entitiesStatementBuilder.append("`Categories_Contained_Query_D_W_Stem`=?,"); entitiesStatementBuilder.append("`Entities_Contained_Query_D_W_Stem`=? "); entitiesStatementBuilder .append("WHERE `url`=? AND `query`=? AND `search_engine`=? 
AND `domain`=?"); stmt = conn.prepareStatement(entitiesStatementBuilder.toString()); stmt.setInt(1, cat_cnt_dand_whole_stem); stmt.setInt(2, ent_cnt_dand_whole_stem); stmt.setString(3, urlString); stmt.setString(4, quer); if (j < results_number) { stmt.setInt(5, 0);//0 for yahoo } else if (j < results_number * 2) { stmt.setInt(5, 1);//1 for google } else if (j < results_number * 3) { stmt.setInt(5, 2);//2 for bing } stmt.setString(6, domain); stmt.executeUpdate(); } finally { try { if (stmt != null) stmt.close(); if (conn != null) conn.close(); } catch (SQLException ex) { Logger.getLogger(Search_analysis.class.getName()).log(Level.SEVERE, null, ex); } } try { conn = DriverManager.getConnection(url, user, password); entitiesStatementBuilder = new StringBuilder(); entitiesStatementBuilder.append("UPDATE SEMANTICSTATS SET "); entitiesStatementBuilder.append("`Categories_Contained_Query_DBPspot_Stem`=?,"); entitiesStatementBuilder.append("`Entities_Contained_Query_DBPspot_Stem`=? "); entitiesStatementBuilder .append("WHERE `url`=? AND `query`=? AND `search_engine`=? 
AND `domain`=?"); stmt = conn.prepareStatement(entitiesStatementBuilder.toString()); stmt.setInt(1, cat_cnt_dbpspot_stem); stmt.setInt(2, ent_cnt_dbpspot_stem); stmt.setString(3, urlString); stmt.setString(4, quer); if (j < results_number) { stmt.setInt(5, 0);//0 for yahoo } else if (j < results_number * 2) { stmt.setInt(5, 1);//1 for google } else if (j < results_number * 3) { stmt.setInt(5, 2);//2 for bing } stmt.setString(6, domain); stmt.executeUpdate(); } finally { try { if (stmt != null) stmt.close(); if (conn != null) conn.close(); } catch (SQLException ex) { Logger.getLogger(Search_analysis.class.getName()).log(Level.SEVERE, null, ex); } } try { conn = DriverManager.getConnection(url, user, password); entitiesStatementBuilder = new StringBuilder(); entitiesStatementBuilder.append("UPDATE SEMANTICSTATS SET "); entitiesStatementBuilder.append("`Categories_Contained_Query_DBPspot_W_Stem`=?,"); entitiesStatementBuilder.append("`Entities_Contained_Query_DBPspot_W_Stem`=? "); entitiesStatementBuilder .append("WHERE `url`=? AND `query`=? AND `search_engine`=? 
AND `domain`=?"); stmt = conn.prepareStatement(entitiesStatementBuilder.toString()); stmt.setInt(1, cat_cnt_dbpspot_whole_stem); stmt.setInt(2, ent_cnt_dbpspot_whole_stem); stmt.setString(3, urlString); stmt.setString(4, quer); if (j < results_number) { stmt.setInt(5, 0);//0 for yahoo } else if (j < results_number * 2) { stmt.setInt(5, 1);//1 for google } else if (j < results_number * 3) { stmt.setInt(5, 2);//2 for bing } stmt.setString(6, domain); stmt.executeUpdate(); } finally { try { if (stmt != null) stmt.close(); if (conn != null) conn.close(); } catch (SQLException ex) { Logger.getLogger(Search_analysis.class.getName()).log(Level.SEVERE, null, ex); } } System.out.println("I inserted the semantic entities and categories stats in the DB\n"); System.out.println( "I will get the html stats for the " + j + " link:" + links_total[j] + "\n"); boolean flag_htmlstats = htm.gethtmlstats(links_total[j]);//get the semantic stats from the html code if (flag_htmlstats) { System.out.println( "I got the html stats for the " + j + " link:" + links_total[j] + "\n"); int scripts_cnt = htm.scripts_number; int nschem = htm.nschem; int hreln = htm.hreln; int total_micron = htm.total_micron; int micron1 = htm.micron1; int micron2 = htm.micron2; int microd = htm.microd; System.out.println("I will insert webstats in the DB\n"); webstatsStmBuild.setLength(0); webstatsStmBuild.append("UPDATE SEMANTICSTATS SET "); webstatsStmBuild.append("`scripts_cnt`=? "); webstatsStmBuild .append("WHERE `url`=? AND `query`=? AND `search_engine`=? 
AND `domain`=?"); try { conn = DriverManager.getConnection(url, user, password); stmt = conn.prepareStatement(webstatsStmBuild.toString()); stmt.setInt(1, scripts_cnt); stmt.setString(2, urlString); stmt.setString(3, quer); if (j < results_number) { stmt.setInt(4, 0);//0 for yahoo } else if (j < results_number * 2) { stmt.setInt(4, 1);//1 for google } else if (j < results_number * 3) { stmt.setInt(4, 2);//2 for bing } stmt.setString(5, domain); stmt.executeUpdate(); } finally { try { if (stmt != null) stmt.close(); if (conn != null) conn.close(); } catch (SQLException ex) { Logger.getLogger(Search_analysis.class.getName()).log(Level.SEVERE, null, ex); } } try { conn = DriverManager.getConnection(url, user, password); System.out.println("I inserted webstats in the DB\n"); System.out.println("I will insert semantic stats in the DB\n"); StringBuilder semanticstatsStmBuild = new StringBuilder(); semanticstatsStmBuild.append("UPDATE SEMANTICSTATS SET "); semanticstatsStmBuild.append("`schema.org_entities`=? , "); semanticstatsStmBuild.append("`hreltags`=? "); semanticstatsStmBuild.append( "WHERE `url`=? AND `query`=? AND `search_engine`=? 
AND `domain`=?"); stmt = conn.prepareStatement(semanticstatsStmBuild.toString()); stmt.setInt(1, nschem); stmt.setInt(2, hreln); stmt.setString(3, urlString); stmt.setString(4, quer); if (j < results_number) { stmt.setInt(5, 0);//0 for yahoo } else if (j < results_number * 2) { stmt.setInt(5, 1);//1 for google } else if (j < results_number * 3) { stmt.setInt(5, 2);//2 for bing } stmt.setString(6, domain); stmt.executeUpdate(); } finally { try { if (stmt != null) stmt.close(); if (conn != null) conn.close(); } catch (SQLException ex) { Logger.getLogger(Search_analysis.class.getName()).log(Level.SEVERE, null, ex); } } try { conn = DriverManager.getConnection(url, user, password); StringBuilder semanticstatsStmBuild = new StringBuilder(); semanticstatsStmBuild.append("UPDATE SEMANTICSTATS SET "); semanticstatsStmBuild.append("`total_microformats`=? , "); semanticstatsStmBuild.append("`Microformats-1`=? "); semanticstatsStmBuild.append( "WHERE `url`=? AND `query`=? AND `search_engine`=? AND `domain`=?"); stmt = conn.prepareStatement(semanticstatsStmBuild.toString()); stmt.setInt(1, total_micron); stmt.setInt(2, micron1); stmt.setString(3, urlString); stmt.setString(4, quer); if (j < results_number) { stmt.setInt(5, 0);//0 for yahoo } else if (j < results_number * 2) { stmt.setInt(5, 1);//1 for google } else if (j < results_number * 3) { stmt.setInt(5, 2);//2 for bing } stmt.setString(6, domain); stmt.executeUpdate(); } finally { try { if (stmt != null) stmt.close(); if (conn != null) conn.close(); } catch (SQLException ex) { Logger.getLogger(Search_analysis.class.getName()).log(Level.SEVERE, null, ex); } } try { conn = DriverManager.getConnection(url, user, password); StringBuilder semanticstatsStmBuild = new StringBuilder(); semanticstatsStmBuild.append("UPDATE SEMANTICSTATS SET "); semanticstatsStmBuild.append("`Microformats-2`=? , "); semanticstatsStmBuild.append("`Microdata`=? , "); semanticstatsStmBuild.append("`FOAF_HTML`=? 
"); semanticstatsStmBuild.append( "WHERE `url`=? AND `query`=? AND `search_engine`=? AND `domain`=?"); stmt = conn.prepareStatement(semanticstatsStmBuild.toString()); stmt.setInt(1, micron2); stmt.setInt(2, microd); stmt.setInt(3, htm.foaf); stmt.setString(4, urlString); stmt.setString(5, quer); if (j < results_number) { stmt.setInt(6, 0);//0 for yahoo } else if (j < results_number * 2) { stmt.setInt(6, 1);//1 for google } else if (j < results_number * 3) { stmt.setInt(6, 2);//2 for bing } stmt.setString(7, domain); stmt.executeUpdate(); } finally { try { if (stmt != null) stmt.close(); if (conn != null) conn.close(); } catch (SQLException ex) { Logger.getLogger(Search_analysis.class.getName()).log(Level.SEVERE, null, ex); } } System.out.println("I inserted semantic stats in the DB\n"); } } } } String[] parse_output; if (ContentSemantics.get(3) || ContentSemantics.get(1)) { //we perform LDA or TFIDF analysis to the links obtained if (!enginechoice.get(3)) { if (enginechoice.get(2)) {//Yahoo parse_output = ld.perform(links_yahoo, domain, "yahoo", directory_save, quer, SWebRankSettings.get(1).intValue(), alpha, SWebRankSettings.get(0).doubleValue(), SWebRankSettings.get(2).intValue(), SWebRankSettings.get(3).intValue(), ContentSemantics.get(1), ContentSemantics.get(3), config_path); int j = 0; for (String s : parse_output) { parseOutputList.put(j, s); j++; } System.gc(); } if (enginechoice.get(1)) {//Google parse_output = ld.perform(links_google, domain, "google", directory_save, quer, SWebRankSettings.get(1).intValue(), alpha, SWebRankSettings.get(0).doubleValue(), SWebRankSettings.get(2).intValue(), SWebRankSettings.get(3).intValue(), ContentSemantics.get(1), ContentSemantics.get(3), config_path); int j = results_number; for (String s : parse_output) { parseOutputList.put(j, s); j++; } System.gc(); } if (enginechoice.get(0)) {//Bing parse_output = ld.perform(links_bing, domain, "bing", directory_save, quer, SWebRankSettings.get(1).intValue(), alpha, 
SWebRankSettings.get(0).doubleValue(), SWebRankSettings.get(2).intValue(), SWebRankSettings.get(3).intValue(), ContentSemantics.get(1), ContentSemantics.get(3), config_path); int j = results_number * 2; for (String s : parse_output) { parseOutputList.put(j, s); j++; } System.gc(); } } /*else{ System.gc();//links_total parse_output=ld.perform(links_total, domain, "merged", directory_save, quer, SWebRankSettings.get(1).intValue(), alpha, SWebRankSettings.get(0).doubleValue(), SWebRankSettings.get(2).intValue(), SWebRankSettings.get(3).intValue(),"Merged",ContentSemantics.get(1),ContentSemantics.get(3), config_path); Collections.addAll(parseOutputList, parse_output); System.gc(); }*/ } } System.gc(); List<String> wordList = null; //hashmap for every engine, with topics, words and probability of each word HashMap<String, HashMap<Integer, HashMap<String, Double>>> enginetopicwordprobmap = new HashMap<>(); List<String> lda_output = new ArrayList<>(); if (ContentSemantics.get(3)) { //get the top content from TFIDF System.out.println("i ll try to read the keys"); wordList = ld.return_topWordsTFIDF(); System.out.println("i returned the wordlist to search analysis"); } else if (ContentSemantics.get(0)) {//get the wordlist from Diffbot Diffbot db = new Diffbot(); wordList = db.compute(links_total, directory_save, config_path); } else if (ContentSemantics.get(2)) {//get the wordllist from Sensebot Sensebot sb = new Sensebot(); wordList = sb.compute(links_total, directory_save, SensebotConcepts, config_path); } else { //get the top content from LDA System.out.println("i ll try to read the keys"); LDAtopicsWords rk = new LDAtopicsWords(); enginetopicwordprobmap = rk.readFile(directory_save, SWebRankSettings.get(4), SWebRankSettings.get(3).intValue(), SWebRankSettings.get(1).intValue(), SWebRankSettings.get(11).intValue()); JSONArray ArrayEngineLevel = new JSONArray(); List<String> ids = new ArrayList<>(); //Node node = 
nodeBuilder().client(true).clusterName("lshrankldacluster").node(); //Client client = node.client(); Settings settings = ImmutableSettings.settingsBuilder().put("cluster.name", "lshrankldacluster") .build(); Client client = new TransportClient(settings) .addTransportAddress(new InetSocketTransportAddress("localhost", 9300)); //save in elastic search the produced by LDA distributions of words over topics for every engine for (String engine : enginetopicwordprobmap.keySet()) { HashMap<Integer, HashMap<String, Double>> topicwordprobmap = new HashMap<>(); topicwordprobmap = enginetopicwordprobmap.get(engine); JSONObject objEngineLevel = new JSONObject(); JSONArray ArrayTopicLevel = new JSONArray(); //for every topic get the words and their probability for (Integer topicindex : topicwordprobmap.keySet()) { JSONObject objTopicLevel = new JSONObject(); objTopicLevel.put("topic", topicindex); JSONObject objmap = new JSONObject(topicwordprobmap.get(topicindex)); Set keySet = objmap.keySet(); Iterator iterator = keySet.iterator(); while (iterator.hasNext()) { String word = iterator.next().toString(); if (!lda_output.contains(word)) { lda_output.add(word); } //get the words in a separate list } objTopicLevel.put("wordsmap", objmap);//write the words in elastic search ArrayTopicLevel.add(objTopicLevel); } objEngineLevel.put("engine", engine); objEngineLevel.put("query", quer); objEngineLevel.put("domain", domain); objEngineLevel.put("iteration", iteration_counter); objEngineLevel.put("TopicsWordMap", ArrayTopicLevel); ArrayEngineLevel.add(objEngineLevel); String id = domain + "/" + quer + "/" + engine + "/" + iteration_counter;//create unique id for the elasticsearch document ids.add(id);//add to the ids list which contains the ids of the current round List<String> elasticIndexes = ri.GetKeyFile(config_path, "elasticSearchIndexes"); IndexRequest indexReq = new IndexRequest(elasticIndexes.get(2), "content", id); indexReq.source(objEngineLevel); IndexResponse indexRes = 
client.index(indexReq).actionGet(); } //node.close(); client.close(); ElasticGetWordList elasticGetwordList = new ElasticGetWordList();//get the wordlist from elastic search for the ids from the current round wordList = elasticGetwordList.get(ids, config_path); DataManipulation datamanipulation = new DataManipulation(); wordList = datamanipulation.clearListString(wordList); System.out.println("i returned the wordlist to search analysis"); } //get some stats regarding the entities, categories and parsed content from each link comparing it to the top words produced by lda for (int j = 0; j < links_total.length; j++) { if (links_total[j] != null) { String urlString = links_total[j]; if (urlString.length() > 199) { urlString = links_total[j].substring(0, 198); } int rank = -1; int engine = -1;//0 for yahoo,1 for google,2 for bing if (j < results_number) { rank = j; engine = 0; } else if (j < results_number * 2) { rank = j - results_number; engine = 1; } else if (j < results_number * 3) { rank = j - results_number * 2; engine = 2; } LDAsemStats ldaSemStats = new LDAsemStats();//get the stats by comparing the top words produced by LDA and the parsed content //check the LDAsemStats class for more StringBuilder webstatsStmBuild = new StringBuilder(); if (!parseOutputList.isEmpty()) { if (!parseOutputList.get(j).equalsIgnoreCase("") && !parseOutputList.get(j).equalsIgnoreCase("null") && (parseOutputList.get(j).length() > 0)) { ldaSemStats.getTopWordsStats(parseOutputList.get(j), lda_output, false);//without stemming int top_words_lda = ldaSemStats.getTopStats(); double top_words_lda_per = ldaSemStats.getTopPercentageStats(); webstatsStmBuild.append("UPDATE SEMANTICSTATS SET "); webstatsStmBuild.append("`top_words_lda`=? , "); webstatsStmBuild.append("`top_words_lda_per`=? "); webstatsStmBuild .append("WHERE `url`=? AND `query`=? AND `search_engine`=? 
AND `domain`=?"); try { conn = DriverManager.getConnection(url, user, password); stmt = conn.prepareStatement(webstatsStmBuild.toString()); stmt.setInt(1, top_words_lda); stmt.setDouble(2, top_words_lda_per); stmt.setString(3, urlString); stmt.setString(4, quer); stmt.setInt(5, engine); stmt.setString(6, domain); stmt.executeUpdate(); } finally { try { if (stmt != null) stmt.close(); if (conn != null) conn.close(); } catch (SQLException ex) { Logger.getLogger(Search_analysis.class.getName()).log(Level.SEVERE, null, ex); } } ldaSemStats.getTopWordsStats(parseOutputList.get(j), lda_output, true);//with stemming int top_words_lda_stem = ldaSemStats.getTopStats(); double top_words_lda_per_stem = ldaSemStats.getTopPercentageStats(); webstatsStmBuild = new StringBuilder(); webstatsStmBuild.append("UPDATE SEMANTICSTATS SET "); webstatsStmBuild.append("`top_words_lda_stem`=? , "); webstatsStmBuild.append("`top_words_lda_per_stem`=? "); webstatsStmBuild .append("WHERE `url`=? AND `query`=? AND `search_engine`=? 
AND `domain`=?"); try { conn = DriverManager.getConnection(url, user, password); stmt = conn.prepareStatement(webstatsStmBuild.toString()); stmt.setInt(1, top_words_lda_stem); stmt.setDouble(2, top_words_lda_per_stem); stmt.setString(3, urlString); stmt.setString(4, quer); stmt.setInt(5, engine); stmt.setString(6, domain); stmt.executeUpdate(); } finally { try { if (stmt != null) stmt.close(); if (conn != null) conn.close(); } catch (SQLException ex) { Logger.getLogger(Search_analysis.class.getName()).log(Level.SEVERE, null, ex); } } } } if (EntitiesMapDBP.get(j) != null && CategoriesMapDBP.get(j) != null) { //we are going to check if semantic entities and categories recognized exist in the lda words recognized as prominent //we are going to use DBPEDIA spotligh and Dandelion named Entity Extraction API //and stemming through Snowball Stemmer ldaSemStats.getEntCatStats(EntitiesMapDBP.get(j), CategoriesMapDBP.get(j), lda_output, false); int ent_cnt_dbpspot_lda = ldaSemStats.getEntStats(); int cat_cnt_dbpspot_lda = ldaSemStats.getCategoryStats(); webstatsStmBuild = new StringBuilder(); webstatsStmBuild.append("UPDATE SEMANTICSTATS SET "); webstatsStmBuild.append("`ent_cnt_dbpspot_lda`=? , "); webstatsStmBuild.append("`cat_cnt_dbpspot_lda`=? "); webstatsStmBuild.append("WHERE `url`=? AND `query`=? AND `search_engine`=? 
AND `domain`=?"); try { conn = DriverManager.getConnection(url, user, password); stmt = conn.prepareStatement(webstatsStmBuild.toString()); stmt.setInt(1, ent_cnt_dbpspot_lda); stmt.setInt(2, cat_cnt_dbpspot_lda); stmt.setString(3, urlString); stmt.setString(4, quer); stmt.setInt(5, engine); stmt.setString(6, domain); stmt.executeUpdate(); } finally { try { if (stmt != null) stmt.close(); if (conn != null) conn.close(); } catch (SQLException ex) { Logger.getLogger(Search_analysis.class.getName()).log(Level.SEVERE, null, ex); } } ldaSemStats.getEntCatStats(EntitiesMapDBP.get(j), CategoriesMapDBP.get(j), lda_output, true); int ent_cnt_dbpspot_lda_stem = ldaSemStats.getEntStats(); int cat_cnt_dbpspot_lda_stem = ldaSemStats.getCategoryStats(); webstatsStmBuild = new StringBuilder(); webstatsStmBuild.append("UPDATE SEMANTICSTATS SET "); webstatsStmBuild.append("`ent_cnt_dbpspot_lda_stem`=? , "); webstatsStmBuild.append("`cat_cnt_dbpspot_lda_stem`=? "); webstatsStmBuild.append("WHERE `url`=? AND `query`=? AND `search_engine`=? AND `domain`=?"); try { conn = DriverManager.getConnection(url, user, password); stmt = conn.prepareStatement(webstatsStmBuild.toString()); stmt.setInt(1, ent_cnt_dbpspot_lda_stem); stmt.setInt(2, cat_cnt_dbpspot_lda_stem); stmt.setString(3, urlString); stmt.setString(4, quer); stmt.setInt(5, engine); stmt.setString(6, domain); stmt.executeUpdate(); } finally { try { if (stmt != null) stmt.close(); if (conn != null) conn.close(); } catch (SQLException ex) { Logger.getLogger(Search_analysis.class.getName()).log(Level.SEVERE, null, ex); } } } if (EntitiesMapDand.get(j) != null && CategoriesMapDand.get(j) != null) { ldaSemStats.getEntCatStats(EntitiesMapDand.get(j), CategoriesMapDand.get(j), lda_output, false); int ent_cnt_dand_lda = ldaSemStats.getEntStats(); int cat_cnt_dand_lda = ldaSemStats.getCategoryStats(); webstatsStmBuild = new StringBuilder(); webstatsStmBuild.append("UPDATE SEMANTICSTATS SET "); webstatsStmBuild.append("`ent_cnt_dand_lda`=? 
, "); webstatsStmBuild.append("`cat_cnt_dand_lda`=? "); webstatsStmBuild.append("WHERE `url`=? AND `query`=? AND `search_engine`=? AND `domain`=?"); try { conn = DriverManager.getConnection(url, user, password); stmt = conn.prepareStatement(webstatsStmBuild.toString()); stmt.setInt(1, ent_cnt_dand_lda); stmt.setInt(2, cat_cnt_dand_lda); stmt.setString(3, urlString); stmt.setString(4, quer); stmt.setInt(5, engine); stmt.setString(6, domain); stmt.executeUpdate(); } finally { try { if (stmt != null) stmt.close(); if (conn != null) conn.close(); } catch (SQLException ex) { Logger.getLogger(Search_analysis.class.getName()).log(Level.SEVERE, null, ex); } } ldaSemStats.getEntCatStats(EntitiesMapDand.get(j), CategoriesMapDand.get(j), lda_output, true); int ent_cnt_dand_lda_stem = ldaSemStats.getEntStats(); int cat_cnt_dand_lda_stem = ldaSemStats.getCategoryStats(); webstatsStmBuild = new StringBuilder(); webstatsStmBuild.append("UPDATE SEMANTICSTATS SET "); webstatsStmBuild.append("`ent_cnt_dand_lda_stem`=? , "); webstatsStmBuild.append("`cat_cnt_dand_lda_stem`=? "); webstatsStmBuild.append("WHERE `url`=? AND `query`=? AND `search_engine`=? 
AND `domain`=?"); try { conn = DriverManager.getConnection(url, user, password); stmt = conn.prepareStatement(webstatsStmBuild.toString()); stmt.setInt(1, ent_cnt_dand_lda_stem); stmt.setInt(2, cat_cnt_dand_lda_stem); stmt.setString(3, urlString); stmt.setString(4, quer); stmt.setInt(5, engine); stmt.setString(6, domain); stmt.executeUpdate(); } finally { try { if (stmt != null) stmt.close(); if (conn != null) conn.close(); } catch (SQLException ex) { Logger.getLogger(Search_analysis.class.getName()).log(Level.SEVERE, null, ex); } } } if (EntitiesMapYahoo.get(j) != null && CategoriesMapYahoo.get(j) != null) { //we are going to check if semantic entities and categories recognized exist in the lda words recognized as prominent //we are going to use DBPEDIA spotligh and Dandelion named Entity Extraction API //and stemming through Snowball Stemmer ldaSemStats.getEntCatStats(EntitiesMapYahoo.get(j), CategoriesMapYahoo.get(j), lda_output, false); int ent_cnt_y_lda = ldaSemStats.getEntStats(); int cat_cnt_y_lda = ldaSemStats.getCategoryStats(); webstatsStmBuild = new StringBuilder(); webstatsStmBuild.append("UPDATE SEMANTICSTATS SET "); webstatsStmBuild.append("`ent_cnt_y_lda`=? , "); webstatsStmBuild.append("`cat_cnt_y_lda`=? "); webstatsStmBuild.append("WHERE `url`=? AND `query`=? AND `search_engine`=? 
AND `domain`=?"); try { conn = DriverManager.getConnection(url, user, password); stmt = conn.prepareStatement(webstatsStmBuild.toString()); stmt.setInt(1, ent_cnt_y_lda); stmt.setInt(2, cat_cnt_y_lda); stmt.setString(3, urlString); stmt.setString(4, quer); stmt.setInt(5, engine); stmt.setString(6, domain); stmt.executeUpdate(); } finally { try { if (stmt != null) stmt.close(); if (conn != null) conn.close(); } catch (SQLException ex) { Logger.getLogger(Search_analysis.class.getName()).log(Level.SEVERE, null, ex); } } ldaSemStats.getEntCatStats(EntitiesMapYahoo.get(j), CategoriesMapYahoo.get(j), lda_output, true); int ent_cnt_y_lda_stem = ldaSemStats.getEntStats(); int cat_cnt_y_lda_stem = ldaSemStats.getCategoryStats(); webstatsStmBuild = new StringBuilder(); webstatsStmBuild.append("UPDATE SEMANTICSTATS SET "); webstatsStmBuild.append("`ent_cnt_y_lda_stem`=? , "); webstatsStmBuild.append("`cat_cnt_y_lda_stem`=? "); webstatsStmBuild.append("WHERE `url`=? AND `query`=? AND `search_engine`=? 
AND `domain`=?"); try { conn = DriverManager.getConnection(url, user, password); stmt = conn.prepareStatement(webstatsStmBuild.toString()); stmt.setInt(1, ent_cnt_y_lda_stem); stmt.setInt(2, cat_cnt_y_lda_stem); stmt.setString(3, urlString); stmt.setString(4, quer); stmt.setInt(5, engine); stmt.setString(6, domain); stmt.executeUpdate(); } finally { try { if (stmt != null) stmt.close(); if (conn != null) conn.close(); } catch (SQLException ex) { Logger.getLogger(Search_analysis.class.getName()).log(Level.SEVERE, null, ex); } } } } } return wordList; } catch (NullPointerException ex) { Logger.getLogger(Search_analysis.class.getName()).log(Level.SEVERE, null, ex); ArrayList<String> finalList = new ArrayList<>(); return finalList; } catch (SQLException | ElasticsearchException ex) { Logger.getLogger(Search_analysis.class.getName()).log(Level.SEVERE, null, ex); ArrayList<String> finalList = new ArrayList<>(); return finalList; } finally { try { if (stmt != null) stmt.close(); if (conn != null) conn.close(); } catch (SQLException ex) { Logger.getLogger(Search_analysis.class.getName()).log(Level.SEVERE, null, ex); } } } }