// Java tutorial
/* * Copyright 2015 Themistoklis Mavridis <themis.mavridis@issel.ee.auth.gr>. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package com.thesmartweb.swebrank; /** * * @author Themis Mavridis */ import java.util.*; import java.util.List; import org.elasticsearch.action.index.IndexRequest; import org.elasticsearch.action.index.IndexResponse; import org.elasticsearch.client.Client; import org.elasticsearch.client.transport.TransportClient; import org.elasticsearch.common.settings.ImmutableSettings; import org.elasticsearch.common.settings.Settings; import org.elasticsearch.common.transport.InetSocketTransportAddress; import org.elasticsearch.node.Node; import static org.elasticsearch.node.NodeBuilder.nodeBuilder; import org.json.simple.JSONObject; /** * Class of the main algorithm functionalities * @author Themistoklis Mavridis */ public class Total_analysis { /** * a list with all the wordlists produced */ protected List<ArrayList<String>> array_wordLists = new ArrayList<>(); /** * a list with all the words from all the wordlists */ protected List<String> wordList_total = new ArrayList<>(); /** * the convergence score */ protected double convergence; /** * Method to call search analysis for every query and to save the wordlists * @param wordList_previous the previous wordlist to check convergence * @param iteration_counter the iteration number * @param example_dir the directory to save the files * @param domain the domain we analyze * @param enginechoice the search engines 
chosen * @param queries the queries we search for * @param results_number the amount of results for each query * @param top_visible the amount of results if we use Visibility Score (http://www.advancedwebranking.com/user-guide/html/en/ch08s06.html) * @param mozMetrics the metrics of Moz chosen * @param moz_threshold_option flag if we are going to use Moz threshold or not * @param moz_threshold the threshold to moz metrics * @param top_count_moz the amount of links to keep if we use Moz for evaluation * @param ContentSemantics get the choice of Content Semantic Analysis algorithm that we are going to use * @param SensebotConcepts the amount of concepts to be recognized if Sensebot is used * @param SWebRankSettings the settings for LDA and SwebRank in general (check the ReadInput Class) * @param config_path the configuration path to get all the api keys */ public void perform(List<String> wordList_previous, int iteration_counter, String example_dir, String domain, List<Boolean> enginechoice, List<String> queries, int results_number, int top_visible, List<Boolean> mozMetrics, boolean moz_threshold_option, double moz_threshold, int top_count_moz, List<Boolean> ContentSemantics, int SensebotConcepts, List<Double> SWebRankSettings, String config_path) { //for every term of the query String[] it performs the search analysis function //which includes sumbission of the term to the search engines, getting the results according to the options selected //parsing the websites and getting the content and the running LDA on them and getting the top content for (String query : queries) { System.gc(); System.gc(); System.gc(); List<String> wordList = new ArrayList<>(); //we call search analysis that is doing all the work needed and returns to us the wordlists Search_analysis sa = new Search_analysis(); //the following string represents the directory for each query String example_directory = example_dir + query + "-query//"; //we set the alpha variable of the LDA algorithm to the 
value that is said to be optimal in the paper of LDA, alpha double alpha = 50 / SWebRankSettings.get(1); //we call perform method of search analysis wordList = sa.perform(iteration_counter, example_directory, domain, enginechoice, query, results_number, top_visible, SWebRankSettings, alpha, mozMetrics, top_count_moz, moz_threshold_option, moz_threshold, ContentSemantics, SensebotConcepts, config_path); //we add the wordlist to the vector of word list ArrayList<String> wordArrayList = new ArrayList<>(wordList); array_wordLists.add(wordArrayList); //we add the wordlist and to the total wordlist wordList_total.addAll(wordList); } //we are going to check the convergence rate CheckConvergence cc = new CheckConvergence(); // here we check the convergence between the two wordLists, the new and the previous //the concergence percentage of this iteration, we save it in Elastic Search convergence = cc.ConvergenceCalc(wordList_total, wordList_previous); //Node node = nodeBuilder().client(true).clusterName("lshrankldacluster").node(); //Client client = node.client(); ReadInput ri = new ReadInput(); List<String> elasticIndexes = ri.GetKeyFile(config_path, "elasticSearchIndexes"); Settings settings = ImmutableSettings.settingsBuilder().put("cluster.name", "lshrankldacluster").build(); Client client = new TransportClient(settings) .addTransportAddress(new InetSocketTransportAddress("localhost", 9300)); JSONObject objEngineLevel = new JSONObject(); objEngineLevel.put("RoundContent", wordList_total); objEngineLevel.put("Round", iteration_counter); objEngineLevel.put("Convergence", convergence); String id = domain + "/" + iteration_counter; IndexRequest indexReq = new IndexRequest(elasticIndexes.get(1), "content", id); indexReq.source(objEngineLevel); IndexResponse indexRes = client.index(indexReq).actionGet(); client.close(); //node.close(); } /** * Getter of convergence score * @return convergence score */ public double getConvergence() { return convergence; } /** * Getter of the 
total wordlist * @return the list with all the words produced from all queries */ public List<String> getwordList_total() { return wordList_total; } /** * Getter of all the wordlists for each query in separate * @return all the wordlists for each query */ public List<ArrayList<String>> getarray_wordLists() { return array_wordLists; } }