// Java tutorial
/*
 * Copyright 2015 Themistoklis Mavridis <themis.mavridis@issel.ee.auth.gr>.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package com.thesmartweb.swebrank;

import com.google.gson.JsonArray;
import com.google.gson.JsonElement;
import com.google.gson.JsonObject;
import com.google.gson.JsonParser;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.logging.Level;
import java.util.logging.Logger;
import org.elasticsearch.action.search.SearchResponse;
import org.elasticsearch.action.search.SearchType;
import org.elasticsearch.client.Client;
import org.elasticsearch.common.xcontent.ToXContent;
import org.elasticsearch.common.xcontent.XContentBuilder;
import org.elasticsearch.common.xcontent.XContentFactory;
import org.elasticsearch.index.query.QueryBuilders;
import org.elasticsearch.node.Node;
import com.google.common.collect.SortedSetMultimap;
import com.google.common.collect.TreeMultimap;
import java.util.Comparator;
import java.util.HashMap;
import java.util.LinkedHashMap;
import java.util.Map.Entry;
import java.util.stream.Stream;
import org.elasticsearch.client.transport.TransportClient;
import org.elasticsearch.common.settings.ImmutableSettings;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.common.transport.InetSocketTransportAddress;
import static org.elasticsearch.node.NodeBuilder.nodeBuilder;

/**
 * Class that contains methods that retrieve words from an index in the cluster of
 * ElasticSearch where the content is saved.
 *
 * @author Themistoklis Mavridis
 */
public class ElasticGetWordList {

    private static final Logger LOGGER = Logger.getLogger(ElasticGetWordList.class.getName());

    /** Value of the {@code cluster.name} setting used when connecting. */
    private static final String CLUSTER_NAME = "lshrankldacluster";

    /** Host of the ElasticSearch node the transport client connects to. */
    private static final String HOST = "localhost";

    /** Transport port of the ElasticSearch node the transport client connects to. */
    private static final int TRANSPORT_PORT = 9300;

    /** Key under which the index names are stored in the configuration. */
    private static final String INDEXES_KEY = "elasticSearchIndexes";

    /** Position, within the configured index names, of the index that is searched for word maps. */
    private static final int TOPICS_INDEX_POSITION = 2;

    /**
     * Gets all the words of all the documents, regardless of topic, for the ids passed as input.
     * A word is added once for every topic word map it appears in, so the result may contain
     * duplicates.
     *
     * @param ids the ids of the documents whose words are going to be captured; {@code null} or
     *        empty yields an empty list
     * @param config_path configuration directory to get the names of the elastic search indexes
     * @return all the words in a mutable list; an empty list if an {@link IOException} occurs
     */
    public List<String> get(List<String> ids, String config_path) {
        List<String> wordList = new ArrayList<>();
        if (ids == null || ids.isEmpty()) {
            return wordList;
        }
        Client client = null;
        try {
            // Resolve the index name before connecting so a configuration failure opens no client.
            String index = topicsIndexName(config_path);
            client = openClient();
            for (String id : ids) {
                for (JsonObject wordsmap : fetchWordMaps(client, index, id)) {
                    for (Map.Entry<String, JsonElement> entry : wordsmap.entrySet()) {
                        wordList.add(entry.getKey());
                    }
                }
            }
            return wordList;
        } catch (IOException ex) {
            LOGGER.log(Level.SEVERE, null, ex);
            // Partial results are discarded on failure.
            return new ArrayList<>();
        } finally {
            // Close on every path, including runtime exceptions thrown by the search.
            if (client != null) {
                client.close();
            }
        }
    }

    /**
     * Gets the top N max words over all the topics of the documents whose ids are passed as input.
     * <p>
     * For every topic the word with the highest score is selected (only scores greater than zero
     * are considered; a topic without such a word contributes nothing). When the same word is
     * the max word of several topics its highest score is kept. The words are then sorted by
     * score in ascending order and the last {@code top} of them are returned, i.e. the returned
     * list is ordered from the lowest to the highest score.
     *
     * @param ids the ids of the documents whose words are going to be captured; {@code null} or
     *        empty yields an empty list
     * @param top the maximum number of words to be returned; zero or negative yields an empty list
     * @param config_path configuration directory to get the names of the elastic search indexes
     * @return the selected words in a mutable list; an empty list if an {@link IOException} occurs
     */
    public List<String> getMaxWords(List<String> ids, int top, String config_path) {
        List<String> maxWordList = new ArrayList<>();
        if (ids == null || ids.isEmpty() || top <= 0) {
            return maxWordList;
        }
        Map<String, Double> bestScores = new HashMap<>();
        Client client = null;
        try {
            String index = topicsIndexName(config_path);
            client = openClient();
            for (String id : ids) {
                for (JsonObject wordsmap : fetchWordMaps(client, index, id)) {
                    String maxWord = null;
                    double max = 0.0;
                    for (Map.Entry<String, JsonElement> entry : wordsmap.entrySet()) {
                        double score = entry.getValue().getAsDouble();
                        if (score > max) {
                            maxWord = entry.getKey();
                            max = score;
                        }
                    }
                    if (maxWord == null) {
                        // Empty word map or no positive score: there is no max word to record.
                        continue;
                    }
                    // Keep the highest score seen for this word across all topics.
                    bestScores.merge(maxWord, max, Math::max);
                }
            }
        } catch (IOException ex) {
            LOGGER.log(Level.SEVERE, null, ex);
            return new ArrayList<>();
        } finally {
            if (client != null) {
                client.close();
            }
        }
        // Ascending sort, so the words with the highest scores are the last ones.
        List<String> ranked = new ArrayList<>(sortByValue(bestScores).keySet());
        int from = Math.max(0, ranked.size() - top);
        maxWordList.addAll(ranked.subList(from, ranked.size()));
        return maxWordList;
    }

    /**
     * Sorts a map by its values.
     *
     * @param <K> the type of the keys
     * @param <V> the type of the values, which must be comparable
     * @param map the map to be sorted; it is not modified
     * @return a new map whose iteration order follows the values in ascending fashion
     */
    public static <K, V extends Comparable<? super V>> Map<K, V> sortByValue(Map<K, V> map) {
        List<Entry<K, V>> entries = new ArrayList<>(map.entrySet());
        entries.sort(Entry.<K, V>comparingByValue());
        // LinkedHashMap preserves the sorted insertion order.
        Map<K, V> result = new LinkedHashMap<>();
        for (Entry<K, V> entry : entries) {
            result.put(entry.getKey(), entry.getValue());
        }
        return result;
    }

    /** Reads the configured index names and returns the one that is searched for word maps. */
    private static String topicsIndexName(String config_path) throws IOException {
        ReadInput ri = new ReadInput();
        List<String> elasticIndexes = ri.GetKeyFile(config_path, INDEXES_KEY);
        return elasticIndexes.get(TOPICS_INDEX_POSITION);
    }

    /** Creates a transport client for the configured cluster; the caller must close it. */
    private static Client openClient() {
        Settings settings = ImmutableSettings.settingsBuilder()
                .put("cluster.name", CLUSTER_NAME)
                .build();
        return new TransportClient(settings)
                .addTransportAddress(new InetSocketTransportAddress(HOST, TRANSPORT_PORT));
    }

    /** Searches the index for one document id and returns the "wordsmap" object of each of its topics. */
    private static List<JsonObject> fetchWordMaps(Client client, String index, String id)
            throws IOException {
        SearchResponse responseSearch = client.prepareSearch(index)
                .setSearchType(SearchType.QUERY_AND_FETCH)
                .setQuery(QueryBuilders.idsQuery().ids(id))
                .execute()
                .actionGet();

        // Serialize the response to JSON so that it can be walked with Gson.
        XContentBuilder builder = XContentFactory.jsonBuilder();
        builder.startObject();
        responseSearch.toXContent(builder, ToXContent.EMPTY_PARAMS);
        builder.endObject();
        JsonObject response = (JsonObject) new JsonParser().parse(builder.string());

        List<JsonObject> wordMaps = new ArrayList<>();
        JsonArray hits = response.getAsJsonObject("hits").getAsJsonArray("hits");
        for (JsonElement hit : hits) {
            JsonElement source = hit.getAsJsonObject().get("_source");
            if (source == null || !source.isJsonObject()) {
                continue;
            }
            JsonElement topics = source.getAsJsonObject().get("TopicsWordMap");
            if (topics == null || !topics.isJsonArray()) {
                // Document without topic word maps: nothing to collect.
                continue;
            }
            for (JsonElement topic : topics.getAsJsonArray()) {
                if (!topic.isJsonObject()) {
                    continue;
                }
                JsonElement wordsmap = topic.getAsJsonObject().get("wordsmap");
                if (wordsmap != null && wordsmap.isJsonObject()) {
                    wordMaps.add(wordsmap.getAsJsonObject());
                }
            }
        }
        return wordMaps;
    }
}