Java tutorial
/* * Licensed under the Apache License, Version 2.0 (the "License"); you * may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package esiptestbed.mudrod.weblog.pre; import static org.elasticsearch.common.xcontent.XContentFactory.jsonBuilder; import java.io.IOException; import java.util.Arrays; import java.util.Map; import java.util.Properties; import java.util.concurrent.ExecutionException; import java.util.regex.Matcher; import java.util.regex.Pattern; import org.elasticsearch.action.index.IndexRequest; import org.elasticsearch.action.search.SearchResponse; import org.joda.time.DateTime; import org.joda.time.Seconds; import org.joda.time.format.DateTimeFormatter; import org.joda.time.format.ISODateTimeFormat; import org.elasticsearch.common.unit.TimeValue; import org.elasticsearch.index.query.QueryBuilder; import org.elasticsearch.index.query.QueryBuilders; import org.elasticsearch.search.SearchHit; import org.elasticsearch.search.aggregations.AggregationBuilders; import org.elasticsearch.search.aggregations.bucket.terms.Terms; import org.elasticsearch.search.aggregations.metrics.MetricsAggregationBuilder; import org.elasticsearch.search.aggregations.metrics.stats.Stats; import esiptestbed.mudrod.discoveryengine.DiscoveryStepAbstract; import esiptestbed.mudrod.driver.ESDriver; import esiptestbed.mudrod.driver.SparkDriver; import esiptestbed.mudrod.main.MudrodConstants; import esiptestbed.mudrod.weblog.structure.RequestUrl; import org.slf4j.Logger; import org.slf4j.LoggerFactory; /** * Supports ability to post-process session, including summarizing statistics and filtering * */ public class SessionStatistic extends DiscoveryStepAbstract { /** * */ private static final long serialVersionUID = 1L; private static final Logger LOG = LoggerFactory.getLogger(SessionStatistic.class); public SessionStatistic(Properties props, ESDriver es, SparkDriver spark) { super(props, es, spark); } @Override public Object execute() { LOG.info("Starting Session Summarization."); startTime = System.currentTimeMillis(); try { processSession(); } catch (IOException e) { e.printStackTrace(); } catch (InterruptedException e) { e.printStackTrace(); } catch (ExecutionException e) { e.printStackTrace(); } endTime = System.currentTimeMillis(); es.refreshIndex(); LOG.info("Session Summarization complete. Time elapsed {} seconds.", (endTime - startTime) / 1000); return null; } /** * Method to summarize duration, numbers of searching, viewing, and downloading requests, and * filter out suspicious sessions * @throws IOException IOException * @throws InterruptedException InterruptedException * @throws ExecutionException ExecutionException */ public void processSession() throws IOException, InterruptedException, ExecutionException { es.createBulkProcessor(); String inputType = this.cleanupType; String outputType = this.sessionStats; MetricsAggregationBuilder<?> statsAgg = AggregationBuilders.stats("Stats").field("Time"); SearchResponse sr = es.getClient().prepareSearch(props.getProperty(MudrodConstants.ES_INDEX_NAME)) .setTypes(inputType).setQuery(QueryBuilders.matchAllQuery()) .addAggregation( AggregationBuilders.terms("Sessions").field("SessionID").size(0).subAggregation(statsAgg)) .execute().actionGet(); Terms Sessions = sr.getAggregations().get("Sessions"); DateTimeFormatter fmt = ISODateTimeFormat.dateTime(); String min = null; String max = null; DateTime start = null; DateTime end = null; int duration = 0; float request_rate = 0; int session_count = 0; Pattern pattern = Pattern.compile("get (.*?) http/*"); for (Terms.Bucket entry : Sessions.getBuckets()) { if (entry.getDocCount() >= 3 && !entry.getKey().equals("invalid")) { Stats agg = entry.getAggregations().get("Stats"); min = agg.getMinAsString(); max = agg.getMaxAsString(); start = fmt.parseDateTime(min); end = fmt.parseDateTime(max); duration = Seconds.secondsBetween(start, end).getSeconds(); int searchDataListRequest_count = 0; int searchDataRequest_count = 0; int searchDataListRequest_byKeywords_count = 0; int ftpRequest_count = 0; int keywords_num = 0; String IP = null; String keywords = ""; String views = ""; String downloads = ""; QueryBuilder filter_search = QueryBuilders.boolQuery() .must(QueryBuilders.termQuery("SessionID", entry.getKey())); QueryBuilder query_search = QueryBuilders.filteredQuery(QueryBuilders.matchAllQuery(), filter_search); SearchResponse scrollResp = es.getClient().prepareSearch(props.getProperty("indexName")) .setTypes(inputType).setScroll(new TimeValue(60000)).setQuery(query_search).setSize(100) .execute().actionGet(); while (true) { for (SearchHit hit : scrollResp.getHits().getHits()) { Map<String, Object> result = hit.getSource(); String request = (String) result.get("Request"); String logType = (String) result.get("LogType"); IP = (String) result.get("IP"); Matcher matcher = pattern.matcher(request.trim().toLowerCase()); while (matcher.find()) { request = matcher.group(1); } String datasetlist = "/datasetlist?"; String dataset = "/dataset/"; if (request.contains(datasetlist)) { searchDataListRequest_count++; RequestUrl requestURL = new RequestUrl(this.props, this.es, null); String info = requestURL.getSearchInfo(request) + ","; if (!info.equals(",")) { if (keywords.equals("")) { keywords = keywords + info; } else { String[] items = info.split(","); String[] keywordList = keywords.split(","); for (int m = 0; m < items.length; m++) { if (!Arrays.asList(keywordList).contains(items[m])) { keywords = keywords + items[m] + ","; } } } } } if (request.startsWith(dataset)) { searchDataRequest_count++; if (findDataset(request) != null) { String view = findDataset(request); if ("".equals(views)) { views = view; } else { if (views.contains(view)) { } else { views = views + "," + view; } } } } if ("ftp".equals(logType)) { ftpRequest_count++; String download = ""; String requestLowercase = request.toLowerCase(); if (requestLowercase.endsWith(".jpg") == false && requestLowercase.endsWith(".pdf") == false && requestLowercase.endsWith(".txt") == false && requestLowercase.endsWith(".gif") == false) { download = request; } if ("".equals(downloads)) { downloads = download; } else { if (downloads.contains(download)) { } else { downloads = downloads + "," + download; } } } } scrollResp = es.getClient().prepareSearchScroll(scrollResp.getScrollId()) .setScroll(new TimeValue(600000)).execute().actionGet(); // Break condition: No hits are returned if (scrollResp.getHits().getHits().length == 0) { break; } } if (!keywords.equals("")) { keywords_num = keywords.split(",").length; } if (searchDataListRequest_count != 0 && searchDataListRequest_count <= Integer.parseInt(props.getProperty("searchf")) && searchDataRequest_count != 0 && searchDataRequest_count <= Integer.parseInt(props.getProperty("viewf")) && ftpRequest_count <= Integer.parseInt(props.getProperty("downloadf"))) { String sessionURL = props.getProperty("SessionPort") + props.getProperty("SessionUrl") + "?sessionid=" + entry.getKey() + "&sessionType=" + outputType + "&requestType=" + inputType; session_count++; IndexRequest ir = new IndexRequest(props.getProperty("indexName"), outputType) .source(jsonBuilder().startObject().field("SessionID", entry.getKey()) .field("SessionURL", sessionURL).field("Request_count", entry.getDocCount()) .field("Duration", duration).field("Number of Keywords", keywords_num) .field("Time", min).field("End_time", max) .field("searchDataListRequest_count", searchDataListRequest_count) .field("searchDataListRequest_byKeywords_count", searchDataListRequest_byKeywords_count) .field("searchDataRequest_count", searchDataRequest_count) .field("keywords", es.customAnalyzing(props.getProperty("indexName"), keywords)) .field("views", views).field("downloads", downloads) .field("request_rate", request_rate).field("Comments", "") .field("Validation", 0).field("Produceby", 0).field("Correlation", 0) .field("IP", IP) // .field("Coordinates", loc.latlon) .endObject()); es.getBulkProcessor().add(ir); } } } LOG.info("Session count: {}", Integer.toString(session_count)); es.destroyBulkProcessor(); } /** * Extract the dataset ID from a long request * @param request raw log request * @return dataset ID */ public String findDataset(String request) { String pattern1 = "/dataset/"; String pattern2; if (request.contains("?")) { pattern2 = "?"; } else { pattern2 = " "; } Pattern p = Pattern.compile(Pattern.quote(pattern1) + "(.*?)" + Pattern.quote(pattern2)); Matcher m = p.matcher(request); if (m.find()) { return m.group(1); } return null; } @Override public Object execute(Object o) { return null; } }