Java tutorial
/** * Copyright 2016 Technische Universitt Darmstadt * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package cc.kave.episodes.mining.evaluation; import static cc.recommenders.assertions.Asserts.assertTrue; import java.io.File; import java.io.IOException; import java.util.LinkedList; import java.util.List; import java.util.Map; import java.util.Set; import org.apache.commons.io.FileUtils; import org.jgrapht.DirectedGraph; import org.jgrapht.graph.DefaultEdge; import com.google.common.collect.Maps; import com.google.inject.Inject; import com.google.inject.name.Named; import cc.kave.episodes.mining.graphs.EpisodeAsGraphWriter; import cc.kave.episodes.mining.graphs.EpisodeToGraphConverter; import cc.kave.episodes.mining.graphs.TransitivelyClosedEpisodes; import cc.kave.episodes.mining.patterns.MaximalEpisodes; import cc.kave.episodes.mining.reader.MappingParser; import cc.kave.episodes.mining.reader.ReposParser; import cc.kave.episodes.mining.reader.StreamParser; import cc.kave.episodes.model.EnclosingMethods; import cc.kave.episodes.model.Episode; import cc.kave.episodes.model.events.Event; import cc.kave.episodes.model.events.EventKind; import cc.kave.episodes.model.events.Fact; import cc.kave.episodes.postprocessor.EpisodesPostprocessor; import cc.recommenders.io.Logger; public class PatternsIdentifier { private File patternsFolder; private StreamParser streamParser; private MappingParser mappingParser; private EpisodesPostprocessor episodeProcessor; private MaximalEpisodes maxEpisodes; private TransitivelyClosedEpisodes transClosure; private EpisodeToGraphConverter episodeGraphConverter; private EpisodeAsGraphWriter graphWriter; private ReposParser repos; @Inject public PatternsIdentifier(@Named("patterns") File folder, StreamParser streamParser, EpisodesPostprocessor episodes, MappingParser mappingParser, MaximalEpisodes maxEpisodes, ReposParser repos, TransitivelyClosedEpisodes transClosure, EpisodeToGraphConverter episodeGraphConverter, EpisodeAsGraphWriter graphWriter) { assertTrue(folder.exists(), "Patterns folder does not exist"); assertTrue(folder.isDirectory(), "Patterns folder is not a folder, but a file"); this.patternsFolder = folder; this.streamParser = streamParser; this.mappingParser = mappingParser; this.episodeProcessor = episodes; this.maxEpisodes = maxEpisodes; this.repos = repos; this.transClosure = transClosure; this.episodeGraphConverter = episodeGraphConverter; this.graphWriter = graphWriter; } public void trainingCode(int numbRepos, int frequency, double entropy) throws Exception { List<List<Fact>> stream = streamParser.parseStream(numbRepos); List<Event> events = mappingParser.parse(numbRepos); // Logger.log("Number of events: %d", events.size()); Map<Integer, Set<Episode>> postpEpisodes = episodeProcessor.postprocess(numbRepos, frequency, entropy); Map<Integer, Set<Episode>> patterns = maxEpisodes.getMaximalEpisodes(postpEpisodes); // checkMethodSize(stream, events); for (Map.Entry<Integer, Set<Episode>> entry : patterns.entrySet()) { if (entry.getKey() < 2) { continue; } // Episode debug = createDebuggingEpisode(); for (Episode episode : entry.getValue()) { Set<Fact> episodeFacts = episode.getEvents(); EnclosingMethods methodsOrderRelation = new EnclosingMethods(true); for (List<Fact> method : stream) { if (method.size() < 3) { continue; } if (method.containsAll(episodeFacts)) { methodsOrderRelation.addMethod(episode, method, events); // if (episode.equals(debug)) { // Logger.log("Method: %s\noccurrence: %d", // method.toString(), // methodsOrderRelation.getOccurrences()); // } } } if (methodsOrderRelation.getOccurrences() < episode.getFrequency()) { Logger.log("Episode: %s", episode.toString()); Logger.log("Frequency = %d, occurrence = %d", episode.getFrequency(), methodsOrderRelation.getOccurrences()); throw new Exception("Episode is not found sufficient number of times on the training stream!"); } } Logger.log("Processed %d-node patterns!", entry.getKey()); } Logger.log("All patterns are identified in the training data!"); } private void checkMethodSize(List<List<Fact>> stream, List<Event> events) { int largestMethod = 0; int bigMethods = 0; List<Fact> keepMethod = new LinkedList<Fact>(); for (List<Fact> method : stream) { // if (method.size() >= 446) { // continue; // } if (method.size() > 500) { bigMethods++; } if (method.size() > largestMethod) { largestMethod = method.size(); keepMethod = method; } } String methodName = ""; for (Fact fact : keepMethod) { Event event = events.get(fact.getFactID()); if (event.getKind() == EventKind.METHOD_DECLARATION) { methodName = event.getMethod().getDeclaringType().getFullName() + "." + event.getMethod().getName(); } } Logger.log("Size of the largest method is: %d", largestMethod); Logger.log("Method name is: %s", methodName); Logger.log("Methods with more than 500 events are: %d", bigMethods); } private Episode createDebuggingEpisode() { Episode episode = new Episode(); episode.addStringsOfFacts("50", "14", "50>14"); episode.setFrequency(25); episode.setBidirectMeasure(1.0); return episode; } public void validationCode(int numbRepos, int frequency, double entropy) throws Exception { List<Event> trainEvents = mappingParser.parse(numbRepos); Map<Integer, Set<Episode>> patterns = episodeProcessor.postprocess(numbRepos, frequency, entropy); List<Event> stream = repos.validationStream(numbRepos); Map<Event, Integer> mapEvents = mergeTrainingValidationEvents(stream, trainEvents); List<List<Fact>> streamMethods = streamOfMethods(stream, mapEvents); List<Event> listEvents = mapToList(mapEvents); StringBuilder sb = new StringBuilder(); int patternId = 0; for (Map.Entry<Integer, Set<Episode>> entry : patterns.entrySet()) { if ((entry.getKey() < 2) || (entry.getValue().size() == 0)) { continue; } sb.append("Patterns of size: " + entry.getKey() + "-events\n"); sb.append("Pattern\tFrequency\toccurrencesAsSet\toccurrencesOrder\n"); for (Episode episode : entry.getValue()) { EnclosingMethods methodsNoOrderRelation = new EnclosingMethods(false); EnclosingMethods methodsOrderRelation = new EnclosingMethods(true); for (List<Fact> method : streamMethods) { if (method.containsAll(episode.getEvents())) { methodsNoOrderRelation.addMethod(episode, method, listEvents); methodsOrderRelation.addMethod(episode, method, listEvents); } } sb.append(patternId + "\t" + episode.getFrequency() + "\t" + methodsNoOrderRelation.getOccurrences() + "\t" + methodsOrderRelation.getOccurrences() + "\n"); patternsWriter(episode, trainEvents, numbRepos, frequency, entropy, patternId); patternId++; } sb.append("\n"); Logger.log("Processed %d-node patterns!", entry.getKey()); } FileUtils.writeStringToFile(getValidationPath(getPath(numbRepos, frequency, entropy)), sb.toString()); } private void patternsWriter(Episode episode, List<Event> events, int numbRepos, int frequency, double entropy, int pId) throws IOException { Episode closedEpisodes = transClosure.remTransClosure(episode); File filePath = getPath(numbRepos, frequency, entropy); DirectedGraph<Fact, DefaultEdge> graph = episodeGraphConverter.convert(closedEpisodes, events); graphWriter.write(graph, getGraphPaths(filePath, pId)); } private List<Event> mapToList(Map<Event, Integer> events) { List<Event> result = new LinkedList<Event>(); for (Map.Entry<Event, Integer> entry : events.entrySet()) { result.add(entry.getKey()); } return result; } private Map<Event, Integer> mergeTrainingValidationEvents(List<Event> stream, List<Event> events) { Map<Event, Integer> completeEvents = Maps.newLinkedHashMap(); int index = 0; for (Event event : events) { completeEvents.put(event, index); index++; } for (Event event : stream) { if (!completeEvents.containsKey(event)) { completeEvents.put(event, index); index++; } } return completeEvents; } private List<List<Fact>> streamOfMethods(List<Event> stream, Map<Event, Integer> events) { List<List<Fact>> result = new LinkedList<>(); List<Fact> method = new LinkedList<Fact>(); for (Event event : stream) { if (event.getKind() == EventKind.FIRST_DECLARATION) { if (!method.isEmpty()) { result.add(method); method = new LinkedList<Fact>(); } } int index = events.get(event); method.add(new Fact(index)); } if (!method.isEmpty()) { result.add(method); } return result; } private File getPath(int numbRepos, int freqThresh, double bidirectThresh) { File folderPath = new File(patternsFolder.getAbsolutePath() + "/Repos" + numbRepos + "/Freq" + freqThresh + "/Bidirect" + bidirectThresh + "/"); if (!folderPath.isDirectory()) { folderPath.mkdirs(); } return folderPath; } private String getGraphPaths(File folderPath, int patternNumber) { String graphPath = folderPath + "/pattern" + patternNumber + ".dot"; return graphPath; } private File getValidationPath(File folderPath) { File fileName = new File(folderPath.getAbsolutePath() + "/patternsValidation.txt"); return fileName; } }