Java tutorial
/* * Copyright 2014, Hridesh Rajan, Robert Dyer, * and Iowa State University of Science and Technology * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package boa.functions; import java.io.IOException; import java.util.HashMap; import java.util.Stack; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.hadoop.io.BytesWritable; import org.apache.hadoop.io.MapFile; import org.apache.hadoop.io.Text; import org.apache.hadoop.mapreduce.Mapper.Context; import com.google.protobuf.CodedInputStream; import com.google.protobuf.InvalidProtocolBufferException; import boa.types.Ast.*; import boa.types.Code.CodeRepository; import boa.types.Code.Revision; import boa.types.Diff.ChangedFile; import boa.types.Issues.IssueRepository; import boa.types.Issues.IssuesRoot; import boa.types.Shared.ChangeKind; import boa.types.Shared.Person; import boa.types.Toplevel.Project; /** * Boa functions for working with ASTs. * * @author rdyer */ public class BoaAstIntrinsics { @SuppressWarnings("rawtypes") private static Context context; private static MapFile.Reader map, commentsMap, issuesMap; public static enum AST_COUNTER { GETS_ATTEMPTED, GETS_SUCCEED, GETS_FAILED, GETS_FAIL_MISSING, GETS_FAIL_BADPROTOBUF, GETS_FAIL_BADLOC, }; @FunctionSpec(name = "url", returnType = "string", formalParameters = { "ChangedFile" }) public static String changedfileToString(final ChangedFile f) { return f.getKey() + "!!" + f.getName(); } private static final ASTRoot emptyAst = ASTRoot.newBuilder().build(); private static final CommentsRoot emptyComments = CommentsRoot.newBuilder().build(); private static final IssuesRoot emptyIssues = IssuesRoot.newBuilder().build(); /** * Given a ChangedFile, return the AST for that file at that revision. * * @param f the ChangedFile to get a snapshot of the AST for * @return the AST, or an empty AST on any sort of error */ @SuppressWarnings("unchecked") @FunctionSpec(name = "getast", returnType = "ASTRoot", formalParameters = { "ChangedFile" }) public static ASTRoot getast(final ChangedFile f) { // since we know only certain kinds have ASTs, filter before looking up final ChangedFile.FileKind kind = f.getKind(); if (kind != ChangedFile.FileKind.SOURCE_JAVA_ERROR && kind != ChangedFile.FileKind.SOURCE_JAVA_JLS2 && kind != ChangedFile.FileKind.SOURCE_JAVA_JLS3 && kind != ChangedFile.FileKind.SOURCE_JAVA_JLS4) return emptyAst; context.getCounter(AST_COUNTER.GETS_ATTEMPTED).increment(1); final String rowName = f.getKey() + "!!" + f.getName(); if (map == null) openMap(); try { final BytesWritable value = new BytesWritable(); if (map.get(new Text(rowName), value) == null) { context.getCounter(AST_COUNTER.GETS_FAIL_MISSING).increment(1); } else { final CodedInputStream _stream = CodedInputStream.newInstance(value.getBytes(), 0, value.getLength()); // defaults to 64, really big ASTs require more _stream.setRecursionLimit(Integer.MAX_VALUE); final ASTRoot root = ASTRoot.parseFrom(_stream); context.getCounter(AST_COUNTER.GETS_SUCCEED).increment(1); return root; } } catch (final InvalidProtocolBufferException e) { e.printStackTrace(); context.getCounter(AST_COUNTER.GETS_FAIL_BADPROTOBUF).increment(1); } catch (final IOException e) { e.printStackTrace(); context.getCounter(AST_COUNTER.GETS_FAIL_MISSING).increment(1); } catch (final RuntimeException e) { e.printStackTrace(); context.getCounter(AST_COUNTER.GETS_FAIL_MISSING).increment(1); } catch (final Error e) { e.printStackTrace(); context.getCounter(AST_COUNTER.GETS_FAIL_BADPROTOBUF).increment(1); } System.err.println("error with ast: " + rowName); context.getCounter(AST_COUNTER.GETS_FAILED).increment(1); return emptyAst; } /** * Given a ChangedFile, return the comments for that file at that revision. * * @param f the ChangedFile to get a snapshot of the comments for * @return the comments list, or an empty list on any sort of error */ @FunctionSpec(name = "getcomments", returnType = "CommentsRoot", formalParameters = { "ChangedFile" }) public static CommentsRoot getcomments(final ChangedFile f) { // since we know only certain kinds have comments, filter before looking up final ChangedFile.FileKind kind = f.getKind(); if (kind != ChangedFile.FileKind.SOURCE_JAVA_ERROR && kind != ChangedFile.FileKind.SOURCE_JAVA_JLS2 && kind != ChangedFile.FileKind.SOURCE_JAVA_JLS3 && kind != ChangedFile.FileKind.SOURCE_JAVA_JLS4) return emptyComments; final String rowName = f.getKey() + "!!" + f.getName(); if (commentsMap == null) openCommentMap(); try { final BytesWritable value = new BytesWritable(); if (commentsMap.get(new Text(rowName), value) != null) { final CodedInputStream _stream = CodedInputStream.newInstance(value.getBytes(), 0, value.getLength()); final CommentsRoot root = CommentsRoot.parseFrom(_stream); return root; } } catch (final InvalidProtocolBufferException e) { e.printStackTrace(); } catch (final IOException e) { e.printStackTrace(); } catch (final RuntimeException e) { e.printStackTrace(); } catch (final Error e) { e.printStackTrace(); } System.err.println("error with comments: " + rowName); return emptyComments; } /** * Given an IssueRepository, return the issues. * * @param f the IssueRepository to get issues for * @return the issues list, or an empty list on any sort of error */ @FunctionSpec(name = "getissues", returnType = "IssuesRoot", formalParameters = { "IssueRepository" }) public static IssuesRoot getissues(final IssueRepository f) { if (issuesMap == null) openIssuesMap(); try { final BytesWritable value = new BytesWritable(); if (issuesMap.get(new Text(f.getKey()), value) != null) { final CodedInputStream _stream = CodedInputStream.newInstance(value.getBytes(), 0, value.getLength()); final IssuesRoot root = IssuesRoot.parseFrom(_stream); return root; } } catch (final InvalidProtocolBufferException e) { e.printStackTrace(); } catch (final IOException e) { e.printStackTrace(); } catch (final RuntimeException e) { e.printStackTrace(); } catch (final Error e) { e.printStackTrace(); } System.err.println("error with issues: " + f.getKey()); return emptyIssues; } @SuppressWarnings("rawtypes") public static void setup(final Context context) { BoaAstIntrinsics.context = context; } private static void openMap() { final Configuration conf = new Configuration(); try { final FileSystem fs = FileSystem.get(conf); final Path p = new Path("hdfs://boa-njt/", new Path( context.getConfiguration().get("boa.ast.dir", context.getConfiguration().get("boa.input.dir", "repcache/live")), new Path("ast"))); map = new MapFile.Reader(fs, p.toString(), conf); } catch (final Exception e) { e.printStackTrace(); } } private static void openCommentMap() { final Configuration conf = new Configuration(); try { final FileSystem fs = FileSystem.get(conf); final Path p = new Path("hdfs://boa-njt/", new Path( context.getConfiguration().get("boa.comments.dir", context.getConfiguration().get("boa.input.dir", "repcache/live")), new Path("comments"))); commentsMap = new MapFile.Reader(fs, p.toString(), conf); } catch (final Exception e) { e.printStackTrace(); } } private static void openIssuesMap() { final Configuration conf = new Configuration(); try { final FileSystem fs = FileSystem.get(conf); final Path p = new Path("hdfs://boa-njt/", new Path( context.getConfiguration().get("boa.issues.dir", context.getConfiguration().get("boa.input.dir", "repcache/live")), new Path("issues"))); issuesMap = new MapFile.Reader(fs, p.toString(), conf); } catch (final Exception e) { e.printStackTrace(); } } @SuppressWarnings("rawtypes") public static void cleanup(final Context context) { closeMap(); closeCommentMap(); closeIssuesMap(); } private static void closeMap() { if (map != null) try { map.close(); } catch (final IOException e) { e.printStackTrace(); } map = null; } private static void closeCommentMap() { if (commentsMap != null) try { commentsMap.close(); } catch (final IOException e) { e.printStackTrace(); } commentsMap = null; } private static void closeIssuesMap() { if (issuesMap != null) try { issuesMap.close(); } catch (final IOException e) { e.printStackTrace(); } issuesMap = null; } @FunctionSpec(name = "type_name", returnType = "string", formalParameters = { "string" }) public static String type_name(final String s) { // first, normalize the string final String t = s.replaceAll("<\\s+", "<").replaceAll(",\\s+", ", ").replaceAll("\\s*>\\s*", ">") .replaceAll("\\s*&\\s*", " & ").replaceAll("\\s*\\|\\s*", " | "); if (!t.contains(".")) return t; /* * Remove qualifiers from anywhere in the string... * * SomeType => SomeType * foo.SomeType => SomeType * foo.bar.SomeType => SomeType * SomeType<T> => SomeType<T> * SomeType<T, S> => SomeType<T, S> * SomeType<foo.bar.T, S> => SomeType<T, S> * SomeType<T, foo.bar.S> => SomeType<T, S> * foo.bar.SomeType<T, foo.bar.S<bar.Q>> => SomeType<T, S<Q>> * SomeType|foo.Bar => SomeType|Bar * foo<T>.bar<T> => foo<T>.bar<T> */ return t.replaceAll("[^\\s,<>|]+\\.([^\\s\\[.,><|]+)", "$1"); } /** * A visitor that returns the total number of AST nodes. */ public final static BoaCountingVisitor lenVisitor = new BoaCountingVisitor() { /** {@inheritDoc} */ @Override protected boolean defaultPreVisit() { count++; return true; } /** {@inheritDoc} */ @Override protected boolean preVisit(final Project node) throws Exception { return true; } /** {@inheritDoc} */ @Override protected boolean preVisit(final CodeRepository node) throws Exception { return true; } /** {@inheritDoc} */ @Override protected boolean preVisit(final Revision node) throws Exception { return true; } /** {@inheritDoc} */ @Override protected boolean preVisit(final ChangedFile node) throws Exception { return true; } /** {@inheritDoc} */ @Override protected boolean preVisit(final ASTRoot node) throws Exception { return true; } /** {@inheritDoc} */ @Override protected boolean preVisit(final Person node) throws Exception { return true; } }; /** * */ public static class SnapshotVisitor extends BoaCollectingVisitor<String, ChangedFile> { private long timestamp; private String[] kinds; public SnapshotVisitor initialize(final long timestamp, final String... kinds) { initialize(new HashMap<String, ChangedFile>()); this.timestamp = timestamp; this.kinds = kinds; return this; } /** {@inheritDoc} */ @Override protected boolean preVisit(final Revision node) throws Exception { return node.getCommitDate() <= timestamp; } /** {@inheritDoc} */ @Override protected boolean preVisit(final ChangedFile node) throws Exception { if (node.getChange() == ChangeKind.DELETED) { map.remove(node.getName()); return false; } boolean filter = kinds.length > 0; if (filter) { final String kindName = node.getKind().name(); for (final String kind : kinds) if (kindName.startsWith(kind)) { filter = false; break; } } if (!filter) map.put(node.getName(), node); return false; } } public final static SnapshotVisitor snapshot = new SnapshotVisitor(); @FunctionSpec(name = "getsnapshot", returnType = "array of ChangedFile", formalParameters = { "CodeRepository", "time", "string..." }) public static ChangedFile[] getSnapshot(final CodeRepository cr, final long timestamp, final String... kinds) throws Exception { snapshot.initialize(timestamp, kinds).visit(cr); return snapshot.map.values().toArray(new ChangedFile[0]); } @FunctionSpec(name = "getsnapshot", returnType = "array of ChangedFile", formalParameters = { "CodeRepository", "string..." }) public static ChangedFile[] getSnapshot(final CodeRepository cr, final String... kinds) throws Exception { return getSnapshot(cr, Long.MAX_VALUE, kinds); } @FunctionSpec(name = "getsnapshot", returnType = "array of ChangedFile", formalParameters = { "CodeRepository", "time" }) public static ChangedFile[] getSnapshot(final CodeRepository cr, final long timestamp) throws Exception { return getSnapshot(cr, timestamp, new String[0]); } @FunctionSpec(name = "getsnapshot", returnType = "array of ChangedFile", formalParameters = { "CodeRepository" }) public static ChangedFile[] getSnapshot(final CodeRepository cr) throws Exception { return getSnapshot(cr, Long.MAX_VALUE, new String[0]); } @FunctionSpec(name = "isliteral", returnType = "bool", formalParameters = { "Expression", "string" }) public static boolean isLiteral(final Expression e, final String lit) throws Exception { return e.getKind() == Expression.ExpressionKind.LITERAL && e.hasLiteral() && e.getLiteral().equals(lit); } ////////////////////////////// // Collect Annotations Used // ////////////////////////////// private static class AnnotationCollectingVisitor extends BoaCollectingVisitor<String, Long> { @Override protected boolean preVisit(Modifier node) { if (node.getKind() == Modifier.ModifierKind.ANNOTATION) { final String name = BoaAstIntrinsics.type_name(node.getAnnotationName()); final long count = map.containsKey(name) ? map.get(name) : 0; map.put(name, count + 1); } return true; } } private static AnnotationCollectingVisitor annotationCollectingVisitor = new AnnotationCollectingVisitor(); @FunctionSpec(name = "collect_annotations", returnType = "map[string] of int", formalParameters = { "ASTRoot", "map[string] of int" }) public static HashMap<String, Long> collect_annotations(final ASTRoot f, final HashMap<String, Long> map) throws Exception { annotationCollectingVisitor.initialize(map).visit(f); return annotationCollectingVisitor.map; } /////////////////////////// // Collect Generics Used // /////////////////////////// private static class GenericsCollectingVisitor extends BoaCollectingVisitor<String, Long> { @Override protected boolean preVisit(Type node) { // FIXME /* try { parseGenericType(BoaAstIntrinsics.type_name(node.getName()).trim(), map); } catch (final StackOverflowError e) { System.err.println("STACK ERR: " + node.getName() + " -> " + BoaAstIntrinsics.type_name(node.getName()).trim()); } */ return true; } } private static GenericsCollectingVisitor genericsCollectingVisitor = new GenericsCollectingVisitor(); @FunctionSpec(name = "collect_generic_types", returnType = "map[string] of int", formalParameters = { "ASTRoot", "map[string] of int" }) public static HashMap<String, Long> collect_generic_types(final ASTRoot f, final HashMap<String, Long> map) throws Exception { genericsCollectingVisitor.initialize(map).visit(f); return genericsCollectingVisitor.map; } private static void parseGenericType(final String name, final HashMap<String, Long> counts) { if (!name.contains("<") || name.startsWith("<")) return; if (name.contains("|")) { for (final String s : name.split("\\|")) parseGenericType(s.trim(), counts); return; } if (name.contains("&")) { int count = 0; int last = 0; for (int i = 0; i < name.length(); i++) switch (name.charAt(i)) { case '<': count++; break; case '>': count--; break; case '&': if (count == 0) { parseGenericType(name.substring(last, i).trim(), counts); last = i + 1; } break; default: break; } parseGenericType(name.substring(last).trim(), counts); return; } foundType(name, counts); int start = name.indexOf("<"); final Stack<Integer> starts = new Stack<Integer>(); int lastStart = start + 1; for (int i = lastStart; i < name.lastIndexOf(">"); i++) switch (name.charAt(i)) { case '<': starts.push(lastStart); lastStart = i + 1; break; case '>': if (!starts.empty()) foundType(name.substring(starts.pop(), i + 1).trim(), counts); break; case '&': case '|': case ',': case ' ': case '.': case '\t': lastStart = i + 1; default: break; } } private static void foundType(final String name, final HashMap<String, Long> counts) { final String type = name.endsWith("...") ? name.substring(0, name.length() - 3).trim() : name.trim(); final long count = counts.containsKey(type) ? counts.get(type) : 0; counts.put(type, count + 1); String rawType = type.substring(0, type.indexOf("<")).trim(); if (!type.endsWith(">")) rawType += type.substring(type.lastIndexOf(">") + 1).trim(); final long rawCount = counts.containsKey(rawType) ? counts.get(rawType) : 0; counts.put(rawType, rawCount + 1); } }