Java tutorial
/* Copyright 2014 MITRE Corporation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.mitre.provenance.capture.linux; import java.io.BufferedReader; import java.io.ByteArrayInputStream; import java.io.File; import java.io.FileInputStream; import java.io.FileReader; import java.io.FilenameFilter; import java.io.IOException; import java.lang.management.ManagementFactory; import java.nio.file.Files; import java.nio.file.Path; import java.nio.file.Paths; import java.nio.file.attribute.UserPrincipal; import java.security.NoSuchAlgorithmException; import java.util.ArrayList; import java.util.HashSet; import java.util.List; import java.util.logging.Logger; import org.apache.commons.cli.CommandLine; import org.apache.commons.cli.CommandLineParser; import org.apache.commons.cli.GnuParser; import org.apache.commons.cli.HelpFormatter; import org.apache.commons.cli.OptionBuilder; import org.apache.commons.cli.Options; import org.apache.commons.cli.ParseException; import org.mitre.provenance.Metadata; import org.mitre.provenance.PLUSException; import org.mitre.provenance.client.AbstractProvenanceClient; import org.mitre.provenance.client.LocalProvenanceClient; import org.mitre.provenance.client.ProvenanceClient; import org.mitre.provenance.contenthash.ContentHasher; import org.mitre.provenance.contenthash.SHA256ContentHasher; import org.mitre.provenance.db.neo4j.Neo4JPLUSObjectFactory; import org.mitre.provenance.npe.NonProvenanceEdge; import 
org.mitre.provenance.plusobject.PLUSEdge; import org.mitre.provenance.plusobject.PLUSFile; import org.mitre.provenance.plusobject.PLUSInvocation; import org.mitre.provenance.plusobject.PLUSObject; import org.mitre.provenance.plusobject.PLUSWorkflow; import org.mitre.provenance.plusobject.ProvenanceCollection; import org.mitre.provenance.tools.LRUCache; import org.mitre.provenance.user.User; /** * This class is an operating system monitoring class for UNIX-based operating systems which support the proc filesystem. * For more information about procfs, see http://en.wikipedia.org/wiki/Procfs * * <p>Basically, this polls available OS information about processes that are running, and then saves that information as provenance. * The OS will tell us for example which process IDs (PIDs) have which files open for read and write, and what the command line is * of the application that executed. * * <p>We have to apply a few basic fingerprinting techniques to avoid logging duplicates. * * <p>This code could doubtless see many improvements, but it's a basic proof of concept for how to collect provenance in real systems. * For many users, this kind of provenance would be seen as too granular, but it can produce some very interesting findings; in * particular, because we use content-bound identifiers on everything that we encounter, this can establish linkages between * different processes that read and use the same files. * * <p>A major weakness of this capture approach is that you can never know when in the process lifecycle to scan a particular PID. * Which assets the process is using varies dramatically (particularly for long-lived processes) depending on when you hit it in * the lifecycle. Improvements should focus around appending in subsequent polls. 
*
 * @author moxious
 */
public class PROCtor {
    /** Logger shared by this class. */
    protected static final Logger log = Logger.getLogger(PROCtor.class.getName());

    /** PID of the JVM running this monitor; entries for this PID are skipped while polling. */
    protected String myPID = null;

    /**
     * Cache of provenance objects already created or looked up, keyed by the identifier
     * produced by {@link #getIDForFile(File)}.
     */
    public static final LRUCache<String, PLUSObject> cache = new LRUCache<String, PLUSObject>(1000);

    /** PIDs to restrict polling to; when empty, every numeric entry under {@link #PROC} is processed. */
    protected HashSet<String> pollPIDs = new HashSet<String>();

    /** Client used to check for existing objects and to report collected provenance. */
    protected static AbstractProvenanceClient client = new LocalProvenanceClient();

    /** Hasher used both for path/mtime identifiers and for file content hashes. */
    protected SHA256ContentHasher hasher = new SHA256ContentHasher();

    /** Metadata key under which the identifier from {@link #getIDForFile(File)} is stored. */
    public static final String UUID_KEY = "file_uuid";

    /** Files at or above this size (in bytes) are not content-hashed by {@link #createOnlyIfNew(File)}. */
    private static final long MAX_HASHABLE_FILE_SIZE = 1024 * 1024;

    /**
     * Signals that an object already exists.
     * @author david
     */
    public static class ExistsException extends PLUSException {
        private static final long serialVersionUID = 11233123L;

        /** The already-existing object. */
        protected PLUSObject o;

        /**
         * @param obj the object that already exists.
         */
        public ExistsException(PLUSObject obj) {
            this.o = obj;
        }

        /**
         * @return the already-existing object this exception was created with.
         */
        public PLUSObject getObject() {
            return o;
        }
    }

    /**
     * Restrict polling to the given PID (in addition to any PIDs added earlier).
     * @param pid the process ID, as it appears as a directory name under the proc filesystem.
     */
    public void addPID(String pid) {
        pollPIDs.add(pid);
    }

    /** Root of the proc filesystem. */
    protected static File PROC = new File("/proc");

    /**
     * Creates a monitor and records the PID of the current JVM so it can be skipped.
     * @throws Exception declared for compatibility with existing callers.
     */
    public PROCtor() throws Exception {
        myPID = PROCtor.getMyPID();
    }

    /**
     * Polls the proc filesystem repeatedly.
     * @param pollTimeoutMs time to sleep between two consecutive polls, in milliseconds.
     * @param times number of polls to perform; zero or a negative value polls forever.
     * @throws Exception if a poll fails or the sleeping thread is interrupted.
     */
    public void run(long pollTimeoutMs, int times) throws Exception {
        int completed = 0;

        while (true) {
            poll();
            completed++;

            // Stop right after the final poll; sleeping here would only delay the exit.
            if (times > 0 && completed >= times) {
                break;
            }

            Thread.sleep(pollTimeoutMs);
        }
    }

    /**
     * Read a file line by line.
     * @param f file to read.
     * @return the lines of the file without line terminators, or null if the file could not be opened or read.
     */
    protected List<String> slurpLines(File f) {
        ArrayList<String> lines = new ArrayList<String>();

        // try-with-resources: the reader is only closed if it was actually opened.
        try (BufferedReader br = new BufferedReader(new FileReader(f))) {
            String line;
            while ((line = br.readLine()) != null) {
                lines.add(line);
            }
            return lines;
        } catch (IOException exc) {
            return null;
        }
    } // End slurpLines

    /**
     * Read the complete contents of a file and return them as a string. Simple utility for tiny files.
     * Line terminators are dropped, so the lines are concatenated directly.
     * @param f file to read.
     * @return the complete text contents, or null if the file could not be opened or read.
     */
    protected String slurp(File f) {
        try (BufferedReader br = new BufferedReader(new FileReader(f))) {
            StringBuilder b = new StringBuilder();
            String line;
            while ((line = br.readLine()) != null) {
                b.append(line);
            }
            return b.toString();
        } catch (IOException ioe) {
            return null;
        }
    }

    /**
     * Computes a special identifier for files based on their path and when they were last modified. This is not a
     * content-bound identifier, but can be used in case a duplicate file has been seen on the same system.
     * @param f the file to use
     * @return a string identifier
     * @throws NoSuchAlgorithmException
     * @throws IOException
     */
    protected String getIDForFile(File f) throws NoSuchAlgorithmException, IOException {
        // Unique ID for a file based on its canonical pathname and last modified date.
        // When this hash value changes, you know it's a different file.
        String stamp = f.getCanonicalPath() + "-" + f.lastModified();

        // NOTE(review): getBytes() uses the platform default charset. It is left as-is because switching to an
        // explicit charset could change identifiers that were already stored for non-ASCII paths.
        return ContentHasher.formatAsHexString(hasher.hash(new ByteArrayInputStream(stamp.getBytes())));
    }

    /**
     * Polls through all available items in the proc fs, and processes them individually. An I/O failure while
     * processing one PID (for instance because the process exited during the scan) is logged and does not stop
     * the remaining PIDs from being processed.
     * @throws IOException
     * @throws NoSuchAlgorithmException
     * @throws PLUSException
     */
    protected void poll() throws IOException, NoSuchAlgorithmException, PLUSException {
        String[] PIDs = PROC.list(new FilenameFilter() {
            public boolean accept(File dir, String name) {
                // Match only filenames that are entirely numeric.
                // These filenames correspond to system PIDs (process IDs)
                return name.matches("^[0-9]+$");
            }
        });

        // File.list() returns null when the directory cannot be read.
        if (PIDs == null) {
            log.warning("Couldn't list the contents of " + PROC.getAbsolutePath());
            return;
        }

        for (String pid : PIDs) {
            if (pid.equals(myPID)) {
                continue; // Don't process myself.
            }

            if (pollPIDs.isEmpty() || pollPIDs.contains(pid)) {
                try {
                    processPID(new File(PROC, pid));
                } catch (IOException exc) {
                    // Entries under /proc can vanish between listing and reading; skip this PID only.
                    log.warning("Skipping PID " + pid + ": " + exc);
                }
            }
        }
    }

    /**
     * Reads the "pos" and "flags" entries from the fdinfo file of one file descriptor of a process.
     * @param procPID the proc directory of the process, e.g. /proc/56
     * @param fd the name of the file descriptor.
     * @return the parsed information, or null if the fdinfo file is missing, unreadable, or lacks either entry.
     */
    protected ProcFDInfo getFDInfo(File procPID, String fd) {
        File fdInfoFile = new File(new File(procPID, "fdinfo"), fd);
        if (!fdInfoFile.exists()) {
            return null;
        }

        List<String> lines = slurpLines(fdInfoFile);
        if (lines == null) {
            return null; // Unreadable, or gone since the exists() check.
        }

        String flags = null;
        String pos = null;

        for (String line : lines) {
            if (line.indexOf(':') == -1) {
                // Ignore other lines, (inotify, tfd, eventfd-count, others)
                continue;
            }

            String[] toks = line.split("[ \\t]+");
            boolean hasValue = toks.length > 1;

            if (toks[0].contains("pos")) {
                if (hasValue) pos = toks[1];
            } else if (toks[0].contains("flags")) {
                if (hasValue) flags = toks[1];
            } else {
                log.warning("Unexpected line '" + line + "' in " + fdInfoFile.getAbsolutePath());
            }

            if (flags != null && pos != null) {
                break;
            }
        }

        // Shouldn't happen...
        if (pos == null || flags == null) {
            return null;
        }

        return new ProcFDInfo(pos, flags);
    }

    /**
     * Processes a PID identified by a particular /proc filesystem path, and creates the necessary provenance objects.
     * Each regular file the process has open becomes a node, linked to the invocation as an input, an output, or a
     * contributor, and the resulting collection is reported through the client.
     * @param procPID
     * @throws IOException
     * @throws NoSuchAlgorithmException
     * @throws PLUSException
     */
    protected void processPID(File procPID) throws IOException, NoSuchAlgorithmException, PLUSException {
        if (!procPID.exists()) {
            log.warning("PID " + procPID + " doesn't exist.");
            return;
        }

        PLUSInvocation inv = createOrRetrieveInvocation(procPID);
        if (inv == null) {
            return;
        }

        File fds = new File(procPID, "fd");
        String[] fileDescriptors = fds.list();
        if (fileDescriptors == null) {
            return; // No permissions here.
        }

        ProvenanceCollection pcol = new ProvenanceCollection();

        // Only add the invocation node when the client doesn't know it yet.
        boolean revisiting = client.exists(inv) != null;
        if (!revisiting) {
            pcol.addNode(inv);
        }

        List<String> inputs = new ArrayList<String>();
        List<String> outputs = new ArrayList<String>();
        List<String> related = new ArrayList<String>();

        for (String fdName : fileDescriptors) {
            File fdFile = new File(fds, fdName);

            // We get the canonical file to resolve the procfs symlink, so that
            // we're gathering metadata about the file, and not a symlink to the file.
            File canonical = fdFile.getCanonicalFile();

            // This is what will let us know whether the file was open for input/output, or whatever.
            ProcFDInfo fdInfo = getFDInfo(procPID, fdName);
            if (fdInfo == null) {
                log.warning("Couldn't get fdInfo for " + procPID + "/fdinfo/" + fdName);
                continue;
            }

            boolean previouslyWritten = false;
            PLUSObject fdObj;

            try {
                fdObj = createOnlyIfNew(canonical);
            } catch (ExistsException e) {
                // There is a valid file here, but we've already seen it. That means don't add it
                // to the collection or try to re-write it.
                previouslyWritten = true;
                fdObj = e.getObject();
            }

            if (fdObj == null) {
                continue;
            }

            if (!previouslyWritten) {
                fdObj.getMetadata().put("unix:fd", fdName);
                pcol.addNode(fdObj);
            }

            String fileUuid = "" + fdObj.getMetadata().get(UUID_KEY);

            if (fdInfo.O_APPEND() || fdInfo.O_CREAT() || fdInfo.O_WRONLY() || fdInfo.O_TRUNC()) {
                // It's an output if we're appending to it, creating it, writing only to it, or truncating it.
                outputs.add(fileUuid);
            } else if (fdInfo.O_RDONLY()) {
                // It's an input if we're read only.
                inputs.add(fileUuid);
            } else if (fdInfo.O_RDWR()) {
                related.add(fileUuid);
            } else {
                log.warning("Ambiguous mode for " + procPID + "/fdinfo/" + fdName + ": " + fdInfo.getFlags());
            }

            // NOTE(review): this permission-based classification runs in addition to the flag-based one above,
            // so a descriptor can be recorded twice. It looks like a leftover from before fdinfo was used;
            // behavior is kept unchanged until that is confirmed.
            if (fdFile.canWrite()) {
                outputs.add(fileUuid);
            } else {
                inputs.add(fileUuid);
            }

            if (previouslyWritten) {
                pcol.addNonProvenanceEdge(new NonProvenanceEdge(fdObj, fileUuid, UUID_KEY));
            }
        }

        // Edges are only created for objects that are still present in the cache.
        for (String id : inputs) {
            PLUSObject o = cache.get(id);
            if (o != null) {
                pcol.addEdge(new PLUSEdge(o, inv));
            }
        }

        for (String id : outputs) {
            PLUSObject o = cache.get(id);
            if (o != null) {
                pcol.addEdge(new PLUSEdge(inv, o));
            }
        }

        for (String id : related) {
            // Just mark these as "contributing".
            PLUSObject o = cache.get(id);
            if (o != null) {
                pcol.addEdge(new PLUSEdge(o, inv, PLUSWorkflow.DEFAULT_WORKFLOW, PLUSEdge.EDGE_TYPE_CONTRIBUTED));
            }
        }

        boolean written = false;
        if (pcol.countNodes() > 0) {
            written = client.report(pcol);
        }

        if (written) {
            log.info((revisiting ? "REVISITED" : "NEW") + ": " + inv.getMetadata().get("cmdline") + " PID "
                    + inv.getMetadata().get("pid") + " => " + inputs.size() + " inputs, " + outputs.size()
                    + " outputs. Total written=" + written);
        }
    }

    /**
     * Checks whether a file is a symbolic link, by comparing its canonical path with its absolute path after the
     * parent directory has been canonicalized.
     * @param file the file to check; may be null.
     * @return true if the final path component resolves to a different location, false otherwise or if file is null.
     * @throws IOException if a canonical path cannot be computed.
     */
    public boolean isSymlink(File file) throws IOException {
        if (file == null) {
            return false;
        }

        File canon;
        if (file.getParent() == null) {
            canon = file;
        } else {
            // Canonicalize the parent first so that only the last component can differ.
            File canonDir = file.getParentFile().getCanonicalFile();
            canon = new File(canonDir, file.getName());
        }

        return !canon.getCanonicalFile().equals(canon.getAbsoluteFile());
    }

    /**
     * Return the PID of the process that PROCtor is running underneath.
     * @return the part of the runtime MXBean name before the first "@", or the whole name if it contains no "@".
     */
    public static String getMyPID() {
        String pidStr = ManagementFactory.getRuntimeMXBean().getName();
        int idx = pidStr.indexOf("@");
        return (idx == -1) ? pidStr : pidStr.substring(0, idx);
    }

    /**
     * Get or create a new PLUSInvocation on the basis of a proc PID file, e.g. /proc/56 (pid 56)
     * Returns null for insufficient permissions, or when you shouldn't log a particular pid. (For
     * example, this program will not log its own run)
     * @param procPID the proc directory of the process.
     * @return the cached, previously stored, or newly created invocation; or null as described above.
     * @throws NoSuchAlgorithmException if the identifier for the process cannot be computed.
     * @throws IOException if information about the process cannot be read.
     */
    public PLUSInvocation createOrRetrieveInvocation(File procPID) throws NoSuchAlgorithmException, IOException {
        String procFileID = getIDForFile(procPID);
        if (procFileID == null) {
            return null;
        }

        String pid = procPID.getName();
        if (pid.equals(myPID)) {
            return null; // Don't log myself.
        }

        String[] children = procPID.list();
        if (children == null) {
            return null; // No permissions.
        }

        if (cache.containsKey(procFileID)) {
            return (PLUSInvocation) cache.get(procFileID);
        }

        try {
            ProvenanceCollection results = Neo4JPLUSObjectFactory.loadBySingleMetadataField(User.DEFAULT_USER_GOD,
                    UUID_KEY, procFileID);
            if (results != null && results.countNodes() > 0) {
                PLUSInvocation i = (PLUSInvocation) results.getNodes().toArray()[0];
                cache.put(procFileID, i);
                return i;
            }
        } catch (PLUSException exc) {
            // A failed lookup is not fatal: fall through and build a fresh invocation.
            log.log(java.util.logging.Level.WARNING, "Failed to look up existing invocation for " + procPID, exc);
        }

        long lmod = procPID.lastModified();

        // slurp() yields null when cmdline can't be read; store an empty string rather than a null value.
        String cmdline = slurp(new File(procPID, "cmdline"));
        if (cmdline == null) {
            cmdline = "";
        }

        File exe = new File(procPID, "exe").getCanonicalFile();
        File cwd = new File(procPID, "cwd").getCanonicalFile();

        PLUSInvocation inv = new PLUSInvocation(exe.getCanonicalPath());
        inv.getMetadata().put("pid", pid);
        inv.getMetadata().put("cwd", cwd.getCanonicalPath());
        inv.getMetadata().put("cmdline", cmdline);
        inv.getMetadata().put("started", "" + lmod);
        inv.getMetadata().put(UUID_KEY, procFileID);
        inv.getMetadata().put(Metadata.CONTENT_HASH_SHA_256, procFileID);

        // The owner of the proc directory is recorded as the owner of the invocation.
        Path path = Paths.get(procPID.getAbsolutePath());
        UserPrincipal owner = Files.getOwner(path);
        String username = owner.getName();

        try {
            inv.setOwner(Neo4JPLUSObjectFactory.getActor(username, true));
        } catch (PLUSException exc) {
            log.warning("Failed to set owner for " + inv + ": " + exc.getMessage());
        }

        cache.put(procFileID, inv); // Cache this so we don't go back over it.
        return inv;
    }

    /**
     * Create a PLUSObject corresponding to a given file, only if that file is new. Note that throwing an
     * ExistsException is not an error condition, to signal to the caller that provenance already exists.
     * @param f the file to inspect.
     * @return a PLUSObject if it is new; null if f is null, does not exist, is not a regular file, or no
     * identifier could be computed for it.
     * @throws ExistsException if provenance already exists for that object, this will be thrown.
     * @throws NoSuchAlgorithmException on error
     * @throws IOException on error.
     */
    public PLUSObject createOnlyIfNew(File f) throws ExistsException, NoSuchAlgorithmException, IOException {
        if (f == null || !f.exists()) {
            return null;
        }
        if (!f.isFile()) {
            return null; // Don't log things like sockets right now.
        }

        String id = getIDForFile(f);
        if (id == null) {
            log.warning("Couldn't compute file id for " + f);
            return null;
        }

        if (cache.containsKey(id)) {
            throw new ExistsException(cache.get(id));
        }

        ProvenanceCollection results;
        try {
            results = Neo4JPLUSObjectFactory.loadBySingleMetadataField(User.DEFAULT_USER_GOD, UUID_KEY, id, 1);
        } catch (PLUSException exc) {
            // The cause is preserved in the rethrown exception.
            throw new RuntimeException(exc);
        }

        if (results != null && results.countNodes() > 0) {
            PLUSObject o = (PLUSObject) results.getNodes().toArray()[0];
            cache.put(id, o);
            throw new ExistsException(o);
        }

        PLUSFile pf = new PLUSFile(f);
        pf.getMetadata().put(UUID_KEY, id);
        cache.put(id, pf);

        long fileSize;
        try {
            fileSize = f.length();
        } catch (SecurityException exc) {
            log.log(java.util.logging.Level.WARNING, "Couldn't determine size of " + f, exc);
            return pf;
        }

        // Best effort to hash the content; larger files are skipped.
        if (fileSize < MAX_HASHABLE_FILE_SIZE) {
            try (FileInputStream fis = new FileInputStream(f)) {
                String sha256hash = ContentHasher.formatAsHexString(hasher.hash(fis));
                pf.getMetadata().put(Metadata.CONTENT_HASH_SHA_256, sha256hash);
            } catch (IOException exc) {
                // Unreadable content just means no content hash is recorded.
                log.fine("Couldn't hash content of " + f + ": " + exc.getMessage());
            }
        }

        return pf;
    }

    /**
     * Builds the command line options understood by {@link #main(String[])}: "pid", "once" and "poll".
     * @return the option definitions.
     */
    public static Options makeCLIOptions() {
        Options options = new Options();

        options.addOption(OptionBuilder.withArgName("pid").hasArg().isRequired(false)
                .withDescription("If specified, capture only provenance for this single PID and its children.")
                .create("pid"));

        options.addOption(OptionBuilder.withArgName("once").hasArg(false).isRequired(false)
                .withDescription("Poll the PID fs once, and then quit").create("once"));

        options.addOption(OptionBuilder.withArgName("poll").hasArg(false).isRequired(false)
                .withDescription("Poll continuously until user interrupts.").create("poll"));

        return options;
    }

    /** Prints the command line help for this tool. */
    public static void usage() {
        HelpFormatter formatter = new HelpFormatter();
        formatter.printHelp("PROCtor", makeCLIOptions());
    }

    /**
     * Command line entry point. With the "pid" option only the given (space-separated) PIDs are processed.
     * With "once" a single poll is done; with "poll", or when neither is given, polling continues until the
     * program is interrupted. Exits with status 1 if the proc filesystem is missing or the arguments are invalid.
     * @param args command line arguments.
     * @throws Exception if polling fails.
     */
    public static void main(String[] args) throws Exception {
        ProvenanceClient.instance = client;

        CommandLineParser parser = new GnuParser();

        if (!PROC.exists()) {
            log.severe(
                    "This utility is intended to run on Linux systems with a PROC filesystem.  You do not appear to have one (or it is not readable)");
            System.exit(1);
        }

        try {
            CommandLine line = parser.parse(makeCLIOptions(), args);

            String pidArg = line.getOptionValue("pid");
            boolean once = line.hasOption("once");
            boolean poll = line.hasOption("poll");

            System.out.println("Once " + once + " poll " + poll);

            PROCtor p = new PROCtor();

            if (once && poll) {
                System.err.println("You can't specify both to run once and to poll.");
                usage();
                System.exit(1);
            }

            // Default is to poll if user hasn't otherwise specified.
            if (!poll && !once) {
                poll = true;
            }

            if (pidArg != null) {
                System.out.println("PID=" + pidArg);
                String[] pids = pidArg.split(" +");
                for (String pid : pids) {
                    p.addPID(pid);
                }
            }

            // A non-positive count makes run() poll forever.
            p.run(5000, poll ? -1 : 1);
        } catch (ParseException exc) {
            usage();
            System.exit(1);
        }
    }
} // End PROCtor