ingestor.SingleFileProcessor.java Source code

Java tutorial

Introduction

Here is the source code for ingestor.SingleFileProcessor.java

Source

//Copyright (c) 2015 Hitachi Data Systems, Inc.
//All Rights Reserved.
//
//   Licensed under the Apache License, Version 2.0 (the "License"); you may
//   not use this file except in compliance with the License. You may obtain
//   a copy of the License at
//
//         http://www.apache.org/licenses/LICENSE-2.0
//
//   Unless required by applicable law or agreed to in writing, software
//   distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
//   WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
//   License for the specific language governing permissions and limitations
//   under the License.
//
//Package: COMET::Data Ingestor Service
//Author: Chris Delezenski <chris.delezenski@hdsfed.com>
//Compilation Date: 2015-05-06
//License: Apache License, Version 2.0
//Version: 1.21.0
//(RPM) Release: 1
//SVN: r554

//NOTE: this code was originally developed by Cliff Grimm <clifford.grimm@hds.com>

package ingestor;

import ingestor.metadata.CustomMetadataExtractor;

import java.io.BufferedReader;
import java.io.ByteArrayInputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.net.URISyntaxException;
import java.net.URL;
import java.util.Date;
import java.util.Map;

import org.apache.http.client.ClientProtocolException;
import org.apache.http.client.HttpClient;
import org.apache.http.client.HttpResponseException;
import org.apache.http.entity.InputStreamEntity;

import com.hds.hcp.apihelpers.HCPUtils;
import com.hdsfed.cometapi.AnnotationHelper;
import com.hdsfed.cometapi.CometProperties;
import com.hdsfed.cometapi.ExtendedLogger;
import com.hdsfed.cometapi.HCPClient;
import com.hdsfed.cometapi.IngestorJSON;
import com.hdsfed.cometapi.StringHelper;
import com.hdsfed.cometapi.ThreadTrackerDB;

//TODO: need to reinforce thread safety in this class
//      since SFP is operating inside of a thread, we want the run() function to handle all exceptions
//      therefore SFP should throw but not catch exceptions
//      consider moving SFP into library
public class SingleFileProcessor {
    //TODO: merge loggers together
    private static ExtendedLogger logger = new ExtendedLogger(SingleFileProcessor.class.getName());
    private static ExtendedLogger ScreenLog = new ExtendedLogger(SingleFileProcessor.class.getName());
    //private static Logger logger = Logger.getLogger(SingleFileProcessor.class.getPackage().getName());
    private WriteStatus eObjectStatus;
    private WriteStatus eCustomMetadataStatus;
    private File mCurrentFile;

    // Local member variables.
    private Boolean bIsInitialized = false;
    //private Boolean bCanUseWholeIO = false;
    private HttpClient mHttpClient;
    //   private String sHCPVersion;
    public String AuthToken;
    private CustomMetadataExtractor mCustomMetadataGenerator;

    //   private static String UNKNOWN_HCP_VERSION = "<unknown>";
    //private CustomThumbnailGenerator mThumbnailGenerator;

    private Map<String, String> annotations = null;
    private String pathPrefix;
    private String workingPath;

    private int threadID = -1;

    /*
     * Return values for CheckExistanceOnHCP method
     */
    public enum ObjectState {
        OBJECT_DOES_NOT_EXIST, OBJECT_ONLY, OBJECT_AND_CUSTOM_METADATA
    };

    public enum WriteStatus {
        WRITE_NOT_ATTEMPTED, WRITE_SUCCESS, WRITE_FAILURE
    }

    SingleFileProcessor(String workingPath, String pathPrefix) {
        eObjectStatus = WriteStatus.WRITE_NOT_ATTEMPTED;
        eCustomMetadataStatus = WriteStatus.WRITE_NOT_ATTEMPTED;
        this.workingPath = workingPath;
        this.pathPrefix = pathPrefix;

        ScreenLog.setDebug(CometProperties.getInstance().getDebug());
        ScreenLog.setSilence(
                !CometProperties.getInstance().getDebug() && !CometProperties.getInstance().getVerbose());
    }

    WriteStatus getObjectStatus() {
        return eObjectStatus;
    };

    WriteStatus getCustomMetadataStatus() {
        return eCustomMetadataStatus;
    };

    File getCurrentFile() {
        return mCurrentFile;
    };

    /**
     * Initialize the object by setting up internal data and establishing the HTTP client connection.
     * 
     * This routine is called by the ReadFromHCP and WriteToHCP routines, so calling it by the
     * consumer of this class is unnecessary.
     *
     */
    void initialize() throws Exception {

        if (!bIsInitialized) // Only initialize if we haven't already
        {
            // Setup properties member.
            CometProperties mProps = CometProperties.getInstance();
            // Setup HTTP Client
            setmHttpClient(HCPUtils.initHttpClient());
            mCustomMetadataGenerator = new CustomMetadataExtractor(this);
            AuthToken = mProps.getAuthToken();

            //TODO: move this functionality into HCPClient
            // Determine if we can use Whole I/O by looking at the version number.
            //sHCPVersion = GetHCPVersion(mProps.getDestinationRootPath().toURI());
            //         sHCPVersion = GetHCPVersion();

            //only support 6.0+
            //         logger.info("Detected HCP Version: " + sHCPVersion);
            //         try {
            //            Float tmpValue = Float.valueOf(sHCPVersion.substring(0, 3));
            //            if (6.0 > tmpValue) {
            //               //bCanUseWholeIO = true;
            //               
            //               logger.info("This version of HCP is not supported");
            //            }
            //         } catch (NumberFormatException x) {
            //            if (! sHCPVersion.equals(UNKNOWN_HCP_VERSION) ) {
            //               throw x;
            //            }
            //         }

            bIsInitialized = true;
        }
    }

    /**
     * Internal function to look at the HCP machine and determine the passed object exists
     *   on the system and whether it has custom-metadata or not.  It accomplishes this by
     *   doing an HCP HTTP REST HEAD request to the object and looks at the metadata returned
     *   about the object.
     * 
     * @param inDestinationPath URL to HCP file to retrieve state.
     * @return ObjectState enumeration value as to the state on the HCP system.
     * @throws ClientProtocolException
     * @throws IOException
     * @throws HttpResponseException
     */
    //TODO: move this function into HCPClient
    //    private String GetHCPVersion_DONOTUSE() {
    //       String retVal = UNKNOWN_HCP_VERSION;
    //      try {
    //         logger.info("Getting HCP Version");
    //         CometProperties mProps=CometProperties.getInstance();
    //         HCPClient client=new HCPClient(mProps);
    //         client.setRootpath(mProps.getDestinationRootPath(pathPrefix));
    //         retVal=client.GetHCPVersion();
    //      } catch (Exception x) {
    //         logger.warning("Unable to determine HCP Version: " + x.getMessage());
    //      }
    //      if(retVal.length()<3) retVal=UNKNOWN_HCP_VERSION;
    //      return retVal;
    //    }

    //TODO: need to replace this with CSV in comet.properties
    boolean blackListCheck(File inFile) {
        return (inFile.getName().equals(".DS_Store") || inFile.getName().equals("thumbs.db")
                || inFile.getName().endsWith(".default.xml") || inFile.getName().startsWith(".svn")
                || inFile.getName().startsWith(".") || inFile.getName().endsWith("tmp")
                || inFile.getName().startsWith("__MACOSX") || inFile.getName().endsWith(".inuse")
                || (inFile.getName().endsWith(".xml") && !CometProperties.getInstance().skipMetadata()));

    }

    private boolean CombineMetadata(File inFile) {
        CometProperties mProps = CometProperties.getInstance();
        return (!mProps.shouldSkipMetadata(inFile) && mProps.shouldCombineAnnotations());
    }

    boolean processFile(File inFile) throws Exception {

        if (CometProperties.WasTerminated()) {
            logger.severe("COMET was externally terminated prematurely");
            return false;
        }

        if (null == inFile) {
            logger.severe("Invalid input parameter.  inFile is null");
            return false;
        }

        if (!bIsInitialized) {
            logger.severe("Programming Error. Object Not Initialized");
            return false;
        }

        mCurrentFile = inFile;
        Boolean skipmd = CometProperties.getInstance().shouldSkipMetadata(mCurrentFile);
        logger.fine("Processing File:" + mCurrentFile.getAbsolutePath());

        //TODO: still useful to track this?
        eObjectStatus = WriteStatus.WRITE_NOT_ATTEMPTED;
        eCustomMetadataStatus = WriteStatus.WRITE_NOT_ATTEMPTED;

        if (!mCurrentFile.exists()) {
            logger.warning("File does not exist: " + mCurrentFile.getAbsolutePath() + " (Skipping)");
            return false;
        }

        if (CometProperties.WasTerminated()) {
            logger.severe("COMET was externally terminated prematurely");
            return false;
        }

        //TODO: verify that shouldUnzip is not active when the source file system is r/o
        if (isArchive(mCurrentFile) && CometProperties.getInstance().shouldUnzip()) {
            ThreadTrackerDB.updateDBOverHTTP(mCurrentFile.getAbsolutePath() + "-unzipping", threadID,
                    mCurrentFile.length());

            //need to improve this black list
            if (UnzipArchive(mCurrentFile)) {
                if (!mCurrentFile.delete()) {
                    logger.warning("Unable to delete archive file: " + mCurrentFile.getAbsolutePath());
                }
            }
            logger.warning(
                    "Archive files are not to be ingested: " + mCurrentFile.getAbsolutePath() + " (Skipping)");
            return false;
        }

        if (CometProperties.WasTerminated()) {
            logger.severe("COMET was externally terminated prematurely");
            return false;
        }

        if (blackListCheck(mCurrentFile)) {
            logger.warning("File is on black list: " + mCurrentFile.getAbsolutePath() + " (Skipping)");
            return false;
        } else if (StringHelper.FileExists(mCurrentFile.getAbsolutePath() + ".inuse")) {
            logger.warning("File is not black list, but is currently locked: " + mCurrentFile.getAbsolutePath()
                    + " (skipping)");
            return false;
        } //else file is not on blacklist and free to move on

        ThreadTrackerDB.updateDBOverHTTP(mCurrentFile.getAbsolutePath(), threadID, mCurrentFile.length());
        boolean success = CometProperties.getSkipUpload()
                || WriteToHCP((new File(workingPath)).getAbsolutePath(), mCurrentFile, skipmd);

        //if writing to HCP fails (eg inappropriate for filetype to write to HCP), just quit
        if (!success)
            return false;

        // If the write to HCP succeeded, 
        //     remove the file we just processed off the local file system..
        //we're not quite done yet, we need to combine annotations into the new default annotation

        if (CometProperties.WasTerminated()) {
            logger.severe("COMET was externally terminated prematurely");
            return false;
        }

        if (CometProperties.getSkipUpload()) {
            skipmd = true;
            //delay execution for a number of seconds to ensure we see the file being fake-processed
            Delay();

        }

        //attempt to write metadata, if allowed to
        if (!skipmd && CombineMetadata(mCurrentFile)) {

            HCPClient client = new HCPClient(CometProperties.getInstance());
            URL encodedURL = AnnotationHelper.FSToURLPath(
                    CometProperties.getInstance().getDestinationRootPath(pathPrefix), workingPath,
                    new File(mCurrentFile.toString()));
            client.setRootpath(CometProperties.getInstance().getDestinationRootPath(pathPrefix));
            String captured = client.HttpPutHCPContent(
                    AnnotationHelper.AnnotationMapToCombinedAnnotation(annotations),
                    AnnotationHelper.PathAndAnnotationToURL(client.getRootpath(), encodedURL.toString(),
                            CometProperties.getInstance().getCombinedAnnotation()));
            //neccesary to capture and show output here?
            ScreenLog.out("output from PUT operation: " + captured);
            success = true;
        } else {
            ScreenLog.out("\tCombineMetadata() returned false, not writing combined annotation for file "
                    + mCurrentFile.toString());
        }

        //TODO: should verify that shouldDeleteSourceFiles() is inactive when read/only
        //TODO: wrap this delete process into another function
        if (success && CometProperties.getInstance().shouldDeleteSourceFiles()) {
            ThreadTrackerDB.updateDBOverHTTP(mCurrentFile.getAbsolutePath() + "-deleting", threadID,
                    mCurrentFile.length());

            ScreenLog.out("\tattempting delete... of  " + mCurrentFile.getPath());
            if (mCurrentFile.delete()) {
                ScreenLog.out("\t\tdelete was successful!");
            } else {
                ScreenLog.out("\t\tdelete was unsuccessful, try force delete");
                logger.warning(
                        "Failed to delete source file with current permissions: " + mCurrentFile.getAbsolutePath());

                if (CometProperties.getInstance().shouldForceDeleteSourceFiles()) {
                    if (mCurrentFile.setWritable(true, true) && !mCurrentFile.delete()) {
                        ScreenLog.out("\t\tforce delete was unsuccessful");
                        logger.warning("Failed to delete source file after attempt to make writable");
                    } else {
                        ScreenLog.out("\t\tforce delete was successful");
                    }
                } else {
                    ScreenLog.out("\t\tforce delete not allowed");
                }
            }
            ScreenLog.out("\tafter delete attempt of " + mCurrentFile.getPath());
        }
        return success;
    }

    private void Delay() throws InterruptedException {
        // TODO Auto-generated method stub
        if (!CometProperties.getIngestDelay())
            return;

        IngestorJSON ij = IngestorJSON.getInstance();

        Thread.sleep(ij.getSize(getThreadID()));

    }

    //verify that a file has been migrated to HCP
    boolean verifyFile(File inFile) throws Exception {

        if (CometProperties.WasTerminated()) {
            logger.severe("COMET was externally terminated prematurely");
            return false;
        }

        if (null == inFile) {
            logger.severe("Invalid input parameter.  inFile is null");
            return false;
        }

        if (!bIsInitialized) {
            logger.severe("Programming Error. Object Not Initialized");
            return false;
        }

        mCurrentFile = inFile;

        //need to build the encodedPathURL
        URL encodedPathURL = AnnotationHelper.FSToURLPath(
                CometProperties.getInstance().getDestinationRootPath(pathPrefix),
                (new File(workingPath)).getAbsolutePath(), mCurrentFile);

        boolean success = CheckExistanceOnHCP(encodedPathURL);

        //if writing to HCP fails (eg inappropriate for filetype to write to HCP), just quit
        if (!success)
            return false;

        return success;
    }

    //TODO: move this function into the library
    private String GetFileHeader(File thisFile) throws InterruptedException, IOException {
        String cmd = "/usr/bin/file " + thisFile.toString();
        logger.info("run command: " + cmd);
        Runtime run = Runtime.getRuntime();
        Process pr = null;
        pr = run.exec(cmd);
        pr.waitFor();
        BufferedReader buf = new BufferedReader(new InputStreamReader(pr.getInputStream()));
        return buf.readLine();
    }

    //TODO: move this function into the library
    private boolean UnzipArchive(File inSourceFile) throws IOException, InterruptedException {
        logger.begin("UnzipArchive(" + inSourceFile + ")");
        String line = "";
        String cmd = "";
        Runtime run = Runtime.getRuntime();
        Process pr = null;
        String srcPath = "", tgtPath = "";
        line = GetFileHeader(inSourceFile);
        srcPath = inSourceFile.getAbsolutePath();
        tgtPath = inSourceFile.getParentFile().getAbsolutePath();

        logger.force("\tsrcPath=" + srcPath);
        logger.force("\ttgtPath=" + tgtPath);

        logger.force("\tline=" + line);
        if (line.contains("gzip compressed data")) {
            cmd = "/bin/tar -C " + tgtPath + " -xvzf " + srcPath;
        } else if (line.contains("xz compressed data")) {
            cmd = "/bin/tar  -C " + tgtPath + " -xvJf " + srcPath;
        } else if (line.contains("Zip archive data")) {
            cmd = "/usr/bin/unzip " + srcPath + " -d " + tgtPath;
        } else {
            logger.force("\t\tUnable to determine compression type, exiting");
            return false;
        }
        logger.force("\t\tcmd=" + cmd);

        //logger.fine("\nexecuting: "+cmd+"\n");
        pr = run.exec(cmd);

        BufferedReader buf = new BufferedReader(new InputStreamReader(pr.getInputStream()));
        BufferedReader buferr = new BufferedReader(new InputStreamReader(pr.getErrorStream()));

        // read everything and output to outputStream as you go
        String s = null;
        ScreenLog.out("===== stdout =====");
        while ((s = buf.readLine()) != null) {
            ScreenLog.out("line=" + s);
        }

        ScreenLog.out("===== stderr =====");
        while ((s = buferr.readLine()) != null) {
            ScreenLog.out("line=" + s);
        }
        pr.waitFor();

        logger.end("UnzipArchive(" + inSourceFile + ")==exit(" + pr.exitValue() + ")");
        return pr.exitValue() == 0;
    }

    private boolean isArchive(File inSourceFile) {
        return (inSourceFile.toString().contains(".mdo") || inSourceFile.toString().contains(".tgz")
                || inSourceFile.toString().contains(".xz") || inSourceFile.toString().contains(".tar.gz")
                || inSourceFile.toString().contains(".zip"));
    }

    //   
    //     * Internal function to look at the HCP machine and determine the passed object exists
    //     *   on the system and whether it has custom-metadata or not.  It accomplishes this by
    //     *   doing an HCP HTTP REST HEAD request to the object and looks at the metadata returned
    //     *   about the object.
    //     * 
    //     * @param inDestinationPath URL to HCP file to retrieve state.
    //     * @return ObjectState enumeration value as to the state on the HCP system.
    //     * @throws ClientProtocolException
    //     * @throws IOException
    //     * @throws HttpResponseException
    //     
    //TODO: deprecate this function and instead directly setup and call client.HCPObjectExists
    //      OR: create static helper functions in HCPClient
    private Boolean CheckExistanceOnHCP(URL encodedPathURL) throws URISyntaxException, IOException {
        HCPClient client = new HCPClient(CometProperties.getInstance());
        client.setRootpath(CometProperties.getInstance().getDestinationRootPath(pathPrefix));
        return client.HCPObjectExists(encodedPathURL);
    }

    /**
     * This method performs a PUT of an object data file and/or custom metadata depending
     * on the state of the object on the HCP system and the configuration of the execution
     * based on the properties file and the HCP system version.
     * @param objectOnly 
     * @throws Exception 
     */
    //TODO: Create our own Exceptions and throw them
    private Boolean WriteToHCP(String inInitialPath, File inSourceFile, boolean objectOnly) throws Exception {
        Boolean retVal = Boolean.TRUE; // Let's be optimistic.
        logger.info("Processing File: " + inSourceFile.getCanonicalPath());

        //Build the destination path based on the source path.
        //TODO: newer functions are not portable and assume Linux file system
        URL encodedPathURL = AnnotationHelper.FSToURLPath(
                CometProperties.getInstance().getDestinationRootPath(pathPrefix), inInitialPath, inSourceFile);
        if (!CheckExistanceOnHCP(encodedPathURL)) {
            retVal = retVal && WriteObjectToHCP(encodedPathURL, inSourceFile);
        }
        if (!objectOnly) {
            retVal = retVal && WriteAnnotationsToHCP(encodedPathURL, inSourceFile);
        }
        return retVal;
    }

    //TODO: consider combining this with WriteToHCP; or moving functionality into HCPClient
    public Boolean WriteObjectToHCP(URL encodedPathURL, File inSourceFile) throws Exception {
        if (inSourceFile.toString().endsWith(".inuse"))
            throw new IOException();
        ScreenLog.begin("Write Object to HCP");
        HCPClient client = new HCPClient(CometProperties.getInstance());
        client.setRootpath(CometProperties.getInstance().getDestinationRootPath(pathPrefix));
        FileInputStream fis = null;
        Boolean fileLock = false;
        //FileLocker fis=null;
        File fileLockFile = new File(inSourceFile.getAbsolutePath() + ".inuse");
        if (CometProperties.getInstance().getUseFileLocking()) {
            if (fileLockFile.exists()) {
                ScreenLog.warning("\tLock file already exists, bail:" + inSourceFile.getAbsolutePath() + ".inuse");
                return true; //acceptable outcome
            } else {
                ScreenLog.out("\tLock file does not exist, create it:" + inSourceFile.getAbsolutePath() + ".inuse");
                StringHelper.touch(fileLockFile);
                fileLock = true;
            }
        } else {
            ScreenLog.fine("\tNot using file locking");
        }

        fis = new FileInputStream(inSourceFile);
        String captured = client.HttpPutHCPContent(new InputStreamEntity(fis, -1), encodedPathURL);

        fis.close();
        if (fileLock) {
            ScreenLog.fine("deleting file lock");
            fileLockFile.delete();
        }
        ScreenLog.out("output from PUT operation: " + captured);
        ScreenLog.out("filename(" + inSourceFile.toString() + ") status code = " + client.getStatusCode() + "\n");

        if (409 == client.getStatusCode()) {
            logger.fine(" Object already exists on HCP, ignore the error for transaction \""
                    + inSourceFile.getAbsolutePath() + "\" to \"" + encodedPathURL);
        }
        // If the return code is anything BUT 200 range indicating success, we have to throw an exception.
        else {
            if (2 != client.getStatusCode() / 100)
                eObjectStatus = WriteStatus.WRITE_FAILURE;
            else {
                eObjectStatus = WriteStatus.WRITE_SUCCESS;
            }
            Date d = new Date();
            logger.force("[" + d.toString() + "] PUT \"" + inSourceFile.getAbsolutePath() + "\" to \""
                    + encodedPathURL + "\" " + client.getStatusCode());
        }
        ScreenLog.end("Write Object to HCP");
        return eObjectStatus == WriteStatus.WRITE_SUCCESS;
    }

    /**
    * Writes custom-metadata ONLY to an already existing object in HCP.
    * 
    * @param encodedPathURL
    * @param inSourceFile
    * @throws Exception 
    */
    //TODO: consider moving this function into HCPClient
    private Boolean WriteAnnotationsToHCP(URL encodedPathURL, File inSourceFile) throws Exception {
        annotations = mCustomMetadataGenerator.generateAnnotations(inSourceFile);
        HCPClient client = new HCPClient(CometProperties.getInstance());
        client.setRootpath(CometProperties.getInstance().getDestinationRootPath(pathPrefix));
        ThreadTrackerDB.updateDBOverHTTP(mCurrentFile.getAbsolutePath() + "-writing-metadata", threadID,
                mCurrentFile.length());

        for (String key : annotations.keySet()) {
            if (key.equals("") || annotations.get(key).equals("") || annotations.get(key).contains("ignore")) {
                //annotations.remove(key);
                logger.fine("annotation " + key + " was blank or marked to be ignored, skipping");
                ScreenLog.out("\tannotation " + key + " was either blank or marked to be ignored, don't ingest");
                annotations.put(key, "");
                continue;
            } else {
                ScreenLog.out("\tannotation " + key + " was neither blank nor marked to be ignored, ingesting...");
            }
            String currentAnnotation = key;
            logger.info("Sending PUT Custom Metadata to URL:  " + encodedPathURL + " for annotation "
                    + currentAnnotation);
            if (annotations.containsKey(key) && !annotations.get(key).equals(""))
                client.HttpPutHCPContent(
                        new InputStreamEntity(new ByteArrayInputStream(annotations.get(key).getBytes()), -1),
                        new URL(encodedPathURL + "?type=custom-metadata&annotation=" + currentAnnotation));
            if (client.getStatusCode() / 100 == 2)
                eCustomMetadataStatus = WriteStatus.WRITE_SUCCESS;
        } //end for loop

        return eCustomMetadataStatus == WriteStatus.WRITE_SUCCESS;
    }

    public Map<String, String> getAnnotations() {
        return mCustomMetadataGenerator.getAnnotations();
    }

    public HttpClient getmHttpClient() {
        return mHttpClient;
    }

    public void setmHttpClient(HttpClient mHttpClient) {
        this.mHttpClient = mHttpClient;
    }

    public String getHCPName() {
        return CometProperties.getInstance().getDestinationHCPName();
    }

    public String getWorkingPath() {
        return workingPath;
    }

    public void setWorkingPath(String workingPath) {
        this.workingPath = workingPath;
    }

    public String getPathPrefix() {
        // TODO Auto-generated method stub
        return pathPrefix;
    }

    public void setPathPrefix(String pathPrefix) {
        // TODO Auto-generated method stub
        this.pathPrefix = pathPrefix;
    }

    public int getThreadID() {
        return threadID;
    }

    public void setThreadID(int threadID) {
        this.threadID = threadID;
    }

}