org.pentaho.di.job.entries.deletefiles.JobEntryDeleteFiles.java Source code

Introduction

Here is the source code for org.pentaho.di.job.entries.deletefiles.JobEntryDeleteFiles.java. This class implements the 'Delete files' job entry in Pentaho Data Integration (Kettle): it deletes files matching a set of path/file-mask pairs, optionally recursing into subfolders. The pairs come either from the entry's own arguments or from the result rows of a previous job entry or transformation.
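
One detail worth knowing before reading the listing: the file masks this entry matches are java.util.regex patterns, compiled with Pattern.compile in the GetFileWildcard method below, not shell-style globs. A quick standalone illustration (the MaskDemo class is hypothetical, written only for this page):

import java.util.regex.Pattern;
import java.util.regex.PatternSyntaxException;

public class MaskDemo {
    public static void main(String[] args) {
        // Regex masks, as GetFileWildcard expects them:
        System.out.println(Pattern.matches(".*\\.txt", "report.txt")); // true
        System.out.println(Pattern.matches(".*\\.txt", "report.csv")); // false

        // A shell glob is not a valid regex; "*" has nothing to repeat:
        try {
            Pattern.compile("*.txt");
        } catch (PatternSyntaxException e) {
            System.out.println("Glob syntax rejected: " + e.getDescription());
        }
    }
}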

Source

/*! ******************************************************************************
 *
 * Pentaho Data Integration
 *
 * Copyright (C) 2002-2016 by Pentaho : http://www.pentaho.com
 *
 *******************************************************************************
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 ******************************************************************************/

package org.pentaho.di.job.entries.deletefiles;

import com.google.common.collect.ArrayListMultimap;
import com.google.common.collect.Multimap;
import org.pentaho.di.core.exception.KettleValueException;
import org.pentaho.di.job.entry.validator.AbstractFileValidator;
import org.pentaho.di.job.entry.validator.AndValidator;
import org.pentaho.di.job.entry.validator.JobEntryValidatorUtils;

import java.io.IOException;
import java.util.List;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.apache.commons.vfs2.FileObject;
import org.apache.commons.vfs2.FileSelectInfo;
import org.apache.commons.vfs2.FileSelector;
import org.apache.commons.vfs2.FileType;
import org.pentaho.di.cluster.SlaveServer;
import org.pentaho.di.core.CheckResultInterface;
import org.pentaho.di.core.Const;
import org.pentaho.di.core.util.Utils;
import org.pentaho.di.core.Result;
import org.pentaho.di.core.RowMetaAndData;
import org.pentaho.di.core.database.DatabaseMeta;
import org.pentaho.di.core.exception.KettleDatabaseException;
import org.pentaho.di.core.exception.KettleException;
import org.pentaho.di.core.exception.KettleXMLException;
import org.pentaho.di.core.variables.VariableSpace;
import org.pentaho.di.core.vfs.KettleVFS;
import org.pentaho.di.core.xml.XMLHandler;
import org.pentaho.di.i18n.BaseMessages;
import org.pentaho.di.job.Job;
import org.pentaho.di.job.JobMeta;
import org.pentaho.di.job.entry.JobEntryBase;
import org.pentaho.di.job.entry.JobEntryInterface;
import org.pentaho.di.job.entry.validator.ValidatorContext;
import org.pentaho.di.repository.ObjectId;
import org.pentaho.di.repository.Repository;
import org.pentaho.di.resource.ResourceEntry;
import org.pentaho.di.resource.ResourceEntry.ResourceType;
import org.pentaho.di.resource.ResourceReference;
import org.pentaho.metastore.api.IMetaStore;
import org.w3c.dom.Node;

/**
 * This defines a 'delete files' job entry.
 *
 * @author Samatar Hassan
 * @since 06-05-2007
 */
public class JobEntryDeleteFiles extends JobEntryBase implements Cloneable, JobEntryInterface {

    private static Class<?> PKG = JobEntryDeleteFiles.class; // for i18n purposes, needed by Translator2!!

    private boolean argFromPrevious;

    private boolean includeSubfolders;

    private String[] arguments;

    private String[] filemasks;

    public JobEntryDeleteFiles(String jobName) {
        super(jobName, "");
        argFromPrevious = false;
        arguments = null;

        includeSubfolders = false;
    }

    public JobEntryDeleteFiles() {
        this("");
    }

    public void allocate(int numberOfFields) {
        arguments = new String[numberOfFields];
        filemasks = new String[numberOfFields];
    }

    public Object clone() {
        JobEntryDeleteFiles jobEntry = (JobEntryDeleteFiles) super.clone();
        if (arguments != null) {
            int nrFields = arguments.length;
            jobEntry.allocate(nrFields);
            System.arraycopy(arguments, 0, jobEntry.arguments, 0, nrFields);
            System.arraycopy(filemasks, 0, jobEntry.filemasks, 0, nrFields);
        }
        return jobEntry;
    }

    public String getXML() {
        StringBuilder retval = new StringBuilder(300);

        retval.append(super.getXML());
        retval.append("      ").append(XMLHandler.addTagValue("arg_from_previous", argFromPrevious));
        retval.append("      ").append(XMLHandler.addTagValue("include_subfolders", includeSubfolders));

        retval.append("      <fields>").append(Const.CR);
        if (arguments != null) {
            for (int i = 0; i < arguments.length; i++) {
                retval.append("        <field>").append(Const.CR);
                retval.append("          ").append(XMLHandler.addTagValue("name", arguments[i]));
                retval.append("          ").append(XMLHandler.addTagValue("filemask", filemasks[i]));
                retval.append("        </field>").append(Const.CR);
            }
        }
        retval.append("      </fields>").append(Const.CR);

        return retval.toString();
    }
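
    /*
     * For reference, getXML() above emits a fragment like the following for a
     * single argument (values are illustrative, indentation simplified):
     *
     *   <arg_from_previous>N</arg_from_previous>
     *   <include_subfolders>Y</include_subfolders>
     *   <fields>
     *     <field>
     *       <name>/tmp/pdi-out</name>
     *       <filemask>.*\.tmp</filemask>
     *     </field>
     *   </fields>
     */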

    public void loadXML(Node entrynode, List<DatabaseMeta> databases, List<SlaveServer> slaveServers,
            Repository rep, IMetaStore metaStore) throws KettleXMLException {
        try {
            super.loadXML(entrynode, databases, slaveServers);
            argFromPrevious = "Y".equalsIgnoreCase(XMLHandler.getTagValue(entrynode, "arg_from_previous"));
            includeSubfolders = "Y".equalsIgnoreCase(XMLHandler.getTagValue(entrynode, "include_subfolders"));

            Node fields = XMLHandler.getSubNode(entrynode, "fields");

            int numberOfFields = XMLHandler.countNodes(fields, "field");
            allocate(numberOfFields);

            for (int i = 0; i < numberOfFields; i++) {
                Node fnode = XMLHandler.getSubNodeByNr(fields, "field", i);

                arguments[i] = XMLHandler.getTagValue(fnode, "name");
                filemasks[i] = XMLHandler.getTagValue(fnode, "filemask");
            }
        } catch (KettleXMLException xe) {
            throw new KettleXMLException(BaseMessages.getString(PKG, "JobEntryDeleteFiles.UnableToLoadFromXml"),
                    xe);
        }
    }

    public void loadRep(Repository rep, IMetaStore metaStore, ObjectId id_jobentry, List<DatabaseMeta> databases,
            List<SlaveServer> slaveServers) throws KettleException {
        try {
            argFromPrevious = rep.getJobEntryAttributeBoolean(id_jobentry, "arg_from_previous");
            includeSubfolders = rep.getJobEntryAttributeBoolean(id_jobentry, "include_subfolders");

            int numberOfArgs = rep.countNrJobEntryAttributes(id_jobentry, "name");
            allocate(numberOfArgs);

            for (int i = 0; i < numberOfArgs; i++) {
                arguments[i] = rep.getJobEntryAttributeString(id_jobentry, i, "name");
                filemasks[i] = rep.getJobEntryAttributeString(id_jobentry, i, "filemask");
            }
        } catch (KettleException dbe) {
            throw new KettleException(BaseMessages.getString(PKG, "JobEntryDeleteFiles.UnableToLoadFromRepo",
                    String.valueOf(id_jobentry)), dbe);
        }
    }

    public void saveRep(Repository rep, IMetaStore metaStore, ObjectId id_job) throws KettleException {
        try {
            rep.saveJobEntryAttribute(id_job, getObjectId(), "arg_from_previous", argFromPrevious);
            rep.saveJobEntryAttribute(id_job, getObjectId(), "include_subfolders", includeSubfolders);

            // save the arguments...
            if (arguments != null) {
                for (int i = 0; i < arguments.length; i++) {
                    rep.saveJobEntryAttribute(id_job, getObjectId(), i, "name", arguments[i]);
                    rep.saveJobEntryAttribute(id_job, getObjectId(), i, "filemask", filemasks[i]);
                }
            }
        } catch (KettleDatabaseException dbe) {
            throw new KettleException(
                    BaseMessages.getString(PKG, "JobEntryDeleteFiles.UnableToSaveToRepo", String.valueOf(id_job)),
                    dbe);
        }
    }

    public Result execute(Result result, int nr) throws KettleException {
        List<RowMetaAndData> resultRows = result.getRows();

        int numberOfErrFiles = 0;
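        // Assume failure up front; these are cleared at the end only if no path failed to process.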
        result.setResult(false);
        result.setNrErrors(1);

        if (argFromPrevious && log.isDetailed()) {
            logDetailed(BaseMessages.getString(PKG, "JobEntryDeleteFiles.FoundPreviousRows",
                    String.valueOf((resultRows != null ? resultRows.size() : 0))));
        }

        Multimap<String, String> pathToMaskMap = populateDataForJobExecution(resultRows);

        for (Map.Entry<String, String> pathToMask : pathToMaskMap.entries()) {
            final String filePath = environmentSubstitute(pathToMask.getKey());
            if (filePath.trim().isEmpty()) {
                // Relative paths are permitted, so an empty path would mean deleting everything
                // inside the PDI root folder. That is far more likely to be a mistake than the
                // desired action, so we delete nothing in this case (see PDI-15181).
                if (log.isDetailed()) {
                    logDetailed(BaseMessages.getString(PKG, "JobEntryDeleteFiles.NoPathProvided"));
                }
            } else {
                final String fileMask = environmentSubstitute(pathToMask.getValue());

                if (parentJob.isStopped()) {
                    break;
                }

                if (!processFile(filePath, fileMask, parentJob)) {
                    numberOfErrFiles++;
                }
            }
        }

        if (numberOfErrFiles == 0) {
            result.setResult(true);
            result.setNrErrors(0);
        } else {
            result.setNrErrors(numberOfErrFiles);
            result.setResult(false);
        }

        return result;
    }

    /**
     * Job execution needs paths to files and matching file masks. These values
     * can be obtained in two ways:
     * 1. As arguments of the current job entry
     * 2. As a table produced by the execution of a previous job entry or transformation.
     *
     * As the logic for processing this data is the same in both cases, we first
     * populate the data (in this method) and then process it.
     *
     * We use a Guava multimap here because it allows duplicate keys, and two entries
     * may name the same folder with different wildcards (e.g. "/tmp/out" with
     * ".*\.csv" and again with ".*\.txt").
     */
    private Multimap<String, String> populateDataForJobExecution(List<RowMetaAndData> rowsFromPreviousMeta)
            throws KettleValueException {
        Multimap<String, String> pathToMaskMap = ArrayListMultimap.create();
        if (argFromPrevious && rowsFromPreviousMeta != null) {
            for (RowMetaAndData resultRow : rowsFromPreviousMeta) {
                if (resultRow.size() < 2) {
                    logError(BaseMessages.getString(PKG, "JobDeleteFiles.Error.InvalidNumberOfRowsFromPrevMeta",
                            resultRow.size()));
                    return pathToMaskMap;
                }
                String pathToFile = resultRow.getString(0, null);
                String fileMask = resultRow.getString(1, null);

                if (log.isDetailed()) {
                    logDetailed(
                            BaseMessages.getString(PKG, "JobEntryDeleteFiles.ProcessingRow", pathToFile, fileMask));
                }

                pathToMaskMap.put(pathToFile, fileMask);
            }
        } else if (arguments != null) {
            for (int i = 0; i < arguments.length; i++) {
                if (log.isDetailed()) {
                    logDetailed(BaseMessages.getString(PKG, "JobEntryDeleteFiles.ProcessingArg", arguments[i],
                            filemasks[i]));
                }
                pathToMaskMap.put(arguments[i], filemasks[i]);
            }
        }

        return pathToMaskMap;
    }

    boolean processFile(String path, String wildcard, Job parentJob) {
        boolean isDeleted = false;
        FileObject fileFolder = null;

        try {
            fileFolder = KettleVFS.getFileObject(path, this);

            if (fileFolder.exists()) {
                if (fileFolder.getType() == FileType.FOLDER) {

                    if (log.isDetailed()) {
                        logDetailed(BaseMessages.getString(PKG, "JobEntryDeleteFiles.ProcessingFolder", path));
                    }

                    int totalDeleted = fileFolder
                            .delete(new TextFileSelector(fileFolder.toString(), wildcard, parentJob));

                    if (log.isDetailed()) {
                        logDetailed(BaseMessages.getString(PKG, "JobEntryDeleteFiles.TotalDeleted",
                                String.valueOf(totalDeleted)));
                    }
                    isDeleted = true;
                } else {

                    if (log.isDetailed()) {
                        logDetailed(BaseMessages.getString(PKG, "JobEntryDeleteFiles.ProcessingFile", path));
                    }
                    isDeleted = fileFolder.delete();
                    if (!isDeleted) {
                        logError(BaseMessages.getString(PKG, "JobEntryDeleteFiles.CouldNotDeleteFile", path));
                    } else {
                        if (log.isBasic()) {
                            logBasic(BaseMessages.getString(PKG, "JobEntryDeleteFiles.FileDeleted", path));
                        }
                    }
                }
            } else {
                // File already deleted, no reason to try to delete it
                if (log.isBasic()) {
                    logBasic(BaseMessages.getString(PKG, "JobEntryDeleteFiles.FileAlreadyDeleted", path));
                }
                isDeleted = true;
            }
        } catch (Exception e) {
            logError(BaseMessages.getString(PKG, "JobEntryDeleteFiles.CouldNotProcess", path, e.getMessage()), e);
        } finally {
            if (fileFolder != null) {
                try {
                    fileFolder.close();
                } catch (IOException ex) {
                    // Ignore
                }
            }
        }

        return isDeleted;
    }

    private class TextFileSelector implements FileSelector {
        String fileWildcard = null;
        String sourceFolder = null;
        Job parentjob;

        public TextFileSelector(String sourcefolderin, String filewildcard, Job parentJob) {

            if (!Utils.isEmpty(sourcefolderin)) {
                sourceFolder = sourcefolderin;
            }

            if (!Utils.isEmpty(filewildcard)) {
                fileWildcard = filewildcard;
            }
            parentjob = parentJob;
        }

        public boolean includeFile(FileSelectInfo info) {
            boolean doReturnCode = false;
            try {

                if (!info.getFile().toString().equals(sourceFolder) && !parentjob.isStopped()) {
                    // Pass over the Base folder itself
                    String shortFilename = info.getFile().getName().getBaseName();

                    if (!info.getFile().getParent().equals(info.getBaseFolder())) {
                        // Not in the base folder; delete only when subfolders are included
                        if (includeSubfolders && (info.getFile().getType() == FileType.FILE)
                                && GetFileWildcard(shortFilename, fileWildcard)) {
                            if (log.isDetailed()) {
                                logDetailed(BaseMessages.getString(PKG, "JobEntryDeleteFiles.DeletingFile",
                                        info.getFile().toString()));
                            }
                            doReturnCode = true;
                        }
                    } else {
                        // In the Base Folder...
                        if ((info.getFile().getType() == FileType.FILE)
                                && GetFileWildcard(shortFilename, fileWildcard)) {
                            if (log.isDetailed()) {
                                logDetailed(BaseMessages.getString(PKG, "JobEntryDeleteFiles.DeletingFile",
                                        info.getFile().toString()));
                            }
                            doReturnCode = true;
                        }
                    }
                }
            } catch (Exception e) {
                log.logError(BaseMessages.getString(PKG, "JobDeleteFiles.Error.Exception.DeleteProcessError"),
                        BaseMessages.getString(PKG, "JobDeleteFiles.Error.Exception.DeleteProcess",
                                info.getFile().toString(), e.getMessage()));

                doReturnCode = false;
            }

            return doReturnCode;
        }

        public boolean traverseDescendents(FileSelectInfo info) {
            return true;
        }
    }

    /**
     * Checks whether a file name matches the given wildcard. The wildcard is a
     * java.util.regex pattern (e.g. ".*\.txt"), not a shell glob.
     *
     * @param selectedfile the short file name to test
     * @param wildcard the regular expression to match against; if empty, every file matches
     * @return true if the selectedfile matches the wildcard
     */
    private boolean GetFileWildcard(String selectedfile, String wildcard) {
        boolean getIt = true;

        if (!Utils.isEmpty(wildcard)) {
            Pattern pattern = Pattern.compile(wildcard);
            // First see if the file matches the regular expression!
            Matcher matcher = pattern.matcher(selectedfile);
            getIt = matcher.matches();
        }

        return getIt;
    }

    public void setIncludeSubfolders(boolean includeSubfolders) {
        this.includeSubfolders = includeSubfolders;
    }

    public void setPrevious(boolean argFromPrevious) {
        this.argFromPrevious = argFromPrevious;
    }

    public boolean evaluates() {
        return true;
    }

    public void check(List<CheckResultInterface> remarks, JobMeta jobMeta, VariableSpace space,
            Repository repository, IMetaStore metaStore) {
        boolean isValid = JobEntryValidatorUtils.andValidator().validate(this, "arguments", remarks,
                AndValidator.putValidators(JobEntryValidatorUtils.notNullValidator()));

        if (!isValid) {
            return;
        }

        ValidatorContext ctx = new ValidatorContext();
        AbstractFileValidator.putVariableSpace(ctx, getVariables());
        AndValidator.putValidators(ctx, JobEntryValidatorUtils.notNullValidator(),
                JobEntryValidatorUtils.fileExistsValidator());

        for (int i = 0; i < arguments.length; i++) {
            JobEntryValidatorUtils.andValidator().validate(this, "arguments[" + i + "]", remarks, ctx);
        }
    }

    public List<ResourceReference> getResourceDependencies(JobMeta jobMeta) {
        List<ResourceReference> references = super.getResourceDependencies(jobMeta);
        if (arguments != null) {
            ResourceReference reference = null;
            for (int i = 0; i < arguments.length; i++) {
                String filename = jobMeta.environmentSubstitute(arguments[i]);
                if (reference == null) {
                    reference = new ResourceReference(this);
                    references.add(reference);
                }
                reference.getEntries().add(new ResourceEntry(filename, ResourceType.FILE));
            }
        }
        return references;
    }

    public void setArguments(String[] arguments) {
        this.arguments = arguments;
    }

    public void setFilemasks(String[] filemasks) {
        this.filemasks = filemasks;
    }

    public void setArgFromPrevious(boolean argFromPrevious) {
        this.argFromPrevious = argFromPrevious;
    }

    public boolean isArgFromPrevious() {
        return argFromPrevious;
    }

    public String[] getArguments() {
        return arguments;
    }

    public String[] getFilemasks() {
        return filemasks;
    }

    public boolean isIncludeSubfolders() {
        return includeSubfolders;
    }

}
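
For completeness, here is a minimal sketch of how this entry might be driven outside of Spoon. It is an assumption-laden illustration, not part of the class above: DeleteFilesExampleApp and the /tmp paths are hypothetical, and the sketch presumes a working PDI client setup in which KettleEnvironment.init() succeeds.

import org.pentaho.di.core.KettleEnvironment;
import org.pentaho.di.core.Result;
import org.pentaho.di.job.Job;
import org.pentaho.di.job.JobMeta;
import org.pentaho.di.job.entries.deletefiles.JobEntryDeleteFiles;

public class DeleteFilesExampleApp {
    public static void main(String[] args) throws Exception {
        KettleEnvironment.init(); // bootstrap plugins, VFS, and logging

        JobEntryDeleteFiles entry = new JobEntryDeleteFiles("Delete temp files");
        entry.setArguments(new String[] { "/tmp/pdi-out" }); // hypothetical folder
        entry.setFilemasks(new String[] { ".*\\.tmp" });     // regex mask, not a glob
        entry.setIncludeSubfolders(true);
        entry.setArgFromPrevious(false); // use the arguments above, not previous result rows

        // execute() consults parentJob.isStopped(), so the entry needs a parent job
        Job parentJob = new Job(null, new JobMeta());
        entry.setParentJob(parentJob);

        Result result = entry.execute(new Result(), 0);
        System.out.println("Success: " + result.getResult()
                + ", errors: " + result.getNrErrors());
    }
}

With setArgFromPrevious(true) instead, the path/mask pairs would be read from the first two fields of each incoming result row, as populateDataForJobExecution above shows.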