Java tutorial: the Pentaho Data Integration 'Delete files' job entry (JobEntryDeleteFiles.java)
/*! ******************************************************************************
 *
 * Pentaho Data Integration
 *
 * Copyright (C) 2002-2016 by Pentaho : http://www.pentaho.com
 *
 *******************************************************************************
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 ******************************************************************************/

package org.pentaho.di.job.entries.deletefiles;

import java.io.IOException;
import java.util.List;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.apache.commons.vfs2.FileObject;
import org.apache.commons.vfs2.FileSelectInfo;
import org.apache.commons.vfs2.FileSelector;
import org.apache.commons.vfs2.FileType;
import org.pentaho.di.cluster.SlaveServer;
import org.pentaho.di.core.CheckResultInterface;
import org.pentaho.di.core.Const;
import org.pentaho.di.core.Result;
import org.pentaho.di.core.RowMetaAndData;
import org.pentaho.di.core.database.DatabaseMeta;
import org.pentaho.di.core.exception.KettleDatabaseException;
import org.pentaho.di.core.exception.KettleException;
import org.pentaho.di.core.exception.KettleValueException;
import org.pentaho.di.core.exception.KettleXMLException;
import org.pentaho.di.core.util.Utils;
import org.pentaho.di.core.variables.VariableSpace;
import org.pentaho.di.core.vfs.KettleVFS;
import org.pentaho.di.core.xml.XMLHandler;
import org.pentaho.di.i18n.BaseMessages;
import org.pentaho.di.job.Job;
import org.pentaho.di.job.JobMeta;
import org.pentaho.di.job.entry.JobEntryBase;
import org.pentaho.di.job.entry.JobEntryInterface;
import org.pentaho.di.job.entry.validator.AbstractFileValidator;
import org.pentaho.di.job.entry.validator.AndValidator;
import org.pentaho.di.job.entry.validator.JobEntryValidatorUtils;
import org.pentaho.di.job.entry.validator.ValidatorContext;
import org.pentaho.di.repository.ObjectId;
import org.pentaho.di.repository.Repository;
import org.pentaho.di.resource.ResourceEntry;
import org.pentaho.di.resource.ResourceEntry.ResourceType;
import org.pentaho.di.resource.ResourceReference;
import org.pentaho.metastore.api.IMetaStore;
import org.w3c.dom.Node;

import com.google.common.collect.ArrayListMultimap;
import com.google.common.collect.Multimap;

/**
 * This defines a 'delete files' job entry.
 *
 * @author Samatar Hassan
 * @since 06-05-2007
 */
public class JobEntryDeleteFiles extends JobEntryBase implements Cloneable, JobEntryInterface {

  private static Class<?> PKG = JobEntryDeleteFiles.class; // for i18n purposes, needed by Translator2!!
  private boolean argFromPrevious;
  private boolean includeSubfolders;

  private String[] arguments;
  private String[] filemasks;

  public JobEntryDeleteFiles(String jobName) {
    super(jobName, "");
    argFromPrevious = false;
    arguments = null;
    includeSubfolders = false;
  }

  public JobEntryDeleteFiles() {
    this("");
  }

  public void allocate(int numberOfFields) {
    arguments = new String[numberOfFields];
    filemasks = new String[numberOfFields];
  }

  public Object clone() {
    JobEntryDeleteFiles jobEntry = (JobEntryDeleteFiles) super.clone();
    if (arguments != null) {
      int nrFields = arguments.length;
      jobEntry.allocate(nrFields);
      System.arraycopy(arguments, 0, jobEntry.arguments, 0, nrFields);
      System.arraycopy(filemasks, 0, jobEntry.filemasks, 0, nrFields);
    }
    return jobEntry;
  }

  public String getXML() {
    StringBuilder retval = new StringBuilder(300);

    retval.append(super.getXML());
    retval.append(" ").append(XMLHandler.addTagValue("arg_from_previous", argFromPrevious));
    retval.append(" ").append(XMLHandler.addTagValue("include_subfolders", includeSubfolders));
    retval.append(" <fields>").append(Const.CR);
    if (arguments != null) {
      for (int i = 0; i < arguments.length; i++) {
        retval.append(" <field>").append(Const.CR);
        retval.append(" ").append(XMLHandler.addTagValue("name", arguments[i]));
        retval.append(" ").append(XMLHandler.addTagValue("filemask", filemasks[i]));
        retval.append(" </field>").append(Const.CR);
      }
    }
    retval.append(" </fields>").append(Const.CR);

    return retval.toString();
  }

  public void loadXML(Node entrynode, List<DatabaseMeta> databases, List<SlaveServer> slaveServers,
      Repository rep, IMetaStore metaStore) throws KettleXMLException {
    try {
      super.loadXML(entrynode, databases, slaveServers);
      argFromPrevious = "Y".equalsIgnoreCase(XMLHandler.getTagValue(entrynode, "arg_from_previous"));
      includeSubfolders = "Y".equalsIgnoreCase(XMLHandler.getTagValue(entrynode, "include_subfolders"));

      Node fields = XMLHandler.getSubNode(entrynode, "fields");
      int numberOfFields = XMLHandler.countNodes(fields, "field");
      allocate(numberOfFields);

      for (int i = 0; i < numberOfFields; i++) {
        Node fnode = XMLHandler.getSubNodeByNr(fields, "field", i);
        arguments[i] = XMLHandler.getTagValue(fnode, "name");
        filemasks[i] = XMLHandler.getTagValue(fnode, "filemask");
      }
    } catch (KettleXMLException xe) {
      throw new KettleXMLException(BaseMessages.getString(PKG, "JobEntryDeleteFiles.UnableToLoadFromXml"), xe);
    }
  }

  public void loadRep(Repository rep, IMetaStore metaStore, ObjectId id_jobentry, List<DatabaseMeta> databases,
      List<SlaveServer> slaveServers) throws KettleException {
    try {
      argFromPrevious = rep.getJobEntryAttributeBoolean(id_jobentry, "arg_from_previous");
      includeSubfolders = rep.getJobEntryAttributeBoolean(id_jobentry, "include_subfolders");

      int numberOfArgs = rep.countNrJobEntryAttributes(id_jobentry, "name");
      allocate(numberOfArgs);

      for (int i = 0; i < numberOfArgs; i++) {
        arguments[i] = rep.getJobEntryAttributeString(id_jobentry, i, "name");
        filemasks[i] = rep.getJobEntryAttributeString(id_jobentry, i, "filemask");
      }
    } catch (KettleException dbe) {
      throw new KettleException(BaseMessages.getString(PKG, "JobEntryDeleteFiles.UnableToLoadFromRepo",
          String.valueOf(id_jobentry)), dbe);
    }
  }

  public void saveRep(Repository rep, IMetaStore metaStore, ObjectId id_job) throws KettleException {
    try {
      rep.saveJobEntryAttribute(id_job, getObjectId(), "arg_from_previous", argFromPrevious);
      rep.saveJobEntryAttribute(id_job, getObjectId(), "include_subfolders", includeSubfolders);

      // save the arguments...
      if (arguments != null) {
        for (int i = 0; i < arguments.length; i++) {
          rep.saveJobEntryAttribute(id_job, getObjectId(), i, "name", arguments[i]);
          rep.saveJobEntryAttribute(id_job, getObjectId(), i, "filemask", filemasks[i]);
        }
      }
    } catch (KettleDatabaseException dbe) {
      throw new KettleException(
          BaseMessages.getString(PKG, "JobEntryDeleteFiles.UnableToSaveToRepo", String.valueOf(id_job)), dbe);
    }
  }

  public Result execute(Result result, int nr) throws KettleException {
    List<RowMetaAndData> resultRows = result.getRows();

    int numberOfErrFiles = 0;
    result.setResult(false);
    result.setNrErrors(1);

    if (argFromPrevious && log.isDetailed()) {
      logDetailed(BaseMessages.getString(PKG, "JobEntryDeleteFiles.FoundPreviousRows",
          String.valueOf((resultRows != null ? resultRows.size() : 0))));
    }

    Multimap<String, String> pathToMaskMap = populateDataForJobExecution(resultRows);

    for (Map.Entry<String, String> pathToMask : pathToMaskMap.entries()) {
      final String filePath = environmentSubstitute(pathToMask.getKey());
      if (filePath.trim().isEmpty()) {
        // Relative paths are permitted, and providing an empty path means deleting all files inside a root pdi-folder.
        // It is much more likely to be a mistake than a desirable action, so we don't delete anything (see PDI-15181)
        if (log.isDetailed()) {
          logDetailed(BaseMessages.getString(PKG, "JobEntryDeleteFiles.NoPathProvided"));
        }
      } else {
        final String fileMask = environmentSubstitute(pathToMask.getValue());

        if (parentJob.isStopped()) {
          break;
        }

        if (!processFile(filePath, fileMask, parentJob)) {
          numberOfErrFiles++;
        }
      }
    }

    if (numberOfErrFiles == 0) {
      result.setResult(true);
      result.setNrErrors(0);
    } else {
      result.setNrErrors(numberOfErrFiles);
      result.setResult(false);
    }

    return result;
  }

  /**
   * For job execution, a path to the files and the file masks should be provided.
   * These values can be obtained in two ways:
   * 1. As an argument of the current job entry
   * 2. As a table that comes as a result of executing a previous job/transformation.
   *
   * As the logic of processing this data is the same for both of these cases, we first
   * populate this data (in this method) and then process it.
   *
   * We are using a Guava multimap here, because it allows key duplication and there could be a
   * situation where two paths to one folder with different wildcards are provided.
   */
  private Multimap<String, String> populateDataForJobExecution(List<RowMetaAndData> rowsFromPreviousMeta)
      throws KettleValueException {
    Multimap<String, String> pathToMaskMap = ArrayListMultimap.create();
    if (argFromPrevious && rowsFromPreviousMeta != null) {
      for (RowMetaAndData resultRow : rowsFromPreviousMeta) {
        if (resultRow.size() < 2) {
          logError(BaseMessages.getString(PKG, "JobDeleteFiles.Error.InvalidNumberOfRowsFromPrevMeta",
              resultRow.size()));
          return pathToMaskMap;
        }
        String pathToFile = resultRow.getString(0, null);
        String fileMask = resultRow.getString(1, null);

        if (log.isDetailed()) {
          logDetailed(BaseMessages.getString(PKG, "JobEntryDeleteFiles.ProcessingRow", pathToFile, fileMask));
        }

        pathToMaskMap.put(pathToFile, fileMask);
      }
    } else if (arguments != null) {
      for (int i = 0; i < arguments.length; i++) {
        if (log.isDetailed()) {
          logDetailed(BaseMessages.getString(PKG, "JobEntryDeleteFiles.ProcessingArg", arguments[i], filemasks[i]));
        }
        pathToMaskMap.put(arguments[i], filemasks[i]);
      }
    }
    return pathToMaskMap;
  }

  boolean processFile(String path, String wildcard, Job parentJob) {
    boolean isDeleted = false;
    FileObject fileFolder = null;

    try {
      fileFolder = KettleVFS.getFileObject(path, this);

      if (fileFolder.exists()) {
        if (fileFolder.getType() == FileType.FOLDER) {
          if (log.isDetailed()) {
            logDetailed(BaseMessages.getString(PKG, "JobEntryDeleteFiles.ProcessingFolder", path));
          }
          int totalDeleted = fileFolder.delete(new TextFileSelector(fileFolder.toString(), wildcard, parentJob));
          if (log.isDetailed()) {
            logDetailed(BaseMessages.getString(PKG, "JobEntryDeleteFiles.TotalDeleted",
                String.valueOf(totalDeleted)));
          }
          isDeleted = true;
        } else {
          if (log.isDetailed()) {
            logDetailed(BaseMessages.getString(PKG, "JobEntryDeleteFiles.ProcessingFile", path));
          }
          isDeleted = fileFolder.delete();
          if (!isDeleted) {
            logError(BaseMessages.getString(PKG, "JobEntryDeleteFiles.CouldNotDeleteFile", path));
          } else {
            if (log.isBasic()) {
              logBasic(BaseMessages.getString(PKG, "JobEntryDeleteFiles.FileDeleted", path));
            }
          }
        }
      } else {
        // File already deleted, no reason to try to delete it
        if (log.isBasic()) {
          logBasic(BaseMessages.getString(PKG, "JobEntryDeleteFiles.FileAlreadyDeleted", path));
        }
        isDeleted = true;
      }
    } catch (Exception e) {
      logError(BaseMessages.getString(PKG, "JobEntryDeleteFiles.CouldNotProcess", path, e.getMessage()), e);
    } finally {
      if (fileFolder != null) {
        try {
          fileFolder.close();
        } catch (IOException ex) {
          // Ignore
        }
      }
    }

    return isDeleted;
  }

  private class TextFileSelector implements FileSelector {
    String fileWildcard = null;
    String sourceFolder = null;
    Job parentjob;

    public TextFileSelector(String sourcefolderin, String filewildcard, Job parentJob) {
      if (!Utils.isEmpty(sourcefolderin)) {
        sourceFolder = sourcefolderin;
      }
      if (!Utils.isEmpty(filewildcard)) {
        fileWildcard = filewildcard;
      }
      parentjob = parentJob;
    }

    public boolean includeFile(FileSelectInfo info) {
      boolean doReturnCode = false;
      try {
        if (!info.getFile().toString().equals(sourceFolder) && !parentjob.isStopped()) {
          // Pass over the Base folder itself
          String shortFilename = info.getFile().getName().getBaseName();

          if (!info.getFile().getParent().equals(info.getBaseFolder())) {
            // Not in the Base Folder... Only if include sub folders
            if (includeSubfolders && (info.getFile().getType() == FileType.FILE)
                && GetFileWildcard(shortFilename, fileWildcard)) {
              if (log.isDetailed()) {
                logDetailed(BaseMessages.getString(PKG, "JobEntryDeleteFiles.DeletingFile",
                    info.getFile().toString()));
              }
              doReturnCode = true;
            }
          } else {
            // In the Base Folder...
            if ((info.getFile().getType() == FileType.FILE) && GetFileWildcard(shortFilename, fileWildcard)) {
              if (log.isDetailed()) {
                logDetailed(BaseMessages.getString(PKG, "JobEntryDeleteFiles.DeletingFile",
                    info.getFile().toString()));
              }
              doReturnCode = true;
            }
          }
        }
      } catch (Exception e) {
        log.logError(BaseMessages.getString(PKG, "JobDeleteFiles.Error.Exception.DeleteProcessError"),
            BaseMessages.getString(PKG, "JobDeleteFiles.Error.Exception.DeleteProcess",
                info.getFile().toString(), e.getMessage()));
        doReturnCode = false;
      }
      return doReturnCode;
    }

    public boolean traverseDescendents(FileSelectInfo info) {
      return true;
    }
  }

  /**
   * Checks whether the selected file matches the file mask. Note that the mask is
   * compiled as a Java regular expression, not a shell-style glob, and that an
   * empty mask matches everything.
   *
   * @param selectedfile the short file name to test
   * @param wildcard the regular expression to match against
   * @return True if the selectedfile matches the wildcard
   */
  private boolean GetFileWildcard(String selectedfile, String wildcard) {
    boolean getIt = true;

    if (!Utils.isEmpty(wildcard)) {
      Pattern pattern = Pattern.compile(wildcard);
      // First see if the file matches the regular expression!
      Matcher matcher = pattern.matcher(selectedfile);
      getIt = matcher.matches();
    }

    return getIt;
  }

  public void setIncludeSubfolders(boolean includeSubfolders) {
    this.includeSubfolders = includeSubfolders;
  }

  public void setPrevious(boolean argFromPrevious) {
    this.argFromPrevious = argFromPrevious;
  }

  public boolean evaluates() {
    return true;
  }

  public void check(List<CheckResultInterface> remarks, JobMeta jobMeta, VariableSpace space,
      Repository repository, IMetaStore metaStore) {
    boolean isValid = JobEntryValidatorUtils.andValidator().validate(this, "arguments", remarks,
        AndValidator.putValidators(JobEntryValidatorUtils.notNullValidator()));

    if (!isValid) {
      return;
    }

    ValidatorContext ctx = new ValidatorContext();
    AbstractFileValidator.putVariableSpace(ctx, getVariables());
    AndValidator.putValidators(ctx, JobEntryValidatorUtils.notNullValidator(),
        JobEntryValidatorUtils.fileExistsValidator());

    for (int i = 0; i < arguments.length; i++) {
      JobEntryValidatorUtils.andValidator().validate(this, "arguments[" + i + "]", remarks, ctx);
    }
  }

  public List<ResourceReference> getResourceDependencies(JobMeta jobMeta) {
    List<ResourceReference> references = super.getResourceDependencies(jobMeta);
    if (arguments != null) {
      ResourceReference reference = null;
      for (int i = 0; i < arguments.length; i++) {
        String filename = jobMeta.environmentSubstitute(arguments[i]);
        if (reference == null) {
          reference = new ResourceReference(this);
          references.add(reference);
        }
        reference.getEntries().add(new ResourceEntry(filename, ResourceType.FILE));
      }
    }
    return references;
  }

  public void setArguments(String[] arguments) {
    this.arguments = arguments;
  }

  public void setFilemasks(String[] filemasks) {
    this.filemasks = filemasks;
  }

  public void setArgFromPrevious(boolean argFromPrevious) {
    this.argFromPrevious = argFromPrevious;
  }

  public boolean isArgFromPrevious() {
    return argFromPrevious;
  }

  public String[] getArguments() {
    return arguments;
  }

  public String[] getFilemasks() {
    return filemasks;
  }

  public boolean isIncludeSubfolders() {
    return includeSubfolders;
  }
}
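A point worth calling out from GetFileWildcard above: the file mask is compiled with java.util.regex.Pattern, so masks are Java regular expressions, not shell globs. The standalone sketch below (the FileMaskDemo class and its sample file names are ours, not part of PDI) reproduces the same check to show the difference:

import java.util.regex.Pattern;
import java.util.regex.PatternSyntaxException;

public class FileMaskDemo {

  // Mirrors the logic of GetFileWildcard: an empty mask matches everything,
  // otherwise the short file name must match the whole regular expression.
  static boolean matchesMask(String selectedFile, String wildcard) {
    if (wildcard == null || wildcard.isEmpty()) {
      return true;
    }
    return Pattern.compile(wildcard).matcher(selectedFile).matches();
  }

  public static void main(String[] args) {
    System.out.println(matchesMask("report.txt", ".*\\.txt")); // true: regex syntax
    System.out.println(matchesMask("report.txt", ""));         // true: empty mask matches all
    try {
      matchesMask("report.txt", "*.txt"); // glob syntax is NOT valid here
    } catch (PatternSyntaxException e) {
      System.out.println("'*.txt' is not a valid regex: " + e.getDescription());
    }
  }
}

So to delete all .txt files, the mask must be written as ".*\.txt"; a glob such as "*.txt" throws a PatternSyntaxException, which processFile would catch and log as a failed path.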
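The Javadoc on populateDataForJobExecution explains why a Guava Multimap is used rather than a plain Map: the same folder may legitimately be listed twice with different masks, and a Map keyed by path would keep only one of them. A minimal, self-contained illustration (the MultimapDemo class, the paths and masks are made-up examples):

import java.util.Map;

import com.google.common.collect.ArrayListMultimap;
import com.google.common.collect.Multimap;

public class MultimapDemo {
  public static void main(String[] args) {
    Multimap<String, String> pathToMaskMap = ArrayListMultimap.create();

    // The same folder registered twice with different masks; a plain
    // Map<String, String> would silently overwrite the first entry.
    pathToMaskMap.put("/tmp/output", ".*\\.csv");
    pathToMaskMap.put("/tmp/output", ".*\\.log");

    // entries() yields both pairs, which is exactly how execute() iterates.
    for (Map.Entry<String, String> entry : pathToMaskMap.entries()) {
      System.out.println(entry.getKey() + " -> " + entry.getValue());
    }
  }
}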
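Finally, a hedged end-to-end sketch of driving the entry programmatically. In normal use the PDI job engine constructs the entry (via loadXML or loadRep) and supplies the running parent Job; the bootstrap below, including the throwaway parent Job and the example path /tmp/pdi-work, is an assumption made purely so the snippet can run standalone.

import org.pentaho.di.core.KettleEnvironment;
import org.pentaho.di.core.Result;
import org.pentaho.di.job.Job;
import org.pentaho.di.job.JobMeta;
import org.pentaho.di.job.entries.deletefiles.JobEntryDeleteFiles;

public class DeleteFilesExample {
  public static void main(String[] args) throws Exception {
    // Initialize the Kettle runtime so KettleVFS, logging and variable
    // substitution work outside of Spoon/Kitchen (assumes a PDI classpath).
    KettleEnvironment.init();

    JobEntryDeleteFiles entry = new JobEntryDeleteFiles("Delete temp files");

    // arguments[i] is a path; filemasks[i] is the regex applied inside it.
    entry.setArguments(new String[] { "/tmp/pdi-work" });
    entry.setFilemasks(new String[] { ".*\\.tmp" });
    entry.setIncludeSubfolders(true);
    entry.setArgFromPrevious(false); // use the fields above, not rows from a previous entry

    // execute() consults parentJob.isStopped(), so a dummy parent Job is
    // wired in here; the job engine would normally provide the real one.
    entry.setParentJob(new Job(null, new JobMeta()));

    Result result = entry.execute(new Result(), 0);
    System.out.println("All deletions succeeded: " + result.getResult());
  }
}

Note how the Result contract mirrors execute() above: the entry reports success only when every provided path was processed without error, and the number of failed paths ends up in getNrErrors().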