// Java tutorial
/*! ******************************************************************************
 *
 * Pentaho Data Integration
 *
 * Copyright (C) 2002-2013 by Pentaho : http://www.pentaho.com
 *
 *******************************************************************************
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 ******************************************************************************/

package org.pentaho.di.job.entries.folderscompare;

import static org.pentaho.di.job.entry.validator.AbstractFileValidator.putVariableSpace;
import static org.pentaho.di.job.entry.validator.AndValidator.putValidators;
import static org.pentaho.di.job.entry.validator.JobEntryValidatorUtils.andValidator;
import static org.pentaho.di.job.entry.validator.JobEntryValidatorUtils.fileExistsValidator;
import static org.pentaho.di.job.entry.validator.JobEntryValidatorUtils.notNullValidator;

import java.io.BufferedInputStream;
import java.io.DataInputStream;
import java.io.IOException;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.apache.commons.vfs.FileObject;
import org.apache.commons.vfs.FileSelectInfo;
import org.apache.commons.vfs.FileSelector;
import org.apache.commons.vfs.FileType;
import org.pentaho.di.cluster.SlaveServer;
import org.pentaho.di.core.CheckResultInterface;
import org.pentaho.di.core.Const;
import org.pentaho.di.core.Result;
import org.pentaho.di.core.database.DatabaseMeta;
import org.pentaho.di.core.exception.KettleDatabaseException;
import org.pentaho.di.core.exception.KettleException;
import org.pentaho.di.core.exception.KettleFileException;
import org.pentaho.di.core.exception.KettleXMLException;
import org.pentaho.di.core.variables.VariableSpace;
import org.pentaho.di.core.vfs.KettleVFS;
import org.pentaho.di.core.xml.XMLHandler;
import org.pentaho.di.i18n.BaseMessages;
import org.pentaho.di.job.JobMeta;
import org.pentaho.di.job.entry.JobEntryBase;
import org.pentaho.di.job.entry.JobEntryInterface;
import org.pentaho.di.job.entry.validator.ValidatorContext;
import org.pentaho.di.repository.ObjectId;
import org.pentaho.di.repository.Repository;
import org.pentaho.metastore.api.IMetaStore;
import org.w3c.dom.Node;

/**
 * This defines a 'folder compare' job entry. It will compare 2 folders, and will either follow the true flow upon the
 * files being the same or the false flow otherwise.
 *
 * @author Samatar Hassan
 * @since 25-11-2007
 *
 */
public class JobEntryFoldersCompare extends JobEntryBase implements Cloneable, JobEntryInterface {
  private static Class<?> PKG = JobEntryFoldersCompare.class; // for i18n purposes, needed by Translator2!!

  /** First file or folder to compare (may contain variables, see {@link #getRealFilename1()}). */
  private String filename1;

  /** Second file or folder to compare (may contain variables, see {@link #getRealFilename2()}). */
  private String filename2;

  /** Regular expression applied to base names when {@link #compareonly} is "specify". */
  private String wildcard;

  /** Selection mode; the selector recognises "all", "only_files", "only_folders" and "specify". */
  private String compareonly;

  /** When true, entries below the first level of the compared folders are selected as well. */
  private boolean includesubfolders;

  /** When true, files present in both folders are compared byte by byte. */
  private boolean comparefilecontent;

  /** When true, files present in both folders must have the same size. */
  private boolean comparefilesize;

  /**
   * Creates an entry with the given name: no sub-folders, no size or content comparison, mode "all", and no
   * wildcard or file names set.
   *
   * @param n
   *          the job entry name
   */
  public JobEntryFoldersCompare(String n) {
    super(n, "");
    includesubfolders = false;
    comparefilesize = false;
    comparefilecontent = false;
    compareonly = "all";
    wildcard = null;
    filename1 = null;
    filename2 = null;
  }

  /** Creates an entry with an empty name and the same defaults as {@link #JobEntryFoldersCompare(String)}. */
  public JobEntryFoldersCompare() {
    this("");
  }

  public void setCompareOnly(String comparevalue) {
    this.compareonly = comparevalue;
  }

  public String getCompareOnly() {
    return compareonly;
  }

  /**
   * @return a copy of this entry as produced by the superclass {@code clone()}
   */
  public Object clone() {
    JobEntryFoldersCompare je = (JobEntryFoldersCompare) super.clone();
    return je;
  }

  /**
   * Serialises the settings of this entry as XML tags appended to the XML produced by the superclass.
   *
   * @return the XML fragment describing this entry
   */
  public String getXML() {
    StringBuilder retval = new StringBuilder(50);

    retval.append(super.getXML());
    retval.append(" ").append(XMLHandler.addTagValue("include_subfolders", includesubfolders));
    retval.append(" ").append(XMLHandler.addTagValue("compare_filecontent", comparefilecontent));
    retval.append(" ").append(XMLHandler.addTagValue("compare_filesize", comparefilesize));
    retval.append(" ").append(XMLHandler.addTagValue("compareonly", compareonly));
    retval.append(" ").append(XMLHandler.addTagValue("wildcard", wildcard));
    retval.append(" ").append(XMLHandler.addTagValue("filename1", filename1));
    retval.append(" ").append(XMLHandler.addTagValue("filename2", filename2));

    return retval.toString();
  }

  /**
   * Reads the settings of this entry from the tags written by {@link #getXML()}. Boolean settings are true only
   * when the tag value is "Y" (case-insensitive).
   *
   * @throws KettleXMLException
   *           when reading fails; the message wraps the message of the original exception
   */
  public void loadXML(Node entrynode, List<DatabaseMeta> databases, List<SlaveServer> slaveServers,
      Repository rep, IMetaStore metaStore) throws KettleXMLException {
    try {
      super.loadXML(entrynode, databases, slaveServers);
      includesubfolders = "Y".equalsIgnoreCase(XMLHandler.getTagValue(entrynode, "include_subfolders"));
      comparefilecontent = "Y".equalsIgnoreCase(XMLHandler.getTagValue(entrynode, "compare_filecontent"));
      comparefilesize = "Y".equalsIgnoreCase(XMLHandler.getTagValue(entrynode, "compare_filesize"));

      compareonly = XMLHandler.getTagValue(entrynode, "compareonly");
      wildcard = XMLHandler.getTagValue(entrynode, "wildcard");
      filename1 = XMLHandler.getTagValue(entrynode, "filename1");
      filename2 = XMLHandler.getTagValue(entrynode, "filename2");
    } catch (KettleXMLException xe) {
      throw new KettleXMLException(
          BaseMessages.getString(PKG, "JobFoldersCompare.Meta.UnableLoadXML", xe.getMessage()));
    }
  }

  /**
   * Reads the settings of this entry from the repository attributes written by
   * {@link #saveRep(Repository, IMetaStore, ObjectId)}.
   *
   * @throws KettleException
   *           when reading fails; the message wraps the entry id and the message of the original exception
   */
  public void loadRep(Repository rep, IMetaStore metaStore, ObjectId id_jobentry, List<DatabaseMeta> databases,
      List<SlaveServer> slaveServers) throws KettleException {
    try {
      includesubfolders = rep.getJobEntryAttributeBoolean(id_jobentry, "include_subfolders");
      comparefilecontent = rep.getJobEntryAttributeBoolean(id_jobentry, "compare_filecontent");
      comparefilesize = rep.getJobEntryAttributeBoolean(id_jobentry, "compare_filesize");

      compareonly = rep.getJobEntryAttributeString(id_jobentry, "compareonly");
      wildcard = rep.getJobEntryAttributeString(id_jobentry, "wildcard");
      filename1 = rep.getJobEntryAttributeString(id_jobentry, "filename1");
      filename2 = rep.getJobEntryAttributeString(id_jobentry, "filename2");
    } catch (KettleException dbe) {
      throw new KettleException(BaseMessages.getString(PKG, "JobFoldersCompare.Meta.UnableLoadRep",
          "" + id_jobentry, dbe.getMessage()));
    }
  }

  /**
   * Stores the settings of this entry as repository attributes.
   *
   * @throws KettleException
   *           when saving fails; the message wraps the job id and the message of the original exception
   */
  public void saveRep(Repository rep, IMetaStore metaStore, ObjectId id_job) throws KettleException {
    try {
      rep.saveJobEntryAttribute(id_job, getObjectId(), "include_subfolders", includesubfolders);
      rep.saveJobEntryAttribute(id_job, getObjectId(), "compare_filecontent", comparefilecontent);
      rep.saveJobEntryAttribute(id_job, getObjectId(), "compare_filesize", comparefilesize);

      rep.saveJobEntryAttribute(id_job, getObjectId(), "compareonly", compareonly);
      rep.saveJobEntryAttribute(id_job, getObjectId(), "wildcard", wildcard);
      rep.saveJobEntryAttribute(id_job, getObjectId(), "filename1", filename1);
      rep.saveJobEntryAttribute(id_job, getObjectId(), "filename2", filename2);
    } catch (KettleDatabaseException dbe) {
      throw new KettleException(BaseMessages.getString(PKG, "JobFoldersCompare.Meta.UnableSaveRep",
          "" + id_job, dbe.getMessage()));
    }
  }

  public void setIncludeSubfolders(boolean includeSubfolders) {
    this.includesubfolders = includeSubfolders;
  }

  public boolean isIncludeSubfolders() {
    return includesubfolders;
  }

  public void setCompareFileContent(boolean comparefilecontent) {
    this.comparefilecontent = comparefilecontent;
  }

  public boolean isCompareFileContent() {
    return comparefilecontent;
  }

  public void setCompareFileSize(boolean comparefilesize) {
    this.comparefilesize = comparefilesize;
  }

  public boolean isCompareFileSize() {
    return comparefilesize;
  }

  /** @return the wildcard after variable substitution */
  public String getRealWildcard() {
    return environmentSubstitute(getWildcard());
  }

  /** @return the first file name after variable substitution */
  public String getRealFilename1() {
    return environmentSubstitute(getFilename1());
  }

  /** @return the second file name after variable substitution */
  public String getRealFilename2() {
    return environmentSubstitute(getFilename2());
  }

  /**
   * Check whether 2 files have the same contents.
   *
   * @param file1
   *          first file to compare
   * @param file2
   *          second file to compare
   * @return true if files are equal, false if they are not
   *
   * @throws KettleFileException
   *           upon IO problems
   */
  protected boolean equalFileContents(FileObject file1, FileObject file2) throws KettleFileException {
    DataInputStream in1 = null;
    DataInputStream in2 = null;
    try {
      in1 = new DataInputStream(
          new BufferedInputStream(KettleVFS.getInputStream(KettleVFS.getFilename(file1), this)));
      in2 = new DataInputStream(
          new BufferedInputStream(KettleVFS.getInputStream(KettleVFS.getFilename(file2), this)));

      // Compare byte by byte until end-of-stream. read() returns -1 at the end, so the streams are equal only
      // when both hit -1 on the same iteration. available() is deliberately not used: it is only an estimate
      // of the bytes readable without blocking and is not a reliable end-of-stream indicator.
      int byte1;
      int byte2;
      do {
        byte1 = in1.read();
        byte2 = in2.read();
        if (byte1 != byte2) {
          return false;
        }
      } while (byte1 != -1);

      return true;
    } catch (IOException e) {
      throw new KettleFileException(e);
    } finally {
      if (in1 != null) {
        try {
          in1.close();
        } catch (IOException ignored) {
          // Nothing to see here...
        }
      }
      if (in2 != null) {
        try {
          in2.close();
        } catch (Exception ignored) {
          // We can't do anything else here...
        }
      }
    }
  }

  /**
   * Compares the two configured files or folders and stores the outcome in the result.
   * <p>
   * The result is true when both are files with equal contents, or both are folders whose selected entries
   * match (same number of entries, same names, same types and, when enabled, same sizes and contents). In all
   * other cases the result is false. The error count is set to 1 when one of the two does not exist or when an
   * exception occurs.
   *
   * @param previousResult
   *          the result object that is updated and returned
   * @param nr
   *          not used by this implementation
   * @return {@code previousResult}, updated with the outcome of the comparison
   */
  public Result execute(Result previousResult, int nr) {
    Result result = previousResult;
    result.setResult(false);

    String realFilename1 = getRealFilename1();
    String realFilename2 = getRealFilename2();

    FileObject folder1 = null;
    FileObject folder2 = null;

    try {
      if (filename1 != null && filename2 != null) {
        // Get Folders/Files to compare
        folder1 = KettleVFS.getFileObject(realFilename1, this);
        folder2 = KettleVFS.getFileObject(realFilename2, this);

        if (folder1.exists() && folder2.exists()) {
          if (!folder1.getType().equals(folder2.getType())) {
            // pb...we try to compare file with folder !!!
            logError(BaseMessages.getString(PKG, "JobFoldersCompare.Log.CanNotCompareFilesFolders"));
            logFileType(folder1, realFilename1);
            logFileType(folder2, realFilename2);
          } else if (folder1.getType() == FileType.FILE) {
            // simply compare 2 files ..
            result.setResult(equalFileContents(folder1, folder2));
          } else if (folder1.getType() == FileType.FOLDER) {
            // We compare 2 folders ...
            result.setResult(compareFolders(folder1, folder2, realFilename1, realFilename2));
          }
          // else: File type unknown !!
        } else {
          if (!folder1.exists()) {
            logError(BaseMessages.getString(PKG, "JobFileCompare.Log.FileNotExist", realFilename1));
          }
          if (!folder2.exists()) {
            logError(BaseMessages.getString(PKG, "JobFileCompare.Log.FileNotExist", realFilename2));
          }
          result.setResult(false);
          result.setNrErrors(1);
        }
      } else {
        logError(BaseMessages.getString(PKG, "JobFoldersCompare.Log.Need2Files"));
      }
    } catch (Exception e) {
      result.setResult(false);
      result.setNrErrors(1);
      logError(BaseMessages.getString(PKG, "JobFoldersCompare.Log.ErrorComparing", realFilename1,
          realFilename2, e.getMessage()));
    } finally {
      closeQuietly(folder1);
      closeQuietly(folder2);
    }

    return result;
  }

  /**
   * Compares the selected entries of two folders.
   *
   * @return true when both folders contain the same number of selected entries and every entry of the first
   *         folder has a matching entry in the second one
   */
  private boolean compareFolders(FileObject folder1, FileObject folder2, String realFilename1,
      String realFilename2) throws IOException, KettleFileException {
    FileObject[] list1 = folder1.findFiles(new TextFileSelector(folder1.toString()));
    FileObject[] list2 = folder2.findFiles(new TextFileSelector(folder2.toString()));

    int lenList1 = list1.length;
    int lenList2 = list2.length;

    if (log.isDetailed()) {
      logDetailed(BaseMessages.getString(PKG, "JobFoldersCompare.Log.FolderContains", realFilename1,
          "" + lenList1));
      logDetailed(BaseMessages.getString(PKG, "JobFoldersCompare.Log.FolderContains", realFilename2,
          "" + lenList2));
    }

    if (lenList1 != lenList2) {
      // The 2 folders don't have the same files number
      if (log.isDetailed()) {
        logDetailed(BaseMessages.getString(PKG, "JobFoldersCompare.Log.FoldersDifferentFiles",
            realFilename1, realFilename2));
      }
      return false;
    }

    Map<String, String> collection1 = indexByBaseName(list1);
    Map<String, String> collection2 = indexByBaseName(list2);

    // Walk the entries of folder 1 and look each of them up in folder 2. The loop deliberately keeps going
    // after the first mismatch so that every difference gets logged.
    boolean ok = true;
    for (Map.Entry<String, String> entry : collection1.entrySet()) {
      String name = entry.getKey();

      if (!collection2.containsKey(name)) {
        ok = false;
        if (log.isDetailed()) {
          logDetailed(BaseMessages.getString(PKG, "JobFoldersCompare.Log.FileCanNotBeFoundIn", name,
              realFilename2));
        }
        continue;
      }

      if (log.isDebug()) {
        logDebug(BaseMessages.getString(PKG, "JobFoldersCompare.Log.FileIsFoundIn", name, realFilename2));
      }

      FileObject filefolder1 = null;
      FileObject filefolder2 = null;
      try {
        filefolder1 = KettleVFS.getFileObject(entry.getValue(), this);
        filefolder2 = KettleVFS.getFileObject(collection2.get(name), this);
        ok = compareEntry(filefolder1, filefolder2, ok);
      } finally {
        // Release each pair as soon as it has been compared instead of only the last one.
        closeQuietly(filefolder1);
        closeQuietly(filefolder2);
      }
    }

    return ok;
  }

  /**
   * Maps the base name of every file to its string form.
   * <p>
   * NOTE(review): the key is the base name only, so when sub-folders are included two entries with the same
   * name in different sub-folders overwrite each other in the map. Keying by the path relative to the compared
   * folder would avoid that, but it changes the names that appear in the log messages - confirm before changing.
   */
  private Map<String, String> indexByBaseName(FileObject[] files) {
    Map<String, String> byName = new HashMap<String, String>();
    for (FileObject file : files) {
      byName.put(file.getName().getBaseName(), file.toString());
    }
    return byName;
  }

  /**
   * Compares one entry of the first folder with the entry of the same name in the second folder.
   *
   * @param okSoFar
   *          whether all entries compared before this one matched; contents are only compared while this is
   *          still true
   * @return false when this pair differs, otherwise {@code okSoFar}
   */
  private boolean compareEntry(FileObject filefolder1, FileObject filefolder2, boolean okSoFar)
      throws IOException, KettleFileException {
    if (!filefolder2.getType().equals(filefolder1.getType())) {
      // The file1 exist in the folder2..but they don't have the same type
      if (log.isDetailed()) {
        logDetailed(BaseMessages.getString(PKG, "JobFoldersCompare.Log.FilesNotSameType",
            filefolder1.toString(), filefolder2.toString()));
      }
      logFileType(filefolder1, filefolder1.toString());
      logFileType(filefolder2, filefolder2.toString());
      return false;
    }

    // Files are the same type ... only plain files get a size/content comparison
    if (filefolder2.getType() != FileType.FILE) {
      return okSoFar;
    }

    boolean ok = okSoFar;

    // Let's compare file size
    if (comparefilesize) {
      long filefolder1_size = filefolder1.getContent().getSize();
      long filefolder2_size = filefolder2.getContent().getSize();
      if (filefolder1_size != filefolder2_size) {
        ok = false;
        if (log.isDetailed()) {
          logDetailed(BaseMessages.getString(PKG, "JobFoldersCompare.Log.FilesNotSameSize",
              filefolder1.toString(), filefolder2.toString()));
          logDetailed(BaseMessages.getString(PKG, "JobFoldersCompare.Log.SizeFileIs",
              filefolder1.toString(), "" + filefolder1_size));
          logDetailed(BaseMessages.getString(PKG, "JobFoldersCompare.Log.SizeFileIs",
              filefolder2.toString(), "" + filefolder2_size));
        }
      }
    }

    // Let's compare files content..
    if (ok && comparefilecontent && !equalFileContents(filefolder1, filefolder2)) {
      ok = false;
      if (log.isDetailed()) {
        logDetailed(BaseMessages.getString(PKG, "JobFoldersCompare.Log.FilesNotSameContent",
            filefolder1.toString(), filefolder2.toString()));
      }
    }

    return ok;
  }

  /**
   * Logs, as an error, whether the given object is a file, a folder or of another type.
   *
   * @param file
   *          the object whose type is reported
   * @param displayName
   *          the name used in the log message
   */
  private void logFileType(FileObject file, String displayName) throws IOException {
    // The message keys are kept as literals in each call rather than passed through a variable.
    if (file.getType() == FileType.FILE) {
      logError(BaseMessages.getString(PKG, "JobFoldersCompare.Log.IsAFile", displayName));
    } else if (file.getType() == FileType.FOLDER) {
      logError(BaseMessages.getString(PKG, "JobFoldersCompare.Log.IsAFolder", displayName));
    } else {
      logError(BaseMessages.getString(PKG, "JobFoldersCompare.Log.IsUnknownFileType", displayName));
    }
  }

  /** Closes the given file object when it is not null, ignoring IO errors raised by the close. */
  private void closeQuietly(FileObject file) {
    if (file != null) {
      try {
        file.close();
      } catch (IOException ignored) {
        // Ignore errors
      }
    }
  }

  /**
   * Selects the entries of a folder that take part in the comparison, based on the selection mode, the
   * wildcard and the include-sub-folders setting of the enclosing job entry.
   */
  private class TextFileSelector implements FileSelector {
    /** String form of the folder being scanned; that folder itself is never selected. */
    private String source_folder = null;

    public TextFileSelector(String sourcefolderin) {
      if (!Const.isEmpty(sourcefolderin)) {
        source_folder = sourcefolderin;
      }
    }

    /**
     * @return true when the entry is not the scanned folder itself, lies directly in the base folder (or
     *         anywhere below it when sub-folders are included) and matches the selection mode; false
     *         otherwise, including when an exception occurs (which is logged)
     */
    public boolean includeFile(FileSelectInfo info) {
      try {
        FileObject file = info.getFile();

        // Pass over the Base folder itself
        if (file.toString().equals(source_folder)) {
          return false;
        }

        // Entries outside the Base Folder are only selected when sub folders are included
        boolean inBaseFolder = file.getParent().equals(info.getBaseFolder());
        if (!inBaseFolder && !includesubfolders) {
          return false;
        }

        return matchesCompareMode(file);
      } catch (Exception e) {
        logError("Error while finding files ... in [" + info.getFile().toString() + "]. Exception :"
            + e.getMessage());
        return false;
      }
    }

    /**
     * Applies the selection mode to one entry. The mode is tested first, so the wildcard is only compiled in
     * "specify" mode, and constant-first {@code equals} keeps a null mode from raising an exception.
     */
    private boolean matchesCompareMode(FileObject file) throws IOException {
      if ("all".equals(compareonly)) {
        return true;
      }
      if ("only_files".equals(compareonly)) {
        return file.getType() == FileType.FILE;
      }
      if ("only_folders".equals(compareonly)) {
        return file.getType() == FileType.FOLDER;
      }
      if ("specify".equals(compareonly)) {
        return GetFileWildcard(file.getName().getBaseName());
      }
      return false;
    }

    /**
     * @return true for the base folder (depth 0), and for deeper folders only when sub-folders are included;
     *         entries below the first level are rejected by {@link #includeFile(FileSelectInfo)} in that case
     *         anyway, so there is no need to walk them
     */
    public boolean traverseDescendents(FileSelectInfo info) {
      return includesubfolders || info.getDepth() == 0;
    }
  }

  /**********************************************************
   *
   * @param selectedfile
   *          the base name to test
   * @return True if the selectedfile matches the wildcard, or if no wildcard is set
   **********************************************************/
  private boolean GetFileWildcard(String selectedfile) {
    // NOTE(review): this uses the raw wildcard, not getRealWildcard(), so variables in the wildcard are not
    // substituted here - confirm whether that is intended.
    if (Const.isEmpty(wildcard)) {
      return true;
    }

    // See if the file matches the regular expression!
    Matcher matcher = Pattern.compile(wildcard).matcher(selectedfile);
    return matcher.matches();
  }

  /** @return always true */
  public boolean evaluates() {
    return true;
  }

  public void setWildcard(String wildcard) {
    this.wildcard = wildcard;
  }

  public String getWildcard() {
    return wildcard;
  }

  public void setFilename1(String filename) {
    this.filename1 = filename;
  }

  public String getFilename1() {
    return filename1;
  }

  public void setFilename2(String filename) {
    this.filename2 = filename;
  }

  public String getFilename2() {
    return filename2;
  }

  /**
   * Validates "filename1" and "filename2" with the not-null and file-exists validators and adds the outcome to
   * the remarks.
   */
  public void check(List<CheckResultInterface> remarks, JobMeta jobMeta, VariableSpace space,
      Repository repository, IMetaStore metaStore) {
    ValidatorContext ctx = new ValidatorContext();
    putVariableSpace(ctx, getVariables());
    putValidators(ctx, notNullValidator(), fileExistsValidator());
    andValidator().validate(this, "filename1", remarks, ctx);
    andValidator().validate(this, "filename2", remarks, ctx);
  }
}