de.intranda.goobi.plugins.CSICMixedImport.java Source code

Java tutorial

Introduction

Here is the source code for de.intranda.goobi.plugins.CSICMixedImport.java

Source

/**
 * 
 * Copyright (C) 2011 intranda GmbH
 * 
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
 * 
 * @author Andrey Kozhushkov
 */
package de.intranda.goobi.plugins;

import java.io.BufferedInputStream;
import java.io.BufferedOutputStream;
import java.io.BufferedReader;
import java.io.ByteArrayInputStream;
import java.io.File;
import java.io.FileFilter;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.FilenameFilter;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.StringReader;
import java.io.StringWriter;
import java.text.DecimalFormat;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.zip.ZipEntry;
import java.util.zip.ZipInputStream;

import net.xeoh.plugins.base.annotations.PluginImplementation;

import org.apache.commons.io.IOUtils;
import org.apache.commons.io.output.ByteArrayOutputStream;
import org.apache.commons.lang.StringUtils;
import org.apache.log4j.Logger;
import org.goobi.production.Import.DocstructElement;
import org.goobi.production.Import.ImportObject;
import org.goobi.production.Import.Record;
import org.goobi.production.enums.ImportReturnValue;
import org.goobi.production.enums.ImportType;
import org.goobi.production.enums.PluginType;
import org.goobi.production.plugin.interfaces.IImportPlugin;
import org.goobi.production.plugin.interfaces.IPlugin;
import org.goobi.production.properties.ImportProperty;
import org.jdom.Content;
import org.jdom.Document;
import org.jdom.Element;
import org.jdom.JDOMException;
import org.jdom.Namespace;
import org.jdom.input.SAXBuilder;
import org.jdom.output.Format;
import org.jdom.output.XMLOutputter;
import org.jdom.transform.XSLTransformer;

import ugh.dl.DigitalDocument;
import ugh.dl.DocStruct;
import ugh.dl.DocStructType;
import ugh.dl.Fileformat;
import ugh.dl.Metadata;
import ugh.dl.MetadataType;
import ugh.dl.Person;
import ugh.dl.Prefs;
import ugh.dl.Reference;
import ugh.exceptions.DocStructHasNoTypeException;
import ugh.exceptions.MetadataTypeNotAllowedException;
import ugh.exceptions.PreferencesException;
import ugh.exceptions.ReadException;
import ugh.exceptions.TypeNotAllowedAsChildException;
import ugh.exceptions.TypeNotAllowedForParentException;
import ugh.exceptions.WriteException;
import ugh.fileformats.mets.MetsMods;
import de.intranda.goobi.plugins.utils.DocTypeConfiguration;
import de.intranda.goobi.plugins.utils.ModsUtils;
import de.intranda.utils.CommonUtils;
import de.sub.goobi.Beans.Prozess;
import de.sub.goobi.Beans.Prozesseigenschaft;
import de.sub.goobi.Import.ImportOpac;
import de.sub.goobi.Persistence.ProzessDAO;
import de.sub.goobi.config.ConfigMain;
import de.sub.goobi.config.ConfigPlugins;
import de.sub.goobi.helper.Helper;
import de.sub.goobi.helper.exceptions.DAOException;
import de.sub.goobi.helper.exceptions.SwapException;

@PluginImplementation
public class CSICMixedImport implements IImportPlugin, IPlugin {

    /** Logger for this class. */
    private static final Logger logger = Logger.getLogger(CSICMixedImport.class);

    private static final String NAME = "CSICMixedImport";
    private static final String VERSION = "1.1.20141121";
    private static final String XSLT_PATH = ConfigMain.getParameter("xsltFolder") + "MARC21slim2MODS3.xsl";
    public static final String MODS_MAPPING_FILE = ConfigMain.getParameter("xsltFolder") + "mods_map.xml";
    private static final String TEMP_DIRECTORY = ConfigMain.getParameter("tempfolder");
    public final boolean writeCurrentNoToMultiVolume = (ConfigPlugins.getPluginConfig(this)
            .getBoolean("writeCurrentNoToMultiVolume", false));
    public final boolean writeCurrentNoSortingToMultiVolume = (ConfigPlugins.getPluginConfig(this)
            .getBoolean("writeCurrentNoSortingToMultiVolume", false));
    public final boolean useSquareBracketsForVolume = (ConfigPlugins.getPluginConfig(this)
            .getBoolean("useSquareBracketsForVolume", false));
    public final boolean addVolumeNoToTitle = (ConfigPlugins.getPluginConfig(this).getBoolean("addVolumeNoToTitle",
            false));

    // Namespaces
    private Namespace mets;

    private Prefs prefs;
    private String data = "";
    private File importFile = null;
    private String importFolder = "/output";
    private DocTypeConfiguration docTypeConfig;
    private Map<String, VolumeInfo> recordMap = new HashMap<String, VolumeInfo>();
    private Map<String, String> projectsCollectionsMap = new HashMap<String, String>();
    private Map<String, Integer> pageVolumeMap = new HashMap<String, Integer>();
    private HashMap<String, Boolean> idMap = new HashMap<String, Boolean>(); // maps to true all records with reoccuring ids (PPNs)
    private String currentIdentifier;
    private String currentTitle;
    private String currentAuthor;
    private String identifierSuffix;
    private List<String> currentCollectionList;
    private String encoding = "utf-8";
    private ArrayList<File> imageDirs = new ArrayList<File>();
    private ArrayList<File> pdfFiles = new ArrayList<File>();;
    public String currentPieceDesignation;
    public int currentVolume = 1;
    public int totalVolumes = 1;
    private String projectName;
    private Record currentRecord;
    private String dsAnchorType = null;
    private String dsType = null;
    private List<String> itemTitleList = new ArrayList<String>();

    /**
     * Directory containing the image files (possibly in TIFF/JPEG subfolders)
     */
    private final boolean deleteOriginalImages = ConfigPlugins.getPluginConfig(this)
            .getBoolean("deleteOriginalImages", true);
    private final boolean deleteTempFiles = ConfigPlugins.getPluginConfig(this).getBoolean("deleteTempFiles",
            false);
    private final boolean logConversionLoss = ConfigPlugins.getPluginConfig(this).getBoolean("logConversionLoss",
            false);
    private final boolean copyImages = ConfigPlugins.getPluginConfig(this).getBoolean("copyImages", true);
    private final boolean copyPdfs = ConfigPlugins.getPluginConfig(this).getBoolean("copyPdfs", true);
    private final boolean updateExistingRecords = ConfigPlugins.getPluginConfig(this)
            .getBoolean("updateExistingRecords", true);
    private final boolean relatedSeriesIsAnchor = ConfigPlugins.getPluginConfig(this)
            .getBoolean("relatedSeriesIsAnchor", false);
    public final File exportFolder = new File(
            ConfigPlugins.getPluginConfig(this).getString("importFolder", "/opt/digiverso/ftp-import/"));
    public final String projectIdentifierTitle = (ConfigPlugins.getPluginConfig(this).getString("projectIdentifier",
            "projectIdentifier"));
    public File sourceFolder = null;
    public File logFolder = new File("/opt/digiverso/logs/CSIC");
    public boolean archiveImport = false;

    // private File exportFolder = new File("example");

    public CSICMixedImport() {

        docTypeConfig = new DocTypeConfiguration(ConfigPlugins.getPluginConfig(this), null);

        List<String> projectList = ConfigPlugins.getPluginConfig(this).getList("project[@name]");
        List<String> collectionList = ConfigPlugins.getPluginConfig(this).getList("project[@collection]");

        for (int i = 0; i < projectList.size(); i++) {
            String projectName = projectList.get(i);
            String collectionName = collectionList.get(i);
            projectsCollectionsMap.put(projectName, collectionName);
        }

    }

    private static DecimalFormat volumeNumberFormat = new DecimalFormat("00");

    private List<Record> generateRecords(Record r) {

        ArrayList<Record> records = new ArrayList<Record>();
        if (imageDirs == null || imageDirs.size() < 2) {
            // if we have no image directories or only one, create exactly one record
            records.add(r);
            if (imageDirs != null && !imageDirs.isEmpty()) {
                File pdfFile = null;
                if (pdfFiles != null && pdfFiles.size() > 0) {
                    pdfFile = pdfFiles.get(0);
                }
                archiveImport = r.getId().startsWith("CSICAR");
                String idNumber = r.getId().split("_")[0].replaceAll("\\D", "");
                VolumeInfo info = new VolumeInfo(idNumber, 1, 1, getPieceDesignation(imageDirs.get(0).getName()),
                        imageDirs.get(0), pdfFile, identifierSuffix, projectName);
                recordMap.put(r.getId(), info);
                if (idMap.get(idNumber) == null) {
                    idMap.put(idNumber, false);
                } else {
                    idMap.put(idNumber, true);
                }
                logger.debug("Adding record " + r.getId() + " to recordmap");
            }
        } else {
            int counter = 1;
            Collections.sort(imageDirs, new ProcessFileComparator());
            ArrayList<String> volumeList = new ArrayList<String>();

            String idNumber = r.getId().split("_")[0].replaceAll("\\D", "");
            if (idMap.get(idNumber) == null) {
                idMap.put(idNumber, false);
            } else {
                idMap.put(idNumber, true);
            }

            if (archiveImport) {
                VolumeInfo info = new VolumeInfo(idNumber, 1, 1, "", imageDirs, pdfFiles, "", projectName);
                recordMap.put(r.getId(), info);
                records.add(r);
            } else {
                for (File imageDir : imageDirs) {

                    String mySuffix = null;
                    if (imageDir.getName().contains("_V")) {
                        String volumeString = imageDir.getName().substring(imageDir.getName().lastIndexOf("_V"));
                        mySuffix = volumeString.substring(1);
                        // String volumeNoString = mySuffix.replaceAll("\\D", "");
                        // if (volumeNoString != null && !volumeNoString.isEmpty()) {
                        // counter = Integer.valueOf(volumeNoString);
                        // }
                        if (volumeList.contains(mySuffix)) {
                            mySuffix = null;
                        } else {
                            volumeList.add(mySuffix);
                        }
                    }

                    // create one new record for each image directory
                    Record rec = new Record();
                    if (mySuffix == null) {
                        mySuffix = "V" + volumeNumberFormat.format(counter);
                        while (volumeList.contains(mySuffix)) {
                            counter++;
                            mySuffix = "V" + volumeNumberFormat.format(counter);
                        }
                        volumeList.add(mySuffix);
                    }
                    // get pdfDirectory
                    File pdfFile = null;
                    if (pdfFiles.size() == 1) {
                        pdfFile = pdfFiles.get(0);
                    } else if (pdfFiles.size() > 1) {
                        for (File file : pdfFiles) {
                            if (file.getName().toUpperCase().endsWith(mySuffix + ".PDF")) {
                                pdfFile = file;
                                break;
                            }
                        }
                    }

                    String suffix = (identifierSuffix == null ? mySuffix : identifierSuffix + "_" + mySuffix);
                    rec.setId(r.getId() + "_" + mySuffix);
                    rec.setData(r.getData());
                    records.add(rec);
                    VolumeInfo info = new VolumeInfo(idNumber, counter, imageDirs.size(),
                            getPieceDesignation(imageDir.getName()), imageDir, pdfFile, suffix, projectName);
                    recordMap.put(rec.getId(), info);
                    logger.debug("Adding record " + rec.getId() + " to recordmap");
                    // System.out.println("Volume " + counter + " = " + imageDir.getName());
                    counter++;
                }
            }
        }
        return records;
    }

    private String getPieceDesignation(String name) {

        if (name.startsWith("M_")) {
            name = name.substring(2);
        }

        String[] parts = name.split("_");
        int last = parts.length - 1;
        if (parts.length > 2) {
            return parts[last - 1];
        } else if (parts[last].startsWith("V")) {
            return null;
        } else {
            return parts[last];
        }

    }

    /**
     * 
     * 
     * @return
     */
    public Fileformat convertData() {
        if (data == null || data.isEmpty()) {
            logger.warn("Attempting to convert empty data. Aborting.");
            return null;
        }

        itemTitleList = new ArrayList<String>();
        Document marcDoc = null;
        Document doc = null;
        Fileformat innerff = null;
        try {
            // CREATE FILEFORMAT FROM MARC RECORD
            doc = CommonUtils.getDocumentFromString(data);
            marcDoc = getMarcDocument(doc, true);
            String marcString = getStringFromDocument(marcDoc, encoding);
            innerff = convertData(marcString);

            if (!deleteTempFiles && sourceFolder != null) {
                CommonUtils.writeTextFile(marcString, new File(sourceFolder, "marcDoc.xml"), encoding, false);
            }
        } catch (IOException e) {
            logger.error(e.toString(), e);
        }

        doc = swapStructData(doc);
        if (archiveImport) {
            doc = createArchiveStructData(doc);
            try {
                CommonUtils.getFileFromDocument(new File(sourceFolder, "metsDoc.xml"), doc);
            } catch (IOException e) {
                // TODO Auto-generated catch block
                e.printStackTrace();
            }
        } else {
            doc = replaceStructData(doc);
            try {
                CommonUtils.getFileFromDocument(new File(sourceFolder, "metsDoc.xml"), doc);
            } catch (IOException e) {
                // TODO Auto-generated catch block
                e.printStackTrace();
            }
        }

        // CREAGE FILEFORMAT FROM MODS DOC
        Fileformat outerff = null;
        File tempFile = new File(TEMP_DIRECTORY, "temp.xml");
        try {
            if (!archiveImport) {
                CommonUtils.getFileFromDocument(tempFile, doc);
                outerff = new MetsMods(prefs);
                outerff.read(tempFile.getAbsolutePath());
                compareFileFormats(innerff, outerff);
                removeOtherVolumes(outerff);
            } else {
                innerff.write(tempFile.getAbsolutePath());
                Document innerDoc = CommonUtils.getDocumentFromFile(tempFile);
                doc = replaceXmlData(doc, innerDoc);
                CommonUtils.getFileFromDocument(tempFile, doc);
                outerff = new MetsMods(prefs);
                outerff.read(tempFile.getAbsolutePath());
                nameItems(outerff);
                createPageLinks(outerff.getDigitalDocument().getLogicalDocStruct());
            }
        } catch (PreferencesException e) {
            logger.error(e.toString());
        } catch (ReadException e) {
            logger.error(e.toString());
        } catch (IOException e) {
            logger.error(e.toString());
        } catch (WriteException e) {
            logger.error(e.toString());
        } catch (JDOMException e) {
            logger.error(e.toString());
        }

        if (tempFile.exists()) {
            tempFile.delete();
        }

        return outerff;
    }

    private void createPageLinks(DocStruct logicalDocStruct) {
        if (logicalDocStruct.getType().isAnchor()) {
            logicalDocStruct = logicalDocStruct.getAllChildren().get(0);
        }
        List<DocStruct> itemList = null;
        itemList = getAllDescendants(logicalDocStruct, prefs.getDocStrctTypeByName("SimpleDocument"));
        for (DocStruct docStruct : itemList) {
            List<Reference> referenceList = docStruct.getAllToReferences();
            for (Reference reference : referenceList) {
                logicalDocStruct.addReferenceTo(reference.getTarget(), reference.getType());
            }
        }

    }

    private void nameItems(Fileformat ff) {
        List<DocStruct> itemList;
        try {
            itemList = getAllDescendants(ff.getDigitalDocument().getLogicalDocStruct(),
                    prefs.getDocStrctTypeByName("SimpleDocument"));
        } catch (PreferencesException e1) {
            logger.error("Logical topStruct has no children");
            return;
        }
        int count = 0;
        DecimalFormat formatter = new DecimalFormat("00");
        for (DocStruct ds : itemList) {
            try {
                String title = itemTitleList.get(count);
                Metadata md = new Metadata(prefs.getMetadataTypeByName("TitleDocMain"));
                md.setValue(title);
                ds.addMetadata(md);
            } catch (MetadataTypeNotAllowedException e) {
                logger.error("Failed to add title to item " + count);
            }
            try {
                String idDitital = currentIdentifier + "_" + formatter.format(count + 1);
                Metadata md = new Metadata(prefs.getMetadataTypeByName("CatalogIDDigital"));
                md.setValue(idDitital);
                ds.addMetadata(md);
            } catch (MetadataTypeNotAllowedException e) {
                logger.error("Failed to add id to item " + count);
            }
            count++;
        }

    }

    private List<DocStruct> getAllDescendants(DocStruct logicalDocStruct, DocStructType dsType) {
        List<DocStruct> dsList = new ArrayList<DocStruct>();
        List<DocStruct> children = logicalDocStruct.getAllChildren();
        if (children != null && !children.isEmpty()) {
            for (DocStruct ds : children) {
                if (dsType == null || ds.getType().equals(dsType)) {
                    dsList.add(ds);
                }
                dsList.addAll(getAllDescendants(ds, dsType));
            }
        }
        return dsList;
    }

    private void removeOtherVolumes(Fileformat outerff) throws PreferencesException {

        // remove logical structures
        DocStruct topStruct = outerff.getDigitalDocument().getLogicalDocStruct();
        DocStruct validVolume = topStruct;
        ArrayList<DocStruct> invalidDocStructs = new ArrayList<DocStruct>();
        List<DocStruct> logStructs = topStruct.getAllChildren();
        if (logStructs != null && !logStructs.isEmpty()) {
            Collections.sort(logStructs, new VolumeComparator());
        }
        if (topStruct.getType().isAnchor() && logStructs != null && logStructs.size() > 1) {
            for (int i = 1; i <= logStructs.size(); i++) {
                if (i != currentVolume) {
                    invalidDocStructs.add(logStructs.get(i - 1));
                    // topStruct.removeChild(topStruct.getAllChildren().get(i-1));
                } else {
                    validVolume = logStructs.get(i - 1);
                }
            }
            for (DocStruct docStruct : invalidDocStructs) {
                topStruct.removeChild(docStruct);
            }

        } else if (topStruct.getType().isAnchor()) {
            validVolume = topStruct.getAllChildren().get(0);
        }

        // clean references
        if (validVolume != null) {
            List<Reference> references = validVolume.getAllToReferences();
            if (references != null) {
                List<DocStruct> targets = new ArrayList<DocStruct>();
                for (Reference reference : references) {
                    targets.add(reference.getTarget());
                }
                for (DocStruct target : targets) {
                    validVolume.removeReferenceTo(target);
                }
            }
        }

        // remove physical structures
        DocStruct bookStruct = outerff.getDigitalDocument().getPhysicalDocStruct();
        DocStruct collectionStruct = null;

        while (bookStruct.getAllChildren() != null
                && !bookStruct.getAllChildren().get(0).getType().getName().contentEquals("page")) {
            collectionStruct = bookStruct;
            bookStruct = bookStruct.getAllChildren().get(0);
        }

        //remove superfluous books
        if (collectionStruct != null) {
            ArrayList<DocStruct> invalidBooks = new ArrayList<DocStruct>();
            List<DocStruct> physStructs = collectionStruct.getAllChildren();
            Collections.sort(physStructs, new VolumeComparator());
            if (physStructs.size() > 1) {
                for (int i = 1; i <= physStructs.size(); i++) {
                    if (i != currentVolume) {
                        invalidBooks.add(physStructs.get(i - 1));
                        // topStruct.removeChild(topStruct.getAllChildren().get(i-1));
                    } else {
                        bookStruct = physStructs.get(i - 1);
                    }
                }
                for (DocStruct docStruct : invalidBooks) {
                    collectionStruct.removeChild(docStruct);
                }
            }
        }

        //add metadata to bookStruct
        DocStruct origBook = outerff.getDigitalDocument().getPhysicalDocStruct();
        if (origBook != bookStruct && origBook.getAllMetadata() != null) {
            for (Metadata md : origBook.getAllMetadata()) {
                Metadata newMd;
                try {
                    newMd = new Metadata(md.getType());
                    newMd.setValue(md.getValue());
                    bookStruct.addMetadata(newMd);
                } catch (MetadataTypeNotAllowedException e) {
                    logger.error("Unable to add metadata " + md.getType().getName() + " to physical DocStruct");
                }
            }
        }
        outerff.getDigitalDocument().setPhysicalDocStruct(bookStruct);

        for (DocStruct page : bookStruct.getAllChildren()) {
            validVolume.addReferenceTo(page, "logical_physical");
        }
    }

    private void compareFileFormats(Fileformat innerff, Fileformat outerff) {

        try {
            DocStruct innerLogStruct = innerff.getDigitalDocument().getLogicalDocStruct();
            DocStruct outerLogStruct = outerff.getDigitalDocument().getLogicalDocStruct();
            List<DocStruct> innerChildren = innerLogStruct.getAllChildren();
            List<DocStruct> outerChildren = outerLogStruct.getAllChildren();

            if (!archiveImport) {
                DocStructType innerAnchorType = null;
                DocStructType innerTopStructType = null;
                if (innerLogStruct.getType().isAnchor()) {
                    innerAnchorType = innerLogStruct.getType();
                    innerTopStructType = innerChildren.get(0).getType();
                } else {
                    innerTopStructType = innerLogStruct.getType();
                }

                if (innerAnchorType != null) {
                    if (outerChildren == null || outerChildren.isEmpty()) {
                        // We need to import the inner anchor
                        outerChildren = new ArrayList<DocStruct>();
                        DocStructType tempType = outerLogStruct.getType();
                        outerChildren.add(outerLogStruct.copy(false, false));
                        outerLogStruct = innerLogStruct.copy(false, false);
                        outerLogStruct.setType(tempType);
                        outerLogStruct.addChild(outerChildren.get(0));
                        outerff.getDigitalDocument().setLogicalDocStruct(outerLogStruct);
                    }
                    outerLogStruct.setType(innerAnchorType);
                    // copy metadata to anchor
                    copyAllMetadata(innerLogStruct, outerLogStruct);
                    // copy metadata to children
                    for (DocStruct ds : outerChildren) {
                        ds.setType(innerTopStructType);
                        copyAllMetadata(innerChildren.get(0), ds);
                    }
                } else {
                    // no inner anchor
                    if (outerChildren != null && !outerChildren.isEmpty()) {
                        if (outerChildren.size() > 1) {
                            // This shouldn't happen
                            logger.error("More than one outer volume, but no inner anchor");
                        } else {
                            // Remove the outer anchor
                            outerLogStruct = outerChildren.get(0).copy(false, false);
                            outerff.getDigitalDocument().setLogicalDocStruct(outerLogStruct);
                        }
                    }
                    outerLogStruct.setType(innerTopStructType);
                    // now copy metadata
                    copyAllMetadata(innerLogStruct, outerLogStruct);
                }
            } else {
                //archive import

            }

            // Now copy the physical metadata
            List<DocStruct> bookList = new ArrayList<DocStruct>();
            bookList.add(outerff.getDigitalDocument().getPhysicalDocStruct());
            while (bookList.size() > 0 && bookList.get(0).getType().getName().contentEquals("Collection")) {
                List<DocStruct> children = bookList.get(0).getAllChildren();
                if (children != null && !children.isEmpty()) {
                    bookList = children;
                } else {
                    // This shouldn't happen
                    logger.error("No BoundBook found in physicalDocument");
                    break;
                }
            }
            for (DocStruct ds : bookList) {
                copyAllMetadata(innerff.getDigitalDocument().getPhysicalDocStruct(), ds);
            }

        } catch (PreferencesException e) {
            logger.error(e);
        } catch (TypeNotAllowedAsChildException e) {
            logger.error(e);
        }
    }

    /**
     * Removes and childen and metadata not allowed for this docStructType
     * 
     * @param docStruct
     */
    private void verifyDocStructIntegrity(DocStruct docStruct) {
        if (docStruct.getAllChildren() != null) {
            ArrayList<DocStruct> childrenToRemove = new ArrayList<DocStruct>();
            for (DocStruct child : docStruct.getAllChildren()) {
                if (!docStruct.isDocStructTypeAllowedAsChild(child.getType())) {
                    logger.warn("Removing child " + child.getType().getName() + " from DocStruct "
                            + docStruct.getType().getName());
                    childrenToRemove.add(child);
                    // docStruct.removeChild(child);
                }
            }
            for (DocStruct docStruct2 : childrenToRemove) {
                docStruct.removeChild(docStruct2);
            }
        }
        if (docStruct.getAllMetadata() != null) {
            ArrayList<Metadata> metadataToRemove = new ArrayList<Metadata>();
            HashMap<MetadataType, Boolean> existingMetadataMap = new HashMap<MetadataType, Boolean>();
            for (Metadata metadata : docStruct.getAllMetadata()) {
                MetadataType type = metadata.getType();
                boolean isAllowed = false;
                for (MetadataType mdType : docStruct.getType().getAllMetadataTypes()) {
                    if (mdType.getName().contentEquals(type.getName())) {
                        isAllowed = true;
                        if (docStruct.getType().getNumberOfMetadataType(mdType).contentEquals("1o")
                                && existingMetadataMap.get(mdType) != null) {
                            // a metadata of this type already exists, although only one instance is permitted. Delete this instance
                            isAllowed = false;
                        } else {
                            existingMetadataMap.put(mdType, true);
                        }

                    }

                }

                if (!isAllowed) {
                    // if (!docStruct.getType().isMDTypeAllowed(type)) {
                    logger.warn("Removing metadata " + metadata.getType().getName() + " from DocStruct "
                            + docStruct.getType().getName());
                    metadataToRemove.add(metadata);
                    // docStruct.removeMetadata(metadata);
                }
            }
            for (Metadata metadata : metadataToRemove) {
                docStruct.removeMetadata(metadata);
            }
        }
    }

    /**
     * 
     * generate mets files and copy image files from record list
     * 
     */
    @Override
    public List<ImportObject> generateFiles(List<Record> records) {
        ArrayList<ImportObject> ret = new ArrayList<ImportObject>();

        for (Record r : records) {
            currentRecord = r;
            logger.info("Processing " + r.getId());
            // Data conversion
            data = r.getData();
            VolumeInfo info = recordMap.get(r.getId());
            // VolumeInfo info = (VolumeInfo)CommonUtils.getFromMap(recordMap, r.getId());
            if (info == null) {
                logger.error("Unable to find information to that record");
                continue;
            }
            currentIdentifier = info.identifier;
            currentPieceDesignation = info.pieceDesignation;
            currentVolume = info.volumeNumber;
            totalVolumes = info.totalVolumes;
            identifierSuffix = info.identifierSuffix;
            // currentCollectionList = r.getCollections();
            currentCollectionList = new ArrayList<String>();
            String collection = projectsCollectionsMap.get(info.projectName);
            if (collection != null) {
                currentCollectionList.add(collection);
            }
            Fileformat ff = convertData();
            // correctId(ff);
            ImportObject io = new ImportObject();

            if (ff != null) {

                // add collection as metadata

                // writing converted data into Import("temp") folder
                try {
                    MetsMods mm = new MetsMods(prefs);
                    mm.setDigitalDocument(ff.getDigitalDocument());
                    String fileName = getImportFolder() + getProcessTitle();
                    addProject(ff, info.projectName);

                    logger.debug("Writing '" + fileName + "'...");
                    mm.write(fileName);
                    logger.debug("copying image files from image directories...");
                    copyImageFiles(info.imageDirs);
                    copyPdfs(info.pdfFiles);
                    io.setProcessTitle(getProcessTitle().replace(".xml", ""));
                    io.setMetsFilename(fileName);
                    io.setImportReturnValue(ImportReturnValue.ExportFinished);

                    Prozesseigenschaft property = new Prozesseigenschaft();
                    property.setTitel(projectIdentifierTitle);
                    property.setWert(info.projectName);
                    ArrayList<Prozesseigenschaft> propertyList = new ArrayList<Prozesseigenschaft>();
                    propertyList.add(property);
                    io.setProcessProperties(propertyList);

                    if (importFile != null && importFile.isFile() && sourceFolder != null) {
                        try {
                            CommonUtils.moveFile(importFile, new File(sourceFolder, importFile.getName()), true);
                            // importFile.renameTo(new File(sourceFolder, importFile.getName()));
                        } catch (IOException e) {
                            logger.error(e.getMessage(), e);
                        }
                    }
                } catch (PreferencesException e) {
                    logger.error(e.getMessage(), e);
                    io.setImportReturnValue(ImportReturnValue.InvalidData);
                    io.setErrorMessage(e.getMessage());
                } catch (WriteException e) {
                    logger.error(e.getMessage(), e);
                    io.setImportReturnValue(ImportReturnValue.WriteError);
                    io.setErrorMessage(e.getMessage());
                }
            } else {
                io.setImportReturnValue(ImportReturnValue.InvalidData);
                io.setErrorMessage("FileFormat is null");
            }
            ret.add(io);
        }
        return ret;
    }

    private void addProject(Fileformat ff, String projectName) {
        projectName = getActualProjectName(projectName);
        try {
            DocStruct topStruct = ff.getDigitalDocument().getLogicalDocStruct();
            if (topStruct.getAllMetadataByType(prefs.getMetadataTypeByName("ProjectIdentifier")) == null
                    || topStruct.getAllMetadataByType(prefs.getMetadataTypeByName("ProjectIdentifier")).isEmpty()) {
                Metadata projectInfo = new Metadata(prefs.getMetadataTypeByName("ProjectIdentifier"));
                projectInfo.setValue(projectName);
                topStruct.addMetadata(projectInfo);

                // set also for first subSrtuct if topStruct was an anchor
                if (topStruct.getType().isAnchor()) {
                    DocStruct subStruct = topStruct.getAllChildren().get(0);
                    projectInfo = new Metadata(prefs.getMetadataTypeByName("ProjectIdentifier"));
                    projectInfo.setValue(projectName);
                    subStruct.addMetadata(projectInfo);
                }
            }

        } catch (PreferencesException e) {
            logger.error("Unable to add collection as metadata: " + e.getMessage(), e);
        } catch (MetadataTypeNotAllowedException e) {
            logger.error("Unable to add collection as metadata: " + e.getMessage(), e);
        } catch (DocStructHasNoTypeException e) {
            logger.error("Unable to add collection as metadata: " + e.getMessage(), e);
        } finally {
        }

    }

    /**
     * generate list of records
     * 
     */
    @Override
    public List<Record> generateRecordsFromFile() {

        projectName = null;
        List<Record> origRecordList = retrieveOriginalRecordsFromFile();
        List<Record> recordList = generateRecordsForImport(origRecordList);

        return recordList;
    }

    private List<Record> generateRecordsForImport(List<Record> origRecordList) {
        List<Record> recordList = new ArrayList<Record>();
        HashMap<Record, File> updateRecordMap = new HashMap<Record, File>();

        for (Record record : origRecordList) {

            identifierSuffix = null;
            String searchString = parseImagefolder(record); // get the image folders corresponding to this record, plus an additional process
            logger.debug("Project name is " + projectName);
            record.setId(searchString); // identifer if necessary
            List<Record> childRecords = generateRecords(record);

            for (Record childRecord : childRecords) {
                File oldFile = searchForExistingData(childRecord);
                if (oldFile != null) {
                    if (updateExistingRecords) {
                        logger.info("Found existing record. Updating.");
                        updateRecordMap.put(childRecord, oldFile);
                    } else {
                        Helper.setFehlerMeldung("Cannot import record " + record.getId()
                                + " : A process with this title already exists.");
                    }
                } else {
                    recordList.add(childRecord);
                }
            }

        }

        if (recordList.size() > 0 && importFile != null) {
            logger.info("Extracted " + recordList.size() + " records from '" + importFile.getName() + "'.");
        } else if (updateRecordMap.size() > 0 && importFile != null) {
            logger.info("Updating " + updateRecordMap.size() + " records from '" + importFile.getName() + "'.");
        } else {
            logger.error("No record extracted from importFile");
        }

        for (Record record : updateRecordMap.keySet()) {
            if (!updateOldRecord(record, updateRecordMap.get(record))) {
                Helper.setFehlerMeldung("Error updating record " + record.getId());
            } else {
                Helper.setMeldung("Updated process " + record.getId());
            }
        }
        return recordList;
    }

    private List<Record> retrieveOriginalRecordsFromFile() {

        List<Record> recordList = new ArrayList<Record>();
        HashMap<String, String> filteredRecordStrings = new HashMap<String, String>();
        int count = 0;

        if (importFile.getName().endsWith("zip")) {
            logger.info("Extracting zip archive");
            HashMap<String, byte[]> recordStrings = unzipFile(importFile);
            // Check all records first to see what's inside
            for (String key : recordStrings.keySet()) {
                byte[] bytes = recordStrings.get(key);
                InputStream bais = null;
                if (key.endsWith(".zip")) {
                    // Another zip-archive
                    logger.debug("Extracting inner archive " + key + "; size = " + bytes.length + " bytes");
                    try {
                        bais = new ByteArrayInputStream(bytes);
                        // FileOutputStream fout = new FileOutputStream(new File(importFolder, "temp.zip"));
                        // fout.write(bytes);
                        // fout.close();
                        HashMap<String, String> tempRecords = unzipStream(bais);
                        filteredRecordStrings.putAll(tempRecords);
                    } catch (Exception e) {
                        logger.error("Unable to read inner zip file");
                    } finally {
                        if (bais != null) {
                            try {
                                bais.close();
                            } catch (IOException e) {
                                logger.error(e);
                            }
                        }
                    }
                } else if (key.endsWith(".xml")) {
                    BufferedReader br = null;
                    // ByteArrayInputStream bais = null;
                    InputStreamReader isr = null;
                    try {
                        bais = new ByteArrayInputStream(bytes);
                        isr = new InputStreamReader(bais);
                        br = new BufferedReader(isr);
                        StringBuffer sb = new StringBuffer();
                        String line;
                        while ((line = br.readLine()) != null) {
                            sb.append(line).append("\n");
                        }
                        filteredRecordStrings.put(key, sb.toString());
                    } catch (IOException e) {
                        logger.error("Unable to read METS file from zip-Archive");
                    } finally {
                        try {
                            if (bais != null) {
                                bais.close();
                            }
                            if (isr != null) {
                                isr.close();
                            }
                            if (br != null) {
                                br.close();
                            }
                        } catch (IOException e) {
                            logger.error("Error closing String reader");
                        }
                    }
                }
            }

            for (String key : filteredRecordStrings.keySet()) {
                if (!key.endsWith(".xml")) {
                    continue;
                }
                String importFileName = key;
                String importData = filteredRecordStrings.get(key);
                logger.debug("Extracting record " + ++count);
                Record rec = new Record();
                // System.out.println("Data from Zip-File:\n " + importData);
                rec.setData(importData);
                logger.debug("Getting record " + importFileName);
                rec.setId(importFileName.substring(0, importFileName.indexOf(".")));
                recordList.add(rec);
            }
        } else {
            logger.info("Importing single record file");
            InputStream input = null;
            StringWriter writer = null;
            try {
                logger.debug("loaded file: " + importFile.getAbsolutePath());
                String importFileName = importFile.getName();
                input = new FileInputStream(importFile);
                Record rec = new Record();
                writer = new StringWriter();
                IOUtils.copy(input, writer, encoding);
                rec.setData(writer.toString());
                rec.setId(importFileName.substring(0, importFileName.indexOf(".")));
                recordList.add(rec);
            } catch (FileNotFoundException e) {
                logger.error(e.getMessage(), e);
            } catch (IOException e) {
                logger.error(e.getMessage(), e);
            } finally {
                if (input != null) {
                    try {
                        if (writer != null)
                            writer.close();
                        input.close();
                    } catch (IOException e) {
                        logger.error(e.getMessage(), e);
                    }
                }
            }
        }
        return recordList;

    }

    /**
     * not used - records are split on import
     */
    @Override
    public List<Record> splitRecords(String records) {

        return null;
    }

    @Override
    public List<String> splitIds(String ids) {
        return null;
    }

    @Override
    public String getProcessTitle() {
        String title = "";
        if (StringUtils.isNotBlank(currentTitle)) {
            String atstsl = new ImportOpac().createAtstsl(currentTitle, currentAuthor);
            if (atstsl != null) {
                title = new ImportOpac().createAtstsl(currentTitle, currentAuthor).toLowerCase() + "_"
                        + currentIdentifier + ".xml";
            } else {
                title = currentIdentifier + ".xml";
            }
        } else {
            title = currentIdentifier + ".xml";
        }
        return title;
        // if (identifierSuffix != null && !identifierSuffix.isEmpty()) {
        // return title.replace(".xml", "_" + identifierSuffix + ".xml");
        // } else {
        // return title;
        // }
    }

    @Override
    public void setData(Record r) {
        this.data = r.getData();
    }

    @Override
    public String getImportFolder() {
        return importFolder;
    }

    @Override
    public void setImportFolder(String folder) {
        this.importFolder = folder;
    }

    @Override
    public void setFile(File importFile) {
        this.importFile = importFile;
    }

    @Override
    public void setPrefs(Prefs prefs) {
        this.prefs = prefs;

    }

    @Override
    public List<ImportType> getImportTypes() {
        List<ImportType> answer = new ArrayList<ImportType>();
        answer.add(ImportType.FILE);
        answer.add(ImportType.FOLDER);

        return answer;
    }

    @Override
    public PluginType getType() {
        return PluginType.Import;
    }

    @Override
    public String getTitle() {
        return getDescription();
    }

    @Override
    public String getDescription() {
        return NAME + " v" + VERSION;
    }

    /**
     * 
     * Specialized convertData to convert only the specified String inString from marc to mods
     * 
     * @param inString
     * @return
     */
    private Fileformat convertData(String inString) {
        Fileformat ff = null;
        Document doc;
        StringReader sr = null;
        try {
            sr = new StringReader(inString);
            doc = new SAXBuilder().build(sr);

            // remove namespaces
            Element docRoot = doc.getRootElement();
            docRoot = setNamespaceRecursive(docRoot, null);
            Element newRecord = new Element("record");
            List<Element> eleList = new ArrayList<Element>();
            for (Object obj : docRoot.getChildren()) {
                Element child = (Element) obj;
                eleList.add(child);
            }
            for (Element element : eleList) {
                element.detach();
            }
            newRecord.setContent(eleList);
            for (Object obj : newRecord.getChildren()) {
                Element child = (Element) obj;
                child.setNamespace(null);
            }
            // newRecord = removeDuplicateChildren(newRecord);
            docRoot.detach();
            doc.setRootElement(newRecord);

            // logger.debug(new XMLOutputter().outputString(doc));
            if (doc != null && doc.hasRootElement()) {
                XSLTransformer transformer = new XSLTransformer(XSLT_PATH);
                Document docMods = transformer.transform(doc);

                // logger.debug(new XMLOutputter().outputString(docMods));
                ff = new MetsMods(prefs);
                DigitalDocument dd = new DigitalDocument();
                ff.setDigitalDocument(dd);

                Element eleMods = docMods.getRootElement();
                if (eleMods.getName().equals("modsCollection")) {
                    eleMods = eleMods.getChild("mods", null);
                }

                // Determine the root docstruct type
                dsType = null;
                dsAnchorType = null;
                String typeOfResource = null;
                boolean belongsToPeriodical = false;
                boolean belongsToSeries = false;
                boolean isManuscript = false;
                boolean belongsToMultiVolume = false;

                // handle TypeOfResource
                List<Element> eleTypeOfResourceList = eleMods.getChildren("typeOfResource", null);
                if (eleTypeOfResourceList != null) {
                    for (Element eleTypeOfResource : eleTypeOfResourceList) {
                        String resourceLabel = eleTypeOfResource.getAttributeValue("displayLabel");
                        if (resourceLabel != null && resourceLabel.contains("SE")) {
                            belongsToPeriodical = true;
                        }
                        if ("yes".equals(eleTypeOfResource.getAttributeValue("manuscript"))) {
                            isManuscript = true;
                        }
                        typeOfResource = eleTypeOfResource.getTextTrim();
                    }
                }

                // handle physicalDescription
                List<Element> physicalDescriptionList = eleMods.getChildren("physicalDescription", null);
                if (physicalDescriptionList != null) {
                    for (Element physDescr : physicalDescriptionList) {
                        List<Element> eleFormList = physDescr.getChildren("form", null);
                        if (eleFormList != null) {
                            for (Element eleForm : eleFormList) {
                                if (eleForm.getAttribute("authority") != null
                                        && eleForm.getValue().contentEquals("Manuscrito")) {
                                    isManuscript = true;
                                }
                            }
                        }
                    }
                }

                // handle archive
                List<Element> recordInfoList = eleMods.getChildren("recordInfo", null);
                if (physicalDescriptionList != null) {
                    for (Element recordInfo : recordInfoList) {
                        List<Element> eleIdList = recordInfo.getChildren("recordIdentifier", null);
                        if (eleIdList != null) {
                            for (Element eleId : eleIdList) {
                                String id = eleId.getTextTrim();
                                if (id != null && id.startsWith("CSICAR")) {
                                    archiveImport = true;
                                }
                            }
                        }
                    }
                }

                // handle relatedSeries
                List<Element> eleRelatedSeriesList = eleMods.getChildren("relatedItem", null);
                if (relatedSeriesIsAnchor && eleRelatedSeriesList != null) {
                    for (Element eleRelatedSeries : eleRelatedSeriesList) {

                        if (eleRelatedSeries != null && eleRelatedSeries.getAttribute("type") != null
                                && eleRelatedSeries.getAttribute("type").getValue().contentEquals("series")) {
                            belongsToSeries = true;
                        }
                    }
                }

                if (idMap.get(currentIdentifier.replaceAll("\\D", "")) != null) {

                    if (idMap.get(currentIdentifier.replaceAll("\\D", "")) == true) {
                        belongsToMultiVolume = true;
                    } else if ((identifierSuffix != null && identifierSuffix.startsWith("V"))
                            && ((!belongsToPeriodical && !belongsToSeries))) {
                        belongsToMultiVolume = true;
                    }
                    // This volume is part of a Series/Multivolume work
                    // if (!belongsToPeriodical && !belongsToSeries) {
                    // }
                }

                boolean multipart = belongsToMultiVolume || belongsToPeriodical || belongsToSeries;

                dsType = docTypeConfig.getDocType(typeOfResource, multipart, archiveImport);
                dsAnchorType = docTypeConfig.getAnchorType(typeOfResource, multipart, archiveImport);

                // remove unnecessary suffixes for everything but multivolumes
                if (!belongsToMultiVolume) {
                    if (idMap.get(currentIdentifier.replaceAll("\\D", "")) != null
                            && idMap.get(currentIdentifier.replaceAll("\\D", "")) == true) {
                        // need suffix
                    } else {
                        identifierSuffix = null;
                    }
                }

                logger.debug("Docstruct type: " + dsType);
                DocStruct dsVolume = dd.createDocStruct(prefs.getDocStrctTypeByName(dsType));
                if (dsVolume == null) {
                    logger.error("Could not create DocStructType " + dsVolume);
                    return null;
                }
                DocStruct dsAnchor = null;
                if (dsAnchorType != null) {
                    logger.debug("Anchor type: " + dsAnchorType);
                    dsAnchor = dd.createDocStruct(prefs.getDocStrctTypeByName(dsAnchorType));
                    if (dsAnchor == null) {
                        logger.error("Could not create DocStructType " + dsAnchorType);
                    }
                    try {
                        dsAnchor.addChild(dsVolume);
                    } catch (TypeNotAllowedAsChildException e) {
                        logger.error("Could not attach " + dsAnchorType + " to anchor " + dsType);
                    }
                    dd.setLogicalDocStruct(dsAnchor);
                } else {
                    dd.setLogicalDocStruct(dsVolume);
                }

                DocStruct dsBoundBook = dd.createDocStruct(prefs.getDocStrctTypeByName("BoundBook"));
                dd.setPhysicalDocStruct(dsBoundBook);

                //get volume number of this item
                Integer volumeNo = ModsUtils.getNumberFromString(this.getCurrentSuffix());
                if (volumeNo == null) {
                    volumeNo = currentVolume;
                }

                // Collect MODS metadata
                ModsUtils.parseModsSection(this, dsVolume, dsAnchor, dsBoundBook, eleMods, volumeNo,
                        currentPieceDesignation);
                currentIdentifier = ModsUtils.getIdentifier(prefs, dsVolume);
                currentTitle = ModsUtils.getTitle(prefs, dsVolume);
                currentAuthor = ModsUtils.getAuthor(prefs, dsVolume);
                logger.debug("Author:" + currentAuthor + ", Title: " + currentTitle);

                // create source ("import") Folder
                if (importFolder != null) {
                    File tempDir = new File(importFolder, getProcessTitle().replace(".xml", ""));
                    sourceFolder = new File(tempDir, "import");
                    sourceFolder.mkdirs();
                }

                if (logConversionLoss) {
                    try {
                        File marcLossFile = new File(logFolder, currentIdentifier + "_MarcLoss.xml");
                        Document lossDoc = getMarcModsLoss(doc, docMods);
                        CommonUtils.getFileFromDocument(marcLossFile, lossDoc);
                        File modsFile = new File(logFolder, currentIdentifier + "_Mods.xml");
                        CommonUtils.getFileFromDocument(modsFile, docMods);
                    } catch (IOException e) {
                        logger.error(e);
                    }
                }

                if (!deleteTempFiles) {
                    try {
                        File modsFile = new File(sourceFolder, "modsTemp.xml");
                        CommonUtils.getFileFromDocument(modsFile, docMods);
                    } catch (IOException e) {
                        logger.error(e);
                    }
                }

                // Add 'pathimagefiles'
                try {
                    Metadata mdForPath = new Metadata(prefs.getMetadataTypeByName("pathimagefiles"));
                    mdForPath.setValue("./" + currentIdentifier);
                    dsBoundBook.addMetadata(mdForPath);
                } catch (MetadataTypeNotAllowedException e1) {
                    logger.error("MetadataTypeNotAllowedException while reading images", e1);
                } catch (DocStructHasNoTypeException e1) {
                    logger.error("DocStructHasNoTypeException while reading images", e1);
                }

                // Add collection names attached to the current record
                if (currentCollectionList != null) {
                    MetadataType mdTypeCollection = prefs.getMetadataTypeByName("singleDigCollection");
                    for (String collection : currentCollectionList) {
                        Metadata mdCollection = new Metadata(mdTypeCollection);
                        mdCollection.setValue(collection);
                        dsVolume.addMetadata(mdCollection);
                        //                        if (dsAnchor != null) {
                        //                            dsAnchor.addMetadata(mdCollection);
                        //                        }
                    }
                }
            }

        } catch (JDOMException e) {
            logger.error(e.getMessage(), e);
            return null;
        } catch (IOException e) {
            logger.error(e.getMessage(), e);
        } catch (PreferencesException e) {
            logger.error(e.getMessage(), e);
        } catch (TypeNotAllowedForParentException e) {
            logger.error(e.getMessage(), e);
        } catch (MetadataTypeNotAllowedException e) {
            logger.error(e.getMessage(), e);
        } finally {
            // try{
            if (sr != null) {
                sr.close();
            }
            // } catch(IOException e) {
            // logger.error("Error closing String reader");
            // }
        }

        return ff;
    }

    /**
     * returns the metadatafile meta.xml if a prozess of this name was found, null otherwise
     * 
     * @param processTitle
     * @return
     */
    private File searchForExistingData(Record r) {
        String processTitle = r.getId();

        // For imports with wrong processTitle, correct it
        //        processTitle = processTitle.replace("000471130", "001100392");
        processTitle = processTitle.replace("001363255", "000884278");
        processTitle = processTitle.replace("00045898", "000045898");
        processTitle = processTitle.replace("0000045898", "000045898");
        //

        int index = processTitle.indexOf("_");
        String processId = processTitle;
        String processIdSuffix = "";
        String processIdVolume = "";
        if (index > 0 && index < processTitle.length()) {
            processId = processTitle.substring(0, index);
            processIdSuffix = processTitle.substring(index + 1);
            int vIndex = processIdSuffix.indexOf("V");
            if (vIndex > -1 && vIndex < processIdSuffix.length() - 2) {
                processIdVolume = processIdSuffix.substring(vIndex);
                if (processIdVolume.contains("_")) {
                    processIdVolume = processIdVolume.substring(0, processIdVolume.indexOf("_"));
                }
            }
        }
        String metsFilePath, processDataDirectory;
        ProzessDAO dao = new ProzessDAO();

        try {
            List<Prozess> processList = dao.search("from Prozess where titel LIKE '%" + processId + "%'");

            if (processList == null || processList.isEmpty()) {
                String id = processTitle.split("_")[0] + "_V00";
                processList = dao.search("from Prozess where titel LIKE '%" + id + "'");
            }

            if (processList != null && !processList.isEmpty()) {
                Prozess p = processList.get(0);
                if (processList.size() > 1) {
                    for (Prozess process : processList) {
                        if (process.getTitel().contains(processIdSuffix)) {
                            p = process;
                            break;
                        } else if (p.getTitel().contains(processIdVolume)) {
                            p = process;
                        }
                    }

                }

                VolumeInfo info = recordMap.get(r.getId());
                recordMap.remove(r.getId());
                p.setTitel(p.getTitel().split("_")[0] + "_" + processTitle);
                r.setId(p.getTitel());
                recordMap.put(r.getId(), info);
                logger.info("Found existing process '" + p.getTitel() + "'...");
                metsFilePath = p.getMetadataFilePath();
                processDataDirectory = p.getProcessDataDirectory();
                logger.debug("METS file path: " + metsFilePath);
                logger.debug("Process data path: " + processDataDirectory);
                File metadataFile = new File(metsFilePath);
                return metadataFile;
            }
        } catch (DAOException e) {
            logger.error(e.toString());
        } catch (SwapException e) {
            logger.error(e.toString());
        } catch (IOException e) {
            logger.error(e.toString());
        } catch (InterruptedException e) {
            logger.error(e.toString());
        }
        return null;
    }

    /**
     * returns the metadatafile meta.xml if a prozess of this name was found, null otherwise
     * 
     * @param processTitle
     * @return
     */
    public static File DeleteExistingData(String processTitle) {
        String processDataDirectory = null;
        ProzessDAO dao = new ProzessDAO();

        try {
            List<Prozess> processList = dao.search("from Prozess where titel LIKE '%" + processTitle + "%'");

            logger.debug("Deleting " + processList.size() + "data sets.");
            if (processList != null && !processList.isEmpty()) {
                for (Prozess prozess : processList) {
                    processDataDirectory = prozess.getProcessDataDirectory();
                    dao.remove(prozess);
                }
                File prozessDir = new File(processDataDirectory);
                deleteOldDirs(prozessDir.getParentFile(), processTitle);
            }
        } catch (DAOException e) {
            logger.error(e.toString(), e);
        } catch (SwapException e) {
            logger.error(e.toString(), e);
        } catch (IOException e) {
            logger.error(e.toString(), e);
        } catch (InterruptedException e) {
            logger.error(e.toString(), e);
        }
        return null;
    }

    /**
     * Deletes all goobi metadata directories contianing prozesses oft the given identifier
     * 
     * @param parentDir
     * @param identifier
     */
    private static void deleteOldDirs(File parentDir, String identifier) {
        File[] dirs = parentDir.listFiles(DirFilter);
        boolean delete = false;

        for (File dir : dirs) {
            File imageDir = new File(dir, "images");
            if (imageDir.isDirectory()) {
                File[] imageDirChildren = imageDir.listFiles(DirFilter);
                for (File childDir : imageDirChildren) {
                    if (childDir.getName().contains(identifier))
                        delete = true;
                    break;
                }
            } else
                delete = true;
            if (delete) {
                logger.info("Deleting directory " + dir.getName());
                CommonUtils.deleteAllFiles(dir);
            }
        }

    }

    /**
     * 
     * creates a backup of the oldMetaFile and replaces the metafile with the Record record
     * 
     * @param record
     * @param oldMetaFile
     */
    private boolean updateOldRecord(Record record, File oldMetaFile) {

        currentRecord = record;
        VolumeInfo info = recordMap.get(record.getId());
        if (info == null) {
            logger.error("Unable to find information to that record");
            return false;
        }
        currentPieceDesignation = info.pieceDesignation;
        currentIdentifier = info.identifier;
        currentVolume = info.volumeNumber;
        totalVolumes = info.totalVolumes;
        identifierSuffix = info.identifierSuffix;

        data = record.getData();
        // currentCollectionList = r.getCollections();
        currentCollectionList = new ArrayList<String>();
        String collection = projectsCollectionsMap.get(info.projectName);
        if (collection != null) {
            currentCollectionList.add(collection);
        }
        Fileformat ff = convertData();
        addProject(ff, info.projectName);
        // correctId(ff);
        logger.info("Replacing old matadata in metadata folder " + oldMetaFile.getParent() + " with new data");

        // renaming old metadata files to keep as backup
        String newMetaFileName = oldMetaFile.getAbsolutePath();
        File oldAnchorFile = new File(oldMetaFile.getParent(), "meta_anchor.xml");
        if (oldAnchorFile.isFile()) {
            oldAnchorFile.renameTo(new File(oldMetaFile.getParent(),
                    oldAnchorFile.getName().replace(".xml", ".preIntrandaImport.xml")));
        }
        oldMetaFile.renameTo(
                new File(oldMetaFile.getParent(), oldMetaFile.getName().replace(".xml", ".preIntrandaImport.xml")));

        try {
            if (ff == null) {
                logger.error("Mets document is null. Aborting import");
            }
            String fileName = newMetaFileName;
            logger.debug("Writing '" + fileName + "' into existing folder...");

            // MetsMods mm = new MetsMods(prefs);
            // mm.setDigitalDocument(ff.getDigitalDocument());

            ff.write(fileName);
            copyImageFiles(info.imageDirs);
            copyPdfs(info.pdfFiles);
            // getting anchor file
            File importDir = new File(importFolder);
            if (!importDir.isDirectory()) {
                logger.warn("no hotfolder found. Cannot get anchor files");
            } else {

                File tempFolder = new File(importFolder, getProcessTitle().replace(".xml", ""));
                if (tempFolder.isDirectory()) {
                    CommonUtils.moveDir(tempFolder, oldMetaFile.getParentFile(), true);
                }

                // purging old temp files
                for (File file : importDir.listFiles(XmlFilter)) {
                    if (file.getName().contains(record.getId()))
                        file.delete();
                }

            }

            // ret.put(getProcessTitle(), ImportReturnValue.ExportFinished);
        } catch (IOException e) {
            logger.error(e.getMessage(), e);
            return false;
        } catch (WriteException e) {
            logger.error(e.getMessage(), e);
            return false;
        } catch (PreferencesException e) {
            logger.error(e.getMessage(), e);
            return false;
        } finally {
            logger.trace("Deleting importFile");
            importFile.delete();
        }
        return true;
    }

    /**
     * Removes all duplicate occurences of marc datafields with the same same tag
     * 
     * @param newRecord
     * @return
     */
    @SuppressWarnings({ "unchecked", "unused" })
    private Element removeDuplicateChildren(Element newRecord) {
        Element newnewRecord = new Element(newRecord.getName());
        List<Element> children = newRecord.getChildren();
        List<String> tags = new ArrayList<String>();
        logger.debug("Cecking for duplicate datafields in " + children.size() + " fields");
        for (Element child : children) {
            if (child.getAttribute("tag") != null && tags.contains(child.getAttributeValue("tag"))) {
                logger.debug("Found duplicate MARC field with tag " + child.getAttributeValue("tag"));
            } else {
                logger.debug("Added tag " + child.getAttributeValue("tag") + " to tag list");
                newnewRecord.addContent((Element) child.clone());
                tags.add(child.getAttributeValue("tag"));
            }
        }

        return newnewRecord;
    }

    /**
     * Extract marc section from import document and return it as its own document
     * 
     * @param inDoc
     * @return
     */
    private Document getMarcDocument(Document inDoc, boolean insertFakeMods) {
        Element marcRecord = null;
        Element eleMarc = null;
        // getting document elements
        Element root = inDoc.getRootElement();

        logger.debug("getting document elements");
        if (root != null) {
            if (root.getChildren() == null || root.getChildren().isEmpty()) {
                logger.error("No children found in root");
                return null;
            } else {
                try {
                    eleMarc = root.getChild("dmdSec", mets).getChild("mdWrap", mets).getChild("xmlData", mets)
                            .getChild("marc", null);
                    marcRecord = eleMarc.getChild("record", null);
                } catch (NullPointerException e) {
                    logger.error("No dmdSec or marcData found");
                    return null;
                }
            }
        }
        marcRecord.detach();
        if (insertFakeMods) {
            eleMarc.setName("mods");
            Element extension = new Element("extension");
            eleMarc.addContent(extension);
            Element eleGoobi = new Element("goobi");
            extension.addContent(eleGoobi);
            // Element fakeAnchorId = new Element("metadata");
            // fakeAnchorId.setAttribute("name", "CatalogIDDigital");
            // fakeAnchorId.setAttribute("anchorId", "true");
            // fakeAnchorId.setText("0000");
            // eleGoobi.addContent(fakeAnchorId);

        }
        return new Document(marcRecord);
    }

    /**
     * Get the namespaces using Element root
     * 
     * @param root
     */
    private void getNamespaces(Element root) {
        mets = root.getNamespace("mets");
        if (mets != null)
            logger.debug("Namespace mets: Prefix = " + mets.getPrefix() + ", uri = " + mets.getURI());
        // premis = root.getNamespace("premis");
        // if (premis != null)
        // logger.debug("Namespace premis: Prefix = " + premis.getPrefix() + ", uri = " + premis.getURI());
        // mix = root.getNamespace("mix");
        // if (mix != null)
        // logger.debug("Namespace mix: Prefix = " + mix.getPrefix() + ", uri = " + mix.getURI());
        // marc = root.getNamespace("marc");
        // if (marc != null)
        // logger.debug("Namespace marc: Prefix = " + marc.getPrefix() + ", uri = " + marc.getURI());
        // mods = root.getNamespace("mods");
        // if (mods != null)
        // logger.debug("Namespace mods: Prefix = " + mods.getPrefix() + ", uri = " + mods.getURI());
        // goobi = root.getNamespace("goobi");
        // if (goobi != null) {
        // logger.debug("Namespace goobi: Prefix = " + goobi.getPrefix() + ", uri = " + goobi.getURI());
        // } else {
        // // goobi = Namespace.getNamespace("goobi", null);
        // }
    }

    /**
     * 
     * Creates a single String out of the Document document
     * 
     * @param document
     * @param encoding
     * @return
     */
    private String getStringFromDocument(Document document, String encoding) {
        if (document == null) {
            logger.warn("Trying to convert null document to String. Aborting");
            return null;
        }
        XMLOutputter outputter = new XMLOutputter();
        Format xmlFormat = outputter.getFormat();
        if (!(encoding == null) && !encoding.isEmpty())
            xmlFormat.setEncoding(encoding);
        xmlFormat.setExpandEmptyElements(true);
        outputter.setFormat(xmlFormat);
        String docString = outputter.outputString(document);

        return docString;
    }

    /**
     * Gets the corrsponding image folders and if necessary constructs a suffix to the identifier to distinguish this record from others with the same
     * identifier. Returns a unique identifier String for this record
     * 
     * 
     * @param r
     * @return
     */
    private String parseImagefolder(Record r) {

        imageDirs = new ArrayList<File>();
        pdfFiles = new ArrayList<File>();

        // identifiers of metadata file
        String id = r.getId();
        String identifier = null;
        String pieceDesignation = null;
        String volumeIdentifier = null;
        String[] idParts = id.split("_");
        int first = 0;
        int last = idParts.length - 1;
        if (idParts == null || idParts.length == 0) {
            return null;
        }
        if (idParts[0].contentEquals("M") && idParts.length > 1) {
            first = 1;
        }
        identifier = idParts[first];

        if (idParts.length > first + 1) {
            if (idParts[last].startsWith("V")) {
                volumeIdentifier = idParts[last];
                last--;
            }
            for (int i = first + 1; i < last + 1; i++) {
                if (pieceDesignation == null) {
                    pieceDesignation = idParts[i];
                } else if (idParts[i] != null && !idParts[i].isEmpty()) {
                    pieceDesignation += ("_" + idParts[i]);
                }
            }
        }

        // getting matching image folders
        if (!exportFolder.isDirectory()) {
            logger.warn("export folder does not exist. Cannot copy image files");
            return identifier;
        }

        File[] projectFolders = exportFolder.listFiles();
        for (File folder : projectFolders) {
            if (!folder.isDirectory() || !folder.getName().startsWith("00")) {
                continue;
            }

            projectName = folder.getName();
            identifierSuffix = null;
            logger.debug("Found project " + projectName);

            List<File> folders = Arrays.asList(folder.listFiles());
            File tiffDir = null, pdfDir = null;
            for (File file : folders) {
                if (file.isDirectory() && file.getName().toLowerCase().contentEquals("tiff")) {
                    logger.debug("found \"tiff\" directory in " + folder.getName());
                    tiffDir = file;
                }

                if (file.isDirectory() && file.getName().toLowerCase().contentEquals("pdf")) {
                    logger.debug("found \"pdf\" directory in " + folder.getName());
                    pdfDir = file;
                }
            }

            List<File> processFolders = Arrays.asList(folder.listFiles());
            List<File> tiffFolders = null;
            if (tiffDir != null) {
                tiffFolders = Arrays.asList(tiffDir.listFiles());
            }
            ArrayList<File> idImageDirs = new ArrayList<File>();
            ArrayList<File> idRecordFiles = new ArrayList<File>();
            // Get image dirs matching the identifier
            for (File file : processFolders) {
                if (file.isDirectory() && file.getName().contains(identifier)) {
                    idImageDirs.add(file);
                    logger.debug("found export folder " + file.getName() + " in " + folder.getAbsolutePath());
                } else if (file.isFile() && file.getName().toLowerCase().endsWith(".xml")
                        && file.getName().contains(identifier)) {
                    idRecordFiles.add(file);
                }
            }
            if (tiffFolders != null) {
                for (File file : tiffFolders) {
                    if (file.isDirectory() && file.getName().contains(identifier)) {
                        idImageDirs.add(file);
                        logger.debug("found export folder " + file.getName() + " in " + tiffDir.getAbsolutePath());
                    }
                }
            }

            if (idImageDirs == null || idImageDirs.isEmpty()) {
                continue;
            }

            // from these, get image dirs actually matching the metadata-file
            if (idImageDirs.size() == 1) {
                imageDirs.add(idImageDirs.get(0));
            } else if (idRecordFiles.size() == 1) {
                // there is only one record of this id, so get all matching imageDirs
                imageDirs = idImageDirs;
            } else {
                for (File dir : idImageDirs) {
                    if (pieceDesignation == null || dir.getName().contains("_" + pieceDesignation)) {
                        if (volumeIdentifier == null || dir.getName().contains("_" + volumeIdentifier)) {
                            imageDirs.add(dir);
                        }
                    }
                }
            }

            //if necessary, attempt to get volumeNumber from image dir
            if (pieceDesignation != null && volumeIdentifier == null && imageDirs.size() == 1) {
                String imageDirName = imageDirs.get(0).getName();
                int pieceDesignationStartPos = imageDirName.lastIndexOf(pieceDesignation);
                int pieceDesignationEndPos = pieceDesignationStartPos + pieceDesignation.length() + 1;
                if (pieceDesignationStartPos > 0 && pieceDesignationEndPos < imageDirName.length()) {
                    String volumeString = imageDirName.substring(pieceDesignationEndPos);
                    if (volumeString != null && !volumeString.isEmpty()) {
                        volumeIdentifier = volumeString;
                    }
                }
            }

            // sort imageDirs by name
            if (imageDirs != null && !imageDirs.isEmpty()) {
                Collections.sort(imageDirs);
            }

            if (idImageDirs.size() > imageDirs.size()) {
                // there is at least one other process with the same identifier. Add volume or pieceDesignation to this identifier
                if (volumeIdentifier != null && !volumeIdentifier.contentEquals("V00")) {
                    identifierSuffix = volumeIdentifier;
                } else {
                    identifierSuffix = pieceDesignation;
                }
            }

            String logString = "Found the following image directories:";
            if (imageDirs != null && !imageDirs.isEmpty()) {
                for (File file : imageDirs) {
                    logString += ("\n" + file.getAbsolutePath());
                }
            } else {
                logString += "\nNONE";
            }
            logger.debug(logString);

            // get pdfFile if one exists
            if (pdfDir != null) {
                for (File file : pdfDir.listFiles(pdfFilter)) {
                    if (file.getName().contains(identifier)) {
                        // pdf has the correct identifier, check if is matches the volume as well
                        if (idImageDirs.size() < 2 || imageDirs.size() == idImageDirs.size()
                                || (volumeIdentifier != null && file.getName().contains(volumeIdentifier))
                                || (pieceDesignation != null && file.getName().contains(pieceDesignation))) {
                            pdfFiles.add(file);
                            logger.debug("Found pdf-file " + file.getAbsolutePath());
                        }
                    }
                }
            }

            if (identifierSuffix != null) {
                return identifier + "_" + identifierSuffix;
            } else {
                return identifier;
            }
        }
        return null;
    }

    private String getActualProjectName(String name) {
        if (name.contentEquals("0006_PMSC_GR_EEA")) {
            return "0006_PMSC";
        }
        return name;
    }

    /**
     * 
     * Copy the files in exportFolder corresponding to the current record into the importFolder
     * 
     * @param exportFolder
     */
    private void copyImageFiles(List<File> imageDirs) {

        if (!copyImages || imageDirs == null || imageDirs.isEmpty()) {
            logger.debug("No images to copy");
            return;
        }

        Collections.sort(imageDirs);

        // get temp dir
        File tempDir = new File(importFolder, getProcessTitle().replace(".xml", ""));
        File tempImageDir = new File(tempDir, "images");
        File tempTiffDir = new File(tempImageDir, getProcessTitle().replace(".xml", "") + "_tif");
        // File tempOrigDir = new File(tempImageDir, "orig_" + getProcessTitle().replace(".xml", "") + "_tif");
        tempTiffDir.mkdirs();

        // parse all image Files and write them into new Files in the import
        // directory
        List<File> images = new ArrayList<File>();
        //        Collections.sort(imageDirs);
        for (File dir : imageDirs) {
            images.addAll(Arrays.asList(dir.listFiles(ImageFilter)));
        }

        for (File imageFile : images) {
            try {
                String filename = imageFile.getName();
                if (!deleteOriginalImages) {
                    copyFile(imageFile, new File(tempTiffDir, filename));
                    logger.debug("Copying image " + filename);
                } else {
                    CommonUtils.moveFile(imageFile, new File(tempTiffDir, filename), true);

                }

            } catch (Exception e) {
                logger.error(e.getMessage(), e);
            }
        }
    }

    private void copyFile(File source, File dest) throws IOException {
        InputStream inStream = new FileInputStream(source);
        BufferedInputStream bis = new BufferedInputStream(inStream);
        FileOutputStream fos = new FileOutputStream(dest);
        byte[] bytes = new byte[8192];
        int count = bis.read(bytes);
        while (count != -1 && count <= 8192) {
            fos.write(bytes, 0, count);
            count = bis.read(bytes);
        }
        if (count != -1) {
            fos.write(bytes, 0, count);
        }
        fos.close();
        bis.close();
    }

    private void copyPdfs(List<File> pdfFiles) {

        if (!copyPdfs || pdfFiles == null || pdfFiles.isEmpty()) {
            logger.debug("No pdf to copy");
            return;
        }

        Collections.sort(pdfFiles);

        // get temp dir
        File tempDir = new File(importFolder, getProcessTitle().replace(".xml", ""));
        File tempOcrDir = new File(tempDir, "ocr");
        File tempPdfDir = new File(tempOcrDir, getProcessTitle().replace(".xml", "") + "_pdf");
        tempPdfDir.mkdirs();
        PdfExtractor extractor = new PdfExtractor();
        for (File pdfFile : pdfFiles) {
            try {
                splitPdf(pdfFile, tempPdfDir, extractor);
                if (deleteOriginalImages) {
                    pdfFile.delete();
                }
            } catch (Exception e) {
                logger.error(e.getMessage(), e);
            }
        }
    }

    @SuppressWarnings("unchecked")
    private Document createArchiveStructData(Document doc) {

        mets = doc.getRootElement().getNamespace("mets");

        Element physStruct = null;
        Element logStruct = null;
        List<Element> structMaps = doc.getRootElement().getChildren("structMap", mets);

        for (Element element : structMaps) {
            if (element.getAttributeValue("TYPE").contentEquals("PHYSICAL")) {
                physStruct = element;
            } else if (element.getAttributeValue("TYPE").contentEquals("LOGICAL")) {
                logStruct = element;
            }
        }

        List<Element> origLogStructs = ((Element) logStruct.getChildren().get(0)).getChildren();
        if (origLogStructs == null || origLogStructs.isEmpty()) {
            origLogStructs = new ArrayList<Element>();
            origLogStructs.add((Element) logStruct.getChildren().get(0));
        }
        logStruct.removeContent();
        List<Element> pages = getPhysicalPages(physStruct, true);
        List documentStruct = physStruct.removeContent();
        if (dsAnchorType == null) {
            int count = 0;
            while (!(documentStruct.get(count) instanceof Element)) {
                count++;
            }
            documentStruct = ((Element) documentStruct.get(count)).removeContent();
        }
        //        physStruct.removeContent();

        //create new Physical DocStruct
        Element boundBook = new Element("div", mets);
        boundBook.setAttribute("ID", "PHYS_0000");
        //        boundBook.setAttribute("DMDID", "DMDPHYS_0000");
        boundBook.setAttribute("TYPE", "BoundBook");
        int pageCount = 1;
        for (Element page : pages) {
            page.setAttribute("TYPE", "page");
            String labelString = page.getAttributeValue("LABEL");
            String orderlabelString = page.getAttributeValue("ORDERLABEL");
            page.setAttribute("LABEL", orderlabelString);
            page.setAttribute("ORDERLABEL", labelString);
            page.setAttribute("ORDER", "" + pageCount);
            boundBook.addContent(page.detach());
            pageCount++;
        }
        physStruct.addContent(boundBook);

        //create new Logical DocStruct
        logStruct.setContent(documentStruct);
        Element eleDocument = (Element) logStruct.getChildren().get(0);
        if (dsAnchorType != null) {
            eleDocument.setAttribute("TYPE", "ArchiveDocument");
            eleDocument = (Element) eleDocument.getChildren().get(0);
        }
        eleDocument.setAttribute("TYPE", "CompoundDocument");
        eleDocument.setAttribute("DMDID", origLogStructs.get(0).getAttributeValue("DMDID"));
        eleDocument.setAttribute("ADMID", origLogStructs.get(0).getAttributeValue("ADMID"));

        Iterator<Element> itemIterator = origLogStructs.iterator();

        for (Object obj : ((Element) logStruct.getChildren().get(0)).getChildren()) {
            if (obj instanceof Element) {
                correctArchiveDocStruct((Element) obj, itemIterator);
            }
        }

        return doc;

    }

    private void correctArchiveDocStruct(Element ele, Iterator<Element> itemIterator) {
        List<Element> children = ele.getChildren();
        if (children != null && !children.isEmpty()) {
            //This is a compount item
            ele.setAttribute("TYPE", "CompoundDocument");
            for (Element child : children) {
                correctArchiveDocStruct(child, itemIterator);
            }
        } else {
            //This is a simple item: retrieve ids from the corresponding logical item
            ele.setAttribute("TYPE", "SimpleDocument");
            if (itemIterator.hasNext()) {
                Element origEle = itemIterator.next();
                itemTitleList.add(origEle.getAttributeValue("LABEL"));
                ele.setAttribute("ID", origEle.getAttributeValue("ID"));
            } else {
                logger.error("Found a simple document in phys struct, but no corresponding element in log struct");
            }
        }

    }

    /**
     * 
     * makes the physical and logical structMap of doc compatible with goobi
     * 
     * @param doc
     * @param anchorElement the document's anchor as jDom Element. must be inserted into the ff afterwards
     * @return
     */
    @SuppressWarnings("unchecked")
    private Document replaceStructData(Document doc) {

        Element physStruct = null;
        Element logStruct = null;
        List<Element> structMaps = doc.getRootElement().getChildren("structMap", mets);

        // set filegroup use to local
        Element fileSec = doc.getRootElement().getChild("fileSec", mets);
        if (fileSec != null) {
            Element fileGrp = fileSec.getChild("fileGrp", mets);
            fileGrp.setAttribute("USE", "LOCAL");
        }

        for (Element element : structMaps) {
            if (element.getAttributeValue("TYPE").contentEquals("PHYSICAL")) {
                physStruct = element;
            } else if (element.getAttributeValue("TYPE").contentEquals("LOGICAL")) {
                logStruct = element;
            }
        }

        // if we don't know the logical struct types, set them to Document/Item
        if (logStruct != null) {
            List<Element> logChildren = logStruct.getChildren("div", mets);
            if (logChildren != null) {
                for (Element element : logChildren) {
                    element.setAttribute("TYPE", "DOCUMENT");
                    List<Element> logRootChildren = element.getChildren("div", mets);
                    if (logRootChildren != null && !logRootChildren.isEmpty()) {
                        for (Element child : logRootChildren) {
                            child.setAttribute("TYPE", "ITEM");

                        }
                    }
                }
            }

        }

        // make physDocStrct comply to (Collection->)BoundBook->page
        // set type of physical root
        Element physRoot = null;
        for (Object obj : physStruct.getChildren("div", mets)) {
            if (obj instanceof Element) {
                physRoot = (Element) obj;
                break;
            } else {
                continue;
            }
        }
        physRoot.setAttribute("TYPE", "Collection");
        List<Element> books = new ArrayList<Element>();
        books.add(physRoot);
        List<Element> pages = getPhysicalPages(books.get(0), false);
        while (pages.size() == 0) {
            books = books.get(0).getChildren();
            for (Element book : books) {
                book.setAttribute("TYPE", "Collection");
                pages.addAll(getPhysicalPages(book, false));
            }
        }
        for (Element book : books) {
            // book.setAttribute("TYPE", "Collection");
            book.setAttribute("TYPE", "BoundBook");
        }
        for (Element page : pages) {
            page.setAttribute("TYPE", "page");
            String labelString = page.getAttributeValue("LABEL");
            String orderlabelString = page.getAttributeValue("ORDERLABEL");
            page.setAttribute("LABEL", orderlabelString);
            page.setAttribute("ORDERLABEL", labelString);
        }

        return doc;
    }

    private ArrayList<Element> getPhysicalPages(Element parent, boolean recursive) {

        ArrayList<Element> pages = new ArrayList<Element>();

        List children = parent.getChildren("div", mets);
        if (children == null || children.isEmpty()) {
            return pages;
        }
        int volume = -1;
        try {
            volume = Integer.valueOf(parent.getAttributeValue("ORDER"));
        } catch (Exception e) {

        }
        for (Object object : children) {
            if (object instanceof Element) {
                Element child = (Element) object;
                if (child.getChild("fptr", mets) != null) { // a page

                    // child.getAttributeValue("TYPE").toLowerCase().contentEquals("page") ||
                    // child.getAttributeValue("TYPE").toLowerCase().contentEquals("duplex") ) {
                    // child is a page
                    pages.add(child);
                    if (volume != -1) {
                        // map the volume this page belongs to
                        pageVolumeMap.put(child.getAttributeValue("ID"), volume);
                    }

                } else {
                    if (recursive) {
                        pages.addAll(getPhysicalPages(child, true));
                    }
                }
            }
        }

        return pages;
    }

    /**
     * Puts the logical StructData before physical StructData in the document structure of doc
     * 
     * @param doc
     * @return
     */
    @SuppressWarnings("unchecked")
    private Document swapStructData(Document doc) {

        List<Element> structMaps = doc.getRootElement().getChildren("structMap", mets);
        Element structLink = doc.getRootElement().getChild("structLink", mets);
        Element physStruct = null, logStruct = null;
        for (Element element : structMaps) {
            if (element.getAttributeValue("TYPE").contentEquals("PHYSICAL"))
                physStruct = element;
            if (element.getAttributeValue("TYPE").contentEquals("LOGICAL"))
                logStruct = element;
        }

        physStruct.detach();
        logStruct.detach();
        structLink.detach();

        doc.getRootElement().addContent(logStruct);
        doc.getRootElement().addContent(physStruct);
        doc.getRootElement().addContent(structLink);

        return doc;
    }

    /**
     * 
     * replaces the xmlData (actually the content of dmdSec) of doc with that of marcDoc
     * 
     * @param doc
     * @param marcDoc
     * @return
     */
    @SuppressWarnings("unchecked")
    private Document replaceXmlData(Document doc, Document marcDoc) {

        // get old and new Dmd sections
        List<Element> newDmdSecs = marcDoc.getRootElement().getChildren("dmdSec", mets);

        String dmdID1 = ((Element) newDmdSecs.get(0)).getAttributeValue("ID");
        String dmdIDpyhs = null;
        String dmdID2 = null;
        if (newDmdSecs.size() > 1) {
            dmdIDpyhs = ((Element) newDmdSecs.get(newDmdSecs.size() - 1)).getAttributeValue("ID");
        }
        if (newDmdSecs.size() > 2) {
            dmdID2 = ((Element) newDmdSecs.get(1)).getAttributeValue("ID");
        }

        // remove all Children of rootElement and save them as list
        List<Content> objects = doc.getRootElement().removeContent();
        List<Element> elements = new ArrayList<Element>();
        for (Content content : objects) {
            if (content instanceof Element) {
                elements.add((Element) content);
                logger.debug("adding Element to new root children list");
            }
        }

        // reattach all children of root, except "dmdSec", which is replaced by the two new sections
        for (Element element : elements) {
            if (element.getName().contentEquals("dmdSec")) {
                // get child of first dmdSec
                dmdID1 = element.getAttributeValue("ID");
                Element dmdChild = (Element) newDmdSecs.get(0).getChild("mdWrap", mets).clone();
                element.setContent(dmdChild);
                doc.getRootElement().addContent(element);

                if (newDmdSecs.size() > 1) // check if we have indeed two dmdSecs
                {
                    for (int i = 1; i < newDmdSecs.size(); i++) {
                        Element newDmdSec2 = (Element) newDmdSecs.get(i).clone();
                        //                        dmdIDpyhs = newDmdSec2.getAttributeValue("ID");
                        doc.getRootElement().addContent(newDmdSec2);
                    }
                }
            } else {
                // all other elements, just reattach
                doc.getRootElement().addContent(element);
                // set dmdid for boundbook
                if (dmdIDpyhs != null && element.getName().contentEquals("structMap")
                        && element.getAttributeValue("TYPE").contentEquals("PHYSICAL")) {
                    logger.debug("adding reference DMDID=" + dmdIDpyhs + " to physical structMap");
                    Element book = element.getChild("div", mets);
                    book.setAttribute("DMDID", dmdIDpyhs);
                }
                //set dmdid for second logical struct
                if (element.getName().contentEquals("structMap")
                        && element.getAttributeValue("TYPE").contentEquals("LOGICAL")) {
                    logger.debug("adding reference DMDID=" + dmdID1 + " to top logical struct");
                    Element logical1 = element.getChild("div", mets);
                    Element logical2 = logical1.getChild("div", mets);

                    if (dmdID2 != null) {
                        logger.debug("adding reference DMDID=" + dmdID2 + " to second logical struct");
                        logical1.setAttribute("DMDID", dmdID1);
                        logical2.setAttribute("DMDID", dmdID2);
                    } else {
                        logical2.setAttribute("DMDID", dmdID1);
                    }
                }
            }
        }
        return doc;
    }

    /**
     * Unzip a zip archive and write results into Array of Strings
     * 
     * @param source
     * @return
     */
    private HashMap<String, byte[]> unzipFile(File source) {
        ArrayList<String> filenames = new ArrayList<String>();
        HashMap<String, byte[]> contentMap = new HashMap<String, byte[]>();

        FileInputStream fis = null;
        BufferedInputStream bis = null;
        ZipInputStream in = null;
        try {
            fis = new FileInputStream(source);
            bis = new BufferedInputStream(fis);
            in = new ZipInputStream(bis);
            ZipEntry entry;
            while ((entry = in.getNextEntry()) != null) {
                filenames.add(entry.getName());
                logger.debug("Unzipping file " + entry.getName() + " from archive " + source.getName());
                ByteArrayOutputStream baos = new ByteArrayOutputStream();
                BufferedOutputStream out = new BufferedOutputStream(baos);
                int size;
                byte[] buffer = new byte[2048];
                while ((size = in.read(buffer, 0, buffer.length)) != -1) {
                    out.write(buffer, 0, size);
                }

                if (entry != null)
                    in.closeEntry();
                if (out != null) {
                    out.close();
                }
                if (baos != null) {
                    baos.close();
                }
                contentMap.put(entry.getName(), baos.toByteArray());

            }
        } catch (IOException e) {
            logger.error(e.toString(), e);
        } finally {
            try {
                if (fis != null) {
                    fis.close();
                }
                if (bis != null) {
                    bis.close();
                }
                if (in != null) {
                    in.close();
                }
            } catch (IOException e) {
                logger.error(e.toString(), e);
            }
        }
        return contentMap;
    }

    private HashMap<String, String> unzipStream(InputStream iStream) {
        ArrayList<String> stringList = new ArrayList<String>();
        ArrayList<String> filenames = new ArrayList<String>();
        HashMap<String, String> contentMap = new HashMap<String, String>();

        BufferedInputStream bis = null;
        ZipInputStream in = null;
        try {
            bis = new BufferedInputStream(iStream);
            in = new ZipInputStream(bis);
            ZipEntry entry;
            while ((entry = in.getNextEntry()) != null) {
                filenames.add(entry.getName());
                BufferedReader br = new BufferedReader(new InputStreamReader(in));
                StringBuffer sb = new StringBuffer();
                String line;
                while ((line = br.readLine()) != null) {
                    sb.append(line).append("\n");
                }
                stringList.add(sb.toString());
                contentMap.put(entry.getName(), sb.toString());
            }
        } catch (IOException e) {
            logger.error(e.toString(), e);
        } finally {
            try {
                if (bis != null) {
                    bis.close();
                }
                if (in != null) {
                    in.close();
                }
            } catch (IOException e) {
                logger.error("Error closing zip input stream");
            }
        }

        return contentMap;
    }

    /**
     * Sets the namespace of all Elements within Element root to Namespace ns
     * 
     * @param root
     * @param ns
     * @return
     */
    @SuppressWarnings("unchecked")
    public static Element setNamespaceRecursive(Element root, Namespace ns) {
        List<Element> current = new ArrayList<Element>();
        current.add(root);
        do {
            List<Element> children = new ArrayList<Element>();
            for (Element element : current) {
                element.setNamespace(ns);
                children.addAll(element.getChildren());
            }
            current = children;
        } while (!current.isEmpty());

        return root;
    }

    private Document getMarcModsLoss(Document marcDoc, Document modsDoc) {

        Element record = marcDoc.getRootElement();
        if (record == null) {
            return null;
        }

        Document missingElementsDoc = new Document(new Element("Lost-in-MarcMods-Conversion"));
        String modsString = CommonUtils.getStringFromDocument(modsDoc, encoding);
        // modsString = modsString.replaceAll("\"", "");

        Iterator<Content> descendant = record.getDescendants();
        while (descendant.hasNext()) {
            Element ele = null;
            Object obj = descendant.next();
            if (obj instanceof Element) {
                ele = (Element) obj;
                String marcContent = ele.getText();
                if (marcContent != null && !marcContent.isEmpty()) {
                    try {
                        CharSequence cs = marcContent.trim().subSequence(0, marcContent.trim().length() - 1);
                        marcContent = cs.toString().trim();
                        if (modsString.contains(marcContent)) {
                            // marc field is contained in the mods doc
                        } else {
                            Element parentElement = ele.getParentElement();
                            if (parentElement != null && parentElement != record) {
                                // The element has a parent that is not the root element, which we should log as well
                                Element parentClone = (Element) parentElement.clone();
                                parentClone.removeContent();
                                missingElementsDoc.getRootElement().addContent(parentClone);
                                parentClone.addContent((Element) ele.clone());
                            } else {
                                missingElementsDoc.getRootElement().addContent((Element) ele.clone());
                            }
                        }
                    } catch (IndexOutOfBoundsException e) {
                        continue;
                    }
                }

            }
        }
        return missingElementsDoc;
    }

    public static void main(String[] args) throws PreferencesException, WriteException {
        CSICMixedImport converter = new CSICMixedImport();
        converter.prefs = new Prefs();

        try {
            converter.prefs.loadPrefs("/opt/digiverso/goobi/rulesets/rulesetCSIC.xml");
        } catch (PreferencesException e) {
            logger.error(e.getMessage(), e);
        }

        converter.setImportFolder("output/");
        List<Record> records = new ArrayList<Record>();
        if (!converter.exportFolder.isDirectory()) {
            logger.warn("No export directory found. Aborting");
            return;
        }

        // converter.createFileformat(file);
        File file = new File("/mnt/csic/0017_FACN/000009800_9800001020.xml");
        converter.setFile(file);
        records.addAll(converter.generateRecordsFromFile());
        // }

        List<ImportObject> results = converter.generateFiles(records);

        for (ImportObject record : results) {
            System.out.println(record.getProcessTitle() + " \t \t " + record.getImportReturnValue());
        }
    }

    public static FilenameFilter ImageFilter = new FilenameFilter() {
        @Override
        public boolean accept(File dir, String name) {
            boolean validImage = false;
            // tif
            if (name.endsWith("tif") || name.endsWith("TIF")) {
                validImage = true;
            }

            // jpg
            if (name.endsWith("jpg") || name.endsWith("JPG") || name.endsWith("jpeg") || name.endsWith("JPEG")) {
                validImage = true;
            }

            return validImage;
        }
    };

    public static FilenameFilter XmlFilter = new FilenameFilter() {
        @Override
        public boolean accept(File dir, String name) {
            boolean validImage = false;
            if (name.endsWith("xml") || name.endsWith("XML")) {
                validImage = true;
            }

            return validImage;
        }
    };

    public static FilenameFilter zipFilter = new FilenameFilter() {
        @Override
        public boolean accept(File dir, String name) {
            boolean validImage = false;
            if (name.endsWith("zip") || name.endsWith("ZIP")) {
                validImage = true;
            }

            return validImage;
        }
    };
    public static FilenameFilter pdfFilter = new FilenameFilter() {
        @Override
        public boolean accept(File dir, String name) {
            boolean validImage = false;
            if (name.endsWith("pdf") || name.endsWith("PDF")) {
                validImage = true;
            }

            return validImage;
        }
    };
    public static FilenameFilter MetsFilter = new FilenameFilter() {
        @Override
        public boolean accept(File dir, String name) {
            boolean validImage = false;
            if (name.endsWith("XML") || name.endsWith("xml")) {
                validImage = true;
            }

            return validImage;
        }
    };
    // Filters for file searches
    public static FileFilter DirFilter = new FileFilter() {
        public boolean accept(File file) {
            return file.isDirectory();
        }
    };

    @Override
    public List<Record> generateRecordsFromFilenames(List<String> filenames) {

        List<Record> recordList = new ArrayList<Record>();
        HashMap<Record, File> updateRecordMap = new HashMap<Record, File>();
        for (String filename : filenames) {
            String[] parts = filename.split("::");
            if (parts == null || parts.length != 2) {
                // return recordList;
                continue;
            }
            String projectName = parts[0].trim();
            String fileName = parts[1].trim();
            File projectDir = new File(exportFolder, projectName);
            if (projectDir.isDirectory()) {
                File sourceFile = new File(projectDir, fileName);
                File tempFile = new File(TEMP_DIRECTORY, sourceFile.getName());
                try {
                    CommonUtils.copyFile(sourceFile, tempFile);
                } catch (IOException e) {
                    logger.error("Unable to copy original metadata file. Cannot import process");
                    continue;
                }
                importFile = tempFile;
                List<Record> tempList = retrieveOriginalRecordsFromFile();
                if (tempList != null) {
                    recordList.addAll(tempList);
                }
            }
        }

        recordList = generateRecordsForImport(recordList);

        return recordList;
    }

    @Override
    public List<ImportProperty> getProperties() {
        return null;
    }

    @Override
    public List<String> getAllFilenames() {
        ArrayList<String> projectList = new ArrayList<String>(projectsCollectionsMap.keySet());
        ArrayList<String> filenameList = new ArrayList<String>();
        String metsDirName = "";
        try {
            for (String project : projectList) {
                File projectDir = new File(exportFolder, project);
                if (projectDir.isDirectory()) {
                    File[] subDirs = projectDir.listFiles(MetsDirFilter);
                    File[] fileList = null;
                    if (subDirs != null) {
                        for (File metsDir : subDirs) {
                            fileList = metsDir.listFiles(MetsFilter);
                            if (fileList != null && fileList.length > 0) {
                                metsDirName = metsDir.getName() + File.separator;
                                break;
                            }
                        }
                    }
                    if (fileList == null || fileList.length == 0) {
                        fileList = projectDir.listFiles(MetsFilter);
                    }
                    if (fileList != null && fileList.length > 0) {
                        for (File file : fileList) {
                            // if(updateExistingRecords) {
                            filenameList.add(project + "::\t" + metsDirName + file.getName());
                        }
                    }
                }
            }
        } catch (Exception e) {
            filenameList.add("ERROR: " + e.getMessage());
            return filenameList;
        } catch (Error e) {
            filenameList.add("ERROR: " + e.getMessage());
            return filenameList;
        }
        Collections.sort(filenameList);
        return filenameList;
    }

    private boolean splitPdf(File pdfFile, File destDir, PdfExtractor extractor) {

        if (pdfFile == null || !pdfFile.isFile()) {
            return false;
        }

        if (destDir == null || !destDir.exists()) {
            destDir.mkdirs();
        }

        if (extractor == null) {
            extractor = new PdfExtractor();
        }

        return extractor.extractPdfs(pdfFile, destDir, null);

    }

    @Override
    public void deleteFiles(List<String> selectedFilenames) {
        if (importFile != null && importFile.isFile()) {
            importFile.delete();
        }

    }

    private void copyAllMetadata(DocStruct source, DocStruct target) {
        if (source.getAllMetadata() != null) {
            for (Metadata md : source.getAllMetadata()) {
                try {
                    target.addMetadata(md);
                } catch (MetadataTypeNotAllowedException e) {
                    logger.error(e.toString());
                } catch (DocStructHasNoTypeException e) {
                    logger.error(e.toString());
                }
            }
        }
        if (source.getAllPersons() != null) {
            for (Person md : source.getAllPersons()) {
                try {
                    target.addPerson(md);
                } catch (MetadataTypeNotAllowedException e) {
                    logger.error(e.toString());
                } catch (DocStructHasNoTypeException e) {
                    logger.error(e.toString());
                }
            }
        }
    }

    @Override
    public List<DocstructElement> getCurrentDocStructs() {
        return null;
    }

    @Override
    public String deleteDocstruct() {
        return null;
    }

    @Override
    public String addDocstruct() {
        return null;
    }

    @Override
    public List<String> getPossibleDocstructs() {
        return null;
    }

    @Override
    public DocstructElement getDocstruct() {
        return null;
    }

    @Override
    public void setDocstruct(DocstructElement dse) {
    }

    public Prefs getPrefs() {
        return prefs;
    }

    public String getCurrentSuffix() {
        return identifierSuffix;
    }

    // Filters for file searches
    public static FileFilter MetsDirFilter = new FileFilter() {
        public boolean accept(File file) {
            if (file.isDirectory() && file.getName().toLowerCase().contains("mets")) {
                return true;
            }
            return false;
        }
    };

    /**
     * Sort Volume DocStructs to correspond to theit volume numbers or identifiers
     * 
     * @author florian
     * 
     */
    public class VolumeComparator implements Comparator<DocStruct> {

        @Override
        public int compare(DocStruct d1, DocStruct d2) {
            if (d1 == null || d2 == null || d1.getIdentifier() == null || d2.getIdentifier() == null) {
                return 0;
            }

            String id1 = d1.getIdentifier().replaceAll("\\D", "");
            String id2 = d2.getIdentifier().replaceAll("\\D", "");
            if (id1 == null || id2 == null || id1.isEmpty() || id2.isEmpty()) {
                return 0;
            }
            int result = Integer.valueOf(id1) - Integer.valueOf(id2);

            if (result == 0) {
                // if both numbers are the same, sort by other criteria
                String[] p1 = d1.getIdentifier().split("_");
                String[] p2 = d2.getIdentifier().split("_");

                if (p1 == null || p1.length == 0 || p2 == null || p2.length == 0) {
                    return 0;
                }
                // first try to sort by number of parts
                result = ((Integer) (p1.length)).compareTo((Integer) (p2.length));

                if (result == 0) {
                    // Then sort by part comparison (last part first, since it is probably the volume id)
                    for (int i = p2.length - 1; i > -1; i--) {
                        result = p1[i].compareTo(p2[i]);
                        if (result != 0) {
                            break;
                        }
                    }
                }
            }
            return result;
        }
    }

    /**
     * Comparator to sort image folders
     * 
     * @author florian
     * 
     */
    public class ProcessFileComparator implements Comparator<File> {

        @Override
        public int compare(File f1, File f2) {
            if (f1 == null || f2 == null) {
                return 0;
            }
            String s1 = f1.getName();
            String s2 = f2.getName();

            String[] p1 = s1.split("_");
            String[] p2 = s2.split("_");

            // compare full Strings only if we cannot split them
            if (p1 == null || p2 == null || p1.length < 2 || p2.length < 2) {
                return s1.compareTo(s2);
            }

            // Compare the second strings (first strings are == "M")
            if (!p1[1].contentEquals(p2[1])) {
                return p1[1].compareTo(p2[1]);
            }

            String v1 = p1[p1.length - 1];
            String v2 = p2[p2.length - 1];
            // compare the last Strings if they are volume identifiers
            if (v1 != null && v2 != null && v1.startsWith("V") && v2.startsWith("V") && !v1.contentEquals(v2)) {
                return v1.compareTo(v2);
            }

            // try to compare all parts of the string
            for (int i = 0; i < Math.min(p1.length, p2.length); i++) {
                if (p1.length > i + 1 && p2.length > i + 1 && p1[i] != null && p2[i] != null
                        && !p1[i].contentEquals(p2[i])) {
                    return p1[i].compareTo(p2[i]);
                }
            }

            // compare by number of string parts
            if (p1.length != p2.length) {
                return ((Integer) (p1.length)).compareTo((Integer) (p2.length));
            }

            return s1.compareTo(s2);
        }
    }

    public String getId() {
        // TODO Auto-generated method stub
        return null;
    }

}