org.kitodo.production.helper.metadata.ImageHelper.java Source code

Java tutorial

Introduction

Here is the source code for org.kitodo.production.helper.metadata.ImageHelper.java

Source

/*
 * (c) Kitodo. Key to digital objects e. V. <contact@kitodo.org>
 *
 * This file is part of the Kitodo project.
 *
 * It is licensed under GNU General Public License version 3 or later.
 *
 * For the full copyright and license information, please read the
 * GPL3-License.txt file that was distributed with this source code.
 */

package org.kitodo.production.helper.metadata;

import de.unigoettingen.sub.commons.contentlib.exceptions.ImageManagerException;
import de.unigoettingen.sub.commons.contentlib.exceptions.ImageManipulatorException;
import de.unigoettingen.sub.commons.contentlib.imagelib.ImageManager;
import de.unigoettingen.sub.commons.contentlib.imagelib.JpegInterpreter;

import java.awt.image.RenderedImage;
import java.io.BufferedInputStream;
import java.io.File;
import java.io.FileOutputStream;
import java.io.FilenameFilter;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.net.URI;
import java.net.URL;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.Optional;

import org.apache.commons.httpclient.HttpClient;
import org.apache.commons.httpclient.HttpStatus;
import org.apache.commons.httpclient.methods.GetMethod;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.kitodo.api.filemanagement.ProcessSubType;
import org.kitodo.config.ConfigCore;
import org.kitodo.config.enums.ParameterCore;
import org.kitodo.data.database.beans.Process;
import org.kitodo.exceptions.InvalidImagesException;
import org.kitodo.production.helper.Helper;
import org.kitodo.production.helper.metadata.legacytypeimplementations.LegacyContentFileHelper;
import org.kitodo.production.helper.metadata.legacytypeimplementations.LegacyDocStructHelperInterface;
import org.kitodo.production.helper.metadata.legacytypeimplementations.LegacyLogicalDocStructTypeHelper;
import org.kitodo.production.helper.metadata.legacytypeimplementations.LegacyMetadataHelper;
import org.kitodo.production.helper.metadata.legacytypeimplementations.LegacyMetadataTypeHelper;
import org.kitodo.production.helper.metadata.legacytypeimplementations.LegacyMetsModsDigitalDocumentHelper;
import org.kitodo.production.helper.metadata.legacytypeimplementations.LegacyPrefsHelper;
import org.kitodo.production.helper.metadata.legacytypeimplementations.LegacyRomanNumeralHelper;
import org.kitodo.production.metadata.MetadataProcessor;
import org.kitodo.production.metadata.comparator.MetadataImageComparator;
import org.kitodo.production.services.ServiceManager;
import org.kitodo.production.services.file.FileService;

public class ImageHelper {
    private static final Logger logger = LogManager.getLogger(ImageHelper.class);
    private static final FileService fileService = ServiceManager.getFileService();

    /** Preferences used to look up document structure types and metadata types by name. */
    private final LegacyPrefsHelper myPrefs;

    /** Document whose logical and physical structure this helper reads and modifies. */
    private final LegacyMetsModsDigitalDocumentHelper mydocument;

    /** Number of files counted by the most recent call of {@code checkIfImagesValid}. */
    private int myLastImage = 0;

    /**
     * Creates an image helper working on the given document.
     *
     * @param inPrefs
     *            preferences used to resolve structure and metadata types
     * @param inDocument
     *            document whose structures are processed
     */
    public ImageHelper(LegacyPrefsHelper inPrefs, LegacyMetsModsDigitalDocumentHelper inDocument) {
        this.mydocument = inDocument;
        this.myPrefs = inPrefs;
    }

    /**
     * Builds or updates the page structure of the document from the images
     * that exist for the process.
     *
     * <p>
     * The images are validated first, then the number of files found is
     * compared with the number of existing page elements. If both are equal
     * nothing is done. Otherwise three cases are handled: pages without images
     * only, images without pages only (new pages are appended), or both (free
     * images are assigned to empty pages, remaining images get new pages).
     * Finally the physical page numbers of the pages are rewritten in order.
     *
     * @param process
     *            process whose images are paginated
     * @param directory
     *            directory to validate the images in; if {@code null}, the
     *            tif images directory of the process is used
     * @throws UnsupportedOperationException
     *             if a page would have to be removed, or a page references an
     *             image file that does not exist (not supported by the legacy
     *             helper classes)
     */
    public void createPagination(Process process, URI directory) {
        LegacyDocStructHelperInterface physicalStructure = this.mydocument.getPhysicalDocStruct();
        LegacyDocStructHelperInterface logicalStructure = this.mydocument.getLogicalDocStruct();

        // Descend through anchor elements to the first non-anchor element.
        // The children must be re-read on every step: testing the children of
        // the root again and again never terminates for nested anchors.
        List<LegacyDocStructHelperInterface> allChildren = logicalStructure.getAllChildren();
        while (logicalStructure.getDocStructType().getAnchorClass() != null && Objects.nonNull(allChildren)
                && !allChildren.isEmpty()) {
            logicalStructure = allChildren.get(0);
            allChildren = logicalStructure.getAllChildren();
        }

        // the physical structure tree is only created if it does not exist yet
        if (Objects.isNull(physicalStructure)) {
            physicalStructure = createPhysicalStructure(process);
            this.mydocument.setPhysicalDocStruct(physicalStructure);
        }

        // validation also counts the files and stores the count in myLastImage
        if (Objects.isNull(directory)) {
            checkIfImagesValid(process.getTitle(), ServiceManager.getProcessService().getImagesTifDirectory(true,
                    process.getId(), process.getTitle(), process.getProcessBaseUri()));
        } else {
            checkIfImagesValid(process.getTitle(), directory);
        }

        // retrieve existing pages/images
        LegacyLogicalDocStructTypeHelper newPage = this.myPrefs.getDocStrctTypeByName("page");
        List<LegacyDocStructHelperInterface> oldPages = physicalStructure
                .getAllChildrenByTypeAndMetadataType("page", "*");
        if (Objects.isNull(oldPages)) {
            oldPages = new ArrayList<>();
        }

        // nothing to do if there are as many pages as files
        if (oldPages.size() == this.myLastImage) {
            return;
        }

        String defaultPagination = ConfigCore
                .getParameterOrDefaultValue(ParameterCore.METS_EDITOR_DEFAULT_PAGINATION);
        Map<String, LegacyDocStructHelperInterface> assignedImages = new HashMap<>();
        List<LegacyDocStructHelperInterface> pageElementsWithoutImages = new ArrayList<>();

        // sort the existing children into pages with an existing image and
        // pages without any image
        List<LegacyDocStructHelperInterface> existingChildren = physicalStructure.getAllChildren();
        if (Objects.nonNull(existingChildren)) {
            for (LegacyDocStructHelperInterface page : existingChildren) {
                if (page.getImageName() != null) {
                    URI imageFile;
                    if (Objects.isNull(directory)) {
                        imageFile = ServiceManager
                                .getProcessService().getImagesTifDirectory(true, process.getId(),
                                        process.getTitle(), process.getProcessBaseUri())
                                .resolve(page.getImageName());
                    } else {
                        imageFile = fileService.getProcessSubTypeURI(process, ProcessSubType.IMAGE, null)
                                .resolve(page.getImageName());
                    }
                    if (fileService.fileExist(imageFile)) {
                        assignedImages.put(page.getImageName(), page);
                    } else {
                        throw new UnsupportedOperationException("Dead code pending removal");
                    }
                } else {
                    pageElementsWithoutImages.add(page);
                }
            }
        }
        List<URI> imagesWithoutPageElements = getImagesWithoutPageElements(process, assignedImages);

        // handle possible cases
        if (!pageElementsWithoutImages.isEmpty() && imagesWithoutPageElements.isEmpty()) {
            // case 1: existing pages but no images (some images are removed)
            for (LegacyDocStructHelperInterface pageToRemove : pageElementsWithoutImages) {
                physicalStructure.removeChild(pageToRemove);
                throw new UnsupportedOperationException("Dead code pending removal");
            }
        } else if (pageElementsWithoutImages.isEmpty() && !imagesWithoutPageElements.isEmpty()) {
            // case 2: no page docs but images (some images are added)
            addPagesForImages(imagesWithoutPageElements, assignedImages.size(), physicalStructure,
                logicalStructure, newPage, defaultPagination);
        } else {
            // case 3: empty page docs and unassigned images
            for (LegacyDocStructHelperInterface page : pageElementsWithoutImages) {
                if (!imagesWithoutPageElements.isEmpty()) {
                    // assign the next free image to the page
                    URI newImageName = imagesWithoutPageElements.remove(0);
                    page.addContentFile(createContentFile(newImageName));
                } else {
                    // remove page
                    physicalStructure.removeChild(page);
                    throw new UnsupportedOperationException("Dead code pending removal");
                }
            }
            if (!imagesWithoutPageElements.isEmpty()) {
                // create new page elements for the images still left over
                addPagesForImages(imagesWithoutPageElements, physicalStructure.getAllChildren().size(),
                    physicalStructure, logicalStructure, newPage, defaultPagination);
            }
        }

        // rewrite the physical page numbers in order, starting with 1
        int currentPhysicalOrder = 1;
        LegacyMetadataTypeHelper mdt = this.myPrefs.getMetadataTypeByName("physPageNumber");
        List<LegacyDocStructHelperInterface> allPages = physicalStructure
                .getAllChildrenByTypeAndMetadataType("page", "*");
        if (Objects.nonNull(allPages)) {
            for (LegacyDocStructHelperInterface page : allPages) {
                List<? extends LegacyMetadataHelper> pageNoMetadata = page.getAllMetadataByType(mdt);
                if (Objects.isNull(pageNoMetadata) || pageNoMetadata.isEmpty()) {
                    // NOTE(review): renumbering stops at the first page without
                    // a physical page number - confirm that skipping only this
                    // page ("continue") was not intended.
                    break;
                }
                for (LegacyMetadataHelper pageNo : pageNoMetadata) {
                    pageNo.setStringValue(String.valueOf(currentPhysicalOrder));
                }
                currentPhysicalOrder++;
            }
        }
    }

    /**
     * Appends one new page element per image to the physical structure. Each
     * page gets a physical and a logical page number, is referenced from the
     * logical structure and gets the image as content file.
     *
     * @param images
     *            images to create pages for
     * @param lastPhysicalOrder
     *            physical order number used last; the first new page gets this
     *            value plus one
     * @param physicalStructure
     *            physical structure the pages are added to
     * @param logicalStructure
     *            logical structure that gets a reference to each new page
     * @param pageType
     *            structure type of the pages to create
     * @param defaultPagination
     *            configured pagination type for the logical page number
     */
    private void addPagesForImages(List<URI> images, int lastPhysicalOrder,
            LegacyDocStructHelperInterface physicalStructure, LegacyDocStructHelperInterface logicalStructure,
            LegacyLogicalDocStructTypeHelper pageType, String defaultPagination) {
        int currentPhysicalOrder = lastPhysicalOrder;
        for (URI newImage : images) {
            LegacyDocStructHelperInterface dsPage = this.mydocument.createDocStruct(pageType);

            // physical page no
            physicalStructure.addChild(dsPage);
            currentPhysicalOrder++;
            dsPage.addMetadata(createMetadataForPhysicalPageNumber(currentPhysicalOrder));

            // logical page no
            dsPage.addMetadata(createMetadataForLogicalPageNumber(currentPhysicalOrder, defaultPagination));
            logicalStructure.addReferenceTo(dsPage, "logical_physical");

            // image name
            dsPage.addContentFile(createContentFile(newImage));
        }
    }

    /**
     * Scales the given image file and writes the result as JPEG to the output
     * file. The scale value used is a third of {@code inSize}, but at least 1.
     *
     * <p>
     * If the content server URL parameter is configured but empty, the image
     * is scaled in this JVM with the image manager. If it is not empty, the
     * scaled image is requested from that content server via HTTP GET; a
     * response status other than 200 leaves the output file unwritten. If the
     * parameter is not configured at all, nothing is done.
     *
     * @param inFileName
     *            URI of the image to scale
     * @param outFileName
     *            URI the scaled image is written to
     * @param inSize
     *            size value the scale is derived from
     * @param intRotation
     *            rotation passed on to the scaler
     * @throws ImageManagerException
     *             if the image manager fails
     * @throws IOException
     *             if reading the image, the HTTP request or writing the
     *             output fails
     * @throws ImageManipulatorException
     *             if scaling the image fails
     */
    public void scaleFile(URI inFileName, URI outFileName, int inSize, int intRotation)
            throws ImageManagerException, IOException, ImageManipulatorException {
        logger.trace("start scaleFile");
        int tmpSize = Math.max(inSize / 3, 1);
        logger.trace("tmpSize: {}", tmpSize);
        Optional<String> kitodoContentServerUrl = ConfigCore
                .getOptionalString(ParameterCore.KITODO_CONTENT_SERVER_URL);
        if (!kitodoContentServerUrl.isPresent()) {
            return;
        }
        String contentServerUrl = kitodoContentServerUrl.get();
        if (contentServerUrl.isEmpty()) {
            scaleWithImageManager(inFileName, outFileName, tmpSize, intRotation);
        } else {
            scaleWithContentServer(contentServerUrl, inFileName, outFileName, tmpSize, intRotation);
        }
        logger.trace("end scaleFile");
    }

    /**
     * Scales the image in this JVM and writes it as JPEG. The output stream is
     * closed even if writing fails.
     */
    private void scaleWithImageManager(URI inFileName, URI outFileName, int tmpSize, int intRotation)
            throws ImageManagerException, IOException, ImageManipulatorException {
        logger.trace("api");
        // TODO source image files are locked under windows forever after
        // converting begins.
        ImageManager imageManager = new ImageManager(inFileName.toURL());
        logger.trace("im");
        RenderedImage renderedImage = imageManager.scaleImageByPixel(tmpSize, tmpSize,
                ImageManager.SCALE_BY_PERCENT, intRotation);
        logger.trace("ri");
        JpegInterpreter jpegInterpreter = new JpegInterpreter(renderedImage);
        logger.trace("pi");
        try (FileOutputStream outputFileStream = (FileOutputStream) fileService.write(outFileName)) {
            logger.trace("output");
            jpegInterpreter.writeToStream(null, outputFileStream);
            logger.trace("write stream");
            outputFileStream.flush();
        }
        logger.trace("close stream");
    }

    /**
     * Requests the scaled image from the content server and copies the
     * response body to the output file. The HTTP connection is always
     * released, also on errors and on a non-OK status.
     */
    private void scaleWithContentServer(String contentServerUrl, URI inFileName, URI outFileName, int tmpSize,
            int intRotation) throws IOException {
        String cs = contentServerUrl + inFileName + "&scale=" + tmpSize + "&rotate=" + intRotation
                + "&format=jpg";
        cs = cs.replace("\\", "/");
        logger.trace("url: {}", cs);
        // constructing the URL validates the request string
        URL csUrl = new URL(cs);
        HttpClient httpclient = new HttpClient();
        GetMethod method = new GetMethod(csUrl.toString());
        try {
            logger.trace("get");
            Integer contentServerTimeOut = ConfigCore
                    .getIntParameterOrDefaultValue(ParameterCore.KITODO_CONTENT_SERVER_TIMEOUT);
            method.getParams().setParameter("http.socket.timeout", contentServerTimeOut);
            int statusCode = httpclient.executeMethod(method);
            logger.trace("statusCode: {}", statusCode);
            if (statusCode != HttpStatus.SC_OK) {
                logger.warn("Content server answered with status {} for {}", statusCode, cs);
                return;
            }
            InputStream inStream = method.getResponseBodyAsStream();
            if (Objects.isNull(inStream)) {
                throw new IOException("Content server returned no response body for " + cs);
            }
            logger.trace("inStream");
            // closing the buffered stream also closes the response stream
            try (BufferedInputStream bis = new BufferedInputStream(inStream);
                    OutputStream fos = fileService.write(outFileName)) {
                byte[] bytes = new byte[8192];
                int count;
                while ((count = bis.read(bytes)) != -1) {
                    fos.write(bytes, 0, count);
                }
            }
            logger.trace("write");
        } finally {
            method.releaseConnection();
        }
    }

    // Validation of the image files of a process

    /**
     * Checks the images of a process for completeness. As a side effect the
     * number of files found is remembered in {@code myLastImage} (0 if the
     * folder does not exist or contains no matching files).
     *
     * <p>
     * If the configured image prefix is the pattern for 8-digit numbers, the
     * sorted file names must additionally form a gapless sequence starting
     * with 1. Every problem found is reported as error message.
     *
     * @param title
     *            title of the process, used in the error messages
     * @param folder
     *            folder containing the files to check
     * @return whether the folder exists, contains files and - if checked -
     *         the files are numbered consecutively
     */
    public boolean checkIfImagesValid(String title, URI folder) {
        this.myLastImage = 0;

        if (!fileService.fileExist(folder)) {
            Helper.setErrorMessage("[" + title + "] No image-folder found");
            return false;
        }

        List<URI> files = fileService.getSubUris(dataFilter, folder);
        if (files.isEmpty()) {
            Helper.setErrorMessage("[" + title + "] No objects found");
            return false;
        }
        this.myLastImage = files.size();

        // the numbering is only checked for purely numeric 8-digit file names
        if (!ConfigCore.getParameterOrDefaultValue(ParameterCore.IMAGE_PREFIX).equals("\\d{8}")) {
            return true;
        }

        Collections.sort(files);
        boolean sequenceIsValid = true;
        int expectedNumber = 1;
        String currentFileName = null;
        try {
            Iterator<URI> fileIterator = files.iterator();
            while (fileIterator.hasNext()) {
                currentFileName = fileService.getFileName(fileIterator.next());
                int foundNumber = Integer.parseInt(currentFileName);
                if (foundNumber != expectedNumber) {
                    Helper.setErrorMessage("[" + title + "] expected Image " + expectedNumber
                            + " but found File " + currentFileName);
                    // continue counting from the number actually found
                    expectedNumber = foundNumber;
                    sequenceIsValid = false;
                }
                expectedNumber++;
            }
        } catch (NumberFormatException e) {
            sequenceIsValid = false;
            Helper.setErrorMessage(
                    "[" + title + "] Filename of image wrong - not an 8-digit-number: " + currentFileName);
        }
        return sequenceIsValid;
    }

    /**
     * Returns the image files of the given directory, relative to it.
     *
     * <p>
     * If every file can be matched to a page of the physical structure, the
     * files are returned in the order of the pages. Otherwise they are sorted
     * with the {@link MetadataImageComparator}.
     *
     * @param directory
     *            folder to read the images from
     * @return ordered list of URIs of the images, with the directory part
     *         removed; empty if there are no images
     */
    public List<URI> getImageFiles(URI directory) {
        List<URI> relativeFiles = new ArrayList<>();
        for (URI absoluteFile : fileService.getSubUris(imageNameFilter, directory)) {
            String relativeName = absoluteFile.toString().replace(directory.toString(), "");
            relativeFiles.add(URI.create(relativeName));
        }

        if (relativeFiles.isEmpty()) {
            return new ArrayList<>();
        }

        List<URI> orderedByPages = prepareOrderedFileNameList(relativeFiles);
        if (orderedByPages.size() == relativeFiles.size()) {
            return orderedByPages;
        }

        // not every file belongs to a page: fall back to sorting by name
        relativeFiles.sort(new MetadataImageComparator());
        return relativeFiles;
    }

    /**
     * Returns the image names of all children of the given physical
     * structure, in the order of the children.
     *
     * @param physical
     *            physical structure whose children are the pages
     * @return list of URIs created from the image names; empty if the
     *         structure has no children
     */
    public List<URI> getImageFiles(LegacyDocStructHelperInterface physical) {
        List<LegacyDocStructHelperInterface> pages = physical.getAllChildren();
        if (Objects.isNull(pages)) {
            return new ArrayList<>();
        }

        List<URI> imageNames = new ArrayList<>();
        for (LegacyDocStructHelperInterface page : pages) {
            imageNames.add(URI.create(page.getImageName()));
        }
        return imageNames;
    }

    /**
     * Returns the data files in the tif images directory of the process,
     * sorted with the {@link MetadataImageComparator}.
     *
     * @param process
     *            process whose images directory is read
     * @return sorted list of URIs, empty if no file matches
     * @throws InvalidImagesException
     *             if determining the images directory fails with a runtime
     *             exception
     */
    public List<URI> getDataFiles(Process process) throws InvalidImagesException {
        URI imagesDirectory;
        try {
            imagesDirectory = ServiceManager.getProcessService().getImagesTifDirectory(true, process.getId(),
                    process.getTitle(), process.getProcessBaseUri());
        } catch (RuntimeException e) {
            throw new InvalidImagesException(e);
        }

        // copy, so that sorting does not touch the list owned by the file service
        List<URI> dataFiles = new ArrayList<>(fileService.getSubUris(dataFilter, imagesDirectory));
        if (!dataFiles.isEmpty()) {
            dataFiles.sort(new MetadataImageComparator());
        }
        return dataFiles;
    }

    /**
     * Accepts file names that completely match one of the image name patterns
     * (configured image prefix followed by an image file extension).
     */
    public static final FilenameFilter imageNameFilter = (dir, name) -> getImageNameRegexList().stream()
            .anyMatch(regex -> name.matches(regex));

    /**
     * Accepts file names that completely match one of the data file patterns
     * (configured image prefix followed by an image or other data file
     * extension).
     */
    public static final FilenameFilter dataFilter = (dir, name) -> getDataRegexList().stream()
            .anyMatch(regex -> name.matches(regex));

    /**
     * Orders the given files by the pages of the physical structure. For each
     * page the first file whose name equals the image name of the page without
     * its extension is added to the result.
     *
     * <p>
     * Pages without image name are skipped, and a missing physical structure
     * yields an empty result, so that the caller can detect the incomplete
     * assignment by comparing the list sizes.
     *
     * @param dataList
     *            files to order
     * @return files that could be matched to a page, in page order
     */
    private List<URI> prepareOrderedFileNameList(List<URI> dataList) {
        List<URI> orderedFileNameList = new ArrayList<>();
        LegacyDocStructHelperInterface physicalStructure = mydocument.getPhysicalDocStruct();
        if (Objects.isNull(physicalStructure)) {
            return orderedFileNameList;
        }
        List<LegacyDocStructHelperInterface> pagesList = physicalStructure.getAllChildren();
        if (Objects.isNull(pagesList) || pagesList.isEmpty()) {
            return orderedFileNameList;
        }

        // index the files by name once instead of scanning them for every page;
        // putIfAbsent keeps the first file if several share a name
        Map<String, URI> filesByName = new HashMap<>();
        for (URI currentImage : dataList) {
            filesByName.putIfAbsent(fileService.getFileName(currentImage), currentImage);
        }

        for (LegacyDocStructHelperInterface page : pagesList) {
            String fileName = page.getImageName();
            if (Objects.isNull(fileName)) {
                continue;
            }
            String fileNamePrefix = fileName.replace("." + MetadataProcessor.getFileExtension(fileName), "");
            URI matchingImage = filesByName.get(fileNamePrefix);
            if (Objects.nonNull(matchingImage)) {
                orderedFileNameList.add(matchingImage);
            }
        }
        return orderedFileNameList;
    }

    /**
     * Creates a new physical structure of type "BoundBook" that carries the
     * path of the tif images directory of the process as "pathimagefiles"
     * metadata.
     *
     * @param process
     *            process whose images directory is recorded
     * @return the new physical structure
     */
    private LegacyDocStructHelperInterface createPhysicalStructure(Process process) {
        LegacyLogicalDocStructTypeHelper boundBookType = this.myPrefs.getDocStrctTypeByName("BoundBook");
        LegacyDocStructHelperInterface boundBook = this.mydocument.createDocStruct(boundBookType);

        // problems with FilePath
        LegacyMetadataHelper imagesPath = new LegacyMetadataHelper(
                this.myPrefs.getMetadataTypeByName("pathimagefiles"));
        URI imagesDirectory = ServiceManager.getProcessService().getImagesTifDirectory(false, process.getId(),
                process.getTitle(), process.getProcessBaseUri());
        // the metadata stores a file system path, not the URI itself
        imagesPath.setStringValue(new File(imagesDirectory).getPath());
        boundBook.addMetadata(imagesPath);
        return boundBook;
    }

    /**
     * Determines the data files of the process that are not assigned to a
     * page yet. A file counts as assigned if its raw path is a key of the
     * given map.
     *
     * @param process
     *            process whose data files are read
     * @param assignedImages
     *            pages by name of the image assigned to them
     * @return modifiable list of the unassigned files; empty if the data
     *         files cannot be read (the error is logged)
     */
    private List<URI> getImagesWithoutPageElements(Process process,
            Map<String, LegacyDocStructHelperInterface> assignedImages) {
        try {
            List<URI> unassignedImages = new ArrayList<>(getDataFiles(process));
            unassignedImages.removeIf(image -> assignedImages.containsKey(image.getRawPath()));
            return unassignedImages;
        } catch (InvalidImagesException e) {
            logger.error(e);
            return new ArrayList<>();
        }
    }

    /**
     * Creates the "logicalPageNumber" metadata for a page. Its value is the
     * pagination determined for the physical order and the pagination type.
     *
     * @param currentPhysicalOrder
     *            physical position of the page
     * @param defaultPagination
     *            configured pagination type
     * @return the metadata with the value set
     */
    private LegacyMetadataHelper createMetadataForLogicalPageNumber(int currentPhysicalOrder,
            String defaultPagination) {
        LegacyMetadataHelper logicalPageNumber = new LegacyMetadataHelper(
                this.myPrefs.getMetadataTypeByName("logicalPageNumber"));
        String pagination = determinePagination(currentPhysicalOrder, defaultPagination);
        logicalPageNumber.setStringValue(pagination);
        return logicalPageNumber;
    }

    /**
     * Creates the "physPageNumber" metadata for a page. Its value is the
     * physical order as decimal number.
     *
     * @param currentPhysicalOrder
     *            physical position of the page
     * @return the metadata with the value set
     */
    private LegacyMetadataHelper createMetadataForPhysicalPageNumber(int currentPhysicalOrder) {
        LegacyMetadataHelper physicalPageNumber = new LegacyMetadataHelper(
                this.myPrefs.getMetadataTypeByName("physPageNumber"));
        physicalPageNumber.setStringValue(String.valueOf(currentPhysicalOrder));
        return physicalPageNumber;
    }

    /**
     * Creates a content file whose location is the path of the given image.
     *
     * @param image
     *            URI of the image
     * @return the content file with the location set
     */
    private LegacyContentFileHelper createContentFile(URI image) {
        String location = image.getPath();
        LegacyContentFileHelper imageFile = new LegacyContentFileHelper();
        imageFile.setLocation(location);
        return imageFile;
    }

    /**
     * Determines the logical page number for a page.
     *
     * <p>
     * The pagination type is compared, ignoring case, with the possible values
     * of the default pagination parameter: for the first possible value the
     * physical order is returned as decimal number, for the second one as
     * roman numeral. For any other type the default value of the parameter is
     * returned.
     *
     * @param currentPhysicalOrder
     *            physical position of the page
     * @param defaultPagination
     *            configured pagination type
     * @return pagination value as String
     */
    private String determinePagination(int currentPhysicalOrder, String defaultPagination) {
        String firstOption = (String) ParameterCore.METS_EDITOR_DEFAULT_PAGINATION.getParameter()
                .getPossibleValues().get(0);
        if (defaultPagination.equalsIgnoreCase(firstOption)) {
            return String.valueOf(currentPhysicalOrder);
        }

        String secondOption = (String) ParameterCore.METS_EDITOR_DEFAULT_PAGINATION.getParameter()
                .getPossibleValues().get(1);
        if (defaultPagination.equalsIgnoreCase(secondOption)) {
            LegacyRomanNumeralHelper romanNumeral = new LegacyRomanNumeralHelper();
            romanNumeral.setValue(currentPhysicalOrder);
            return romanNumeral.getNumber();
        }

        return (String) ParameterCore.METS_EDITOR_DEFAULT_PAGINATION.getParameter().getDefaultValue();
    }

    /**
     * Builds the regular expressions for image file names: the configured
     * image prefix followed by one of the image file extensions (tif/tiff,
     * jpg/jpeg, jp2, png, gif; case-insensitive).
     *
     * @return new modifiable list of regular expressions
     */
    private static List<String> getImageNameRegexList() {
        String prefix = ConfigCore.getParameterOrDefaultValue(ParameterCore.IMAGE_PREFIX);
        String[] extensionPatterns = {
            "\\.[Tt][Ii][Ff][Ff]?",
            "\\.[jJ][pP][eE]?[gG]",
            "\\.[jJ][pP][2]",
            "\\.[pP][nN][gG]",
            "\\.[gG][iI][fF]"
        };

        List<String> regexList = new ArrayList<>();
        for (String extensionPattern : extensionPatterns) {
            regexList.add(prefix + extensionPattern);
        }
        return regexList;
    }

    /**
     * Builds the regular expressions for data file names: all image name
     * patterns plus the configured image prefix followed by one of the
     * further supported extensions. The office extensions (docx, xls, xlsx,
     * pptx, ppt) are matched in lower case only.
     *
     * @return new modifiable list of regular expressions
     */
    private static List<String> getDataRegexList() {
        String prefix = ConfigCore.getParameterOrDefaultValue(ParameterCore.IMAGE_PREFIX);
        String[] extensionPatterns = {
            "\\.[pP][dD][fF]",
            "\\.[aA][vV][iI]",
            "\\.[mM][pP][gG]",
            "\\.[mM][pP]4",
            "\\.[mM][pP]3",
            "\\.[wW][aA][vV]",
            "\\.[wW][mM][vV]",
            "\\.[fF][lL][vV]",
            "\\.[oO][gG][gG]",
            "\\.docx",
            "\\.xls",
            "\\.xlsx",
            "\\.pptx",
            "\\.ppt"
        };

        List<String> regexList = getImageNameRegexList();
        for (String extensionPattern : extensionPatterns) {
            regexList.add(prefix + extensionPattern);
        }
        return regexList;
    }

}