org.apache.tika.parser.exiftool.ExifToolImageParserTest.java Source code

Java tutorial

Introduction

Here is the source code for org.apache.tika.parser.exiftool.ExifToolImageParserTest.java

Source

/*
 * Copyright (C) 2005-2012 Alfresco Software Limited.
 *
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.tika.parser.exiftool;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;

import java.io.InputStream;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Calendar;
import java.util.GregorianCalendar;
import java.util.List;
import java.util.TimeZone;

import junit.framework.TestCase;

import org.apache.tika.metadata.IPTC;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.metadata.Property;
import org.apache.tika.metadata.TIFF;
import org.apache.tika.parser.ParseContext;
import org.apache.tika.parser.Parser;
import org.xml.sax.helpers.DefaultHandler;

/**
 * JUnit 3 tests that run {@link ExiftoolImageParser} over sample images loaded from
 * <code>/test-documents</code> on the classpath and verify the extracted {@link Metadata}.
 * <p>
 * NOTE(review): presumably the ExifTool executable must be available on the machine
 * running these tests -- confirm against the parser implementation.
 */
public class ExifToolImageParserTest extends TestCase {
    /**
     * Logger for this class
     */
    private static final Log logger = LogFactory.getLog(ExifToolImageParserTest.class);

    /** Parser under test, created with its no-argument constructor. */
    private final Parser parser = new ExiftoolImageParser();

    /**
     * Parses a classpath resource with the given parser and returns the populated metadata.
     * <p>
     * The stream is always closed, and a missing resource fails the test with an explicit
     * message instead of handing <code>null</code> to the parser.
     *
     * @param resourceParser parser to run
     * @param resourcePath   absolute classpath location of the test document
     * @param contentType    value stored under {@link Metadata#CONTENT_TYPE} before parsing
     * @return the metadata object the parser wrote into
     * @throws Exception if parsing or closing the stream fails
     */
    private Metadata parseResource(Parser resourceParser, String resourcePath, String contentType)
            throws Exception {
        Metadata metadata = new Metadata();
        metadata.set(Metadata.CONTENT_TYPE, contentType);
        InputStream stream = getClass().getResourceAsStream(resourcePath);
        assertNotNull("Test resource not found on classpath: " + resourcePath, stream);
        try {
            resourceParser.parse(stream, new DefaultHandler(), metadata, new ParseContext());
        } finally {
            stream.close();
        }
        return metadata;
    }

    /**
     * Writes every metadata name/value pair to the trace log.
     *
     * @param prefix   text placed in front of each logged pair
     * @param metadata metadata to dump
     */
    private void traceMetadata(String prefix, Metadata metadata) {
        if (!logger.isTraceEnabled()) {
            return;
        }
        for (String name : metadata.names()) {
            logger.trace(prefix + name + "=" + metadata.get(name));
        }
    }

    /**
     * Checks the IPTC core, IPTC extension and PLUS values extracted from
     * <code>testJPEG_IPTC_EXT.jpg</code>.
     *
     * @throws Exception if the image cannot be parsed
     */
    public void testJPEGIPTC() throws Exception {
        Metadata metadata = parseResource(parser, "/test-documents/testJPEG_IPTC_EXT.jpg", "image/jpeg");

        // --- Location and descriptive text ---
        assertEquals("Washington", metadata.get(IPTC.CITY));
        assertEquals("United States", metadata.get(IPTC.COUNTRY));
        assertEquals("US", metadata.get(IPTC.COUNTRY_CODE));

        assertEquals(
                "A stream bank in Rock Creek Park Washington DC during a photo bike tour with ASPP DC/South chapter.",
                metadata.get(IPTC.DESCRIPTION));
        assertEquals(
                "A stream bank in Rock Creek Park Washington DC during a photo bike tour with ASPP DC/South chapter.",
                metadata.get(Metadata.DESCRIPTION));

        assertEquals("Rock Creek Park", metadata.get(IPTC.HEADLINE));
        assertEquals("Downstream", metadata.get(Metadata.TITLE));

        assertEquals("intellectual genre", metadata.get(IPTC.INTELLECTUAL_GENRE));

        // --- Keywords ---
        List<String> iptcKeywords = Arrays.asList(metadata.getValues(IPTC.KEYWORDS));
        assertTrue(iptcKeywords.contains("stream"));
        assertTrue(iptcKeywords.contains("park"));
        assertTrue(iptcKeywords.contains("bank"));
        assertEquals(5, iptcKeywords.size());
        // The checks below are substring matches on the joined values, not exact element matches.
        List<String> tikaKeywords = Arrays.asList(metadata.getValues(Metadata.KEYWORDS));
        assertTrue(Arrays.toString(tikaKeywords.toArray()).contains("stream"));
        assertTrue(Arrays.toString(tikaKeywords.toArray()).contains("park"));
        assertTrue(Arrays.toString(tikaKeywords.toArray()).contains("bank"));
        assertEquals(5, tikaKeywords.size());

        assertEquals("DC", metadata.get(IPTC.PROVINCE_OR_STATE));

        // --- Scene and subject codes ---
        List<String> iptcSceneCode = Arrays.asList(metadata.getValues(IPTC.SCENE_CODE));
        assertEquals(2, iptcSceneCode.size());
        assertTrue(Arrays.toString(iptcSceneCode.toArray()).contains("iptc scene 1"));
        assertTrue(Arrays.toString(iptcSceneCode.toArray()).contains("iptc scene 2"));

        List<String> iptcSubjectCode = Arrays.asList(metadata.getValues(IPTC.SUBJECT_CODE));
        assertEquals(2, iptcSubjectCode.size());
        assertTrue(Arrays.toString(iptcSubjectCode.toArray()).contains("iptc subject code 1"));
        assertTrue(Arrays.toString(iptcSubjectCode.toArray()).contains("iptc subject code 2"));

        assertEquals("Rock Creek Park", metadata.get(IPTC.SUBLOCATION));

        // --- Creation date: 31 August 2011, 12:00:00.000 UTC ---
        // Build the calendar directly in UTC and clear it first so that no field
        // (including milliseconds) is inherited from the current time.
        GregorianCalendar calendar = new GregorianCalendar(TimeZone.getTimeZone("UTC"));
        calendar.clear();
        calendar.set(2011, Calendar.AUGUST, 31, 12, 0, 0);
        assertEquals(calendar.getTime(), metadata.getDate(IPTC.DATE_CREATED));

        // --- Creator, rights and contact info ---
        assertEquals("Ray Gauss II", metadata.get(IPTC.DESCRIPTION_WRITER));
        assertEquals("instructions", metadata.get(IPTC.INSTRUCTIONS));
        assertEquals("job identifier", metadata.get(IPTC.JOB_ID));
        assertEquals("Downstream", metadata.get(IPTC.TITLE));
        assertTrue(metadata.get(IPTC.COPYRIGHT_NOTICE).contains("Ray Gauss II"));

        List<String> creators = Arrays.asList(metadata.getValues(IPTC.CREATOR));
        assertTrue(Arrays.toString(creators.toArray()).contains("Ray Gauss II"));

        assertEquals("DAM Architect", metadata.get(IPTC.CREATORS_JOB_TITLE));
        assertEquals("provider", metadata.get(IPTC.CREDIT_LINE));
        assertEquals("rights usage terms", metadata.get(IPTC.RIGHTS_USAGE_TERMS));
        assertEquals("source", metadata.get(IPTC.SOURCE));
        assertEquals("1234 Some Road", metadata.get(IPTC.CONTACT_INFO_ADDRESS));
        assertEquals("Atlanta", metadata.get(IPTC.CONTACT_INFO_CITY));
        assertEquals("US", metadata.get(IPTC.CONTACT_INFO_COUNTRY));

        List<String> ciWorkEmails = Arrays.asList(metadata.getValues(IPTC.CONTACT_INFO_EMAIL));
        // Photoshop does not support true multi-value here
        assertTrue(Arrays.toString(ciWorkEmails.toArray()).contains("info@alfresco.com"));
        assertTrue(Arrays.toString(ciWorkEmails.toArray()).contains("other@example.com"));

        List<String> ciWorkTels = Arrays.asList(metadata.getValues(IPTC.CONTACT_INFO_PHONE));
        // Photoshop does not support true multi-value here
        assertTrue(Arrays.toString(ciWorkTels.toArray()).contains("555-1234"));
        assertTrue(Arrays.toString(ciWorkTels.toArray()).contains("555-4321"));

        assertEquals("30339", metadata.get(IPTC.CONTACT_INFO_POSTAL_CODE));
        assertEquals("GA", metadata.get(IPTC.CONTACT_INFO_STATE_PROVINCE));

        List<String> ciWorkUrls = Arrays.asList(metadata.getValues(IPTC.CONTACT_INFO_WEB_URL));
        // Photoshop does not support true multi-value here
        assertTrue(Arrays.toString(ciWorkUrls.toArray()).contains("http://alfresco.com"));
        assertTrue(Arrays.toString(ciWorkUrls.toArray()).contains("http://example.com"));

        // --- IPTC extension: models, organisations, people ---
        assertEquals("rocky 1 and rocky 2 are big", metadata.get(IPTC.ADDITIONAL_MODEL_INFO));

        List<String> orgCodes = Arrays.asList(metadata.getValues(IPTC.ORGANISATION_CODE));
        assertEquals(2, orgCodes.size());
        assertEquals("ASPP", orgCodes.get(0));
        assertEquals("OTHER_ORG", orgCodes.get(1));

        // List<String> cvTerms = Arrays.asList(metadata.getValues(IPTC.CONTROLLED_VOCABULARY_TERM));

        List<String> modelAges = Arrays.asList(metadata.getValues(IPTC.MODEL_AGE));
        assertEquals(2, modelAges.size());
        assertEquals("1000", modelAges.get(0));
        assertEquals("1001", modelAges.get(1));

        List<String> orgNames = Arrays.asList(metadata.getValues(IPTC.ORGANISATION_NAME));
        assertEquals(2, orgNames.size());
        assertEquals("ASPP", orgNames.get(0));
        assertEquals("Other Org", orgNames.get(1));

        List<String> peopleShown = Arrays.asList(metadata.getValues(IPTC.PERSON));
        assertEquals(2, peopleShown.size());
        assertEquals("rocky 1", peopleShown.get(0));
        assertEquals("rocky 2", peopleShown.get(1));

        assertEquals("http://cv.iptc.org/newscodes/digitalsourcetype/digitalCapture",
                metadata.get(IPTC.DIGITAL_SOURCE_TYPE));
        assertEquals("Photo Bike Tour", metadata.get(IPTC.EVENT));

        // --- Image supplier and PLUS licensing data ---
        assertEquals("RGAUSS", metadata.get(IPTC.IMAGE_SUPPLIER_ID));
        assertEquals("Ray Gauss II", metadata.get(IPTC.IMAGE_SUPPLIER_NAME));
        assertEquals("supplier image ID", metadata.get(IPTC.IMAGE_SUPPLIER_IMAGE_ID));
        assertEquals("3456", metadata.get(IPTC.MAX_AVAIL_HEIGHT));
        assertEquals("5184", metadata.get(IPTC.MAX_AVAIL_WIDTH));
        assertEquals("1.2.0", metadata.get(IPTC.PLUS_VERSION));

        List<String> copyrightOwnerIds = Arrays.asList(metadata.getValues(IPTC.COPYRIGHT_OWNER_ID));
        assertEquals(1, copyrightOwnerIds.size());
        assertEquals("RGAUSS", copyrightOwnerIds.get(0));
        // assertEquals("", copyrightOwnerIds.get(1)); // TODO: Get ExifTool to preserve empty values

        List<String> copyrightOwnerNames = Arrays.asList(metadata.getValues(IPTC.COPYRIGHT_OWNER_NAME));
        assertEquals(2, copyrightOwnerNames.size());
        assertEquals("Ray Gauss II", copyrightOwnerNames.get(0));
        assertEquals("GG", copyrightOwnerNames.get(1));

        List<String> imageCreatorIds = Arrays.asList(metadata.getValues(IPTC.IMAGE_CREATOR_ID));
        assertEquals(1, imageCreatorIds.size());
        assertEquals("RGAUSS", imageCreatorIds.get(0));
        // assertEquals("", imageCreatorIds.get(1)); // TODO: Get ExifTool to preserve empty values

        assertTrue(metadata.isMultiValued(IPTC.IMAGE_CREATOR_NAME));
        List<String> imageCreatorNames = Arrays.asList(metadata.getValues(IPTC.IMAGE_CREATOR_NAME));
        assertEquals(2, imageCreatorNames.size());
        assertEquals("Ray Gauss II", imageCreatorNames.get(0));
        assertEquals("GG", imageCreatorNames.get(1));

        List<String> licensorIds = Arrays.asList(metadata.getValues(IPTC.LICENSOR_ID));
        assertEquals("RGAUSS", licensorIds.get(0));

        assertTrue(metadata.isMultiValued(IPTC.LICENSOR_NAME));
        List<String> licensorNames = Arrays.asList(metadata.getValues(IPTC.LICENSOR_NAME));
        assertEquals(2, licensorNames.size());
        assertEquals("Ray Gauss II", licensorNames.get(0));
        assertEquals("GG", licensorNames.get(1));

        // Photoshop does not support licensor addresses, cities, or countries

        List<String> licensorEmails = Arrays.asList(metadata.getValues(IPTC.LICENSOR_EMAIL));
        assertEquals("r@example.com", licensorEmails.get(0));
        // assertEquals("", licensorEmails.get(1)); // TODO: Get ExifTool to preserve empty values
        List<String> licensorTel1 = Arrays.asList(metadata.getValues(IPTC.LICENSOR_TELEPHONE_1));
        assertEquals("555-5555", licensorTel1.get(0));
        // assertEquals("", licensorTel1.get(1)); // TODO: Get ExifTool to preserve empty values
        List<String> licensorTel2 = Arrays.asList(metadata.getValues(IPTC.LICENSOR_TELEPHONE_2));
        assertEquals("555-4444", licensorTel2.get(0));
        // assertEquals("", licensorTel2.get(1)); // TODO: Get ExifTool to preserve empty values
        List<String> licensorUrls = Arrays.asList(metadata.getValues(IPTC.LICENSOR_URL));
        assertEquals("http://rgauss.com", licensorUrls.get(0));
        // assertEquals("", licensorUrls.get(1)); // TODO: Get ExifTool to preserve empty values

        // --- Model and property releases ---
        assertEquals("Age Unknown", metadata.get(IPTC.MINOR_MODEL_AGE_DISCLOSURE));
        List<String> modelReleaseIds = Arrays.asList(metadata.getValues(IPTC.MODEL_RELEASE_ID));
        assertEquals("model release id 1", modelReleaseIds.get(0));
        assertEquals("model release id 2", modelReleaseIds.get(1));
        assertEquals("Not Applicable", metadata.get(IPTC.MODEL_RELEASE_STATUS));

        List<String> propertyReleaseIds = Arrays.asList(metadata.getValues(IPTC.PROPERTY_RELEASE_ID));
        assertEquals("prop release id 1", propertyReleaseIds.get(0));
        assertEquals("prop release id 2", propertyReleaseIds.get(1));
        assertEquals("Not Applicable", metadata.get(IPTC.PROPERTY_RELEASE_STATUS));

        // --- Artwork or object details ---
        List<String> aoCopyright = Arrays
                .asList(metadata.getValues(IPTC.ARTWORK_OR_OBJECT_DETAIL_COPYRIGHT_NOTICE));
        assertEquals("Ray Gauss II", aoCopyright.get(0));
        // assertEquals("", aoCopyright.get(1)); // TODO: Get ExifTool to preserve empty values
        // assertEquals("", aoCopyright.get(2)); // TODO: Get ExifTool to preserve empty values
        List<String> aoCreator = Arrays.asList(metadata.getValues(IPTC.ARTWORK_OR_OBJECT_DETAIL_CREATOR));
        assertEquals("Mother Nature", aoCreator.get(0));
        assertEquals("Man", aoCreator.get(1));
        assertEquals("Mother Nature", aoCreator.get(2));
        List<String> aoDateCreated = Arrays.asList(metadata.getValues(IPTC.ARTWORK_OR_OBJECT_DETAIL_DATE_CREATED));
        assertEquals("1890:01:01", aoDateCreated.get(0));
        // assertEquals("", aoDateCreated.get(1)); // TODO: Get ExifTool to preserve empty values
        assertEquals("1901:02:01", aoDateCreated.get(1));
        // assertEquals("", aoDateCreated.get(2)); // TODO: Get ExifTool to preserve empty values
        List<String> aoSource = Arrays.asList(metadata.getValues(IPTC.ARTWORK_OR_OBJECT_DETAIL_SOURCE));
        assertEquals("National Park Service", aoSource.get(0));
        // assertEquals("", aoSource.get(1)); // TODO: Get ExifTool to preserve empty values
        // assertEquals("", aoSource.get(2)); // TODO: Get ExifTool to preserve empty values
        List<String> aoSourceInventoryNum = Arrays
                .asList(metadata.getValues(IPTC.ARTWORK_OR_OBJECT_DETAIL_SOURCE_INVENTORY_NUMBER));
        assertEquals("123456", aoSourceInventoryNum.get(0));
        // assertEquals("", aoSourceInventoryNum.get(1)); // TODO: Get ExifTool to preserve empty values
        assertEquals("654321", aoSourceInventoryNum.get(1)); // This should be index 2, TODO: Get ExifTool to preserve empty values
        List<String> aoSourceTitles = Arrays.asList(metadata.getValues(IPTC.ARTWORK_OR_OBJECT_DETAIL_TITLE));
        assertEquals("Rock Creek Stream Bank", aoSourceTitles.get(0));
        assertEquals("Pollution", aoSourceTitles.get(1));
        assertEquals("Some Tree", aoSourceTitles.get(2));

        // --- Location shown ---
        List<String> locationShownCity = Arrays.asList(metadata.getValues(IPTC.LOCATION_SHOWN_CITY));
        assertEquals("Washington", locationShownCity.get(0));
        // assertEquals("", locationShownCity.get(1)); // TODO: Get ExifTool to preserve empty values
        List<String> locationShownCountryCode = Arrays.asList(metadata.getValues(IPTC.LOCATION_SHOWN_COUNTRY_CODE));
        assertEquals("US", locationShownCountryCode.get(0));
        // assertEquals("", locationShownCountryCode.get(1)); // TODO: Get ExifTool to preserve empty values
        List<String> locationShownCountryName = Arrays.asList(metadata.getValues(IPTC.LOCATION_SHOWN_COUNTRY_NAME));
        assertEquals("United States", locationShownCountryName.get(0));
        // assertEquals("", locationShownCountryName.get(1)); // TODO: Get ExifTool to preserve empty values
        List<String> locationShownState = Arrays.asList(metadata.getValues(IPTC.LOCATION_SHOWN_PROVINCE_OR_STATE));
        assertEquals("D.C.", locationShownState.get(0));
        // assertEquals("", locationShownState.get(1)); // TODO: Get ExifTool to preserve empty values
        List<String> locationShownSublocation = Arrays.asList(metadata.getValues(IPTC.LOCATION_SHOWN_SUBLOCATION));
        assertEquals("Rock Creek Park Sub", locationShownSublocation.get(0));
        assertEquals("Stream Section", locationShownSublocation.get(1));
        List<String> locationShownWorldRegion = Arrays.asList(metadata.getValues(IPTC.LOCATION_SHOWN_WORLD_REGION));
        assertEquals("North America", locationShownWorldRegion.get(0));
        // assertEquals("", locationShownWorldRegion.get(1)); // TODO: Get ExifTool to preserve empty values

        // --- Location created ---
        assertEquals("Washington", metadata.get(IPTC.LOCATION_CREATED_CITY));
        assertEquals("US", metadata.get(IPTC.LOCATION_CREATED_COUNTRY_CODE));
        assertEquals("United States", metadata.get(IPTC.LOCATION_CREATED_COUNTRY_NAME));
        assertEquals("D.C.", metadata.get(IPTC.LOCATION_CREATED_PROVINCE_OR_STATE));
        assertEquals("Rock Creek Park", metadata.get(IPTC.LOCATION_CREATED_SUBLOCATION));
        assertEquals("North America", metadata.get(IPTC.LOCATION_CREATED_WORLD_REGION));

        // --- Registry entries ---
        assertTrue(IPTC.REGISTRY_ENTRY_CREATED_ORGANISATION_ID.isMultiValuePermitted());
        assertTrue(metadata.isMultiValued(IPTC.REGISTRY_ENTRY_CREATED_ORGANISATION_ID));
        List<String> registryEntryOrgIds = Arrays
                .asList(metadata.getValues(IPTC.REGISTRY_ENTRY_CREATED_ORGANISATION_ID));
        assertEquals(2, registryEntryOrgIds.size());
        assertEquals("PLUS", registryEntryOrgIds.get(0));
        // assertEquals("", registryEntryOrgIds.get(1)); // TODO: Get ExifTool to preserve empty values
        assertEquals("ORG 2", registryEntryOrgIds.get(1)); // This should be index 2, TODO: Get ExifTool to preserve empty values

        // Check the item-ID property itself (previously this re-checked the organisation-ID property).
        assertTrue(IPTC.REGISTRY_ENTRY_CREATED_ITEM_ID.isMultiValuePermitted());
        assertTrue(metadata.isMultiValued(IPTC.REGISTRY_ENTRY_CREATED_ITEM_ID));
        List<String> registryEntryItemIds = Arrays.asList(metadata.getValues(IPTC.REGISTRY_ENTRY_CREATED_ITEM_ID));
        assertEquals(3, registryEntryItemIds.size());
        assertEquals("100-ABC-ABC-555", registryEntryItemIds.get(0));
        assertEquals("11223344", registryEntryItemIds.get(1));
        assertEquals("55667788", registryEntryItemIds.get(2));
    }

    /**
     * Checks that custom XMP properties handed to the parser's two-argument constructor
     * show up in the metadata under their own names.
     *
     * @throws Exception if the image cannot be parsed
     */
    public void testJPEGCustomXmp() throws Exception {
        ArrayList<Property> passthroughXmpProperties = new ArrayList<Property>(2);
        passthroughXmpProperties.add(Property.internalText("XMP-custom:Text"));
        passthroughXmpProperties.add(Property.internalText("XMP-custom:TextML"));
        Parser passthroughParser = new ExiftoolImageParser(null, passthroughXmpProperties);

        Metadata metadata = parseResource(passthroughParser, "/test-documents/testJPEG_IPTC_EXT.jpg",
                "image/jpeg");

        assertEquals("customTextField", metadata.get("XMP-custom:Text"));
        assertEquals("customMultilineField", metadata.get("XMP-custom:TextML"));
    }

    /**
     * Checks the image width reported for <code>testJPEG.jpg</code>.
     *
     * @throws Exception if the image cannot be parsed
     */
    public void testJPEG() throws Exception {
        Metadata metadata = parseResource(parser, "/test-documents/testJPEG.jpg", "image/jpeg");

        assertEquals("100", metadata.get(TIFF.IMAGE_WIDTH));
        traceMetadata("JPEG-- ", metadata);
    }

    /**
     * Checks the image width and IPTC headline extracted from <code>testPNG_IPTC.png</code>.
     *
     * @throws Exception if the image cannot be parsed
     */
    public void testPNGIPTC() throws Exception {
        Metadata metadata = parseResource(parser, "/test-documents/testPNG_IPTC.png", "image/png");

        traceMetadata("PNG-- ", metadata);
        assertEquals("100", metadata.get(TIFF.IMAGE_WIDTH));
        assertEquals("Cat in a garden", metadata.get(IPTC.HEADLINE));
    }

    /**
     * Checks the IPTC keywords and headline extracted from <code>testTIFF_IPTC.tif</code>.
     *
     * @throws Exception if the image cannot be parsed
     */
    public void testTIFFIPTC() throws Exception {
        Metadata metadata = parseResource(parser, "/test-documents/testTIFF_IPTC.tif", "image/tiff");

        traceMetadata("TIFF-- ", metadata);
        List<String> iptcKeywords = Arrays.asList(metadata.getValues(IPTC.KEYWORDS));
        assertTrue(iptcKeywords.contains("garden"));
        assertTrue(iptcKeywords.contains("cat"));
        assertEquals("Cat in a garden", metadata.get(IPTC.HEADLINE));
    }

}