org.ambraproject.filestore.impl.RepoObjectIDMapper.java Source code

Introduction

Here is the source code for org.ambraproject.filestore.impl.RepoObjectIDMapper.java
Source

/*
 * $HeadURL: http://ambraproject.org/svn/ambra/branches/ambra-2.2/ambra/plos-commons/file-store/src/main/java/org/plos/filestore/FSIDMapper.java $
 * $Id: FSIDMapper.java 9699 2011-10-24 21:45:08Z wtoconnor $
 *
 * Copyright (c) 2006-2011 by Public Library of Science
 *     http://plos.org
 *     http://ambraproject.org
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.ambraproject.filestore.impl;

import org.ambraproject.filestore.ObjectIDMapper;
import org.apache.commons.io.FilenameUtils;

import java.util.Locale;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/*
 * The RepoObjectIDMapper is used to map PLoS DOIs to file store identifiers. Currently we only
 * support DOIs specific to PLoS, but methods can be added here to handle other identifiers that
 * may need mappings, i.e. PMED IDs.
 *
 * @author Bill OConnor
 * @author Joe Osowski
 *
 * WTO: To do - need to look at handling image article DOIs correctly ... ignoring that for now.
 * WTO: Need to add prefix to interface to handle non-PLoS DOIs.
 */
public class RepoObjectIDMapper extends ObjectIDMapper {

    // The patterns below describe the six PLoS DOI shapes currently in use. Only JOURNAL_DOI is
    // consulted by this class today; the others are kept for the cases still marked as to-do.
    // All of them are written for lower-case input.
    // WTO: need to match image articles...

    /** Journal article DOI: {@code info:doi/<prefix>/journal.<letters>.<digits>[suffix]}. */
    private static final Pattern JOURNAL_DOI = Pattern
            .compile("(info:doi)/([0-9\\.]+)/journal\\.([a-z]+)\\.([0-9]+)([\\._a-z0-9]*)");

    /** Same shape as {@link #JOURNAL_DOI} with ':' and '/' URL-encoded as %3A and %2F. */
    private static final Pattern JOURNAL_DOI_URL_ENCODED = Pattern
            .compile("(info%3Adoi)%2F([0-9\\.]+)%2Fjournal\\.([a-z]+)\\.([0-9]+)([\\._a-z0-9]*)");

    /** Image DOI: {@code info:doi/<prefix>/image.<letters>.v<digits>.i<digits>[suffix]}. */
    private static final Pattern IMAGE_DOI = Pattern
            .compile("(info:doi)/([0-9\\.]+)/(image\\.[a-z]+\\.v[0-9]+\\.i[0-9]+)([\\._a-z0-9]*)");

    /** Image DOI carrying an extra {@code journal.} segment in front of {@code image.}. */
    private static final Pattern JOURNAL_IMAGE_DOI = Pattern
            .compile("(info:doi)/([0-9\\.]+)/journal\\.(image\\.[a-z]+\\.v[0-9]+\\.i[0-9]+)([\\._a-z0-9]*)");

    // I make some assumptions here on length of the annotation URIs (36 characters).

    /** Annotation DOI ending right after the 36-character annotation id. */
    private static final Pattern ANNOTATION_DOI =
            Pattern.compile("(info:doi)/([0-9\\.]+/annotation)/([a-z0-9\\-]{36})$");

    /** Annotation DOI whose 36-character id may be followed by a suffix. */
    private static final Pattern ANNOTATION_DOI_WITH_SUFFIX = Pattern
            .compile("(info:doi)/([0-9\\.]+/annotation)/([a-z0-9\\-]{36})([\\.\\-a-z0-9]*)");

    /**
     * Builds a file store identifier (FSID) from a PLoS DOI and a file type. Since we are
     * talking to the object store, the reference is left mostly unmodified: a leading
     * {@code info:doi/} is stripped (the match is case-sensitive) and {@code "." + type} is
     * appended.
     *
     * @param doi the DOI, for example {@code info:doi/10.1371/journal.pone.0012345}
     * @param type the file type, i.e. pdf, xml etc.
     * @return the file store identifier string
     * @throws NullPointerException if {@code doi} is null
     */
    //  @Override
    // NOTE(review): the annotation above was already commented out; confirm whether
    // ObjectIDMapper declares this method before re-enabling it.
    public String doiTofsid(String doi, String type) {
        return doi.replaceFirst("^(info:doi)/", "") + "." + type;
    }

    /**
     * Maps a DOI and a file name to a file store identifier. The file name is presumably an
     * entry of an article zip; confirm with callers.
     *
     * <p>The DOI is lower-cased first. When it is a journal article DOI the result is
     * {@code <prefix>/journal.<fileName>} with the file extension upper-cased, for example
     * {@code 10.1371/journal.pone.0012345.g001.TIF}. Any other DOI falls through to
     * {@code <lower-cased doi>/<fileName>}.
     *
     * @param doi the DOI the file belongs to
     * @param fileName the name of the file
     * @return the file store identifier string
     * @throws NullPointerException if {@code doi} is null, or if {@code fileName} is null and
     *         {@code doi} is a journal article DOI
     */
    public String zipToFSID(String doi, String fileName) {
        // Locale.ROOT keeps the case mapping independent of the JVM default locale; with a
        // Turkish default, for instance, "INFO:DOI" would otherwise lower-case to a dotless
        // "ı" and stop matching the pattern.
        String normalizedDoi = doi.toLowerCase(Locale.ROOT);

        Matcher journalMatch = JOURNAL_DOI.matcher(normalizedDoi);
        if (journalMatch.matches()) {
            String baseName = FilenameUtils.removeExtension(fileName);
            // getExtension yields null only for a null fileName, so the call below keeps
            // failing fast with a NullPointerException in that case.
            String extension = FilenameUtils.getExtension(fileName).toUpperCase(Locale.ROOT);
            String storedName = extension.isEmpty() ? baseName : baseName + "." + extension;
            // group(2) is the numeric DOI prefix, e.g. "10.1371".
            return journalMatch.group(2) + "/journal." + storedName;
        }

        // TODO: handle other cases, like annotation
        return normalizedDoi + "/" + fileName;
    }

}