fr.ortolang.diffusion.store.binary.BinaryStoreServiceBean.java Source code

Java tutorial

Introduction

Here is the source code for fr.ortolang.diffusion.store.binary.BinaryStoreServiceBean.java

Source

package fr.ortolang.diffusion.store.binary;

/*
 * #%L
 * ORTOLANG
 * A online network structure for hosting language resources and tools.
 * 
 * Jean-Marie Pierrel / ATILF UMR 7118 - CNRS / Université de Lorraine
 * Etienne Petitjean / ATILF UMR 7118 - CNRS
 * Jérôme Blanchard / ATILF UMR 7118 - CNRS
 * Bertrand Gaiffe / ATILF UMR 7118 - CNRS
 * Cyril Pestel / ATILF UMR 7118 - CNRS
 * Marie Tonnelier / ATILF UMR 7118 - CNRS
 * Ulrike Fleury / ATILF UMR 7118 - CNRS
 * Frédéric Pierre / ATILF UMR 7118 - CNRS
 * Céline Moro / ATILF UMR 7118 - CNRS
 *  
 * This work is based on work done in the equipex ORTOLANG (http://www.ortolang.fr/), by several Ortolang contributors (mainly CNRTL and SLDR)
 * ORTOLANG is funded by the French State program "Investissements d'Avenir" ANR-11-EQPX-0032
 * %%
 * Copyright (C) 2013 - 2015 Ortolang Team
 * %%
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License as
 * published by the Free Software Foundation, either version 3 of the
 * License, or (at your option) any later version.
 * 
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Lesser Public License for more details.
 * 
 * You should have received a copy of the GNU General Lesser Public
 * License along with this program.  If not, see
 * <http://www.gnu.org/licenses/lgpl-3.0.html>.
 * #L%
 */

import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.nio.file.FileAlreadyExistsException;
import java.nio.file.FileStore;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.nio.file.StandardCopyOption;
import java.security.NoSuchAlgorithmException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.UUID;
import java.util.logging.Level;
import java.util.logging.Logger;
import java.util.stream.Collectors;
import java.util.stream.Stream;

import javax.annotation.PostConstruct;
import javax.annotation.security.PermitAll;
import javax.ejb.Local;
import javax.ejb.Lock;
import javax.ejb.LockType;
import javax.ejb.Singleton;
import javax.ejb.Startup;
import javax.ejb.TransactionAttribute;
import javax.ejb.TransactionAttributeType;

import org.apache.commons.io.IOUtils;
import org.apache.tika.Tika;
import org.apache.tika.exception.TikaException;
import org.apache.tika.io.TikaInputStream;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.parser.AutoDetectParser;
import org.apache.tika.parser.ParseContext;
import org.apache.tika.parser.Parser;
import org.jboss.ejb3.annotation.SecurityDomain;
import org.xml.sax.ContentHandler;
import org.xml.sax.SAXException;
import org.xml.sax.helpers.DefaultHandler;

import fr.ortolang.diffusion.OrtolangConfig;
import fr.ortolang.diffusion.store.binary.BinaryStoreContent.Type;
import fr.ortolang.diffusion.store.binary.hash.HashedFilterInputStream;
import fr.ortolang.diffusion.store.binary.hash.HashedFilterInputStreamFactory;
import fr.ortolang.diffusion.store.binary.hash.SHA1FilterInputStreamFactory;

/**
 * Local FileSystem based implementation of the BinaryStoreService.<br>
 * <br>
 * This implementation stores all contents in the provided base folder of the local file system. A content is
 * identified by the hash produced by the configured {@link HashedFilterInputStreamFactory} (a
 * {@link SHA1FilterInputStreamFactory} unless replaced) and is stored as
 * <code>base/volume/prefix/hash</code>, where <code>prefix</code> is the first {@link #DISTINGUISH_SIZE} characters
 * of the hash and <code>volume</code> is the name returned by <code>BinaryStoreVolumeMapper.getVolume(prefix)</code>.<br>
 * <br>
 * Two extra folders live directly under the base folder: <code>work</code> receives incoming content while its hash
 * is being computed, and <code>collide</code> keeps both files whenever two different contents produce the same
 * hash.<br>
 * <br>
 * The bean is declared with {@link LockType#READ}, so the methods below are written to tolerate being called
 * concurrently.
 *
 * @author Jerome Blanchard (jayblanc@gmail.com)
 * @version 1.0
 */
@Local(BinaryStoreService.class)
@Startup
@Singleton(name = BinaryStoreService.SERVICE_NAME)
@SecurityDomain("ortolang")
@Lock(LockType.READ)
@PermitAll
public class BinaryStoreServiceBean implements BinaryStoreService {

    public static final String DEFAULT_BINARY_HOME = "binary-store";
    public static final int DISTINGUISH_SIZE = 2;

    private static final Logger LOGGER = Logger.getLogger(BinaryStoreServiceBean.class.getName());

    private static final String WORK = "work";
    private static final String COLLIDE = "collide";

    /** Files at least this large (in bytes) are type-detected without wrapping them in a TikaInputStream. */
    private static final long CONTAINER_DETECTION_MAX_SIZE = 50000000L;
    /** Maximum number of characters returned by {@link #extract(String)}. */
    private static final int EXTRACT_MAX_STRING_LENGTH = 20000000;
    /** Size of the buffer used to drain a stream in {@link #generate(InputStream)}. */
    private static final int HASH_BUFFER_SIZE = 10240;

    private HashedFilterInputStreamFactory factory;
    private Path base;
    private Path working;
    private Path collide;

    private ContentHandler handler;
    private Parser autoDetectParser;

    public BinaryStoreServiceBean() {
    }

    /**
     * Resolves the store folders from the ORTOLANG home path, creates them when missing and prepares the hashing
     * factory and the Tika parser used by {@link #parse(String)}.<br>
     * A failure to create the folders is logged and does not prevent the bean from starting.
     */
    @PostConstruct
    public void init() {
        this.base = Paths.get(OrtolangConfig.getInstance().getHomePath().toString(), DEFAULT_BINARY_HOME);
        this.working = Paths.get(base.toString(), WORK);
        this.collide = Paths.get(base.toString(), COLLIDE);
        this.factory = new SHA1FilterInputStreamFactory();
        LOGGER.log(Level.FINEST, "Initializing service with base folder: " + base);
        try {
            Files.createDirectories(base);
            Files.createDirectories(working);
            Files.createDirectories(collide);
        } catch (Exception e) {
            LOGGER.log(Level.SEVERE, "unable to initialize binary store", e);
        }
        this.handler = new DefaultHandler();
        this.autoDetectParser = new AutoDetectParser();
    }

    /**
     * @return the factory used to wrap incoming streams in order to compute their hash
     */
    public HashedFilterInputStreamFactory getHashedFilterInputStreamFactory() {
        return factory;
    }

    /**
     * @param factory
     *            the factory to use from now on to compute content hashes
     */
    public void setHashedFilterInputStreamFactory(HashedFilterInputStreamFactory factory) {
        this.factory = factory;
    }

    /**
     * @return the base folder of the store
     */
    public Path getBase() {
        return base;
    }

    /**
     * Tells whether a content is stored under the given identifier.
     *
     * @param identifier
     *            the identifier (hash) of the content
     * @return <code>true</code> when a file exists for this identifier, <code>false</code> when it does not or when
     *         the identifier cannot be mapped to a location of the store
     */
    @Override
    @TransactionAttribute(TransactionAttributeType.SUPPORTS)
    public boolean contains(String identifier) throws BinaryStoreServiceException {
        try {
            Path path = getPathForIdentifier(identifier);
            return Files.exists(path);
        } catch (DataNotFoundException e) {
            return false;
        }
    }

    /**
     * Opens the content stored under the given identifier. The caller is responsible for closing the stream.
     *
     * @param identifier
     *            the identifier (hash) of the content
     * @return a new stream on the stored file
     * @throws DataNotFoundException
     *             if no file exists for this identifier
     * @throws BinaryStoreServiceException
     *             if the file cannot be opened
     */
    @Override
    @TransactionAttribute(TransactionAttributeType.SUPPORTS)
    public InputStream get(String identifier) throws BinaryStoreServiceException, DataNotFoundException {
        Path path = getExistingPath(identifier);
        try {
            return Files.newInputStream(path);
        } catch (Exception e) {
            throw new BinaryStoreServiceException(e);
        }
    }

    /**
     * Gives direct access to the file stored under the given identifier.
     *
     * @param identifier
     *            the identifier (hash) of the content
     * @return the stored file
     * @throws DataNotFoundException
     *             if no file exists for this identifier
     * @throws BinaryStoreServiceException
     *             if the path cannot be converted to a {@link File}
     */
    @Override
    @TransactionAttribute(TransactionAttributeType.SUPPORTS)
    public File getFile(String identifier) throws BinaryStoreServiceException, DataNotFoundException {
        Path path = getExistingPath(identifier);
        try {
            return path.toFile();
        } catch (Exception e) {
            throw new BinaryStoreServiceException(e);
        }
    }

    /**
     * Returns the size in bytes of the content stored under the given identifier.
     *
     * @param identifier
     *            the identifier (hash) of the content
     * @return the size of the stored file
     * @throws DataNotFoundException
     *             if no file exists for this identifier
     * @throws BinaryStoreServiceException
     *             if the size cannot be read
     */
    @Override
    @TransactionAttribute(TransactionAttributeType.SUPPORTS)
    public long size(String identifier) throws BinaryStoreServiceException, DataNotFoundException {
        Path path = getExistingPath(identifier);
        try {
            return Files.size(path);
        } catch (Exception e) {
            throw new BinaryStoreServiceException(e);
        }
    }

    /**
     * Detects the media type of the content stored under the given identifier using Tika.
     *
     * @param identifier
     *            the identifier (hash) of the content
     * @return the media type detected by Tika
     * @throws DataNotFoundException
     *             if no file exists for this identifier
     * @throws BinaryStoreServiceException
     *             if the detection fails
     */
    @Override
    @TransactionAttribute(TransactionAttributeType.SUPPORTS)
    public String type(String identifier) throws BinaryStoreServiceException, DataNotFoundException {
        Path path = getExistingPath(identifier);
        try {
            Tika tika = new Tika();
            return tika.detect(path.toFile());
        } catch (Exception e) {
            throw new BinaryStoreServiceException(e);
        }
    }

    /**
     * Detects the media type of the content stored under the given identifier using Tika, passing the original
     * file name along with the content. Files smaller than {@link #CONTAINER_DETECTION_MAX_SIZE} bytes are read
     * through a {@link TikaInputStream}; larger ones are handed to Tika as a plain stream.
     *
     * @param identifier
     *            the identifier (hash) of the content
     * @param filename
     *            the name of the file, given to Tika together with the content
     * @return the media type detected by Tika
     * @throws DataNotFoundException
     *             if no file exists for this identifier
     * @throws BinaryStoreServiceException
     *             if the detection fails
     */
    @Override
    @TransactionAttribute(TransactionAttributeType.SUPPORTS)
    public String type(String identifier, String filename)
            throws BinaryStoreServiceException, DataNotFoundException {
        Path path = getExistingPath(identifier);
        try (InputStream is = Files.newInputStream(path)) {
            Tika tika = new Tika();
            String type;
            if (Files.size(path) < CONTAINER_DETECTION_MAX_SIZE) {
                LOGGER.log(Level.FINEST, "file size is not too large, trying to detect also containers");
                try (TikaInputStream tis = TikaInputStream.get(is)) {
                    type = tika.detect(tis, filename);
                }
            } else {
                LOGGER.log(Level.FINEST, "file size is TOO large, does not detect types inside containers");
                type = tika.detect(is, filename);
            }
            return type;
        } catch (Exception e) {
            throw new BinaryStoreServiceException(e);
        }
    }

    /**
     * Extracts the text of the content stored under the given identifier using Tika. The result is limited to
     * {@link #EXTRACT_MAX_STRING_LENGTH} characters.
     *
     * @param identifier
     *            the identifier (hash) of the content
     * @return the text extracted by Tika
     * @throws DataNotFoundException
     *             if no file exists for this identifier
     * @throws BinaryStoreServiceException
     *             if the extraction fails
     */
    @Override
    @TransactionAttribute(TransactionAttributeType.SUPPORTS)
    public String extract(String identifier) throws BinaryStoreServiceException, DataNotFoundException {
        Path path = getExistingPath(identifier);
        try {
            Tika tika = new Tika();
            tika.setMaxStringLength(EXTRACT_MAX_STRING_LENGTH);
            return tika.parseToString(path.toFile());
        } catch (Exception e) {
            throw new BinaryStoreServiceException(e);
        }
    }

    /**
     * Stores the given content and returns its identifier.<br>
     * <br>
     * The content is first copied to a uniquely named temporary file of the <code>work</code> folder while its hash
     * is computed, then moved to its definitive location. When a file already exists at that location, both files
     * are compared byte by byte: identical content is simply discarded, different content is reported as a
     * collision and both files are kept in the <code>collide</code> folder (<code>hash.origin</code> and
     * <code>hash.colliding</code>, replacing files left by a previous collision on the same hash).<br>
     * The given stream is always closed, and the temporary file never outlives the call.
     *
     * @param content
     *            the content to store
     * @return the hash of the content, which is its identifier in the store
     * @throws DataCollisionException
     *             if a different content is already stored under the same hash
     * @throws BinaryStoreServiceException
     *             if the hash cannot be computed or if the content cannot be written
     */
    @Override
    @TransactionAttribute(TransactionAttributeType.SUPPORTS)
    public String put(InputStream content) throws BinaryStoreServiceException, DataCollisionException {
        HashedFilterInputStream input;
        try {
            input = factory.getHashedFilterInputStream(content);
        } catch (NoSuchAlgorithmException e) {
            throw new BinaryStoreServiceException(e);
        }
        // A random name cannot clash with the temporary file of a concurrent call, unlike a timestamp.
        Path tmpfile = Paths.get(working.toString(), UUID.randomUUID().toString());
        try {
            Files.copy(input, tmpfile);
            LOGGER.log(Level.FINE, "content stored in local temporary file: " + tmpfile.toString());
            String hash = input.getHash();
            LOGGER.log(Level.FINE, "content based generated sha1 hash: " + hash);

            String digit = hash.substring(0, DISTINGUISH_SIZE);
            String volumeName = BinaryStoreVolumeMapper.getVolume(digit);
            Path volume = Paths.get(base.toString(), volumeName);
            Path parent = Paths.get(base.toString(), volumeName, digit);
            Path file = Paths.get(base.toString(), volumeName, digit, hash);
            ensureDirectory(volume);
            ensureDirectory(parent);

            if (moveIfAbsent(tmpfile, file)) {
                LOGGER.log(Level.FINE, "content moved in local definitive file: " + file.toString());
                return hash;
            }

            LOGGER.log(Level.FINE, "a file with same hash already exists, trying to detect collision");
            // Both streams are closed before the temporary file is deleted or moved.
            if (sameContent(file, tmpfile)) {
                Files.delete(tmpfile);
                return hash;
            }

            LOGGER.log(Level.SEVERE, "BINARY COLLISION DETECTED - storing colliding files in dedicated folder");
            // REPLACE_EXISTING: files left by an earlier collision on this hash must not hide the collision.
            Files.copy(file, Paths.get(collide.toString(), hash + ".origin"),
                    StandardCopyOption.REPLACE_EXISTING);
            Files.move(tmpfile, Paths.get(collide.toString(), hash + ".colliding"),
                    StandardCopyOption.REPLACE_EXISTING);
            throw new DataCollisionException();
        } catch (IOException | VolumeNotFoundException e) {
            throw new BinaryStoreServiceException(e);
        } finally {
            IOUtils.closeQuietly(input);
            // No-op on every path that already moved or deleted the file; cleans up after a failure.
            deleteQuietly(tmpfile);
        }
    }

    /**
     * Verifies that the content stored under the given identifier still hashes to that identifier.
     *
     * @param identifier
     *            the identifier (hash) of the content
     * @throws DataNotFoundException
     *             if no file exists for this identifier
     * @throws DataCorruptedException
     *             if the hash computed from the stored file differs from the identifier
     * @throws BinaryStoreServiceException
     *             if the stored file cannot be read or hashed
     */
    @Override
    @TransactionAttribute(TransactionAttributeType.SUPPORTS)
    public void check(String identifier)
            throws BinaryStoreServiceException, DataNotFoundException, DataCorruptedException {
        Path path = getExistingPath(identifier);
        String check;
        try (InputStream input = Files.newInputStream(path)) {
            check = generate(input);
        } catch (IOException e) {
            throw new BinaryStoreServiceException(e);
        }
        if (!check.equals(identifier)) {
            throw new DataCorruptedException("The object with id [" + identifier
                    + "] is CORRUPTED. The stored object's content has generate a wrong identifier [" + check
                    + "]");
        }
    }

    /**
     * Removes the content stored under the given identifier.
     *
     * @param identifier
     *            the identifier (hash) of the content
     * @throws DataNotFoundException
     *             if no file exists for this identifier
     * @throws BinaryStoreServiceException
     *             if the file cannot be deleted
     */
    @Override
    @TransactionAttribute(TransactionAttributeType.SUPPORTS)
    public void delete(String identifier) throws BinaryStoreServiceException, DataNotFoundException {
        Path path = getExistingPath(identifier);
        try {
            Files.delete(path);
        } catch (Exception e) {
            throw new BinaryStoreServiceException(e);
        }
    }

    /**
     * Computes the hash of the given content without storing it. The stream is read until its end but is not
     * closed by this method.
     *
     * @param content
     *            the content to hash
     * @return the hash reported by the hashing stream once the content has been fully read
     * @throws BinaryStoreServiceException
     *             if the content cannot be read or hashed
     */
    @Override
    @TransactionAttribute(TransactionAttributeType.SUPPORTS)
    public String generate(InputStream content) throws BinaryStoreServiceException {
        try {
            HashedFilterInputStream input = factory.getHashedFilterInputStream(content);
            byte[] buffer = new byte[HASH_BUFFER_SIZE];
            while (input.read(buffer) >= 0) {
                // Nothing to do with the bytes: the stream is drained only so that the filter sees all of them.
            }
            return input.getHash();
        } catch (Exception e) {
            throw new BinaryStoreServiceException("Unable to generate a hash for this content: " + e.getMessage(),
                    e);
        }
    }

    /**
     * Maps an identifier to its location in the store, whether or not a file exists there.
     *
     * @param identifier
     *            the identifier (hash) of the content
     * @return the path <code>base/volume/prefix/identifier</code>
     * @throws DataNotFoundException
     *             if the identifier is null, too short to have a prefix, contains a path separator or starts with
     *             a dot, or if no volume is mapped to its prefix
     */
    private Path getPathForIdentifier(String identifier) throws DataNotFoundException {
        if (!isWellFormed(identifier)) {
            throw new DataNotFoundException("Unable to find an object with id [" + identifier + "] in the storage");
        }
        String digit = identifier.substring(0, DISTINGUISH_SIZE);
        try {
            return Paths.get(base.toString(), BinaryStoreVolumeMapper.getVolume(digit), digit, identifier);
        } catch (VolumeNotFoundException e) {
            throw new DataNotFoundException(e);
        }
    }

    /**
     * Defensive check applied before an identifier is turned into a path: it must be long enough to have a prefix
     * and must not be able to designate anything but a plain file name (no separator, no leading dot).
     */
    private static boolean isWellFormed(String identifier) {
        return identifier != null
                && identifier.length() >= DISTINGUISH_SIZE
                && identifier.indexOf('/') < 0
                && identifier.indexOf('\\') < 0
                && !identifier.startsWith(".");
    }

    /**
     * Maps an identifier to its location in the store and makes sure a file exists there.
     *
     * @throws DataNotFoundException
     *             if the identifier cannot be mapped or if nothing is stored under it
     */
    private Path getExistingPath(String identifier) throws DataNotFoundException {
        Path path = getPathForIdentifier(identifier);
        if (!Files.exists(path)) {
            throw new DataNotFoundException("Unable to find an object with id [" + identifier + "] in the storage");
        }
        return path;
    }

    /**
     * Creates the given directory unless it already exists. Losing the creation race against a concurrent call is
     * not an error as long as a directory ends up being there.
     */
    private static void ensureDirectory(Path dir) throws IOException {
        if (Files.exists(dir)) {
            return;
        }
        try {
            Files.createDirectory(dir);
        } catch (FileAlreadyExistsException e) {
            if (!Files.isDirectory(dir)) {
                throw e;
            }
        }
    }

    /**
     * Moves source to target unless something already exists at target.
     *
     * @return <code>true</code> if the file has been moved, <code>false</code> if target already existed (possibly
     *         because a concurrent call created it in the meantime), in which case source is left untouched
     */
    private static boolean moveIfAbsent(Path source, Path target) throws IOException {
        if (Files.exists(target)) {
            return false;
        }
        try {
            Files.move(source, target);
            return true;
        } catch (FileAlreadyExistsException e) {
            return false;
        }
    }

    /** Compares two files byte by byte. */
    private static boolean sameContent(Path first, Path second) throws IOException {
        try (InputStream input1 = Files.newInputStream(first); InputStream input2 = Files.newInputStream(second)) {
            return IOUtils.contentEquals(input1, input2);
        }
    }

    /** Deletes the given file if it exists; a failure is only logged. */
    private static void deleteQuietly(Path path) {
        try {
            Files.deleteIfExists(path);
        } catch (IOException e) {
            LOGGER.log(Level.WARNING, "unable to delete temporary file: " + path, e);
        }
    }

    /**
     * Counts the regular files found under the base folder (directories are not counted).
     */
    private long getStoreNbFiles() throws IOException {
        // The stream returned by Files.walk holds open directories and must be closed.
        try (Stream<Path> paths = Files.walk(base)) {
            return paths.filter(Files::isRegularFile).count();
        }
    }

    /**
     * Sums the sizes of the regular files found under the base folder. Directories are left out because the value
     * reported by Files.size for them depends on the file system.
     */
    private long getStoreSize() throws IOException {
        try (Stream<Path> paths = Files.walk(base)) {
            return paths.filter(Files::isRegularFile).mapToLong(this::size).sum();
        }
    }

    /** Size of a file, or 0 when it cannot be read (for instance because it has just been deleted). */
    private long size(Path p) {
        try {
            return Files.size(p);
        } catch (Exception e) {
            return 0;
        }
    }

    // System methods

    /**
     * Browses the store.<br>
     * Without a name, lists the volumes known to BinaryStoreVolumeMapper plus the <code>work</code> folder, with
     * information about the file system holding each of them; a volume whose information cannot be read is logged
     * and left out. With a name, lists the direct children of <code>base/name/prefix</code>.
     *
     * @param name
     *            the name of the volume to browse, or null or empty to list the volumes
     * @param prefix
     *            a path relative to the volume, or null to browse the volume itself
     * @return the volumes, or the children of the requested folder
     * @throws BinaryStoreServiceException
     *             if the requested path does not exist or cannot be listed
     */
    @Override
    @TransactionAttribute(TransactionAttributeType.SUPPORTS)
    public List<BinaryStoreContent> systemBrowse(String name, String prefix) throws BinaryStoreServiceException {
        if (name == null || name.length() == 0) {
            List<BinaryStoreContent> vinfos = new ArrayList<>();
            List<String> vnames = new ArrayList<>(BinaryStoreVolumeMapper.listVolumes());
            vnames.add(WORK);
            for (String vname : vnames) {
                try {
                    BinaryStoreContent volume = new BinaryStoreContent();
                    Path vpath = Paths.get(base.toString(), vname);
                    FileStore vstore = Files.getFileStore(vpath);
                    volume.setPath(vname);
                    volume.setType(Type.VOLUME);
                    volume.setFsName(vstore.name());
                    volume.setFsType(vstore.type());
                    volume.setFsTotalSize(vstore.getTotalSpace());
                    volume.setFsFreeSize(vstore.getUsableSpace());
                    volume.setSize(Files.size(vpath));
                    volume.setLastModificationDate(Files.getLastModifiedTime(vpath).toMillis());
                    vinfos.add(volume);
                } catch (IOException e) {
                    LOGGER.log(Level.WARNING,
                            "Unable to retrieve binary store volume information for volume: " + vname, e);
                }
            }
            return vinfos;
        }

        String subpath = (prefix == null) ? "" : prefix;
        Path vpath = Paths.get(base.toString(), name, subpath);
        if (!Files.exists(vpath)) {
            throw new BinaryStoreServiceException("volume name does not point to an existing file or a directory");
        }
        // The stream returned by Files.list holds an open directory and must be closed.
        try (Stream<Path> children = Files.list(vpath)) {
            return children.map(this::pathToContent).collect(Collectors.toList());
        } catch (IOException e) {
            throw new BinaryStoreServiceException(e);
        }
    }

    /**
     * Parses the content stored under the given hash with the auto-detecting Tika parser and returns the metadata
     * collected during the parsing. The text produced by the parser is discarded.
     *
     * @param hash
     *            the identifier (hash) of the content
     * @return the metadata filled in by Tika
     * @throws DataNotFoundException
     *             if no file exists for this hash
     * @throws BinaryStoreServiceException
     *             if the stored file cannot be opened
     * @throws TikaException
     *             if Tika fails to parse the content
     * @throws SAXException
     *             if the content handler fails
     * @throws IOException
     *             if the stored file cannot be read
     */
    @Override
    public Metadata parse(String hash)
            throws BinaryStoreServiceException, DataNotFoundException, TikaException, SAXException, IOException {
        try (InputStream inputStream = get(hash)) {
            Metadata metadata = new Metadata();
            ParseContext parseContext = new ParseContext();
            autoDetectParser.parse(inputStream, handler, metadata, parseContext);
            return metadata;
        }
    }

    /**
     * Describes a path of the store: path relative to the base folder, type (directory or file) and, when they can
     * be read, size and last modification date.
     */
    private BinaryStoreContent pathToContent(Path path) {
        BinaryStoreContent content = new BinaryStoreContent();
        content.setPath(base.relativize(path).toString());
        if (Files.isDirectory(path)) {
            content.setType(Type.DIRECTORY);
        } else {
            content.setType(Type.FILE);
        }
        try {
            content.setSize(Files.size(path));
            content.setLastModificationDate(Files.getLastModifiedTime(path).toMillis());
        } catch (IOException e) {
            LOGGER.log(Level.WARNING, "Unable to retrieve binary store content information for path: " + path, e);
        }
        return content;
    }

    // Service methods

    @Override
    public String getServiceName() {
        return BinaryStoreService.SERVICE_NAME;
    }

    /**
     * Collects information about the store: its base path, the number of files it holds and their total size. An
     * entry that cannot be computed is logged and left out of the result.
     */
    @Override
    public Map<String, String> getServiceInfos() {
        Map<String, String> infos = new HashMap<>();
        infos.put(INFO_PATH, this.base.toString());
        try {
            infos.put(INFO_FILES, Long.toString(getStoreNbFiles()));
        } catch (Exception e) {
            LOGGER.log(Level.INFO, "unable to collect info: " + INFO_FILES, e);
        }
        try {
            infos.put(INFO_SIZE, Long.toString(getStoreSize()));
        } catch (Exception e) {
            LOGGER.log(Level.INFO, "unable to collect info: " + INFO_SIZE, e);
        }
        return infos;
    }

}