eu.delving.services.core.impl.DataSetImpl.java Source code

Java tutorial

Introduction

Here is the source code for eu.delving.services.core.impl.DataSetImpl.java

Source

/*
 * Copyright 2010 DELVING BV
 *
 * Licensed under the EUPL, Version 1.1 or as soon they
 * will be approved by the European Commission - subsequent
 * versions of the EUPL (the "Licence");
 * you may not use this work except in compliance with the
 * Licence.
 * You may obtain a copy of the Licence at:
 *
 * http://ec.europa.eu/idabc/eupl
 *
 * Unless required by applicable law or agreed to in
 * writing, software distributed under the Licence is
 * distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
 * express or implied.
 * See the Licence for the specific language governing
 * permissions and limitations under the Licence.
 */

package eu.delving.services.core.impl;

import com.mongodb.DBCollection;
import com.mongodb.DBCursor;
import com.mongodb.DBObject;
import eu.delving.metadata.Facts;
import eu.delving.metadata.MetadataNamespace;
import eu.delving.metadata.Path;
import eu.delving.metadata.RecordMapping;
import eu.delving.services.core.MetaRepo;
import eu.delving.services.core.MongoObjectParser;
import eu.delving.services.exceptions.AccessKeyException;
import eu.delving.services.exceptions.MappingNotFoundException;
import eu.delving.services.exceptions.MetaRepoSystemException;
import eu.delving.services.exceptions.RecordParseException;
import eu.delving.sip.DataSetState;
import eu.europeana.sip.core.MappingException;
import org.apache.log4j.Logger;
import org.bson.types.ObjectId;

import java.io.ByteArrayInputStream;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.Date;
import java.util.List;
import java.util.Map;
import java.util.TreeMap;

import static eu.delving.core.util.MongoObject.mob;

/**
 * Implementing the data set interface
 *
 * @author Gerald de Jong <geralddejong@gmail.com>
 */

class DataSetImpl implements MetaRepo.DataSet {
    private static Logger LOG = Logger.getLogger(DataSetImpl.class);
    private ImplFactory implFactory;
    private DBObject object;
    private DBCollection recColl;

    DataSetImpl(ImplFactory implFactory, DBObject object) {
        this.implFactory = implFactory;
        this.object = object;
    }

    private DBCollection records() {
        if (recColl == null) {
            recColl = implFactory.records(getSpec());
        }
        return recColl;
    }

    @Override
    public String getSpec() {
        return (String) object.get(SPEC);
    }

    @Override
    public DBObject getNamespaces() {
        return (DBObject) object.get(NAMESPACES);
    }

    @Override
    public DataSetState getState(boolean fresh) {
        if (fresh) {
            object = implFactory.dataSets().findOne(mob("_id", object.get("_id")));
        }
        return DataSetState.get((String) object.get(DATA_SET_STATE));
    }

    @Override
    public String getErrorMessage() {
        return (String) object.get(ERROR_MESSAGE);
    }

    @Override
    public void setState(DataSetState dataSetState) {
        object.put(DATA_SET_STATE, dataSetState.toString());
        object.removeField(ERROR_MESSAGE);
    }

    @Override
    public void setErrorState(String message) {
        setState(DataSetState.ERROR);
        object.put(ERROR_MESSAGE, message);
    }

    @Override
    public void parseRecords(InputStream inputStream) throws RecordParseException {
        implFactory.removeFirstHarvestSteps(getSpec());
        object.put(SOURCE_HASH, "");
        save();
        records().drop();
        try {
            MetaRepo.Details details = getDetails();
            Facts facts = Facts.read(new ByteArrayInputStream(details.getFacts()));
            MongoObjectParser parser = new MongoObjectParser(inputStream, new Path(facts.getRecordRootPath()),
                    new Path(facts.getUniqueElementPath()), details.getMetadataFormat().getPrefix(),
                    details.getMetadataFormat().getNamespace());
            MongoObjectParser.Record record;
            Date modified = new Date();
            object.put(NAMESPACES, parser.getNamespaces());
            int recordCount = 0;
            while ((record = parser.nextRecord()) != null) {
                record.getMob().put(MetaRepo.Record.MODIFIED, modified);
                record.getMob().put(MetaRepo.Record.DELETED, false);
                records().insert(record.getMob());
                //                records().update( // just match unique values for now
                //                        mob(
                //                                MetaRepo.Record.UNIQUE,
                //                                record.getMob().get(MetaRepo.Record.UNIQUE)
                //                        ),
                //                        record.getMob(),
                //                        true,
                //                        false
                //                );
                recordCount++;
                if (recordCount % 10000 == 0) {
                    LOG.info(String.format("%d Records read, current count %d", recordCount, records().count()));
                }
            }
            LOG.info(String.format("Finally, %d Records read, current count %d", recordCount, records().count()));
            parser.close();
            //            // mark records unmodified by above loop as deleted
            //            records().update(
            //                    mob(
            //                            MetaRepo.Record.MODIFIED,
            //                            mob("$lt", modified)
            //                    ),
            //                    mob(
            //                            "$set",
            //                            mob(MetaRepo.Record.DELETED, true)
            //                    ),
            //                    false,
            //                    true
            //            );
            //            LOG.info(String.format("After marking orphans, %d Records read, current count %d", recordCount, records().count()));
        } catch (Exception e) {
            throw new RecordParseException("Unable to parse records", e);
        }
        object.put(MetaRepo.DataSet.DATA_SET_STATE, DataSetState.UPLOADED.toString());
        save();
    }

    @Override
    public void setMapping(RecordMapping recordMapping, boolean accessKeyRequired) {
        DBObject mappings = (DBObject) object.get(MAPPINGS);
        if (mappings == null) {
            object.put(MAPPINGS, mappings = mob());
        }
        MetadataNamespace mappedNamespace = null;
        for (MetadataNamespace namespace : MetadataNamespace.values()) {
            if (namespace.getPrefix().equals(recordMapping.getPrefix())) {
                mappedNamespace = namespace;
                break;
            }
        }
        if (mappedNamespace == null) {
            throw new MetaRepoSystemException(
                    String.format("Namespace prefix %s not recognized", recordMapping.getPrefix()));
        }
        implFactory.removeFirstHarvestSteps(getSpec());
        String xml = RecordMapping.toXml(recordMapping);
        mappings.put(mappedNamespace.getPrefix(), mob(MetaRepo.Mapping.FORMAT,
                mob(MetaRepo.MetadataFormat.PREFIX, mappedNamespace.getPrefix(), MetaRepo.MetadataFormat.NAMESPACE,
                        mappedNamespace.getUri(), MetaRepo.MetadataFormat.SCHEMA, mappedNamespace.getSchema(),
                        MetaRepo.MetadataFormat.ACCESS_KEY_REQUIRED, accessKeyRequired),
                MetaRepo.Mapping.RECORD_MAPPING, xml));
        save();
    }

    @Override
    public void setFactsHash(String sourceHash) {
        object.put(FACTS_HASH, sourceHash);
    }

    @Override
    public void setSourceHash(String sourceHash, boolean downloaded) {
        object.put(downloaded ? DOWNLOADED_SOURCE_HASH : SOURCE_HASH, sourceHash);
    }

    @Override
    public void setMappingHash(String metadataPrefix, String hash) {
        object.put(MAPPING_HASH_PREFIX + metadataPrefix, hash);
    }

    @Override
    public int getRecordsIndexed() {
        Object obj = object.get(RECORDS_INDEXED);
        if (obj == null) {
            return 0;
        }
        return (Integer) obj;
    }

    @Override
    public void setRecordsIndexed(int count) {
        object.put(RECORDS_INDEXED, count);
    }

    @Override
    public void incrementRecordsIndexed(int increment) {
        implFactory.dataSets().update(mob(MetaRepo.DataSet.SPEC, getSpec()),
                mob("$inc", mob(RECORDS_INDEXED, increment)));
        setRecordsIndexed(getRecordsIndexed() + increment);
    }

    @Override
    public boolean hasDetails() {
        return object.get(DETAILS) != null;
    }

    @Override
    public MetaRepo.Details createDetails() {
        DBObject details = mob();
        object.put(DETAILS, details);
        return new DetailsImpl(details);
    }

    @Override
    public MetaRepo.Details getDetails() {
        Object detailsObject = object.get(DETAILS);
        if (detailsObject == null) {
            throw new MetaRepoSystemException("No Details found");
        }
        return new DetailsImpl((DBObject) detailsObject);
    }

    @Override
    public Map<String, MetaRepo.Mapping> mappings() {
        Map<String, MetaRepo.Mapping> mappingMap = new TreeMap<String, MetaRepo.Mapping>();
        DBObject mappingsObject = (DBObject) object.get(MAPPINGS);
        if (mappingsObject != null) {
            for (String prefix : mappingsObject.keySet()) {
                mappingMap.put(prefix, implFactory.createMapping(this, (DBObject) mappingsObject.get(prefix)));
            }
        }
        return mappingMap;
    }

    @Override
    public int getRecordCount() {
        return (int) records().count();
    }

    @Override
    public MetaRepo.Record getRecord(ObjectId id, String prefix, String accessKey)
            throws MappingNotFoundException, AccessKeyException, MappingException { // if prefix is passed in, mapping can be done
        DBObject rawRecord = records().findOne(mob(MetaRepo.MONGO_ID, id));
        if (rawRecord != null) {
            MetaRepo.Mapping mapping = getMapping(prefix, accessKey);
            List<RecordImpl> list = new ArrayList<RecordImpl>();
            RecordImpl rec = new RecordImpl(records(), rawRecord, getDetails().getMetadataFormat().getPrefix(),
                    getNamespaces());
            System.out.println("Fingerprint {");
            for (Map.Entry<String, Integer> entry : rec.getFingerprint().entrySet()) {
                System.out.println("   " + entry.getKey() + " => " + entry.getValue());
            }
            System.out.println("}");
            list.add(rec);
            if (mapping != null) {
                Map<String, String> namespaces = new TreeMap<String, String>();
                DBObject namespacesObject = getNamespaces();
                for (String nsPrefix : namespacesObject.keySet()) {
                    namespaces.put(nsPrefix, (String) namespacesObject.get(nsPrefix));
                }
                ((MappingInternal) mapping).executeMapping(list, namespaces);
            }
            return list.isEmpty() ? null : list.get(0);
        }
        return null;
    }

    @Override
    public RecordFetch getRecords(String prefix, int count, Date from, ObjectId afterId, Date until,
            String accessKey) throws MappingNotFoundException, AccessKeyException, MappingException {
        MetaRepo.Mapping mapping = getMapping(prefix, accessKey);
        final List<RecordImpl> list = new ArrayList<RecordImpl>();
        DBCursor cursor = createCursor(from, afterId, until).limit(count).sort(mob(MetaRepo.MONGO_ID, 1));
        while (cursor.hasNext()) {
            DBObject object = cursor.next();
            list.add(new RecordImpl(records(), object, getDetails().getMetadataFormat().getPrefix(),
                    getNamespaces()));
        }
        if (list.isEmpty()) {
            return null;
        } else {
            final ObjectId nextAfterId = list.get(list.size() - 1).getId();
            if (mapping != null) {
                Map<String, String> namespaces = new TreeMap<String, String>();
                DBObject namespacesObject = (DBObject) object.get(NAMESPACES);
                for (String nsPrefix : namespacesObject.keySet()) {
                    namespaces.put(nsPrefix, (String) namespacesObject.get(nsPrefix));
                }
                ((MappingInternal) mapping).executeMapping(list, namespaces); // can remove members when records don't validate
            }
            return new RecordFetch() {

                @Override
                public List<? extends MetaRepo.Record> getRecords() {
                    return list;
                }

                @Override
                public ObjectId getAfterId() {
                    return nextAfterId;
                }
            };
        }
    }

    @Override
    public List<String> getHashes() {
        List<String> hashes = new ArrayList<String>();
        addHash(FACTS_HASH, hashes);
        addHash(SOURCE_HASH, hashes);
        addHash(DOWNLOADED_SOURCE_HASH, hashes);
        for (String metadataPrefix : implFactory.getMetadataModel().getPrefixes()) {
            addHash(MAPPING_HASH_PREFIX + metadataPrefix, hashes);
        }
        return hashes;
    }

    @Override
    public void save() {
        implFactory.dataSets().save(object);
    }

    @Override
    public void delete() {
        records().drop();
        implFactory.dataSets().remove(object);
    }

    @Override
    public String toString() {
        return String.format("DataSet(%s)", getSpec());
    }

    @Override
    public boolean equals(Object o) {
        if (this == o)
            return true;
        if (o == null || getClass() != o.getClass())
            return false;
        DataSetImpl dataSet = (DataSetImpl) o;
        return getSpec().equals(dataSet.getSpec());
    }

    @Override
    public int hashCode() {
        return getSpec().hashCode();
    }

    private void addHash(String hashAttribute, List<String> hashes) {
        String hash = (String) object.get(hashAttribute);
        if (hash != null) {
            hashes.add(hash);
        }
    }

    private MetaRepo.Mapping getMapping(String prefix, String key)
            throws AccessKeyException, MappingNotFoundException {
        MetaRepo.Mapping mapping;
        if (getDetails().getMetadataFormat().getPrefix().equals(prefix)) {
            mapping = null;
            if (getDetails().getMetadataFormat().isAccessKeyRequired()
                    && !implFactory.getAccessKey().checkKey(key)) {
                throw new AccessKeyException(
                        String.format("Raw metadata format requires access key, but %s is not valid", key));
            }
        } else {
            mapping = mappings().get(prefix);
            if (mapping == null) {
                throw new MappingNotFoundException(String.format("No mapping found to prefix %s", prefix));
            }
            if (mapping.getMetadataFormat().isAccessKeyRequired() && !implFactory.getAccessKey().checkKey(key)) {
                throw new AccessKeyException(
                        String.format("Mapping to metadata format requires access key, but %s is not valid", key));
            }
        }
        return mapping;
    }

    private DBCursor createCursor(Date from, ObjectId afterId, Date until) {
        DBObject query = mob();
        if (from != null) {
            query.put(MetaRepo.Record.MODIFIED, mob("$gte", from));
        }
        if (afterId != null) {
            query.put(MetaRepo.MONGO_ID, mob("$gt", afterId));
        }
        if (until != null) {
            query.put(MetaRepo.Record.MODIFIED, mob("$lte", until));
        }
        return records().find(query);
    }
}