org.lobid.lodmill.RdfModelFileWriter.java Source code

Java tutorial

Introduction

Here is the source code for org.lobid.lodmill.RdfModelFileWriter.java

Source

/* Copyright 2013 Pascal Christoph, hbz.
 * Licensed under the Eclipse Public License 1.0 */

package org.lobid.lodmill;

import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.OutputStreamWriter;
import java.io.StringWriter;
import java.io.Writer;
import java.util.NoSuchElementException;

import org.apache.commons.io.FilenameUtils;
import org.apache.commons.io.IOUtils;
import org.apache.jena.riot.Lang;
import org.apache.jena.riot.RDFDataMgr;
import org.apache.jena.riot.RDFLanguages;
import org.culturegraph.mf.exceptions.MetafactureException;
import org.culturegraph.mf.framework.DefaultObjectReceiver;
import org.culturegraph.mf.framework.annotations.Description;
import org.culturegraph.mf.framework.annotations.In;
import org.culturegraph.mf.framework.annotations.Out;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import com.hp.hpl.jena.rdf.model.LiteralRequiredException;
import com.hp.hpl.jena.rdf.model.Model;

/**
 * A sink, writing triples into files. The filenames are constructed from the
 * literal of an given property.
 * 
 * @author Pascal Christoph
 */
@Description("Writes the object value of an RDF model into a file. Default serialization is 'NTRIPLES'. The filename is "
        + "constructed from the literal of an given property (recommended properties are identifier)."
        + " Variable are " + "- 'target' (determining the output directory)"
        + "- 'property' (the property in the RDF model. The object of this property"
        + " will be the main part of the file's name.) "
        + "- 'startIndex' ( a subfolder will be extracted out of the filename. This marks the index' beginning )"
        + "- 'stopIndex' ( a subfolder will be extracted out of the filename. This marks the index' end )"
        + "- 'serialization (e.g. one of 'NTRIPLES', 'TURTLE', 'RDFXML','RDFJSON'")
@In(Model.class)
@Out(Void.class)
public final class RdfModelFileWriter extends DefaultObjectReceiver<Model> implements FilenameExtractor {
    private static final Logger LOG = LoggerFactory.getLogger(RdfModelFileWriter.class);

    private FilenameUtil filenameUtil = new FilenameUtil();
    private Lang serialization;

    /**
     * Default constructor
     */
    public RdfModelFileWriter() {
        setProperty("http://purl.org/dc/terms/identifier");
        setFileSuffix("nt");
        setSerialization("NTRIPLES");
    }

    @Override
    public String getEncoding() {
        return filenameUtil.encoding;
    }

    @Override
    public void setEncoding(final String encoding) {
        filenameUtil.encoding = encoding;
    }

    @Override
    public void setTarget(final String target) {
        filenameUtil.target = target;
    }

    @Override
    public void setProperty(final String property) {
        filenameUtil.property = property;
    }

    @Override
    public void setFileSuffix(final String fileSuffix) {
        filenameUtil.fileSuffix = fileSuffix;
    }

    @Override
    public void setStartIndex(final int startIndex) {
        filenameUtil.startIndex = startIndex;
    }

    @Override
    public void setEndIndex(final int endIndex) {
        filenameUtil.endIndex = endIndex;
    }

    /**
     * 
     * @param serialization Sets the serialization format. Default is NTriples.
     */
    public void setSerialization(final String serialization) {
        this.serialization = RDFLanguages.nameToLang(serialization);
    }

    @Override
    public void process(final Model model) {
        String identifier = null;
        try {
            identifier = model.listObjectsOfProperty(model.createProperty(filenameUtil.property)).next().asLiteral()
                    .toString();
            LOG.debug("Going to store identifier=" + identifier);
        } catch (NoSuchElementException e) {
            LOG.warn("No identifier => cannot derive a filename for " + model.toString(), e);
            return;
        } catch (LiteralRequiredException e) {
            LOG.warn("Identifier is a URI. Derive filename from that URI ... " + model.toString(), e);
            identifier = model.listObjectsOfProperty(model.createProperty(filenameUtil.property)).next().toString();
        }

        String directory = identifier;
        if (directory.length() >= filenameUtil.endIndex) {
            directory = directory.substring(filenameUtil.startIndex, filenameUtil.endIndex);
        }
        final String file = FilenameUtils.concat(filenameUtil.target,
                FilenameUtils.concat(directory + File.separator, identifier + "." + filenameUtil.fileSuffix));

        LOG.info("Write to " + file);
        ensurePathExists(file);

        try {
            final Writer writer = new OutputStreamWriter(new FileOutputStream(file), filenameUtil.encoding);
            final StringWriter tripleWriter = new StringWriter();
            RDFDataMgr.write(tripleWriter, model, this.serialization);
            tripleWriter.toString();
            IOUtils.write(tripleWriter.toString(), writer);
            writer.close();
        } catch (IOException e) {
            e.printStackTrace();
            throw new MetafactureException(e);
        }
    }

    private static void ensurePathExists(final String path) {
        final File parent = new File(path).getAbsoluteFile().getParentFile();
        parent.mkdirs();
    }

}