org.carrot2.core.ProcessingComponentSuite.java Source code

Java tutorial

Introduction

Here is the source code for org.carrot2.core.ProcessingComponentSuite.java

Source

/*
 * Carrot2 project.
 *
 * Copyright (C) 2002-2015, Dawid Weiss, Stanisaw Osiski.
 * All rights reserved.
 *
 * Refer to the full license file "carrot2.LICENSE"
 * in the root folder of the repository checkout or at:
 * http://www.carrot2.org/carrot2.LICENSE
 */

package org.carrot2.core;

import static com.google.common.collect.Lists.newArrayList;

import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import java.util.Map;

import org.carrot2.util.CloseableUtils;
import org.carrot2.util.resource.IResource;
import org.carrot2.util.resource.ResourceLookup;
import org.carrot2.util.simplexml.PersisterHelpers;
import org.simpleframework.xml.ElementList;
import org.simpleframework.xml.Root;
import org.simpleframework.xml.core.Commit;
import org.simpleframework.xml.core.Persister;
import org.simpleframework.xml.strategy.TreeStrategy;

import com.google.common.collect.Iterables;
import com.google.common.collect.Iterators;
import com.google.common.collect.Lists;

/**
 * A set of {@link IProcessingComponent}s used in Carrot2 applications.
 */
@Root(name = "component-suite")
public class ProcessingComponentSuite {
    @ElementList(inline = true, required = false, entry = "include")
    ArrayList<ProcessingComponentSuiteInclude> includes;

    @ElementList(name = "sources", entry = "source", required = false)
    private ArrayList<DocumentSourceDescriptor> sources;

    @ElementList(name = "algorithms", entry = "algorithm", required = false)
    private ArrayList<ProcessingComponentDescriptor> algorithms;

    @ElementList(name = "components", entry = "component", required = false)
    private ArrayList<ProcessingComponentDescriptor> otherComponents;

    public ProcessingComponentSuite() {
    }

    public ProcessingComponentSuite(ArrayList<DocumentSourceDescriptor> sources,
            ArrayList<ProcessingComponentDescriptor> algorithms) {
        this.algorithms = algorithms;
        this.sources = sources;
        this.otherComponents = Lists.newArrayList();
    }

    /**
     * Returns the internal list of document sources. Changes to this list will affect the
     * suite.
     */
    public List<DocumentSourceDescriptor> getSources() {
        return sources;
    }

    /**
     * Returns the internal list of algorithms. Changes to this list will affect the
     * suite.
     */
    public List<ProcessingComponentDescriptor> getAlgorithms() {
        return algorithms;
    }

    /**
     * Return a list of other components (not algorithms, not sources).
     */
    public List<ProcessingComponentDescriptor> getOtherComponents() {
        return otherComponents;
    }

    /**
     * Returns all components available in this suite, including data sources, algorithms
     * and any other types.
     */
    public List<ProcessingComponentDescriptor> getComponents() {
        return Lists.newArrayList(Iterables.concat(sources, algorithms, otherComponents));
    }

    /**
     * Replace missing attributes with empty lists.
     */
    @Commit
    private void postDeserialize(Map<Object, Object> session) throws Exception {
        if (sources == null)
            sources = newArrayList();
        if (algorithms == null)
            algorithms = newArrayList();
        if (includes == null)
            includes = newArrayList();
        if (otherComponents == null)
            otherComponents = newArrayList();

        // Acquire contextual resource lookup from the session.
        final ResourceLookup resourceLookup = PersisterHelpers.getResourceLookup(session);

        // Load included suites. Currently, we don't check for cycles.
        final List<ProcessingComponentSuite> suites = Lists.newArrayList();

        for (ProcessingComponentSuiteInclude include : includes) {
            final IResource resource = resourceLookup.getFirst(include.suite);
            if (resource == null) {
                throw new Exception("Could not locate resource: " + include.suite);
            }
            suites.add(deserialize(resource, resourceLookup));
        }

        // Merge sources
        for (ProcessingComponentSuite suite : suites) {
            sources.addAll(suite.getSources());
            algorithms.addAll(suite.getAlgorithms());
            otherComponents.addAll(suite.getOtherComponents());
        }
    }

    /**
     * Deserializes component suite information from an XML stream.
     * 
     * @param resource The resource to be deserialized (must not be null).
     * @param resourceLookup Resource lookup utilities for potential included resources. 
     */
    public static ProcessingComponentSuite deserialize(IResource resource, ResourceLookup resourceLookup)
            throws Exception {
        if (resource == null) {
            throw new IOException("Resource must not be null.");
        }

        final InputStream inputStream = resource.open();
        try {
            if (inputStream == null) {
                throw new IOException("Input stream must not be null.");
            }

            final Persister persister = PersisterHelpers.createPersister(resourceLookup, new TreeStrategy());
            final ProcessingComponentSuite suite = persister.read(ProcessingComponentSuite.class, inputStream);

            // Clear internals related do deserialization
            suite.includes = null;
            return suite;
        } finally {
            CloseableUtils.close(inputStream);
        }
    }

    /**
     * Serializes this component suite as an UTF-8 encoded XML.
     */
    public void serialize(OutputStream stream) throws Exception {
        new Persister().write(this, stream);
    }

    /**
     * Remove components marked as unavailable from the suite.
     * 
     * @see ProcessingComponentDescriptor#isComponentAvailable()
     */
    public List<ProcessingComponentDescriptor> removeUnavailableComponents() {
        ArrayList<ProcessingComponentDescriptor> failed = Lists.newArrayList();
        ProcessingComponentDescriptor p;
        for (Iterator<? extends ProcessingComponentDescriptor> i = Iterators.concat(sources.iterator(),
                algorithms.iterator()); i.hasNext();) {
            p = i.next();
            if (!p.isComponentAvailable()) {
                failed.add(p);
                i.remove();
            }
        }

        return failed;
    }

    /**
     * Returns all processing component configurations available in this suite.
     * 
     * @see Controller#init(Map, ProcessingComponentConfiguration...)
     */
    public ProcessingComponentConfiguration[] getComponentConfigurations() {
        final List<ProcessingComponentDescriptor> components = getComponents();
        final ProcessingComponentConfiguration[] result = new ProcessingComponentConfiguration[components.size()];
        int i = 0;
        for (ProcessingComponentDescriptor processingComponentDescriptor : components) {
            result[i++] = processingComponentDescriptor.getComponentConfiguration();
        }
        return result;
    }
}