fr.acxio.tools.agia.alfresco.AlfrescoNodeReader.java Source code

Java tutorial

Introduction

Here is the source code for fr.acxio.tools.agia.alfresco.AlfrescoNodeReader.java

Source

package fr.acxio.tools.agia.alfresco;

/*
 * Copyright 2014 Acxio
 * 
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 * 
 *   http://www.apache.org/licenses/LICENSE-2.0
 * 
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import java.net.URI;
import java.net.URISyntaxException;
import java.util.ArrayDeque;
import java.util.Arrays;
import java.util.Deque;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import javax.xml.namespace.NamespaceContext;

import org.alfresco.webservice.repository.RepositoryServiceSoapBindingStub;
import org.alfresco.webservice.types.NamedValue;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.batch.core.ExitStatus;
import org.springframework.batch.core.StepExecution;
import org.springframework.batch.core.StepExecutionListener;
import org.springframework.batch.item.ExecutionContext;
import org.springframework.batch.item.ItemReader;
import org.springframework.batch.item.ItemStream;
import org.springframework.batch.item.ItemStreamException;
import org.springframework.batch.item.NonTransientResourceException;
import org.springframework.batch.item.ParseException;
import org.springframework.batch.item.UnexpectedInputException;
import org.springframework.beans.factory.DisposableBean;
import org.springframework.beans.factory.InitializingBean;

import com.googlecode.sardine.DavResource;
import com.googlecode.sardine.Sardine;

import fr.acxio.tools.agia.alfresco.domain.Aspect;
import fr.acxio.tools.agia.alfresco.domain.Document;
import fr.acxio.tools.agia.alfresco.domain.Folder;
import fr.acxio.tools.agia.alfresco.domain.Node;
import fr.acxio.tools.agia.alfresco.domain.Property;
import fr.acxio.tools.agia.alfresco.domain.QName;

/**
 * Spring Batch reader that walks an Alfresco WebDAV directory tree and emits one
 * {@link Node} per folder or document it encounters.
 * <p>
 * The walk starts at the configured {@code path} and is depth-first: a folder is
 * returned before its children. Each call to {@link #read()} returns at most one
 * node; {@code null} signals that the whole tree has been visited.
 * </p>
 * <p>
 * The traversal position is made of the path of the directory currently being
 * listed and a stack holding one index per directory level (the head of the
 * stack is the index of the next entry to examine in the current directory).
 * Both are saved into the step {@link ExecutionContext} by
 * {@link #update(ExecutionContext)} and restored by
 * {@link #open(ExecutionContext)}.
 * </p>
 * <p>
 * NOTE(review): restoring a position assumes the WebDAV listing order is stable
 * between two runs - confirm against the {@link DavResourcesResolver} in use.
 * </p>
 */
public class AlfrescoNodeReader extends AlfrescoServicesConsumer
        implements ItemReader<Node>, ItemStream, StepExecutionListener, InitializingBean, DisposableBean {

    private static final Logger LOGGER = LoggerFactory.getLogger(AlfrescoNodeReader.class);

    /**
     * Extracts the parent directory of a path: group 1 is set when the path
     * ends with a slash, group 2 when it does not.
     */
    private static final Pattern PATH_EXTRACT_PATTERN = Pattern.compile("^(?:(.*/)[^/]*/|(?:(.*/))?[^/]*)$");

    /** Matches one {@code key=value} pair of a '|' separated cm:content value. */
    private static final Pattern CMCONTENT_PATTERN = Pattern.compile("([^|=]+)=([^|=]+)");

    // NOTE: this key is misspelled ("Indexdes"). It is kept as is on purpose:
    // renaming it would make previously saved execution contexts unreadable.
    private static final String CONTEXT_KEY_CURRENTINDEXES = "alfresco.reader.currentIndexdes";
    private static final String CONTEXT_KEY_CURRENTPATH = "alfresco.reader.currentPath";

    private static final String SUBPROP_ENCODING = "encoding";
    private static final String SUBPROP_CONTENT_URL = "contentUrl";
    private static final String SUBPROP_MIMETYPE = "mimetype";
    private static final String PROP_CM_CONTENT = "cm:content";
    private static final String WEBDAV_PATH = "webdav";

    private NamespaceContext namespaceContext;
    private DavResourcesResolver davResourcesResolver;
    private String path;

    /** Path of the directory currently being listed. */
    private String currentDirPath;

    /** One index per directory level; the head belongs to the current directory. */
    private Deque<Integer> currentIndexes;

    private Sardine sardine;
    private URI baseURI;

    /**
     * Sets the namespace context handed to every {@link QName} built by this reader.
     *
     * @param sNamespaceContext the namespace context
     */
    public void setNamespaceContext(NamespaceContext sNamespaceContext) {
        namespaceContext = sNamespaceContext;
    }

    /**
     * Sets the resolver used to list the content of a WebDAV directory.
     *
     * @param sDavResourcesResolver the resolver, required
     */
    public void setDavResourcesResolver(DavResourcesResolver sDavResourcesResolver) {
        davResourcesResolver = sDavResourcesResolver;
    }

    /**
     * Sets the path of the directory the traversal starts from. It is appended
     * as is to the path of the WebDAV base URI.
     *
     * @param sPath the start path, required
     */
    public void setPath(String sPath) {
        path = sPath;
    }

    /**
     * Drops the traversal state and the references to the WebDAV session and
     * base URI.
     */
    @Override
    public void destroy() throws Exception {
        currentDirPath = null;
        currentIndexes = null;
        sardine = null;
        baseURI = null;
    }

    /**
     * Checks that the collaborators this reader cannot work without are set.
     *
     * @throws IllegalStateException
     *             if the DAV resources resolver or the start path is missing
     */
    @Override
    public void afterPropertiesSet() throws Exception {
        // Fail fast: without a resolver read() cannot list anything, and a
        // null path would be concatenated as the text "null" in open().
        if (davResourcesResolver == null) {
            throw new IllegalStateException("The davResourcesResolver property must be set");
        }
        if (path == null) {
            throw new IllegalStateException("The path property must be set");
        }
    }

    @Override
    public void beforeStep(StepExecution sStepExecution) {
        // Nothing to do
    }

    @Override
    public ExitStatus afterStep(StepExecution sStepExecution) {
        return ExitStatus.COMPLETED;
    }

    /**
     * Prepares the traversal.
     * <p>
     * The WebDAV base URI is resolved from the Alfresco web application
     * address, the position is restored from the execution context when one
     * was saved (otherwise the traversal starts at the configured path, index
     * 0), and a WebDAV session is started.
     * </p>
     *
     * @param sExecutionContext the step execution context
     * @throws ItemStreamException
     *             if the web application address or the start path cannot be
     *             turned into a valid URI
     */
    @Override
    public void open(ExecutionContext sExecutionContext) throws ItemStreamException {

        String aFullPath = null;

        try {
            baseURI = new URI(getAlfrescoService().getWebappAddress()).resolve(WEBDAV_PATH);
            aFullPath = getWebDavDirectoryURI(baseURI.getPath() + path).getPath();
        } catch (URISyntaxException e) {
            throw new ItemStreamException(e);
        }

        currentDirPath = sExecutionContext.getString(CONTEXT_KEY_CURRENTPATH, aFullPath);
        Object aCurrentIndexes = sExecutionContext.get(CONTEXT_KEY_CURRENTINDEXES);
        if (aCurrentIndexes == null) {
            // Fresh start: first entry of the start directory
            currentIndexes = new ArrayDeque<Integer>();
            currentIndexes.addFirst(0);
        } else {
            // Restart: the array was written by update(), head of the stack first
            Integer[] aArray = (Integer[]) aCurrentIndexes;
            currentIndexes = new ArrayDeque<Integer>(Arrays.asList(aArray));
        }

        sardine = getAlfrescoService().startWebDavSession();
    }

    /**
     * Saves the current directory path and the index stack into the execution
     * context.
     *
     * @param sExecutionContext the step execution context
     */
    @Override
    public void update(ExecutionContext sExecutionContext) throws ItemStreamException {
        sExecutionContext.putString(CONTEXT_KEY_CURRENTPATH, currentDirPath);
        sExecutionContext.put(CONTEXT_KEY_CURRENTINDEXES,
                currentIndexes.toArray(new Integer[currentIndexes.size()]));
    }

    @Override
    public void close() throws ItemStreamException {
        // Nothing to do
    }

    /**
     * Builds an absolute URI that reuses the scheme, user info, host and port
     * of the WebDAV base URI with the given path, without query nor fragment.
     *
     * @param sAbsolutePath the absolute path of the resource
     * @return the URI of the resource
     * @throws URISyntaxException
     *             if the resulting URI is not valid
     */
    protected URI getWebDavDirectoryURI(String sAbsolutePath) throws URISyntaxException {
        return new URI(baseURI.getScheme(), baseURI.getUserInfo(), baseURI.getHost(), baseURI.getPort(),
                sAbsolutePath, null, null);
    }

    /**
     * Returns the next node of the traversal.
     * <p>
     * The entry designated by the head of the index stack is examined in the
     * current directory:
     * </p>
     * <ul>
     * <li>a document is returned and the index moves to the next entry;</li>
     * <li>a sub-folder is returned and becomes the current directory;</li>
     * <li>the entry standing for the current directory itself is skipped;</li>
     * <li>when the directory is exhausted, the traversal goes back to the
     * parent directory and resumes after the folder it came from.</li>
     * </ul>
     *
     * @return the next folder or document, or {@code null} when the whole tree
     *         has been visited
     */
    @Override
    public Node read() throws Exception, UnexpectedInputException, ParseException, NonTransientResourceException {

        // An empty stack means the traversal already ended (either in a
        // previous call or before a restart): report the end of the input
        // instead of failing on removeFirst().
        if (currentIndexes.isEmpty()) {
            return null;
        }

        Node aResult = null;

        init();
        RepositoryServiceSoapBindingStub repositoryService = getAlfrescoService().getRepositoryService();

        List<DavResource> aResources = listDirectory(currentDirPath);

        // True while the last examined entry did not yield a node but the
        // traversal can go on (self entry skipped, or moved to the parent).
        boolean aKeepSearching;

        do {
            aKeepSearching = false;

            boolean hasMore = !currentIndexes.isEmpty();
            int aLength = aResources.size();

            int aCurrentIndex = currentIndexes.removeFirst();

            if ((aLength == 0) || (aCurrentIndex >= aLength)) {
                // Current directory exhausted: go 1 step upper, if possible
                hasMore = !currentIndexes.isEmpty();

                if (hasMore) {
                    // Resume right after the folder we are leaving
                    aCurrentIndex = currentIndexes.removeFirst() + 1;

                    Matcher aPathMatcher = PATH_EXTRACT_PATTERN.matcher(currentDirPath);
                    if (aPathMatcher.matches()) {
                        currentDirPath = (aPathMatcher.group(1) != null) ? aPathMatcher.group(1)
                                : aPathMatcher.group(2);
                    }

                    aResources = listDirectory(currentDirPath);

                    aKeepSearching = true;
                }
            } else {
                DavResource aResource = aResources.get(aCurrentIndex);

                if (aResource.isDirectory()) {
                    if (!currentDirPath.equals(aResource.getPath())) {

                        LOGGER.debug("Row {}: {}", aCurrentIndex, aResource.getPath());

                        aResult = buildNode(repositoryService, aResource, toRepositoryPath(aResource));

                        // Go 1 step deeper. The parent index is pushed back
                        // unchanged; it is incremented when coming back up.
                        // The new directory is listed by the next read().
                        currentDirPath = aResource.getPath();
                        currentIndexes.addFirst(aCurrentIndex);
                        aCurrentIndex = 0;
                    } else {
                        // Skip current dir (webdav node lists itself)
                        aCurrentIndex++;
                        aKeepSearching = true;
                    }
                } else {
                    // Handle content node
                    LOGGER.debug("Row {}: {}", aCurrentIndex, aResource.getPath());

                    aResult = buildNode(repositoryService, aResource, toRepositoryPath(aResource));

                    aCurrentIndex++;
                }

            }

            if (hasMore) {
                currentIndexes.addFirst(aCurrentIndex);
            }

        } while (aKeepSearching);

        return aResult;
    }

    /** Lists the content of the given directory through the current WebDAV session. */
    private List<DavResource> listDirectory(String sDirPath) throws Exception {
        return davResourcesResolver.getDirectoryList(sardine, getWebDavDirectoryURI(sDirPath).toASCIIString());
    }

    /** Strips the path of the WebDAV base URI from the path of the given resource. */
    private String toRepositoryPath(DavResource sResource) {
        return sResource.getPath().substring(baseURI.getPath().length());
    }

    /**
     * Builds the domain node matching a WebDAV resource.
     * <p>
     * The result is a {@link Folder} for a directory and a {@link Document}
     * otherwise. When the repository returns at least one node for the given
     * path, the type, properties and aspects of the first one are copied into
     * the result. For a document, the {@code cm:content} property also
     * provides the MIME type and the encoding, and the content path is set to
     * the WebDAV URI of the resource.
     * </p>
     *
     * @param repositoryService the repository service used to look the node up
     * @param aResource the WebDAV resource
     * @param aPath the path of the resource, relative to the WebDAV base path
     * @return the node, never {@code null}; it carries no type, property nor
     *         aspect when the repository returned no matching node
     * @throws NodePathException
     *             if the content URI of a document cannot be built
     */
    protected Node buildNode(RepositoryServiceSoapBindingStub repositoryService, DavResource aResource,
            String aPath) throws NodePathException {
        Node aResult;
        org.alfresco.webservice.types.Node[] aNodes = getRepositoryMatchingNodes(repositoryService, aPath);

        if (aResource.isDirectory()) {
            aResult = new Folder();
        } else {
            aResult = new Document();
        }

        if ((aNodes != null) && (aNodes.length > 0)) {

            org.alfresco.webservice.types.Node aRepositoryNode = aNodes[0];

            aResult.setType(new QName(aRepositoryNode.getType(), namespaceContext));

            // The arrays are null-checked: a node without properties or
            // aspects must not abort the whole read.
            NamedValue[] aProperties = aRepositoryNode.getProperties();
            if (aProperties != null) {
                for (NamedValue aProperty : aProperties) {
                    Property aNodeProperty = new Property();
                    aNodeProperty.setName(new QName(aProperty.getName(), namespaceContext));
                    aNodeProperty.addValue(aProperty.getValue());
                    aResult.addProperty(aNodeProperty);

                    if ((PROP_CM_CONTENT.equals(aNodeProperty.getName().getShortName()))
                            && (!aResource.isDirectory())) {
                        Document aDocument = (Document) aResult;
                        Map<String, String> aValues = readCMContent(aProperty.getValue());
                        aDocument.setMimeType(aValues.get(SUBPROP_MIMETYPE));
                        try {
                            aDocument.setContentPath(getWebDavDirectoryURI(aResource.getPath()).toASCIIString());
                        } catch (URISyntaxException e) {
                            throw new NodePathException(e);
                        }
                        aDocument.setEncoding(aValues.get(SUBPROP_ENCODING));
                    }
                }
            }

            String[] aAspects = aRepositoryNode.getAspects();
            if (aAspects != null) {
                for (String aAspect : aAspects) {
                    Aspect aNodeAspect = new Aspect();
                    aNodeAspect.setName(new QName(aAspect, namespaceContext));
                    aResult.addAspect(aNodeAspect);
                }
            }
        }
        return aResult;
    }

    // 1. Query Nodes via WebDav => no limit on result, but file-like result,
    // without any property (we may use a query if the size of a dir is < 1000)
    // 2. ProcessIndicator will use the NodeRef => Processor can mark node
    // (really necessary ??)
    // 3. Store file index in the job => allow restart and continue (see
    // sbia/ch08/FilesInDirectoryItemReader)
    // 4. NodeProcessor will aggregate nodes into NodeList (Hibernate / Alf /
    // Drive) or transform nodes into a FieldSet (CSV)

    /**
     * Splits a {@code cm:content} property value into its sub-properties.
     * <p>
     * Every {@code key=value} pair found in the value is collected; keys and
     * values are the runs of characters containing neither '|' nor '='.
     * </p>
     *
     * @param sValue the raw property value, may be {@code null}
     * @return the sub-properties by name; empty when the value is {@code null}
     *         or contains no pair
     */
    protected Map<String, String> readCMContent(String sValue) {
        Map<String, String> aResult = new HashMap<String, String>(5);
        if (sValue == null) {
            // Pattern.matcher(null) would throw a NullPointerException
            return aResult;
        }
        Matcher aMatcher = CMCONTENT_PATTERN.matcher(sValue);
        while (aMatcher.find()) {
            aResult.put(aMatcher.group(1), aMatcher.group(2));
        }
        return aResult;
    }
}