com.marklogic.contentpump.utilities.TransformHelper.java Source code

Java tutorial

Introduction

Here is the source code for com.marklogic.contentpump.utilities.TransformHelper.java, a helper class for server-side transforms.

Source

/*
 * Copyright 2003-2016 MarkLogic Corporation
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.marklogic.contentpump.utilities;

import java.io.InterruptedIOException;
import java.io.UnsupportedEncodingException;
import java.util.HashMap;
import java.util.Set;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.BytesWritable;
import org.apache.hadoop.io.Text;

import com.marklogic.contentpump.ContentWithFileNameWritable;
import com.marklogic.contentpump.DatabaseDocumentWithMeta;
import com.marklogic.contentpump.RDFWritable;
import com.marklogic.contentpump.TransformOutputFormat;
import com.marklogic.io.Base64;
import com.marklogic.mapreduce.ContentType;
import com.marklogic.xcc.AdhocQuery;
import com.marklogic.xcc.ContentCapability;
import com.marklogic.xcc.ContentCreateOptions;
import com.marklogic.xcc.ContentPermission;
import com.marklogic.xcc.DocumentRepairLevel;
import com.marklogic.xcc.types.ValueType;

/**
 * Helper class for server-side transform.
 *
 * <p>Builds the XQuery text that invokes {@code hadoop:transform-and-insert}
 * and binds its three external variables ({@code URI}, {@code CONTENT} and
 * {@code INSERT-OPTIONS}) on an {@link AdhocQuery}. The insert options are
 * passed as a serialized {@code map:map} element whose values are all
 * strings.
 *
 * @author ali
 */
public class TransformHelper {
    public static final Log LOG = LogFactory.getLog(TransformHelper.class);

    /** Opening tag of the serialized {@code map:map} element, with the namespaces its entries use. */
    private static final String MAP_ELEM_START_TAG = "<map:map xmlns:xs=\"http://www.w3.org/2001/XMLSchema\" xmlns:xsi"
            + "=\"http://www.w3.org/2001/XMLSchema-instance\" xmlns:map=\"http:" + "//marklogic.com/xdmp/map\">";

    /**
     * Builds the XQuery main module that calls
     * {@code hadoop:transform-and-insert} with the given transform settings.
     *
     * <p>NOTE(review): the four arguments are spliced into XQuery string
     * literals verbatim. A value containing a double quote or an ampersand
     * would change or break the generated query. Left as is because existing
     * callers may depend on the current pass-through behavior; confirm before
     * adding escaping.
     *
     * @param moduleUri     URI of the transform module
     * @param functionNs    namespace of the transform function
     * @param functionName  local name of the transform function
     * @param functionParam parameter string handed to the transform function
     * @return the query text
     */
    private static String getInvokeModuleQuery(String moduleUri, String functionNs, String functionName,
            String functionParam) {
        StringBuilder q = new StringBuilder();
        q.append("xquery version \"1.0-ml\";\n").append("import module namespace hadoop = \"http://marklogic.com")
                .append("/xdmp/hadoop\" at \"/MarkLogic/hadoop.xqy\";\n")
                .append("declare variable $URI as xs:string external;\n")
                .append("declare variable $CONTENT as item() external;\n")
                .append("declare variable $INSERT-OPTIONS as element() external;\n")
                .append("hadoop:transform-and-insert(\"").append(moduleUri).append("\",\"").append(functionNs)
                .append("\",\"").append(functionName).append("\",\"").append(functionParam)
                .append("\", $URI, $CONTENT, $INSERT-OPTIONS)");
        return q.toString();
    }

    /**
     * Resolves a content type name from the suffix of a document URI.
     *
     * <p>A suffix of {@code xml} (any case) maps to {@code "xml"}; any other
     * suffix is looked up, case-sensitively, in
     * {@link TransformOutputFormat#mimetypeMap}. URIs without a dot, and
     * suffixes missing from the map, resolve to {@code "binary"}.
     *
     * @param uri document URI
     * @return the content type name for the URI
     */
    private static String getTypeFromMap(String uri) {
        int dot = uri.lastIndexOf(".");
        if (dot == -1) {
            return "binary";
        }
        String suffix = uri.substring(dot + 1);
        if (suffix.equalsIgnoreCase("xml")) {
            return "xml";
        }
        Text format = (Text) TransformOutputFormat.mimetypeMap.get(new Text(suffix));
        return format == null ? "binary" : format.toString();
    }

    /**
     * Builds the transform-and-insert query text and logs it at debug level.
     *
     * @param moduleUri     URI of the transform module
     * @param functionNs    namespace of the transform function
     * @param functionName  local name of the transform function
     * @param functionParam parameter string handed to the transform function
     * @return the query text
     */
    public static String constructQryString(String moduleUri, String functionNs, String functionName,
            String functionParam) {
        String q = getInvokeModuleQuery(moduleUri, functionNs, functionName, functionParam);
        if (LOG.isDebugEnabled()) {
            LOG.debug(q);
        }
        return q;
    }

    /**
     * Binds the external variables of a transform-and-insert query for a
     * single document. Used for importing all file types except archive.
     *
     * <p>When {@code type} is {@code MIXED}, the effective content type is
     * derived from the URI suffix. If {@code value} is a
     * {@link ContentWithFileNameWritable}, its file name is appended to the
     * configured collections.
     *
     * @param conf          job configuration; not used by this method
     * @param query         query whose variables are set
     * @param moduleUri     transform module URI; not used by this method
     * @param functionNs    transform function namespace; not used by this method
     * @param functionName  transform function name; not used by this method
     * @param functionParam transform function parameter; not used by this method
     * @param uri           value bound to the {@code URI} variable
     * @param value         document content; cast according to the content type
     *                      ({@link BytesWritable}, {@link Text},
     *                      {@link RDFWritable} or
     *                      {@link ContentWithFileNameWritable})
     * @param type          name of a {@link ContentType} constant
     * @param cOptions      source of encoding, namespace, language,
     *                      permissions, collections, quality and repair level
     * @return the same {@code query} instance, with {@code URI},
     *         {@code CONTENT} and {@code INSERT-OPTIONS} set
     * @throws InterruptedIOException if the resolved content type is
     *         {@code MIXED} or {@code UNKNOWN}
     * @throws UnsupportedEncodingException if the encoding from
     *         {@code cOptions} is not supported when decoding bytes
     * @throws UnsupportedOperationException for any other unhandled content type
     */
    public static AdhocQuery getTransformInsertQry(Configuration conf, AdhocQuery query, String moduleUri,
            String functionNs, String functionName, String functionParam, String uri, Object value, String type,
            ContentCreateOptions cOptions) throws InterruptedIOException, UnsupportedEncodingException {
        query.setNewStringVariable("URI", uri);
        ContentType contentType = ContentType.valueOf(type);
        if (contentType == ContentType.MIXED) {
            // get type from mimetype map
            contentType = ContentType.forName(getTypeFromMap(uri));
        }

        String valueType;
        switch (contentType) {
        case BINARY: {
            BytesWritable bytes = (BytesWritable) value;
            // Only the first getLength() bytes of the array are encoded.
            query.setNewVariable("CONTENT", ValueType.XS_BASE64_BINARY,
                    Base64.encodeBytes(bytes.getBytes(), 0, bytes.getLength()));
            valueType = ValueType.XS_BASE64_BINARY.toString();
            break;
        }
        case TEXT:
            if (value instanceof BytesWritable) {
                // in MIXED type, value is byteswritable
                query.setNewStringVariable("CONTENT", decodeBytes((BytesWritable) value, cOptions));
            } else {
                // must be text or xml
                query.setNewStringVariable("CONTENT", ((Text) value).toString());
            }
            valueType = ValueType.TEXT.toString();
            break;
        case JSON:
        case XML:
            if (value instanceof BytesWritable) {
                // in MIXED type, value is byteswritable
                query.setNewStringVariable("CONTENT", decodeBytes((BytesWritable) value, cOptions));
            } else if (value instanceof RDFWritable) {
                // RDFWritable's value is Text
                query.setNewStringVariable("CONTENT", ((RDFWritable) value).getValue().toString());
            } else if (value instanceof ContentWithFileNameWritable) {
                query.setNewStringVariable("CONTENT", ((ContentWithFileNameWritable) value).getValue().toString());
            } else {
                // must be text or xml
                query.setNewStringVariable("CONTENT", ((Text) value).toString());
            }
            valueType = ValueType.XS_STRING.toString();
            break;
        case MIXED:
        case UNKNOWN:
            throw new InterruptedIOException("Unexpected:" + contentType);
        default:
            throw new UnsupportedOperationException("invalid type:" + contentType);
        }

        boolean hasFileName = value instanceof ContentWithFileNameWritable;
        String fileName = null;
        if (hasFileName) {
            // String.valueOf keeps the text StringBuilder.append would have produced.
            fileName = String.valueOf(((ContentWithFileNameWritable) value).getFileName());
        }

        String optionElem = buildInsertOptions(valueType, cOptions, hasFileName, fileName);
        query.setNewVariable("INSERT-OPTIONS", ValueType.ELEMENT, optionElem);
        return query;
    }

    /**
     * Binds the external variables of a transform-and-insert query for a
     * {@link DatabaseDocumentWithMeta}. Used when importing archives and
     * when copying.
     *
     * @param conf          job configuration; not used by this method
     * @param query         query whose variables are set
     * @param moduleUri     transform module URI; not used by this method
     * @param functionNs    transform function namespace; not used by this method
     * @param functionName  transform function name; not used by this method
     * @param functionParam transform function parameter; not used by this method
     * @param uri           value bound to the {@code URI} variable
     * @param doc           document supplying the content type and content
     * @param cOptions      source of namespace, language, permissions,
     *                      collections, quality and repair level
     * @return the same {@code query} instance, with {@code URI},
     *         {@code CONTENT} and {@code INSERT-OPTIONS} set
     * @throws InterruptedIOException declared for callers; not thrown directly
     *         by this method
     * @throws UnsupportedEncodingException declared for callers; not thrown
     *         directly by this method
     * @throws UnsupportedOperationException if the document's content type is
     *         not BINARY, TEXT, JSON or XML
     */
    public static AdhocQuery getTransformInsertQryMLDocWithMeta(Configuration conf, AdhocQuery query,
            String moduleUri, String functionNs, String functionName, String functionParam, String uri,
            DatabaseDocumentWithMeta doc, ContentCreateOptions cOptions)
            throws InterruptedIOException, UnsupportedEncodingException {
        query.setNewStringVariable("URI", uri);
        ContentType contentType = doc.getContentType();

        String valueType;
        switch (contentType) {
        case BINARY:
            query.setNewVariable("CONTENT", ValueType.XS_BASE64_BINARY,
                    Base64.encodeBytes(doc.getContentAsByteArray()));
            valueType = ValueType.XS_BASE64_BINARY.toString();
            break;
        case TEXT:
            query.setNewStringVariable("CONTENT", doc.getContentAsText().toString());
            valueType = ValueType.TEXT.toString();
            break;
        case JSON:
        case XML:
            query.setNewStringVariable("CONTENT", doc.getContentAsText().toString());
            valueType = ValueType.XS_STRING.toString();
            break;
        default:
            throw new UnsupportedOperationException("invalid type:" + contentType);
        }

        String optionElem = buildInsertOptions(valueType, cOptions, false, null);
        query.setNewVariable("INSERT-OPTIONS", ValueType.ELEMENT, optionElem);
        return query;
    }

    /**
     * Decodes the valid portion of a {@link BytesWritable} into a string
     * using the encoding configured in the create options.
     */
    private static String decodeBytes(BytesWritable bytes, ContentCreateOptions cOptions)
            throws UnsupportedEncodingException {
        String encoding = cOptions.getEncoding();
        return new String(bytes.getBytes(), 0, bytes.getLength(), encoding);
    }

    /**
     * Assembles the insert options shared by both public entry points and
     * serializes them as a {@code map:map} element.
     *
     * @param valueType   value stored under the {@code value-type} key
     * @param cOptions    source of the remaining option values
     * @param hasFileName whether {@code fileName} is added as a collection
     * @param fileName    extra collection name; ignored unless
     *                    {@code hasFileName} is true
     * @return the serialized options element
     */
    private static String buildInsertOptions(String valueType, ContentCreateOptions cOptions, boolean hasFileName,
            String fileName) {
        HashMap<String, String> optionsMap = new HashMap<String, String>();
        optionsMap.put("value-type", valueType);

        String namespace = cOptions.getNamespace();
        if (namespace != null) {
            optionsMap.put("namespace", namespace);
        }
        String lang = cOptions.getLanguage();
        if (lang != null) {
            optionsMap.put("language", "default-language=" + lang);
        }

        addPermissionOptions(optionsMap, cOptions.getPermissions());

        String[] collections = cOptions.getCollections();
        if (collections != null || hasFileName) {
            StringBuilder joined = new StringBuilder();
            int configured = collections == null ? 0 : collections.length;
            for (int i = 0; i < configured; i++) {
                if (i != 0) {
                    joined.append(",");
                }
                joined.append(collections[i].trim());
            }
            if (hasFileName) {
                // Separate only when a configured collection precedes the
                // file name; an empty array must not yield a leading comma.
                if (configured > 0) {
                    joined.append(",");
                }
                joined.append(fileName);
            }
            optionsMap.put("collections", joined.toString());
        }

        optionsMap.put("quality", String.valueOf(cOptions.getQuality()));
        DocumentRepairLevel repairLevel = cOptions.getRepairLevel();
        // A null level is treated like DEFAULT rather than emitted as "repair-null".
        if (repairLevel != null && !DocumentRepairLevel.DEFAULT.equals(repairLevel)) {
            optionsMap.put("xml-repair-level", "repair-" + repairLevel);
        }

        return mapToElement(optionsMap);
    }

    /**
     * Groups role names by capability and stores each group as a
     * comma-separated list under {@code roles-read}, {@code roles-execute},
     * {@code roles-update} and {@code roles-insert}. All four keys are always
     * written, with an empty string when no role has that capability.
     * Permissions with a null or empty role name are logged and skipped.
     */
    private static void addPermissionOptions(HashMap<String, String> optionsMap, ContentPermission[] perms) {
        StringBuilder rolesReadList = new StringBuilder();
        StringBuilder rolesExeList = new StringBuilder();
        StringBuilder rolesUpdateList = new StringBuilder();
        StringBuilder rolesInsertList = new StringBuilder();
        if (perms != null) {
            for (ContentPermission cp : perms) {
                String roleName = cp.getRole();
                if (roleName == null || roleName.isEmpty()) {
                    LOG.error("Illegal role name: " + roleName);
                    continue;
                }
                ContentCapability cc = cp.getCapability();
                if (cc.equals(ContentCapability.READ)) {
                    appendRole(rolesReadList, roleName);
                } else if (cc.equals(ContentCapability.EXECUTE)) {
                    appendRole(rolesExeList, roleName);
                } else if (cc.equals(ContentCapability.INSERT)) {
                    appendRole(rolesInsertList, roleName);
                } else if (cc.equals(ContentCapability.UPDATE)) {
                    appendRole(rolesUpdateList, roleName);
                }
            }
        }
        optionsMap.put("roles-read", rolesReadList.toString());
        optionsMap.put("roles-execute", rolesExeList.toString());
        optionsMap.put("roles-update", rolesUpdateList.toString());
        optionsMap.put("roles-insert", rolesInsertList.toString());
    }

    /** Appends a role name to a comma-separated list. */
    private static void appendRole(StringBuilder list, String roleName) {
        if (list.length() != 0) {
            list.append(",");
        }
        list.append(roleName);
    }

    /**
     * Serializes a string map as a {@code map:map} element with one
     * {@code map:entry} per key.
     */
    private static String mapToElement(HashMap<String, String> map) {
        StringBuilder sb = new StringBuilder();
        sb.append(MAP_ELEM_START_TAG);
        Set<String> keys = map.keySet();
        for (String k : keys) {
            addKeyValue(sb, k, map.get(k));
        }
        sb.append("</map:map>");
        return sb.toString();
    }

    /**
     * Appends one {@code map:entry} with an {@code xs:string} value. Key and
     * value are XML-escaped so that characters such as {@code &} or
     * {@code <} in a namespace, collection or role name cannot produce a
     * malformed element.
     */
    private static void addKeyValue(StringBuilder sb, String key, String value) {
        sb.append("<map:entry key=\"").append(escapeXml(key)).append("\"><map:value xsi:type=\"xs:string\">")
                .append(escapeXml(value)).append("</map:value></map:entry>");
    }

    /**
     * Replaces the five XML special characters with their predefined entity
     * references. A null input yields an empty string.
     */
    private static String escapeXml(String text) {
        if (text == null) {
            return "";
        }
        StringBuilder out = new StringBuilder(text.length() + 16);
        for (int i = 0; i < text.length(); i++) {
            char c = text.charAt(i);
            switch (c) {
            case '&':
                out.append("&amp;");
                break;
            case '<':
                out.append("&lt;");
                break;
            case '>':
                out.append("&gt;");
                break;
            case '"':
                out.append("&quot;");
                break;
            case '\'':
                out.append("&apos;");
                break;
            default:
                out.append(c);
            }
        }
        return out.toString();
    }
}