com.google.enterprise.connector.db.diffing.LobDocumentBuilder.java Source code

Java tutorial

Introduction

Here is the source code for com.google.enterprise.connector.db.diffing.LobDocumentBuilder.java

Source

// Copyright 2011 Google Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package com.google.enterprise.connector.db.diffing;

import com.google.common.base.Predicate;
import com.google.common.collect.Maps;
import com.google.enterprise.connector.db.DBContext;
import com.google.enterprise.connector.db.DBException;
import com.google.enterprise.connector.db.Util;
import com.google.enterprise.connector.spi.SpiConstants;
import com.google.enterprise.connector.spi.TraversalContext;
import com.google.enterprise.connector.util.InputStreamFactory;
import com.google.enterprise.connector.util.MimeTypeDetector;

import java.io.CharArrayReader;
import java.io.InputStream;
import java.io.Reader;
import java.io.StringReader;
import java.sql.Blob;
import java.sql.Clob;
import java.sql.SQLException;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.logging.Logger;

/**
 * Class for transforming database row to JsonDocument.
 */
class LobDocumentBuilder extends DocumentBuilder {
    private static final Logger LOG = Logger.getLogger(LobDocumentBuilder.class.getName());

    private final TraversalContext context;

    /**
     * Maximum document size that connector manager supports, or the
     * maximum size of a byte array (~2 GB), whichever is smaller.
     */
    private final long maxDocSize;

    private final MimeTypeDetector mimeTypeDetector = new MimeTypeDetector();

    protected LobDocumentBuilder(DBContext dbContext, TraversalContext context) {
        super(dbContext);

        this.context = context;
        this.maxDocSize = Math.min(context.maxDocumentSize(), Integer.MAX_VALUE);
    }

    private byte[] getBinaryContent(Object largeObject, String docId) throws SQLException {
        // Check if large object is of type BLOB from the the type of largeObject.
        // If the largeObject is of type java.sql.Blob or byte array means large
        // object is of type BLOB, else it is CLOB.
        byte[] binaryContent;

        if (largeObject instanceof byte[]) {
            binaryContent = (byte[]) largeObject;
            int length = binaryContent.length;
            LOG.info("BLOB Data found");
        } else if (largeObject instanceof Blob) {
            long length = ((Blob) largeObject).length();
            try {
                binaryContent = ((Blob) largeObject).getBytes(1, (int) length);
            } catch (SQLException e) {
                // Try to get byte array of blob content from input stream.
                InputStream contentStream = ((Blob) largeObject).getBinaryStream();
                if (contentStream != null) {
                    binaryContent = Util.getBytes((int) length, contentStream);
                } else {
                    binaryContent = null;
                }
            }
            LOG.info("BLOB Data found");
        } else {
            // Get the value of CLOB as StringBuilder. iBATIS returns char array or
            // String for CLOB data depending upon Database.
            long length;
            Reader clobReader;
            if (largeObject instanceof char[]) {
                length = ((char[]) largeObject).length;
                clobReader = new CharArrayReader((char[]) largeObject);
            } else if (largeObject instanceof String) {
                length = ((String) largeObject).length();
                clobReader = new StringReader((String) largeObject);
            } else if (largeObject instanceof Clob) {
                length = ((Clob) largeObject).length();
                clobReader = ((Clob) largeObject).getCharacterStream();
            } else {
                // It's not a CLOB, but we'll include it anyway.
                String value = largeObject.toString();
                length = value.length();
                clobReader = new StringReader(value);
            }

            if (clobReader != null) {
                binaryContent = Util.getBytes((int) length, clobReader);
            } else {
                binaryContent = null;
            }

            LOG.info("CLOB Data found");
        }
        return binaryContent;
    }

    private byte[] getBytes(Object largeObject, String docId) throws DBException {
        byte[] binaryContent;
        if (largeObject == null) {
            binaryContent = null;
        } else {
            try {
                binaryContent = getBinaryContent(largeObject, docId);
            } catch (SQLException e) {
                throw new DBException("Error retrieving LOB content", e);
            }
        }
        if (binaryContent == null) {
            // TODO(jlacey): The LobTypeHandler strategies log this at a lower level.
            LOG.warning("Content of Document " + docId + " has null value.");
            binaryContent = new byte[0];
        }
        return binaryContent;
    }

    @Override
    protected ContentHolder getContentHolder(Map<String, Object> row, List<String> primaryKey, String docId)
            throws DBException {
        // Get the value of large object from map representing a row.
        Object largeObject = row.get(dbContext.getLobField());

        DigestContentHolder holder;
        if (largeObject instanceof DigestContentHolder) {
            // Custom LOB TypeHandler creates a partial ContentHolder.
            holder = (DigestContentHolder) largeObject;
        } else {
            // TODO(jlacey): This should be dead code with the LOB TypeHandler.
            holder = DigestContentHolder.getInstance(getBytes(largeObject, docId), mimeTypeDetector);
        }

        if (holder.getLength() > maxDocSize) {
            LOG.warning("Size of the document '" + docId + "' is larger than supported");
            holder = DigestContentHolder.getEmptyInstance(holder.getMimeType());
        } else if (context.mimeTypeSupportLevel(holder.getMimeType()) <= 0) {
            LOG.warning("Content MIME type " + holder.getMimeType() + " of the document '" + docId
                    + "' is not supported");
            holder = DigestContentHolder.getEmptyInstance(holder.getMimeType());
        }

        // Finish up calculating the checksum and return the ContentHolder.
        // TODO: Look into which encoding/charset to use for getBytes().
        holder.updateDigest(getXmlDoc(getRowForXmlDoc(row), primaryKey, "").getBytes());
        return holder;
    }

    /**
     * Converts a large Object (BLOB or CLOB) into equivalent JsonDocument.
     */
    @Override
    protected JsonDocument getJsonDocument(DocumentHolder holder) throws DBException {
        JsonObjectUtil jsonObjectUtil = new JsonObjectUtil();

        List<String> skipColumns = new ArrayList<String>();

        skipColumns.add(dbContext.getLobField());

        jsonObjectUtil.setProperty(SpiConstants.PROPNAME_DOCID, holder.docId);

        jsonObjectUtil.setProperty(SpiConstants.PROPNAME_FEEDTYPE, SpiConstants.FeedType.CONTENT.toString());
        jsonObjectUtil.setProperty(SpiConstants.PROPNAME_ACTION, SpiConstants.ActionType.ADD.toString());

        // Set "ispublic" false if authZ query is provided by the user.
        if (dbContext != null && !dbContext.isPublicFeed()) {
            jsonObjectUtil.setProperty(SpiConstants.PROPNAME_ISPUBLIC, "false");
        }

        jsonObjectUtil.setBinaryContent(SpiConstants.PROPNAME_CONTENT,
                (InputStreamFactory) holder.contentHolder.getContent());
        jsonObjectUtil.setProperty(SpiConstants.PROPNAME_MIMETYPE, holder.contentHolder.getMimeType());

        // If connector admin has provided Fetch URL column then use the value of
        // that column as a "Display URL". Else construct the display URL.
        String displayUrl;
        String fetchUrlField = dbContext.getFetchURLField();
        if (fetchUrlField != null) {
            Object fetchUrl = holder.row.get(fetchUrlField);
            if (fetchUrl != null && fetchUrl.toString().trim().length() > 0) {
                displayUrl = fetchUrl.toString().trim();
                skipColumns.add(fetchUrlField);
            } else {
                displayUrl = getDisplayUrl(holder.docId);
            }
        } else {
            displayUrl = getDisplayUrl(holder.docId);
        }
        jsonObjectUtil.setProperty(SpiConstants.PROPNAME_DISPLAYURL, displayUrl);

        skipLastModified(skipColumns, dbContext);
        skipColumns.addAll(holder.primaryKey);
        setLastModified(holder.row, jsonObjectUtil, dbContext);
        setMetaInfo(jsonObjectUtil, holder.row, skipColumns);

        return new JsonDocument(jsonObjectUtil.getProperties(), jsonObjectUtil.getJsonObject());
    }

    /**
     * Returns a filtered Map of the row with the LOB field filtered out.
     *
     * @param row
     * @return map representing a database table row.
     */
    private Map<String, Object> getRowForXmlDoc(Map<String, Object> row) {
        return Maps.filterKeys(row, new Predicate<String>() {
            @Override
            public boolean apply(String key) {
                return !dbContext.getLobField().equals(key);
            }
        });
    }
}