com.google.enterprise.connector.db.diffing.JsonDocument.java Source code

Java tutorial

Introduction

Here is the source code for com.google.enterprise.connector.db.diffing.JsonDocument.java:

Source

// Copyright 2011 Google Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package com.google.enterprise.connector.db.diffing;

import com.google.common.base.Preconditions;
import com.google.common.base.Strings;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableMap;
import com.google.enterprise.connector.db.InputStreamFactories;
import com.google.enterprise.connector.spi.Document;
import com.google.enterprise.connector.spi.Property;
import com.google.enterprise.connector.spi.RepositoryException;
import com.google.enterprise.connector.spi.SimpleProperty;
import com.google.enterprise.connector.spi.SkippedDocumentException;
import com.google.enterprise.connector.spi.SpiConstants;
import com.google.enterprise.connector.spi.TraversalContext;
import com.google.enterprise.connector.spi.Value;
import com.google.enterprise.connector.util.InputStreamFactory;
import com.google.enterprise.connector.util.diffing.SnapshotRepositoryRuntimeException;

import org.json.JSONException;
import org.json.JSONObject;
import org.json.JSONWriter;

import java.io.IOException;
import java.io.StringWriter;
import java.text.ParseException;
import java.util.Calendar;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.logging.Level;
import java.util.logging.Logger;

/**
 * A simple {@link Document} implementation created from a {@link JSONObject}.
 */
public class JsonDocument implements Document {
    private static final Logger LOG = Logger.getLogger(JsonDocument.class.getName());

    private final JSONObject jsonObject;
    private final String objectId;
    private final Map<String, List<Value>> properties;
    private static TraversalContext traversalContext;

    public static void setTraversalContext(TraversalContext traversalContext) {
        JsonDocument.traversalContext = traversalContext;
    }

    /**
     * Constructor used by {@link DBHandle} when deserializing a
     * {@code DocumentHandle} from the recovery file.
     */
    public JsonDocument(JSONObject jsonObject) {
        this(buildJsonProperties(jsonObject), jsonObject);
    }

    /**
     * Constructor used by the {@link DocumentBuilder} for creating a
     * {@link JsonDocument} object used by {@link RepositoryHandler}
     * for building a collection over JsonDocument.
     */
    public JsonDocument(Map<String, List<Value>> properties, JSONObject jsonObject) {
        this.properties = properties;
        this.jsonObject = jsonObject;
        objectId = getSingleValueString(SpiConstants.PROPNAME_DOCID);
        if (Strings.isNullOrEmpty(objectId)) {
            throw new IllegalArgumentException("Unable to parse for docID from the properties:" + properties);
        }
    }

    public String getDocumentId() {
        return objectId;
    }

    public String toJson() {
        // JSON does not support custom serialization, so we have to find
        // the InputStreamFactory for the content and serialize it
        // ourselves. This could be cleaner if we supported toString on the
        // InputStreamFactory implementations, but that would mean less
        // control over when the LOB was materialized in memory.
        try {
            StringWriter buffer = new StringWriter();
            JSONWriter writer = new JSONWriter(buffer);
            writer.object();
            for (String name : JSONObject.getNames(jsonObject)) {
                writer.key(name).value(toJson(name, jsonObject.get(name)));
            }
            writer.endObject();
            return buffer.toString();
        } catch (IOException e) {
            throw new SnapshotRepositoryRuntimeException("Error serializing document " + objectId, e);
        } catch (JSONException e) {
            throw new SnapshotRepositoryRuntimeException("Error serializing document " + objectId, e);
        }
    }

    /**
     * Serializes the given value. This always returns the value itself,
     * unless it is an {@code InputStreamFactory} for the google:content
     * property, in which case we Base64 encode it.
     */
    private Object toJson(String name, Object input) throws IOException {
        if (name.equals(SpiConstants.PROPNAME_CONTENT)) {
            if (input instanceof String) {
                return input;
            } else if (input instanceof InputStreamFactory) {
                return InputStreamFactories.toBase64String((InputStreamFactory) input);
            } else {
                LOG.warning("Unexpected content object class: " + input.getClass().getName());
                return input;
            }
        } else {
            return input;
        }
    }

    /**
     * A class level method for extracting attributes from JSONObject object and
     * creating a {@code Map<String,List<Value>>} used by the superclass({@link
     * SimpleDocument}) constructor and hence creating a JsonDocument Object.
     */
    private static Map<String, List<Value>> buildJsonProperties(JSONObject jo) {
        ImmutableMap.Builder<String, List<Value>> mapBuilder = ImmutableMap.builder();
        @SuppressWarnings("unchecked")
        Iterator<String> jsonKeys = jo.keys();
        while (jsonKeys.hasNext()) {
            String key = jsonKeys.next();
            if (key.equals(SpiConstants.PROPNAME_DOCID)) {
                extractDocid(jo, mapBuilder);
            } else if (key.equals(SpiConstants.PROPNAME_CONTENT)) {
                extractContent(jo, mapBuilder);
            } else if (key.equals(SpiConstants.PROPNAME_LASTMODIFIED)) {
                extractLastModified(jo, mapBuilder);
            } else {
                extractAttribute(jo, mapBuilder, key);
            }
        }
        return mapBuilder.build();
    }

    /**
     * Copies a string-valued attribute from a JSONObject to a map of SPI
     * Value objects.
     */
    private static void extractAttribute(JSONObject jo, ImmutableMap.Builder<String, List<Value>> mapBuilder,
            String key) {
        try {
            if (!jo.isNull(key)) {
                mapBuilder.put(key, ImmutableList.of(Value.getStringValue(jo.getString(key))));
            }
        } catch (JSONException e) {
            LOG.log(Level.WARNING, "Exception thrown while extracting key: " + key, e);
        }
    }

    /**
     * Copies a required docid attribute from a JSONObject to a map of
     * SPI Value objects.
     */
    private static void extractDocid(JSONObject jo, ImmutableMap.Builder<String, List<Value>> mapBuilder) {
        String docid;
        try {
            docid = jo.getString(SpiConstants.PROPNAME_DOCID);
            Preconditions.checkState(!Strings.isNullOrEmpty(docid));
        } catch (Exception e) {
            throw new IllegalArgumentException("Internal consistency error: missing docid", e);
        }
        mapBuilder.put(SpiConstants.PROPNAME_DOCID, ImmutableList.of(Value.getStringValue(docid)));
    }

    /**
     * Extracts Base64-encoded google:content values and puts the
     * decoded value into an InputStreamFactory that minimizes memory
     * usage. If the google:content value is not Base64-encoded, it is
     * converted to bytes using UTF-8. The value in the JSONObject is
     * also replaced by the new value to save memory.
     */
    private static void extractContent(JSONObject jo, ImmutableMap.Builder<String, List<Value>> mapBuilder) {
        try {
            String content = jo.getString(SpiConstants.PROPNAME_CONTENT);
            List<Value> values;
            if (Strings.isNullOrEmpty(content)) {
                values = null;
            } else {
                InputStreamFactory factory = InputStreamFactories.fromBase64String(content);
                jo.put(SpiConstants.PROPNAME_CONTENT, factory);
                values = ImmutableList.of(Value.getBinaryValue(factory));
            }
            mapBuilder.put(SpiConstants.PROPNAME_CONTENT, values);
        } catch (JSONException e) {
            LOG.log(Level.WARNING, "Exception thrown while extracting content.", e);
        }
    }

    /**
     * Copies the last modified date attribute from a JSONObject to a
     * map of SPI Value objects.
     */
    private static void extractLastModified(JSONObject jo, ImmutableMap.Builder<String, List<Value>> mapBuilder) {
        try {
            String lastModified = jo.getString(SpiConstants.PROPNAME_LASTMODIFIED);

            try {
                if (!Strings.isNullOrEmpty(lastModified)) {
                    Calendar cal = Value.iso8601ToCalendar(lastModified);
                    mapBuilder.put(SpiConstants.PROPNAME_LASTMODIFIED, ImmutableList.of(Value.getDateValue(cal)));
                }
            } catch (ParseException e) {
                LOG.log(Level.WARNING, "Exception thrown while handling date: " + lastModified, e);
            }
        } catch (JSONException e) {
            LOG.log(Level.WARNING, "Exception thrown while extracting last modifed date.", e);
        }
    }

    @Override
    public Set<String> getPropertyNames() {
        return properties.keySet();
    }

    @Override
    public Property findProperty(String name) throws RepositoryException {
        List<Value> property = properties.get(name);
        if (name.equals(SpiConstants.PROPNAME_CONTENT) && filterMimeType()) {
            property = null;
        }
        return (property == null) ? null : new SimpleProperty(property);
    }

    /**
     * Filter the Document or just its Content based upon its MIME type.
     *
     * @return true if content should be skipped based upon its MIME type,
     *         false otherwise.
     * @throws SkippedDocumentException if this document is to be ignored
     *         based upon its MIME type.
     */
    private boolean filterMimeType() throws RepositoryException {
        String mimeType = getSingleValueString(SpiConstants.PROPNAME_MIMETYPE);
        if (mimeType != null && traversalContext != null) {
            int mimeTypeSupportLevel = traversalContext.mimeTypeSupportLevel(mimeType);
            if (mimeTypeSupportLevel == 0) {
                LOG.warning("Skipping the contents with docId: " + objectId + " as content MIME type " + mimeType
                        + " is not supported.");
                return true;
            } else if (mimeTypeSupportLevel < 0) {
                String msg = new StringBuilder("Skipping the document with docId: ").append(objectId)
                        .append(" as the MIME type ").append(mimeType)
                        .append(" is in the 'ignored' MIME types list.").toString();
                LOG.warning(msg);
                throw new SkippedDocumentException(msg);
            }
        }
        return false;
    }

    /** Returns the first value of the named property as a String. */
    private String getSingleValueString(String name) {
        List<Value> values = properties.get(name);
        if (values != null) {
            Value value = values.iterator().next();
            if (value != null) {
                return value.toString();
            }
        }
        return null;
    }
}