Java tutorial
// Copyright 2011 Google Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package com.google.enterprise.connector.db.diffing;

import com.google.common.base.Preconditions;
import com.google.common.base.Strings;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableMap;
import com.google.enterprise.connector.db.InputStreamFactories;
import com.google.enterprise.connector.spi.Document;
import com.google.enterprise.connector.spi.Property;
import com.google.enterprise.connector.spi.RepositoryException;
import com.google.enterprise.connector.spi.SimpleProperty;
import com.google.enterprise.connector.spi.SkippedDocumentException;
import com.google.enterprise.connector.spi.SpiConstants;
import com.google.enterprise.connector.spi.TraversalContext;
import com.google.enterprise.connector.spi.Value;
import com.google.enterprise.connector.util.InputStreamFactory;
import com.google.enterprise.connector.util.diffing.SnapshotRepositoryRuntimeException;

import org.json.JSONException;
import org.json.JSONObject;
import org.json.JSONWriter;

import java.io.IOException;
import java.io.StringWriter;
import java.text.ParseException;
import java.util.Calendar;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.logging.Level;
import java.util.logging.Logger;

/**
 * A simple {@link Document} implementation created from a {@link JSONObject}.
 *
 * <p>Each instance holds two views of the same document: the
 * {@code JSONObject}, which is what {@link #toJson()} serializes, and a
 * map of SPI {@link Value} lists, which backs {@link #findProperty} and
 * {@link #getPropertyNames}.
 */
public class JsonDocument implements Document {
  private static final Logger LOG =
      Logger.getLogger(JsonDocument.class.getName());

  private final JSONObject jsonObject;
  private final String objectId;
  private final Map<String, List<Value>> properties;

  // Shared by every JsonDocument instance; consulted by filterMimeType().
  // NOTE(review): this field is read and written without synchronization
  // or volatile - confirm that it is set before any document is traversed.
  private static TraversalContext traversalContext;

  /**
   * Sets the traversal context used by all instances to decide whether
   * a document's content, or the whole document, should be skipped
   * based upon its MIME type.
   *
   * @param traversalContext the context to consult; if {@code null}, no
   *     MIME type filtering is performed
   */
  public static void setTraversalContext(TraversalContext traversalContext) {
    JsonDocument.traversalContext = traversalContext;
  }

  /**
   * Constructor used by {@link DBHandle} when deserializing a
   * {@code DocumentHandle} from the recovery file.
   *
   * @param jsonObject the JSON form of the document; its
   *     google:content value, if present and non-empty, is replaced
   *     in place by an {@code InputStreamFactory}
   * @throws IllegalArgumentException if the object has no usable docid
   */
  public JsonDocument(JSONObject jsonObject) {
    this(buildJsonProperties(jsonObject), jsonObject);
  }

  /**
   * Constructor used by the {@link DocumentBuilder} for creating a
   * {@link JsonDocument} object used by {@link RepositoryHandler}
   * for building a collection over JsonDocument.
   *
   * @param properties the SPI property values, keyed by property name
   * @param jsonObject the JSON form of the same document
   * @throws IllegalArgumentException if {@code properties} holds no
   *     non-empty docid value
   */
  public JsonDocument(Map<String, List<Value>> properties,
      JSONObject jsonObject) {
    this.properties = properties;
    this.jsonObject = jsonObject;
    objectId = getSingleValueString(SpiConstants.PROPNAME_DOCID);
    if (Strings.isNullOrEmpty(objectId)) {
      throw new IllegalArgumentException(
          "Unable to parse for docID from the properties:" + properties);
    }
  }

  /** Returns the docid this document was constructed with. */
  public String getDocumentId() {
    return objectId;
  }

  /**
   * Serializes this document's {@code JSONObject} to a JSON string.
   *
   * @return the JSON text; {@code "{}"} if the object has no keys
   * @throws SnapshotRepositoryRuntimeException if a value cannot be
   *     read or written
   */
  public String toJson() {
    // JSON does not support custom serialization, so we have to find
    // the InputStreamFactory for the content and serialize it
    // ourselves. This could be cleaner if we supported toString on the
    // InputStreamFactory implementations, but that would mean less
    // control over when the LOB was materialized in memory.
    try {
      StringWriter buffer = new StringWriter();
      JSONWriter writer = new JSONWriter(buffer);
      writer.object();
      // JSONObject.getNames returns null, rather than an empty array,
      // for an object with no keys.
      String[] names = JSONObject.getNames(jsonObject);
      if (names != null) {
        for (String name : names) {
          writer.key(name).value(toJson(name, jsonObject.get(name)));
        }
      }
      writer.endObject();
      return buffer.toString();
    } catch (IOException e) {
      throw new SnapshotRepositoryRuntimeException(
          "Error serializing document " + objectId, e);
    } catch (JSONException e) {
      throw new SnapshotRepositoryRuntimeException(
          "Error serializing document " + objectId, e);
    }
  }

  /**
   * Serializes the given value. This always returns the value itself,
   * unless it is an {@code InputStreamFactory} for the google:content
   * property, in which case we Base64 encode it.
   *
   * @param name the property name the value belongs to
   * @param input the value stored in the {@code JSONObject}
   * @return the object to hand to the {@code JSONWriter}
   * @throws IOException if the content cannot be Base64 encoded
   */
  private Object toJson(String name, Object input) throws IOException {
    if (!name.equals(SpiConstants.PROPNAME_CONTENT)
        || input instanceof String) {
      return input;
    }
    if (input instanceof InputStreamFactory) {
      return InputStreamFactories.toBase64String((InputStreamFactory) input);
    }
    LOG.warning("Unexpected content object class: "
        + input.getClass().getName());
    return input;
  }

  /**
   * A class level method for extracting attributes from a JSONObject and
   * creating the {@code Map<String, List<Value>>} of properties that is
   * passed to the two-argument constructor.
   *
   * @param jo the JSON form of the document
   * @return an immutable map from property name to its values
   * @throws IllegalArgumentException if a docid key is present but its
   *     value is missing or empty
   */
  private static Map<String, List<Value>> buildJsonProperties(JSONObject jo) {
    ImmutableMap.Builder<String, List<Value>> mapBuilder =
        ImmutableMap.builder();
    @SuppressWarnings("unchecked")
    Iterator<String> jsonKeys = jo.keys();
    while (jsonKeys.hasNext()) {
      String key = jsonKeys.next();
      if (key.equals(SpiConstants.PROPNAME_DOCID)) {
        extractDocid(jo, mapBuilder);
      } else if (key.equals(SpiConstants.PROPNAME_CONTENT)) {
        extractContent(jo, mapBuilder);
      } else if (key.equals(SpiConstants.PROPNAME_LASTMODIFIED)) {
        extractLastModified(jo, mapBuilder);
      } else {
        extractAttribute(jo, mapBuilder, key);
      }
    }
    return mapBuilder.build();
  }

  /**
   * Copies a string-valued attribute from a JSONObject to a map of SPI
   * Value objects. JSON null values are skipped, and a value that
   * cannot be read is logged and skipped.
   */
  private static void extractAttribute(JSONObject jo,
      ImmutableMap.Builder<String, List<Value>> mapBuilder, String key) {
    try {
      if (!jo.isNull(key)) {
        mapBuilder.put(key,
            ImmutableList.of(Value.getStringValue(jo.getString(key))));
      }
    } catch (JSONException e) {
      LOG.log(Level.WARNING, "Exception thrown while extracting key: " + key,
          e);
    }
  }

  /**
   * Copies a required docid attribute from a JSONObject to a map of
   * SPI Value objects.
   *
   * @throws IllegalArgumentException if the docid cannot be read or is
   *     empty
   */
  private static void extractDocid(JSONObject jo,
      ImmutableMap.Builder<String, List<Value>> mapBuilder) {
    String docid;
    try {
      docid = jo.getString(SpiConstants.PROPNAME_DOCID);
      Preconditions.checkState(!Strings.isNullOrEmpty(docid));
    } catch (Exception e) {
      // Deliberately broad: both the JSONException from the lookup and the
      // IllegalStateException from the check are reported the same way.
      throw new IllegalArgumentException(
          "Internal consistency error: missing docid", e);
    }
    mapBuilder.put(SpiConstants.PROPNAME_DOCID,
        ImmutableList.of(Value.getStringValue(docid)));
  }

  /**
   * Extracts Base64-encoded google:content values and puts the
   * decoded value into an InputStreamFactory that minimizes memory
   * usage. If the google:content value is not Base64-encoded, it is
   * converted to bytes using UTF-8. The value in the JSONObject is
   * also replaced by the new value to save memory.
   *
   * <p>Empty content is not added to the map at all, so a later
   * property lookup for it finds nothing.
   */
  private static void extractContent(JSONObject jo,
      ImmutableMap.Builder<String, List<Value>> mapBuilder) {
    try {
      String content = jo.getString(SpiConstants.PROPNAME_CONTENT);
      if (Strings.isNullOrEmpty(content)) {
        // ImmutableMap.Builder rejects null values with a
        // NullPointerException, so an empty content property must be
        // left out rather than mapped to null.
        return;
      }
      InputStreamFactory factory =
          InputStreamFactories.fromBase64String(content);
      jo.put(SpiConstants.PROPNAME_CONTENT, factory);
      mapBuilder.put(SpiConstants.PROPNAME_CONTENT,
          ImmutableList.of(Value.getBinaryValue(factory)));
    } catch (JSONException e) {
      LOG.log(Level.WARNING, "Exception thrown while extracting content.", e);
    }
  }

  /**
   * Copies the last modified date attribute from a JSONObject to a
   * map of SPI Value objects. The value is parsed as an ISO 8601 date;
   * an empty, unreadable, or unparseable value is skipped (the latter
   * two are logged).
   */
  private static void extractLastModified(JSONObject jo,
      ImmutableMap.Builder<String, List<Value>> mapBuilder) {
    try {
      String lastModified = jo.getString(SpiConstants.PROPNAME_LASTMODIFIED);
      try {
        if (!Strings.isNullOrEmpty(lastModified)) {
          Calendar cal = Value.iso8601ToCalendar(lastModified);
          mapBuilder.put(SpiConstants.PROPNAME_LASTMODIFIED,
              ImmutableList.of(Value.getDateValue(cal)));
        }
      } catch (ParseException e) {
        LOG.log(Level.WARNING,
            "Exception thrown while handling date: " + lastModified, e);
      }
    } catch (JSONException e) {
      LOG.log(Level.WARNING,
          "Exception thrown while extracting last modifed date.", e);
    }
  }

  /** Returns the names of all properties held by this document. */
  @Override
  public Set<String> getPropertyNames() {
    return properties.keySet();
  }

  /**
   * Looks up a property by name.
   *
   * @param name the property name
   * @return the property, or {@code null} if this document has no such
   *     property or if it is the content property and the content is
   *     filtered out by MIME type
   * @throws SkippedDocumentException if the content property is
   *     requested and the document's MIME type is in the ignored list
   */
  @Override
  public Property findProperty(String name) throws RepositoryException {
    List<Value> property = properties.get(name);
    // Constant-first comparison so that a null name is simply "not found".
    // The MIME type check runs for every content lookup, even when there
    // is no content value, so that ignored documents are still skipped.
    if (SpiConstants.PROPNAME_CONTENT.equals(name) && filterMimeType()) {
      property = null;
    }
    return (property == null) ? null : new SimpleProperty(property);
  }

  /**
   * Filter the Document or just its Content based upon its MIME type.
   * No filtering is done if the document has no MIME type property or
   * no traversal context has been set.
   *
   * @return true if content should be skipped based upon its MIME type,
   *     false otherwise.
   * @throws SkippedDocumentException if this document is to be ignored
   *     based upon its MIME type.
   */
  private boolean filterMimeType() throws RepositoryException {
    String mimeType = getSingleValueString(SpiConstants.PROPNAME_MIMETYPE);
    if (mimeType == null || traversalContext == null) {
      return false;
    }
    int mimeTypeSupportLevel = traversalContext.mimeTypeSupportLevel(mimeType);
    if (mimeTypeSupportLevel == 0) {
      // Level zero drops only the content; the document is still sent.
      LOG.warning("Skipping the contents with docId: " + objectId
          + " as content MIME type " + mimeType + " is not supported.");
      return true;
    }
    if (mimeTypeSupportLevel < 0) {
      // A negative level drops the whole document.
      String msg = "Skipping the document with docId: " + objectId
          + " as the MIME type " + mimeType
          + " is in the 'ignored' MIME types list.";
      LOG.warning(msg);
      throw new SkippedDocumentException(msg);
    }
    return false;
  }

  /**
   * Returns the first value of the named property as a String.
   *
   * @return the string form of the first value, or {@code null} if the
   *     property is absent, has no values, or its first value is null
   */
  private String getSingleValueString(String name) {
    List<Value> values = properties.get(name);
    // An empty list is treated like a missing property instead of
    // letting the lookup fail with NoSuchElementException.
    if (values == null || values.isEmpty()) {
      return null;
    }
    Value value = values.get(0);
    return (value == null) ? null : value.toString();
  }
}