org.carrot2.core.ProcessingResultTest.java Source code

Java tutorial

Introduction

Here is the source code for org.carrot2.core.ProcessingResultTest.java

Source

/*
 * Carrot2 project.
 *
 * Copyright (C) 2002-2015, Dawid Weiss, Stanisław Osiński.
 * All rights reserved.
 *
 * Refer to the full license file "carrot2.LICENSE"
 * in the root folder of the repository checkout or at:
 * http://www.carrot2.org/carrot2.LICENSE
 */

package org.carrot2.core;

import static org.carrot2.core.test.assertions.Carrot2CoreAssertions.*;

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.StringReader;
import java.io.StringWriter;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

import org.apache.commons.io.output.NullOutputStream;
import org.carrot2.core.attribute.AttributeNames;
import org.carrot2.util.CloseableUtils;
import org.carrot2.util.CollectionUtils;
import org.carrot2.util.tests.CarrotTestCase;
import org.fest.assertions.Assertions;
import org.junit.Test;

import com.fasterxml.jackson.core.JsonFactory;
import com.fasterxml.jackson.core.JsonParseException;
import com.fasterxml.jackson.core.JsonParser;
import com.fasterxml.jackson.databind.JsonNode;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.google.common.collect.Lists;
import com.google.common.collect.Maps;

/**
 * Test cases for {@link ProcessingResult}.
 */
public class ProcessingResultTest extends CarrotTestCase {
    @Test
    public void testSerializationDeserializationAll() throws Exception {
        checkSerializationDeserialization(true, true, true);
    }

    @Test
    public void testSerializationDeserializationDocumentsOnly() throws Exception {
        checkSerializationDeserialization(true, false, false);
    }

    @Test
    public void testSerializationDeserializationClustersOnly() throws Exception {
        checkSerializationDeserialization(false, true, false);
    }

    @Test
    public void testSerializationDeserializationAttributesOnly() throws Exception {
        checkSerializationDeserialization(false, false, true);
    }

    @Test
    public void testDocumentDeserializationFromLegacyXml() throws Exception {
        final String query = "apple computer";
        final String title = "Apple Computer, Inc.";
        final String snippet = "Macintosh hardware, software, and Internet tools.";
        final String url = "http:// www.apple.com/";

        StringBuilder xml = new StringBuilder();
        xml.append("<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n");
        xml.append("<searchresult>\n");
        xml.append("<query>" + query + "</query>\n");
        xml.append("<document id=\"0\">");
        xml.append("<title>" + title + "</title>\n");
        xml.append("<snippet>" + snippet + "</snippet>\n");
        xml.append("<url>" + url + "</url>\n");
        xml.append("</document>\n");
        xml.append("</searchresult>\n");

        final ProcessingResult deserialized = ProcessingResult
                .deserialize(new ByteArrayInputStream(xml.toString().getBytes("UTF-8")));

        assertNotNull(deserialized);
        assertNotNull(deserialized.getAttributes());

        Document deserializedDocument = CollectionUtils.getFirst(deserialized.getDocuments());
        assertEquals(title, deserializedDocument.getField(Document.TITLE));
        assertEquals(snippet, deserializedDocument.getField(Document.SUMMARY));
        assertEquals(url, deserializedDocument.getField(Document.CONTENT_URL));
        assertEquals(null, deserializedDocument.getField(Document.LANGUAGE));
        Assertions.assertThat(deserialized.getAttributes().get(AttributeNames.QUERY)).isEqualTo(query);
    }

    @Test
    public void deserializeStringDocumentIds() throws Exception {
        final String title = "Apple Computer, Inc.";
        final String id = "cafe00f0";

        StringBuilder xml = new StringBuilder();
        xml.append("<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n");
        xml.append("<searchresult>\n");
        xml.append("<document id=\"" + id + "\">");
        xml.append("<title>" + title + "</title>\n");
        xml.append("</document>\n");
        xml.append("</searchresult>\n");

        final ProcessingResult deserialized = ProcessingResult
                .deserialize(new ByteArrayInputStream(xml.toString().getBytes("UTF-8")));

        assertNotNull(deserialized);
        assertNotNull(deserialized.getAttributes());

        Document deserializedDocument = CollectionUtils.getFirst(deserialized.getDocuments());
        assertEquals(title, deserializedDocument.getField(Document.TITLE));
        assertEquals(id, deserializedDocument.getStringId());
    }

    @Test
    public void testDocumentDeserializationLanguageByIsoCode() throws Exception {
        final LanguageCode language = LanguageCode.POLISH;
        assertThat(ProcessingResult.deserialize(documentXml(language.getIsoCode())).getDocuments().get(0)
                .getLanguage()).isEqualTo(language);

    }

    @Test
    public void testDocumentDeserializationLanguageByEnumCode() throws Exception {
        final LanguageCode language = LanguageCode.POLISH;
        assertThat(ProcessingResult.deserialize(documentXml(language.name())).getDocuments().get(0).getLanguage())
                .isEqualTo(language);

    }

    private InputStream documentXml(String language) throws Exception {
        final StringBuilder xml = new StringBuilder();
        xml.append("<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n");
        xml.append("<searchresult>\n");
        xml.append("<document id=\"0\" language=\"" + language + "\">");
        xml.append("</document>\n");
        xml.append("</searchresult>\n");
        return new ByteArrayInputStream(xml.toString().getBytes("UTF8"));
    }

    @Test
    public void testClusterDeserializationFromLegacyXml() throws Exception {
        final String query = "apple computer";

        final String title = "Apple Computer, Inc.";
        final String snippet = "Macintosh hardware, software, and Internet tools.";
        final String url = "http:// www.apple.com/";

        final int documentCount = 3;

        StringBuilder xml = new StringBuilder();

        xml.append("<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n");
        xml.append("<searchresult>");
        xml.append("<query>" + query + "</query>\n");
        for (int i = 0; i < documentCount; i++) {
            xml.append("<document id=\"" + i + "\">");
            xml.append("<title>" + title + i + "</title>\n");
            xml.append("<snippet>" + snippet + i + "</snippet>\n");
            xml.append("<url>" + url + i + "</url>\n");
            xml.append("</document>\n");
        }
        xml.append("<group score=\"1.0\">");
        xml.append("<title>");
        xml.append("<phrase>Data Mining Techniques</phrase>");
        xml.append("<phrase>Lectures</phrase>");
        xml.append("</title>");
        xml.append("<group>");
        xml.append("<title>");
        xml.append("<phrase>Research</phrase>");
        xml.append("</title>");
        xml.append("<document refid=\"0\"/>");
        xml.append("<document refid=\"1\"/>");
        xml.append("</group>");
        xml.append("</group>");
        xml.append("<group score=\"0.55\">");
        xml.append("<title>");
        xml.append("<phrase>Software</phrase>");
        xml.append("</title>");
        xml.append("<document refid=\"1\"/>");
        xml.append("<document refid=\"2\"/>");
        xml.append("</group>");
        xml.append("</searchresult>\n");

        final ProcessingResult deserialized = ProcessingResult
                .deserialize(new ByteArrayInputStream(xml.toString().getBytes("UTF-8")));

        assertNotNull(deserialized);
        assertNotNull(deserialized.getAttributes());

        // Check documents
        assertThatDocuments(deserialized.getDocuments()).hasSize(documentCount);
        int index = 0;
        final List<Document> documents = deserialized.getDocuments();
        for (Document document : documents) {
            assertEquals(title + index, document.getField(Document.TITLE));
            assertEquals(snippet + index, document.getField(Document.SUMMARY));
            assertEquals(url + index, document.getField(Document.CONTENT_URL));
            index++;
        }

        // Check clusters
        final List<Cluster> clusters = deserialized.getClusters();

        final Cluster clusterA = new Cluster();
        clusterA.addPhrases("Data Mining Techniques", "Lectures");
        clusterA.setAttribute(Cluster.SCORE, 1.0);

        final Cluster clusterAA = new Cluster();
        clusterAA.addPhrases("Research");
        clusterAA.addDocuments(documents.get(0), documents.get(1));
        clusterA.addSubclusters(clusterAA);

        final Cluster clusterB = new Cluster();
        clusterB.addPhrases("Software");
        clusterB.setAttribute(Cluster.SCORE, 0.55);
        clusterB.addDocuments(documents.get(1), documents.get(2));

        assertThatClusters(clusters).isEquivalentTo(Lists.newArrayList(clusterA, clusterB));
        Assertions.assertThat(deserialized.getAttributes().get(AttributeNames.QUERY)).isEqualTo(query);
    }

    @Test
    public void testJsonSerializationAll() throws IOException {
        final ProcessingResult result = prepareProcessingResult();
        final JsonNode root = getJsonRootNode(result, null, true, true, true);

        checkJsonQuery(root);
        checkJsonClusters(result, root);
        checkJsonDocuments(result, root);
        Assertions.assertThat(root.get("results")).isNotNull();
    }

    @Test
    public void testJsonSerializationWithCallback() throws IOException {
        final String callback = "callback";
        final ProcessingResult result = prepareProcessingResult();

        final StringWriter json = new StringWriter();
        result.serializeJson(json, callback, true, true);
        final String jsonString = json.toString();

        Assertions.assertThat(jsonString).startsWith(callback + "(").endsWith(");");

        final String data = jsonString.substring(callback.length() + 1, jsonString.length() - 2);
        final JsonNode root = getJsonRootNode(data);

        checkJsonQuery(root);
        checkJsonClusters(result, root);
        checkJsonDocuments(result, root);
        Assertions.assertThat(root.get("results")).isNotNull();
    }

    @Test
    public void testJsonSerializationDocumentsOnly() throws IOException {
        final ProcessingResult result = prepareProcessingResult();
        final JsonNode root = getJsonRootNode(result, null, true, false, false);

        checkJsonQuery(root);
        checkJsonDocuments(result, root);
        Assertions.assertThat(root.get("clusters")).isNull();
        Assertions.assertThat(root.get("results")).isNull();
    }

    @Test
    public void testJsonSerializationClustersOnly() throws IOException {
        final ProcessingResult result = prepareProcessingResult();
        final JsonNode root = getJsonRootNode(result, null, false, true, false);

        checkJsonQuery(root);
        checkJsonClusters(result, root);
        Assertions.assertThat(root.get("documents")).isNull();
        Assertions.assertThat(root.get("results")).isNull();
    }

    @Test
    public void testJsonSerializationAttributesOnly() throws IOException {
        final ProcessingResult result = prepareProcessingResult();
        final JsonNode root = getJsonRootNode(result, null, false, false, true);

        checkJsonQuery(root);
        Assertions.assertThat(root.get("documents")).isNull();
        Assertions.assertThat(root.get("clusters")).isNull();
        Assertions.assertThat(root.get("results")).isNotNull();
    }

    @Test
    public void testNoFalseJunkGroupAttribute() throws Exception {
        Cluster a, b, c;
        final HashMap<String, Object> attrs = Maps.newHashMap();
        attrs.put(AttributeNames.CLUSTERS,
                Arrays.asList(a = new Cluster("a"), b = new Cluster("b"), c = new Cluster("c")));

        b.setOtherTopics(false);
        c.setOtherTopics(true);

        ProcessingResult pr = new ProcessingResult(attrs);
        pr = ProcessingResult.deserialize(pr.serialize());

        assertEquals("a", (a = pr.getClusters().get(0)).getLabel());
        assertEquals("b", (b = pr.getClusters().get(1)).getLabel());
        assertEquals("c", (c = pr.getClusters().get(2)).getLabel());
        assertThat(a.getAttribute(Cluster.OTHER_TOPICS)).isNull();
        assertThat(b.getAttribute(Cluster.OTHER_TOPICS)).isNull();
        assertThat(c.getAttribute(Cluster.OTHER_TOPICS)).isEqualTo(Boolean.TRUE);
    }

    private void checkJsonQuery(final JsonNode root) {
        Assertions.assertThat(root.get("query").textValue()).isEqualTo("query");
    }

    private void checkJsonClusters(final ProcessingResult result, final JsonNode root) {
        final JsonNode clusters = root.get("clusters");
        Assertions.assertThat(clusters).isNotNull();
        final ArrayList<JsonNode> clusterNodes = Lists.newArrayList(clusters.elements());
        Assertions.assertThat(clusterNodes).hasSize(result.getClusters().size());
    }

    private void checkJsonDocuments(final ProcessingResult result, final JsonNode root) {
        final JsonNode documents = root.get("documents");
        Assertions.assertThat(documents).isNotNull();
        final ArrayList<JsonNode> documentNodes = Lists.newArrayList(documents.elements());
        Assertions.assertThat(documentNodes).hasSize(result.getDocuments().size());
    }

    private JsonNode getJsonRootNode(final ProcessingResult result, String callback, boolean saveDocuments,
            boolean saveClusters, boolean saveAttributes) throws IOException, JsonParseException {
        return getJsonRootNode(getJsonString(result, callback, saveDocuments, saveClusters, saveAttributes));
    }

    private String getJsonString(final ProcessingResult result, String callback, boolean saveDocuments,
            boolean saveClusters, boolean saveAttributes) throws IOException {
        final StringWriter json = new StringWriter() {
            @Override
            public void close() throws IOException {
                throw new IOException("Should not be calling close.");
            }
        };
        result.serializeJson(json, callback, false, saveDocuments, saveClusters, saveAttributes);
        return json.toString();
    }

    private JsonNode getJsonRootNode(final String jsonString) throws IOException, JsonParseException {
        final JsonParser jsonParser = new JsonFactory().createJsonParser(new StringReader(jsonString));
        final ObjectMapper mapper = new ObjectMapper();
        final JsonNode root = mapper.readTree(jsonParser);
        return root;
    }

    private void checkSerializationDeserialization(boolean documentsDeserialized, boolean clustersDeserialized,
            boolean attributesDeserialized) throws Exception {
        final ProcessingResult sourceProcessingResult = prepareProcessingResult();

        final ByteArrayOutputStream outputStream = new ByteArrayOutputStream();
        sourceProcessingResult.serialize(new NullOutputStream());
        sourceProcessingResult.serialize(outputStream, documentsDeserialized, clustersDeserialized,
                attributesDeserialized);
        CloseableUtils.close(outputStream);

        final ProcessingResult deserialized = ProcessingResult
                .deserialize(new ByteArrayInputStream(outputStream.toByteArray()));

        assertNotNull(deserialized);
        assertNotNull(deserialized.getAttributes());

        if (documentsDeserialized) {
            assertThatDocuments(deserialized.getDocuments()).isEquivalentTo(sourceProcessingResult.getDocuments());
        } else {
            Assertions.assertThat(deserialized.getDocuments()).isNull();
        }

        if (clustersDeserialized) {
            assertThatClusters(deserialized.getClusters()).isEquivalentTo(sourceProcessingResult.getClusters(),
                    documentsDeserialized);
        } else {
            Assertions.assertThat(deserialized.getClusters()).isNull();
        }

        if (attributesDeserialized) {
            Assertions.assertThat(deserialized.getAttribute(AttributeNames.RESULTS))
                    .isEqualTo(sourceProcessingResult.getAttribute(AttributeNames.RESULTS));
        } else {
            Assertions.assertThat(deserialized.getAttribute(AttributeNames.RESULTS)).isNull();
        }

        Assertions.assertThat(deserialized.getAttributes().get(AttributeNames.QUERY))
                .isEqualTo(sourceProcessingResult.getAttributes().get(AttributeNames.QUERY));
    }

    private ProcessingResult prepareProcessingResult() {
        final List<Document> documents = Lists.newArrayList(
                new Document("Test title 1", "Test snippet 1", "http://test1.com"),
                new Document("Test title 2", "Test snippet 2", "http://test2.com/test"),
                new Document("Test title 3", "Test snippet 3. Some more words and <b>html</b>", "http://test2.com"),
                new Document("Other", "Other", "Other"));
        final Map<String, Object> attributes = Maps.newHashMap();
        attributes.put(AttributeNames.DOCUMENTS, documents);

        final Document document = documents.get(0);
        document.setSources(Lists.newArrayList("s1", "s2"));
        document.setField("testString", "test");
        document.setField("testInteger", 10);
        document.setField("testDouble", 10.3);
        document.setField("testBoolean", true);
        document.setLanguage(LanguageCode.POLISH);
        document.setSources(Lists.newArrayList("s1", "s2"));
        Document.assignDocumentIds(documents);

        final Cluster clusterA = new Cluster();
        clusterA.addPhrases("Label 1", "Label 2");
        clusterA.setAttribute(Cluster.SCORE, 1.0);
        clusterA.setAttribute("testString", "test");
        clusterA.setAttribute("testInteger", 10);
        clusterA.setAttribute("testDouble", 10.3);
        clusterA.setAttribute("testBoolean", true);

        final Cluster clusterAA = new Cluster();
        clusterAA.addPhrases("Label 3 za gl ja");
        clusterAA.addDocuments(documents.get(0), documents.get(1));
        clusterA.addSubclusters(clusterAA);

        final Cluster clusterB = new Cluster();
        clusterB.addPhrases("Label 4");
        clusterB.setAttribute(Cluster.SCORE, 0.55);
        clusterB.addDocuments(documents.get(1), documents.get(2));

        final Cluster clusterO = new Cluster();
        clusterO.setOtherTopics(true);
        clusterO.addPhrases(Cluster.OTHER_TOPICS_LABEL);
        clusterO.addDocuments(documents.get(3));

        final List<Cluster> clusters = Lists.newArrayList(clusterA, clusterB, clusterO);
        attributes.put(AttributeNames.CLUSTERS, clusters);

        attributes.put(AttributeNames.QUERY, "query");

        attributes.put(AttributeNames.RESULTS, 120);

        return new ProcessingResult(attributes);
    }
}