examples.ClassPropertyUsageAnalyzer.java Source code

Java tutorial

Introduction

Here is the source code for examples.ClassPropertyUsageAnalyzer.java

Source

package examples;

/*
 * #%L
 * Wikidata Toolkit Examples
 * %%
 * Copyright (C) 2014 Wikidata Toolkit Developers
 * %%
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 * #L%
 */

import java.io.IOException;
import java.io.PrintStream;
import java.io.UnsupportedEncodingException;
import java.net.URLEncoder;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map.Entry;

import org.apache.commons.lang3.tuple.ImmutablePair;
import org.wikidata.wdtk.datamodel.helpers.DatamodelConverter;
import org.wikidata.wdtk.datamodel.implementation.DataObjectFactoryImpl;
import org.wikidata.wdtk.datamodel.interfaces.DataObjectFactory;
import org.wikidata.wdtk.datamodel.interfaces.DatatypeIdValue;
import org.wikidata.wdtk.datamodel.interfaces.EntityDocumentProcessor;
import org.wikidata.wdtk.datamodel.interfaces.EntityIdValue;
import org.wikidata.wdtk.datamodel.interfaces.ItemDocument;
import org.wikidata.wdtk.datamodel.interfaces.MonolingualTextValue;
import org.wikidata.wdtk.datamodel.interfaces.PropertyDocument;
import org.wikidata.wdtk.datamodel.interfaces.PropertyIdValue;
import org.wikidata.wdtk.datamodel.interfaces.Reference;
import org.wikidata.wdtk.datamodel.interfaces.SnakGroup;
import org.wikidata.wdtk.datamodel.interfaces.Statement;
import org.wikidata.wdtk.datamodel.interfaces.StatementGroup;
import org.wikidata.wdtk.datamodel.interfaces.StringValue;
import org.wikidata.wdtk.datamodel.interfaces.TermedDocument;
import org.wikidata.wdtk.datamodel.interfaces.Value;
import org.wikidata.wdtk.datamodel.interfaces.ValueSnak;

/**
 * This advanced example analyses the use of properties and classes in a dump
 * file, and stores the results in two CSV files. These files can be used with
 * the Miga data viewer to create the <a
 * href="http://tools.wmflabs.org/wikidata-exports/miga/#">Wikidata Class and
 * Properties browser</a>. You can view the settings for configuring Miga in the
 * <a href="http://tools.wmflabs.org/wikidata-exports/miga/apps/classes/">Miga
 * directory for this app</a>.
 * <p>
 * However, you can also view the files in any other tool that processes CSV.
 * The only peculiarity is that some fields in CSV contain lists of items as
 * values, with items separated by "@". This is not supported by most
 * applications since it does not fit into the tabular data model of CSV.
 * <p>
 * The code is somewhat complex and not always clean. It should be considered as
 * an advanced example, not as a first introduction.
 *
 * @author Markus Kroetzsch
 *
 */
public class ClassPropertyUsageAnalyzer implements EntityDocumentProcessor {

    // Factory handed to the converter below; it is not used directly anywhere
    // else in this class.
    DataObjectFactory factory = new DataObjectFactoryImpl();
    // Converter used in processItemDocument() to copy the item documents of
    // classes before they are stored in their class records.
    DatamodelConverter converter = new DatamodelConverter(factory);

    /**
     * Ids of items that are always treated as classes, even if no statement
     * seen so far has used them as one.
     * <p>
     * The dump is processed in a single pass. An item document is only kept
     * for label lookup if the item is already known to be a class at the time
     * it is read. A class with a "subclass of" statement is recognised from
     * its own document, so only classes without a superclass can be missed.
     * This set lists known candidates for that situation. It does no harm if
     * an entry turns out not to be a top-level class in the current dump.
     */
    private static final HashSet<String> TOP_LEVEL_CLASSES = new HashSet<>();
    static {
        final String[] knownTopLevelClassIds = {
                "Q35120", // Entity
                "Q14897293", // Fictional entity
                "Q726", // horse
                "Q12567", // Vikings
                "Q32099",
                "Q47883",
                "Q188913",
                "Q236209",
                "Q459297",
                "Q786014",
                "Q861951",
                "Q7045",
                "Q31579",
                "Q35054",
                "Q39825",
                "Q81513",
                "Q102496",
                "Q159661",
                "Q1130491",
                "Q2022036",
                "Q2198291",
                "Q3034652",
                "Q3505845" };
        for (String classId : knownTopLevelClassIds) {
            TOP_LEVEL_CLASSES.add(classId);
        }
    }

    /**
     * Base class for the statistics kept about one entity, which is either a
     * property or an item used as a class.
     *
     * @author Markus Kroetzsch
     *
     */
    private abstract class UsageRecord {
        /**
         * Number of items that use the entity. For a property this counts the
         * items that have at least one statement with it; for a class item it
         * counts the instances of the class.
         */
        public int itemCount;
        /**
         * For each other property, the number of times it was found on an
         * item that uses this entity (in the sense described for
         * {@link UsageRecord#itemCount}).
         */
        public HashMap<PropertyIdValue, Integer> propertyCoCounts = new HashMap<>();
    }

    /**
     * Usage statistics for a single property.
     *
     * @author Markus Kroetzsch
     *
     */
    private class PropertyRecord extends UsageRecord {
        /**
         * How many statements have this property as their main property.
         */
        public int statementCount;
        /**
         * How many statements of this property carry at least one qualifier.
         */
        public int statementWithQualifierCount;
        /**
         * How many qualifier snaks use this property.
         */
        public int qualifierCount;
        /**
         * How many reference snaks use this property. Repeated uses within
         * one reference are all counted.
         */
        public int referenceCount;
        /**
         * The {@link PropertyDocument} of this property, or null if it has
         * not been encountered.
         */
        public PropertyDocument propertyDocument;
    }

    /**
     * Usage statistics for a single item that is used as a class.
     *
     * @author Markus Kroetzsch
     *
     */
    private class ClassRecord extends UsageRecord {
        /**
         * How many "subclass of" statements point to this class.
         */
        public int subclassCount;
        /**
         * The {@link ItemDocument} of this class, or null if it was not
         * stored during processing.
         */
        public ItemDocument itemDocument;
        /**
         * Direct superclasses, as given by the "subclass of" statements of
         * this class.
         */
        public ArrayList<EntityIdValue> superClasses = new ArrayList<>();
    }

    /**
     * Comparator that orders class entries by the sum of their direct
     * instance count and direct subclass count, largest first.
     * <p>
     * The totals are computed as {@code long} and compared with
     * {@link Long#compare(long, long)}. Subtracting {@code int} sums would
     * yield a wrong order once a sum exceeds the {@code int} range.
     *
     * @author Markus Kroetzsch
     *
     */
    private class ClassUsageRecordComparator
            implements Comparator<Entry<? extends EntityIdValue, ? extends ClassRecord>> {
        @Override
        public int compare(Entry<? extends EntityIdValue, ? extends ClassRecord> o1,
                Entry<? extends EntityIdValue, ? extends ClassRecord> o2) {
            // Arguments are swapped to obtain descending order.
            return Long.compare(usageTotal(o2.getValue()), usageTotal(o1.getValue()));
        }

        /**
         * Returns the number of direct subclasses plus direct instances.
         *
         * @param classRecord
         *            the record to evaluate
         * @return the total, widened to long so the addition cannot overflow
         */
        private long usageTotal(ClassRecord classRecord) {
            return (long) classRecord.subclassCount + classRecord.itemCount;
        }
    }

    /**
     * Comparator that orders property entries by the sum of the number of
     * items using the property, its uses in qualifiers and its uses in
     * references, largest first.
     * <p>
     * The totals are computed as {@code long} and compared with
     * {@link Long#compare(long, long)}. Subtracting {@code int} sums would
     * yield a wrong order once a sum exceeds the {@code int} range.
     *
     * @author Markus Kroetzsch
     *
     */
    private class UsageRecordComparator
            implements Comparator<Entry<? extends EntityIdValue, ? extends PropertyRecord>> {
        @Override
        public int compare(Entry<? extends EntityIdValue, ? extends PropertyRecord> o1,
                Entry<? extends EntityIdValue, ? extends PropertyRecord> o2) {
            // Arguments are swapped to obtain descending order.
            return Long.compare(usageTotal(o2.getValue()), usageTotal(o1.getValue()));
        }

        /**
         * Returns item count plus qualifier uses plus reference uses.
         *
         * @param propertyRecord
         *            the record to evaluate
         * @return the total, widened to long so the addition cannot overflow
         */
        private long usageTotal(PropertyRecord propertyRecord) {
            return (long) propertyRecord.itemCount + propertyRecord.qualifierCount
                    + propertyRecord.referenceCount;
        }
    }

    /**
     * Number of item documents processed so far.
     */
    long countItems;
    /**
     * Number of processed items that have at least one statement group.
     */
    long countPropertyItems;
    /**
     * Number of property documents processed so far.
     */
    long countProperties;
    /**
     * Number of item documents that were processed while the item was
     * regarded as a class.
     */
    long countClasses;

    /**
     * Usage records of all properties encountered, keyed by property id.
     */
    final HashMap<PropertyIdValue, PropertyRecord> propertyRecords = new HashMap<>();
    /**
     * Usage records of all items used as classes, keyed by item id.
     */
    final HashMap<EntityIdValue, ClassRecord> classRecords = new HashMap<>();

    /**
     * Filled while writing the output to keep class labels unique. Each label
     * is mapped to the first entity that asked for it. Any other entity with
     * the same label gets its Q-ID appended for disambiguation.
     */
    final HashMap<String, EntityIdValue> labels = new HashMap<>();

    /**
     * Program entry point. Configures logging, prints a short description,
     * runs a new analyzer over the dump and finally writes the CSV reports.
     * The dump file and the offline mode are selected through the settings in
     * {@link ExampleHelpers}.
     *
     * @param args
     *            command line arguments (not used)
     * @throws IOException
     *             declared for the dump processing in {@link ExampleHelpers}
     */
    public static void main(String[] args) throws IOException {
        ExampleHelpers.configureLogging();
        printDocumentation();

        final ClassPropertyUsageAnalyzer analyzer = new ClassPropertyUsageAnalyzer();
        ExampleHelpers.processEntitiesFromWikidataDump(analyzer);
        analyzer.writeFinalReports();
    }

    /**
     * Updates all statistics with the data of one item document: property
     * usage in statements, qualifiers and references, property co-occurrence,
     * and class usage through "instance of" (P31) and "subclass of" (P279)
     * statements. A progress report is printed every 100000 items.
     *
     * @param itemDocument
     *            the item document to analyse
     */
    @Override
    public void processItemDocument(ItemDocument itemDocument) {
        this.countItems++;
        if (!itemDocument.getStatementGroups().isEmpty()) {
            this.countPropertyItems++;
        }

        final EntityIdValue itemId = itemDocument.getItemId();

        // The item counts as a class right away if it is a known top-level
        // class or if an earlier item already referred to it as a class.
        ClassRecord classRecord = null;
        if (TOP_LEVEL_CLASSES.contains(itemId.getId()) || this.classRecords.containsKey(itemId)) {
            classRecord = getClassRecord(itemId);
        }

        for (StatementGroup group : itemDocument.getStatementGroups()) {
            final PropertyIdValue property = group.getProperty();
            final String propertyId = property.getId();

            final PropertyRecord propertyRecord = getPropertyRecord(property);
            propertyRecord.itemCount++;
            propertyRecord.statementCount += group.getStatements().size();

            final boolean isInstanceOf = "P31".equals(propertyId);
            final boolean isSubclassOf = "P279".equals(propertyId);
            // Having a superclass makes this item a class as well.
            if (isSubclassOf && classRecord == null) {
                classRecord = getClassRecord(itemId);
            }
            final boolean isClassStatement = isInstanceOf || isSubclassOf;

            for (Statement statement : group.getStatements()) {
                // Properties used in qualifiers:
                for (SnakGroup qualifierGroup : statement.getClaim().getQualifiers()) {
                    countPropertyQualifier(qualifierGroup.getProperty(), qualifierGroup.getSnaks().size());
                }
                // Statements that have qualifiers:
                if (!statement.getClaim().getQualifiers().isEmpty()) {
                    propertyRecord.statementWithQualifierCount++;
                }
                // Properties used in references:
                for (Reference reference : statement.getReferences()) {
                    for (SnakGroup referenceGroup : reference.getSnakGroups()) {
                        countPropertyReference(referenceGroup.getProperty(), referenceGroup.getSnaks().size());
                    }
                }

                // Only entity values of P31/P279 statements matter below.
                if (!isClassStatement || !(statement.getClaim().getMainSnak() instanceof ValueSnak)) {
                    continue;
                }
                final Value value = ((ValueSnak) statement.getClaim().getMainSnak()).getValue();
                if (!(value instanceof EntityIdValue)) {
                    continue;
                }

                final EntityIdValue targetClassId = (EntityIdValue) value;
                final ClassRecord targetClassRecord = getClassRecord(targetClassId);
                if (isSubclassOf) {
                    targetClassRecord.subclassCount++;
                    // classRecord was initialised above for P279 groups
                    classRecord.superClasses.add(targetClassId);
                } else {
                    targetClassRecord.itemCount++;
                    countCooccurringProperties(itemDocument, targetClassRecord, null);
                }
            }

            countCooccurringProperties(itemDocument, propertyRecord, property);
        }

        if (classRecord != null) {
            this.countClasses++;
            classRecord.itemDocument = converter.copy(itemDocument);
        }

        // Periodic progress report:
        if (this.countItems % 100000 == 0) {
            printReport();
        }
    }

    /**
     * Counts the property document and attaches it to the usage record of
     * its property, so that its terms and datatype are available when the
     * reports are written.
     *
     * @param propertyDocument
     *            the property document to register
     */
    @Override
    public void processPropertyDocument(PropertyDocument propertyDocument) {
        this.countProperties++;
        getPropertyRecord(propertyDocument.getPropertyId()).propertyDocument = propertyDocument;
    }

    /**
     * Writes the results of the analysis: first the property data, then the
     * class data.
     */
    public void writeFinalReports() {
        this.writePropertyData();
        this.writeClassData();
    }

    /**
     * Prints a short banner to standard output that describes what this
     * program does.
     */
    public static void printDocumentation() {
        final String border = "********************************************************************";
        final String[] bannerLines = {
                border,
                "*** Wikidata Toolkit: Class and Property Usage Analyzer",
                "*** ",
                "*** This program will download and process dumps from Wikidata.",
                "*** It will create a CSV file with statistics about class and",
                "*** property useage. These files can be used with the Miga data",
                "*** viewer to create the browser seen at ",
                "*** http://tools.wmflabs.org/wikidata-exports/miga/",
                border };
        for (String line : bannerLines) {
            System.out.println(line);
        }
    }

    /**
     * Looks up the record that holds the statistics of a class. A new, empty
     * record is created and registered if none exists yet.
     *
     * @param entityIdValue
     *            the id of the class
     * @return the class record, never null
     */
    private ClassRecord getClassRecord(EntityIdValue entityIdValue) {
        ClassRecord record = this.classRecords.get(entityIdValue);
        if (record == null) {
            record = new ClassRecord();
            this.classRecords.put(entityIdValue, record);
        }
        return record;
    }

    /**
     * Looks up the record that holds the statistics of a property. A new,
     * empty record is created and registered if none exists yet.
     *
     * @param property
     *            the id of the property
     * @return the property record, never null
     */
    private PropertyRecord getPropertyRecord(PropertyIdValue property) {
        PropertyRecord record = this.propertyRecords.get(property);
        if (record == null) {
            record = new PropertyRecord();
            this.propertyRecords.put(property, record);
        }
        return record;
    }

    /**
     * Increments, in the given usage record, the co-occurrence counter of
     * every property that has a statement group in the given item document.
     *
     * @param itemDocument
     *            the item whose properties are counted
     * @param usageRecord
     *            the record whose co-occurrence counters are updated
     * @param thisPropertyIdValue
     *            a property to leave out of the count (typically the one the
     *            record belongs to), or null to count every property
     */
    private void countCooccurringProperties(ItemDocument itemDocument, UsageRecord usageRecord,
            PropertyIdValue thisPropertyIdValue) {
        for (StatementGroup group : itemDocument.getStatementGroups()) {
            final PropertyIdValue otherProperty = group.getProperty();
            if (otherProperty.equals(thisPropertyIdValue)) {
                continue;
            }
            final Integer previousCount = usageRecord.propertyCoCounts.get(otherProperty);
            usageRecord.propertyCoCounts.put(otherProperty, previousCount == null ? 1 : previousCount + 1);
        }
    }

    /**
     * Adds the given number of qualifier uses to the record of a property.
     *
     * @param property
     *            the property that was used in qualifiers
     * @param count
     *            the number of uses to add
     */
    private void countPropertyQualifier(PropertyIdValue property, int count) {
        getPropertyRecord(property).qualifierCount += count;
    }

    /**
     * Adds the given number of reference uses to the record of a property.
     *
     * @param property
     *            the property that was used in references
     * @param count
     *            the number of uses to add
     */
    private void countPropertyReference(PropertyIdValue property, int count) {
        getPropertyRecord(property).referenceCount += count;
    }

    /**
     * Prints the current counters to standard output as a short progress
     * report.
     */
    private void printReport() {
        final PrintStream console = System.out;
        console.println("Processed " + this.countItems + " items:");
        console.println(" * Properties encountered: " + this.propertyRecords.size());
        console.println(" * Property documents: " + this.countProperties);
        console.println(" * Classes encountered: " + this.classRecords.size());
        console.println(" * Class documents: " + this.countClasses);
    }

    /**
     * Writes the data collected about properties to the file
     * "properties.csv". The file starts with a header line, followed by one
     * line per property, ordered by {@link UsageRecordComparator}.
     * <p>
     * The output is written as UTF-8 regardless of the platform default
     * charset, because labels and descriptions may contain non-ASCII
     * characters. I/O problems are reported on standard error; they do not
     * abort the program.
     */
    private void writePropertyData() {
        try (PrintStream out = new PrintStream(ExampleHelpers.openExampleFileOuputStream("properties.csv"), false,
                "UTF-8")) {

            out.println("Id" + ",Label" + ",Description" + ",URL" + ",Datatype" + ",Uses in statements"
                    + ",Items with such statements" + ",Uses in statements with qualifiers" + ",Uses in qualifiers"
                    + ",Uses in references" + ",Uses total" + ",Related properties");

            List<Entry<PropertyIdValue, PropertyRecord>> list = new ArrayList<Entry<PropertyIdValue, PropertyRecord>>(
                    this.propertyRecords.entrySet());
            Collections.sort(list, new UsageRecordComparator());
            for (Entry<PropertyIdValue, PropertyRecord> entry : list) {
                printPropertyRecord(out, entry.getValue(), entry.getKey());
            }

            // PrintStream never throws on write errors; it only records them.
            if (out.checkError()) {
                System.err.println("Error while writing properties.csv; the file may be incomplete.");
            }

        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * Writes the data collected about classes to the file "classes.csv". The
     * file starts with a header line, followed by one line per class that
     * has at least one direct instance or direct subclass, ordered by
     * {@link ClassUsageRecordComparator}.
     * <p>
     * The output is written as UTF-8 regardless of the platform default
     * charset, because labels and descriptions may contain non-ASCII
     * characters. I/O problems are reported on standard error; they do not
     * abort the program.
     */
    private void writeClassData() {
        try (PrintStream out = new PrintStream(ExampleHelpers.openExampleFileOuputStream("classes.csv"), false,
                "UTF-8")) {

            out.println("Id" + ",Label" + ",Description" + ",URL" + ",Image" + ",Number of direct instances"
                    + ",Number of direct subclasses" + ",Direct superclasses" + ",All superclasses"
                    + ",Related properties");

            List<Entry<EntityIdValue, ClassRecord>> list = new ArrayList<>(this.classRecords.entrySet());
            Collections.sort(list, new ClassUsageRecordComparator());
            for (Entry<EntityIdValue, ClassRecord> entry : list) {
                // Records that were created but never used as a class are skipped.
                if (entry.getValue().itemCount > 0 || entry.getValue().subclassCount > 0) {
                    printClassRecord(out, entry.getValue(), entry.getKey());
                }
            }

            // PrintStream never throws on write errors; it only records them.
            if (out.checkError()) {
                System.err.println("Error while writing classes.csv; the file may be incomplete.");
            }

        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * Writes one CSV line with the data of a single class: terms, image,
     * instance and subclass counts, direct superclasses, all (transitive)
     * superclasses and related properties.
     *
     * @param out
     *            the output to write to
     * @param classRecord
     *            the class record to write
     * @param entityIdValue
     *            the id of the item that the class record belongs to
     */
    private void printClassRecord(PrintStream out, ClassRecord classRecord, EntityIdValue entityIdValue) {
        final String quotedLabel = "\"" + getClassLabel(entityIdValue) + "\"";
        printTerms(out, classRecord.itemDocument, entityIdValue, quotedLabel);
        printImage(out, classRecord.itemDocument);

        out.print("," + classRecord.itemCount);
        out.print("," + classRecord.subclassCount);

        // Direct superclasses:
        printClassList(out, classRecord.superClasses);

        // Transitive closure of the superclass relation:
        final HashSet<EntityIdValue> allSuperClasses = new HashSet<>();
        for (EntityIdValue directSuperClass : classRecord.superClasses) {
            addSuperClasses(directSuperClass, allSuperClasses);
        }
        printClassList(out, allSuperClasses);

        printRelatedProperties(out, classRecord);

        out.println();
    }

    /**
     * Writes the labels of the given classes as a single quoted CSV value,
     * preceded by a comma. The labels are separated by "@", which Miga can
     * decode; ordinary CSV tools have no notion of list-valued fields.
     *
     * @param out
     *            the output to write to
     * @param classes
     *            the class items whose labels are written
     */
    private void printClassList(PrintStream out, Iterable<EntityIdValue> classes) {
        out.print(",\"");
        String separator = "";
        for (EntityIdValue classId : classes) {
            out.print(separator);
            // makeshift escaping for Miga: "@" is reserved as the separator
            out.print(getClassLabel(classId).replace("@", ""));
            separator = "@";
        }
        out.print("\"");
    }

    /**
     * Adds the given class and, recursively, all of its recorded superclasses
     * to the given set. A class that is already in the set is not visited
     * again, so cycles in the class hierarchy do not cause endless recursion.
     *
     * @param itemIdValue
     *            the class to add
     * @param superClasses
     *            the set that collects the result
     */
    private void addSuperClasses(EntityIdValue itemIdValue, HashSet<EntityIdValue> superClasses) {
        if (!superClasses.add(itemIdValue)) {
            // already visited
            return;
        }

        final ClassRecord record = this.classRecords.get(itemIdValue);
        if (record != null) {
            for (EntityIdValue parent : record.superClasses) {
                addSuperClasses(parent, superClasses);
            }
        }
    }

    /**
     * Writes the first four CSV values of a line: id, label, description and
     * IRI of the entity. These are the same for properties and class items.
     * Only English terms are used. A missing label is replaced by the entity
     * id and a missing description by "-".
     *
     * @param out
     *            the output to write to
     * @param termedDocument
     *            the document that provides the terms, or null if there is
     *            none
     * @param entityIdValue
     *            the entity that the data refers to
     * @param specialLabel
     *            label to write as-is instead of the one found in the
     *            document, or null to use the document's label; classes pass
     *            their disambiguated label here
     */
    private void printTerms(PrintStream out, TermedDocument termedDocument, EntityIdValue entityIdValue,
            String specialLabel) {
        String label = specialLabel;
        String description = "-";

        if (termedDocument != null) {
            if (label == null) {
                final MonolingualTextValue englishLabel = termedDocument.getLabels().get("en");
                if (englishLabel != null) {
                    label = csvStringEscape(englishLabel.getText());
                }
            }
            final MonolingualTextValue englishDescription = termedDocument.getDescriptions().get("en");
            if (englishDescription != null) {
                description = csvStringEscape(englishDescription.getText());
            }
        }

        if (label == null) {
            label = entityIdValue.getId();
        }

        final StringBuilder row = new StringBuilder();
        row.append(entityIdValue.getId());
        row.append(',').append(label);
        row.append(',').append(description);
        row.append(',').append(entityIdValue.getIri());
        out.print(row.toString());
    }

    /**
     * Writes the URL of a thumbnail for the given item document as one quoted
     * CSV value, preceded by a comma. The image is taken from the first
     * string value of an "image" (P18) statement. A default image is used if
     * there is no document or no such value.
     * <p>
     * The complete URL, including the width parameter, is placed inside the
     * quoted value.
     *
     * @param out
     *            the output to write to
     * @param itemDocument
     *            the document that may provide the image information, or
     *            null
     */
    private void printImage(PrintStream out, ItemDocument itemDocument) {
        final String imageFile = findFirstImageFileName(itemDocument);

        if (imageFile == null) {
            out.print(",\"http://commons.wikimedia.org/w/thumb.php?f=MA_Route_blank.svg&w=50\"");
            return;
        }

        final String imageFileEncoded;
        try {
            // Spaces become underscores as in Commons titles; ":" and "/"
            // are special title symbols and are kept unescaped.
            imageFileEncoded = URLEncoder.encode(imageFile.replace(" ", "_"), "utf-8").replace("%3A", ":")
                    .replace("%2F", "/");
        } catch (UnsupportedEncodingException e) {
            throw new RuntimeException("Your JRE does not support UTF-8 encoding. Srsly?!", e);
        }

        out.print("," + csvStringEscape(
                "http://commons.wikimedia.org/w/thumb.php?f=" + imageFileEncoded + "&w=50"));
    }

    /**
     * Returns the first string value found in the "image" (P18) statements of
     * the given document.
     *
     * @param itemDocument
     *            the document to search, or null
     * @return the image file name, or null if none was found
     */
    private String findFirstImageFileName(ItemDocument itemDocument) {
        if (itemDocument == null) {
            return null;
        }
        for (StatementGroup sg : itemDocument.getStatementGroups()) {
            if (!"P18".equals(sg.getProperty().getId())) {
                continue;
            }
            for (Statement s : sg.getStatements()) {
                if (s.getClaim().getMainSnak() instanceof ValueSnak) {
                    Value value = ((ValueSnak) s.getClaim().getMainSnak()).getValue();
                    if (value instanceof StringValue) {
                        return ((StringValue) value).getString();
                    }
                }
            }
        }
        return null;
    }

    /**
     * Writes one CSV line with the data of a single property: terms,
     * datatype, the usage counters, their total and the related properties.
     *
     * @param out
     *            the output to write to
     * @param propertyRecord
     *            the data to write
     * @param propertyIdValue
     *            the property that the data refers to
     */
    private void printPropertyRecord(PrintStream out, PropertyRecord propertyRecord,
            PropertyIdValue propertyIdValue) {

        final PropertyDocument document = propertyRecord.propertyDocument;
        printTerms(out, document, propertyIdValue, null);

        // Without a property document the datatype cannot be determined.
        final String datatype = (document == null) ? "Unknown" : getDatatypeLabel(document.getDatatype());
        final int totalUses = propertyRecord.statementCount + propertyRecord.qualifierCount
                + propertyRecord.referenceCount;

        final StringBuilder columns = new StringBuilder();
        columns.append(',').append(datatype);
        columns.append(',').append(propertyRecord.statementCount);
        columns.append(',').append(propertyRecord.itemCount);
        columns.append(',').append(propertyRecord.statementWithQualifierCount);
        columns.append(',').append(propertyRecord.qualifierCount);
        columns.append(',').append(propertyRecord.referenceCount);
        columns.append(',').append(totalUses);
        out.print(columns.toString());

        printRelatedProperties(out, propertyRecord);

        out.println();
    }

    /**
     * Maps a datatype to an English label for display.
     *
     * @param datatype
     *            the datatype to label
     * @return the label, or "Unknown" if the datatype has no IRI
     * @throws RuntimeException
     *             if the IRI is not one of the datatypes handled here
     */
    private String getDatatypeLabel(DatatypeIdValue datatype) {
        final String iri = datatype.getIri();
        if (iri == null) { // TODO should be redundant once the
            // JSON parsing works
            return "Unknown";
        }

        final String label;
        switch (iri) {
        case DatatypeIdValue.DT_COMMONS_MEDIA:
            label = "Commons media";
            break;
        case DatatypeIdValue.DT_GLOBE_COORDINATES:
            label = "Globe coordinates";
            break;
        case DatatypeIdValue.DT_ITEM:
            label = "Item";
            break;
        case DatatypeIdValue.DT_QUANTITY:
            label = "Quantity";
            break;
        case DatatypeIdValue.DT_STRING:
            label = "String";
            break;
        case DatatypeIdValue.DT_TIME:
            label = "Time";
            break;
        case DatatypeIdValue.DT_URL:
            label = "URL";
            break;
        case DatatypeIdValue.DT_PROPERTY:
            label = "Property";
            break;
        case DatatypeIdValue.DT_EXTERNAL_ID:
            label = "External identifier";
            break;
        case DatatypeIdValue.DT_MATH:
            label = "Math";
            break;
        case DatatypeIdValue.DT_MONOLINGUAL_TEXT:
            label = "Monolingual Text";
            break;
        default:
            throw new RuntimeException("Unknown datatype " + iri);
        }
        return label;
    }

    /**
     * Writes the labels of the properties related to the given record as a
     * single quoted CSV value, preceded by a comma. The labels are separated
     * by "@", which Miga can decode; ordinary CSV tools have no notion of
     * list-valued fields.
     * <p>
     * Each co-occurring property receives a score computed from its rate of
     * use among the items of this record and its rate of use among all items
     * with statements. Properties are listed by descending score, and only
     * those with a score of at least 1.5 are written.
     *
     * @param out
     *            the output to write to
     * @param usageRecord
     *            the data to write
     */
    private void printRelatedProperties(PrintStream out, UsageRecord usageRecord) {

        final List<ImmutablePair<PropertyIdValue, Double>> scoredProperties = new ArrayList<>(
                usageRecord.propertyCoCounts.size());
        for (Entry<PropertyIdValue, Integer> coCount : usageRecord.propertyCoCounts.entrySet()) {
            final PropertyIdValue otherProperty = coCount.getKey();

            // Share of this record's items that also use the other property
            final double localRate = (double) coCount.getValue() / usageRecord.itemCount;
            // Share of all items with statements that use the other property
            final double globalRate = (double) this.propertyRecords.get(otherProperty).itemCount
                    / this.countPropertyItems;
            // Logistic step functions applied to the two rates
            final double localRateStep = 1 / (1 + Math.exp(6 * (-2 * localRate + 0.5)));
            final double inverseGlobalRateStep = 1 / (1 + Math.exp(6 * (-2 * (1 - globalRate) + 0.5)));

            scoredProperties.add(new ImmutablePair<PropertyIdValue, Double>(otherProperty,
                    localRateStep * inverseGlobalRateStep * localRate / globalRate));
        }

        // Highest score first
        Collections.sort(scoredProperties, new Comparator<ImmutablePair<PropertyIdValue, Double>>() {
            @Override
            public int compare(ImmutablePair<PropertyIdValue, Double> first,
                    ImmutablePair<PropertyIdValue, Double> second) {
                return Double.compare(second.getRight(), first.getRight());
            }
        });

        out.print(",\"");
        String separator = "";
        for (ImmutablePair<PropertyIdValue, Double> scored : scoredProperties) {
            if (scored.getRight() < 1.5) {
                // sorted by score, so nothing relevant follows
                break;
            }
            out.print(separator);
            // makeshift escaping for Miga: "@" is reserved as the separator
            out.print(getPropertyLabel(scored.getLeft()).replace("@", ""));
            separator = "@";
        }
        out.print("\"");
    }

    /**
     * Determines the label to display for a property. This is the label
     * obtained from its property document, or the property id if no
     * document is available.
     *
     * @param propertyIdValue
     *            the property to label
     * @return the label
     */
    private String getPropertyLabel(PropertyIdValue propertyIdValue) {
        final PropertyRecord record = this.propertyRecords.get(propertyIdValue);
        if (record != null && record.propertyDocument != null) {
            return getLabel(propertyIdValue, record.propertyDocument);
        }
        return propertyIdValue.getId();
    }

    /**
     * Determines the label to display for a class item and makes sure that
     * no two classes share a label. The first class that asks for a label
     * becomes its owner; every other class with the same label gets its QID
     * appended in parentheses.
     *
     * @param entityIdValue
     *            the item to label
     * @return the label
     */
    private String getClassLabel(EntityIdValue entityIdValue) {
        final ClassRecord record = this.classRecords.get(entityIdValue);
        final String label = (record != null && record.itemDocument != null)
                ? getLabel(entityIdValue, record.itemDocument)
                : entityIdValue.getId();

        final EntityIdValue owner = this.labels.get(label);
        if (owner == null) {
            // first use of this label: claim it
            this.labels.put(label, entityIdValue);
            return label;
        }
        if (owner.equals(entityIdValue)) {
            return label;
        }
        return label + " (" + entityIdValue.getId() + ")";
    }

    /**
     * Returns the English label found in the given document, with quotation
     * marks doubled for CSV, or the entity id if there is no English label.
     * The result is not wrapped in quotes, since some callers embed it in a
     * larger quoted value.
     *
     * @param entityIdValue
     *            the entity to label
     * @param termedDocument
     *            the document to get labels from
     * @return the label
     */
    private String getLabel(EntityIdValue entityIdValue, TermedDocument termedDocument) {
        final MonolingualTextValue englishLabel = termedDocument.getLabels().get("en");
        if (englishLabel == null) {
            return entityIdValue.getId();
        }
        return englishLabel.getText().replace("\"", "\"\"");
    }

    /**
     * Turns a string into a quoted CSV value: every quotation mark is
     * doubled and the result is wrapped in quotation marks.
     *
     * @param string
     *            the string to escape
     * @return the escaped string
     */
    private String csvStringEscape(String string) {
        final StringBuilder quoted = new StringBuilder(string.length() + 2);
        quoted.append('"').append(string.replace("\"", "\"\"")).append('"');
        return quoted.toString();
    }
}