org.deidentifier.arx.gui.worker.WorkerSave.java Source code

Java tutorial

Introduction

Here is the source code for org.deidentifier.arx.gui.worker.WorkerSave.java

Source

/*
 * ARX: Powerful Data Anonymization
 * Copyright 2012 - 2018 Fabian Prasser and contributors
 * 
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 * 
 * http://www.apache.org/licenses/LICENSE-2.0
 * 
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.deidentifier.arx.gui.worker;

import java.io.BufferedOutputStream;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.ObjectOutputStream;
import java.io.OutputStreamWriter;
import java.io.Writer;
import java.lang.reflect.InvocationTargetException;
import java.util.Arrays;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Set;
import java.util.zip.Deflater;
import java.util.zip.ZipEntry;
import java.util.zip.ZipOutputStream;

import org.apache.commons.io.FileUtils;
import org.deidentifier.arx.ARXLattice;
import org.deidentifier.arx.ARXLattice.ARXNode;
import org.deidentifier.arx.AttributeType;
import org.deidentifier.arx.AttributeType.Hierarchy;
import org.deidentifier.arx.DataDefinition;
import org.deidentifier.arx.DataHandle;
import org.deidentifier.arx.DataHandleOutput;
import org.deidentifier.arx.DataType;
import org.deidentifier.arx.DataType.DataTypeWithFormat;
import org.deidentifier.arx.criteria.PrivacyCriterion;
import org.deidentifier.arx.gui.Controller;
import org.deidentifier.arx.gui.model.Model;
import org.deidentifier.arx.gui.model.ModelConfiguration;
import org.deidentifier.arx.gui.resources.Resources;
import org.deidentifier.arx.gui.worker.io.FileBuilder;
import org.deidentifier.arx.gui.worker.io.Vocabulary;
import org.deidentifier.arx.gui.worker.io.Vocabulary_V2;
import org.deidentifier.arx.gui.worker.io.XMLWriter;
import org.deidentifier.arx.io.CSVDataOutput;
import org.deidentifier.arx.metric.InformationLoss;
import org.eclipse.core.runtime.IProgressMonitor;

/**
 * This worker saves a project file to disk.
 *
 * @author Fabian Prasser
 */
public class WorkerSave extends Worker<Model> {

    /** The vocabulary that supplies the element names used when writing XML. Never reassigned. */
    private final Vocabulary vocabulary = new Vocabulary_V2();

    /** The path the finished project file is copied to. */
    private final String path;

    /** The model to save. */
    private final Model model;

    /**
     * Creates a worker that saves the given model to the given path.
     *
     * @param path The path the project file is written to
     * @param controller Not used by this constructor
     * @param model The model to save
     */
    public WorkerSave(final String path, final Controller controller, final Model model) {
        this.model = model;
        this.path = path;
    }

    /**
     * Writes all parts of the project into a temporary zip archive and, once the archive is
     * complete, copies it to the target path. Any exception raised while saving is stored in
     * {@code error} instead of being thrown.
     *
     * @param arg0 The progress monitor that is updated while saving
     * @throws InvocationTargetException Declared by the signature; not thrown by this implementation
     * @throws InterruptedException Declared by the signature; not thrown by this implementation
     */
    @Override
    public void run(final IProgressMonitor arg0) throws InvocationTargetException, InterruptedException {

        arg0.beginTask(Resources.getMessage("WorkerSave.0"), 8); //$NON-NLS-1$
        File temp = null;
        try {
            temp = File.createTempFile("arx", "deid");

            // try-with-resources guarantees that the archive and the underlying file stream are
            // closed even if one of the writers fails; otherwise the temporary file would stay
            // open while we try to delete it
            try (FileOutputStream f = new FileOutputStream(temp);
                 ZipOutputStream zip = new ZipOutputStream(new BufferedOutputStream(f))) {
                zip.setLevel(Deflater.BEST_SPEED);
                model.createConfig();
                writeMetadata(model, zip);
                arg0.worked(1);
                writeModel(model, zip);
                arg0.worked(1);
                writeInput(model, zip);
                arg0.worked(1);
                writeOutput(model, zip);
                arg0.worked(1);
                writeConfiguration(model, zip);
                arg0.worked(1);
                final Map<String, Integer> map = writeLattice(model, zip);
                arg0.worked(1);
                writeClipboard(model, map, zip);
                arg0.worked(1);
                writeFilter(model, zip);
            }
            arg0.worked(1);

            // Only a completely written archive is copied to the target
            FileUtils.copyFile(temp, new File(path));
        } catch (final Exception e) {
            error = e;
        } finally {
            // temp may still be null if creating the temporary file failed
            FileUtils.deleteQuietly(temp);
            arg0.done();
        }
    }

    /**
     * Maps an attribute name to the name used for its file inside the archive.
     * The name is currently used unchanged.
     *
     * @param a The attribute name
     * @return The file name, identical to the attribute name
     */
    private String toFileName(final String a) {
        final String fileName = a;
        return fileName;
    }

    /**
     * Streams an XML representation of the lattice to the given zip stream and fills the
     * given map with a numeric identifier for each transformation.
     *
     * @param map Receives a mapping from {@code Arrays.toString(transformation)} to an id;
     *            keys that are already present keep their value
     * @param l The lattice to serialize
     * @param zip The stream to write to
     * @throws IOException If writing fails
     */
    private void toXML(final Map<String, Integer> map, final ARXLattice l, final ZipOutputStream zip)
            throws IOException {

        final ARXNode[][] levels = l.getLevels();

        // Number the transformations in level order
        int nextId = 0;
        for (final ARXNode[] nodes : levels) {
            for (final ARXNode node : nodes) {
                final String transformation = Arrays.toString(node.getTransformation());
                if (map.containsKey(transformation)) {
                    continue;
                }
                map.put(transformation, nextId);
                nextId++;
            }
        }

        // The document is streamed to the archive instead of being assembled in memory because of its size
        final FileBuilder builder = new FileBuilder(new OutputStreamWriter(zip));
        final XMLWriter xml = new XMLWriter(builder, true);
        xml.write(vocabulary.getHeader());

        // One element per level, containing one element per node
        xml.indent(vocabulary.getLattice());
        int depth = 0;
        for (final ARXNode[] nodes : levels) {

            xml.indent(vocabulary.getLevel(), vocabulary.getDepth(), depth);
            for (final ARXNode node : nodes) {

                final String transformation = Arrays.toString(node.getTransformation());
                final int currentId = map.get(transformation);

                xml.indent(vocabulary.getNode2(), vocabulary.getId(), currentId);
                xml.write(vocabulary.getTransformation(), node.getTransformation());
                xml.write(vocabulary.getAnonymity(), node.getAnonymity());
                xml.write(vocabulary.getChecked(), node.isChecked());

                // Neighbours are only written if there are any
                if (node.getPredecessors().length > 0) {
                    xml.write(vocabulary.getPredecessors(), node.getPredecessors(), map);
                }
                if (node.getSuccessors().length > 0) {
                    xml.write(vocabulary.getSuccessors(), node.getSuccessors(), map);
                }

                // Scores
                xml.indent(vocabulary.getInfoloss());
                xml.write(vocabulary.getMax2(), node.getHighestScore().toString());
                xml.write(vocabulary.getMin2(), node.getLowestScore().toString());
                xml.unindent();

                // Close the node
                xml.unindent();
            }

            // Close the level
            xml.unindent();
            depth++;
        }

        // Close the lattice
        xml.unindent();
        builder.flush();
    }

    /**
     * Builds an XML representation of the clipboard, containing one entry per node
     * with the string form of the node's transformation.
     *
     * @param map Not used by this method
     * @param clipboard The nodes stored in the clipboard
     * @return The XML document as a string
     * @throws IOException If the XML writer fails
     */
    private String toXML(final Map<String, Integer> map, final List<ARXNode> clipboard) throws IOException {

        final XMLWriter xml = new XMLWriter();
        xml.indent(vocabulary.getClipboard()); //$NON-NLS-1$
        for (final ARXNode node : clipboard) {
            final String transformation = Arrays.toString(node.getTransformation());
            xml.write(vocabulary.getNode(), transformation); //$NON-NLS-1$
        }
        xml.unindent();

        final String result = xml.toString();
        return result;
    }

    /**
     * Converts the project-level settings of a model to XML.
     *
     * @param model The model to serialize
     * @return The XML document as a string
     * @throws IOException If the XML writer fails
     */
    private String toXML(final Model model) throws IOException {

        XMLWriter writer = new XMLWriter();
        writer.indent(vocabulary.getProject());
        writer.write(vocabulary.getName(), model.getName());

        // CSV syntax
        writer.write(vocabulary.getSeparator(), model.getCSVSyntax().getDelimiter());
        writer.write(vocabulary.getEscape(), model.getCSVSyntax().getEscape());
        writer.write(vocabulary.getQuote(), model.getCSVSyntax().getQuote());

        // The line break is stored as a symbolic name: a single '\r' is "MAC", any
        // two-character sequence is "WINDOWS" and everything else is "UNIX"
        String linebreak = "UNIX"; //$NON-NLS-1$
        char[] _linebreak = model.getCSVSyntax().getLinebreak();
        if (_linebreak.length == 1 && _linebreak[0] == '\r') {
            linebreak = "MAC"; //$NON-NLS-1$
        } else if (_linebreak.length == 2) {
            linebreak = "WINDOWS"; //$NON-NLS-1$
        }
        writer.write(vocabulary.getLinebreak(), linebreak);

        writer.write(vocabulary.getDescription(), model.getDescription());

        // Locale.ROOT makes the case conversion independent of the default locale of the JVM;
        // with e.g. a Turkish default locale "it" would otherwise be written as "\u0130T"
        writer.write(vocabulary.getLocale(), model.getLocale().getLanguage().toUpperCase(Locale.ROOT));
        writer.write(vocabulary.getHistorySize(), model.getHistorySize());
        writer.write(vocabulary.getSnapshotSizeDataset(), model.getSnapshotSizeDataset());
        writer.write(vocabulary.getSnapshotSizeSnapshot(), model.getSnapshotSizeSnapshot());
        writer.write(vocabulary.getInitialNodesInViewer(), model.getInitialNodesInViewer());
        writer.write(vocabulary.getMaxNodesInViewer(), model.getMaxNodesInViewer());
        writer.write(vocabulary.getSelectedAttribute(), model.getSelectedAttribute());
        writer.write(vocabulary.getInputBytes(), model.getInputBytes());
        writer.unindent();
        return writer.toString();
    }

    /**
     * Builds an XML representation of a configuration: suppression settings, metric,
     * attribute weights and privacy criteria.
     *
     * @param config The configuration to serialize
     * @return The XML document as a string
     * @throws IOException If the XML writer fails
     */
    private String toXML(final ModelConfiguration config) throws IOException {

        final XMLWriter xml = new XMLWriter();
        xml.indent(vocabulary.getConfig());
        xml.write(vocabulary.getSuppressionAlwaysEnabled(), config.isSuppressionAlwaysEnabled());

        // Attribute types which the configuration reports as suppressed
        final AttributeType[] candidates = { AttributeType.QUASI_IDENTIFYING_ATTRIBUTE,
                                             AttributeType.SENSITIVE_ATTRIBUTE,
                                             AttributeType.INSENSITIVE_ATTRIBUTE };
        xml.indent(vocabulary.getSuppressedAttributeTypes());
        for (int i = 0; i < candidates.length; i++) {
            final AttributeType candidate = candidates[i];
            if (!config.isAttributeTypeSuppressed(candidate)) {
                continue;
            }
            xml.write(vocabulary.getType(), candidate.toString());
        }
        xml.unindent();

        // General settings
        xml.write(vocabulary.getPracticalMonotonicity(), config.isPracticalMonotonicity());
        xml.write(vocabulary.getRelativeMaxOutliers(), config.getSuppressionLimit());
        xml.write(vocabulary.getMetric(), config.getMetric().toString());

        // One element per attribute weight
        xml.indent(vocabulary.getAttributeWeights());
        for (final Entry<String, Double> weightEntry : config.getAttributeWeights().entrySet()) {
            final String attribute = weightEntry.getKey();
            final Double weight = weightEntry.getValue();
            xml.indent(vocabulary.getAttributeWeight());
            xml.write(vocabulary.getAttribute(), attribute);
            xml.write(vocabulary.getWeight(), weight);
            xml.unindent();
        }
        xml.unindent();

        // Privacy criteria; null entries are skipped
        xml.indent(vocabulary.getCriteria());
        for (final PrivacyCriterion criterion : config.getCriteria()) {
            if (criterion == null) {
                continue;
            }
            xml.write(vocabulary.getCriterion(), criterion.toString());
        }
        xml.unindent();

        // Close the configuration
        xml.unindent();
        return xml.toString();
    }

    /**
     * Builds an XML representation of the data definition, with one assignment element
     * per column of the given handle.
     *
     * @param config The configuration providing generalization limits and microaggregation settings
     * @param handle The handle whose columns are enumerated
     * @param definition The data definition to serialize
     * @return The XML document as a string
     * @throws IOException If the XML writer fails
     */
    private String toXML(final ModelConfiguration config, final DataHandle handle, final DataDefinition definition)
            throws IOException {

        XMLWriter writer = new XMLWriter();
        writer.indent(vocabulary.getDefinition());
        for (int i = 0; i < handle.getNumColumns(); i++) {
            final String attr = handle.getAttributeName(i);

            // Fall back to defaults if the definition has no entry for the attribute
            AttributeType t = definition.getAttributeType(attr);
            DataType<?> dt = definition.getDataType(attr);
            if (t == null) {
                t = AttributeType.IDENTIFYING_ATTRIBUTE;
            }
            if (dt == null) {
                dt = DataType.STRING;
            }

            writer.indent(vocabulary.getAssigment());
            writer.write(vocabulary.getName(), attr);
            writer.write(vocabulary.getType(), t.toString());
            writer.write(vocabulary.getDatatype(), dt.getDescription().getLabel());
            if (dt.getDescription().hasFormat()) {
                String format = ((DataTypeWithFormat) dt).getFormat();
                if (format != null) {
                    writer.write(vocabulary.getFormat(), format);
                }
                Locale locale = ((DataTypeWithFormat) dt).getLocale();
                if (locale != null) {
                    // Locale.ROOT makes the case conversion independent of the default locale of the
                    // JVM; with e.g. a Turkish default locale "it" would otherwise become "\u0130T"
                    writer.write(vocabulary.getLocale(), locale.getLanguage().toUpperCase(Locale.ROOT));
                }
            }

            // Response variables
            if (definition.isResponseVariable(attr)) {
                writer.write(vocabulary.getResponseVariable(), "true"); //$NON-NLS-1$
            }

            // Do we have a non-empty hierarchy? If so, reference its file and store the limits
            if (definition.getHierarchy(attr) != null && definition.getHierarchy(attr).length != 0
                    && definition.getHierarchy(attr)[0].length != 0) {
                writer.write(vocabulary.getRef(), "hierarchies/" + toFileName(attr) + ".csv"); //$NON-NLS-1$ //$NON-NLS-2$
                Integer min = config.getMinimumGeneralization(attr);
                Integer max = config.getMaximumGeneralization(attr);
                // A missing limit is written as "All"
                writer.write(vocabulary.getMin(), min == null ? "All" : String.valueOf(min)); //$NON-NLS-1$
                writer.write(vocabulary.getMax(), max == null ? "All" : String.valueOf(max)); //$NON-NLS-1$
            }

            // Do we have a microaggregate function
            // NOTE(review): presence is checked on the definition, but the values are read from
            // the configuration - confirm that both are always in sync
            if (definition.getMicroAggregationFunction(attr) != null) {
                writer.write(vocabulary.getMicroAggregationFunction(),
                        config.getMicroAggregationFunction(attr).getLabel());
                writer.write(vocabulary.getMicroAggregationIgnoreMissingData(),
                        config.getMicroAggregationIgnoreMissingData(attr));
            }

            writer.unindent();

        }
        writer.unindent();
        return writer.toString();
    }

    /**
     * Adds the entry {@code clipboard.xml} to the archive. Nothing is written
     * if the clipboard is empty.
     *
     * @param model The model providing the clipboard
     * @param map The transformation-to-id mapping, passed on to the XML conversion
     * @param zip The archive to write to
     * @throws IOException If writing fails
     */
    private void writeClipboard(final Model model, final Map<String, Integer> map, final ZipOutputStream zip)
            throws IOException {

        final List<ARXNode> entries = model.getClipboard().getClipboardEntries();
        if (!entries.isEmpty()) {

            // Write clipboard
            zip.putNextEntry(new ZipEntry("clipboard.xml")); //$NON-NLS-1$
            final Writer out = new OutputStreamWriter(zip);
            out.write(toXML(map, model.getClipboard().getClipboardEntries()));
            out.flush();
        }
    }

    /**
     * Writes the input and the output configuration of the model to the archive,
     * using the prefixes {@code input/} and {@code output/}. A configuration that
     * is {@code null} is skipped.
     *
     * @param model The model providing the configurations
     * @param zip The archive to write to
     * @throws IOException If writing fails
     */
    private void writeConfiguration(final Model model, final ZipOutputStream zip) throws IOException {

        final ModelConfiguration input = model.getInputConfig();
        if (input != null) {
            writeConfiguration(input, "input/", zip); //$NON-NLS-1$
        }

        final ModelConfiguration output = model.getOutputConfig();
        if (output != null) {
            writeConfiguration(output, "output/", zip); //$NON-NLS-1$
        }
    }

    /**
     * Writes a single configuration below the given prefix: the serialized object
     * ({@code config.dat}), its XML representation ({@code config.xml}), the data
     * definition and the hierarchies.
     *
     * @param config The configuration to write
     * @param prefix The prefix prepended to all entry names
     * @param zip The archive to write to
     * @throws IOException If writing fails
     */
    private void writeConfiguration(final ModelConfiguration config, final String prefix, final ZipOutputStream zip)
            throws IOException {

        // Binary representation
        final ZipEntry binaryEntry = new ZipEntry(prefix + "config.dat"); //$NON-NLS-1$
        zip.putNextEntry(binaryEntry);
        final ObjectOutputStream objects = new ObjectOutputStream(zip);
        objects.writeObject(config);
        objects.flush();

        // XML representation
        final ZipEntry xmlEntry = new ZipEntry(prefix + "config.xml"); //$NON-NLS-1$
        zip.putNextEntry(xmlEntry);
        final Writer out = new OutputStreamWriter(zip);
        out.write(toXML(config));
        out.flush();

        // Everything that belongs to the configuration
        writeDefinition(config, prefix, zip);
        writeHierarchies(config, prefix, zip);
    }

    /**
     * Writes the data definition that belongs to the given configuration to the entry
     * {@code definition.xml} below the given prefix. The input definition is used if the
     * configuration is the model's input configuration, the output definition otherwise.
     * Nothing is written if there is no definition or if the configuration has no input data.
     *
     * @param config The configuration whose definition is written
     * @param prefix The prefix prepended to the entry name
     * @param zip The archive to write to
     * @throws IOException If writing fails
     */
    private void writeDefinition(final ModelConfiguration config, final String prefix, final ZipOutputStream zip)
            throws IOException {

        // Obtain definition
        final DataDefinition definition;
        if (config == model.getInputConfig()) {
            definition = model.getInputDefinition();
        } else {
            definition = model.getOutputDefinition();
        }

        // Without input data there is no handle to enumerate the attributes; writeHierarchies
        // applies the same check before accessing the handle
        if (definition == null || config.getInput() == null) {
            return;
        }

        // Store
        zip.putNextEntry(new ZipEntry(prefix + "definition.xml")); //$NON-NLS-1$
        final Writer w = new OutputStreamWriter(zip);
        w.write(toXML(config, config.getInput().getHandle(), definition));
        w.flush();
    }

    /**
     * Serializes the node filter of the model to the entry {@code filter.dat}.
     * Nothing is written unless the model has both an anonymizer and a result.
     *
     * @param model The model providing the filter
     * @param zip The archive to write to
     * @throws IOException If writing fails
     */
    private void writeFilter(final Model model, final ZipOutputStream zip) throws IOException {

        final boolean hasResult = (model.getAnonymizer() != null) && (model.getResult() != null);
        if (hasResult) {
            zip.putNextEntry(new ZipEntry("filter.dat")); //$NON-NLS-1$
            final ObjectOutputStream objects = new ObjectOutputStream(zip);
            objects.writeObject(model.getNodeFilter());
            objects.flush();
        }
    }

    /**
     * Writes the hierarchies that belong to the given configuration as CSV entries below
     * {@code <prefix>hierarchies/}. Hierarchies stored in the configuration are written first,
     * followed by non-empty hierarchies from the data definition that have not been written yet.
     *
     * @param config The configuration whose hierarchies are written
     * @param prefix The prefix prepended to all entry names
     * @param zip The archive to write to
     * @throws IOException If writing fails
     */
    private void writeHierarchies(final ModelConfiguration config, final String prefix, final ZipOutputStream zip)
            throws IOException {

        // Store all from config
        Set<String> saved = new HashSet<>();
        for (Entry<String, Hierarchy> entry : config.getHierarchies().entrySet()) {

            // Store this hierarchy
            zip.putNextEntry(new ZipEntry(prefix + "hierarchies/" + toFileName(entry.getKey()) + ".csv")); //$NON-NLS-1$ //$NON-NLS-2$
            CSVDataOutput out = new CSVDataOutput(zip, model.getCSVSyntax().getDelimiter());
            out.write(entry.getValue().getHierarchy());
            saved.add(entry.getKey());
        }

        // This additional code implements a bugfix. ARX automatically creates hierarchies
        // implementing the identity function when the user does not specify one but defines the attribute
        // to be a quasi-identifier. These hierarchies were not serialized into project files in ARX 3.4.1,
        // leading to inconsistent files which could not be loaded any more. We now do our best to save
        // every relevant hierarchy:

        // Obtain definition
        final DataDefinition definition;
        if (config == model.getInputConfig()) {
            definition = model.getInputDefinition();
        } else {
            definition = model.getOutputDefinition();
        }

        // The definition may be missing (writeDefinition checks for this as well), in which
        // case there is nothing more to store
        if (definition == null || config.getInput() == null) {
            return;
        }

        // Store all from definition that have not yet been stored
        DataHandle handle = config.getInput().getHandle();
        for (int i = 0; i < handle.getNumColumns(); i++) {
            final String attr = handle.getAttributeName(i);

            // Do we have a non-empty hierarchy
            if (!saved.contains(attr) && definition.getHierarchy(attr) != null
                    && definition.getHierarchy(attr).length != 0
                    && definition.getHierarchy(attr)[0].length != 0) {

                // Store this hierarchy
                zip.putNextEntry(new ZipEntry(prefix + "hierarchies/" + toFileName(attr) + ".csv")); //$NON-NLS-1$ //$NON-NLS-2$
                CSVDataOutput out = new CSVDataOutput(zip, model.getCSVSyntax().getDelimiter());
                out.write(definition.getHierarchy(attr));
                saved.add(attr);
            }
        }
    }

    /**
     * Writes the input data as CSV to the entry {@code data/input.csv}, using the
     * delimiter of the model's CSV syntax. Nothing is written if the input
     * configuration has no input data or the input data has no handle.
     *
     * @param model The model providing the input data
     * @param zip The archive to write to
     * @throws IOException If writing fails
     */
    private void writeInput(final Model model, final ZipOutputStream zip) throws IOException {

        final ModelConfiguration inputConfig = model.getInputConfig();
        if (inputConfig.getInput() == null || inputConfig.getInput().getHandle() == null) {
            return;
        }

        zip.putNextEntry(new ZipEntry("data/input.csv")); //$NON-NLS-1$
        final CSVDataOutput csv = new CSVDataOutput(zip, model.getCSVSyntax().getDelimiter());
        csv.write(inputConfig.getInput().getHandle().iterator());
    }

    /**
     * Writes the result lattice to the archive, using the entries {@code lattice.xml},
     * {@code lattice.dat}, {@code infoloss.dat} and {@code attributes.dat}.
     *
     * @param model The model providing the result
     * @param zip The archive to write to
     * @return The mapping from the string form of each transformation to its id; empty
     *         if the model has no result or the result has no lattice
     * @throws IOException If writing fails
     */
    private Map<String, Integer> writeLattice(final Model model, final ZipOutputStream zip) throws IOException {

        final Map<String, Integer> ids = new HashMap<>();

        // Nothing to write without a lattice
        final boolean hasLattice = (model.getResult() != null) && (model.getResult().getLattice() != null);
        if (!hasLattice) {
            return ids;
        }

        // XML representation; this also fills the id mapping
        final ARXLattice lattice = model.getResult().getLattice();
        zip.putNextEntry(new ZipEntry("lattice.xml")); //$NON-NLS-1$
        toXML(ids, lattice, zip);

        // Serialized lattice, followed by its attribute map
        zip.putNextEntry(new ZipEntry("lattice.dat")); //$NON-NLS-1$
        final ObjectOutputStream latticeOut = new ObjectOutputStream(zip);
        latticeOut.writeObject(model.getResult().getLattice());
        latticeOut.writeObject(model.getResult().getLattice().access().getAttributeMap());
        latticeOut.flush();

        // Lowest and highest score per node id
        zip.putNextEntry(new ZipEntry("infoloss.dat")); //$NON-NLS-1$
        final Map<Integer, InformationLoss<?>> highest = new HashMap<>();
        final Map<Integer, InformationLoss<?>> lowest = new HashMap<>();
        for (final ARXNode[] nodes : lattice.getLevels()) {
            for (final ARXNode node : nodes) {
                final Integer id = ids.get(Arrays.toString(node.getTransformation()));
                lowest.put(id, node.getLowestScore());
                highest.put(id, node.getHighestScore());
            }
        }
        final ObjectOutputStream scoreOut = new ObjectOutputStream(zip);
        scoreOut.writeObject(lowest);
        scoreOut.writeObject(highest);
        scoreOut.flush();
        lowest.clear();
        highest.clear();

        // Attributes per node id
        zip.putNextEntry(new ZipEntry("attributes.dat")); //$NON-NLS-1$
        final Map<Integer, Map<Integer, Object>> attributes = new HashMap<>();
        for (final ARXNode[] nodes : lattice.getLevels()) {
            for (final ARXNode node : nodes) {
                final Integer id = ids.get(Arrays.toString(node.getTransformation()));
                attributes.put(id, node.getAttributes());
            }
        }
        final ObjectOutputStream attributeOut = new ObjectOutputStream(zip);
        attributeOut.writeObject(attributes);
        attributeOut.flush();
        attributes.clear();

        return ids;
    }

    /**
     * Writes the entry {@code metadata.xml}, which contains the version reported by
     * {@code Resources.getVersion()} and the version of the vocabulary in use.
     *
     * @param model Not used by this method
     * @param zip The archive to write to
     * @throws IOException If writing fails
     */
    private void writeMetadata(final Model model, final ZipOutputStream zip) throws IOException {

        zip.putNextEntry(new ZipEntry("metadata.xml")); //$NON-NLS-1$

        // The XML is written straight to the archive
        final OutputStreamWriter out = new OutputStreamWriter(zip);
        final XMLWriter xml = new XMLWriter(new FileBuilder(out));

        xml.indent(vocabulary.getMetadata());
        xml.write(vocabulary.getVersion(), Resources.getVersion());
        xml.write(vocabulary.getVocabulary(), vocabulary.getVocabularyVersion());
        xml.unindent();

        out.flush();
    }

    /**
     * Writes the model to the archive twice: as a serialized object
     * ({@code project.dat}) and as XML ({@code project.xml}).
     *
     * @param model The model to write
     * @param zip The archive to write to
     * @throws IOException If writing fails
     */
    private void writeModel(final Model model, final ZipOutputStream zip) throws IOException {

        // Binary representation
        final ZipEntry binaryEntry = new ZipEntry("project.dat"); //$NON-NLS-1$
        zip.putNextEntry(binaryEntry);
        final ObjectOutputStream objects = new ObjectOutputStream(zip);
        objects.writeObject(model);
        objects.flush();

        // XML representation
        final ZipEntry xmlEntry = new ZipEntry("project.xml"); //$NON-NLS-1$
        zip.putNextEntry(xmlEntry);
        final Writer out = new OutputStreamWriter(zip);
        out.write(toXML(model));
        out.flush();
    }

    /**
     * Writes the output data of the model to the entry {@code data/output.dat}.
     * Nothing is written if the model has no output.
     *
     * @param model The model providing the output
     * @param zip The archive to write to
     * @throws IOException If writing fails
     */
    private void writeOutput(final Model model, final ZipOutputStream zip) throws IOException {

        if (model.getOutput() == null) {
            return;
        }

        zip.putNextEntry(new ZipEntry("data/output.dat")); //$NON-NLS-1$
        final DataHandleOutput output = (DataHandleOutput) model.getOutput();
        output.write(zip);
    }
}