org.deidentifier.arx.examples.Example47.java Source code

Java tutorial

Introduction

Here is the source code for org.deidentifier.arx.examples.Example47.java

Source

/*
 * ARX: Powerful Data Anonymization
 * Copyright 2012 - 2016 Fabian Prasser, Florian Kohlmayer and contributors
 * 
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 * 
 * http://www.apache.org/licenses/LICENSE-2.0
 * 
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.deidentifier.arx.examples;

import java.io.IOException;

import org.apache.commons.lang.StringUtils;
import org.deidentifier.arx.ARXPopulationModel;
import org.deidentifier.arx.AttributeType;
import org.deidentifier.arx.Data;
import org.deidentifier.arx.DataHandle;
import org.deidentifier.arx.risk.RiskEstimateBuilder;
import org.deidentifier.arx.risk.RiskModelAttributes;

/**
 * Example for evaluating distinction and separation of attributes as described in
 * R. Motwani et al. "Efficient algorithms for masking and finding quasi-identifiers"
 * Proc. VLDB Conf., 2007.
 * 
 * @author Maximilian Zitzmann
 * @author Fabian Prasser
 */
public class Example47 extends Example {

    /**
     * Minimum width of the identifier column's content. Keeps the column wide
     * enough for the " Identifier " header text even if all identifiers are short.
     */
    private static final int MIN_IDENTIFIER_WIDTH = 12;

    /**
     * Entry point. Builds the example data set, marks every column as
     * quasi-identifying, prints the input data and then prints a table with the
     * distinction and separation of the quasi-identifiers.
     *
     * @param args the arguments (unused)
     * @throws IOException declared by the example's signature
     */
    public static void main(String[] args) throws IOException {

        Data data = loadData();

        // Flag every attribute as quasi identifier.
        // NOTE: the handle is intentionally re-fetched via data.getHandle() on each
        // use instead of being cached in a local, exactly as in the original example.
        for (int i = 0; i < data.getHandle().getNumColumns(); i++) {
            data.getDefinition().setAttributeType(data.getHandle().getAttributeName(i),
                    AttributeType.QUASI_IDENTIFYING_ATTRIBUTE);
        }

        // Perform risk analysis
        System.out.println("\n - Input data");
        print(data.getHandle());

        System.out.println("\n - Quasi-identifiers with values (in percent):");
        analyzeAttributes(data.getHandle());
    }

    /**
     * Calculates alpha distinction and separation for the given handle and
     * prints the result as a table.
     *
     * @param handle the data handle
     */
    private static void analyzeAttributes(DataHandle handle) {
        ARXPopulationModel populationModel = ARXPopulationModel.create(ARXPopulationModel.Region.USA);
        RiskEstimateBuilder builder = handle.getRiskEstimator(populationModel);
        RiskModelAttributes riskModel = builder.getAttributeRisks();

        // output
        printPrettyTable(riskModel.getAttributeRisks());
    }

    /**
     * Creates the in-memory example data set. The first row added is the header
     * (age, sex, state); it is followed by twelve data rows.
     *
     * @return the example data
     */
    private static Data loadData() {
        // Define data
        Data.DefaultData data = Data.create();
        data.add("age", "sex", "state");
        data.add("20", "Female", "CA");
        data.add("30", "Female", "CA");
        data.add("40", "Female", "TX");
        data.add("20", "Male", "NY");
        data.add("40", "Male", "CA");
        data.add("53", "Male", "CA");
        data.add("76", "Male", "EU");
        data.add("40", "Female", "AS");
        data.add("32", "Female", "CA");
        data.add("88", "Male", "CA");
        data.add("48", "Female", "AS");
        data.add("76", "Male", "UU");
        return data;
    }

    /**
     * Helper that prints a table with one row per quasi-identifier, showing its
     * distinction and separation in percent.
     * <p>
     * The identifier column is sized to the widest identifier in the array (but
     * never narrower than {@link #MIN_IDENTIFIER_WIDTH}), so the table stays
     * aligned regardless of the order of the entries. An empty array yields a
     * table consisting of the header only.
     *
     * @param quasiIdentifiers the quasi-identifier risks to print; must not be null
     */
    private static void printPrettyTable(RiskModelAttributes.QuasiIdentifierRisk[] quasiIdentifiers) {

        // Determine the widest identifier over ALL entries. Looking only at the
        // last element would fail on an empty array and would misalign the table
        // whenever the last element is not the widest one.
        int identifierWidth = MIN_IDENTIFIER_WIDTH;
        for (RiskModelAttributes.QuasiIdentifierRisk quasiIdentifier : quasiIdentifiers) {
            // String.valueOf mirrors how "%s" renders the identifier below
            int width = String.valueOf(quasiIdentifier.getIdentifier()).length();
            identifierWidth = Math.max(identifierWidth, width);
        }

        // Row pattern: left-aligned identifier, then two right-aligned percentages
        String rowFormat = "| %-" + identifierWidth + "s | %13.2f | %12.2f |%n";

        // The cell is two characters wider than its content because the row
        // pattern puts one space on each side of the identifier
        int identifierCellWidth = identifierWidth + 2;

        // Spaces needed to pad the header text up to the right column border
        String identifierHeader = " Identifier ";
        int headerPadding = identifierCellWidth - identifierHeader.length();

        // Horizontal rule, reused above the header, below it and below the rows
        String separator = "+" + StringUtils.repeat("-", identifierCellWidth)
                + "+---------------+--------------+%n";

        System.out.format(separator);
        System.out.format("|" + identifierHeader + StringUtils.repeat(" ", headerPadding)
                + "|   Distinction |   Separation |%n");
        System.out.format(separator);
        for (RiskModelAttributes.QuasiIdentifierRisk quasiIdentifier : quasiIdentifiers) {
            // print every Quasi-Identifier; risks are fractions, shown as percent
            System.out.format(rowFormat, quasiIdentifier.getIdentifier(),
                    quasiIdentifier.getDistinction() * 100, quasiIdentifier.getSeparation() * 100);
        }
        System.out.format(separator);
    }
}