org.talend.dataquality.statistics.semantic.SemanticStatisticsTestBase.java Source code

Java tutorial

Introduction

Here is the source code for org.talend.dataquality.statistics.semantic.SemanticStatisticsTestBase.java

Source

// ============================================================================
//
// Copyright (C) 2006-2016 Talend Inc. - www.talend.com
//
// This source code is available under agreement available at
// %InstallDIR%\features\org.talend.rcp.branding.%PRODUCTNAME%\%PRODUCTNAME%license.txt
//
// You should have received a copy of the agreement
// along with this program; if not, write to Talend SA
// 9 rue Pages 92150 Suresnes, France
//
// ============================================================================
package org.talend.dataquality.statistics.semantic;

import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.List;

import org.apache.commons.io.IOUtils;
import org.apache.commons.lang.StringUtils;
import org.talend.dataquality.semantic.classifier.SemanticCategoryEnum;

/**
 * Shared fixtures for the semantic statistics tests: the input datasets, read from CSV resources resolved relative
 * to this class, and the category ids expected for each of them.
 * <p>
 * created by talend on 2015-07-28
 */
class SemanticStatisticsTestBase {

    /** Resource names of the input datasets; the position in this array is the dataset index. */
    private static final String[] DATASET_RESOURCES = { //
            "customers_100_bug_TDQ10380.csv", // dataset[0]
            "avengers.csv", // dataset[1]
            "gender.csv", // dataset[2]
            "dataset_with_invalid_records.csv" // dataset[3]
    };

    /** Parsed records of every dataset, in the order of {@link #DATASET_RESOURCES}. */
    protected final List<List<String[]>> INPUT_RECORDS = loadInputRecords();

    /**
     * Expected category ids, one array per dataset and in the same order as {@link #INPUT_RECORDS}. An empty string
     * presumably means that no category is expected at that position (to be confirmed against the tests using it).
     */
    protected final List<String[]> EXPECTED_CATEGORIES = buildExpectedCategories();

    /**
     * Reads every dataset listed in {@link #DATASET_RESOURCES}.
     *
     * @return one list of records per dataset, in declaration order
     * @throws IllegalArgumentException if one of the resources cannot be found
     */
    private static List<List<String[]>> loadInputRecords() {
        final List<List<String[]>> datasets = new ArrayList<List<String[]>>(DATASET_RESOURCES.length);
        for (String resource : DATASET_RESOURCES) {
            final InputStream stream = SemanticStatisticsTestBase.class.getResourceAsStream(resource);
            if (stream == null) {
                // Fail with the resource name rather than the generic message raised by getRecords.
                throw new IllegalArgumentException("Test resource not found: " + resource);
            }
            datasets.add(getRecords(stream));
        }
        return datasets;
    }

    /**
     * Builds the expected category ids of the four datasets.
     *
     * @return one array of category ids per dataset, in dataset order
     */
    private static List<String[]> buildExpectedCategories() {
        final List<String[]> expected = new ArrayList<String[]>();
        expected.add(new String[] { // dataset[0]
                "", //
                SemanticCategoryEnum.FIRST_NAME.getId(), //
                SemanticCategoryEnum.CITY.getId(), //
                SemanticCategoryEnum.US_STATE_CODE.getId(), //
                "", //
                SemanticCategoryEnum.CITY.getId(), //
                "", //
                "", //
                "" //
        });
        expected.add(new String[] { // dataset[1]
                "", //
                SemanticCategoryEnum.FIRST_NAME.getId(), //
                SemanticCategoryEnum.LAST_NAME.getId(), //
                "", //
                SemanticCategoryEnum.CITY.getId() //
        });
        expected.add(new String[] { // dataset[2]
                SemanticCategoryEnum.FIRST_NAME.getId(), //
                "", //
                SemanticCategoryEnum.GENDER.getId() //
        });
        expected.add(new String[] { // dataset[3]
                SemanticCategoryEnum.FIRST_NAME.getId(), //
                "" //
        });
        return expected;
    }

    /**
     * Reads all records from the given stream, splitting each line on {@code ";"}.
     *
     * @param inputStream the stream to read; it is closed before this method returns
     * @return one {@code String[]} per line of the stream
     * @throws IllegalArgumentException if {@code inputStream} is {@code null}
     * @throws RuntimeException wrapping the {@link IOException} raised while reading
     */
    protected static List<String[]> getRecords(InputStream inputStream) {
        return getRecords(inputStream, ";");
    }

    /**
     * Reads all records from the given stream, splitting each line on the given separator.
     *
     * @param inputStream the stream to read; it is closed before this method returns
     * @param lineSeparator despite its name, the delimiter between the fields of a single line (lines themselves are
     * split by {@code IOUtils.readLines}); it is matched as a whole string and empty fields are kept
     * @return one {@code String[]} per line of the stream
     * @throws IllegalArgumentException if {@code inputStream} is {@code null}
     * @throws RuntimeException wrapping the {@link IOException} raised while reading
     */
    protected static List<String[]> getRecords(InputStream inputStream, String lineSeparator) {
        if (inputStream == null) {
            throw new IllegalArgumentException("Input stream cannot be null.");
        }
        try {
            final List<String[]> records = new ArrayList<String[]>();
            // NOTE(review): this overload decodes with the platform default charset; if the resources contain
            // non-ASCII data, consider passing their actual encoding explicitly - confirm the encoding first.
            for (String line : IOUtils.readLines(inputStream)) {
                records.add(StringUtils.splitByWholeSeparatorPreserveAllTokens(line, lineSeparator));
            }
            return records;
        } catch (IOException e) {
            throw new RuntimeException(e);
        } finally {
            try {
                inputStream.close();
            } catch (IOException e) {
                // A failure to close must not replace the result or the original exception:
                // report it on stderr and carry on.
                e.printStackTrace();
            }
        }
    }
}