com.github.jferard.pgloaderutils.sniffer.csd.CSDSchemaSniffer.java Source code

Java tutorial

Introduction

Here is the source code for com.github.jferard.pgloaderutils.sniffer.csd.CSDSchemaSniffer.java

Source

/*
 * Some utilities for loading csv data into a PosgtreSQL database:
 * detect file encoding, CSV format and populate database
 *
 *     Copyright (C) 2016, 2018 J. Frard <https://github.com/jferard>
 *
 * This file is part of pgLoader Utils.
 *
 * pgLoader Utils is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * pgLoader Utils is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */

package com.github.jferard.pgloaderutils.sniffer.csd;

import org.apache.commons.csv.CSVParser;
import org.apache.commons.csv.CSVRecord;

import java.util.Iterator;
import java.util.logging.Logger;

/**
 * The CSDSchemaSniffer is a csv. Given a CSDSchemaPattern, a CSVParser and a maximul line, it will return a
 * CSDSchema (or null).
 */
public class CSDSchemaSniffer<F extends CSDFieldPattern> {
    public static <G extends CSDFieldPattern> CSDSchemaSniffer<G> create(Logger logger,
            CSDFieldFactory<G> factory) {
        CSDUtil u = new CSDUtil(logger);
        CSDValidatorHelper<G> hv = new CSDValidatorHelper<G>(logger, new FlexibleColumnMatcher(logger, u, 2));
        return new CSDSchemaSniffer<G>(logger, factory, hv);
    }

    private final Logger logger;
    private CSDValidationResult<F> result;
    private CSDFieldFactory<F> factory;
    private CSDValidatorHelper<F> validatorHelper;

    public CSDSchemaSniffer(Logger logger, CSDFieldFactory<F> factory, CSDValidatorHelper<F> validatorHelper) {
        this.logger = logger;
        this.factory = factory;
        this.validatorHelper = validatorHelper;
    }

    /**
     * @param schemaPattern the pattern to be tested
     * @param parser        the CSVCRecord provider
     * @param maxLine       the maximum number of lines
     * @return the real CSDSchema, or null if the pattern does not match.
     */
    public CSDSchema<F> sniff(CSDSchemaPattern<F> schemaPattern, CSVParser parser, int maxLine) {
        this.result = new CSDValidationResult<F>(logger, schemaPattern);
        Iterator<CSVRecord> it = parser.iterator();

        if (!it.hasNext()) {
            result.noLine();
            return null;
        }

        CSVRecord firstRecord = it.next();
        if (!this.validateHeaderOrFirstRecord(result, schemaPattern, firstRecord))
            return null;

        int i = 1;
        while (it.hasNext() && i < maxLine)
            this.validatorHelper.validateRecord(result, schemaPattern, it.next(), i++);

        if (this.result.errorCount() > maxLine)
            return null;

        return schemaPattern.newSchema(factory, firstRecord);
    }

    private boolean validateHeaderOrFirstRecord(CSDValidationResult<F> result, CSDSchemaPattern<F> schemaPattern,
            CSVRecord firstRecord) {
        int headerErrorCount = this.validatorHelper.validateHeader(this.result, schemaPattern, firstRecord);

        switch (headerErrorCount) {
        case -1:
            return false;
        case 0:
            return true;
        case 1:
            this.logger.severe("One clear error in header, but it seems to match.");
            return true;
        default:
            if (schemaPattern.hasOptionalHeader()) {
                this.logger.info("The header does not match. Maybe a missing header.");
                CSDValidationResult<F> tempResult = new CSDValidationResult<F>(this.logger, schemaPattern);
                this.validatorHelper.validateRecord(tempResult, schemaPattern, firstRecord, 1);
                if (tempResult.isOk()) {
                    this.result = tempResult;
                    this.result.missingHeader();
                    return true;
                } else {
                    this.result.badHeader();
                    return false;
                }
            } else {
                return false;
            }
        }

    }
}