org.languagetool.dev.dumpcheck.DatabaseHandler.java Source code

Java tutorial

Introduction

Here is the source code for org.languagetool.dev.dumpcheck.DatabaseHandler.java, a result handler that stores LanguageTool rule matches in a database using batched JDBC inserts.

Source

/* LanguageTool, a natural language style checker 
 * Copyright (C) 2013 Daniel Naber (http://www.danielnaber.de)
 * 
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301
 * USA
 */
package org.languagetool.dev.dumpcheck;

import org.apache.commons.lang3.StringUtils;
import org.languagetool.Language;
import org.languagetool.rules.Rule;
import org.languagetool.rules.RuleMatch;
import org.languagetool.rules.patterns.AbstractPatternRule;
import org.languagetool.tools.ContextTools;

import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.sql.*;
import java.util.Date;
import java.util.List;
import java.util.Properties;

/**
 * Store rule matches to a database.
 *
 * <p>Matches are written to the {@code corpus_match} table through a single
 * {@link PreparedStatement}. Rows are collected with {@code addBatch()} and
 * sent to the database whenever {@code batchSize} rows have accumulated; any
 * remaining rows are flushed by {@link #close()}.
 *
 * <p>The database connection is configured via a properties file with the
 * required keys {@code dbUrl}, {@code dbUser} and {@code dbPassword} and the
 * optional key {@code batchSize} (default {@code 1}).
 *
 * @since 2.4
 */
class DatabaseHandler extends ResultHandler {

    private static final int MAX_CONTEXT_LENGTH = 500;
    private static final int SMALL_CONTEXT_LENGTH = 40; // do not modify - it would break lookup of errors marked as 'false alarm'

    private final Connection conn;
    private final ContextTools contextTools;
    private final ContextTools smallContextTools;

    private final PreparedStatement insertSt;
    private final int batchSize;

    // number of rows added to insertSt since the last executeBatch()
    private int batchCount = 0;

    /**
     * Opens the database connection described by {@code propertiesFile} and
     * prepares the insert statement.
     *
     * @param propertiesFile properties file with the keys {@code dbUrl}, {@code dbUser},
     *                       {@code dbPassword} and optionally {@code batchSize}
     * @param maxSentences   passed on to the {@code ResultHandler} constructor
     * @param maxErrors      passed on to the {@code ResultHandler} constructor
     * @throws RuntimeException if the file cannot be read, a required key is missing,
     *                          or the connection/statement cannot be created
     */
    DatabaseHandler(File propertiesFile, int maxSentences, int maxErrors) {
        super(maxSentences, maxErrors);

        String insertSql = "INSERT INTO corpus_match "
                + "(version, language_code, ruleid, rule_category, rule_subid, rule_description, message, error_context, small_error_context, corpus_date, "
                + "check_date, sourceuri, source_type, is_visible) "
                + "VALUES (0, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, 1)";

        int configuredBatchSize;
        Connection connection = null;
        PreparedStatement statement;
        Properties dbProperties = new Properties();
        try (FileInputStream inStream = new FileInputStream(propertiesFile)) {
            dbProperties.load(inStream);
            String dbUrl = getProperty(dbProperties, "dbUrl");
            String dbUser = getProperty(dbProperties, "dbUser");
            String dbPassword = getProperty(dbProperties, "dbPassword");
            configuredBatchSize = Integer.decode(dbProperties.getProperty("batchSize", "1"));
            connection = DriverManager.getConnection(dbUrl, dbUser, dbPassword);
            statement = connection.prepareStatement(insertSql);
        } catch (SQLException | IOException e) {
            // Do not leak the connection if it was opened but a later step failed.
            if (connection != null) {
                try {
                    connection.close();
                } catch (SQLException closeFailure) {
                    e.addSuppressed(closeFailure);
                }
            }
            throw new RuntimeException(e);
        }
        batchSize = configuredBatchSize;
        conn = connection;
        insertSt = statement;

        contextTools = new ContextTools();
        contextTools.setContextSize(MAX_CONTEXT_LENGTH);
        contextTools.setErrorMarkerStart(MARKER_START);
        contextTools.setErrorMarkerEnd(MARKER_END);
        contextTools.setEscapeHtml(false);
        smallContextTools = new ContextTools();
        smallContextTools.setContextSize(SMALL_CONTEXT_LENGTH);
        smallContextTools.setErrorMarkerStart(MARKER_START);
        smallContextTools.setErrorMarkerEnd(MARKER_END);
        smallContextTools.setEscapeHtml(false);
    }

    /**
     * Returns the value of a mandatory property.
     *
     * @throws RuntimeException if {@code key} is not present in {@code prop}
     */
    private String getProperty(Properties prop, String key) {
        String value = prop.getProperty(key);
        if (value == null) {
            throw new RuntimeException("Required key '" + key + "' not found in properties");
        }
        return value;
    }

    /**
     * Adds one row per rule match to the pending batch and flushes the batch
     * once {@code batchSize} rows have accumulated. Matches whose large context
     * exceeds {@code MAX_CONTEXT_LENGTH} characters are skipped.
     *
     * @throws RuntimeException on any failure other than the limit exceptions,
     *                          which are rethrown unchanged
     */
    @Override
    protected void handleResult(Sentence sentence, List<RuleMatch> ruleMatches, Language language) {
        try {
            java.sql.Date nowDate = new java.sql.Date(new Date().getTime());
            for (RuleMatch match : ruleMatches) {
                String context = contextTools.getContext(match.getFromPos(), match.getToPos(), sentence.getText());
                if (context.length() > MAX_CONTEXT_LENGTH) {
                    // let's skip these strange cases, as shortening the text might leave us behind with invalid markup etc
                    // (checked before binding anything so no half-bound parameters are left on the statement)
                    continue;
                }
                String smallContext = smallContextTools.getContext(match.getFromPos(), match.getToPos(),
                        sentence.getText());

                Rule rule = match.getRule();
                insertSt.setString(1, language.getShortCode());
                insertSt.setString(2, rule.getId());
                insertSt.setString(3, rule.getCategory().getName());
                if (rule instanceof AbstractPatternRule) {
                    AbstractPatternRule patternRule = (AbstractPatternRule) rule;
                    insertSt.setString(4, patternRule.getSubId());
                } else {
                    insertSt.setNull(4, Types.VARCHAR);
                }
                insertSt.setString(5, rule.getDescription());
                insertSt.setString(6, StringUtils.abbreviate(match.getMessage(), 255));
                insertSt.setString(7, context);
                insertSt.setString(8, StringUtils.abbreviate(smallContext, 255));
                insertSt.setDate(9, nowDate); // should actually be the dump's date, but isn't really used anyway...
                insertSt.setDate(10, nowDate);
                insertSt.setString(11, sentence.getUrl());
                insertSt.setString(12, sentence.getSource());
                insertSt.addBatch();
                if (++batchCount >= batchSize) {
                    executeBatch(); // also resets batchCount
                }

                checkMaxErrors(++errorCount);
                if (errorCount % 100 == 0) {
                    System.out.println("Storing error #" + errorCount + " for text:");
                    System.out.println("  " + sentence.getText());
                }
            }
            checkMaxSentences(++sentenceCount);
        } catch (DocumentLimitReachedException | ErrorLimitReachedException e) {
            throw e;
        } catch (Exception e) {
            throw new RuntimeException("Error storing matches for '" + sentence.getTitle() + "'", e);
        }
    }

    /**
     * Sends the pending batch to the database as one transaction.
     *
     * <p>If the connection was in auto-commit mode, the batch is committed on
     * success and rolled back on failure; the previous auto-commit mode is
     * restored afterwards. If auto-commit was already off, committing or
     * rolling back is left to whoever manages that transaction.
     *
     * <p>The pending-row counter is reset in either case; after a failure the
     * statement's batch is cleared so the failed rows are not submitted again.
     */
    private void executeBatch() throws SQLException {
        boolean autoCommit = conn.getAutoCommit();
        conn.setAutoCommit(false);
        try {
            insertSt.executeBatch();
            if (autoCommit) {
                conn.commit();
            }
        } catch (SQLException e) {
            discardFailedBatch(autoCommit, e);
            throw e;
        } finally {
            batchCount = 0;
            // Changing the auto-commit mode commits an open transaction, which is
            // why a failed batch has to be rolled back before we get here.
            conn.setAutoCommit(autoCommit);
        }
    }

    /**
     * Rolls back (if this handler opened the transaction) and clears the
     * statement's batch after a failed flush. Cleanup failures are attached to
     * {@code cause} as suppressed exceptions so the original error is kept.
     */
    private void discardFailedBatch(boolean ownsTransaction, SQLException cause) {
        if (ownsTransaction) {
            try {
                conn.rollback();
            } catch (SQLException rollbackFailure) {
                cause.addSuppressed(rollbackFailure);
            }
        }
        try {
            insertSt.clearBatch();
        } catch (SQLException clearFailure) {
            cause.addSuppressed(clearFailure);
        }
    }

    /**
     * Flushes any rows still pending in the batch, then closes the statement
     * and the connection. Both are closed even if the flush (or closing the
     * statement) fails.
     */
    @Override
    public void close() throws Exception {
        // insertSt and conn are final and assigned by the constructor (which throws otherwise)
        try {
            if (batchCount > 0) {
                executeBatch();
            }
        } finally {
            try {
                insertSt.close();
            } finally {
                conn.close();
            }
        }
    }

}