com.grnet.parsers.Worker.java Source code

Java tutorial

Introduction

Here is the source code for com.grnet.parsers.Worker.java

Source

/*******************************************************************************
 * Copyright (c) 2014 Kostas Vogias.
 * All rights reserved. This program and the accompanying materials
 * are made available under the terms of the GNU Public License v3.0
 * which accompanies this distribution, and is available at
 * http://www.gnu.org/licenses/gpl.html
 * 
 * Contributors:
 *     Kostas Vogias - initial API and implementation
 ******************************************************************************/
/**
 * 
 */
package com.grnet.parsers;

import java.io.File;
import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.util.List;
import java.util.Properties;

import org.ariadne.util.JDomUtils;
import org.ariadne.util.OaiUtils;
import org.jdom.Attribute;
import org.jdom.Document;
import org.jdom.Element;
import org.jdom.JDOMException;
import org.jdom.Namespace;
import org.jdom.input.SAXBuilder;
import org.slf4j.Logger;

import com.cybozu.labs.langdetect.Detector;
import com.cybozu.labs.langdetect.DetectorFactory;
import com.cybozu.labs.langdetect.LangDetectException;
import com.grnet.constants.Constants;
import com.grnet.stats.Stats;
import com.rabbitmq.client.Channel;
import com.rabbitmq.client.Connection;
import com.rabbitmq.client.ConnectionFactory;

/**
 * @author vogias
 * 
 */
public class Worker implements Runnable {

    /** Configuration: element XPaths, namespace prefix/URI and the language attribute name. */
    Properties properties;
    /** The XML file this worker processes. */
    File xml;

    /**
     * {@code outputPath}: directory for files written when {@code recon} is true;
     * {@code bad}: directory for files written when {@code recon} is false;
     * {@code strict}: when equal to "true", a failed detection sets {@code recon} to false;
     * {@code queue}: name of the RabbitMQ queue the log lines are published to.
     */
    String outputPath, bad, strict, queue;
    /** Shared counters updated while processing. */
    Stats stats;
    /**
     * {@code flag}: set once a language was detected for at least one element;
     * {@code recon}: false once a detection failed in strict mode.
     */
    boolean flag, recon;
    private Logger slf4jLogger;
    /** Factory used to open a RabbitMQ connection for every published message. */
    ConnectionFactory factory;

    /**
     * Creates a worker for a single XML file.
     *
     * @param xml         the XML file to process
     * @param properties  configuration read via the {@link Constants} keys
     * @param outputPath  directory the file is written to when {@code recon} stays true
     * @param bad         directory the file is written to when {@code recon} becomes false
     * @param stats       counters to update
     * @param slf4jLogger logger receiving one line per detection attempt
     * @param strict      "true" to route files with an undetectable element to {@code bad}
     * @param queue       name of the RabbitMQ queue to publish log lines to
     * @param factory     RabbitMQ connection factory
     */
    public Worker(File xml, Properties properties, String outputPath, String bad, Stats stats, Logger slf4jLogger,
            String strict, String queue, ConnectionFactory factory) {
        this.xml = xml;
        this.properties = properties;

        this.outputPath = outputPath;
        this.bad = bad;
        this.stats = stats;
        flag = false;
        recon = true;
        this.slf4jLogger = slf4jLogger;
        this.strict = strict;
        this.factory = factory;
        this.queue = queue;

    }

    /**
     * Parses the XML file, adds a detected language attribute to every configured,
     * non-empty element that has no usable one (missing, empty or "none"), then
     * writes the document to {@code outputPath} or {@code bad} and updates the
     * statistics. JDOM and I/O failures abort the file and are printed to stderr.
     */
    @Override
    public void run() {
        String name = xml.getName();
        try {
            // NOTE(review): the builder uses default parser settings; if the input
            // files are untrusted, consider disabling DTDs/external entities.
            SAXBuilder builder = new SAXBuilder();
            Document document = builder.build(xml);

            Record record = new Record();
            record.setMetadata(document.getRootElement());

            String[] elements = properties.getProperty(Constants.elements).split(",");

            // Loop-invariant configuration, resolved once instead of per element.
            Namespace namespace = Namespace.getNamespace(properties.getProperty(Constants.prefix),
                    properties.getProperty(Constants.uri));
            String attName = properties.getProperty(Constants.attName);

            for (String elementPath : elements) {
                List<Element> elementList = JDomUtils.getXpathList(elementPath, namespace, record.getMetadata());
                if (elementList == null) {
                    continue;
                }

                for (Element elmt : elementList) {
                    String text = elmt.getText();
                    if (text.equals("")) {
                        continue;
                    }
                    if (!needsLanguage(elmt.getAttribute(attName))) {
                        continue;
                    }
                    detectAndTag(elmt, text, elementPath, attName, name);
                }
            }

            // The serialized document is the same in both cases; only the target directory differs.
            String xmlString = JDomUtils.parseXml2string(record.getMetadata().getDocument(), null);
            String targetDir = recon ? outputPath : bad;
            OaiUtils.writeStringToFileInEncodingUTF8(xmlString, targetDir + File.separator + name);

            if (flag) {
                stats.raiseFilesLangDetected();
            }
            if (!recon) {
                stats.raiseFilessLangNotDetected();
            }

        } catch (JDOMException | IOException e1) {
            // Kept on stderr: the injected logger's output is mirrored to the queue
            // as record lines, so failures are deliberately not mixed into it.
            e1.printStackTrace();
        }

    }

    /** Returns true when the language attribute is missing, empty or "none". */
    private static boolean needsLanguage(Attribute langAtt) {
        if (langAtt == null) {
            return true;
        }
        String value = langAtt.getValue();
        return value.equals("") || value.equals("none");
    }

    /**
     * Detects the language of {@code text}, stores it on {@code elmt} under
     * {@code attName}, and logs/publishes the outcome. When detection fails, a
     * "NoLangDetected" line is logged/published instead and, in strict mode, the
     * file is marked for the {@code bad} directory.
     *
     * @throws IOException if publishing to the queue fails
     */
    private void detectAndTag(Element elmt, String text, String elementPath, String attName, String fileName)
            throws IOException {
        try {
            Detector detector = DetectorFactory.create();
            detector.append(text);
            String lang = detector.detect();

            elmt.setAttribute(new Attribute(attName, lang));
            stats.raiseElementsLangDetected();

            String logLine = recordLabel(fileName) + " " + elementPath + " " + lang;
            slf4jLogger.info(logLine);

            System.out.println("Opening queue connection...");
            publish(logLine);
            System.out.println("Queue connection closed.");

            stats.addElementD(elementPath);
            flag = true;
        } catch (LangDetectException e) {
            String logLine = recordLabel(fileName) + " NoLangDetected";
            slf4jLogger.info(logLine);
            publish(logLine);

            // Null-safe comparison: a missing strict setting is treated as non-strict.
            if ("true".equals(strict)) {
                recon = false;
            }
        }
    }

    /** Builds the "<parent directory name> <file name without extension>" log prefix. */
    private String recordLabel(String fileName) {
        return xml.getParentFile().getName() + " " + stripExtension(fileName);
    }

    /** Returns {@code fileName} up to its last '.', or unchanged when it has none. */
    private static String stripExtension(String fileName) {
        int dot = fileName.lastIndexOf('.');
        return dot < 0 ? fileName : fileName.substring(0, dot);
    }

    /**
     * Publishes {@code message} (UTF-8 encoded) to the configured queue over a
     * fresh connection. On any failure the connection is aborted so the socket
     * is released without masking the original exception.
     *
     * @throws IOException if connecting, declaring the queue, publishing or closing fails
     */
    private void publish(String message) throws IOException {
        Connection connection = factory.newConnection();
        boolean closedCleanly = false;
        try {
            Channel channel = connection.createChannel();
            channel.queueDeclare(queue, false, false, false, null);
            channel.basicPublish("", queue, null, message.getBytes(StandardCharsets.UTF_8));
            channel.close();
            connection.close();
            closedCleanly = true;
        } finally {
            if (!closedCleanly) {
                // abort() closes the connection and its channels, discarding close errors.
                connection.abort();
            }
        }
    }
}