it.drwolf.ridire.util.fixingpos.AsyncPosFixer.java Source code

Java tutorial

Introduction

Here is the source code for it.drwolf.ridire.util.fixingpos.AsyncPosFixer.java

Source

/*******************************************************************************
 * Copyright 2013 Università degli Studi di Firenze
 * 
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 * 
 *   http://www.apache.org/licenses/LICENSE-2.0
 * 
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 ******************************************************************************/
package it.drwolf.ridire.util.fixingpos;

import it.drwolf.ridire.entity.CommandParameter;
import it.drwolf.ridire.entity.CrawledResource;
import it.drwolf.ridire.index.cwb.scripts.VRTFilesBuilder;
import it.drwolf.ridire.session.async.JobMapperMonitor;
import it.drwolf.ridire.session.async.WordCounter;
import it.drwolf.ridire.utility.RIDIREReTagger;

import java.io.File;
import java.io.IOException;
import java.io.Serializable;
import java.util.List;

import javax.persistence.EntityManager;
import javax.transaction.HeuristicMixedException;
import javax.transaction.HeuristicRollbackException;
import javax.transaction.NotSupportedException;
import javax.transaction.RollbackException;
import javax.transaction.SystemException;

import org.apache.commons.io.FileUtils;
import org.apache.commons.io.FilenameUtils;
import org.apache.commons.lang.text.StrTokenizer;
import org.jboss.seam.Component;
import org.jboss.seam.annotations.In;
import org.jboss.seam.annotations.Name;
import org.jboss.seam.annotations.async.Asynchronous;
import org.jboss.seam.transaction.UserTransaction;

/**
 * Seam component that re-runs PoS tagging for a list of crawled resources and
 * regenerates their VRT files.
 *
 * <p>
 * The input is a text file with one entry per line; each entry is reduced to a
 * resource digest by stripping every {@code "./"} and {@code ".vrt"}
 * occurrence. For each digest the matching {@link CrawledResource} is looked
 * up, its plain-text file is retagged, the word count is updated, and a VRT
 * file plus its reversed counterpart are written to the configured folders.
 */
@Name("asyncPosFixer")
public class AsyncPosFixer implements Serializable {

    private static final long serialVersionUID = 3068568850090727817L;

    // Working state, (re)assigned at the start of every doAsyncFix call.
    private EntityManager entityManager;
    private UserTransaction userTx;
    private RIDIREReTagger ridireReTagger;

    @In(create = true)
    private VRTFilesBuilder vrtFilesBuilder;

    @In(create = true)
    private WordCounter wordCounter;

    /**
     * Retags every resource listed in {@code posFixerData.getFile()} and
     * rebuilds its VRT files.
     *
     * <p>
     * Each resource is handled in its own transaction. A checked transaction
     * or I/O failure is reported on {@code System.err} and stops the run;
     * whatever transaction is still active at that point is rolled back.
     *
     * @param posFixerData
     *            supplies the list file, the VRT destination folder and the
     *            reversed-VRT destination folder; both folders must already
     *            exist, otherwise nothing is done
     */
    @Asynchronous
    public void doAsyncFix(PosFixerData posFixerData) {
        StrTokenizer strTokenizer = new StrTokenizer("\t");
        File destDir = new File(posFixerData.getDestDir());
        File reverseDestDir = new File(posFixerData.getReverseDestDir());
        // isDirectory() is already false for a path that does not exist.
        if (!destDir.isDirectory() || !reverseDestDir.isDirectory()) {
            System.err.println("Not valid destination folder.");
            return;
        }
        this.ridireReTagger = new RIDIREReTagger(null);
        try {
            this.entityManager = (EntityManager) Component.getInstance("entityManager");
            this.userTx = (UserTransaction) Component.getInstance("org.jboss.seam.transaction.transaction");
            // NOTE(review): JTA's setTransactionTimeout takes SECONDS, so this
            // is 600000 s (about a week), not 10 minutes. Value kept as-is to
            // avoid changing runtime behaviour - confirm the intended timeout.
            this.userTx.setTransactionTimeout(1000 * 10 * 60);
            if (!this.configureTreeTagger()) {
                // The finally block rolls back the transaction left open.
                return;
            }
            List<String> lines = FileUtils.readLines(new File(posFixerData.getFile()));
            int count = 0;
            for (String line : lines) {
                if (line == null) {
                    continue;
                }
                // Use the trimmed entry everywhere so that stray whitespace
                // around a file name does not break the digest lookup.
                String entry = line.trim();
                if (entry.length() < 1) {
                    continue;
                }
                this.fixResource(entry, strTokenizer, destDir, reverseDestDir);
                System.out.println(" Processed " + (++count) + " of " + lines.size());
            }
        } catch (SystemException e) {
            this.reportFailure(e);
        } catch (NotSupportedException e) {
            this.reportFailure(e);
        } catch (SecurityException e) {
            this.reportFailure(e);
        } catch (IllegalStateException e) {
            this.reportFailure(e);
        } catch (RollbackException e) {
            this.reportFailure(e);
        } catch (HeuristicMixedException e) {
            this.reportFailure(e);
        } catch (HeuristicRollbackException e) {
            this.reportFailure(e);
        } catch (IOException e) {
            this.reportFailure(e);
        } finally {
            this.rollbackIfActive();
        }
    }

    /**
     * Reads the TreeTagger executable path from the {@link CommandParameter}
     * table, in its own transaction, and hands it to the retagger.
     *
     * @return {@code false} when the parameter row does not exist (the
     *         transaction is then left open for the caller to roll back),
     *         {@code true} otherwise
     */
    private boolean configureTreeTagger() throws SystemException, NotSupportedException, RollbackException,
            HeuristicMixedException, HeuristicRollbackException {
        this.beginTransactionIfNeeded();
        CommandParameter treeTaggerParameter = this.entityManager.find(CommandParameter.class,
                CommandParameter.TREETAGGER_EXECUTABLE_KEY);
        if (treeTaggerParameter == null) {
            // find() returns null for a missing row; fail with a clear message
            // instead of a NullPointerException.
            System.err.println("PosFixer: TreeTagger executable parameter not found.");
            return false;
        }
        this.ridireReTagger.setTreetaggerBin(treeTaggerParameter.getCommandValue());
        this.entityManager.flush();
        this.entityManager.clear();
        this.userTx.commit();
        return true;
    }

    /**
     * Processes one list entry inside its own transaction: looks up the
     * resource by digest, retags its text file, stores the new word count and
     * writes the VRT file and its reversed version.
     *
     * <p>
     * The transaction is committed even when the resource is not found or its
     * text file is missing/unreadable; in those cases nothing is written.
     *
     * @param entry
     *            trimmed, non-empty line from the list file
     * @param strTokenizer
     *            tokenizer passed through to the VRT builder
     * @param destDir
     *            folder receiving {@code <digest>.vrt}
     * @param reverseDestDir
     *            folder passed to the VRT builder for the reversed file
     */
    @SuppressWarnings("unchecked")
    private void fixResource(String entry, StrTokenizer strTokenizer, File destDir, File reverseDestDir)
            throws IOException, SystemException, NotSupportedException, RollbackException, HeuristicMixedException,
            HeuristicRollbackException {
        // Literal replacements: no regular expression is needed here.
        String digest = entry.replace("./", "").replace(".vrt", "");
        this.beginTransactionIfNeeded();
        List<CrawledResource> resources = this.entityManager
                .createQuery("from CrawledResource cr where cr.digest=:digest").setParameter("digest", digest)
                .getResultList();
        if (resources.size() != 1) {
            System.err.println("PosFixer: " + entry + " resource not found.");
        } else {
            CrawledResource resource = resources.get(0);
            String origFile = FilenameUtils.getFullPath(resource.getArcFile()).concat(JobMapperMonitor.RESOURCESDIR)
                    .concat(digest.concat(".txt"));
            File toBeRetagged = new File(origFile);
            if (toBeRetagged.exists() && toBeRetagged.canRead()) {
                String retaggedFile = this.ridireReTagger.retagFile(toBeRetagged);
                int wordsNumber = this.wordCounter.countWordsFromPoSTagResource(new File(retaggedFile));
                resource.setWordsNumber(wordsNumber);
                this.entityManager.persist(resource);
                this.vrtFilesBuilder.createVRTFile(retaggedFile, strTokenizer, resource, destDir);
                File vrtFile = new File(destDir, digest + ".vrt");
                this.vrtFilesBuilder.reverseFile(reverseDestDir, vrtFile);
            }
        }
        // NOTE(review): the persistence context is not cleared per resource;
        // if it outlives the transaction, loaded entities may accumulate on
        // very long lists - confirm whether a flush()/clear() is wanted here.
        this.userTx.commit();
    }

    /**
     * Begins a transaction unless one is already active, then enlists the
     * entity manager in it.
     */
    private void beginTransactionIfNeeded() throws SystemException, NotSupportedException {
        if (!this.userTx.isActive()) {
            this.userTx.begin();
        }
        this.entityManager.joinTransaction();
    }

    /**
     * Reports a failure that aborted the run on {@code System.err}.
     */
    private void reportFailure(Exception e) {
        System.err.println("PosFixer: processing aborted.");
        e.printStackTrace();
    }

    /**
     * Rolls back the current transaction if one is still active; failures
     * while rolling back are only reported, never propagated.
     */
    private void rollbackIfActive() {
        try {
            if (this.userTx != null && this.userTx.isActive()) {
                this.userTx.rollback();
            }
        } catch (IllegalStateException e) {
            e.printStackTrace();
        } catch (SecurityException e) {
            e.printStackTrace();
        } catch (SystemException e) {
            e.printStackTrace();
        }
    }
}