com.aurel.track.lucene.index.associatedFields.LuceneFileExtractor.java Source code

Java tutorial

Introduction

Here is the source code for com.aurel.track.lucene.index.associatedFields.LuceneFileExtractor.java

Source

/**
 * Genji Scrum Tool and Issue Tracker
 * Copyright (C) 2015 Steinbeis GmbH & Co. KG Task Management Solutions
    
 * <a href="http://www.trackplus.com">Genji Scrum Tool</a>
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 */

/* $Id:$ */

package com.aurel.track.lucene.index.associatedFields;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.Reader;

import org.apache.commons.lang3.exception.ExceptionUtils;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;

/**
 *
 * @author  Brian Wing Shun Chan
 * @version $Revision: 6385 $
 *
 */
public class LuceneFileExtractor {

    private static final Log LOGGER = LogFactory.getLog(LuceneFileExtractor.class);

    public static class INDEXABLE_EXTENSIONS {
        //MS Word
        public static String DOC = ".doc";
        public static String DOCX = ".docx";
        public static String HTM = ".htm";
        public static String HTML = ".html";
        public static String PDF = ".pdf";
        //rich text format
        public static String RTF = ".rtf";
        //MS Excel
        public static String XLS = ".xls";
        public static String XLSX = ".xlsx";
        public static String TXT = ".txt";
        public static String PROPERTIES = ".properties";
        //general XML
        public static String XML = ".xml";
        //open office text
        //OpenDocument Text
        public static String ODT = ".odt";
        //StarWriter

        //open office sheet
        //OpenDocument Table
        public static String ODS = ".ods";
        //StarCalc
        //open office impress
        //OpenDocument presentation
        public static String ODP = ".odp";
        //StarImpress
    }

    private static String[] STRINGEXTENSIONS = new String[] { INDEXABLE_EXTENSIONS.DOC, INDEXABLE_EXTENSIONS.DOCX,
            INDEXABLE_EXTENSIONS.HTM, INDEXABLE_EXTENSIONS.HTML, INDEXABLE_EXTENSIONS.PDF, INDEXABLE_EXTENSIONS.RTF,
            INDEXABLE_EXTENSIONS.XLS, INDEXABLE_EXTENSIONS.XLSX, INDEXABLE_EXTENSIONS.XML, INDEXABLE_EXTENSIONS.ODT,
            //INDEXABLE_EXTENSIONS.SDV,
            INDEXABLE_EXTENSIONS.ODS,
            //INDEXABLE_EXTENSIONS.SDC,
            INDEXABLE_EXTENSIONS.ODP,
            //INDEXABLE_EXTENSIONS.SDD,
    };

    private static String[] READEREXTENSIONS = new String[] { INDEXABLE_EXTENSIONS.TXT,
            INDEXABLE_EXTENSIONS.PROPERTIES };

    /**
     * Gets the extension if it is recognized 
     * @param fileName
     * @return
     */
    public static String getExtension(String fileName) {
        if (fileName == null) {
            return "";
        }
        int extensionIndex = fileName.lastIndexOf(".");
        if (extensionIndex <= 0) {
            return "";
        }
        return fileName.substring(extensionIndex);
    }

    public static boolean isStringExtension(String extension) {
        for (int i = 0; i < STRINGEXTENSIONS.length; i++) {
            if (STRINGEXTENSIONS[i].equalsIgnoreCase(extension)) {
                return true;
            }
        }
        return false;
    }

    public static boolean isReaderExtension(String extension) {
        for (int i = 0; i < READEREXTENSIONS.length; i++) {
            if (READEREXTENSIONS[i].equalsIgnoreCase(extension)) {
                return true;
            }
        }
        return false;
    }

    public static Reader getReader(File file) {
        Reader reader = null;
        if (file == null || !file.exists()) {
            return null;
        }
        try {
            reader = new FileReader(file);
        } catch (FileNotFoundException e) {
            LOGGER.info("File " + file.getName() + " not found. " + e.getMessage());
            LOGGER.debug(ExceptionUtils.getStackTrace(e));
        }
        reader = new BufferedReader(reader);
        return reader;
    }

}