Grep.java Source code

Java tutorial

Introduction

Here is the source code for Grep.java

Source

/*
 * @(#)Grep.java  1.3 01/12/13
 * Search a list of files for lines that match a given regular-expression
 * pattern.  Demonstrates NIO mapped byte buffers, charsets, and regular
 * expressions.
 *
 * Copyright 2001-2002 Sun Microsystems, Inc. All Rights Reserved.
 *
 * Redistribution and use in source and binary forms, with or 
 * without modification, are permitted provided that the following 
 * conditions are met:
 * 
 * -Redistributions of source code must retain the above copyright  
 * notice, this  list of conditions and the following disclaimer.
 * 
 * -Redistribution in binary form must reproduct the above copyright 
 * notice, this list of conditions and the following disclaimer in 
 * the documentation and/or other materials provided with the 
 * distribution.
 * 
 * Neither the name of Sun Microsystems, Inc. or the names of 
 * contributors may be used to endorse or promote products derived 
 * from this software without specific prior written permission.
 * 
 * This software is provided "AS IS," without a warranty of any 
 * kind. ALL EXPRESS OR IMPLIED CONDITIONS, REPRESENTATIONS AND 
 * WARRANTIES, INCLUDING ANY IMPLIED WARRANTY OF MERCHANTABILITY, 
 * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT, ARE HEREBY 
 * EXCLUDED. SUN AND ITS LICENSORS SHALL NOT BE LIABLE FOR ANY 
 * DAMAGES OR LIABILITIES  SUFFERED BY LICENSEE AS A RESULT OF  OR 
 * RELATING TO USE, MODIFICATION OR DISTRIBUTION OF THE SOFTWARE OR 
 * ITS DERIVATIVES. IN NO EVENT WILL SUN OR ITS LICENSORS BE LIABLE 
 * FOR ANY LOST REVENUE, PROFIT OR DATA, OR FOR DIRECT, INDIRECT, 
 * SPECIAL, CONSEQUENTIAL, INCIDENTAL OR PUNITIVE DAMAGES, HOWEVER 
 * CAUSED AND REGARDLESS OF THE THEORY OF LIABILITY, ARISING OUT OF 
 * THE USE OF OR INABILITY TO USE SOFTWARE, EVEN IF SUN HAS BEEN 
 * ADVISED OF THE POSSIBILITY OF SUCH DAMAGES.
 * 
 * You acknowledge that Software is not designed, licensed or 
 * intended for use in the design, construction, operation or 
 * maintenance of any nuclear facility. 
 */

import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.nio.CharBuffer;
import java.nio.MappedByteBuffer;
import java.nio.channels.FileChannel;
import java.nio.charset.Charset;
import java.nio.charset.CharsetDecoder;
import java.util.ArrayList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.regex.PatternSyntaxException;

/**
 * This is a utility class to find a pattern within a file, specifically the
 * sense.idx file.
 * 
 * @author brett
 * 
 */
public class Grep {

    /**
     * Set the character set for the file.
     */
    private static Charset charset = Charset.forName("ISO-8859-15");

    /**
     * The decoder for the file.
     */
    private static CharsetDecoder decoder = charset.newDecoder();

    /**
     * Line parsing pattern.
     */
    private static Pattern linePattern = Pattern.compile(".*\r?\n");

    /**
     * Input pattern we're looking for.
     */
    private static Pattern pattern;

    /**
     * The character buffer reference.
     */
    private static CharBuffer indexFile;

    /**
     * Compiles the pattern.
     * 
     * @param pat
     *          regex
     */
    private static void compile(String pat) {
        try {
            pattern = Pattern.compile(pat);
        } catch (PatternSyntaxException x) {
            System.err.println(x.getMessage());
        }
    }

    /**
     * Use the linePattern to break the given CharBuffer into lines, applying the
     * input pattern to each line to see if we have a match
     */
    private static List grep() {
        List matches = new ArrayList();
        Matcher lm = linePattern.matcher(indexFile); // Line matcher
        Matcher pm = null; // Pattern matcher
        int lines = 0;
        while (lm.find()) {
            lines++;
            CharSequence cs = lm.group(); // The current line
            if (pm == null)
                pm = pattern.matcher(cs);
            else
                pm.reset(cs);
            if (pm.find()) {
                matches.add(cs.toString());
            }
            if (lm.end() == indexFile.limit())
                break;
        }
        return matches;
    }

    public static void setFile(File f) throws IOException {
        FileInputStream fis = new FileInputStream(f);
        FileChannel fc = fis.getChannel();

        // Get the file's size and then map it into memory
        int sz = (int) fc.size();
        MappedByteBuffer bb = fc.map(FileChannel.MapMode.READ_ONLY, 0, sz);
        // Decode the file into a char buffer
        indexFile = decoder.decode(bb);

        fc.close();
    }

    /**
     * Search for occurrences in the given file of the offset, then find the
     * appropriate lemma.
     * 
     * @param f
     * @param synsetOffset
     * @return
     * @throws IOException
     */
    public static List grep(String synsetOffset) throws IOException {

        compile(synsetOffset);

        // Perform the search
        List matches = grep();

        return matches;
    }

    /**
     * Search for occurrences in the given file of the offset, then find the
     * appropriate lemma.
     * 
     * @param f
     * @param synsetOffset
     * @return
     * @throws IOException
     */
    public static String grep(String synsetOffset, String lemma) throws IOException {

        compile(synsetOffset);
        String m = "";
        // Perform the search
        List matches = grep();
        for (int i = 0; i < matches.size(); i++) {
            String match = (String) matches.get(i);
            if (match.indexOf(lemma) != -1) {
                m = match;
            }
        }

        return m;
    }

}