Find a pattern within a file : Text Read Write « File Input Output « Java






Find a pattern within a file

      

/*
 * @(#)Grep.java  1.3 01/12/13
 * Search a list of files for lines that match a given regular-expression
 * pattern.  Demonstrates NIO mapped byte buffers, charsets, and regular
 * expressions.
 *
 * Copyright 2001-2002 Sun Microsystems, Inc. All Rights Reserved.
 *
 * Redistribution and use in source and binary forms, with or 
 * without modification, are permitted provided that the following 
 * conditions are met:
 * 
 * -Redistributions of source code must retain the above copyright  
 * notice, this  list of conditions and the following disclaimer.
 * 
 * -Redistribution in binary form must reproduce the above copyright 
 * notice, this list of conditions and the following disclaimer in 
 * the documentation and/or other materials provided with the 
 * distribution.
 * 
 * Neither the name of Sun Microsystems, Inc. or the names of 
 * contributors may be used to endorse or promote products derived 
 * from this software without specific prior written permission.
 * 
 * This software is provided "AS IS," without a warranty of any 
 * kind. ALL EXPRESS OR IMPLIED CONDITIONS, REPRESENTATIONS AND 
 * WARRANTIES, INCLUDING ANY IMPLIED WARRANTY OF MERCHANTABILITY, 
 * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT, ARE HEREBY 
 * EXCLUDED. SUN AND ITS LICENSORS SHALL NOT BE LIABLE FOR ANY 
 * DAMAGES OR LIABILITIES  SUFFERED BY LICENSEE AS A RESULT OF  OR 
 * RELATING TO USE, MODIFICATION OR DISTRIBUTION OF THE SOFTWARE OR 
 * ITS DERIVATIVES. IN NO EVENT WILL SUN OR ITS LICENSORS BE LIABLE 
 * FOR ANY LOST REVENUE, PROFIT OR DATA, OR FOR DIRECT, INDIRECT, 
 * SPECIAL, CONSEQUENTIAL, INCIDENTAL OR PUNITIVE DAMAGES, HOWEVER 
 * CAUSED AND REGARDLESS OF THE THEORY OF LIABILITY, ARISING OUT OF 
 * THE USE OF OR INABILITY TO USE SOFTWARE, EVEN IF SUN HAS BEEN 
 * ADVISED OF THE POSSIBILITY OF SUCH DAMAGES.
 * 
 * You acknowledge that Software is not designed, licensed or 
 * intended for use in the design, construction, operation or 
 * maintenance of any nuclear facility. 
 */

import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.nio.CharBuffer;
import java.nio.MappedByteBuffer;
import java.nio.channels.FileChannel;
import java.nio.charset.Charset;
import java.nio.charset.CharsetDecoder;
import java.util.ArrayList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.regex.PatternSyntaxException;

/**
 * Utility class that searches a text file, previously loaded into memory, for
 * lines matching a regular expression. The original author notes it was written
 * for the sense.idx file.
 *
 * <p>Typical use: call {@link #setFile(File)} once, then call one of the public
 * {@code grep} methods any number of times. The loaded file contents are held
 * in a static field and no synchronization is performed.
 *
 * @author brett
 */
public class Grep {

  /**
   * Character set used to decode the file.
   */
  private static final Charset CHARSET = Charset.forName("ISO-8859-15");

  /**
   * Decoder for {@link #CHARSET}; {@code decode(ByteBuffer)} resets it on every
   * call, so it can be reused across files.
   */
  private static final CharsetDecoder DECODER = CHARSET.newDecoder();

  /**
   * Matches one newline-terminated line, including its terminator.
   */
  private static final Pattern LINE_PATTERN = Pattern.compile(".*\r?\n");

  /**
   * Decoded contents of the file given to {@link #setFile(File)}, or
   * {@code null} if no file has been loaded yet.
   */
  private static CharBuffer indexFile;

  /**
   * Compiles the given regular expression.
   *
   * @param pat
   *          regex
   * @return the compiled pattern, or {@code null} if {@code pat} is not a
   *         valid regular expression (the syntax error is reported on
   *         {@code System.err})
   */
  private static Pattern compile(String pat) {
    try {
      return Pattern.compile(pat);
    } catch (PatternSyntaxException x) {
      System.err.println(x.getMessage());
      // Signal "no usable pattern" so the caller cannot accidentally search
      // with a pattern left over from an earlier call.
      return null;
    }
  }

  /**
   * Breaks the loaded file into lines and collects every line in which the
   * target pattern is found.
   *
   * @param target
   *          pattern to look for, or {@code null} for "match nothing"
   * @return the matching lines as Strings, in file order; each line keeps its
   *         line terminator if it had one
   * @throws IllegalStateException
   *           if {@link #setFile(File)} has not been called successfully
   */
  private static List findMatchingLines(Pattern target) {
    CharBuffer text = indexFile;
    if (text == null) {
      throw new IllegalStateException("No file loaded: call setFile(File) before grep");
    }
    List matches = new ArrayList();
    if (target == null) {
      return matches;
    }

    Matcher lineMatcher = LINE_PATTERN.matcher(text);
    Matcher targetMatcher = target.matcher("");
    int consumed = 0; // end offset of the last newline-terminated line
    while (lineMatcher.find()) {
      consumed = lineMatcher.end();
      collectIfFound(targetMatcher, lineMatcher.group(), matches);
    }
    // LINE_PATTERN requires a trailing '\n', so a final line without one is
    // not produced by the loop above; search it explicitly.
    if (consumed < text.length()) {
      String tail = text.subSequence(consumed, text.length()).toString();
      collectIfFound(targetMatcher, tail, matches);
    }
    return matches;
  }

  /**
   * Appends {@code line} to {@code matches} if the matcher's pattern is found
   * anywhere in it.
   */
  private static void collectIfFound(Matcher targetMatcher, String line, List matches) {
    targetMatcher.reset(line);
    if (targetMatcher.find()) {
      matches.add(line);
    }
  }

  /**
   * Loads the given file into memory, decoding it as ISO-8859-15. Subsequent
   * {@code grep} calls search this content. If loading fails, previously
   * loaded content is left in place.
   *
   * @param f
   *          file to load
   * @throws IOException
   *           if the file cannot be opened, mapped or decoded, or if it is
   *           larger than {@code Integer.MAX_VALUE} bytes
   */
  public static void setFile(File f) throws IOException {
    FileInputStream fis = new FileInputStream(f);
    try {
      FileChannel fc = fis.getChannel();

      long size = fc.size();
      if (size > Integer.MAX_VALUE) {
        // A single mapping cannot exceed Integer.MAX_VALUE bytes; fail loudly
        // instead of truncating the size with an int cast.
        throw new IOException("File too large to map: " + f + " (" + size + " bytes)");
      }
      if (size == 0) {
        // Nothing to map or decode.
        indexFile = CharBuffer.allocate(0);
        return;
      }

      // Map the whole file into memory, then decode it into a char buffer
      MappedByteBuffer bb = fc.map(FileChannel.MapMode.READ_ONLY, 0, size);
      indexFile = DECODER.decode(bb);
    } finally {
      // Closing the stream also closes its channel; the decoded CharBuffer
      // does not depend on either.
      fis.close();
    }
  }

  /**
   * Searches the loaded file for lines in which the given regular expression
   * is found.
   *
   * @param synsetOffset
   *          regular expression to search for
   * @return the matching lines as Strings, in file order, each including its
   *         line terminator if present; empty if nothing matches or if
   *         {@code synsetOffset} is not a valid regular expression
   * @throws IOException
   *           declared for compatibility with existing callers
   * @throws IllegalStateException
   *           if {@link #setFile(File)} has not been called successfully
   */
  public static List grep(String synsetOffset) throws IOException {
    return findMatchingLines(compile(synsetOffset));
  }

  /**
   * Searches the loaded file for lines in which the given regular expression
   * is found, then picks the one that also contains the given lemma as a
   * plain substring.
   *
   * @param synsetOffset
   *          regular expression to search for
   * @param lemma
   *          literal text the chosen line must contain
   * @return the last matching line containing {@code lemma} (including its
   *         line terminator if present), or the empty string if there is none
   * @throws IOException
   *           declared for compatibility with existing callers
   * @throws IllegalStateException
   *           if {@link #setFile(File)} has not been called successfully
   */
  public static String grep(String synsetOffset, String lemma) throws IOException {
    String found = "";
    List matches = findMatchingLines(compile(synsetOffset));
    for (int i = 0; i < matches.size(); i++) {
      String match = (String) matches.get(i);
      if (match.indexOf(lemma) != -1) {
        found = match; // keep scanning: the last qualifying line wins
      }
    }
    return found;
  }

}

   
    
    
    
    
    
  








Related examples in the same category

1.Data Text Writer
2.Load File As Text
3.Load file content to List
4.Load file line by line
5.Java File Generator
6.Writing delimited text data to a file or a stream
7.Searches case sensitively in a file
8.To Hex String and char
9.CRLF Print Writer
10.CRLF Terminated Reader
11.Gets the content from a File as String Array List
12.Dump a String to a text file with encoding.
13.Load a text file contents as a String.
14.An iterator that breaks text into lines. The result is equal to BufferedReader.readLine().
15.Compare text file line by line
16.Read and return the entire contents of the supplied File.
17.Allows reading and writing to a plain text file via a list of lines.
18.Text File Handler