// Find a pattern within a file
/*
* @(#)Grep.java 1.3 01/12/13
* Search a list of files for lines that match a given regular-expression
* pattern. Demonstrates NIO mapped byte buffers, charsets, and regular
* expressions.
*
* Copyright 2001-2002 Sun Microsystems, Inc. All Rights Reserved.
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
*
* -Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* -Redistribution in binary form must reproduct the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
*
* Neither the name of Sun Microsystems, Inc. or the names of
* contributors may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
* This software is provided "AS IS," without a warranty of any
* kind. ALL EXPRESS OR IMPLIED CONDITIONS, REPRESENTATIONS AND
* WARRANTIES, INCLUDING ANY IMPLIED WARRANTY OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT, ARE HEREBY
* EXCLUDED. SUN AND ITS LICENSORS SHALL NOT BE LIABLE FOR ANY
* DAMAGES OR LIABILITIES SUFFERED BY LICENSEE AS A RESULT OF OR
* RELATING TO USE, MODIFICATION OR DISTRIBUTION OF THE SOFTWARE OR
* ITS DERIVATIVES. IN NO EVENT WILL SUN OR ITS LICENSORS BE LIABLE
* FOR ANY LOST REVENUE, PROFIT OR DATA, OR FOR DIRECT, INDIRECT,
* SPECIAL, CONSEQUENTIAL, INCIDENTAL OR PUNITIVE DAMAGES, HOWEVER
* CAUSED AND REGARDLESS OF THE THEORY OF LIABILITY, ARISING OUT OF
* THE USE OF OR INABILITY TO USE SOFTWARE, EVEN IF SUN HAS BEEN
* ADVISED OF THE POSSIBILITY OF SUCH DAMAGES.
*
* You acknowledge that Software is not designed, licensed or
* intended for use in the design, construction, operation or
* maintenance of any nuclear facility.
*/
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.nio.CharBuffer;
import java.nio.MappedByteBuffer;
import java.nio.channels.FileChannel;
import java.nio.charset.Charset;
import java.nio.charset.CharsetDecoder;
import java.util.ArrayList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.regex.PatternSyntaxException;
/**
* This is a utility class to find a pattern within a file, specifically the
* sense.idx file.
*
* @author brett
*
*/
public class Grep {
    /**
     * Character set used to decode the searched file.
     */
    private static final Charset CHARSET = Charset.forName("ISO-8859-15");

    /**
     * Line-splitting pattern. The first alternative matches a line together
     * with its terminating LF or CRLF; the second alternative picks up text
     * that is not followed by a newline, so that a final line without a
     * trailing newline is still searched.
     */
    private static final Pattern LINE_PATTERN = Pattern.compile(".*\r?\n|.+");

    /**
     * Decoded contents of the file most recently passed to
     * {@link #setFile(File)}, or <code>null</code> if no file has been loaded.
     */
    private static CharBuffer indexFile;

    /**
     * Compiles the given regular expression.
     *
     * @param pat
     *            regex to compile
     * @return the compiled pattern, or <code>null</code> if <code>pat</code>
     *         is not a valid regular expression (the syntax error is
     *         reported on standard error)
     */
    private static Pattern compile(String pat) {
        try {
            return Pattern.compile(pat);
        } catch (PatternSyntaxException x) {
            System.err.println(x.getMessage());
            // Signal "no usable pattern" to the caller instead of leaving a
            // previously compiled pattern in effect.
            return null;
        }
    }

    /**
     * Breaks the loaded text into lines and collects every line in which the
     * given pattern is found.
     *
     * @param pattern
     *            compiled search pattern, or <code>null</code> to match
     *            nothing
     * @return the matching lines as Strings, in file order; each line keeps
     *         its line terminator when it had one
     * @throws IllegalStateException
     *             if no file has been loaded with {@link #setFile(File)}
     */
    private static List grep(Pattern pattern) {
        if (indexFile == null) {
            throw new IllegalStateException("No file loaded; call Grep.setFile(File) first");
        }
        List matches = new ArrayList();
        if (pattern == null) {
            return matches;
        }
        Matcher lineMatcher = LINE_PATTERN.matcher(indexFile);
        Matcher patternMatcher = null;
        while (lineMatcher.find()) {
            CharSequence line = lineMatcher.group();
            // Reuse a single matcher for all lines rather than allocating
            // one per line.
            if (patternMatcher == null) {
                patternMatcher = pattern.matcher(line);
            } else {
                patternMatcher.reset(line);
            }
            if (patternMatcher.find()) {
                matches.add(line.toString());
            }
        }
        return matches;
    }

    /**
     * Loads the file that subsequent <code>grep</code> calls will search. The
     * file is mapped into memory, decoded as ISO-8859-15 and kept as
     * characters; the file itself is closed again before this method
     * returns, whether or not loading succeeded.
     *
     * @param f
     *            file to load
     * @throws IOException
     *             if the file cannot be opened, mapped or decoded, or if it
     *             is larger than <code>Integer.MAX_VALUE</code> bytes
     */
    public static void setFile(File f) throws IOException {
        FileInputStream fis = new FileInputStream(f);
        try {
            FileChannel fc = fis.getChannel();
            // Keep the size as a long: narrowing it to int could silently
            // map only part of a very large file.
            long size = fc.size();
            if (size > Integer.MAX_VALUE) {
                throw new IOException("File too large to map: " + f + " (" + size + " bytes)");
            }
            MappedByteBuffer bb = fc.map(FileChannel.MapMode.READ_ONLY, 0, size);
            // A fresh decoder per call avoids carrying decoder state between loads.
            indexFile = CHARSET.newDecoder().decode(bb);
        } finally {
            // Closing the stream also closes its channel.
            fis.close();
        }
    }

    /**
     * Searches the loaded file for lines in which the given regular
     * expression is found.
     *
     * @param synsetOffset
     *            regular expression to look for
     * @return the matching lines as Strings, in file order, each including
     *         its line terminator when it had one; empty if nothing matches
     *         or if <code>synsetOffset</code> is not a valid regular
     *         expression
     * @throws IOException
     *             declared for compatibility with existing callers
     * @throws IllegalStateException
     *             if no file has been loaded with {@link #setFile(File)}
     */
    public static List grep(String synsetOffset) throws IOException {
        return grep(compile(synsetOffset));
    }

    /**
     * Searches the loaded file for lines in which the given regular
     * expression is found, then picks the one that also contains the given
     * lemma.
     *
     * @param synsetOffset
     *            regular expression to look for
     * @param lemma
     *            text that the selected line must contain
     * @return the last matching line that contains <code>lemma</code>
     *         (including its line terminator when it had one), or the empty
     *         string if there is none
     * @throws IOException
     *             declared for compatibility with existing callers
     * @throws IllegalStateException
     *             if no file has been loaded with {@link #setFile(File)}
     */
    public static String grep(String synsetOffset, String lemma) throws IOException {
        List matches = grep(compile(synsetOffset));
        String selected = "";
        for (int i = 0; i < matches.size(); i++) {
            String candidate = (String) matches.get(i);
            // No early exit: a later candidate replaces an earlier one.
            if (candidate.indexOf(lemma) != -1) {
                selected = candidate;
            }
        }
        return selected;
    }
}
// Related examples in the same category