Java tutorial
/* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. * The ASF licenses this file to You under the Apache License, Version 2.0 * (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.apache.lucene.analysis.path; import java.io.IOException; import org.apache.lucene.analysis.Tokenizer; import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; import org.apache.lucene.analysis.tokenattributes.OffsetAttribute; import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute; import org.apache.lucene.util.AttributeFactory; /** * Tokenizer for path-like hierarchies. * <p> * Take something like: * * <pre> * /something/something/else * </pre> * * and make: * * <pre> * /something * /something/something * /something/something/else * </pre> */ public class PathHierarchyTokenizer extends Tokenizer { public PathHierarchyTokenizer() { this(DEFAULT_BUFFER_SIZE, DEFAULT_DELIMITER, DEFAULT_DELIMITER, DEFAULT_SKIP); } public PathHierarchyTokenizer(int skip) { this(DEFAULT_BUFFER_SIZE, DEFAULT_DELIMITER, DEFAULT_DELIMITER, skip); } public PathHierarchyTokenizer(int bufferSize, char delimiter) { this(bufferSize, delimiter, delimiter, DEFAULT_SKIP); } public PathHierarchyTokenizer(char delimiter, char replacement) { this(DEFAULT_BUFFER_SIZE, delimiter, replacement, DEFAULT_SKIP); } public PathHierarchyTokenizer(char delimiter, char replacement, int skip) { this(DEFAULT_BUFFER_SIZE, delimiter, replacement, skip); } public PathHierarchyTokenizer(AttributeFactory factory, char delimiter, char replacement, int skip) { this(factory, DEFAULT_BUFFER_SIZE, delimiter, replacement, skip); } public PathHierarchyTokenizer(int bufferSize, char delimiter, char replacement, int skip) { this(DEFAULT_TOKEN_ATTRIBUTE_FACTORY, bufferSize, delimiter, replacement, skip); } public PathHierarchyTokenizer(AttributeFactory factory, int bufferSize, char delimiter, char replacement, int skip) { super(factory); if (bufferSize < 0) { throw new IllegalArgumentException("bufferSize cannot be negative"); } if (skip < 0) { throw new IllegalArgumentException("skip cannot be negative"); } termAtt.resizeBuffer(bufferSize); this.delimiter = delimiter; this.replacement = replacement; this.skip = skip; resultToken = new StringBuilder(bufferSize); } private static final int DEFAULT_BUFFER_SIZE = 1024; public static final char DEFAULT_DELIMITER = '/'; public static final int DEFAULT_SKIP = 0; private final char delimiter; private final char replacement; private final int skip; private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class); private final OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class); private final PositionIncrementAttribute posAtt = addAttribute(PositionIncrementAttribute.class); private int startPosition = 0; private int skipped = 0; private boolean endDelimiter = false; private StringBuilder resultToken; private int charsRead = 0; @Override public final boolean incrementToken() throws IOException { clearAttributes(); termAtt.append(resultToken); if (resultToken.length() == 0) { posAtt.setPositionIncrement(1); } else { posAtt.setPositionIncrement(0); } int length = 0; boolean added = false; if (endDelimiter) { termAtt.append(replacement); length++; endDelimiter = false; added = true; } while (true) { int c = input.read(); if (c >= 0) { charsRead++; } else { if (skipped > skip) { length += resultToken.length(); termAtt.setLength(length); offsetAtt.setOffset(correctOffset(startPosition), correctOffset(startPosition + length)); if (added) { resultToken.setLength(0); resultToken.append(termAtt.buffer(), 0, length); } return added; } else { return false; } } if (!added) { added = true; skipped++; if (skipped > skip) { termAtt.append(c == delimiter ? replacement : (char) c); length++; } else { startPosition++; } } else { if (c == delimiter) { if (skipped > skip) { endDelimiter = true; break; } skipped++; if (skipped > skip) { termAtt.append(replacement); length++; } else { startPosition++; } } else { if (skipped > skip) { termAtt.append((char) c); length++; } else { startPosition++; } } } } length += resultToken.length(); termAtt.setLength(length); offsetAtt.setOffset(correctOffset(startPosition), correctOffset(startPosition + length)); resultToken.setLength(0); resultToken.append(termAtt.buffer(), 0, length); return true; } @Override public final void end() throws IOException { super.end(); // set final offset int finalOffset = correctOffset(charsRead); offsetAtt.setOffset(finalOffset, finalOffset); } @Override public void reset() throws IOException { super.reset(); resultToken.setLength(0); charsRead = 0; endDelimiter = false; skipped = 0; startPosition = 0; } }