com.jaeksoft.searchlib.crawler.web.database.pattern.PatternManager.java Source code

Java tutorial

Introduction

Here is the source code for com.jaeksoft.searchlib.crawler.web.database.pattern.PatternManager.java

Source

/**   
 * License Agreement for OpenSearchServer
 *
 * Copyright (C) 2008-2014 Emmanuel Keller / Jaeksoft
 * 
 * http://www.open-search-server.com
 * 
 * This file is part of OpenSearchServer.
 *
 * OpenSearchServer is free software: you can redistribute it and/or
 * modify it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 *  (at your option) any later version.
 *
 * OpenSearchServer is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with OpenSearchServer. 
 *  If not, see <http://www.gnu.org/licenses/>.
 **/

package com.jaeksoft.searchlib.crawler.web.database.pattern;

import java.io.BufferedReader;
import java.io.File;
import java.io.IOException;
import java.io.PrintWriter;
import java.io.StringReader;
import java.io.StringWriter;
import java.net.MalformedURLException;
import java.net.URISyntaxException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.List;
import java.util.Set;
import java.util.TreeSet;

import javax.xml.parsers.ParserConfigurationException;
import javax.xml.transform.TransformerConfigurationException;
import javax.xml.xpath.XPathExpressionException;

import org.apache.commons.lang3.StringUtils;
import org.w3c.dom.NodeList;
import org.xml.sax.SAXException;

import com.jaeksoft.searchlib.SearchLibException;
import com.jaeksoft.searchlib.crawler.web.database.pattern.PatternItem.Status;
import com.jaeksoft.searchlib.util.DomUtils;
import com.jaeksoft.searchlib.util.IOUtils;
import com.jaeksoft.searchlib.util.ReadWriteLock;
import com.jaeksoft.searchlib.util.XPathParser;
import com.jaeksoft.searchlib.util.XmlWriter;

/**
 * Keeps a sorted set of crawler URL pattern strings in memory and mirrors it in
 * an XML file shaped as {@code <patterns><pattern>...</pattern></patterns>}.
 * <p>
 * Public instance methods take the read or write side of {@link #rwl} around
 * every access to the set. Private helpers whose name ends with
 * {@code WithoutLock} do no locking themselves and rely on the caller.
 */
public class PatternManager {

    /**
     * Charset name used to encode the pattern file; the same name is handed to
     * {@link XmlWriter} so that the bytes on disk and the encoding announced to
     * the XML writer agree.
     */
    private static final String FILE_ENCODING = "UTF-8";

    private final ReadWriteLock rwl = new ReadWriteLock();

    /** Pattern strings, kept in natural (sorted) order by the TreeSet. */
    private final Set<String> patternSet;

    /** XML file the pattern set is loaded from and stored to. */
    private final File patternFile;

    /** Matcher built on demand from the set; reset to null whenever the set changes. */
    private PatternListMatcher patternListMatcher;

    /**
     * Binds the manager to {@code filename} inside {@code indexDir} and loads the
     * patterns already stored there, if the file exists.
     *
     * @param indexDir
     *            directory containing the pattern file
     * @param filename
     *            name of the pattern file
     * @throws SearchLibException
     *             if the file cannot be read or parsed, or if loading a stored
     *             pattern fails
     */
    public PatternManager(File indexDir, String filename) throws SearchLibException {
        patternFile = new File(indexDir, filename);
        patternSet = new TreeSet<String>();
        patternListMatcher = null;
        try {
            load();
        } catch (ParserConfigurationException e) {
            throw new SearchLibException(e);
        } catch (SAXException e) {
            throw new SearchLibException(e);
        } catch (IOException e) {
            throw new SearchLibException(e);
        } catch (XPathExpressionException e) {
            throw new SearchLibException(e);
        } catch (URISyntaxException e) {
            throw new SearchLibException(e);
        }
    }

    /**
     * Replaces the in-memory set with the text of every {@code /patterns/pattern}
     * node of the pattern file. Does nothing when the file does not exist. No
     * lock is taken; the only caller is the constructor.
     */
    private void load() throws ParserConfigurationException, SAXException, IOException, XPathExpressionException,
            SearchLibException, URISyntaxException {
        if (!patternFile.exists())
            return;
        XPathParser xpp = new XPathParser(patternFile);
        NodeList nodes = xpp.getNodeList("/patterns/pattern");
        int nodeCount = nodes.getLength();
        List<PatternItem> loaded = new ArrayList<PatternItem>(nodeCount);
        for (int i = 0; i < nodeCount; i++)
            loaded.add(new PatternItem(DomUtils.getText(nodes.item(i))));
        addListWithoutStoreAndLock(loaded, true);
    }

    /**
     * Writes the whole pattern set to the pattern file, creating or truncating
     * it. The caller is expected to hold the write lock.
     *
     * @throws IOException
     *             if the file cannot be opened or a write error occurred
     */
    private void store() throws IOException, TransformerConfigurationException, SAXException {
        // Encode with the charset that is also passed to XmlWriter: relying on
        // the platform default would corrupt non-ASCII patterns wherever that
        // default is not UTF-8. This constructor creates the file when missing.
        PrintWriter pw = new PrintWriter(patternFile, FILE_ENCODING);
        try {
            XmlWriter xmlWriter = new XmlWriter(pw, FILE_ENCODING);
            xmlWriter.startElement("patterns");
            for (String pattern : patternSet) {
                xmlWriter.startElement("pattern");
                xmlWriter.textNode(pattern);
                xmlWriter.endElement();
            }
            xmlWriter.endElement();
            xmlWriter.endDocument();
            // PrintWriter never throws on a failed write; checkError() flushes
            // and reports it, so a truncated file is not mistaken for success.
            if (pw.checkError())
                throw new IOException("Error while writing the pattern file: " + patternFile.getAbsolutePath());
        } finally {
            pw.close();
        }
    }

    /**
     * Calls {@link #store()} and converts each of its checked exceptions into a
     * {@link SearchLibException} carrying the original exception as its cause.
     */
    private void storeOrWrap() throws SearchLibException {
        try {
            store();
        } catch (IOException e) {
            throw new SearchLibException(e);
        } catch (TransformerConfigurationException e) {
            throw new SearchLibException(e);
        } catch (SAXException e) {
            throw new SearchLibException(e);
        }
    }

    /**
     * Adds every item of {@code patternList} to the set, optionally emptying the
     * set first. Neither locks nor writes the file. The cached matcher is
     * always dropped, even when the list is null.
     *
     * @param patternList
     *            items to add; null is treated as an empty list
     * @param deleteAll
     *            true to clear the set before adding
     */
    private void addListWithoutStoreAndLock(List<PatternItem> patternList, boolean deleteAll)
            throws SearchLibException, MalformedURLException, URISyntaxException {
        patternListMatcher = null;
        if (deleteAll)
            patternSet.clear();
        if (patternList == null)
            return;
        for (PatternItem item : patternList)
            addPatternWithoutLock(item);
    }

    /**
     * Adds a list of patterns under the write lock and stores the resulting set.
     * Each item receives a status describing what happened to it (see
     * {@link #addPattern(PatternItem)}).
     *
     * @param patternList
     *            items to add; may be null
     * @param bDeleteAll
     *            true to remove every existing pattern before adding
     * @throws SearchLibException
     *             if an item cannot provide its matcher or the file cannot be
     *             written
     */
    public void addList(List<PatternItem> patternList, boolean bDeleteAll) throws SearchLibException {
        rwl.w.lock();
        try {
            addListWithoutStoreAndLock(patternList, bDeleteAll);
            storeOrWrap();
        } catch (MalformedURLException e) {
            throw new SearchLibException(e);
        } catch (URISyntaxException e) {
            throw new SearchLibException(e);
        } finally {
            rwl.w.unlock();
        }
    }

    /**
     * Removes one pattern (after trimming it) from the set without locking or
     * storing.
     *
     * @param pattern
     *            pattern to remove; null removes nothing
     * @return 1 if the pattern was present and removed, 0 otherwise
     */
    private int delPatternWithoutLock(String pattern) {
        if (pattern == null)
            return 0;
        if (!patternSet.remove(pattern.trim()))
            return 0;
        patternListMatcher = null;
        return 1;
    }

    /**
     * Removes the given patterns under the write lock, then stores the set.
     *
     * @param patterns
     *            patterns to remove; a null collection removes nothing and
     *            leaves the file untouched
     * @return the number of patterns that were actually removed
     * @throws SearchLibException
     *             if the file cannot be written
     */
    public int delPattern(Collection<String> patterns) throws SearchLibException {
        if (patterns == null)
            return 0;
        rwl.w.lock();
        try {
            int removed = 0;
            for (String pattern : patterns)
                removed += delPatternWithoutLock(pattern);
            storeOrWrap();
            return removed;
        } finally {
            rwl.w.unlock();
        }
    }

    /**
     * Same operation as {@link #delPattern(Collection)}, for callers that do not
     * need the removal count.
     *
     * @param patterns
     *            patterns to remove; may be null
     * @throws SearchLibException
     *             if the file cannot be written
     */
    public void delPatternItem(Collection<String> patterns) throws SearchLibException {
        delPattern(patterns);
    }

    /**
     * Adds one item to the set without locking or storing and records the
     * outcome on the item: {@code ERROR} when the item yields no matcher,
     * {@code INJECTED} when its pattern was new, {@code ALREADY} when the set
     * already contained it. A null item is ignored.
     */
    private void addPatternWithoutLock(PatternItem patternItem) throws MalformedURLException, URISyntaxException {
        if (patternItem == null)
            return;
        PatternMatcher matcher = patternItem.getMatcher();
        if (matcher == null) {
            patternItem.setStatus(Status.ERROR);
            return;
        }
        if (!patternSet.add(matcher.sPattern)) {
            patternItem.setStatus(Status.ALREADY);
            return;
        }
        patternListMatcher = null;
        patternItem.setStatus(Status.INJECTED);
    }

    /**
     * Adds a single pattern under the write lock and stores the set. The status
     * of {@code patternItem} is updated to {@code ERROR}, {@code INJECTED} or
     * {@code ALREADY}.
     *
     * @param patternItem
     *            item to add
     * @throws SearchLibException
     *             if the item cannot provide its matcher or the file cannot be
     *             written
     */
    public void addPattern(PatternItem patternItem) throws SearchLibException {
        rwl.w.lock();
        try {
            addPatternWithoutLock(patternItem);
            storeOrWrap();
        } catch (MalformedURLException e) {
            throw new SearchLibException(e);
        } catch (URISyntaxException e) {
            throw new SearchLibException(e);
        } finally {
            rwl.w.unlock();
        }
    }

    /**
     * Appends one page of patterns to {@code patternList}, in set order.
     * <p>
     * The window {@code [start, start + rows)} is counted over the patterns that
     * pass the prefix filter, so it stays consistent with the returned total.
     *
     * @param startsWith
     *            prefix the patterns must start with; null or empty disables
     *            the filter
     * @param start
     *            zero-based index of the first matching pattern to return
     * @param rows
     *            maximum number of patterns to return; 0 means no limit
     * @param patternList
     *            list receiving the selected patterns
     * @return the total number of patterns passing the filter, regardless of the
     *         window
     */
    public int getPatterns(String startsWith, long start, long rows, List<String> patternList)
            throws SearchLibException {
        rwl.r.lock();
        try {
            String prefix = StringUtils.isEmpty(startsWith) ? null : startsWith;
            long end = start + rows;
            // Saturate instead of wrapping around when the sum overflows.
            if (start > 0 && rows > 0 && end < 0)
                end = Long.MAX_VALUE;
            int total = 0;
            for (String pattern : patternSet) {
                if (prefix != null && !pattern.startsWith(prefix))
                    continue;
                // "total" is the index of this pattern among the matches.
                boolean insideWindow = total >= start && (rows == 0 || total < end);
                if (insideWindow)
                    patternList.add(pattern);
                total++;
            }
            return total;
        } finally {
            rwl.r.unlock();
        }
    }

    /**
     * Returns the matcher for the current pattern set, building it if needed.
     * The cached instance is first looked up under the read lock; when absent,
     * the write lock is taken, the cache is checked again and a new matcher is
     * created from the set.
     *
     * @return the cached or newly created matcher, never null
     */
    public PatternListMatcher getPatternListMatcher() {
        rwl.r.lock();
        try {
            if (patternListMatcher != null)
                return patternListMatcher;
        } finally {
            rwl.r.unlock();
        }
        rwl.w.lock();
        try {
            // Another thread may have built it between the two locks.
            if (patternListMatcher == null)
                patternListMatcher = new PatternListMatcher(patternSet);
            return patternListMatcher;
        } finally {
            rwl.w.unlock();
        }
    }

    /**
     * Appends to {@code patternList} every pattern passing the prefix filter, in
     * set order.
     *
     * @param startsWith
     *            prefix the patterns must start with; null or empty disables
     *            the filter
     * @param patternList
     *            list receiving the patterns
     * @return the number of patterns appended
     */
    public int getPatterns(String startsWith, List<String> patternList) throws SearchLibException {
        rwl.r.lock();
        try {
            String prefix = StringUtils.isEmpty(startsWith) ? null : startsWith;
            int total = 0;
            for (String pattern : patternSet) {
                if (prefix != null && !pattern.startsWith(prefix))
                    continue;
                patternList.add(pattern);
                total++;
            }
            return total;
        } finally {
            rwl.r.unlock();
        }
    }

    /**
     * Turns one text line into a pattern item and appends it to {@code list}.
     * The line is trimmed; an empty result is skipped; a line containing no
     * {@code ':'} gets {@code "http://"} prepended.
     */
    private static void addLine(List<PatternItem> list, String pattern) {
        String trimmed = pattern.trim();
        if (trimmed.length() == 0)
            return;
        if (trimmed.indexOf(':') == -1)
            trimmed = "http://" + trimmed;
        PatternItem item = new PatternItem();
        item.setPattern(trimmed);
        list.add(item);
    }

    /**
     * Splits {@code lines} into individual lines and feeds each one to
     * {@link #addLine(List, String)}. A null text adds nothing.
     */
    private static void addLines(List<PatternItem> list, String lines) throws IOException {
        if (lines == null)
            return;
        StringReader sr = null;
        BufferedReader br = null;
        try {
            sr = new StringReader(lines);
            br = new BufferedReader(sr);
            for (String line = br.readLine(); line != null; line = br.readLine())
                addLine(list, line);
        } finally {
            IOUtils.close(br, sr);
        }
    }

    /**
     * Builds pattern items from a text holding one pattern per line.
     *
     * @param pattern
     *            text to split; null gives an empty list
     * @return a new list with one item per non-blank line
     */
    public static List<PatternItem> getPatternList(String pattern) throws IOException {
        List<PatternItem> patternList = new ArrayList<PatternItem>(0);
        addLines(patternList, pattern);
        return patternList;
    }

    /**
     * Builds pattern items from several texts, each of which may itself hold
     * several lines.
     *
     * @param patterns
     *            texts to split; null gives an empty list
     * @return a new list with one item per non-blank line
     */
    public static List<PatternItem> getPatternList(List<String> patterns) throws IOException {
        List<PatternItem> patternList = new ArrayList<PatternItem>(0);
        if (patterns == null)
            return patternList;
        for (String text : patterns)
            addLines(patternList, text);
        return patternList;
    }

    /**
     * Builds pattern items from the remaining lines of {@code reader}. The reader
     * is read to its end but not closed.
     *
     * @param reader
     *            source of lines, one pattern per line
     * @return a new list with one item per non-blank line
     */
    public static List<PatternItem> getPatternList(BufferedReader reader) throws IOException {
        List<PatternItem> patternList = new ArrayList<PatternItem>();
        for (String line = reader.readLine(); line != null; line = reader.readLine())
            addLine(patternList, line);
        return patternList;
    }

    /**
     * Renders the pattern of every item on its own line, each terminated by the
     * platform line separator (as written by {@link PrintWriter#println}).
     *
     * @param patternList
     *            items to render; null gives an empty string
     * @return the concatenated lines
     */
    public static String getStringPatternList(List<PatternItem> patternList) {
        StringWriter sw = null;
        PrintWriter pw = null;
        try {
            sw = new StringWriter();
            pw = new PrintWriter(sw);
            if (patternList != null)
                for (PatternItem item : patternList)
                    pw.println(item.getPattern());
            pw.flush();
            return sw.toString();
        } finally {
            if (pw != null)
                IOUtils.closeQuietly(pw);
            if (sw != null)
                IOUtils.closeQuietly(sw);
        }
    }

    /**
     * Counts the items whose status is {@code status}.
     *
     * @param patternList
     *            items to inspect; null counts as empty
     * @param status
     *            status to look for
     * @return the number of items carrying that status
     */
    public static final int countStatus(List<PatternItem> patternList, PatternItem.Status status) {
        if (patternList == null)
            return 0;
        int count = 0;
        for (PatternItem patternItem : patternList)
            if (patternItem.getStatus() == status)
                count++;
        return count;
    }

}