org.aliuge.crawler.extractor.selector.IFConditions.java Source code

Java tutorial

Introduction

Here is the source code for org.aliuge.crawler.extractor.selector.IFConditions.java

Source

/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.aliuge.crawler.extractor.selector;

import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Queue;
import java.util.Set;
import java.util.TreeMap;

import org.aliuge.crawler.exception.ExtractException;
import org.aliuge.crawler.extractor.selector.expression.SimpleExpression;
import org.aliuge.crawler.extractor.selector.factory.ElementCssSelectorFactory;
import org.apache.commons.lang3.StringUtils;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;

import com.google.common.collect.Lists;
import com.google.common.collect.Maps;
import com.google.common.collect.Queues;
import com.google.common.collect.Sets;

/**
 * Conditional node of an extraction template: a boolean condition string plus
 * the selectors declared inside it.
 *
 * <p>The condition is a flat list of comparisons joined by {@code and} /
 * {@code or}, for example {@code a=b and c=d or c=e}. Each comparison has the
 * form {@code name OP literal}, where {@code name} is looked up in the map
 * handed to {@link #test(Map)} and {@code OP} is one of
 * {@code = != > < >= <=}. Comparisons are combined strictly from left to
 * right; there is no operator precedence and no parenthesis support.
 *
 * <p>Originally written 2013-10-15.
 *
 * @author whiteme
 */
public class IFConditions {

    /**
     * Supported comparison operators. Two-character operators are listed first
     * so that, when several operators match at the same position, the longest
     * one wins (e.g. {@code >=} is not mistaken for {@code >}).
     */
    private static final List<String> OPERATORS = Lists.newArrayList("!=", ">=", "<=", "=", ">", "<");

    /**
     * Logical connectives recognised between comparisons. The surrounding
     * spaces are part of the token.
     */
    private static final Set<String> CONNECTIVES = Sets.newHashSet(" and ", " or ", " AND ", " OR ");

    /**
     * Raw condition string, e.g. {@code a=b and c=d or c=e}.
     */
    private String conditions;

    /**
     * Selectors attached to this condition; they are run by
     * {@link #getContent(Document)}.
     */
    protected List<AbstractElementCssSelector> selectors = Lists.newArrayList();

    /**
     * Creates a condition node for the given condition string.
     *
     * @param conditions the condition expression
     */
    public IFConditions(String conditions) {
        super();
        this.conditions = conditions;
    }

    /**
     * Evaluates the condition string against already extracted values.
     *
     * <p>All comparisons are parsed first (so a missing variable is reported
     * before anything is evaluated) and then combined from left to right.
     * Segments that contain no comparison operator are ignored.
     *
     * @param selectContent values keyed by variable name; the left-hand side of
     *                      every comparison must be present in this map
     * @return the result of the condition; {@code false} when the condition is
     *         blank, contains no comparison, or a comparison fails to evaluate
     * @throws ExtractException if a variable used in the condition has no value
     *                          in {@code selectContent}
     */
    public boolean test(Map<String, Object> selectContent) throws ExtractException {
        if (StringUtils.isBlank(conditions)) {
            return false;
        }
        Queue<SimpleExpression> expressionQueue = Queues.newArrayDeque();
        Queue<String> logicQueue = Queues.newArrayDeque();

        // a=b and c=d or c=e or x=y
        int start = 0;
        for (Entry<Integer, String> connective : locateConnectives().entrySet()) {
            int position = connective.getKey();
            if (position < start) {
                // This hit shares its leading space with the previous
                // connective (e.g. " and or "); there is no segment to cut.
                continue;
            }
            SimpleExpression expression = parseExpression(conditions.substring(start, position), selectContent);
            if (expression != null) {
                expressionQueue.add(expression);
                // The connective that follows a comparison joins it to the next one.
                logicQueue.add(StringUtils.trim(connective.getValue()));
            }
            start = position + connective.getValue().length();
        }

        // Trailing segment after the last connective (or the whole string).
        SimpleExpression last = parseExpression(conditions.substring(start), selectContent);
        if (last != null) {
            expressionQueue.add(last);
        }

        if (expressionQueue.isEmpty()) {
            return false;
        }
        try {
            boolean result = expressionQueue.poll().test();
            while (!expressionQueue.isEmpty()) {
                result = combine(result, logicQueue.poll(), expressionQueue.poll());
            }
            return result;
        } catch (Exception e) {
            // Best effort: an expression that cannot be evaluated makes the
            // whole condition false instead of aborting the extraction.
            e.printStackTrace();
        }

        return false;
    }

    /**
     * Finds every occurrence of a connective in the condition string.
     *
     * @return connective tokens keyed (and therefore ordered) by start index
     */
    private TreeMap<Integer, String> locateConnectives() {
        TreeMap<Integer, String> found = Maps.newTreeMap();
        for (String connective : CONNECTIVES) {
            // Searching starts at index 1: a connective at the very beginning
            // has no left-hand comparison.
            int position = conditions.indexOf(connective, 1);
            while (position > -1) {
                found.put(position, connective);
                position = conditions.indexOf(connective, position + 1);
            }
        }
        return found;
    }

    /**
     * Parses one {@code name OP literal} segment.
     *
     * <p>The left-most operator in the segment is used; if several operators
     * start at that position the longest one is chosen. Everything after the
     * operator, including further operator characters, is the literal, which
     * may be empty.
     *
     * @param segment       text between two connectives
     * @param selectContent values keyed by variable name
     * @return the parsed comparison, or {@code null} if the segment contains no
     *         operator
     * @throws ExtractException if the variable has no value in {@code selectContent}
     */
    private SimpleExpression parseExpression(String segment, Map<String, Object> selectContent)
            throws ExtractException {
        int operatorIndex = -1;
        String operator = null;
        for (String candidate : OPERATORS) {
            int i = segment.indexOf(candidate);
            // Strict '<' keeps the earlier (longer) candidate on a tie.
            if (i > -1 && (operatorIndex == -1 || i < operatorIndex)) {
                operatorIndex = i;
                operator = candidate;
            }
        }
        if (operator == null) {
            return null;
        }

        String left = segment.substring(0, operatorIndex);
        String right = segment.substring(operatorIndex + operator.length());

        Object value = selectContent == null ? null : selectContent.get(left.trim());
        if (null == value) {
            // NOTE(review): the message prefix looks like it lost its original
            // non-ASCII text to an encoding problem; restore it from the
            // project history if available.
            throw new ExtractException("?????[" + this.conditions + "] " + left);
        }
        return new SimpleExpression(StringUtils.trim(String.valueOf(value)), StringUtils.trim(right), operator);
    }

    /**
     * Combines an already computed result with the next comparison.
     *
     * @param left  result of everything evaluated so far
     * @param logic connective; {@code and} (any case) means conjunction,
     *              anything else is treated as disjunction
     * @param right next comparison to evaluate
     * @return the combined result, or {@code false} if {@code right} fails to
     *         evaluate
     */
    private boolean combine(boolean left, String logic, SimpleExpression right) {
        try {
            if ("and".equalsIgnoreCase(logic)) {
                return left && right.test();
            } else {
                return left || right.test();
            }
        } catch (Exception e) {
            e.printStackTrace();
        }
        return false;
    }

    /**
     * Runs all attached selectors against the document and merges their
     * results into one map.
     *
     * <p>A {@code FileElementCssSelector} additionally receives the content
     * collected so far before it is run.
     *
     * @param document the document handed to every selector
     * @return the merged content; an empty map when no selectors are attached;
     *         {@code null} when a selector returns no map at all or a required
     *         selector yields an empty map
     * @throws ExtractException if a selector fails
     */
    @SuppressWarnings("unchecked")
    public Map<String, Object> getContent(Document document) throws ExtractException {
        Map<String, Object> content = Maps.newHashMap();
        if (null == selectors || selectors.isEmpty()) {
            return content;
        }
        for (AbstractElementCssSelector<?> selector : selectors) {
            Map m;
            if (selector instanceof FileElementCssSelector) {
                m = ((FileElementCssSelector) selector).setResult(content).setDocument(document)
                        .getContentMap();
            } else {
                m = selector.setDocument(document).getContentMap();
            }
            // A required selector that produced nothing invalidates the whole
            // result. (The previous check tested size() > 0, which rejected
            // required selectors exactly when they succeeded.)
            if (null == m || (m.isEmpty() && selector.isRequired())) {
                return null;
            }
            content.putAll(m);
        }
        return content;
    }

    /**
     * @return the selectors attached to this condition
     */
    public List<AbstractElementCssSelector> getSelectors() {
        return selectors;
    }

    /**
     * @param selectors the selectors to attach to this condition
     */
    public void setSelectors(List<AbstractElementCssSelector> selectors) {
        this.selectors = selectors;
    }

    /**
     * Attaches one more selector to this condition.
     *
     * @param selector the selector to add
     */
    public void addSelector(AbstractElementCssSelector selector) {
        if (this.selectors == null) {
            // setSelectors(null) may have cleared the list.
            this.selectors = Lists.newArrayList();
        }
        this.selectors.add(selector);
    }

    /**
     * @return the raw condition string
     */
    public String getConditions() {
        return conditions;
    }

    /**
     * @param conditions the raw condition string
     */
    public void setConditions(String conditions) {
        this.conditions = conditions;
    }

    /**
     * Small manual demo: evaluates a condition with empty right-hand sides.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        String exp = "category= or category=";
        IFConditions ic = new IFConditions(exp);
        try {
            Map<String, Object> map = Maps.newHashMap();
            map.put("category", "");
            System.out.println(ic.test(map));
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    ////////////////////////////////////////////////////////////////////
    ///         IFConditions factory
    ///////////////////////////////////////////////////////////////////
    /**
     * Builds an {@code IFConditions} from its configuration element.
     *
     * <p>The condition string is read from the element's {@code test}
     * attribute; every direct child whose tag name is {@code element} is
     * converted into a selector via {@code ElementCssSelectorFactory}.
     *
     * @param element the configuration element, may be {@code null}
     * @return the configured instance, or {@code null} if {@code element} is
     *         {@code null}
     */
    public static IFConditions create(Element element) {
        if (element == null) {
            return null;
        }
        IFConditions iFconditions = new IFConditions(element.attr("test"));
        Elements selectElements = element.children();
        for (Element e : selectElements) {
            if ("element".equals(e.tagName())) {
                iFconditions.addSelector(ElementCssSelectorFactory.create(e));
            }
        }
        return iFconditions;
    }
}