Java tutorial
/** * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. * The ASF licenses this file to You under the Apache License, Version 2.0 * (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.aliuge.crawler.extractor.selector; import java.util.List; import java.util.Map; import java.util.Map.Entry; import java.util.Queue; import java.util.Set; import java.util.TreeMap; import org.aliuge.crawler.exception.ExtractException; import org.aliuge.crawler.extractor.selector.expression.SimpleExpression; import org.aliuge.crawler.extractor.selector.factory.ElementCssSelectorFactory; import org.apache.commons.lang3.StringUtils; import org.jsoup.nodes.Document; import org.jsoup.nodes.Element; import org.jsoup.select.Elements; import com.google.common.collect.Lists; import com.google.common.collect.Maps; import com.google.common.collect.Queues; import com.google.common.collect.Sets; /** * @author whiteme * @date 20131015 * @desc ?? */ public class IFConditions { /** * ?AND OR = */ private String conditions; public IFConditions(String conditions) { super(); this.conditions = conditions; } /** * ?? */ protected List<AbstractElementCssSelector> selectors = Lists.newArrayList(); /** * ??? */ private Set<String> operations = Sets.newHashSet("=", "!=", ">", "<", ">=", "<="); /** * ? */ private Set<String> cond = Sets.newHashSet(" and ", " or ", " AND ", " OR "); /** * ??? * * @param depend * @return */ public boolean test(Map<String, Object> selectContent) throws ExtractException { TreeMap<Integer, String> conIndex = Maps.newTreeMap(); Queue<SimpleExpression> expressionQueue = Queues.newArrayDeque(); Queue<String> logicQueue = Queues.newArrayDeque(); // a=b and c=d or c=e or x=y int index = 0; for (String co : cond) { index = 0; while ((index = conditions.indexOf(co, index + 1)) > -1) { int i = index; conIndex.put(i, co); } } index = 0; for (Entry<Integer, String> entry : conIndex.entrySet()) { String subExp = conditions.substring(index, entry.getKey()); for (String op : operations) { int i = subExp.indexOf(op); if (i > -1) { String[] ss = subExp.split(op); if (null == selectContent.get(ss[0].trim())) { throw new ExtractException("?????[" + this.conditions + "] " + ss[0]); } expressionQueue .add(new SimpleExpression(StringUtils.trim((String) selectContent.get(ss[0].trim())), StringUtils.trim(ss[1]), op)); logicQueue.add(StringUtils.trim(entry.getValue())); } } index = entry.getKey() + entry.getValue().length(); } // ?? String subExp = conditions.substring(index); for (String op : operations) { int i = subExp.indexOf(op); if (i > -1) { String[] ss = subExp.split(op); if (null == selectContent.get(ss[0].trim())) { throw new ExtractException("?????[" + this.conditions + "] " + ss[0]); } expressionQueue.add(new SimpleExpression(StringUtils.trim((String) selectContent.get(ss[0].trim())), StringUtils.trim(ss[1]), op)); } } boolean b; try { b = expressionQueue.poll().test(); while (!expressionQueue.isEmpty()) { b = cacl(b, logicQueue.poll(), expressionQueue.poll()); } return b; } catch (Exception e) { e.printStackTrace(); } return false; } /** * ? * @return */ private boolean cacl(boolean left, String logic, SimpleExpression right) { try { if ("and".equals(logic.toLowerCase())) { return left && right.test(); } else { return left || right.test(); } } catch (Exception e) { e.printStackTrace(); } return false; } /** * ?? * @return */ @SuppressWarnings("unchecked") public Map<String, Object> getContent(Document document) throws ExtractException { if (null != selectors && selectors.size() > 0) { Map<String, Object> content = Maps.newHashMap(); for (AbstractElementCssSelector<?> selector : selectors) { if (selector instanceof FileElementCssSelector) { Map m = ((FileElementCssSelector) selector).setResult(content).setDocument(document) .getContentMap(); if (null == m || m.size() > 0 && selector.isRequired()) return null; content.putAll(m); } else { Map m = selector.setDocument(document).getContentMap(); if (null == m || m.size() > 0 && selector.isRequired()) return null; content.putAll(m); } } return content; } return Maps.newHashMap(); } public List<AbstractElementCssSelector> getSelectors() { return selectors; } public void setSelectors(List<AbstractElementCssSelector> selectors) { this.selectors = selectors; } public void addSelector(AbstractElementCssSelector selector) { this.selectors.add(selector); } public String getConditions() { return conditions; } public void setConditions(String conditions) { this.conditions = conditions; } public static void main(String[] args) { // String exp = "a= sd ea and c= c bc d and c=e and x=y"; // // IFConditions ic = new IFConditions(exp); // try { // Map<String, Object> map = Maps.newHashMap(); // map.put("a", "sd"); // map.put("c", "c bc d"); //// map.put("x", "y"); // System.out.println(ic.test(map)); // } catch (Exception e) { // e.printStackTrace(); // } String exp = "category= or category="; IFConditions ic = new IFConditions(exp); try { Map<String, Object> map = Maps.newHashMap(); map.put("category", ""); System.out.println(ic.test(map)); } catch (Exception e) { e.printStackTrace(); } } //////////////////////////////////////////////////////////////////// /// IFC /////////////////////////////////////////////////////////////////// /** * ?<b>elementIf?</br> * ?? * @param document * @return */ public static IFConditions create(Element element) { if (element != null) { String exp = element.attr("test"); IFConditions iFconditions = new IFConditions(exp); Elements selectElements = element.children(); for (Element e : selectElements) { if (e.tagName().equals("element")) { iFconditions.addSelector(ElementCssSelectorFactory.create(e)); } } return iFconditions; } return null; } }