// Java tutorial
/** * * APDPlat - Application Product Development Platform * Copyright (c) 2013, ??, yang-shangchuan@qq.com * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program. If not, see <http://www.gnu.org/licenses/>. * */ package org.apdplat.extractor.html; import org.apache.commons.lang.StringUtils; import org.apdplat.extractor.html.model.CssPath; import org.jsoup.nodes.Document; import org.jsoup.nodes.Element; import org.jsoup.select.Elements; import org.slf4j.Logger; import org.slf4j.LoggerFactory; /** * ???? * 1?deleteChild(div.ep-source) * 2?removeText() * 3?substring(0,19) ????+(+??+) * * @author ?? * */ public class ExtractFunctionExecutor { public static final Logger LOGGER = LoggerFactory.getLogger(ExtractFunctionExecutor.class); /** * ? * * @param text CSS?? * @param doc * @param cssPath CSS * @param parseExpression ? * @return ??? */ public static String execute(String text, Document doc, CssPath cssPath, String parseExpression) { if (parseExpression.startsWith("deleteChild")) { return executeDeleteChild(text, doc, cssPath, parseExpression); } if (parseExpression.startsWith("removeText")) { return executeRemoveText(text, parseExpression); } if (parseExpression.startsWith("substring")) { return executeSubstring(text, parseExpression); } return null; } /** * ? substring(0,19) * ??2?0190?19,?[0 - 19) * * @param text CSS?? * @param parseExpression ? * @return ??? 
*/ public static String executeSubstring(String text, String parseExpression) { LOGGER.debug("substring??" + text); String parameter = parseExpression.replace("substring(", ""); parameter = parameter.substring(0, parameter.length() - 1); String[] attr = parameter.split(","); if (attr != null && attr.length == 2) { int beginIndex = Integer.parseInt(attr[0]); int endIndex = Integer.parseInt(attr[1]); text = text.substring(beginIndex, endIndex); } LOGGER.debug("substring??" + text); return text; } /** * removeText() ??CSS?? * * @param text CSS?? * @param parseExpression ? * @return ??? */ public static String executeRemoveText(String text, String parseExpression) { LOGGER.debug("removeText??" + text); String parameter = parseExpression.replace("removeText(", ""); parameter = parameter.substring(0, parameter.length() - 1); text = text.replace(parameter, ""); LOGGER.debug("removeText??" + text); return text; } /** * ?CSS deleteChild(div.ep-source) * ??CSS?CSS??? * * @param text CSS?? * @param doc * @param cssPath CSS * @param parseExpression ? * @return ??? */ public static String executeDeleteChild(String text, Document doc, CssPath cssPath, String parseExpression) { LOGGER.debug("deleteChild??" + text); String parameter = parseExpression.replace("deleteChild(", ""); parameter = parameter.substring(0, parameter.length() - 1); Elements elements = doc.select(cssPath.getCssPath() + " " + parameter); for (Element element : elements) { String t = element.text(); if (StringUtils.isNotBlank(t)) { LOGGER.debug("deleteChild?" + t); text = text.replace(t, ""); } } LOGGER.debug("deleteChild??" + text); return text; } }