org.apdplat.extractor.html.ExtractFunctionExecutor.java Source code

Java tutorial

Introduction

Here is the source code for org.apdplat.extractor.html.ExtractFunctionExecutor.java

Source

/**
 * 
 * APDPlat - Application Product Development Platform
 * Copyright (c) 2013, ??, yang-shangchuan@qq.com
 * 
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 * 
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 * 
 * You should have received a copy of the GNU General Public License
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
 * 
 */

package org.apdplat.extractor.html;

import org.apache.commons.lang.StringUtils;
import org.apdplat.extractor.html.model.CssPath;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Executes the post-processing functions that can be attached to an extraction rule.
 * <p>
 * An expression has the form {@code functionName(argument)}. Supported functions:
 * <ol>
 *   <li>{@code deleteChild(div.ep-source)} - remove the text of matching child elements</li>
 *   <li>{@code removeText(literal)} - remove every occurrence of a literal string</li>
 *   <li>{@code substring(0,19)} - keep only the characters in the range {@code [0, 19)}</li>
 * </ol>
 *
 * @author Yang Shangchuan
 */
public class ExtractFunctionExecutor {
    public static final Logger LOGGER = LoggerFactory.getLogger(ExtractFunctionExecutor.class);

    private static final String DELETE_CHILD = "deleteChild";
    private static final String REMOVE_TEXT = "removeText";
    private static final String SUBSTRING = "substring";

    /**
     * Dispatches an expression to the function whose name it starts with.
     *
     * @param text text previously extracted with the CSS path
     * @param doc document the text was extracted from (only used by {@code deleteChild})
     * @param cssPath CSS path the text was extracted with (only used by {@code deleteChild})
     * @param parseExpression function expression, e.g. {@code substring(0,19)}
     * @return the transformed text, or {@code null} when the expression is {@code null}
     *         or does not start with a supported function name
     */
    public static String execute(String text, Document doc, CssPath cssPath, String parseExpression) {
        if (parseExpression == null) {
            // Treated like any other unsupported expression instead of failing with an NPE.
            return null;
        }
        if (parseExpression.startsWith(DELETE_CHILD)) {
            return executeDeleteChild(text, doc, cssPath, parseExpression);
        }
        if (parseExpression.startsWith(REMOVE_TEXT)) {
            return executeRemoveText(text, parseExpression);
        }
        if (parseExpression.startsWith(SUBSTRING)) {
            return executeSubstring(text, parseExpression);
        }

        return null;
    }

    /**
     * Applies {@code substring(beginIndex,endIndex)} to the text.
     * <p>
     * The two comma-separated arguments may be surrounded by whitespace. Both indexes are
     * clamped to the bounds of the text, so {@code substring(0,19)} on a text shorter than
     * 19 characters returns the whole text instead of throwing. When the expression does not
     * carry exactly two arguments the text is returned unchanged.
     *
     * @param text text previously extracted with the CSS path
     * @param parseExpression function expression, e.g. {@code substring(0,19)}
     * @return the characters in the range {@code [beginIndex, endIndex)}, or the unchanged
     *         text when it is {@code null} or the expression does not have two arguments
     * @throws NumberFormatException if an argument is not an integer
     */
    public static String executeSubstring(String text, String parseExpression) {
        LOGGER.debug("substring??{}", text);
        String[] attr = extractArgument(parseExpression).split(",");
        if (text != null && attr.length == 2) {
            int length = text.length();
            // Clamp so that a short text or a sloppy range never raises an index error.
            int beginIndex = clamp(Integer.parseInt(attr[0].trim()), 0, length);
            int endIndex = clamp(Integer.parseInt(attr[1].trim()), beginIndex, length);
            text = text.substring(beginIndex, endIndex);
        }
        LOGGER.debug("substring??{}", text);
        return text;
    }

    /**
     * Applies {@code removeText(literal)}: removes every occurrence of the literal from the text.
     * <p>
     * The literal is taken verbatim (it is not trimmed, because whitespace may be significant).
     *
     * @param text text previously extracted with the CSS path
     * @param parseExpression function expression, e.g. {@code removeText(Source:)}
     * @return the text without the literal; the unchanged text when it is {@code null}
     *         or the literal is empty
     */
    public static String executeRemoveText(String text, String parseExpression) {
        LOGGER.debug("removeText??{}", text);
        String parameter = extractArgument(parseExpression);
        if (text != null && !parameter.isEmpty()) {
            text = text.replace(parameter, "");
        }
        LOGGER.debug("removeText??{}", text);
        return text;
    }

    /**
     * Applies {@code deleteChild(childSelector)}: selects the elements matching
     * {@code cssPath + " " + childSelector} in the document and removes the text of each
     * of them from the extracted text.
     * <p>
     * Removal is a plain string replacement, so every occurrence of a child's text is removed,
     * not only the one that originates from the child element.
     *
     * @param text text previously extracted with the CSS path
     * @param doc document the text was extracted from
     * @param cssPath CSS path the text was extracted with
     * @param parseExpression function expression, e.g. {@code deleteChild(div.ep-source)}
     * @return the text without the children's text; the unchanged text when any input is
     *         {@code null} or the child selector is blank
     */
    public static String executeDeleteChild(String text, Document doc, CssPath cssPath, String parseExpression) {
        LOGGER.debug("deleteChild??{}", text);
        String parameter = extractArgument(parseExpression);
        // A blank child selector would make the query match the parent element itself
        // and wipe the whole text, so it is treated as "nothing to delete".
        if (text != null && doc != null && cssPath != null && StringUtils.isNotBlank(parameter)) {
            Elements elements = doc.select(cssPath.getCssPath() + " " + parameter);
            for (Element element : elements) {
                String t = element.text();
                if (StringUtils.isNotBlank(t)) {
                    LOGGER.debug("deleteChild?{}", t);
                    text = text.replace(t, "");
                }
            }
        }
        LOGGER.debug("deleteChild??{}", text);
        return text;
    }

    /**
     * Returns the raw argument list of a function expression: everything between the first
     * {@code '('} and the last {@code ')'}. A missing closing parenthesis is tolerated.
     *
     * @param parseExpression function expression, may be {@code null}
     * @return the argument text, or an empty string when there is no opening parenthesis
     */
    private static String extractArgument(String parseExpression) {
        if (parseExpression == null) {
            return "";
        }
        int open = parseExpression.indexOf('(');
        if (open < 0) {
            return "";
        }
        int close = parseExpression.lastIndexOf(')');
        if (close <= open) {
            close = parseExpression.length();
        }
        return parseExpression.substring(open + 1, close);
    }

    /** Restricts {@code value} to the inclusive range {@code [min, max]}. */
    private static int clamp(int value, int min, int max) {
        return Math.max(min, Math.min(value, max));
    }
}