Example usage for org.jsoup.nodes Element text

List of usage examples for org.jsoup.nodes.Element.text()

Introduction

This page collects example usages of the method org.jsoup.nodes.Element.text().

Prototype

public String text() 

Source Link

Document

Gets the combined text of this element and all its children.

Usage

From source file:net.trustie.model.OpenHubProject_Model.java

/**
 * Populates {@code mostWrittenIn} and, when a second element is available,
 * {@code commentsPercentage} from the elements that
 * {@code getAElements(nutshell)} returns.
 *
 * <p>The first element's text has the "most written in" header stripped;
 * the second element's text has the comments-percentage tail and any
 * indefinite articles stripped. Both results are trimmed.
 *
 * @param nutshell fragment handed to {@code getAElements}
 */
private void handleNutShell1(String nutshell) {
    Elements eles = getAElements(nutshell);
    // Without this guard get(0) throws IndexOutOfBoundsException when nothing
    // was extracted; the sibling handlers already skip empty results.
    if (eles.isEmpty()) {
        return;
    }

    Element ele = eles.get(0);
    this.mostWrittenIn = StringHandler.removeHeader(ele.text(), OpenHubProject_Model.mostWrittenHeader).trim();

    if (eles.size() > 1) {
        ele = eles.get(1);
        this.commentsPercentage = StringHandler
                .removeTail(ele.text(), OpenHubProject_Model.commentsPercentageTail).trim();
        this.commentsPercentage = StringHandler.removeIndefiniteArticles(this.commentsPercentage).trim();
    }
}

From source file:net.trustie.model.OpenHubProject_Model.java

/**
 * Copies the text of up to the first three elements returned by
 * {@code getAElements(nutshell)} into {@code codebaseStatus},
 * {@code teamScale} and {@code commitStatus}, in that order.
 *
 * <p>Fields whose element is missing are left untouched.
 *
 * @param nutshell fragment handed to {@code getAElements}
 */
private void handleNutShell2(String nutshell) {
    Elements anchors = getAElements(nutshell);
    int count = anchors.size();

    if (count >= 1) {
        this.codebaseStatus = anchors.get(0).text();
    }
    if (count >= 2) {
        this.teamScale = anchors.get(1).text();
    }
    if (count >= 3) {
        this.commitStatus = anchors.get(2).text();
    }
}

From source file:net.trustie.model.OpenHubProject_Model.java

/**
 * Fills the effort estimate and the first/last commit times from up to the
 * first three elements returned by {@code getAElements(nutshell)}.
 *
 * <ul>
 *   <li>element 0: estimated effort time (tail stripped, trimmed)</li>
 *   <li>element 1: first commit time (header stripped, trimmed, then parsed
 *       by {@code handleDateAt})</li>
 *   <li>element 2: last commit time (header and preposition stripped, then
 *       formatted and parsed via {@code DateHandler})</li>
 * </ul>
 *
 * <p>Fields whose element is missing are left untouched.
 *
 * @param nutshell fragment handed to {@code getAElements}
 * @param date     passed to {@code DateHandler.formatAllTypeDate} when the
 *                 last commit time is parsed
 */
private void handleNutShell3(String nutshell, Date date) {
    Elements anchors = getAElements(nutshell);
    int count = anchors.size();
    if (count == 0) {
        return;
    }

    this.estimateEffortTime = StringHandler
            .removeTail(anchors.get(0).text(), OpenHubProject_Model.estimateEffortTimeTail).trim();
    if (count == 1) {
        return;
    }

    String firstCommitAt = StringHandler
            .removeHeader(anchors.get(1).text(), OpenHubProject_Model.firstCommitTimeHeader).trim();
    // handleDateAt is deliberately invoked twice, exactly as before.
    ansTmp = handleDateAt(firstCommitAt).toString();
    this.firstCommitTime = handleDateAt(firstCommitAt);
    if (count == 2) {
        return;
    }

    String lastCommitAt = StringHandler
            .removeHeader(anchors.get(2).text(), OpenHubProject_Model.lastCommitTimeHeader).trim();
    lastCommitAt = StringHandler.removePreposition(lastCommitAt);
    this.lastCommitTime = DateHandler.stringToDate(DateHandler.formatAllTypeDate(lastCommitAt, date));
}

From source file:nl.sidn.pcap.ip.GoogleResolverCheck.java

/**
 * Downloads the page configured under {@code Settings.RESOLVER_LIST_GOOGLE}
 * and registers every resolver range listed in its first {@code <pre>} block.
 *
 * <p>Each line of that block is split on spaces; only lines with exactly two
 * parts are used, and the first part is added to both {@code bit_subnets}
 * and {@code subnets}. Failures are logged and never propagated.
 */
@Override
protected void init() {
    String url = Settings.getInstance().getSetting(Settings.RESOLVER_LIST_GOOGLE);
    LOGGER.info("Load Google resolver addresses from url: " + url);

    Document doc;
    try {
        doc = Jsoup.connect(url).get();
    } catch (Exception e) {
        // Pass the exception to the logger so the cause is not lost.
        LOGGER.error("Problem while getting Google resolvers url: " + url, e);
        return;
    }

    Elements tags = doc.getElementsByTag("pre");
    // The page is only processed when it has exactly two <pre> blocks;
    // any other count is reported as "nothing found".
    if (tags.size() != 2) {
        LOGGER.error("No Google resolvers found at url: " + url);
        return;
    }

    String[] ips = StringUtils.split(tags.get(0).text(), '\n');
    for (String ip : ips) {
        String[] parts = StringUtils.split(ip, ' ');
        if (parts.length != 2) {
            continue;
        }
        if (LOGGER.isDebugEnabled()) {
            LOGGER.debug("Add Google resolver IP range: " + parts[0]);
        }

        try {
            bit_subnets.add(Subnet.createInstance(parts[0]));
            subnets.add(parts[0]);
        } catch (UnknownHostException e) {
            // Log the throwable itself instead of gluing it onto the message.
            LOGGER.error("Problem while adding Google resolver IP range: " + parts[0], e);
        }
    }

    if (subnets.size() == 0) {
        LOGGER.error("No Google resolvers found at url: " + url);
    }
}

From source file:org.aliuge.crawler.extractor.selector.AbstractElementCssSelector.java

/**
 * ????/*from   www  .j  a va 2  s .c  o  m*/
 * @param elements
 * @return
 */
protected String getExtractText(Elements elements) {
    if (elements.size() == 0)
        return null;
    String temp = "";

    if (attr.equalsIgnoreCase("tostring")) {
        return temp = elements.toString();
    } else {
        if (index == -1 && StringUtils.isNotBlank(this.regex)) {
            for (Element e : elements) {
                Element element = e;
                if (element.select(this.regex).size() > 0) {
                    return temp = e.text();
                }
            }
            return temp;
        } else {
            if (index > -1 && index < elements.size()) {
                return elements.get(index).text();
            }
        }
        return elements.first().text();
    }

    /*if(attr.equals("tostring")){
       if(index==0 || index>elements.size())
    temp = elements.first().toString();
       else
    temp = elements.get(index).toString();
    }else{
       if(index==0 || index>elements.size())
    temp = elements.first().text();
       else
    temp = elements.get(index).text();
    }
            
    if(null!=pattern){
       Matcher m = pattern.matcher(temp);
       if(m.find()){
    temp = m.group(1);
       }
    }*/
    //return temp;
}

From source file:org.aliuge.crawler.jobconf.ExtractConfig.java

/**
 * ????/* www. j a v a  2 s.co  m*/
 * @param doc
 * @return
 * @throws ConfigurationException
 */
public ExtractConfig loadConfig(Document doc) {
    Elements extractElement = doc.select("extract");
    super.setJobName(doc.select("job").attr("name"));
    super.setIndexName(doc.select("job").attr("indexName"));
    String temp = extractElement.select("threadNum").text();
    if (StringUtils.isNotBlank(temp)) {
        this.threadNum = Integer.parseInt(temp);
    }

    Elements templateElement = extractElement.select("extract").select("template");
    Iterator<Element> it = templateElement.iterator();

    while (it.hasNext()) {
        Element template = it.next();
        ExtractTemplate extractTemplate = new ExtractTemplate();
        // ?Url????
        Elements urlPatternElement = template.select("url");
        List<Pattern> patterns = Lists.newArrayList();
        for (Element urlElement : urlPatternElement) {
            patterns.add(Pattern.compile(urlElement.text()));
        }
        extractTemplate.setUrlPattern(patterns);
        extractTemplate.setName(template.attr("name"));
        // ???
        Elements selectElement = template.select("elements").first().children();
        for (Element element : selectElement) {
            if ("element".equals(element.tagName())) {
                AbstractElementCssSelector<?> selector = ElementCssSelectorFactory.create(element);
                extractTemplate.addCssSelector(selector);
            } else if ("if".equals(element.tagName())) {
                IFConditions ifConditions = IFConditions.create(element);
                extractTemplate.addConditions(ifConditions);
            }
        }
        super.setExtractConfig(this);
        this.templates.add(extractTemplate);
    }
    //super.setExtractConfig(this);
    return this;
}

From source file:org.aliuge.crawler.jobconf.FetchConfig.java

/**
 * Reads the {@code fetch} section of a job configuration document.
 *
 * <p>Sets the job name and index name from the {@code job} element, then
 * copies every non-blank setting under {@code fetch} into the matching
 * field. Each non-blank {@code seed} is recorded in {@code seeds} and queued
 * in the job's pending area, and the URL filters are loaded.
 *
 * @param confDoc parsed configuration document
 * @return this instance
 * @throws ConfigurationException if a numeric setting cannot be parsed
 */
@SuppressWarnings("unchecked")
public FetchConfig loadConfig(Document confDoc) throws ConfigurationException {
    try {
        Document doc = confDoc;
        super.setJobName(doc.select("job").attr("name"));
        super.setIndexName(doc.select("job").attr("indexName"));
        Elements e = doc.select("fetch");
        this.type = e.select("type").text();
        this.agent = e.select("agent").text();

        String temp = e.select("threadNum").text();
        if (StringUtils.isNotBlank(temp)) {
            this.threadNum = Integer.parseInt(temp);
        }

        temp = e.select("delayBetweenRequests").text();
        if (StringUtils.isNotBlank(temp)) {
            this.delayBetweenRequests = Integer.parseInt(temp);
        }

        temp = e.select("maxDepthOfCrawling").text();
        if (StringUtils.isNotBlank(temp)) {
            this.maxDepthOfCrawling = Integer.parseInt(temp);
        }

        temp = e.select("fetchBinaryContent").text();
        if (StringUtils.isNotBlank(temp)) {
            this.fetchBinaryContent = Boolean.parseBoolean(temp);
        }

        temp = e.select("maxOutgoingLinksToFollow").text();
        if (StringUtils.isNotBlank(temp)) {
            this.maxOutgoingLinksToFollow = Integer.parseInt(temp);
        }

        temp = e.select("fileSuffix").text();
        if (StringUtils.isNotBlank(temp)) {
            this.fileSuffix = temp;
        }

        temp = e.select("maxDownloadSizePerPage").text();
        if (StringUtils.isNotBlank(temp)) {
            this.maxDownloadSizePerPage = Integer.parseInt(temp);
        }

        temp = e.select("https").text();
        if (StringUtils.isNotBlank(temp)) {
            this.https = Boolean.parseBoolean(temp);
        }

        temp = e.select("onlyDomain").text();
        if (StringUtils.isNotBlank(temp)) {
            this.onlyDomain = Boolean.parseBoolean(temp);
        }

        temp = e.select("socketTimeoutMilliseconds").text();
        if (StringUtils.isNotBlank(temp)) {
            this.socketTimeoutMilliseconds = Integer.parseInt(temp);
        }

        temp = e.select("connectionTimeout").text();
        if (StringUtils.isNotBlank(temp)) {
            this.connectionTimeout = Integer.parseInt(temp);
        }

        temp = e.select("maxTotalConnections").text();
        if (StringUtils.isNotBlank(temp)) {
            this.maxTotalConnections = Integer.parseInt(temp);
        }

        // Parsed once; the original repeated this block twice.
        temp = e.select("maxConnectionsPerHost").text();
        if (StringUtils.isNotBlank(temp)) {
            this.maxConnectionsPerHost = Integer.parseInt(temp);
        }

        // The proxy setting is handed to PropertyConfigurationHelper.getProperties;
        // every value of the returned properties becomes a proxy entry.
        temp = e.select("proxy").text();
        if (StringUtils.isNotBlank(temp)) {
            Properties p = PropertyConfigurationHelper.getProperties(temp);
            this.proxyIps = Lists.newLinkedList();
            for (Object o : p.keySet()) {
                proxyIps.add((String) p.get(o));
            }
        }

        // Seed URLs
        Elements seeds = doc.select("fetch seeds seed");
        for (Element element : seeds) {
            String url = element.text();
            if (StringUtils.isBlank(url)) {
                continue;
            }
            url = url.trim();
            String area = element.attr("area");
            this.seeds.add(url);

            WebURL areaUrl = new WebURL(area, url);

            try {
                PendingManager.getPendingArea(super.getJobName()).addElement(areaUrl);
            } catch (QueueException e1) {
                // Logged once with context; printStackTrace() duplicated this on stderr.
                log.error("Failed to queue seed url: " + url, e1);
            }
        }

        // URL filters applied when fetching
        Elements fetchUrlFilters = doc.select("fetchUrlFilters filter");
        for (Element element : fetchUrlFilters) {
            String tmp = element.text();
            if (StringUtils.isNotBlank(tmp)) {
                this.fetchUrlFilters.add(tmp);
            }
        }

        // URL filters applied when extracting, each with its "replace" attribute
        Elements extractUrlfilters = doc.select("extractUrlfilters filter");
        for (Element element : extractUrlfilters) {
            String tmp = element.text();
            String tmp_rep = element.attr("replace");
            if (StringUtils.isNotBlank(tmp)) {
                this.extractUrlfilters.add(new KeyValue(tmp, tmp_rep));
            }
        }
    } catch (NumberFormatException e) {
        throw new ConfigurationException("?" + e.getMessage());
    }
    return this;
}

From source file:org.angellist.angellistmobile.FeedJSONAdapter.java

/**
 * Converts the HTML description to plain text and re-inserts anchors for
 * links that point at users or startups.
 *
 * <p>For every {@code a[href]} whose {@code data-type} is "User" or
 * "Startup", each occurrence of the link's text in the plain description is
 * replaced by an anchor with an app-specific scheme followed by the link's
 * {@code data-id}. Other links are left as plain text.
 *
 * @param descriptionHtml description markup
 * @return plain-text description with user/startup anchors restored
 */
public String replaceUserAndStartupLinks(String descriptionHtml) {
    // Start from the description with all tags removed.
    String description = Html.fromHtml(descriptionHtml).toString();

    Document doc = Jsoup.parse(descriptionHtml);
    for (Element link : doc.select("a[href]")) {
        String dataType = link.attr("data-type");
        String scheme;
        if ("User".equals(dataType)) {
            scheme = "org.angellist.angellistmobile.user://";
        } else if ("Startup".equals(dataType)) {
            scheme = "org.angellist.angellistmobile.startup://";
        } else {
            continue;
        }

        String label = link.text();
        // String.replace("", x) inserts x between every character, so a link
        // without visible text must be skipped.
        if (label.isEmpty()) {
            continue;
        }

        description = description.replace(label,
                "<a href=\"" + scheme + link.attr("data-id") + "\">" + label + "</a>");
    }

    return description;
}

From source file:org.apache.marmotta.ldclient.provider.html.mapping.CssTextLiteralMapper.java

/**
 * Maps the element's cleaned text to a single literal value.
 *
 * <p>A blank text yields an empty list. Otherwise the literal is created
 * with the configured language if set, else with the configured XSD
 * datatype if set, else as a plain literal.
 *
 * @param resourceUri not used by this mapper
 * @param elem        element whose text is mapped
 * @param factory     factory used to create the literal
 * @return an empty list or a singleton list holding the literal
 */
@Override
public List<Value> map(String resourceUri, Element elem, ValueFactory factory) {
    final String text = cleanValue(elem.text());
    if (StringUtils.isBlank(text)) {
        return Collections.emptyList();
    }

    final Value literal;
    if (language != null) {
        literal = factory.createLiteral(text, language.toString());
    } else if (datatype != null) {
        literal = factory.createLiteral(text, factory.createURI(Namespaces.NS_XSD + datatype));
    } else {
        literal = factory.createLiteral(text);
    }
    return Collections.singletonList(literal);
}

From source file:org.apache.nifi.GetHTMLElement.java

/**
 * Builds the output string for an element: the requested part of the
 * element wrapped between the prepend and append values.
 *
 * @param prependValue text placed before the extracted part; empty or
 *                     {@code null} is treated as no prefix
 * @param outputType   which part to extract (HTML, text, data or attribute);
 *                     unrecognised types fall back to the element's HTML
 * @param appendValue  text placed after the extracted part; empty or
 *                     {@code null} is treated as no suffix
 * @param ele          element to read from
 * @param attrKey      attribute name, used only for the attribute output type
 * @return prefix + extracted part + suffix
 */
private String extractElementValue(String prependValue, final String outputType, String appendValue,
        final Element ele, final String attrKey) {
    final String prefix = StringUtils.isEmpty(prependValue) ? "" : prependValue;
    final String suffix = StringUtils.isEmpty(appendValue) ? "" : appendValue;

    final String extracted;
    switch (outputType) {
    case ELEMENT_TEXT:
        extracted = ele.text();
        break;
    case ELEMENT_DATA:
        extracted = ele.data();
        break;
    case ELEMENT_ATTRIBUTE:
        extracted = ele.attr(attrKey);
        break;
    case ELEMENT_HTML:
    default:
        extracted = ele.html();
        break;
    }
    return prefix + extracted + suffix;
}