org.loklak.api.search.WordpressCrawlerService.java Source code

Java tutorial

Introduction

Here is the source code for org.loklak.api.search.WordpressCrawlerService.java

Source

/**
 *  Wordpress Crawler
 *  Copyright 08.06.2016 by Jigyasa Grover, @jig08
 *
 *  This library is free software; you can redistribute it and/or
 *  modify it under the terms of the GNU Lesser General Public
 *  License as published by the Free Software Foundation; either
 *  version 2.1 of the License, or (at your option) any later version.
 *  
 *  This library is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 *  Lesser General Public License for more details.
 *  
 *  You should have received a copy of the GNU Lesser General Public License
 *  along with this program in the file lgpl21.txt
 *  If not, see <http://www.gnu.org/licenses/>.
 */

package org.loklak.api.search;

import java.io.IOException;

import org.json.JSONArray;
import org.json.JSONObject;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import org.loklak.server.APIException;
import org.loklak.server.APIHandler;
import org.loklak.server.AbstractAPIHandler;
import org.loklak.server.Authorization;
import org.loklak.server.BaseUserRole;
import org.loklak.server.Query;
import org.loklak.susi.SusiThought;
import org.loklak.tools.storage.JSONObjectWithDefault;

import javax.servlet.http.HttpServletResponse;

/**
 * API handler that scrapes the blog posts found on a single web page.
 *
 * <p>The handler is served under {@code /api/wordpresscrawler.json} and reads the page
 * address from the {@code url} query parameter. Every {@code <article>} element of the
 * fetched page is turned into one JSON object carrying the post's title, publication
 * date, author and content.
 *
 * <p>NOTE(review): the CSS class names used for extraction ({@code entry-title},
 * {@code posted-on}, {@code byline}, {@code entry-content}) look like the ones emitted
 * by common WordPress themes; pages that use different markup yield posts with those
 * fields missing.
 */
public class WordpressCrawlerService extends AbstractAPIHandler implements APIHandler {

    private static final long serialVersionUID = -5357182691897402354L;

    /**
     * @return the servlet path under which this handler is registered
     */
    @Override
    public String getAPIPath() {
        return "/api/wordpresscrawler.json";
    }

    /**
     * @return {@link BaseUserRole#ANONYMOUS}, the lowest role allowed to call this API
     */
    @Override
    public BaseUserRole getMinimalBaseUserRole() {
        return BaseUserRole.ANONYMOUS;
    }

    /**
     * @param baseUserRole the role for which default permissions are requested (unused)
     * @return always {@code null}; this handler defines no default permissions
     */
    @Override
    public JSONObject getDefaultPermissions(BaseUserRole baseUserRole) {
        return null;
    }

    /**
     * Reads the {@code url} query parameter (empty string when absent) and crawls it.
     *
     * @param call        the incoming query
     * @param response    the servlet response (unused)
     * @param rights      the caller's authorization (unused)
     * @param permissions the caller's permissions (unused)
     * @return the crawl result produced by {@link #crawlWordpress(String)}
     */
    @Override
    public JSONObject serviceImpl(Query call, HttpServletResponse response, Authorization rights,
            JSONObjectWithDefault permissions) throws APIException {
        String url = call.get("url", "");
        return crawlWordpress(url);
    }

    /**
     * Fetches {@code blogURL} and extracts one JSON object per {@code <article>} element.
     *
     * <p>Each object has the key {@code blog_url} plus, when the corresponding element is
     * present inside the article, {@code title}, {@code posted_on}, {@code author} and
     * {@code content}. A field whose element is missing is omitted from the object.
     *
     * <p>If {@code blogURL} is {@code null} or blank, or if the page cannot be fetched,
     * the returned thought carries an empty data array instead of failing with an
     * exception. A non-blank but malformed URL is still rejected by Jsoup with its own
     * unchecked exception.
     *
     * @param blogURL address of the page to crawl
     * @return a {@link SusiThought} whose data is the array of extracted posts
     */
    public static SusiThought crawlWordpress(String blogURL) {
        JSONArray blog = new JSONArray();

        Document blogHTML = null;
        if (blogURL != null && !blogURL.trim().isEmpty()) {
            try {
                blogHTML = Jsoup.connect(blogURL).get();
            } catch (IOException e) {
                // Best effort: report the failure and fall through to an empty result
                // rather than dereferencing a null document below.
                e.printStackTrace();
            }
        }

        if (blogHTML != null) {
            // Posts are appended directly to the JSON array, so there is no upper
            // bound on the number of articles a page may contain.
            for (Element article : blogHTML.getElementsByTag("article")) {
                JSONObject blogpost = new JSONObject();
                blogpost.put("blog_url", blogURL);
                blogpost.put("title", lastTextByClass(article, "entry-title"));
                blogpost.put("posted_on", lastTextByClass(article, "posted-on"));
                blogpost.put("author", lastTextByClass(article, "byline"));
                blogpost.put("content", lastTextByClass(article, "entry-content"));
                blog.put(blogpost);
            }
        }

        SusiThought json = new SusiThought();
        json.setData(blog);
        return json;
    }

    /**
     * Returns the text of the last element with the given CSS class inside {@code article}.
     *
     * @param article   the element to search in
     * @param className the CSS class to look for
     * @return the text of the last match, or {@code null} if nothing matches
     */
    private static String lastTextByClass(Element article, String className) {
        String text = null;
        for (Element match : article.getElementsByClass(className)) {
            // Later matches overwrite earlier ones, so the last match wins.
            text = match.text();
        }
        return text;
    }

}