com.nttec.everychan.chans.cirno.MikubaReader.java Source code

Java tutorial

Introduction

Here is the source code for com.nttec.everychan.chans.cirno.MikubaReader.java

Source

/*
 * Everychan Android (Meta Imageboard Client)
 * Copyright (C) 2014-2016  miku-nyan <https://github.com/miku-nyan>
 *     
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 * 
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 * 
 * You should have received a copy of the GNU General Public License
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */

package com.nttec.everychan.chans.cirno;

import java.io.BufferedReader;
import java.io.Closeable;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.Reader;
import java.text.DateFormat;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.Locale;
import java.util.Set;
import java.util.TimeZone;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.apache.commons.lang3.StringEscapeUtils;

import com.nttec.everychan.api.models.AttachmentModel;
import com.nttec.everychan.api.models.PostModel;
import com.nttec.everychan.api.models.ThreadModel;
import com.nttec.everychan.api.util.CryptoUtils;
import com.nttec.everychan.common.Logger;

/**
 * Streaming parser for HTML pages of the hatsune.ru imageboard.
 * <p>
 * The reader consumes the page character by character, watches for a fixed set of HTML markers
 * (see {@link #FILTERS_OPEN}) and assembles {@link ThreadModel}/{@link PostModel} objects from the
 * text found next to them. No DOM is built.
 * <p>
 * Instances keep mutable parsing state in plain fields and perform no synchronization, so one
 * reader must not be shared between threads.
 *
 * @author miku-nyan
 */
public class MikubaReader implements Closeable {
    private static final String TAG = "MikubaReader";

    /**
     * Pattern of the post date found between the subject and the closing {@code </label>}.
     * {@code HH} (0-23) is used because the text carries no am/pm marker; with the 12-hour letter
     * {@code hh} an hour value of 12 is read as midnight.
     */
    private static final String DATE_PATTERN = "EEE dd MMM yyyy HH:mm:ss";
    /** Time zone in which the dates on the page are interpreted. */
    private static final String DATE_TIMEZONE = "GMT+3";

    /** Everything before this marker is skipped. */
    private static final char[] DATA_START = "<div id=\"page\">".toCharArray();

    // Indexes into FILTERS_OPEN / FILTERS_CLOSE.
    private static final int FILTER_PAGE_END = 0;
    private static final int FILTER_THREAD_END = 1;
    private static final int FILTER_ATTACHMENT = 2;
    private static final int FILTER_POSTNUMBER_OP = 3;
    private static final int FILTER_POSTNUMBER = 4;
    private static final int FILTER_SUBJECT = 5;
    private static final int FILTER_ENDDATE = 6;
    private static final int FILTER_START_COMMENT = 7;

    /** Markers that trigger an action in {@link #handleFilter(int)} once fully matched. */
    private static final char[][] FILTERS_OPEN = {
            "<center>".toCharArray(),
            "<hr".toCharArray(),
            "<td class=\"image\"".toCharArray(),
            "<td class=\"post\" id=\"".toCharArray(),
            "<td class=\"reply\" id=\"".toCharArray(),
            "<span class=\"replytitle\">".toCharArray(),
            "</label>".toCharArray(),
            "<blockquote".toCharArray() };

    /** Terminator read up to after the corresponding open marker ({@code null} = nothing to read). */
    private static final char[][] FILTERS_CLOSE = {
            null,
            null,
            "</td>".toCharArray(),
            "\"".toCharArray(),
            "\"".toCharArray(),
            "</span>".toCharArray(),
            null,
            ">".toCharArray() };

    // Markers recognized while reading a comment body.
    private static final char[] BLOCKQUOTE_OPEN = "<blockquote".toCharArray();
    private static final char[] BLOCKQUOTE_CLOSE = "</blockquote>".toCharArray();
    private static final char[] OMITTED_OPEN = "<span class=\"omitted\">".toCharArray();
    private static final char[] OMITTED_CLOSE = "</span>".toCharArray();

    /** Matches the post number inside a reply link; group 1 is the number. */
    private static final Pattern POST_REFERENCE = Pattern.compile("<a href=\"/reply/(\\d+)");

    private final Reader _in;

    /** Per-reader formatter: {@link SimpleDateFormat} is not thread-safe, so it is not shared statically. */
    private final DateFormat dateFormat = createDateFormat();

    private final StringBuilder readBuffer = new StringBuilder();
    private final StringBuilder dateBuffer = new StringBuilder();
    private final StringBuilder commentBuffer = new StringBuilder();

    private List<ThreadModel> threads;
    private ThreadModel currentThread;
    /** Posts collected so far for {@link #currentThread}. */
    private List<PostModel> postsBuf;
    /** Numbers of the posts in {@link #postsBuf}, for quick lookup in {@link #fixPostRefs(String)}. */
    private Set<String> postsNumBuf;
    private PostModel currentPost;
    private List<AttachmentModel> currentAttachments;
    /** {@code true} while the characters being read are collected into {@link #dateBuffer}. */
    private boolean inDate;

    /**
     * Incremental matcher for one fixed character sequence.
     * <p>
     * On a mismatch it only checks whether the current character restarts the sequence; there is
     * no full backtracking. That is sufficient for sequences whose first character does not occur
     * again inside them, which holds for every marker declared in this class.
     */
    private static final class SequenceMatcher {
        private final char[] sequence;
        private int matched;

        /** @param sequence non-empty sequence to look for */
        SequenceMatcher(char[] sequence) {
            this.sequence = sequence;
        }

        /**
         * Feeds the next character of the input.
         *
         * @return {@code true} if this character completes the sequence; the matcher is then
         *         reset and ready to match the sequence again
         */
        boolean accept(int ch) {
            if (ch == sequence[matched]) {
                if (++matched == sequence.length) {
                    matched = 0;
                    return true;
                }
            } else if (matched != 0) {
                matched = ch == sequence[0] ? 1 : 0;
            }
            return false;
        }
    }

    /**
     * Creates a reader over the given page stream.
     *
     * @param in stream with the HTML page; closed by {@link #close()}
     */
    public MikubaReader(InputStream in) {
        // NOTE(review): the stream is decoded with the platform default charset - confirm this
        // matches the encoding the site actually serves.
        _in = new BufferedReader(new InputStreamReader(in));
    }

    private static DateFormat createDateFormat() {
        DateFormat format = new SimpleDateFormat(DATE_PATTERN, Locale.US);
        format.setTimeZone(TimeZone.getTimeZone(DATE_TIMEZONE));
        return format;
    }

    /** Starts collecting a new thread. */
    private void initThreadModel() {
        currentThread = new ThreadModel();
        currentThread.postsCount = 0;
        currentThread.attachmentsCount = -1;
        postsBuf = new ArrayList<>();
        postsNumBuf = new HashSet<>();
    }

    /** Starts collecting a new post and resets the date capturing state. */
    private void initPostModel() {
        currentPost = new PostModel();
        currentPost.name = "";
        currentPost.email = "";
        currentPost.trip = "";
        currentAttachments = new ArrayList<AttachmentModel>();
        inDate = false;
        dateBuffer.setLength(0);
    }

    /**
     * Stores the current thread in the result list if it has at least one post, then starts a new
     * one. The first collected post supplies the thread number for the thread and all its posts.
     */
    private void finalizeThread() {
        if (postsBuf.isEmpty()) {
            return;
        }
        currentThread.posts = postsBuf.toArray(new PostModel[postsBuf.size()]);
        currentThread.threadNumber = currentThread.posts[0].number;
        for (PostModel post : currentThread.posts) {
            post.parentThread = currentThread.threadNumber;
        }
        threads.add(currentThread);
        initThreadModel();
    }

    /**
     * Adds the current post to the current thread, provided a post number was found for it, and
     * starts a new post either way.
     */
    private void finalizePost() {
        if (currentPost.number != null && currentPost.number.length() > 0) {
            ++currentThread.postsCount;
            currentPost.attachments = currentAttachments.toArray(new AttachmentModel[currentAttachments.size()]);
            if (currentPost.subject == null) {
                currentPost.subject = "";
            }
            if (currentPost.comment == null) {
                currentPost.comment = "";
            }
            postsBuf.add(currentPost);
            postsNumBuf.add(currentPost.number);
        }
        initPostModel();
    }

    /**
     * Parses the page from the underlying stream.
     *
     * @return the threads found on the page, in page order (empty if none were found)
     * @throws IOException if reading from the stream fails
     */
    public ThreadModel[] readPage() throws IOException {
        threads = new ArrayList<ThreadModel>();
        initThreadModel();
        initPostModel();

        skipUntilSequence(DATA_START);
        readData();

        return threads.toArray(new ThreadModel[threads.size()]);
    }

    /**
     * Main loop: feeds every character to all open-marker matchers and dispatches completed
     * markers to {@link #handleFilter(int)}. Stops at the page-end marker or at end of stream.
     */
    private void readData() throws IOException {
        SequenceMatcher[] filters = new SequenceMatcher[FILTERS_OPEN.length];
        for (int i = 0; i < filters.length; ++i) {
            filters[i] = new SequenceMatcher(FILTERS_OPEN[i]);
        }

        int curChar;
        while ((curChar = _in.read()) != -1) {
            if (inDate) {
                dateBuffer.append((char) curChar);
            }
            for (int i = 0; i < filters.length; ++i) {
                if (!filters[i].accept(curChar)) {
                    continue;
                }
                if (i == FILTER_PAGE_END) {
                    finalizeThread();
                    return;
                }
                handleFilter(i);
            }
        }
        finalizeThread();
    }

    /**
     * Performs the action bound to a completed open marker.
     *
     * @param filterIndex one of the {@code FILTER_*} constants
     */
    private void handleFilter(int filterIndex) throws IOException {
        // Any other marker inside the date area means the text collected so far was not the date.
        if (inDate && filterIndex != FILTER_ENDDATE) {
            dateBuffer.setLength(0);
        }
        switch (filterIndex) {
        case FILTER_THREAD_END:
            finalizeThread();
            break;
        case FILTER_ATTACHMENT:
            parseAttachment(readUntilSequence(FILTERS_CLOSE[filterIndex]));
            break;
        case FILTER_POSTNUMBER:
        case FILTER_POSTNUMBER_OP:
            currentPost.number = readPostNumber(FILTERS_CLOSE[filterIndex]);
            break;
        case FILTER_SUBJECT:
            currentPost.subject = StringEscapeUtils.unescapeHtml4(readUntilSequence(FILTERS_CLOSE[filterIndex]))
                    .trim();
            currentPost.subject = CryptoUtils.fixCloudflareEmails(currentPost.subject);
            // The date is the text between the subject and the next "</label>".
            inDate = true;
            break;
        case FILTER_ENDDATE:
            finishDate();
            break;
        case FILTER_START_COMMENT:
            skipUntilSequence(FILTERS_CLOSE[filterIndex]);
            currentPost.comment = readPostComment();
            finalizePost();
            break;
        default:
            break;
        }
    }

    /**
     * Reads the value of the {@code id} attribute and drops its first character (the non-numeric
     * prefix of the element id).
     *
     * @return the post number, or an empty string if the attribute value is empty; a post with an
     *         empty number is later discarded by {@link #finalizePost()}
     */
    private String readPostNumber(char[] terminator) throws IOException {
        String id = readUntilSequence(terminator).trim();
        // Guard: substring(1) on an empty id would throw and abort parsing of the whole page.
        return id.length() == 0 ? "" : id.substring(1);
    }

    /** Parses the collected date text into the current post's timestamp and leaves date mode. */
    private void finishDate() {
        int markerLength = FILTERS_OPEN[FILTER_ENDDATE].length;
        if (dateBuffer.length() > markerLength) {
            // The buffer ends with the "</label>" marker itself; cut it off.
            String date = dateBuffer.substring(0, dateBuffer.length() - markerLength).trim();
            if (date.length() > 0) {
                try {
                    currentPost.timestamp = dateFormat.parse(date).getTime();
                } catch (Exception e) {
                    Logger.e(TAG, "cannot parse date; make sure you choose the right DateFormat for this chan",
                            e);
                }
            }
        }
        inDate = false;
        dateBuffer.setLength(0);
    }

    /**
     * Reads the comment body up to the {@code </blockquote>} that closes it, tracking nested
     * blockquotes. An "omitted posts" span met on the way is consumed and its number is added to
     * the current thread's post count.
     *
     * @return the comment HTML without the closing tag; if the stream ends before the closing tag,
     *         everything that was read
     */
    private String readPostComment() throws IOException {
        commentBuffer.setLength(0);
        SequenceMatcher quoteOpen = new SequenceMatcher(BLOCKQUOTE_OPEN);
        SequenceMatcher quoteClose = new SequenceMatcher(BLOCKQUOTE_CLOSE);
        SequenceMatcher omittedOpen = new SequenceMatcher(OMITTED_OPEN);

        int depth = 1; // the opening <blockquote> has already been consumed by the caller
        boolean closed = false;
        int curChar;
        while ((curChar = _in.read()) != -1) {
            commentBuffer.append((char) curChar);

            if (quoteOpen.accept(curChar)) {
                ++depth;
            }
            if (quoteClose.accept(curChar) && --depth == 0) {
                closed = true;
                break;
            }
            if (omittedOpen.accept(curChar)) {
                // The span content is consumed here and does not become part of the comment.
                parseOmittedString(readUntilSequence(OMITTED_CLOSE));
            }
        }

        // Strip the closing tag only if it was really read; on a truncated stream keep the text.
        if (closed) {
            commentBuffer.setLength(commentBuffer.length() - BLOCKQUOTE_CLOSE.length);
        }
        if (commentBuffer.length() == 0) {
            return "";
        }
        String comment = commentBuffer.toString();
        commentBuffer.setLength(0);
        return CryptoUtils.fixCloudflareEmails(fixPostRefs(comment));
    }

    /**
     * Rewrites reply links that point at a post already collected for the current thread so that
     * the number after {@code /reply/} is the thread number (the number of the first post).
     *
     * @param comment comment HTML
     * @return the comment with rewritten links, or the same string if nothing had to be changed
     */
    private String fixPostRefs(String comment) {
        if (postsBuf == null || postsBuf.isEmpty()) {
            return comment;
        }
        Matcher matcher = POST_REFERENCE.matcher(comment);
        if (!matcher.find()) {
            return comment;
        }

        String threadNum = postsBuf.get(0).number;
        StringBuilder fixed = new StringBuilder(comment.length());
        int appendPos = 0;
        boolean replaced = false;
        do {
            String num = matcher.group(1);
            fixed.append(comment, appendPos, matcher.start(1));
            appendPos = matcher.end();
            if (!num.equals(threadNum) && postsNumBuf.contains(num)) {
                replaced = true;
                fixed.append(threadNum);
            } else {
                fixed.append(num);
            }
        } while (matcher.find());

        if (!replaced) {
            return comment;
        }
        fixed.append(comment, appendPos, comment.length());
        return fixed.toString();
    }

    /**
     * Adds the first decimal number found in the "omitted posts" text to the current thread's
     * post count. Text without digits is ignored.
     */
    private void parseOmittedString(String omitted) {
        int len = omitted.length();
        int start = 0;
        while (start < len && !isAsciiDigit(omitted.charAt(start))) {
            ++start;
        }
        int end = start;
        while (end < len && isAsciiDigit(omitted.charAt(end))) {
            ++end;
        }
        if (start == end) {
            return;
        }
        try {
            currentThread.postsCount += Integer.parseInt(omitted.substring(start, end));
        } catch (NumberFormatException e) {
            // Only reachable when the digit run does not fit into an int; the count is best-effort.
            Logger.e(TAG, "cannot parse omitted posts count", e);
        }
    }

    private static boolean isAsciiDigit(char ch) {
        return ch >= '0' && ch <= '9';
    }

    /**
     * Extracts one attachment from the HTML of an image cell and adds it to the current post:
     * an {@code <img>} (thumbnail with derived or linked file) if present, otherwise an
     * {@code <embed>}.
     */
    private void parseAttachment(String html) {
        if (!parseImageAttachment(html)) {
            parseEmbedAttachment(html);
        }
    }

    /**
     * Handles a cell that contains an {@code <img src="...">}.
     *
     * @return {@code true} if an attachment was added
     */
    private boolean parseImageAttachment(String html) {
        int imgIndex = html.indexOf("<img");
        if (imgIndex == -1) {
            return false;
        }
        int srcIndex = html.indexOf("src=\"", imgIndex + 4);
        if (srcIndex == -1) {
            return false;
        }
        int start = srcIndex + 5;
        int end = html.indexOf("\"", start);
        if (end == -1) {
            return false;
        }

        AttachmentModel attachment = new AttachmentModel();
        attachment.size = -1;
        attachment.thumbnail = html.substring(start, end);
        if (attachment.thumbnail.contains("/thu/")) {
            // Real thumbnail: the full file lives under the same name in /src/.
            attachment.path = attachment.thumbnail.replace("/thu/", "/src/");
            attachment.type = attachment.path.toLowerCase(Locale.US).endsWith(".gif")
                    ? AttachmentModel.TYPE_IMAGE_GIF
                    : AttachmentModel.TYPE_IMAGE_STATIC;
        } else {
            // Not a thumbnail of the file itself: take the file from the link, if there is one.
            attachment.path = attachment.thumbnail;
            attachment.type = AttachmentModel.TYPE_OTHER_FILE;
            int startHref = html.indexOf("href=\"");
            if (startHref != -1) {
                int endHref = html.indexOf('\"', startHref + 6);
                if (endHref != -1) {
                    attachment.path = html.substring(startHref + 6, endHref);
                    String pathLower = attachment.path.toLowerCase(Locale.US);
                    if (pathLower.endsWith(".mp3") || pathLower.endsWith(".ogg")) {
                        attachment.type = AttachmentModel.TYPE_AUDIO;
                    }
                }
            }
        }
        currentAttachments.add(attachment);
        return true;
    }

    /** Handles a cell that contains an {@code <embed src="...">}, with special care for YouTube. */
    private void parseEmbedAttachment(String html) {
        int embedIndex = html.indexOf("<embed");
        if (embedIndex == -1) {
            return;
        }
        int srcIndex = html.indexOf("src=\"", embedIndex + 6);
        if (srcIndex == -1) {
            return;
        }
        int start = srcIndex + 5;
        int end = html.indexOf("\"", start);
        if (end == -1) {
            return;
        }

        AttachmentModel attachment = new AttachmentModel();
        attachment.size = -1;
        attachment.path = html.substring(start, end);
        if (attachment.path.contains("youtube")) {
            int youtubeIdIndex = attachment.path.indexOf("/v/");
            if (youtubeIdIndex != -1) {
                String youtubeId = attachment.path.substring(youtubeIdIndex + 3);
                attachment.path = "http://youtube.com/watch?v=" + youtubeId;
                attachment.thumbnail = "http://img.youtube.com/vi/" + youtubeId + "/default.jpg";
            }
        }
        attachment.type = AttachmentModel.TYPE_OTHER_NOTFILE;
        currentAttachments.add(attachment);
    }

    /**
     * Consumes the stream up to and including the given sequence, or to the end of the stream if
     * the sequence does not occur. An empty sequence consumes nothing.
     */
    private void skipUntilSequence(char[] sequence) throws IOException {
        if (sequence.length == 0) {
            return;
        }
        SequenceMatcher terminator = new SequenceMatcher(sequence);
        int curChar;
        while ((curChar = _in.read()) != -1) {
            if (terminator.accept(curChar)) {
                return;
            }
        }
    }

    /**
     * Consumes the stream up to and including the given sequence and returns the text before it.
     *
     * @return the text preceding the sequence; if the stream ends before the sequence is found,
     *         everything that was read; an empty string for an empty sequence
     */
    private String readUntilSequence(char[] sequence) throws IOException {
        if (sequence.length == 0) {
            return "";
        }
        readBuffer.setLength(0);
        SequenceMatcher terminator = new SequenceMatcher(sequence);
        boolean found = false;
        int curChar;
        while ((curChar = _in.read()) != -1) {
            readBuffer.append((char) curChar);
            if (terminator.accept(curChar)) {
                found = true;
                break;
            }
        }
        // Cut the terminator off only if it is really there; never chop data of a truncated stream.
        if (found) {
            readBuffer.setLength(readBuffer.length() - sequence.length);
        }
        return readBuffer.toString();
    }

    /** Closes the underlying stream. */
    @Override
    public void close() throws IOException {
        _in.close();
    }

}