org.b3log.wordman.word.Main.java Source code

Java tutorial

Introduction

Here is the source code for org.b3log.wordman.word.Main.java

Source

/*
 * Copyright (c) 2014, B3log Team
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.b3log.wordman.word;

import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.OutputStream;
import java.util.ArrayList;
import java.util.List;
import java.util.UUID;
import org.apache.commons.io.IOUtils;
import org.jsoup.Connection;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;

/**
 * Command-line scraper that downloads the word lists of one vocabulary class from word.iciba.com and dumps
 * them as SQL.
 *
 * <p>
 * For every course of the configured class the course page is fetched with Jsoup, each listed word is
 * converted into a {@link Word}, and all words are collected in a single {@link Clazz}. The statements returned
 * by {@code Clazz#toSQLs()} are then printed to standard output and written to a UTF-8 encoded {@code .sql}
 * file.
 * </p>
 *
 * @author <a href="http://88250.b3log.org">Liang Ding</a>
 * @version 1.2.1.1, Jul 15, 2014
 * @since 1.0.0
 */
public final class Main {

    /**
     * Timeout passed to Jsoup for each page request (Jsoup interprets it as milliseconds).
     */
    private static final int TIMEOUT = 10000;

    /**
     * Number of word lists ({@code ul#word_list_1} .. {@code ul#word_list_PAGE}) read from each course page.
     */
    private static final int PAGE = 2;

    /**
     * Number of courses to fetch; courses are requested with numbers {@code 1..CLASS_NUM}.
     */
    private static final int CLASS_NUM = 228;

    /**
     * Name of the class; stored on the {@link Clazz} and used as the base name of the output file.
     *
     * <p>
     * NOTE(review): the literal looks mis-encoded in this copy of the file; it is kept unchanged because it is
     * runtime data.
     * </p>
     */
    private static final String CLASS_NAME = "??";

    /**
     * Id of the class; stored on the {@link Clazz} and sent as the {@code class} query parameter.
     */
    private static final String CLASS_ID = "15";

    /**
     * Private constructor; this class only hosts static members.
     */
    private Main() {
    }

    /**
     * Program entry point: scrapes every course of the configured class and writes the resulting SQL.
     *
     * @param args command-line arguments (not used)
     * @throws java.lang.Exception if a page cannot be fetched or parsed, a spelling contains an unexpected
     * character, or the SQL file cannot be written
     */
    public static void main(final String[] args) throws Exception {
        final Clazz clazz = new Clazz();
        clazz.setId(CLASS_ID);
        clazz.setName(CLASS_NAME);
        final List<Word> classWords = new ArrayList<Word>();
        clazz.setWords(classWords);

        for (int clazzNum = 1; clazzNum <= CLASS_NUM; clazzNum++) {
            final String url = "http://word.iciba.com/?action=words&class=" + clazz.getId() + "&course=" + clazzNum;
            final List<Word> courseWords = fetchCourseWords(url);
            classWords.addAll(courseWords);

            System.out.println("? [" + clazzNum + "] ??? [" + courseWords.size() + "]");
        }

        writeSQLs(clazz.toSQLs());
    }

    /**
     * Fetches one course page and parses every word listed on it.
     *
     * @param url the URL of the course page
     * @return the words found in the {@code ul#word_list_1} .. {@code ul#word_list_PAGE} lists, in page order
     * @throws IOException if the page cannot be fetched or parsed
     */
    private static List<Word> fetchCourseWords(final String url) throws IOException {
        final Connection.Response response = Jsoup.connect(url).userAgent("Mozilla").timeout(TIMEOUT).execute();
        final Document document = response.parse();

        final List<Word> ret = new ArrayList<Word>();
        for (int i = 1; i <= PAGE; i++) {
            final Elements wordLi = document.select("ul#word_list_" + i).select("li");

            for (final Element li : wordLi) {
                ret.add(parseWord(li));
            }
        }

        return ret;
    }

    /**
     * Builds a {@link Word} from one {@code li} element of a word list.
     *
     * <p>
     * The element is expected to contain the {@code div.word_main_list_w}, {@code div.word_main_list_y} and
     * {@code div.word_main_list_s} blocks; a missing block makes the corresponding {@code get(0)} call fail.
     * </p>
     *
     * @param li the list item describing a single word
     * @return the parsed word, with a freshly generated id and empty build/example fields
     * @throws IllegalStateException if the cleaned spelling contains a character rejected by
     * {@link #checkWord(String)}
     */
    private static Word parseWord(final Element li) {
        final Word word = new Word();
        // Random UUID with the dashes stripped serves as the word id.
        word.setId(UUID.randomUUID().toString().replace("-", ""));

        // Spelling: "title" attribute of the first span in the word block.
        final Element w = li.select("div.word_main_list_w").get(0);
        final String spell = cleanSpell(w.select("span").get(0).attr("title"));

        word.setWord(spell);
        if (!checkWord(spell)) { // Abort on spellings containing unexpected characters
            throw new IllegalStateException(" [" + spell + ']');
        }

        // Phonetic text comes from the first <strong>, pronunciation from the id of the first <a>.
        final Element y = li.select("div.word_main_list_y").get(0);
        word.setPhon(y.select("strong").get(0).text());
        word.setPron(y.select("a").get(0).id());

        // Explanation: text of the first span in the "s" block.
        final Element s = li.select("div.word_main_list_s").get(0);
        word.setPara(s.select("span").get(0).text());

        // These two fields are not scraped; they are stored as empty strings.
        word.setBuild("");
        word.setExample("");

        return word;
    }

    /**
     * Normalizes a raw spelling taken from the page.
     *
     * <p>
     * Removes every {@code *}, removes everything from the first {@code (} to the last {@code )} on a line
     * (the pattern is greedy), removes every backslash, and finally trims surrounding whitespace.
     * </p>
     *
     * @param raw the spelling as found in the page
     * @return the cleaned spelling
     */
    private static String cleanSpell(final String raw) {
        return raw.replace("*", "").replaceAll("\\(.*\\)", "").replace("\\", "").trim();
    }

    /**
     * Prints the specified SQL statements to standard output and writes them, one per line, to
     * {@code C:\<CLASS_NAME>.sql} using UTF-8.
     *
     * <p>
     * The stream is closed in a {@code finally} block so the file handle is released even when the write fails.
     * </p>
     *
     * @param sqls the SQL statements to output
     * @throws IOException if the file cannot be opened or written
     */
    private static void writeSQLs(final List<String> sqls) throws IOException {
        final StringBuilder sqlBuilder = new StringBuilder();

        for (final String sql : sqls) {
            System.out.println(sql);
            sqlBuilder.append(sql).append(IOUtils.LINE_SEPARATOR);
        }

        // NOTE(review): the output location is a hard-coded Windows path.
        final OutputStream outputStream = new FileOutputStream(new File("C:\\" + CLASS_NAME + ".sql"));
        try {
            IOUtils.write(sqlBuilder.toString(), outputStream, "UTF-8");
        } finally {
            IOUtils.closeQuietly(outputStream);
        }
    }

    /**
     * Checks whether the specified word consists only of allowed characters.
     *
     * <p>
     * Allowed characters:
     * <ul>
     * <li>letters a-z and A-Z</li>
     * <li>digits 0-9</li>
     * <li>period .</li>
     * <li>comma ,</li>
     * <li>left parenthesis (</li>
     * <li>right parenthesis )</li>
     * <li>apostrophe '</li>
     * <li>space</li>
     * <li>hyphen -</li>
     * </ul>
     * </p>
     *
     * @param word the word to check
     * @return {@code true} if every character is allowed (also for an empty word), otherwise {@code false}
     */
    private static boolean checkWord(final String word) {
        final int length = word.length();

        for (int i = 0; i < length; i++) {
            if (!isAllowedChar(word.charAt(i))) {
                return false;
            }
        }

        return true;
    }

    /**
     * Tells whether a single character may appear in a word.
     *
     * @param ch the character to test
     * @return {@code true} for ASCII letters, ASCII digits and the characters {@code . , ' ( ) -} and space
     */
    private static boolean isAllowedChar(final char ch) {
        if ((ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z') || (ch >= '0' && ch <= '9')) {
            return true;
        }

        return ".,' ()-".indexOf(ch) >= 0;
    }
}