org.fastcatsearch.util.HTMLTagRemoverTest.java Source code

Java tutorial

Introduction

Here is the source code for org.fastcatsearch.util.HTMLTagRemoverTest.java

Source

/*
 * Copyright (c) 2013 Websquared, Inc.
 * All rights reserved. This program and the accompanying materials
 * are made available under the terms of the GNU Public License v2.0
 * which accompanies this distribution, and is available at
 * http://www.gnu.org/licenses/old-licenses/gpl-2.0.html
 * 
 * Contributors:
 *     swsong - initial API and implementation
 */

package org.fastcatsearch.util;

import java.io.BufferedReader;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.UnsupportedEncodingException;
import java.util.ArrayList;
import java.util.List;

import junit.framework.TestCase;

import org.apache.http.NameValuePair;
import org.apache.http.client.ClientProtocolException;
import org.apache.http.client.HttpClient;
import org.apache.http.client.ResponseHandler;
import org.apache.http.client.entity.UrlEncodedFormEntity;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.client.methods.HttpPost;
import org.apache.http.impl.client.BasicResponseHandler;
import org.apache.http.impl.client.DefaultHttpClient;
import org.apache.http.protocol.HTTP;
import org.fastcatsearch.ir.common.IRException;
import org.junit.Test;

/**
 * Smoke tests that feed a variety of inputs to {@link HTMLTagRemover#clean(String)} and print
 * the result to standard output.
 *
 * <p>None of the tests asserts on the cleaned text; a test fails only when {@code clean} throws
 * an {@link IRException}. Tests that depend on an external resource (a live web site, a file on
 * the developer's machine) are skipped with a message on standard error when that resource
 * cannot be read, because that says nothing about the code under test.
 *
 * <p>NOTE(review): the class extends JUnit 3's {@code TestCase} while some methods also carry
 * JUnit 4's {@code @Test}; consider settling on one style and adding real assertions.
 */
public class HTMLTagRemoverTest extends TestCase {

    /**
     * Downloads the fastcatsearch.org front page with an HTTP GET and prints the cleaned body.
     *
     * <p>Skipped (with a message on standard error) when the page cannot be fetched.
     */
    public void test1() {
        HttpClient httpclient = new DefaultHttpClient();
        try {
            ResponseHandler<String> responseHandler = new BasicResponseHandler();
            HttpGet httpGet = new HttpGet("http://www.fastcatsearch.org/");

            String responseBody = httpclient.execute(httpGet, responseHandler);

            System.out.println(HTMLTagRemover.clean(responseBody));
        } catch (IOException e) {
            // Also covers ClientProtocolException. A network problem is an environment issue,
            // not a defect in HTMLTagRemover, so report it and skip instead of failing.
            System.err.println("Skipping test1: could not fetch http://www.fastcatsearch.org/ (" + e + ")");
        } catch (IRException e) {
            throw failure("test1", e);
        } finally {
            // Release the connections held by the client; previously they were never freed.
            httpclient.getConnectionManager().shutdown();
        }
    }

    /** Cleans a short snippet made of a single {@code <img>} tag followed by plain text. */
    public void test2() {
        try {
            System.out.println(HTMLTagRemover.clean("<img src=\"sdfsdfds.jp\"> ? "));
        } catch (IRException e) {
            throw failure("test2", e);
        }
    }

    /**
     * Cleans text that ends in the middle of an {@code <IMG>} tag: the tag is never closed and
     * its last attribute value has no terminating quote.
     */
    @Test
    public void test3() {
        String str = "_|232L|2|:17.4kwh()||+?|,?|?:?|<IMG src=\"http://office.danawa.com/prod_img/500000/975/502/img/1502975_1.jpg?time=1348054028\" style=\"FILTER: RevealTrans(duration=0,transition=X)\" OnmouseOver=\"this.filters[0].apply(); this.src='http:";

        try {
            System.out.println(HTMLTagRemover.clean(str));
        } catch (IRException e) {
            throw failure("test3", e);
        }
    }

    /** Cleans tag-free text containing runs of spaces and consecutive newlines. */
    @Test
    public void test4() {
        String str = "abc4.0  qwe 3.0 tyu 9.0 \n123   \n\n456\n789";

        try {
            System.out.println(HTMLTagRemover.clean(str));
        } catch (IRException e) {
            throw failure("test4", e);
        }
    }

    /**
     * Reads a local HTML file as UTF-8, joins its lines with {@code '\r'} and prints the cleaned
     * text.
     *
     * <p>The path points at a file on the original author's machine; when it cannot be read the
     * test is skipped (with a message on standard error) instead of cleaning an empty string.
     *
     * @throws Exception if {@code HTMLTagRemover.clean} fails or the reader cannot be closed
     */
    @Test
    public void testfile() throws Exception {
        String strFilePath = "/Users/swsong/Desktop/a.html";

        StringBuilder sb = new StringBuilder();
        BufferedReader reader = null;
        try {
            reader = new BufferedReader(new InputStreamReader(new FileInputStream(strFilePath), "UTF-8"));
            String line;
            while ((line = reader.readLine()) != null) {
                sb.append(line).append("\r");
            }
        } catch (IOException e) {
            System.err.println("Skipping testfile: could not read " + strFilePath + " (" + e + ")");
            return;
        } finally {
            // The reader was previously leaked; always close it, including on the skip path.
            if (reader != null) {
                reader.close();
            }
        }

        String str = HTMLTagRemover.clean(sb.toString());
        System.out.println(str);
    }

    /**
     * Builds the error thrown when {@code HTMLTagRemover.clean} raises an {@link IRException},
     * so the failure reaches the test runner instead of being printed and ignored.
     *
     * @param testName name of the test method in which the exception occurred
     * @param cause the exception thrown by {@code clean}; kept as the error's cause
     * @return an {@link AssertionError} for the caller to throw
     */
    private static AssertionError failure(String testName, IRException cause) {
        AssertionError error = new AssertionError(testName + ": HTMLTagRemover.clean threw " + cause);
        // AssertionError(String, Throwable) needs Java 7; initCause works on older versions too.
        error.initCause(cause);
        return error;
    }
}