Java tutorial
/* * Copyright 2010-2012 Amazon.com, Inc. or its affiliates. All Rights Reserved. * * Licensed under the Apache License, Version 2.0 (the "License"). * You may not use this file except in compliance with the License. * A copy of the License is located at * * http://aws.amazon.com/apache2.0 * * or in the "license" file accompanying this file. This file is distributed * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either * express or implied. See the License for the specific language governing * permissions and limitations under the License. */ import java.io.IOException; import java.net.MalformedURLException; import java.net.URL; import java.sql.SQLException; import java.util.ArrayList; import java.util.HashMap; import java.util.HashSet; import java.util.List; import java.util.Set; import java.util.regex.Matcher; import java.util.regex.Pattern; import net.htmlparser.jericho.Element; import net.htmlparser.jericho.HTMLElementName; import net.htmlparser.jericho.Source; import com.amazonaws.AmazonClientException; import com.amazonaws.AmazonServiceException; import com.amazonaws.auth.AWSCredentials; import com.amazonaws.auth.PropertiesCredentials; import com.amazonaws.services.ec2.AmazonEC2; import com.amazonaws.services.ec2.AmazonEC2Client; import com.amazonaws.services.ec2.model.DescribeAvailabilityZonesResult; import com.amazonaws.services.ec2.model.DescribeInstancesResult; import com.amazonaws.services.ec2.model.Instance; import com.amazonaws.services.ec2.model.Reservation; import com.amazonaws.services.s3.AmazonS3; import com.amazonaws.services.s3.AmazonS3Client; import com.amazonaws.services.s3.model.Bucket; import com.amazonaws.services.s3.model.ObjectListing; import com.amazonaws.services.s3.model.S3ObjectSummary; import com.amazonaws.services.simpledb.AmazonSimpleDB; import com.amazonaws.services.simpledb.AmazonSimpleDBClient; import com.amazonaws.services.simpledb.model.DomainMetadataRequest; import com.amazonaws.services.simpledb.model.DomainMetadataResult; import com.amazonaws.services.simpledb.model.ListDomainsRequest; import com.amazonaws.services.simpledb.model.ListDomainsResult; import com.dto.common.SqlLiteFactory; /** * Welcome to your new AWS Java SDK based project! * * This class is meant as a starting point for your console-based application * that makes one or more calls to the AWS services supported by the Java SDK, * such as EC2, SimpleDB, and S3. * * In order to use the services in this sample, you need: * * - A valid Amazon Web Services account. You can register for AWS at: * https://aws-portal.amazon.com/gp/aws/developer/registration/index.html * * - Your account's Access Key ID and Secret Access Key: * http://aws.amazon.com/security-credentials * * - A subscription to Amazon EC2. You can sign up for EC2 at: * http://aws.amazon.com/ec2/ * * - A subscription to Amazon SimpleDB. You can sign up for Simple DB at: * http://aws.amazon.com/simpledb/ * * - A subscription to Amazon S3. You can sign up for S3 at: * http://aws.amazon.com/s3/ */ public class ClienTopTen { /* * Important: Be sure to fill in your AWS access credentials in the * AwsCredentials.properties file before you try to run this sample. * http://aws.amazon.com/security-credentials */ static AmazonEC2 ec2; static AmazonS3 s3; static AmazonSimpleDB sdb; private static final String clien_table_array[] = { "http://clien.career.co.kr/cs2/bbs/board.php?bo_table=news&page=1", "http://clien.career.co.kr/cs2/bbs/board.php?bo_table=news&page=2", "http://clien.career.co.kr/cs2/bbs/board.php?bo_table=park&page=1", "http://clien.career.co.kr/cs2/bbs/board.php?bo_table=park&page=2", "http://clien.career.co.kr/cs2/bbs/board.php?bo_table=park&page=3", "http://clien.career.co.kr/cs2/bbs/board.php?bo_table=park&page=4", "http://clien.career.co.kr/cs2/bbs/board.php?bo_table=park&page=5" }; static SqlLiteFactory sqlLiteFactory = new SqlLiteFactory(); /** * The only information needed to create a client are security credentials * consisting of the AWS Access Key ID and Secret Access Key. All other * configuration, such as the service endpoints, are performed * automatically. Client parameters, such as proxies, can be specified in an * optional ClientConfiguration object when constructing a client. * * @see com.amazonaws.auth.BasicAWSCredentials * @see com.amazonaws.auth.PropertiesCredentials * @see com.amazonaws.ClientConfiguration */ private static void init() throws Exception { AWSCredentials credentials = new PropertiesCredentials( ClienTopTen.class.getResourceAsStream("AwsCredentials.properties")); ec2 = new AmazonEC2Client(credentials); // s3 = new AmazonS3Client(credentials); // sdb = new AmazonSimpleDBClient(credentials); } private static void Bot(String sUrl, String sCategory) { // String sUrl = // "http://clien.career.co.kr/cs2/bbs/board.php?bo_table=news"; String trText = ""; try { Source src = new Source(new URL(sUrl)); src.fullSequentialParse(); Element tbody = src.getAllElements(HTMLElementName.TBODY).get(0); int trCount = tbody.getAllElements(HTMLElementName.TR).size(); List trData = new ArrayList(); HashMap<String, String> tdData = null; Element tr = null; String rTitle = ""; String rTitleLink = ""; String rCommentCount = ""; String rDate = ""; for (int i = 0; i < trCount; i++) { tdData = new HashMap<String, String>(); tr = tbody.getAllElements(HTMLElementName.TR).get(i); trText = tbody.getAllElements(HTMLElementName.TR).get(i).toString(); try { rTitle = ((Element) tr.getAllElements(HTMLElementName.TD).get(1) .getAllElements(HTMLElementName.A).get(0)).getContent().toString(); } catch (IndexOutOfBoundsException e) { // e.printStackTrace(); // System.out.println(tr.toString()); // Exception . continue; } rTitleLink = ((Element) tr.getAllElements(HTMLElementName.TD).get(1) .getAllElements(HTMLElementName.A).get(0)).getAttributeValue("href").toString(); if (tr.getAllElements(HTMLElementName.TD).get(1).getAllElements(HTMLElementName.SPAN).isEmpty()) { rCommentCount = "0"; } else { rCommentCount = ((Element) tr.getAllElements(HTMLElementName.TD).get(1) .getAllElements(HTMLElementName.SPAN).get(0)).getContent().toString(); rCommentCount = rCommentCount.replace("[", ""); rCommentCount = rCommentCount.replace("]", ""); } rDate = ((Element) tr.getAllElements(HTMLElementName.TD).get(3)).getContent().toString(); rDate = stripTags("<span title=\"([^\"]*)\">", rDate); tdData.put("WR_ID", ((Element) tr.getAllElements(HTMLElementName.TD).get(0)).getContent().toString()); tdData.put("CATEGORY", sCategory); tdData.put("TITLE", rTitle); tdData.put("LINK", rTitleLink); tdData.put("COMMENT_CNT", rCommentCount); tdData.put("WRITER", ((Element) tr.getAllElements(HTMLElementName.TD).get(2)).getContent().toString()); tdData.put("WR_DATE", rDate); tdData.put("READ_CNT", ((Element) tr.getAllElements(HTMLElementName.TD).get(4)).getContent().toString()); tdData.put("TR_TEXT", trText); /* * System.out.println( * "=================================================================" * ); * System.out.println("WR_ID : ".concat(tdData.get("WR_ID" * ))); * System.out.println("TITLE : ".concat(tdData.get("TITLE" * ))); * System.out.println("LINK : ".concat(tdData.get("LINK" * ))); System.out.println("COMMENT_CNT: ".concat(tdData.get( * "COMMENT_CNT"))); * System.out.println("WRITER : ".concat(tdData * .get("WRITER"))); * System.out.println("DATE : ".concat(tdData * .get("DATE"))); * System.out.println("READ_COUNT : ".concat(tdData * .get("READ_COUNT"))); * System.out.println("CATEGORY : ".concat * (tdData.get("CATEGORY"))); System.out.println( * "=================================================================" * ); */ // [0-9]*$ if ("<img src=\"../skin/board/cheditor/img/icon_board_notice.gif\" title=\"\" />" .equals(tdData.get("WR_ID"))) { continue; } else { trData.add(tdData); } } // END OF FOR LOOP try { sqlLiteFactory.createClienTopTen(trData); } catch (ClassNotFoundException e) { e.printStackTrace(); } catch (SQLException e) { e.printStackTrace(); } } catch (MalformedURLException e) { e.printStackTrace(); } catch (IOException e) { e.printStackTrace(); } catch (Exception e) { e.printStackTrace(); } } private static String stripTags(String pattern, String tags) { String sReturn = ""; Pattern p = Pattern.compile(pattern); Matcher m = p.matcher(tags); while (m.find()) { // System.out.println(m.group(1)); sReturn = m.group(1); } return sReturn; } public static void main(String[] args) throws Exception { // System.out.println("==========================================="); // System.out.println("Welcome to the AWS Java SDK!"); // System.out.println("==========================================="); init(); sqlLiteFactory.deleteClienTopTen(); for (int idx = 0; idx < clien_table_array.length; idx++) { if (idx < 2) { Bot(clien_table_array[idx], "NEWS"); } else { Bot(clien_table_array[idx], "FREE"); } } }// END OF MAIN public static void botRun() throws Exception { init(); sqlLiteFactory.deleteClienTopTen(); for (int idx = 0; idx < clien_table_array.length; idx++) { if (idx < 2) { Bot(clien_table_array[idx], "NEWS"); } else { Bot(clien_table_array[idx], "FREE"); } } } }