Java tutorial
package siddur.solidtrust.bi.data; import java.sql.Connection; import java.sql.DriverManager; import java.sql.ResultSet; import java.sql.SQLException; import java.sql.Statement; import java.text.MessageFormat; import java.text.SimpleDateFormat; import java.util.ArrayList; import java.util.Calendar; import java.util.Date; import java.util.HashMap; import java.util.List; import java.util.Map; import javax.persistence.EntityManager; import javax.persistence.PersistenceContext; import org.apache.commons.lang3.StringUtils; import org.apache.commons.lang3.math.NumberUtils; import org.apache.commons.lang3.time.DateUtils; import org.apache.log4j.Logger; import org.springframework.beans.factory.annotation.Autowired; import org.springframework.stereotype.Service; import siddur.solidtrust.FreePersister; import siddur.solidtrust.bi.entity.CarInfo; import siddur.solidtrust.bi.entity.CurrencyRate; import siddur.solidtrust.bi.entity.DataSource; import siddur.solidtrust.bi.entity.WebSource; import siddur.solidtrust.bi.entity.WebSourceID; import siddur.solidtrust.scrape.ScrapeLog; @Service public class DataPump { private static final Logger log4j = Logger.getLogger(DataPump.class); protected static final SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss"); private List<WebSource> webSources; @PersistenceContext private EntityManager em; @Autowired private FreePersister persister; private String yesterday; //the last past 24 hours private Map<String, Float> currencyRates = new HashMap<>(); public void pump(String tablename) { webSources = em.createQuery("from WebSource", WebSource.class).getResultList(); List<CurrencyRate> rates = em.createQuery("from CurrencyRate", CurrencyRate.class).getResultList(); for (CurrencyRate r : rates) { currencyRates.put(r.getCurrency(), r.getRate()); } Date d = new Date(); d = DateUtils.addDays(DateUtils.truncate(d, Calendar.DATE), -1); yesterday = sdf.format(d); for (WebSource webSource : webSources) { if (tablename == null || tablename.equalsIgnoreCase(webSource.getTablename())) { log4j.info("Start to synchronize " + webSource.getTablename()); int total = pumpEach(webSource); log4j.info("Synchronized " + total + " records"); } } } private int pumpEach(WebSource webSource) { int maxId = maxId(webSource); ScrapeLog log = new ScrapeLog(); log.setTablename(webSource.getTablename()); log.setStartAt(new Date()); log.setStartId(maxId); persister.save(log); int total = 0; Connection conn = null; try { conn = getConnection(webSource.getDataSource()); log4j.info("[" + webSource.getTablename() + "]Last sync: " + maxId); String baseSql = buildBaseSql(webSource); log4j.info(baseSql); while (true) { Statement stmt = conn.createStatement(); ResultSet result = stmt.executeQuery(buildSql(baseSql, maxId, 1000)); if (result.last()) { maxId = result.getInt("ID"); result.first(); log4j.info("Fetch from " + result.getInt("ID")); result.beforeFirst(); } else { break; } List<CarInfo> list = getCars(result, webSource.getTablename()); stmt.close(); if (!list.isEmpty()) { total += list.size(); persister.saveBatch(list); } } log4j.info("[" + webSource.getTablename() + "]Finish sync: " + total); //the last past 24 hours List<CarInfo> list = getDateRemoved(webSource, conn); if (list != null) { int s = list.size(); int r = s % 1000; int x = (s - r) / 1000; for (int i = 0; i < x; i++) { persister.saveBatch(list.subList(i * 1000, (i + 1) * 1000)); log4j.info("Update removed records:" + 1000); } if (x * 1000 != list.size()) { persister.saveBatch(list.subList(x * 1000, s)); log4j.info("Update removed records:" + (s - x * 1000)); } log4j.info("[" + webSource.getTablename() + "]Updated removed records: " + s); } } catch (Exception e1) { log.setError(e1.getMessage()); log4j.warn(e1.getMessage(), e1); } finally { if (conn != null) { try { conn.close(); } catch (SQLException e) { log4j.warn(e.getMessage(), e); } } } log.setEndAt(new Date()); log.setEndId(maxId); log.setAmount(total); persister.merge(log); try { sort(); } catch (Exception e) { log4j.warn(e.getMessage(), e); } return total; } public void sort() throws Exception { log4j.info("Start to sort"); log4j.info("Finish sort"); } private List<CarInfo> getDateRemoved(WebSource webSource, Connection conn) { log4j.info("start to fetch sold cars from " + yesterday); String sql = MessageFormat.format("select id, {1} from {0} where {1} > \"{2}\"", webSource.getTablename(), webSource.getDateRemoved(), yesterday); Statement stmt = null; try { stmt = conn.createStatement(); ResultSet result = stmt.executeQuery(sql); List<CarInfo> list = new ArrayList<CarInfo>(); while (result.next()) { CarInfo item = em.find(CarInfo.class, new WebSourceID(result.getInt(1), webSource.getTablename())); if (item != null) { try { item.setDateRemoved(result.getString(2).substring(0, 10)); list.add(item); } catch (Exception e) { log4j.error(result.getInt(1) + " " + result.getString(2)); log4j.error(e.getMessage(), e); } } } log4j.info("fetch " + list.size() + " sold cars"); return list; } catch (Exception e) { e.printStackTrace(); } finally { if (stmt != null) { try { stmt.close(); } catch (SQLException e) { } } } return null; } public List<CarInfo> getCars(ResultSet result, String tablename) throws SQLException { int count = 0; List<CarInfo> list = new ArrayList<CarInfo>(); while (result.next()) { String brand = trim(result.getString(2)); String model = trim(result.getString(3)); if (StringUtils.isEmpty(brand) || StringUtils.isEmpty(model)) { continue; } DataWrapper item = new DataWrapper(currencyRates); item.setTablename(tablename); count++; item.setId(result.getInt(1)); item.setBrand(brand); item.setModel(model); item.setType(trim(result.getString(4))); item.setBuild(getInt(result.getObject(5))); item.setArrangement(trim(result.getString(6))); int price = getInt(result.getObject(7)); String currency = trim(result.getString(8)); item.setPrice(price, currency); item.setMileage(getInt(result.getObject(9))); item.setEngineSize(getInt(result.getObject(10))); item.setFuelType(trim(result.getString(11))); item.setColor(trim(result.getString(12))); try { item.setPostDate(result.getString(13).substring(0, 10)); } catch (Exception e1) { } try { item.setDateRemoved(result.getString(14).substring(0, 10)); } catch (Exception e) { } try { item.setDateScraped(result.getString(15).substring(0, 10)); } catch (Exception e1) { } item.setLicensePlate(trim(result.getString(16))); list.add(item.getCar()); } log4j.info("Syschronized " + count + " records"); return list; } private String buildBaseSql(WebSource webSource) { String ql = "SELECT id, {0} brand, {1} model, {2} type, " + "{3} build, {4} arragement, {5} price, " + "{6} currency, {7} mileage, {8} engineSize, " + "{9} fuelType, {10} color, {11} sellerType, " + "{12} postDate, {13} dateRemoved, {14} dateScraped, " + "{15} licensePlate"; StringBuilder sql = new StringBuilder(); sql.append(MessageFormat.format(ql, webSource.getBrand(), webSource.getModel(), getStringField(webSource.getType()), webSource.getBuildYear(), getStringField(webSource.getArrangement()), webSource.getPrice(), getStringField(webSource.getCurrency()), webSource.getMileage(), webSource.getEngineSize(), getStringField(webSource.getFuelType()), getStringField(webSource.getColor()), getIntField(webSource.getSellerType()), getStringField(webSource.getPostDate()), webSource.getDateRemoved(), webSource.getDateScraped(), getStringField(webSource.getLicensePlate()))); sql.append(" FROM " + webSource.getTablename() + " c"); return sql.toString(); } private String buildSql(String sql, int startID, int limit) { sql += (" WHERE c.id > "); sql += (startID); sql += (" LIMIT " + limit); return sql.toString(); } private int maxId(WebSource webSource) { String queryMaxId = "select max(c.id) from CarInfo c where c.tablename = '" + webSource.getTablename() + "'"; int maxId = 0; try { maxId = em.createQuery(queryMaxId, Integer.class).getSingleResult(); } catch (Exception e) { } return maxId; } private Connection getConnection(DataSource ds) throws Exception { Class.forName("com.mysql.jdbc.Driver"); return DriverManager.getConnection(ds.getMysqlUrl(), ds.getUsername(), ds.getPassword()); } private static String getStringField(String s) { if (s == null) { return "NULL"; } return s; } private static String getIntField(String s) { if (s == null) { return "0"; } return s; } private static String trim(String s) { if (s == null) { return null; } return s.trim(); } private static int getInt(Object obj) { if (obj instanceof String) { String s = (String) obj; if (StringUtils.isBlank(s)) { return 0; } s = s.trim().replace(".", ""); if (NumberUtils.isDigits(s)) { return Integer.parseInt(s); } s = StringUtils.split(s.trim())[0].replace(".", ""); if (NumberUtils.isDigits(s)) { return Integer.parseInt(s); } return 0; } return (int) obj; } }