siddur.solidtrust.bi.data.DataPump.java Source code

Java tutorial

Introduction

Here is the source code for siddur.solidtrust.bi.data.DataPump.java

Source

package siddur.solidtrust.bi.data;

import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.sql.Statement;
import java.text.MessageFormat;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Calendar;
import java.util.Date;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

import javax.persistence.EntityManager;
import javax.persistence.PersistenceContext;

import org.apache.commons.lang3.StringUtils;
import org.apache.commons.lang3.math.NumberUtils;
import org.apache.commons.lang3.time.DateUtils;
import org.apache.log4j.Logger;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.stereotype.Service;

import siddur.solidtrust.FreePersister;
import siddur.solidtrust.bi.entity.CarInfo;
import siddur.solidtrust.bi.entity.CurrencyRate;
import siddur.solidtrust.bi.entity.DataSource;
import siddur.solidtrust.bi.entity.WebSource;
import siddur.solidtrust.bi.entity.WebSourceID;
import siddur.solidtrust.scrape.ScrapeLog;

@Service
public class DataPump {
    private static final Logger log4j = Logger.getLogger(DataPump.class);

    protected static final SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");

    private List<WebSource> webSources;
    @PersistenceContext
    private EntityManager em;
    @Autowired
    private FreePersister persister;
    private String yesterday; //the last past 24 hours
    private Map<String, Float> currencyRates = new HashMap<>();

    public void pump(String tablename) {
        webSources = em.createQuery("from WebSource", WebSource.class).getResultList();
        List<CurrencyRate> rates = em.createQuery("from CurrencyRate", CurrencyRate.class).getResultList();
        for (CurrencyRate r : rates) {
            currencyRates.put(r.getCurrency(), r.getRate());
        }

        Date d = new Date();
        d = DateUtils.addDays(DateUtils.truncate(d, Calendar.DATE), -1);
        yesterday = sdf.format(d);

        for (WebSource webSource : webSources) {
            if (tablename == null || tablename.equalsIgnoreCase(webSource.getTablename())) {
                log4j.info("Start to synchronize " + webSource.getTablename());
                int total = pumpEach(webSource);
                log4j.info("Synchronized " + total + " records");
            }
        }
    }

    private int pumpEach(WebSource webSource) {
        int maxId = maxId(webSource);

        ScrapeLog log = new ScrapeLog();
        log.setTablename(webSource.getTablename());
        log.setStartAt(new Date());
        log.setStartId(maxId);
        persister.save(log);

        int total = 0;
        Connection conn = null;
        try {
            conn = getConnection(webSource.getDataSource());
            log4j.info("[" + webSource.getTablename() + "]Last sync: " + maxId);
            String baseSql = buildBaseSql(webSource);
            log4j.info(baseSql);
            while (true) {
                Statement stmt = conn.createStatement();
                ResultSet result = stmt.executeQuery(buildSql(baseSql, maxId, 1000));
                if (result.last()) {
                    maxId = result.getInt("ID");
                    result.first();
                    log4j.info("Fetch from " + result.getInt("ID"));
                    result.beforeFirst();
                } else {
                    break;
                }
                List<CarInfo> list = getCars(result, webSource.getTablename());
                stmt.close();
                if (!list.isEmpty()) {
                    total += list.size();
                    persister.saveBatch(list);
                }
            }
            log4j.info("[" + webSource.getTablename() + "]Finish sync: " + total);

            //the last past 24 hours
            List<CarInfo> list = getDateRemoved(webSource, conn);
            if (list != null) {
                int s = list.size();
                int r = s % 1000;
                int x = (s - r) / 1000;
                for (int i = 0; i < x; i++) {
                    persister.saveBatch(list.subList(i * 1000, (i + 1) * 1000));
                    log4j.info("Update removed records:" + 1000);
                }

                if (x * 1000 != list.size()) {
                    persister.saveBatch(list.subList(x * 1000, s));
                    log4j.info("Update removed records:" + (s - x * 1000));
                }

                log4j.info("[" + webSource.getTablename() + "]Updated removed records: " + s);
            }

        } catch (Exception e1) {
            log.setError(e1.getMessage());
            log4j.warn(e1.getMessage(), e1);
        } finally {
            if (conn != null) {
                try {
                    conn.close();
                } catch (SQLException e) {
                    log4j.warn(e.getMessage(), e);
                }
            }
        }

        log.setEndAt(new Date());
        log.setEndId(maxId);
        log.setAmount(total);
        persister.merge(log);

        try {
            sort();
        } catch (Exception e) {
            log4j.warn(e.getMessage(), e);
        }
        return total;
    }

    public void sort() throws Exception {
        log4j.info("Start to sort");
        log4j.info("Finish sort");
    }

    private List<CarInfo> getDateRemoved(WebSource webSource, Connection conn) {
        log4j.info("start to fetch sold cars from " + yesterday);
        String sql = MessageFormat.format("select id, {1} from {0} where {1} > \"{2}\"", webSource.getTablename(),
                webSource.getDateRemoved(), yesterday);
        Statement stmt = null;
        try {
            stmt = conn.createStatement();
            ResultSet result = stmt.executeQuery(sql);
            List<CarInfo> list = new ArrayList<CarInfo>();
            while (result.next()) {
                CarInfo item = em.find(CarInfo.class, new WebSourceID(result.getInt(1), webSource.getTablename()));
                if (item != null) {
                    try {
                        item.setDateRemoved(result.getString(2).substring(0, 10));
                        list.add(item);
                    } catch (Exception e) {
                        log4j.error(result.getInt(1) + " " + result.getString(2));
                        log4j.error(e.getMessage(), e);
                    }
                }
            }
            log4j.info("fetch " + list.size() + " sold cars");
            return list;
        } catch (Exception e) {
            e.printStackTrace();
        } finally {
            if (stmt != null) {
                try {
                    stmt.close();
                } catch (SQLException e) {
                }
            }
        }

        return null;
    }

    public List<CarInfo> getCars(ResultSet result, String tablename) throws SQLException {
        int count = 0;

        List<CarInfo> list = new ArrayList<CarInfo>();
        while (result.next()) {
            String brand = trim(result.getString(2));
            String model = trim(result.getString(3));
            if (StringUtils.isEmpty(brand) || StringUtils.isEmpty(model)) {
                continue;
            }

            DataWrapper item = new DataWrapper(currencyRates);
            item.setTablename(tablename);
            count++;
            item.setId(result.getInt(1));
            item.setBrand(brand);
            item.setModel(model);
            item.setType(trim(result.getString(4)));
            item.setBuild(getInt(result.getObject(5)));
            item.setArrangement(trim(result.getString(6)));

            int price = getInt(result.getObject(7));
            String currency = trim(result.getString(8));
            item.setPrice(price, currency);

            item.setMileage(getInt(result.getObject(9)));
            item.setEngineSize(getInt(result.getObject(10)));
            item.setFuelType(trim(result.getString(11)));
            item.setColor(trim(result.getString(12)));

            try {
                item.setPostDate(result.getString(13).substring(0, 10));
            } catch (Exception e1) {
            }
            try {
                item.setDateRemoved(result.getString(14).substring(0, 10));
            } catch (Exception e) {
            }
            try {
                item.setDateScraped(result.getString(15).substring(0, 10));
            } catch (Exception e1) {
            }

            item.setLicensePlate(trim(result.getString(16)));
            list.add(item.getCar());
        }

        log4j.info("Syschronized " + count + " records");
        return list;
    }

    private String buildBaseSql(WebSource webSource) {
        String ql = "SELECT id, {0} brand, {1} model, {2} type, " + "{3} build, {4} arragement, {5} price, "
                + "{6} currency, {7} mileage, {8} engineSize, " + "{9} fuelType, {10} color, {11} sellerType, "
                + "{12} postDate, {13} dateRemoved, {14} dateScraped, " + "{15} licensePlate";

        StringBuilder sql = new StringBuilder();
        sql.append(MessageFormat.format(ql, webSource.getBrand(), webSource.getModel(),
                getStringField(webSource.getType()), webSource.getBuildYear(),
                getStringField(webSource.getArrangement()), webSource.getPrice(),
                getStringField(webSource.getCurrency()), webSource.getMileage(), webSource.getEngineSize(),
                getStringField(webSource.getFuelType()), getStringField(webSource.getColor()),
                getIntField(webSource.getSellerType()), getStringField(webSource.getPostDate()),
                webSource.getDateRemoved(), webSource.getDateScraped(),
                getStringField(webSource.getLicensePlate())));

        sql.append(" FROM " + webSource.getTablename() + " c");
        return sql.toString();
    }

    private String buildSql(String sql, int startID, int limit) {

        sql += (" WHERE c.id > ");
        sql += (startID);
        sql += (" LIMIT " + limit);
        return sql.toString();
    }

    private int maxId(WebSource webSource) {
        String queryMaxId = "select max(c.id) from CarInfo c where c.tablename = '" + webSource.getTablename()
                + "'";
        int maxId = 0;
        try {
            maxId = em.createQuery(queryMaxId, Integer.class).getSingleResult();
        } catch (Exception e) {
        }

        return maxId;
    }

    private Connection getConnection(DataSource ds) throws Exception {
        Class.forName("com.mysql.jdbc.Driver");
        return DriverManager.getConnection(ds.getMysqlUrl(), ds.getUsername(), ds.getPassword());
    }

    private static String getStringField(String s) {
        if (s == null) {
            return "NULL";
        }
        return s;
    }

    private static String getIntField(String s) {
        if (s == null) {
            return "0";
        }
        return s;
    }

    private static String trim(String s) {
        if (s == null) {
            return null;
        }
        return s.trim();
    }

    private static int getInt(Object obj) {
        if (obj instanceof String) {
            String s = (String) obj;
            if (StringUtils.isBlank(s)) {
                return 0;
            }
            s = s.trim().replace(".", "");
            if (NumberUtils.isDigits(s)) {
                return Integer.parseInt(s);
            }
            s = StringUtils.split(s.trim())[0].replace(".", "");
            if (NumberUtils.isDigits(s)) {
                return Integer.parseInt(s);
            }
            return 0;

        }
        return (int) obj;
    }

}