com.candy.middle.FinReportDownload.java Source code

Java tutorial

Introduction

Here is the source code for com.candy.middle.FinReportDownload.java

Source

/*
 * Copyright (C) 2014 Zhou_Rui
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */

package com.candy.middle;

import com.candy.common.CandyUtil;
import com.candy.db.FundamentalDataProc;
import com.candy.db.Xbrl2DisplayProc;
//import static com.candy.middle.FundaDataCache.httpDownload;
import com.candy.xbrl2.XCalcRule;
import com.candy.xbrl2.XPreType;
import com.candy.xbrl2.XReportProc;
import com.candy.xbrl2.XbrlFileSet;
import com.candy.xbrl2.XbrlParser;
import com.candy.xbrl2.XbrlParser.REPORT_TYPE;
import com.candy.xbrl2.XbrlParser.Report;
import com.google.common.collect.Multimap;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.net.MalformedURLException;
import java.net.URL;
import java.net.URLConnection;
import java.nio.file.FileSystems;
import java.nio.file.Files;
import java.nio.file.Path;
import java.text.ParseException;
import java.util.ArrayList;
import java.util.Calendar;
import java.util.Date;
import java.util.HashMap;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import javafx.concurrent.Task;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;

/**
 *
 * @author Zhou Rui <wirelesser at hotmail.com>
 */
public class FinReportDownload extends Task {

    public double getCurrentProgress() {
        return currentProgress;
    }

    public void setCurrentProgress(double currentProgress) {
        this.currentProgress = currentProgress;
    }

    // proc    
    private FundamentalDataProc fdDbProc = FundamentalDataProc.getInstance();
    private ArrayList<String> symbolLst = new ArrayList();
    private HashMap<String, String> xbrlMapping = new HashMap();
    private Xbrl2DisplayProc xbrl2DisplayProc = Xbrl2DisplayProc.getInstance();
    private XbrlFileSet xset = XbrlFileSet.getInstance();
    private XbrlParser xbrlParser = XbrlParser.getInstance();
    private XCalcRule xcalRule = XCalcRule.getInstance();
    // private XReportProc xreportProc = XReportProc.getInstance();
    private XPreType preType = XPreType.getInstance();
    private double currentProgress = 0.0, totalProgress = 0.0;

    public double getTotalProgress() {
        return totalProgress;
    }

    public void setTotalProgress(double totalProgress) {
        this.totalProgress = totalProgress;
    }

    private static String SECURL = "http://www.sec.gov/cgi-bin/browse-edgar?Find=Search&owner=exclude&action=getcompany&type=10%25&dateb=&owner=exclude&start=0&count=100&output=atom&CIK=";

    /**
     * get the company sec filing list
     * @param symbol
     * @return 
     */
    private List<String> getCompany10KFilingList(String symbol) {
        // filing must have both xbrl_href and filing-href        
        try {
            Document doc = Jsoup.connect(SECURL + symbol).ignoreContentType(true).get();
            Elements contents = doc.select("content"); // selector is more powerful
            if (contents != null) {
                ArrayList<String> retLst = new ArrayList();
                for (Element item : contents) {
                    // System.out.println(item);
                    Elements xbrlLinks = item.select("xbrl_href");
                    if (!xbrlLinks.isEmpty()) {
                        Elements filingLinks = item.select("filing-href");
                        if (!filingLinks.isEmpty()) {
                            for (Element link : filingLinks) {
                                String linkText = link.text();
                                retLst.add(linkText);
                            }
                        }
                    }
                }
                return retLst;
            }
            return null;
        } catch (Exception e) {
            return null;
        }
    }

    static public boolean httpDownload(String httpUrl, String saveFile) {
        int bytesum = 0;
        int byteread = 0;

        URL url = null;
        try {
            url = new URL(httpUrl);
        } catch (MalformedURLException e1) {
            // TODO Auto-generated catch block  
            e1.printStackTrace();
            return false;
        }
        try {
            URLConnection conn = url.openConnection();
            InputStream inStream = conn.getInputStream();
            FileOutputStream fs = new FileOutputStream(saveFile);

            byte[] buffer = new byte[1204];
            while ((byteread = inStream.read(buffer)) != -1) {
                bytesum += byteread;
                fs.write(buffer, 0, byteread);
            }
            return true;
        } catch (FileNotFoundException e) {
            e.printStackTrace();
            return false;
        } catch (IOException e) {
            e.printStackTrace();
            return false;
        }
    }

    /* format like this
     <table class="tableFile" summary="Data Files">
     <tr>
        <th scope="col" style="width: 5%;"><acronym title="Sequence Number">Seq</acronym></th>
        <th scope="col" style="width: 40%;">Description</th>
        <th scope="col" style="width: 20%;">Document</th>
        <th scope="col" style="width: 10%;">Type</th>
        <th scope="col">Size</th>
     </tr>
     <tr>
        <td scope="row">9</td>
        <td scope="row">EX-101.INS</td>
        <td scope="row"><a href="/Archives/edgar/data/51143/000110465909045198/ibm-20090630.xml">ibm-20090630.xml</a></td>
        <td scope="row">EX-101.INS</td>
        <td scope="row">1567593</td>
     </tr>
     <tr class="blueRow">
        <td scope="row">10</td>
        <td scope="row">EX-101.SCH</td>
        <td scope="row"><a href="/Archives/edgar/data/51143/000110465909045198/ibm-20090630.xsd">ibm-20090630.xsd</a></td>
        <td scope="row">EX-101.SCH</td>
        <td scope="row">17409</td>
     </tr>
     <tr>
        <td scope="row">11</td>
        <td scope="row">EX-101.CAL</td>
        <td scope="row"><a href="/Archives/edgar/data/51143/000110465909045198/ibm-20090630_cal.xml">ibm-20090630_cal.xml</a></td>
        <td scope="row">EX-101.CAL</td>
        <td scope="row">56606</td>
     </tr>
     <tr class="blueRow">
        <td scope="row">12</td>
        <td scope="row">EX-101.DEF</td>
        <td scope="row"><a href="/Archives/edgar/data/51143/000110465909045198/ibm-20090630_def.xml">ibm-20090630_def.xml</a></td>
        <td scope="row">EX-101.DEF</td>
        <td scope="row">86590</td>
     </tr>
     <tr>
        <td scope="row">13</td>
        <td scope="row">EX-101.LAB</td>
        <td scope="row"><a href="/Archives/edgar/data/51143/000110465909045198/ibm-20090630_lab.xml">ibm-20090630_lab.xml</a></td>
        <td scope="row">EX-101.LAB</td>
        <td scope="row">277614</td>
     </tr>
     <tr class="blueRow">
        <td scope="row">14</td>
        <td scope="row">EX-101.PRE</td>
        <td scope="row"><a href="/Archives/edgar/data/51143/000110465909045198/ibm-20090630_pre.xml">ibm-20090630_pre.xml</a></td>
        <td scope="row">EX-101.PRE</td>
        <td scope="row">122873</td>
     </tr>
      </table>   
    */
    /*
    * return 
    */
    private Map<String, String> getXbrlFileSet(String url) {
        try {
            Document doc = Jsoup.connect(url).ignoreContentType(true).get();
            // <table class="tableFile" summary="Data Files">
            Elements content = doc.select("table[summary=Data Files]");
            if (content != null) {
                for (Element item : content) {
                    Elements links = item.select("a[href]");
                    if (links != null) {
                        Map<String, String> lst = new HashMap();
                        for (Element link : links) {
                            String linkHref = link.attr("href");
                            String linkText = link.text();
                            lst.put(linkText, linkHref); // filename, url                            
                        }
                        return lst;
                    }
                }
            }
            return null;
        } catch (Exception e) {
            return null;
        }
    }

    /**
     * check if record exist in DB
     * @param fileName
     * @return is rec exist
     */
    //    private boolean isRecInDBbyFileName0(String fileName) {
    //        // ibm-20090630_cal.xml -> ibm, 20090630, cal
    //        String i1 = fileName.replaceAll("-", "_");
    //        String i2 = i1.replaceAll("\\.", "_");
    //        String tokens[] = i2.split("_");
    //        if (tokens.length < 3)
    //            return false;        
    //        String symbol = tokens[0];
    //        String dateStr = tokens[1];        
    //        try {
    //            Date date = CandyUtil.getInstance().YYYYMMDD.parse(dateStr);            
    //            Calendar cal = Calendar.getInstance();
    //            cal.setTime(date);
    //            int year = cal.get(Calendar.YEAR);
    //            int quarter = cal.get(Calendar.MONTH) / 3 + 1;
    //            boolean exist = true;
    //            for (REPORT_TYPE rt : REPORT_TYPE.values()) {
    //                if (rt == REPORT_TYPE.eUNKNOWN)
    //                    continue;
    //                if (!fdDbProc.isDataExist(symbol, rt.ordinal() , year, quarter)) {
    //                    exist = false;break;
    //                }
    //            }
    //            return exist;
    //            
    //        } catch (ParseException e) {
    //            return false;
    //        }
    //    }

    private String verifyXbrlFile(String fileName) {
        // ibm-20090630_cal.xml -> ibm, 20090630, cal
        fileName = fileName.replaceAll("-", "_");
        fileName = fileName.replaceAll("\\.", "_");
        String tokens[] = fileName.split("_");
        if (tokens.length < 2)
            return null;
        // take second token as filename 
        return tokens[1];
    }

    private boolean isRecInDBbyFileName(String symbol, String fileName) {
        boolean exist = true;
        for (REPORT_TYPE rt : REPORT_TYPE.values()) {
            if (rt == REPORT_TYPE.eUNKNOWN)
                continue;
            if (!fdDbProc.isDataExistByFileName(symbol, fileName, rt.ordinal())) {
                exist = false;
                break;
            }
        }
        return exist;
    }

    private void getFundaFromNet(String company) {
        // ("http://www.sec.gov/ +  Archives/edgar/data/51143/000104746914001302/0001047469-14-001302-index.htm");
        List<String> filingLst = getCompany10KFilingList(company);
        double beforePg = 0.2;
        double afterPg = 0.8;
        updateProgress(beforePg);
        if (filingLst != null) {
            // create folder if not exist
            String folder = System.getProperty("user.dir") + "/secfiles";
            Path path = FileSystems.getDefault().getPath(folder);
            if (Files.notExists(path)) {
                File fileDir = new File(folder);
                fileDir.mkdirs();
            }
            // we have filing url, need to check each filing
            int numFiling = filingLst.size();
            int currFiling = 0;
            for (String filingUrl : filingLst) {
                currFiling++;
                updateProgress(beforePg + (afterPg - beforePg) * currFiling / numFiling);
                // key = filename, value = http relative path
                Map<String, String> xbrlLst = getXbrlFileSet(filingUrl);
                // is it in DB?
                if (xbrlLst != null && !xbrlLst.isEmpty()) {
                    // test any file
                    String fileName = xbrlLst.keySet().iterator().next();
                    String fnStr = verifyXbrlFile(fileName);
                    if (fnStr != null) {
                        if (isRecInDBbyFileName(company, fileName)) {
                            System.out.println("WARN - record exist " + fileName);
                            continue;
                        }
                    } else {
                        continue; // invalid filename
                    }

                } else {
                    System.out.println("ERROR - SEC filling url contains empty fileset " + filingUrl);
                    continue;
                }

                // not in db, download from net                
                for (Map.Entry pair : xbrlLst.entrySet()) {
                    String fullPath = folder + "/" + pair.getKey();
                    if (new File(fullPath).canRead()) {
                        System.out.println("INFO - the file " + pair.getValue() + " exist");
                    } else {
                        // download it
                        if (!httpDownload("http://www.sec.gov" + pair.getValue(), fullPath)) {
                            System.out.println("ERROR - unable download " + pair.getValue());
                        } else {
                            System.out.println("DONE - downloaded to " + fullPath);
                        }
                    }
                }
                // verify xbrl files
                xset.reset();
                Multimap<REPORT_TYPE, Report> reports = null;
                boolean validXbrlSet = false;
                for (Map.Entry pair : xbrlLst.entrySet()) {
                    String fullPath = folder + "/" + pair.getKey();
                    if (xset.verifyXbrlFile(fullPath)) {
                        reports = xbrlParser.parse(xset);
                        // save all xbrl mapping to hashmap
                        for (Report rp : reports.values()) {
                            for (XbrlParser.Report.IdNameValue item : rp.getIdNameValues()) {
                                xbrlMapping.put(item.getIdHref(), item.getDisplay());
                            }
                        }
                        writeToDB(company, reports, xset.getDateStr());
                        validXbrlSet = true;
                        break;
                    }
                }
                if (!validXbrlSet) {
                    for (Map.Entry pair : xbrlLst.entrySet()) {
                        System.out.println("ERROR - the xbrl file " + pair.getKey() + " is invalid");
                    }
                }
            } // end for
              // write xbrl2display to DB
            xbrl2DisplayProc.writeMultiRecords(xbrlMapping);
        }
    }

    /**
     * write to database
     * @param reports 
     */
    private void writeToDB(String symbol, Multimap<REPORT_TYPE, Report> reports, String dateStr) {
        for (Report rp : reports.values()) {
            if (!fdDbProc.writeData(symbol, rp.getYear(), rp.getQuarter(), rp.getReportType().ordinal(), dateStr,
                    rp.getNameValues())) {
                System.out.println("ERROR - write financial report to DB (" + rp.getReportTypeStr() + " )");
            }
        }
    }

    /**
     * get the last 4 quarter
     * @param year
     * @param quarter
     * @param yarr
     * @param qarr 
     */
    private void getLast4Quarter(int year, int quarter, int yarr[], int qarr[]) {
        if (quarter == 4) {
            qarr[0] = 4;
            qarr[1] = 3;
            qarr[2] = 2;
            qarr[3] = 1;
            yarr[0] = year;
            yarr[1] = year;
            yarr[2] = year;
            yarr[3] = year;
        } else if (quarter == 3) {
            qarr[0] = 3;
            qarr[1] = 2;
            qarr[2] = 1;
            qarr[3] = 4;
            yarr[0] = year;
            yarr[1] = year;
            yarr[2] = year;
            yarr[3] = year - 1;
        } else if (quarter == 2) {
            qarr[0] = 2;
            qarr[1] = 1;
            qarr[2] = 4;
            qarr[3] = 3;
            yarr[0] = year;
            yarr[1] = year;
            yarr[2] = year - 1;
            yarr[3] = year - 1;
        } else if (quarter == 1) {
            qarr[0] = 1;
            qarr[1] = 4;
            qarr[2] = 3;
            qarr[3] = 2;
            yarr[0] = year;
            yarr[1] = year - 1;
            yarr[2] = year - 1;
            yarr[3] = year - 1;
        }
    }

    /**
     * usually e10k report doesn't have full last Q data,have to calculate ourselves
     * @param symbol
     * @param lastQOffset 
     */
    private void calculateLastQ(String symbol, int lastQOffset) {
        ArrayList<FundamentalDataProc.FundamentalDataRec> lastQLst = fdDbProc.getLastQData(symbol, lastQOffset);
        // UpdateHandler uh = fdDbProc.getLastQ(symbol,lastQOffset);
        // ArrayList<FundamentalDataProc.FundamentalDataRec> lastQLst = uh.fdrLst;
        if (lastQLst == null || lastQLst.isEmpty())
            return;
        int qarr[] = new int[4];
        int yarr[] = new int[4];
        for (FundamentalDataProc.FundamentalDataRec rec4Q : lastQLst) {
            getLast4Quarter(rec4Q.getYear(), rec4Q.getQuarter(), yarr, qarr);
            // get previous 3  quarter data from db
            // should have a API to retrieve N number of data prior to / start from
            ArrayList<FundamentalDataProc.FundamentalDataRec> prev3QLst = new ArrayList();
            for (int i = 1; i < 4; i++) {
                FundamentalDataProc.FundamentalDataRec rec = fdDbProc.readDataByDate(symbol, rec4Q.getType(),
                        yarr[i], qarr[i]);
                if (rec != null)
                    prev3QLst.add(rec);
                else {
                    System.out.println(
                            "ERROR - no previous quarter found" + yarr[i] + "," + qarr[i] + "," + rec4Q.getType());
                }
            }
            if (prev3QLst.size() != 3)
                continue;
            // get annual data
            // get yearly / quarter data respectly
            FundamentalDataProc.FundamentalDataRec recAnnual = fdDbProc.readDataByDate(symbol, rec4Q.getType(),
                    rec4Q.getYear(), 0);

            if (recAnnual != null && rec4Q != null) { //TODO delete rec4Q
                LinkedHashMap<String, Double> annualItems = recAnnual.getNameValues();
                LinkedHashMap<String, Double> q4Items = rec4Q.getNameValues();
                for (Map.Entry<String, Double> pair : annualItems.entrySet()) {
                    String nameAnnual = pair.getKey();
                    Double valAnnual = pair.getValue();
                    Double val4Q = rec4Q.getDataByName(nameAnnual);
                    if (valAnnual == null) {
                        continue;
                    }
                    // if there is data in 4Q then we don't calculat eagain
                    if (val4Q != null) {
                        //q4Items.put(nameAnnual, val4Q);
                        continue;
                    }

                    // has 4Q data?
                    // TODO directly write to 4Q structure
                    if (q4Items.get(nameAnnual) == null) {
                        double val3Q = 0.0;
                        boolean haveAll3Q = true;
                        LABEL_CALC_3Q: for (FundamentalDataProc.FundamentalDataRec rec : prev3QLst) {
                            Double qVal = rec.getDataByName(nameAnnual);
                            if (qVal == null) {
                                // one quarter data is missing
                                haveAll3Q = false;
                                break LABEL_CALC_3Q;
                            } else {
                                val3Q += qVal;
                            }
                        }
                        if (haveAll3Q) {
                            // we got all other 3 quarters data                        
                            if (xcalRule.checkRule(nameAnnual) == XCalcRule.CALC_RULE.eSUBTRACT)
                                q4Items.put(nameAnnual, valAnnual - val3Q);
                            else
                                q4Items.put(nameAnnual, valAnnual);
                        }
                    }
                }
                // debug
                // System.out.println();
                for (Map.Entry<String, Double> entry : q4Items.entrySet()) {
                    String name = entry.getKey();
                    String display = xbrl2DisplayProc.readRecord(name);
                    String outputStr = String.format("%80s -->", display);
                    for (FundamentalDataProc.FundamentalDataRec rec : prev3QLst) {
                        outputStr = String.format("%s%s,", outputStr, rec.getDataByName(name));
                    }
                    outputStr = String.format("%s%s --> %s,%s", outputStr, entry.getValue(),
                            rec4Q.getDataByName(name), recAnnual.getDataByName(name));
                    System.out.print(outputStr);
                    System.out.print("\n");
                }
                // update back to db
                fdDbProc.deleteQData(symbol, rec4Q.getYear(), rec4Q.getQuarter() + lastQOffset, rec4Q.getType());
                fdDbProc.writeData(symbol, rec4Q.getYear(), rec4Q.getQuarter(), rec4Q.getType(), xset.getDateStr(),
                        rec4Q.getNameValues());

            } else {
                System.out.println("ERROR - Annual or 4Q rec is null!");
            }
        }

        // fdDbProc.updateLastQ(uh);
    }

    /**
     * download symbol
     * @param symbol 
     */
    private void download(String symbol) {
        // get current year and previous quarter
        Calendar c = Calendar.getInstance();
        int quarter = c.get(Calendar.MONTH) / 3 + 1;
        int year = c.get(Calendar.YEAR);
        // now - 2014,Q3, check if 2014 Q2 existed
        if (quarter == 1) {
            quarter = 4;
            year = year - 1;
        } else {
            quarter = quarter - 1;
        }
        // process all lastQ data
        xbrlMapping.clear();
        preType.loadTypeDefFile(System.getProperty("user.dir") + GlobalConfig.XBRL_TYPE_RULE_FILE);
        // xreportProc.loadXbrlTypeRuleFile(System.getProperty("user.dir") + GlobalConfig.XBRL_TYPE_RULE_FILE);
        calculateLastQ(symbol, xbrlParser.getLastQOffset());
        updateProgress(0.1);
        getFundaFromNet(symbol);
        updateProgress(0.8);
        calculateLastQ(symbol, xbrlParser.getLastQOffset());
        updateProgress(1.0);
    }

    /**
     * update the progress
     * @param inc 
     */
    private void updateProgress(double inc) {
        this.updateProgress(this.getCurrentProgress() + inc, this.getTotalProgress());
    }

    @Override
    public Object call() throws Exception {
        this.setTotalProgress((double) symbolLst.size());
        for (int i = 0; i < symbolLst.size(); i++) {
            String symbol = symbolLst.get(i);
            this.setCurrentProgress((double) i);
            download(symbol);
        }
        updateMessage("DONE");
        return true;
    }

    /**
     * copy the symbol list into local list
     * @param symbolLst 
     */
    public void initSymbolLst(ArrayList<String> symbolLst) {
        this.symbolLst.clear();
        this.symbolLst.addAll(symbolLst);
    }

    public void initSymbol(String symbol) {
        this.symbolLst.clear();
        this.symbolLst.add(symbol);
    }

    public void startTask() {
        new Thread(this).start();
    }
}