// Java tutorial
/* * Copyright (C) 2014 Zhou_Rui * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program. If not, see <http://www.gnu.org/licenses/>. */ package com.candy.middle; import com.candy.common.CandyUtil; import com.candy.db.FundamentalDataProc; import com.candy.db.Xbrl2DisplayProc; //import static com.candy.middle.FundaDataCache.httpDownload; import com.candy.xbrl2.XCalcRule; import com.candy.xbrl2.XPreType; import com.candy.xbrl2.XReportProc; import com.candy.xbrl2.XbrlFileSet; import com.candy.xbrl2.XbrlParser; import com.candy.xbrl2.XbrlParser.REPORT_TYPE; import com.candy.xbrl2.XbrlParser.Report; import com.google.common.collect.Multimap; import java.io.File; import java.io.FileNotFoundException; import java.io.FileOutputStream; import java.io.IOException; import java.io.InputStream; import java.net.MalformedURLException; import java.net.URL; import java.net.URLConnection; import java.nio.file.FileSystems; import java.nio.file.Files; import java.nio.file.Path; import java.text.ParseException; import java.util.ArrayList; import java.util.Calendar; import java.util.Date; import java.util.HashMap; import java.util.LinkedHashMap; import java.util.List; import java.util.Map; import javafx.concurrent.Task; import org.jsoup.Jsoup; import org.jsoup.nodes.Document; import org.jsoup.nodes.Element; import org.jsoup.select.Elements; /** * * @author Zhou Rui <wirelesser at hotmail.com> */ public class FinReportDownload extends Task { public double 
getCurrentProgress() { return currentProgress; } public void setCurrentProgress(double currentProgress) { this.currentProgress = currentProgress; } // proc private FundamentalDataProc fdDbProc = FundamentalDataProc.getInstance(); private ArrayList<String> symbolLst = new ArrayList(); private HashMap<String, String> xbrlMapping = new HashMap(); private Xbrl2DisplayProc xbrl2DisplayProc = Xbrl2DisplayProc.getInstance(); private XbrlFileSet xset = XbrlFileSet.getInstance(); private XbrlParser xbrlParser = XbrlParser.getInstance(); private XCalcRule xcalRule = XCalcRule.getInstance(); // private XReportProc xreportProc = XReportProc.getInstance(); private XPreType preType = XPreType.getInstance(); private double currentProgress = 0.0, totalProgress = 0.0; public double getTotalProgress() { return totalProgress; } public void setTotalProgress(double totalProgress) { this.totalProgress = totalProgress; } private static String SECURL = "http://www.sec.gov/cgi-bin/browse-edgar?Find=Search&owner=exclude&action=getcompany&type=10%25&dateb=&owner=exclude&start=0&count=100&output=atom&CIK="; /** * get the company sec filing list * @param symbol * @return */ private List<String> getCompany10KFilingList(String symbol) { // filing must have both xbrl_href and filing-href try { Document doc = Jsoup.connect(SECURL + symbol).ignoreContentType(true).get(); Elements contents = doc.select("content"); // selector is more powerful if (contents != null) { ArrayList<String> retLst = new ArrayList(); for (Element item : contents) { // System.out.println(item); Elements xbrlLinks = item.select("xbrl_href"); if (!xbrlLinks.isEmpty()) { Elements filingLinks = item.select("filing-href"); if (!filingLinks.isEmpty()) { for (Element link : filingLinks) { String linkText = link.text(); retLst.add(linkText); } } } } return retLst; } return null; } catch (Exception e) { return null; } } static public boolean httpDownload(String httpUrl, String saveFile) { int bytesum = 0; int byteread = 0; URL url = 
null; try { url = new URL(httpUrl); } catch (MalformedURLException e1) { // TODO Auto-generated catch block e1.printStackTrace(); return false; } try { URLConnection conn = url.openConnection(); InputStream inStream = conn.getInputStream(); FileOutputStream fs = new FileOutputStream(saveFile); byte[] buffer = new byte[1204]; while ((byteread = inStream.read(buffer)) != -1) { bytesum += byteread; fs.write(buffer, 0, byteread); } return true; } catch (FileNotFoundException e) { e.printStackTrace(); return false; } catch (IOException e) { e.printStackTrace(); return false; } } /* format like this <table class="tableFile" summary="Data Files"> <tr> <th scope="col" style="width: 5%;"><acronym title="Sequence Number">Seq</acronym></th> <th scope="col" style="width: 40%;">Description</th> <th scope="col" style="width: 20%;">Document</th> <th scope="col" style="width: 10%;">Type</th> <th scope="col">Size</th> </tr> <tr> <td scope="row">9</td> <td scope="row">EX-101.INS</td> <td scope="row"><a href="/Archives/edgar/data/51143/000110465909045198/ibm-20090630.xml">ibm-20090630.xml</a></td> <td scope="row">EX-101.INS</td> <td scope="row">1567593</td> </tr> <tr class="blueRow"> <td scope="row">10</td> <td scope="row">EX-101.SCH</td> <td scope="row"><a href="/Archives/edgar/data/51143/000110465909045198/ibm-20090630.xsd">ibm-20090630.xsd</a></td> <td scope="row">EX-101.SCH</td> <td scope="row">17409</td> </tr> <tr> <td scope="row">11</td> <td scope="row">EX-101.CAL</td> <td scope="row"><a href="/Archives/edgar/data/51143/000110465909045198/ibm-20090630_cal.xml">ibm-20090630_cal.xml</a></td> <td scope="row">EX-101.CAL</td> <td scope="row">56606</td> </tr> <tr class="blueRow"> <td scope="row">12</td> <td scope="row">EX-101.DEF</td> <td scope="row"><a href="/Archives/edgar/data/51143/000110465909045198/ibm-20090630_def.xml">ibm-20090630_def.xml</a></td> <td scope="row">EX-101.DEF</td> <td scope="row">86590</td> </tr> <tr> <td scope="row">13</td> <td scope="row">EX-101.LAB</td> <td 
scope="row"><a href="/Archives/edgar/data/51143/000110465909045198/ibm-20090630_lab.xml">ibm-20090630_lab.xml</a></td> <td scope="row">EX-101.LAB</td> <td scope="row">277614</td> </tr> <tr class="blueRow"> <td scope="row">14</td> <td scope="row">EX-101.PRE</td> <td scope="row"><a href="/Archives/edgar/data/51143/000110465909045198/ibm-20090630_pre.xml">ibm-20090630_pre.xml</a></td> <td scope="row">EX-101.PRE</td> <td scope="row">122873</td> </tr> </table> */ /* * return */ private Map<String, String> getXbrlFileSet(String url) { try { Document doc = Jsoup.connect(url).ignoreContentType(true).get(); // <table class="tableFile" summary="Data Files"> Elements content = doc.select("table[summary=Data Files]"); if (content != null) { for (Element item : content) { Elements links = item.select("a[href]"); if (links != null) { Map<String, String> lst = new HashMap(); for (Element link : links) { String linkHref = link.attr("href"); String linkText = link.text(); lst.put(linkText, linkHref); // filename, url } return lst; } } } return null; } catch (Exception e) { return null; } } /** * check if record exist in DB * @param fileName * @return is rec exist */ // private boolean isRecInDBbyFileName0(String fileName) { // // ibm-20090630_cal.xml -> ibm, 20090630, cal // String i1 = fileName.replaceAll("-", "_"); // String i2 = i1.replaceAll("\\.", "_"); // String tokens[] = i2.split("_"); // if (tokens.length < 3) // return false; // String symbol = tokens[0]; // String dateStr = tokens[1]; // try { // Date date = CandyUtil.getInstance().YYYYMMDD.parse(dateStr); // Calendar cal = Calendar.getInstance(); // cal.setTime(date); // int year = cal.get(Calendar.YEAR); // int quarter = cal.get(Calendar.MONTH) / 3 + 1; // boolean exist = true; // for (REPORT_TYPE rt : REPORT_TYPE.values()) { // if (rt == REPORT_TYPE.eUNKNOWN) // continue; // if (!fdDbProc.isDataExist(symbol, rt.ordinal() , year, quarter)) { // exist = false;break; // } // } // return exist; // // } catch 
(ParseException e) { // return false; // } // } private String verifyXbrlFile(String fileName) { // ibm-20090630_cal.xml -> ibm, 20090630, cal fileName = fileName.replaceAll("-", "_"); fileName = fileName.replaceAll("\\.", "_"); String tokens[] = fileName.split("_"); if (tokens.length < 2) return null; // take second token as filename return tokens[1]; } private boolean isRecInDBbyFileName(String symbol, String fileName) { boolean exist = true; for (REPORT_TYPE rt : REPORT_TYPE.values()) { if (rt == REPORT_TYPE.eUNKNOWN) continue; if (!fdDbProc.isDataExistByFileName(symbol, fileName, rt.ordinal())) { exist = false; break; } } return exist; } private void getFundaFromNet(String company) { // ("http://www.sec.gov/ + Archives/edgar/data/51143/000104746914001302/0001047469-14-001302-index.htm"); List<String> filingLst = getCompany10KFilingList(company); double beforePg = 0.2; double afterPg = 0.8; updateProgress(beforePg); if (filingLst != null) { // create folder if not exist String folder = System.getProperty("user.dir") + "/secfiles"; Path path = FileSystems.getDefault().getPath(folder); if (Files.notExists(path)) { File fileDir = new File(folder); fileDir.mkdirs(); } // we have filing url, need to check each filing int numFiling = filingLst.size(); int currFiling = 0; for (String filingUrl : filingLst) { currFiling++; updateProgress(beforePg + (afterPg - beforePg) * currFiling / numFiling); // key = filename, value = http relative path Map<String, String> xbrlLst = getXbrlFileSet(filingUrl); // is it in DB? 
if (xbrlLst != null && !xbrlLst.isEmpty()) { // test any file String fileName = xbrlLst.keySet().iterator().next(); String fnStr = verifyXbrlFile(fileName); if (fnStr != null) { if (isRecInDBbyFileName(company, fileName)) { System.out.println("WARN - record exist " + fileName); continue; } } else { continue; // invalid filename } } else { System.out.println("ERROR - SEC filling url contains empty fileset " + filingUrl); continue; } // not in db, download from net for (Map.Entry pair : xbrlLst.entrySet()) { String fullPath = folder + "/" + pair.getKey(); if (new File(fullPath).canRead()) { System.out.println("INFO - the file " + pair.getValue() + " exist"); } else { // download it if (!httpDownload("http://www.sec.gov" + pair.getValue(), fullPath)) { System.out.println("ERROR - unable download " + pair.getValue()); } else { System.out.println("DONE - downloaded to " + fullPath); } } } // verify xbrl files xset.reset(); Multimap<REPORT_TYPE, Report> reports = null; boolean validXbrlSet = false; for (Map.Entry pair : xbrlLst.entrySet()) { String fullPath = folder + "/" + pair.getKey(); if (xset.verifyXbrlFile(fullPath)) { reports = xbrlParser.parse(xset); // save all xbrl mapping to hashmap for (Report rp : reports.values()) { for (XbrlParser.Report.IdNameValue item : rp.getIdNameValues()) { xbrlMapping.put(item.getIdHref(), item.getDisplay()); } } writeToDB(company, reports, xset.getDateStr()); validXbrlSet = true; break; } } if (!validXbrlSet) { for (Map.Entry pair : xbrlLst.entrySet()) { System.out.println("ERROR - the xbrl file " + pair.getKey() + " is invalid"); } } } // end for // write xbrl2display to DB xbrl2DisplayProc.writeMultiRecords(xbrlMapping); } } /** * write to database * @param reports */ private void writeToDB(String symbol, Multimap<REPORT_TYPE, Report> reports, String dateStr) { for (Report rp : reports.values()) { if (!fdDbProc.writeData(symbol, rp.getYear(), rp.getQuarter(), rp.getReportType().ordinal(), dateStr, rp.getNameValues())) { 
System.out.println("ERROR - write financial report to DB (" + rp.getReportTypeStr() + " )"); } } } /** * get the last 4 quarter * @param year * @param quarter * @param yarr * @param qarr */ private void getLast4Quarter(int year, int quarter, int yarr[], int qarr[]) { if (quarter == 4) { qarr[0] = 4; qarr[1] = 3; qarr[2] = 2; qarr[3] = 1; yarr[0] = year; yarr[1] = year; yarr[2] = year; yarr[3] = year; } else if (quarter == 3) { qarr[0] = 3; qarr[1] = 2; qarr[2] = 1; qarr[3] = 4; yarr[0] = year; yarr[1] = year; yarr[2] = year; yarr[3] = year - 1; } else if (quarter == 2) { qarr[0] = 2; qarr[1] = 1; qarr[2] = 4; qarr[3] = 3; yarr[0] = year; yarr[1] = year; yarr[2] = year - 1; yarr[3] = year - 1; } else if (quarter == 1) { qarr[0] = 1; qarr[1] = 4; qarr[2] = 3; qarr[3] = 2; yarr[0] = year; yarr[1] = year - 1; yarr[2] = year - 1; yarr[3] = year - 1; } } /** * usually e10k report doesn't have full last Q data,have to calculate ourselves * @param symbol * @param lastQOffset */ private void calculateLastQ(String symbol, int lastQOffset) { ArrayList<FundamentalDataProc.FundamentalDataRec> lastQLst = fdDbProc.getLastQData(symbol, lastQOffset); // UpdateHandler uh = fdDbProc.getLastQ(symbol,lastQOffset); // ArrayList<FundamentalDataProc.FundamentalDataRec> lastQLst = uh.fdrLst; if (lastQLst == null || lastQLst.isEmpty()) return; int qarr[] = new int[4]; int yarr[] = new int[4]; for (FundamentalDataProc.FundamentalDataRec rec4Q : lastQLst) { getLast4Quarter(rec4Q.getYear(), rec4Q.getQuarter(), yarr, qarr); // get previous 3 quarter data from db // should have a API to retrieve N number of data prior to / start from ArrayList<FundamentalDataProc.FundamentalDataRec> prev3QLst = new ArrayList(); for (int i = 1; i < 4; i++) { FundamentalDataProc.FundamentalDataRec rec = fdDbProc.readDataByDate(symbol, rec4Q.getType(), yarr[i], qarr[i]); if (rec != null) prev3QLst.add(rec); else { System.out.println( "ERROR - no previous quarter found" + yarr[i] + "," + qarr[i] + "," + 
rec4Q.getType()); } } if (prev3QLst.size() != 3) continue; // get annual data // get yearly / quarter data respectly FundamentalDataProc.FundamentalDataRec recAnnual = fdDbProc.readDataByDate(symbol, rec4Q.getType(), rec4Q.getYear(), 0); if (recAnnual != null && rec4Q != null) { //TODO delete rec4Q LinkedHashMap<String, Double> annualItems = recAnnual.getNameValues(); LinkedHashMap<String, Double> q4Items = rec4Q.getNameValues(); for (Map.Entry<String, Double> pair : annualItems.entrySet()) { String nameAnnual = pair.getKey(); Double valAnnual = pair.getValue(); Double val4Q = rec4Q.getDataByName(nameAnnual); if (valAnnual == null) { continue; } // if there is data in 4Q then we don't calculat eagain if (val4Q != null) { //q4Items.put(nameAnnual, val4Q); continue; } // has 4Q data? // TODO directly write to 4Q structure if (q4Items.get(nameAnnual) == null) { double val3Q = 0.0; boolean haveAll3Q = true; LABEL_CALC_3Q: for (FundamentalDataProc.FundamentalDataRec rec : prev3QLst) { Double qVal = rec.getDataByName(nameAnnual); if (qVal == null) { // one quarter data is missing haveAll3Q = false; break LABEL_CALC_3Q; } else { val3Q += qVal; } } if (haveAll3Q) { // we got all other 3 quarters data if (xcalRule.checkRule(nameAnnual) == XCalcRule.CALC_RULE.eSUBTRACT) q4Items.put(nameAnnual, valAnnual - val3Q); else q4Items.put(nameAnnual, valAnnual); } } } // debug // System.out.println(); for (Map.Entry<String, Double> entry : q4Items.entrySet()) { String name = entry.getKey(); String display = xbrl2DisplayProc.readRecord(name); String outputStr = String.format("%80s -->", display); for (FundamentalDataProc.FundamentalDataRec rec : prev3QLst) { outputStr = String.format("%s%s,", outputStr, rec.getDataByName(name)); } outputStr = String.format("%s%s --> %s,%s", outputStr, entry.getValue(), rec4Q.getDataByName(name), recAnnual.getDataByName(name)); System.out.print(outputStr); System.out.print("\n"); } // update back to db fdDbProc.deleteQData(symbol, rec4Q.getYear(), 
rec4Q.getQuarter() + lastQOffset, rec4Q.getType()); fdDbProc.writeData(symbol, rec4Q.getYear(), rec4Q.getQuarter(), rec4Q.getType(), xset.getDateStr(), rec4Q.getNameValues()); } else { System.out.println("ERROR - Annual or 4Q rec is null!"); } } // fdDbProc.updateLastQ(uh); } /** * download symbol * @param symbol */ private void download(String symbol) { // get current year and previous quarter Calendar c = Calendar.getInstance(); int quarter = c.get(Calendar.MONTH) / 3 + 1; int year = c.get(Calendar.YEAR); // now - 2014,Q3, check if 2014 Q2 existed if (quarter == 1) { quarter = 4; year = year - 1; } else { quarter = quarter - 1; } // process all lastQ data xbrlMapping.clear(); preType.loadTypeDefFile(System.getProperty("user.dir") + GlobalConfig.XBRL_TYPE_RULE_FILE); // xreportProc.loadXbrlTypeRuleFile(System.getProperty("user.dir") + GlobalConfig.XBRL_TYPE_RULE_FILE); calculateLastQ(symbol, xbrlParser.getLastQOffset()); updateProgress(0.1); getFundaFromNet(symbol); updateProgress(0.8); calculateLastQ(symbol, xbrlParser.getLastQOffset()); updateProgress(1.0); } /** * update the progress * @param inc */ private void updateProgress(double inc) { this.updateProgress(this.getCurrentProgress() + inc, this.getTotalProgress()); } @Override public Object call() throws Exception { this.setTotalProgress((double) symbolLst.size()); for (int i = 0; i < symbolLst.size(); i++) { String symbol = symbolLst.get(i); this.setCurrentProgress((double) i); download(symbol); } updateMessage("DONE"); return true; } /** * copy the symbol list into local list * @param symbolLst */ public void initSymbolLst(ArrayList<String> symbolLst) { this.symbolLst.clear(); this.symbolLst.addAll(symbolLst); } public void initSymbol(String symbol) { this.symbolLst.clear(); this.symbolLst.add(symbol); } public void startTask() { new Thread(this).start(); } }