Java tutorial
/* * To change this license header, choose License Headers in Project Properties. * To change this template file, choose Tools | Templates * and open the template in the editor. */ package com.webfront.app.utils; import com.webfront.bean.LedgerManager; import com.webfront.model.Config; import com.webfront.model.Ledger; import com.webfront.model.LedgerEntry; import java.io.BufferedInputStream; import java.io.BufferedReader; import java.io.FileInputStream; import java.io.FileReader; import java.io.FileWriter; import java.io.IOException; import java.text.ParseException; import java.util.ArrayList; import java.util.Comparator; import java.util.HashMap; import java.util.List; import java.util.TreeSet; import java.util.logging.Level; import java.util.logging.Logger; import java.util.regex.Matcher; import java.util.regex.Pattern; import org.apache.pdfbox.pdmodel.PDDocument; import org.apache.pdfbox.util.PDFTextStripper; import org.jdom2.Content; import org.jdom2.DataConversionException; import org.jdom2.Document; import org.jdom2.Element; import org.jdom2.JDOMException; import org.jdom2.input.SAXBuilder; /** * * @author rlittle */ public class PDFImporter extends Importer { protected String fileName; SAXBuilder jdomBuilder; Document xmlDoc; private String txtOutput; private final Config cfg = Config.getInstance(); private BufferedReader txtReader; Float lastBalance; Integer currentLine; class ElementNotFoundException extends Exception { public ElementNotFoundException(String msg) { super(msg); } }; ArrayList<LedgerEntry> entries; HashMap<Integer, String> buffer; HashMap<String, Integer> markers; TreeSet sectionMarks; public PDFImporter(String fileName, int accountId) { super(fileName, accountId); this.fileName = fileName; entries = new ArrayList<>(); lastBalance = new Float(0.0); buffer = new HashMap<>(); markers = new HashMap<>(); currentLine = 0; LedgerManager mgr = new LedgerManager(); int lastId = mgr.getLastId(accountId); if (lastId > 0) { Ledger item = mgr.getItem(lastId); if (item != null) { lastBalance += item.getTransBal(); } } } @Override public void doImport(BufferedReader reader) throws IOException, ParseException { PDFTextStripper pdfStripper = new PDFTextStripper(); BufferedInputStream inStream = new BufferedInputStream(new FileInputStream(fileName)); try (PDDocument document = PDDocument.load(inStream)) { txtOutput = pdfStripper.getText(document); try (FileWriter writer = new FileWriter(cfg.getTmpDir() + cfg.getFileSep() + "pdfOut.txt")) { writer.write(txtOutput); writer.close(); txtReader = new BufferedReader(new FileReader(cfg.getTmpDir() + cfg.getFileSep() + "pdfOut.txt")); String text = ""; while (text != null) { text = txtReader.readLine(); buffer.put(currentLine++, text); } } getConfig(); currentLine = 0; Element root = xmlDoc.getRootElement(); int maxLines = buffer.size() - 3; int markedLine = 0; // Scan the output and mark the start of each section for (Element el : root.getChildren()) { for (Element section : el.getChildren()) { String sectionName = section.getAttributeValue("content"); Element startElement = section.getChild("start"); Element endElement = section.getChild("end"); if (startElement != null) { boolean endHasBounds = true; if (endElement.getAttribute("bounded") != null) { String bounds = endElement.getAttributeValue("bounded"); if (bounds.equals("false")) { endHasBounds = false; } } Pattern linePattern = Pattern.compile(startElement.getText()); String text = ""; boolean elementFound = false; while (currentLine < maxLines) { text = buffer.get(currentLine++); if (linePattern.matcher(text).matches()) { markedLine = currentLine - 1; markers.put(sectionName, markedLine); elementFound = true; if (!endHasBounds) { currentLine--; } break; } } if (!elementFound) { currentLine = markedLine; } } } } ArrayList<Integer> lineNumbers = new ArrayList<>(markers.values()); lineNumbers.sort(new Comparator<Integer>() { @Override public int compare(Integer o1, Integer o2) { return o1.compareTo(o2); } }); sectionMarks = new TreeSet(markers.values()); currentLine = 0; for (Element element : root.getChildren()) { int lines = 0; if (element.getAttribute("lines") != null) { lines = element.getAttribute("lines").getIntValue(); } for (Element section : element.getChildren()) { String contentDesc; contentDesc = (section.getAttribute("content") == null ? "" : section.getAttributeValue("content")); if (markers.containsKey(contentDesc)) { currentLine = markers.get(contentDesc); processSection(section); } } } } catch (DataConversionException ex) { Logger.getLogger(PDFImporter.class.getName()).log(Level.SEVERE, null, ex); } catch (ElementNotFoundException ex) { Logger.getLogger(PDFImporter.class.getName()).log(Level.SEVERE, null, ex); } entries.sort(LedgerEntry.LedgerComparator); for (LedgerEntry item : entries) { java.util.Date date = new java.util.Date(DateConvertor.toLong(item.getDate(), "MM/dd/yyyy")); String amountString = item.getAmount(); boolean isCredit = true; if (item.getAmount().startsWith("-")) { isCredit = false; String amt = item.getAmount().replaceFirst("-", ""); item.setAmount(amt); } float amount = Float.parseFloat(amountString); if (isCredit) { lastBalance += amount; totalDeposits += amount; } else { lastBalance -= amount; totalWithdrawals += amount; } Ledger ledger = new Ledger(null, date, amount, lastBalance, accountId); if (item.getDescription().length() > 120) { item.setDescription(item.getDescription().substring(0, 119)); } if (item.getCheckNumber() != null && !item.getCheckNumber().isEmpty()) { ledger.setCheckNum(item.getCheckNumber()); totalChecks -= amount; } ledger.setTransDesc(item.getDescription()); getItemList().add(ledger); } } public void processSection(Element section) throws ElementNotFoundException { String name = section.getName(); boolean hasEntry = false; String contentDesc = (section.getAttribute("content") == null ? "" : section.getAttributeValue("content")); int lines = 1; int maxLines = 0; int linesProcessed = 0; boolean endHasBounds = true; Element startElement = section.getChild("start"); Element endElement = section.getChild("end"); Element dataDefinition = section.getChild("data"); Element lineDefinition = section.getChild("line"); Pattern dataPattern = null; Pattern linePattern = null; String transType = (section.getAttribute("type") == null ? "info" : section.getAttributeValue("type")); String prevLine = ""; int nextSection = buffer.size() - 1; if (sectionMarks.higher(currentLine) != null) { nextSection = (int) sectionMarks.higher(currentLine); } if (startElement.getAttributeValue("lines") != null) { lines = Integer.parseInt(startElement.getAttributeValue("lines")); currentLine += lines; } if (lineDefinition != null) { linePattern = Pattern.compile(lineDefinition.getText()); if (lineDefinition.getAttribute("lines") != null) { String l = lineDefinition.getAttributeValue("lines"); if (!l.equals("+")) { maxLines = Integer.parseInt(lineDefinition.getAttributeValue("lines")); } } } if (endElement.getAttribute("bounded") != null) { String bounds = endElement.getAttributeValue("bounded"); if (bounds.equals("false")) { endHasBounds = false; } } Pattern endPattern = Pattern.compile(endElement.getText()); while (currentLine < nextSection) { prevLine = buffer.get(currentLine - 1); String text = buffer.get(currentLine++); Matcher lineMatcher = null; if (lineDefinition != null) { lineMatcher = linePattern.matcher(text); } Matcher endMatcher = endPattern.matcher(text); if (endMatcher.matches()) { if (!endHasBounds) { currentLine -= 1; } break; } else { if (currentLine >= buffer.size()) { throw new ElementNotFoundException("Not found"); } } if (lineMatcher != null && lineMatcher.matches()) { if (!contentDesc.equals("discard")) { // System.out.println(text); } hasEntry = false; if (dataDefinition != null) { LedgerEntry entry = new LedgerEntry(); for (Element dataLine : dataDefinition.getChildren()) { String tag = dataLine.getAttributeValue("content"); String regex = dataLine.getText(); Matcher matcher = Pattern.compile(regex).matcher(text); String value = ""; if (matcher.find()) { value = matcher.group(); } switch (tag) { case "beginningBalance": beginningBalance = Float.parseFloat(value.replaceAll(",", "")); break; case "totalDeposits": value.replaceAll(",", ""); totalDeposits = Float.parseFloat(value.replaceAll(",", "")); break; case "totalWithdrawals": value.replaceAll(",", ""); totalWithdrawals = Float.parseFloat(value.replaceAll(",", "")); break; case "endingBalance": value.replaceAll(",", ""); endingBalance = Float.parseFloat(value.replaceAll(",", "")); break; case "accountNumber": summary.put("accountNumber", value); break; case "periodFrom": summary.put("startDate", value); break; case "periodTo": summary.put("endDate", value); break; case "date": entry.setDate(value); text = matcher.replaceFirst(""); break; case "check": entry.setCheckNumber(value); break; case "amount": if (transType.equals("debit")) { value = "-" + value; } entry.setAmount(value); text = matcher.replaceFirst(""); if (section.getAttributeValue("content").equals("fees")) { String amt = entry.getAmount(); amt = amt.replaceAll("-", ""); amt = amt.replaceAll(",", ""); totalFees += Float.parseFloat(amt); } break; case "description": entry.setDescription(value); break; } } if (maxLines > 0 && ++linesProcessed == maxLines) { return; } if (!contentDesc.equals("dailyBalance") && !contentDesc.endsWith("Info")) { entries.add(entry); hasEntry = true; } } } else { if (linePattern.matcher(prevLine).matches() && hasEntry) { if (!contentDesc.equals("dailyBalance")) { int lastEntry = entries.size() - 1; LedgerEntry entry = entries.get(lastEntry); entry.setDescription(entry.getDescription() + " " + text); entries.set(lastEntry, entry); hasEntry = false; } } } } } private void getConfig() { jdomBuilder = new SAXBuilder(); String xmlSource = cfg.getInstallDir() + cfg.getFileSep() + "pnc.xml"; try { xmlDoc = jdomBuilder.build(xmlSource); Element root = xmlDoc.getRootElement(); List<Content> configContent = root.getContent(); List<Element> childElements = root.getChildren(); Element header = root.getChild("header"); for (Element e : header.getChildren()) { String content = e.getAttributeValue("content"); String format = e.getAttributeValue("format"); String text = e.getText(); } } catch (JDOMException ex) { Logger.getLogger(PDFImporter.class.getName()).log(Level.SEVERE, null, ex); } catch (IOException ex) { Logger.getLogger(PDFImporter.class.getName()).log(Level.SEVERE, null, ex); } } }