Java tutorial
/*
 * To change this license header, choose License Headers in Project Properties.
 * To change this template file, choose Tools | Templates
 * and open the template in the editor.
 */
package org.shareok.data.sagedata;

import java.io.File;
import java.io.IOException;
import java.io.PrintWriter;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Date;
import java.util.Iterator;
import java.util.Map;
import java.util.Objects;
import java.util.Set;
import java.util.logging.Level;
import java.util.logging.Logger;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerException;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;
import org.shareok.data.htmlrequest.HtmlParser;
import org.shareok.data.htmlrequest.HttpRequestHandler;
import org.shareok.data.sagedata.exceptions.EmptyFilePathException;
import org.shareok.data.sagedata.exceptions.EmptyJournalDataException;
import org.shareok.data.sagedata.exceptions.EmptyProcessorDataException;
import org.springframework.beans.BeansException;
import org.w3c.dom.Attr;
import org.w3c.dom.DOMException;
import org.w3c.dom.Document;
import org.w3c.dom.Element;

/**
 * Base implementation of {@link SageJournalDataProcessor}.
 *
 * <p>It keeps the raw article data as a key/value map, copies the recognised entries into a
 * {@link SageJournalData} object, and writes three artefacts into a per-article folder: a
 * {@code dublin_core.xml} metadata file, the article PDF and a {@code contents} file that
 * names the PDF.
 *
 * @author Tao Zhao
 */
public abstract class SageJournalDataProcessorAbstract implements SageJournalDataProcessor {

    private static final Logger LOGGER =
            Logger.getLogger(SageJournalDataProcessorAbstract.class.getName());

    /** Format of the "pubdate" entry in the raw data map (mm/dd/yyyy). */
    private static final String PUB_DATE_PATTERN = "MM/dd/yyyy";

    /** Format used for the issued date in the exported XML. */
    private static final String ISSUED_DATE_PATTERN = "yyyy-MM-dd";

    private static final String LANG_EN_US = "en_US";
    private static final String QUALIFIER_NONE = "none";

    protected String id;
    protected Map data;
    protected String journalName;
    protected HttpRequestHandler htmlRequest;
    protected SageJournalData journalData;
    protected ArrayList<SageJournalData> sageJournalDataList;

    public Map getData() {
        return data;
    }

    public String getJournalName() {
        return journalName;
    }

    public SageJournalData getJournalData() {
        return journalData;
    }

    public ArrayList<SageJournalData> getSageJournalDataList() {
        return sageJournalDataList;
    }

    public String getId() {
        return id;
    }

    @Override
    public void setData(Map data) {
        this.data = data;
    }

    public void setJournalName(String journalName) {
        this.journalName = journalName;
    }

    public HttpRequestHandler getHtmlRequest() {
        return htmlRequest;
    }

    public void setHtmlRequest(HttpRequestHandler htmlRequest) {
        this.htmlRequest = htmlRequest;
    }

    public void setJournalData(SageJournalData journalData) {
        this.journalData = journalData;
    }

    public void setSageJournalDataList(ArrayList<SageJournalData> sageJournalDataList) {
        this.sageJournalDataList = sageJournalDataList;
    }

    public void setId(String id) {
        this.id = id;
    }

    /**
     * Runs the whole export for one article: sets the processor id, fetches and processes the
     * article response, converts the raw data to journal data, writes the XML metadata,
     * downloads the PDF and writes the contents file.
     *
     * <p>Any exception raised along the way is logged and ends the run; nothing is rethrown.
     *
     * @param fileName root folder that receives the per-article output folder; must not be
     *     {@code null} or empty
     */
    @Override
    public void getOutput(String fileName) {
        try {
            if (null == fileName || "".equals(fileName)) {
                throw new EmptyFilePathException("File path is NOT specified!");
            }
            // Every article needs to have an id from DOI
            setProcessorId();
            StringBuffer sb = getArticleResponse();
            if (null != sb && sb.length() != 0) {
                processArticleResponse(sb.toString());
            }
            convertDataToJournalData();
            exportXmlByJournalData(fileName);
            String pdfName = generatePdf(fileName);
            createDspaceContentsFile(fileName, pdfName);
        } catch (Exception ex) {
            LOGGER.log(Level.SEVERE, null, ex);
        }
    }

    /** @return the "title" entry of the raw data, or {@code null} when the data map is unset */
    @Override
    public String getArticleTitle() {
        return readDataField("title");
    }

    /** @return the "volume" entry of the raw data, or {@code null} when the data map is unset */
    @Override
    public String getArticleVolume() {
        return readDataField("volume");
    }

    /** @return the "issue" entry of the raw data, or {@code null} when the data map is unset */
    @Override
    public String getArticleIssue() {
        return readDataField("issue");
    }

    /** @return the "pages" entry of the raw data, or {@code null} when the data map is unset */
    @Override
    public String getArticlePages() {
        return readDataField("pages");
    }

    /** @return the "year" entry of the raw data, or {@code null} when the data map is unset */
    @Override
    public String getArticleYear() {
        return readDataField("year");
    }

    /** @return the "citation" entry of the raw data, or {@code null} when the data map is unset */
    @Override
    public String getArticleCitation() {
        return readDataField("citation");
    }

    /**
     * Returns the publication date parsed from the "pubdate" entry (format mm/dd/yyyy).
     *
     * @return the parsed date, or {@code null} when the data map is unset, the entry is missing
     *     or blank, or the value cannot be parsed
     */
    @Override
    public Date getArticlePubDate() {
        return parsePubDate(readDataField("pubdate"));
    }

    /** @return the "doi" entry of the raw data, or {@code null} when the data map is unset */
    @Override
    public String getArticleDoi() {
        return readDataField("doi");
    }

    /**
     * Extracts the abstract from an article page.
     *
     * @param html the article HTML
     * @return the first match for a {@code p} element whose {@code id} is {@code p-1}, or
     *     {@code null} when the parser returns nothing
     */
    @Override
    public String getArticleAbstract(String html) {
        String[] abstracts = HtmlParser.metaDataParserWithElementProperty(html, "p", "id", "p-1");
        if (null == abstracts || abstracts.length == 0) {
            return null;
        }
        return abstracts[0];
    }

    /**
     * Extracts the subject keywords from an article page.
     *
     * @param html the article HTML
     * @return whatever the parser returns for {@code a} elements whose {@code class} is
     *     {@code kwd-search}
     */
    @Override
    public String[] getArticleSubjects(String html) {
        return HtmlParser.metaDataParserWithElementProperty(html, "a", "class", "kwd-search");
    }

    /**
     * Copies the recognised entries of the raw data map into {@link #journalData}.
     *
     * <p>Keys are matched case-insensitively; unknown keys and {@code null} values are skipped.
     * "subjects" and "authors" are split on commas. A "pubdate" value that cannot be parsed is
     * logged and skipped so that the remaining keys are still processed. Any other failure
     * (for example an unset {@code journalData} or a non-String entry) is logged and ends the
     * conversion.
     */
    @Override
    public void convertDataToJournalData() {
        if (null == data) {
            LOGGER.log(Level.SEVERE, null, new EmptyJournalDataException("Journal data is empty!"));
            return;
        }
        try {
            for (Object rawKey : data.keySet()) {
                String key = (String) rawKey;
                if (null == key) {
                    continue;
                }
                String value = (String) data.get(key);
                if (null == value) {
                    continue;
                }
                if (key.equalsIgnoreCase("journal")) {
                    journalData.setPublisher(value);
                } else if (key.equalsIgnoreCase("peerReviewNotes")) {
                    // NOTE(review): the "peerReviewNotes" entry is stored through setPeerReview,
                    // not a notes-specific setter; kept as-is, confirm this is intended.
                    journalData.setPeerReview(value);
                } else if (key.equalsIgnoreCase("doi")) {
                    journalData.setDoi(value);
                } else if (key.equalsIgnoreCase("citation")) {
                    journalData.setCitation(value);
                } else if (key.equalsIgnoreCase("subjects")) {
                    journalData.setSubjects(value.split(","));
                } else if (key.equalsIgnoreCase("abstract")) {
                    journalData.setAbstractText(value);
                } else if (key.equalsIgnoreCase("title")) {
                    journalData.setTitle(value);
                } else if (key.equalsIgnoreCase("url")) {
                    journalData.setUri(value);
                } else if (key.equalsIgnoreCase("pubdate")) {
                    // A bad date must not abort the remaining keys: the map iteration order is
                    // unspecified, so aborting here would leave an unpredictable partial result.
                    Date issued = parsePubDate(value);
                    if (null != issued) {
                        journalData.setDateIssued(issued);
                    }
                } else if (key.equalsIgnoreCase("authors")) {
                    journalData.setAuthors(value.split(","));
                }
            }
        } catch (Exception ex) {
            LOGGER.log(Level.SEVERE, null, ex);
        }
    }

    /**
     * Converts the article data to Dublin Core XML metadata and saves it as
     * {@code dublin_core.xml} inside the article's output folder.
     *
     * <p>Optional values that are {@code null} are left out. The "type" and
     * "rights requestable" nodes are always written. XML and bean failures are logged and not
     * rethrown.
     *
     * @param fileName the root folder that contains all the uploading article data
     * @throws NullPointerException if no journal data has been set
     */
    @Override
    public void exportXmlByJournalData(String fileName) {
        Objects.requireNonNull(journalData, "journalData must be set before exporting the XML metadata");
        try {
            DocumentBuilder docBuilder = DocumentBuilderFactory.newInstance().newDocumentBuilder();
            Document doc = docBuilder.newDocument();
            Element root = doc.createElement("dublin_core");
            doc.appendChild(root);

            appendDcValue(doc, root, "Research Article", "type", QUALIFIER_NONE, LANG_EN_US);
            appendDcValue(doc, root, journalData.getAbstractText(), "description", "abstract", LANG_EN_US);
            appendDcValue(doc, root, journalData.getLanguage(), "language", "iso", LANG_EN_US);
            appendDcValue(doc, root, journalData.getTitle(), "title", QUALIFIER_NONE, LANG_EN_US);

            // The date.available node is not exported; it was disabled in the original code.

            Date issueDate = journalData.getDateIssued();
            if (null != issueDate) {
                String issued = new SimpleDateFormat(ISSUED_DATE_PATTERN).format(issueDate);
                appendDcValue(doc, root, issued, "date", "issued", null);
            }

            String[] authorSet = journalData.getAuthors();
            if (null != authorSet) {
                for (String author : authorSet) {
                    appendDcValue(doc, root, author, "contributor", "author", null);
                }
            }

            appendDcValue(doc, root, journalData.getAcknowledgements(), "description", QUALIFIER_NONE, LANG_EN_US);
            appendDcValue(doc, root, journalData.getAuthorContributions(), "description", QUALIFIER_NONE, LANG_EN_US);
            appendDcValue(doc, root, journalData.getPublisher(), "publisher", QUALIFIER_NONE, null);
            appendDcValue(doc, root, journalData.getCitation(), "identifier", "citation", LANG_EN_US);
            appendDcValue(doc, root, journalData.getRights(), "rights", QUALIFIER_NONE, null);
            appendDcValue(doc, root, journalData.getRightsUri(), "rights", "uri", null);
            appendDcValue(doc, root, Boolean.toString(journalData.isRightsRequestable()),
                    "rights", "requestable", LANG_EN_US);
            appendDcValue(doc, root, journalData.getIsPartOfSeries(), "relation", "ispartofseries", null);
            appendDcValue(doc, root, journalData.getRelationUri(), "relation", "uri", null);

            String[] subjectSet = journalData.getSubjects();
            if (null != subjectSet) {
                for (String subject : subjectSet) {
                    appendDcValue(doc, root, subject, "subject", QUALIFIER_NONE, LANG_EN_US);
                }
            }

            appendDcValue(doc, root, journalData.getPeerReview(), "description", "peerreview", LANG_EN_US);
            appendDcValue(doc, root, journalData.getPeerReviewNotes(), "description", "peerreviewnotes", LANG_EN_US);
            appendDcValue(doc, root, journalData.getDoi(), "identifier", "doi", LANG_EN_US);

            String filePath = setOutputPath(fileName) + "/dublin_core.xml";
            Transformer transformer = TransformerFactory.newInstance().newTransformer();
            transformer.transform(new DOMSource(doc), new StreamResult(new File(filePath)));
        } catch (ParserConfigurationException | TransformerException | DOMException | BeansException ex) {
            LOGGER.log(Level.SEVERE, null, ex);
        }
    }

    /** @return the full-text link; this base implementation always returns an empty string */
    @Override
    public String getFullTextLink() {
        return "";
    }

    /**
     * Derives the PDF link from the "url" entry of the raw data by replacing a
     * {@code .short} or {@code .abstract} suffix marker with {@code .full.pdf}.
     *
     * @return the PDF link, or an empty string when the data or the "url" entry is missing,
     *     the url contains neither marker, or an error occurred (which is logged)
     */
    @Override
    public String getPdfLink() {
        String link = "";
        try {
            Map dataTemp = getData();
            if (null == dataTemp || !dataTemp.containsKey("url")) {
                throw new EmptyProcessorDataException(
                        " The data of the processor is empty or it does NOT has the correct information ! ");
            }
            String url = dataTemp.get("url").toString();
            if (url.contains(".short")) {
                link = url.replace(".short", ".full.pdf");
            } else if (url.contains(".abstract")) {
                link = url.replace(".abstract", ".full.pdf");
            }
        } catch (Exception ex) {
            LOGGER.log(Level.SEVERE, null, ex);
        }
        return link;
    }

    /**
     * Downloads the article PDF into the article's output folder as {@code <id>.pdf}.
     *
     * @param outputPath root folder that contains the per-article output folder
     * @return the PDF file name, or an empty string when no PDF link is available or the
     *     download raised an exception (which is logged)
     */
    @Override
    public String generatePdf(String outputPath) {
        try {
            String link = getPdfLink();
            // Resolve the folder first: this may assign the id that names the PDF below.
            String folderPath = setOutputPath(outputPath);
            if (null == link || link.isEmpty()) {
                LOGGER.log(Level.WARNING,
                        "No PDF link could be derived for article {0}; skipping the PDF download.", getId());
                return "";
            }
            String pdfName = getId() + ".pdf";
            getHtmlRequest().getPdfByUrl(link, folderPath + "/" + pdfName);
            return pdfName;
        } catch (Exception ex) {
            LOGGER.log(Level.SEVERE, null, ex);
            return "";
        }
    }

    /**
     * Writes the {@code contents} file, holding the PDF file name on a single line, into the
     * article's output folder. An existing file is overwritten.
     *
     * @param outputFolder root folder that contains the per-article output folder
     * @param pdfName name of the PDF file; {@code null} is written as an empty line
     */
    @Override
    public void createDspaceContentsFile(String outputFolder, String pdfName) {
        String line = (null == pdfName) ? "" : pdfName;
        try {
            File contentsFile = new File(setOutputPath(outputFolder) + "/contents");
            // PrintWriter creates the file when it is missing and truncates it otherwise.
            try (PrintWriter writer = new PrintWriter(contentsFile, "UTF-8")) {
                writer.println(line);
            }
        } catch (IOException ex) {
            LOGGER.log(Level.SEVERE, null, ex);
        }
    }

    /**
     * Reads one String entry from the raw data map.
     *
     * @param key the entry name
     * @return the entry value, or {@code null} when the map is unset (which is logged) or the
     *     entry is absent
     */
    private String readDataField(String key) {
        if (null == data) {
            LOGGER.log(Level.SEVERE, null, new EmptyJournalDataException("Journal data is empty!"));
            return null;
        }
        return (String) data.get(key);
    }

    /**
     * Parses a publication date given as mm/dd/yyyy.
     *
     * @param value the date text; may be {@code null}
     * @return the parsed date, or {@code null} when the text is {@code null}, blank or cannot
     *     be parsed (the latter is logged)
     */
    private static Date parsePubDate(String value) {
        if (null == value || value.trim().isEmpty()) {
            return null;
        }
        try {
            // SimpleDateFormat is not thread-safe, so a new instance is created per call.
            return new SimpleDateFormat(PUB_DATE_PATTERN).parse(value.trim());
        } catch (ParseException ex) {
            LOGGER.log(Level.SEVERE, "Cannot parse publication date: " + value, ex);
            return null;
        }
    }

    /**
     * Appends one {@code dcvalue} element to the given parent.
     *
     * @param doc the owning document
     * @param parent the element that receives the new node
     * @param text the node text; when {@code null} nothing is appended
     * @param element value of the {@code element} attribute
     * @param qualifier value of the {@code qualifier} attribute
     * @param language value of the {@code language} attribute, or {@code null} to omit it
     */
    private static void appendDcValue(Document doc, Element parent, String text,
            String element, String qualifier, String language) {
        if (null == text) {
            return;
        }
        Element dcValue = doc.createElement("dcvalue");
        dcValue.appendChild(doc.createTextNode(text));
        dcValue.setAttribute("element", element);
        if (null != language) {
            dcValue.setAttribute("language", language);
        }
        dcValue.setAttribute("qualifier", qualifier);
        parent.appendChild(dcValue);
    }

    /**
     * Sets up the output path that contains the XML metadata file, the PDF file, and the
     * contents file. The folder is {@code outputFolder/<article id>}; the processor id is
     * initialised first when it is still unset. Missing parent folders are created as well.
     *
     * @param outputFolder root folder for all article output
     * @return the path of the per-article folder (returned even if it could not be created)
     */
    private String setOutputPath(String outputFolder) {
        String articleId = getId();
        if (null == articleId || "".equals(articleId)) {
            setProcessorId();
            articleId = getId();
        }
        String folderPath = outputFolder + "/" + articleId;
        File folder = new File(folderPath);
        if (!folder.exists()) {
            if (folder.mkdirs()) {
                System.out.print("The folder for loading article" + articleId + " has been created.\n");
            } else if (!folder.isDirectory()) {
                LOGGER.log(Level.WARNING, "Could not create the output folder {0}", folderPath);
            }
        }
        return folderPath;
    }
}