List of usage examples for java.util.regex Pattern DOTALL
int DOTALL
To view the source code for java.util.regex Pattern DOTALL, click Source Link.
From source file:com.novartis.opensource.yada.util.QueryUtils.java
/** * Returns {@code true} if the query content matches an SQL DELETE statement * syntax (see {@link #RX_DELETE}./*ww w. j a v a 2 s .c o m*/ * * @param code * stored code (with YADA markup) * @return {@code true} if the query content matches an SQL DELETE statement * syntax */ public boolean isDelete(String code) { Matcher matcher = Pattern.compile(RX_DELETE, Pattern.DOTALL | Pattern.CASE_INSENSITIVE).matcher(code); return matcher.matches(); }
From source file:de.mpg.escidoc.services.exportmanager.Export.java
/** * Walk around the itemList XML, fetch all files from components via URIs * and put them into the archive {@link OutputStream} aos * // ww w. ja va2 s. c om * @param aos * - array {@link OutputStream} * @param itemList * - XML with the files to be fetched, see NS: * http://www.escidoc.de/schemas/components/0.7 * @throws ExportManagerException */ private void fetchComponentsDo(OutputStream aos, String itemList) throws ExportManagerException { Document doc = parseDocument(itemList); NodeIterator ni = getFilteredNodes(new ComponentNodeFilter(), doc); // login only once String userHandle; try { userHandle = AdminHelper.loginUser(USER_ID, PASSWORD); } catch (Exception e) { throw new ExportManagerException("Cannot login", e); } String fileName; Node n; while ((n = ni.nextNode()) != null) { Element componentElement = (Element) n; NodeList nl = componentElement.getElementsByTagNameNS(COMPONENTS_NS, "content"); Element contentElement = (Element) nl.item(0); if (contentElement == null) { throw new ExportManagerException( "Wrong item XML: {" + COMPONENTS_NS + "}component element doesn't contain content element. " + "Component id: " + componentElement.getAttributeNS(XLINK_NS, "href")); } String href = contentElement.getAttributeNS(XLINK_NS, "href"); String storageStatus = contentElement.getAttribute("storage"); // get file name if ("internal-managed".equals(storageStatus)) { NodeIterator nif = ((DocumentTraversal) doc).createNodeIterator(componentElement, NodeFilter.SHOW_ELEMENT, new FileNameNodeFilter(), true); Node nf; if ((nf = nif.nextNode()) != null) { fileName = ((Element) nf).getTextContent(); // names of files for Matcher m = Pattern.compile("^([\\w.]+?)(\\s+|$)", Pattern.CASE_INSENSITIVE | Pattern.DOTALL) .matcher(fileName); m.find(); fileName = m.group(1); } else { throw new ExportManagerException("Missed file property: {" + COMPONENTS_NS + "}component element doesn't contain file-name element (md-records/md-record/file:file/dc:title). 
" + "Component id: " + componentElement.getAttributeNS(XLINK_NS, "href")); } } // TODO: the external-managed will be processed later else { throw new ExportManagerException("Missed internal-managed file in {" + COMPONENTS_NS + "}component: components/component/content[@storage=\"internal-managed\"]" + "Component id: " + componentElement.getAttributeNS(XLINK_NS, "href")); } logger.info("link to the content: " + href); logger.info("storage status: " + storageStatus); logger.info("fileName: " + fileName); // get file via URI String url; try { url = ServiceLocator.getFrameworkUrl() + href; } catch (Exception e) { throw new ExportManagerException("Cannot get framework url", e); } logger.info("url=" + url); GetMethod method = new GetMethod(url); method.setFollowRedirects(false); method.setRequestHeader("Cookie", "escidocCookie=" + userHandle); // Execute the method with HttpClient. HttpClient client = new HttpClient(); try { ProxyHelper.executeMethod(client, method); } catch (Exception e) { throw new ExportManagerException("Cannot execute HttpMethod", e); } int status = method.getStatusCode(); logger.info("Status=" + status); if (status != 200) fileName += ".error" + status; byte[] responseBody; try { responseBody = method.getResponseBody(); } catch (Exception e) { throw new ExportManagerException("Cannot get Response Body", e); } InputStream bis = new BufferedInputStream(new ByteArrayInputStream(responseBody)); if (aos instanceof ZipOutputStream) { ZipEntry ze = new ZipEntry(fileName); ze.setSize(responseBody.length); try { ((ZipOutputStream) aos).putNextEntry(ze); writeFromStreamToStream(bis, aos); ((ZipOutputStream) aos).closeEntry(); } catch (Exception e) { throw new ExportManagerException("zip2stream generation problem", e); } } else if (aos instanceof TarOutputStream) { TarEntry te = new TarEntry(fileName); te.setSize(responseBody.length); try { ((TarOutputStream) aos).putNextEntry(te); writeFromStreamToStream(bis, aos); ((TarOutputStream) aos).closeEntry(); } 
catch (Exception e) { throw new ExportManagerException("tar2stream generation problem", e); } } else { throw new ExportManagerException("Unsupported archive output stream: " + aos.getClass()); } try { bis.close(); } catch (Exception e) { throw new ExportManagerException("Cannot close InputStream", e); } } }
From source file:edu.northwestern.jcr.adapter.fedora.persistence.FedoraConnector.java
/** * Modifies or creates a Dublic Core field/value pair in the DC data stream. * * @param pid pid of the digital object/* w w w. j a va 2 s . c o m*/ * @param field Dublin Core field to be added or modified * @param value new value of the field */ public void modifyDCField(String pid, String field, String value) { String dcXML; byte[] b; String oldValue, newValue; Pattern pattern; Matcher matcher; int index; if (field.equals("identifier")) { // cannot change identifier log.error("attemp to change dc:identifier!"); return; } b = getDataStream(pid, "DC"); dcXML = new String(b); // DOT matches anything including newline characters pattern = Pattern.compile("<dc:" + field + ">.*</dc:" + field + ">", Pattern.DOTALL); matcher = pattern.matcher(dcXML); newValue = "<dc:" + field + ">" + value + "</dc:" + field + ">"; if (matcher.find()) { // replace current value oldValue = matcher.group(); index = matcher.start(); dcXML = dcXML.substring(0, index) + newValue + dcXML.substring(index + oldValue.length()); } else { // add to the end index = dcXML.indexOf("</oai_dc:dc>"); dcXML = dcXML.substring(0, index) + newValue + "\n</oai_dc:dc>"; } modifyDCDataStream(pid, dcXML.getBytes()); }
From source file:eu.semlibproject.annotationserver.repository.SesameRepository.java
/** * {@inheritDoc}/*from w w w. j a v a2 s . com*/ */ public String searchMetadataWithParameters(String jsonParams, int limit, int offset, String orderBy, boolean desc, String acceptedFormat, List<String> notebookIDList) throws RepositoryException { // TODO: at the moment this method implement only the step one described in #74 String finalResults = ""; String qOrderBy = getPropertyForOrdering(orderBy); // Check ordering ASC (default), DESC String orderMode = null; if (desc) { orderMode = "ORDER BY DESC(?c) "; } else { orderMode = "ORDER BY ?c "; } try { JSONObject jsonParameters = new JSONObject(jsonParams); // Process "resources" ========= if (jsonParameters.has(SemlibConstants.JSON_RESOURCES)) { JSONArray jResources = jsonParameters.getJSONArray(SemlibConstants.JSON_RESOURCES); if (jResources.length() > 0) { RepositoryConnection connection = null; try { // This is necessary to restrict the query for a list of notebook ID ====== String restrictionToNotebooks = null; if (notebookIDList != null) { int notebooksItem = notebookIDList.size(); Iterator<String> nIterator = notebookIDList.iterator(); if (notebooksItem > 1) { restrictionToNotebooks = "{ "; } else if (notebooksItem > 0) { restrictionToNotebooks = ""; } while (nIterator.hasNext()) { String cNotebook = Notebook.getURIFromID(nIterator.next()); restrictionToNotebooks += "{ <" + cNotebook + "> <" + OntologyHelper.URI_SEMLIB_INCLUDES + "> ?s } "; if (nIterator.hasNext()) { restrictionToNotebooks += "UNION "; } } if (notebooksItem > 1) { restrictionToNotebooks += "}"; } } // ======================================================================= boolean firstResltSetAdded = false; connection = sesameHTTPRepository.getConnection(); for (int i = 0; i < jResources.length(); i++) { String cUrl = jResources.get(i).toString(); String query = null; if (restrictionToNotebooks != null) { query = "CONSTRUCT {?s ?p ?o . 
?s <" + OntologyHelper.URI_DCELEMENT_CREATOR + "> ?authorName } WHERE { " + "{ ?s <" + OntologyHelper.URI_OAC_HASTARGET + "> <" + cUrl + "> . ?s <" + OntologyHelper.URI_RDF_TYPE + "> <" + OntologyHelper.URI_OAC_ANNOTATION + "> . ?s ?p ?o . " + restrictionToNotebooks + " . OPTIONAL {?s <" + OntologyHelper.URI_DC_CREATOR + "> ?author . ?author <" + OntologyHelper.URI_FOAF_NAME + "> ?authorName } } " + "UNION " + "{ ?s <" + OntologyHelper.URI_SEMLIB_HASPAGECONTEXT + "> <" + cUrl + "> . ?s <" + OntologyHelper.URI_RDF_TYPE + "> <" + OntologyHelper.URI_OAC_ANNOTATION + "> . ?s ?p ?o . " + restrictionToNotebooks + " . OPTIONAL {?s <" + OntologyHelper.URI_DC_CREATOR + "> ?author . ?author <" + OntologyHelper.URI_FOAF_NAME + "> ?authorName } } " + "UNION " + "{?s <" + OntologyHelper.URI_OAC_HASTARGET + "> ?x . ?x <" + OntologyHelper.URI_DC_ISPARTOF + "> <" + cUrl + "> . ?s <" + OntologyHelper.URI_RDF_TYPE + "> <" + OntologyHelper.URI_OAC_ANNOTATION + "> . ?s ?p ?o . " + restrictionToNotebooks + " . OPTIONAL {?s <" + OntologyHelper.URI_DC_CREATOR + "> ?author . ?author <" + OntologyHelper.URI_FOAF_NAME + "> ?authorName } } " + "} "; } else { query = "CONSTRUCT {?s ?p ?o . ?s <" + OntologyHelper.URI_DCELEMENT_CREATOR + "> ?authorName } WHERE { " + "{ ?s <" + OntologyHelper.URI_OAC_HASTARGET + "> <" + cUrl + "> . ?s <" + OntologyHelper.URI_RDF_TYPE + "> <" + OntologyHelper.URI_OAC_ANNOTATION + "> . ?s ?p ?o . OPTIONAL {?s <" + OntologyHelper.URI_DC_CREATOR + "> ?author . ?author <" + OntologyHelper.URI_FOAF_NAME + "> ?authorName } } " + "UNION " + "{ ?s <" + OntologyHelper.URI_SEMLIB_HASPAGECONTEXT + "> <" + cUrl + "> . ?s <" + OntologyHelper.URI_RDF_TYPE + "> <" + OntologyHelper.URI_OAC_ANNOTATION + "> . ?s ?p ?o . OPTIONAL {?s <" + OntologyHelper.URI_DC_CREATOR + "> ?author . ?author <" + OntologyHelper.URI_FOAF_NAME + "> ?authorName } } " + "UNION " + "{?s <" + OntologyHelper.URI_OAC_HASTARGET + "> ?x . ?x <" + OntologyHelper.URI_DC_ISPARTOF + "> <" + cUrl + "> . 
?s <" + OntologyHelper.URI_RDF_TYPE + "> <" + OntologyHelper.URI_OAC_ANNOTATION + "> . ?s ?p ?o . OPTIONAL {?s <" + OntologyHelper.URI_DC_CREATOR + "> ?author . ?author <" + OntologyHelper.URI_FOAF_NAME + "> ?authorName } } " + "} "; } try { GraphQuery triplesQuery = connection.prepareGraphQuery(QueryLanguage.SPARQL, query); // TODO: for the first step #74 it is ok. Modify this to support limit and offset (refactor method createStringFromRDFWithFormat) try { String tempTriples = createStringFromRDFWithFormat(triplesQuery, acceptedFormat, false, false, false); if (StringUtils.isNotBlank(tempTriples)) { if (i == 0 || !firstResltSetAdded) { firstResltSetAdded = true; // Necessary to avoid to create invalid rdf+xml and multiple closing tags if (acceptedFormat.contains(MediaType.APPLICATION_RDFXML)) { tempTriples = tempTriples.replaceAll("<\\/rdf:RDF>", ""); } finalResults += tempTriples; } else { if (acceptedFormat.contains(MediaType.APPLICATION_RDFXML)) { tempTriples = Pattern .compile("<\\?xml.*\\?>\n?+\r?+<rdf:RDF[^<>]+>", Pattern.DOTALL) .matcher(tempTriples).replaceAll(""); finalResults += "\n" + tempTriples; } else { finalResults += ",\n" + tempTriples; } } } } catch (RDFHandlerException ex) { logger.log(Level.SEVERE, null, ex); continue; } // ================================================ } catch (QueryEvaluationException ex) { logger.log(Level.SEVERE, null, ex); connection.close(); return null; } catch (org.openrdf.repository.RepositoryException ex) { logger.log(Level.SEVERE, null, ex); connection.close(); return null; } catch (MalformedQueryException ex) { logger.log(Level.SEVERE, null, ex); connection.close(); return null; } } connection.close(); } catch (org.openrdf.repository.RepositoryException ex) { logger.log(Level.SEVERE, null, ex); throw new RepositoryException(); } } } } catch (JSONException ex) { // JSON parameters not correct -> return null (BAD REQUEST) logger.log(Level.SEVERE, null, ex); return null; } // Wrap all JSON result in a block "{ }" 
but only if finalResults cointains values if (StringUtils.isNotBlank(finalResults)) { if (acceptedFormat.contains(MediaType.APPLICATION_RDFXML)) { finalResults += "</rdf:RDF>"; } else if (acceptedFormat.contains(MediaType.APPLICATION_JSON)) { finalResults = "{\n" + finalResults + "}"; } } return finalResults; }
From source file:edu.northwestern.jcr.adapter.fedora.persistence.FedoraConnector.java
/** * Runs full-text search agains the gSearch service. * * @param value value of the search expression * @return list of pids//from w w w. j a va 2 s. c o m */ public String[] searchFullText(String value) { String response; String url; List<String> resultList; Pattern pattern; Matcher matcher; String result = ""; String pid; resultList = new ArrayList<String>(); // run the search against each field for (String field : gsearchFields) { url = gsearchURL + "/rest?operation=gfindObjects&query=dsm." + field + "%3A\"" + value + "\""; try { response = postMethod(url); } catch (Exception e) { return resultList.toArray(new String[0]); } result += response; } // DOT matches anything including newline characters pattern = Pattern.compile("<span class=\"hitno\">[^<]+</span><a href=\"[^\"]+\">([^<]+)</a>", Pattern.DOTALL); matcher = pattern.matcher(result); while (matcher.find()) { pid = matcher.group(1); if (!resultList.contains(pid)) { resultList.add(pid); } } return resultList.toArray(new String[0]); }
From source file:de.mpg.mpdl.inge.transformation.transformations.commonPublicationFormats.Bibtex.java
/** * @param bibtex/*from w w w. j a v a 2s . co m*/ * @return eSciDoc-publication item XML representation of this BibTeX entry * @throws RuntimeException */ public String getBibtex(String bibtex) throws RuntimeException { // Remove Math '$' from the whole BibTex-String Pattern mathPattern = Pattern.compile("(?sm)\\$(\\\\.*?)(?<!\\\\)\\$"); Matcher mathMatcher = mathPattern.matcher(bibtex); StringBuffer sb = new StringBuffer(); while (mathMatcher.find()) { mathMatcher.appendReplacement(sb, "$1"); } mathMatcher.appendTail(sb); bibtex = sb.toString(); BibtexParser parser = new BibtexParser(true); BibtexFile file = new BibtexFile(); try { parser.parse(file, new StringReader(bibtex)); } catch (Exception e) { this.logger.error("Error parsing BibTex record."); throw new RuntimeException(e); } PubItemVO itemVO = new PubItemVO(); MdsPublicationVO mds = new MdsPublicationVO(); itemVO.setMetadata(mds); List entries = file.getEntries(); boolean entryFound = false; if (entries == null || entries.size() == 0) { this.logger.warn("No entry found in BibTex record."); throw new RuntimeException(); } for (Object object : entries) { if (object instanceof BibtexEntry) { if (entryFound) { this.logger.error("Multiple entries in BibTex record."); throw new RuntimeException(); } entryFound = true; BibtexEntry entry = (BibtexEntry) object; // genre BibTexUtil.Genre bibGenre; try { bibGenre = BibTexUtil.Genre.valueOf(entry.getEntryType()); } catch (IllegalArgumentException iae) { bibGenre = BibTexUtil.Genre.misc; this.logger.warn("Unrecognized genre: " + entry.getEntryType()); } MdsPublicationVO.Genre itemGenre = BibTexUtil.getGenreMapping().get(bibGenre); mds.setGenre(itemGenre); SourceVO sourceVO = new SourceVO(); SourceVO secondSourceVO = new SourceVO(); Map fields = entry.getFields(); // Mapping of BibTeX Standard Entries // title if (fields.get("title") != null) { if (fields.get("chapter") != null) { mds.setTitle(BibTexUtil.stripBraces( 
BibTexUtil.bibtexDecode(fields.get("chapter").toString()), false) + " - " + BibTexUtil.stripBraces(BibTexUtil.bibtexDecode(fields.get("title").toString()), false)); } else { mds.setTitle(BibTexUtil.stripBraces(BibTexUtil.bibtexDecode(fields.get("title").toString()), false)); } } // booktitle if (fields.get("booktitle") != null) { if (bibGenre == BibTexUtil.Genre.book) { mds.setTitle(BibTexUtil .stripBraces(BibTexUtil.bibtexDecode(fields.get("booktitle").toString()), false)); } else if (bibGenre == BibTexUtil.Genre.conference || bibGenre == BibTexUtil.Genre.inbook || bibGenre == BibTexUtil.Genre.incollection || bibGenre == BibTexUtil.Genre.inproceedings) { sourceVO.setTitle(BibTexUtil .stripBraces(BibTexUtil.bibtexDecode(fields.get("booktitle").toString()), false)); if (bibGenre == BibTexUtil.Genre.conference || bibGenre == BibTexUtil.Genre.inproceedings) { sourceVO.setGenre(Genre.PROCEEDINGS); } else if (bibGenre == BibTexUtil.Genre.inbook || bibGenre == BibTexUtil.Genre.incollection) { sourceVO.setGenre(Genre.BOOK); } } } // fjournal, journal if (fields.get("fjournal") != null) { if (bibGenre == BibTexUtil.Genre.article || bibGenre == BibTexUtil.Genre.misc || bibGenre == BibTexUtil.Genre.unpublished) { sourceVO.setTitle(BibTexUtil .stripBraces(BibTexUtil.bibtexDecode(fields.get("fjournal").toString()), false)); sourceVO.setGenre(SourceVO.Genre.JOURNAL); if (fields.get("journal") != null) { sourceVO.getAlternativeTitles().add(new AlternativeTitleVO(BibTexUtil.stripBraces( BibTexUtil.bibtexDecode(fields.get("journal").toString()), false))); } } } else if (fields.get("journal") != null) { if (bibGenre == BibTexUtil.Genre.article || bibGenre == BibTexUtil.Genre.misc || bibGenre == BibTexUtil.Genre.unpublished || bibGenre == BibTexUtil.Genre.inproceedings) { sourceVO.setTitle(BibTexUtil .stripBraces(BibTexUtil.bibtexDecode(fields.get("journal").toString()), false)); sourceVO.setGenre(SourceVO.Genre.JOURNAL); } } // number if (fields.get("number") != null && bibGenre != 
BibTexUtil.Genre.techreport) { sourceVO.setIssue(BibTexUtil .stripBraces(BibTexUtil.bibtexDecode(fields.get("number").toString()), false)); } else if (fields.get("number") != null && bibGenre == BibTexUtil.Genre.techreport) { { mds.getIdentifiers().add(new IdentifierVO(IdentifierVO.IdType.REPORT_NR, BibTexUtil .stripBraces(BibTexUtil.bibtexDecode(fields.get("number").toString()), false))); } } // pages if (fields.get("pages") != null) { if (bibGenre == BibTexUtil.Genre.book || bibGenre == BibTexUtil.Genre.proceedings) { mds.setTotalNumberOfPages(BibTexUtil .stripBraces(BibTexUtil.bibtexDecode(fields.get("pages").toString()), false)); } else { BibTexUtil.fillSourcePages(BibTexUtil.stripBraces( BibTexUtil.bibtexDecode(fields.get("pages").toString()), false), sourceVO); if (bibGenre == BibTexUtil.Genre.inproceedings && (fields.get("booktitle") == null || fields.get("booktitle").toString() == "") && (fields.get("event_name") != null && fields.get("event_name").toString() != "")) { sourceVO.setTitle(BibTexUtil.stripBraces(fields.get("event_name").toString(), false)); sourceVO.setGenre(Genre.PROCEEDINGS); } } } // Publishing info PublishingInfoVO publishingInfoVO = new PublishingInfoVO(); mds.setPublishingInfo(publishingInfoVO); // address if (fields.get("address") != null) { if (!(bibGenre == BibTexUtil.Genre.article || bibGenre == BibTexUtil.Genre.inbook || bibGenre == BibTexUtil.Genre.inproceedings || bibGenre == BibTexUtil.Genre.conference || bibGenre == BibTexUtil.Genre.incollection) && (sourceVO.getTitle() == null || sourceVO.getTitle() == null)) { publishingInfoVO.setPlace(BibTexUtil .stripBraces(BibTexUtil.bibtexDecode(fields.get("address").toString()), false)); } else { if (sourceVO.getPublishingInfo() == null) { PublishingInfoVO sourcePublishingInfoVO = new PublishingInfoVO(); sourceVO.setPublishingInfo(sourcePublishingInfoVO); } sourceVO.getPublishingInfo().setPlace(BibTexUtil .stripBraces(BibTexUtil.bibtexDecode(fields.get("address").toString()), false)); } } 
// edition if (fields.get("edition") != null) { publishingInfoVO.setEdition(BibTexUtil .stripBraces(BibTexUtil.bibtexDecode(fields.get("edition").toString()), false)); } // publisher if (!(bibGenre == BibTexUtil.Genre.article || bibGenre == BibTexUtil.Genre.inbook || bibGenre == BibTexUtil.Genre.inproceedings || bibGenre == BibTexUtil.Genre.conference || bibGenre == BibTexUtil.Genre.incollection) && (sourceVO.getTitle() == null || sourceVO.getTitle() == null)) { if (fields.get("publisher") != null) { publishingInfoVO.setPublisher(BibTexUtil .stripBraces(BibTexUtil.bibtexDecode(fields.get("publisher").toString()), false)); } else if (fields.get("school") != null && (bibGenre == BibTexUtil.Genre.mastersthesis || bibGenre == BibTexUtil.Genre.phdthesis || bibGenre == BibTexUtil.Genre.techreport)) { publishingInfoVO.setPublisher(BibTexUtil .stripBraces(BibTexUtil.bibtexDecode(fields.get("school").toString()), false)); } else if (fields.get("institution") != null) { publishingInfoVO.setPublisher(BibTexUtil .stripBraces(BibTexUtil.bibtexDecode(fields.get("institution").toString()), false)); } else if (fields.get("publisher") == null && fields.get("school") == null && fields.get("institution") == null && fields.get("address") != null) { publishingInfoVO.setPublisher("ANY PUBLISHER"); } } else { if (sourceVO.getPublishingInfo() == null) { PublishingInfoVO sourcePublishingInfoVO = new PublishingInfoVO(); sourceVO.setPublishingInfo(sourcePublishingInfoVO); } if (fields.get("publisher") != null) { sourceVO.getPublishingInfo().setPublisher(BibTexUtil .stripBraces(BibTexUtil.bibtexDecode(fields.get("publisher").toString()), false)); } else if (fields.get("school") != null && (bibGenre == BibTexUtil.Genre.mastersthesis || bibGenre == BibTexUtil.Genre.phdthesis || bibGenre == BibTexUtil.Genre.techreport)) { sourceVO.getPublishingInfo().setPublisher(BibTexUtil .stripBraces(BibTexUtil.bibtexDecode(fields.get("school").toString()), false)); } else if (fields.get("institution") != 
null) { sourceVO.getPublishingInfo().setPublisher(BibTexUtil .stripBraces(BibTexUtil.bibtexDecode(fields.get("institution").toString()), false)); } else if (fields.get("publisher") == null && fields.get("school") == null && fields.get("institution") == null && fields.get("address") != null) { sourceVO.getPublishingInfo().setPublisher("ANY PUBLISHER"); } } // series if (fields.get("series") != null) { if (bibGenre == BibTexUtil.Genre.book || bibGenre == BibTexUtil.Genre.misc || bibGenre == BibTexUtil.Genre.techreport) { sourceVO.setTitle(BibTexUtil .stripBraces(BibTexUtil.bibtexDecode(fields.get("series").toString()), false)); sourceVO.setGenre(SourceVO.Genre.SERIES); } else if (bibGenre == BibTexUtil.Genre.inbook || bibGenre == BibTexUtil.Genre.incollection || bibGenre == BibTexUtil.Genre.inproceedings || bibGenre == BibTexUtil.Genre.conference) { secondSourceVO.setTitle(BibTexUtil .stripBraces(BibTexUtil.bibtexDecode(fields.get("series").toString()), false)); secondSourceVO.setGenre(SourceVO.Genre.SERIES); } } // type --> degree if (fields.get("type") != null && bibGenre == BibTexUtil.Genre.mastersthesis) { if (fields.get("type").toString().toLowerCase().contains("master") || fields.get("type").toString().toLowerCase().contains("m.a.") || fields.get("type").toString().toLowerCase().contains("m.s.") || fields.get("type").toString().toLowerCase().contains("m.sc.")) { mds.setDegree(MdsPublicationVO.DegreeType.MASTER); } else if (fields.get("type").toString().toLowerCase().contains("bachelor")) { mds.setDegree(MdsPublicationVO.DegreeType.BACHELOR); } else if (fields.get("type").toString().toLowerCase().contains("magister")) { mds.setDegree(MdsPublicationVO.DegreeType.MAGISTER); } else if (fields.get("type").toString().toLowerCase().contains("diplom")) // covers also // the english // version // (diploma) { mds.setDegree(MdsPublicationVO.DegreeType.DIPLOMA); } else if (fields.get("type").toString().toLowerCase().contains("statsexamen") || 
fields.get("type").toString().toLowerCase().contains("state examination")) { mds.setDegree(MdsPublicationVO.DegreeType.DIPLOMA); } } else if (fields.get("type") != null && bibGenre == BibTexUtil.Genre.phdthesis) { if (fields.get("type").toString().toLowerCase().contains("phd") || fields.get("type").toString().toLowerCase().contains("dissertation") || fields.get("type").toString().toLowerCase().contains("doktor") || fields.get("type").toString().toLowerCase().contains("doctor")) { mds.setDegree(MdsPublicationVO.DegreeType.PHD); } else if (fields.get("type").toString().toLowerCase().contains("habilitation")) { mds.setDegree(MdsPublicationVO.DegreeType.HABILITATION); } } // volume if (fields.get("volume") != null) { if (bibGenre == BibTexUtil.Genre.article || bibGenre == BibTexUtil.Genre.book) { sourceVO.setVolume(BibTexUtil .stripBraces(BibTexUtil.bibtexDecode(fields.get("volume").toString()), false)); } else if (bibGenre == BibTexUtil.Genre.inbook || bibGenre == BibTexUtil.Genre.inproceedings || bibGenre == BibTexUtil.Genre.incollection || bibGenre == BibTexUtil.Genre.conference) { if (sourceVO.getSources() != null && !sourceVO.getSources().isEmpty()) { sourceVO.getSources().get(0).setVolume(BibTexUtil .stripBraces(BibTexUtil.bibtexDecode(fields.get("volume").toString()), false)); } else { sourceVO.setVolume(BibTexUtil .stripBraces(BibTexUtil.bibtexDecode(fields.get("volume").toString()), false)); } } } // event infos if (bibGenre != null && (bibGenre.equals(BibTexUtil.Genre.inproceedings) || bibGenre.equals(BibTexUtil.Genre.proceedings) || bibGenre.equals(BibTexUtil.Genre.conference) || bibGenre.equals(BibTexUtil.Genre.poster) || bibGenre.equals(BibTexUtil.Genre.talk))) { EventVO event = new EventVO(); boolean eventNotEmpty = false; // event location if (fields.get("location") != null) { event.setPlace(BibTexUtil .stripBraces(BibTexUtil.bibtexDecode(fields.get("location").toString()), false)); eventNotEmpty = true; } // event place else if 
(fields.get("event_place") != null) { event.setPlace(BibTexUtil .stripBraces(BibTexUtil.bibtexDecode(fields.get("event_place").toString()), false)); eventNotEmpty = true; } // event name/title if (fields.get("event_name") != null) { event.setTitle(BibTexUtil .stripBraces(BibTexUtil.bibtexDecode(fields.get("event_name").toString()), false)); eventNotEmpty = true; } // event will be set only it's not empty if (eventNotEmpty == true) { if (event.getTitle() == null) { event.setTitle(""); } mds.setEvent(event); } } // year, month String dateString = null; if (fields.get("year") != null) { dateString = BibTexUtil.stripBraces(BibTexUtil.bibtexDecode(fields.get("year").toString()), false); if (fields.get("month") != null) { String month = BibTexUtil.parseMonth(fields.get("month").toString()); dateString += "-" + month; } if (bibGenre == BibTexUtil.Genre.unpublished) { mds.setDateCreated(dateString); } else { mds.setDatePublishedInPrint(dateString); } } String affiliation = null; String affiliationAddress = null; // affiliation if (fields.get("affiliation") != null) { affiliation = BibTexUtil .stripBraces(BibTexUtil.bibtexDecode(fields.get("affiliation").toString()), false); } // affiliationaddress if (fields.get("affiliationaddress") != null) { affiliationAddress = BibTexUtil.stripBraces( BibTexUtil.bibtexDecode(fields.get("affiliationaddress").toString()), false); } // author boolean noConeAuthorFound = true; if (fields.get("author") != null) { if (fields.get("author") instanceof BibtexPersonList) { BibtexPersonList authors = (BibtexPersonList) fields.get("author"); for (Object author : authors.getList()) { if (author instanceof BibtexPerson) { addCreator(mds, (BibtexPerson) author, CreatorVO.CreatorRole.AUTHOR, affiliation, affiliationAddress); } else { this.logger.warn("Entry in BibtexPersonList not a BibtexPerson: [" + author + "] in [" + author + "]"); } } } else if (fields.get("author") instanceof BibtexPerson) { BibtexPerson author = (BibtexPerson) 
fields.get("author"); addCreator(mds, (BibtexPerson) author, CreatorVO.CreatorRole.AUTHOR, affiliation, affiliationAddress); } else if (fields.get("author") instanceof BibtexString) { AuthorDecoder decoder; try { String authorString = BibTexUtil.bibtexDecode(fields.get("author").toString(), false); List<CreatorVO> teams = new ArrayList<CreatorVO>(); if (authorString.contains("Team")) { // set pattern for finding Teams (leaded or followed by [and|,|;|{|}|^|$]) Pattern pattern = Pattern.compile( "(?<=(and|,|;|\\{|^))([\\w|\\s]*?Team[\\w|\\s]*?)(?=(and|,|;|\\}|$))", Pattern.DOTALL); Matcher matcher = pattern.matcher(authorString); String matchedGroup; while (matcher.find()) { matchedGroup = matcher.group(); // remove matchedGroup (and prefix/suffix) from authorString if (authorString.startsWith(matchedGroup)) { authorString = authorString.replaceAll(matchedGroup + "(and|,|;|\\})", ""); } else { authorString = authorString.replaceAll("(and|,|;|\\{)" + matchedGroup, ""); } // set matchedGroup as Organisation Author OrganizationVO team = new OrganizationVO(); team.setName(matchedGroup.trim()); CreatorVO creatorVO = new CreatorVO(team, CreatorVO.CreatorRole.AUTHOR); teams.add(creatorVO); } } decoder = new AuthorDecoder(authorString, false); if (decoder.getBestFormat() != null) { List<Author> authors = decoder.getAuthorListList().get(0); for (Author author : authors) { PersonVO personVO = new PersonVO(); personVO.setFamilyName(author.getSurname()); if (author.getGivenName() != null) { personVO.setGivenName(author.getGivenName()); } else { personVO.setGivenName(author.getInitial()); } /* * Case for MPI-KYB (Biological Cybernetics) with CoNE identifier in brackets and * affiliations to adopt from CoNE for each author (also in brackets) */ if (configuration != null && "true".equals(configuration.get("CoNE")) && ("identifier and affiliation in brackets" .equals(configuration.get("CurlyBracketsForCoNEAuthors"))) && (author.getTags().get("identifier") != null)) { String query = 
author.getTags().get("identifier"); int affiliationsCount = Integer .parseInt(author.getTags().get("affiliationsCount")); if (affiliationsCount > 0 || configuration.get("OrganizationalUnit") != null) { for (int ouCount = 0; ouCount < (affiliationsCount > 0 ? affiliationsCount : 1); ouCount++) // 1 // is // for // the // case // configuration.get("OrganizationalUnit") // != // null { String organizationalUnit = (author.getTags().get( "affiliation" + new Integer(ouCount).toString()) != null ? author.getTags() .get("affiliation" + new Integer(ouCount).toString()) : (configuration.get("OrganizationalUnit") != null ? configuration.get("OrganizationalUnit") : "")); Node coneEntries = null; if (query.equals(author.getTags().get("identifier"))) { coneEntries = Util.queryConeExactWithIdentifier("persons", query, organizationalUnit); // for MPIKYB due to OUs which do not occur in CoNE if (coneEntries.getFirstChild().getFirstChild() == null) { logger.error("No Person with Identifier (" + author.getTags().get("identifier") + ") and OU (" + organizationalUnit + ") found in CoNE for Publication \"" + fields.get("title") + "\""); } } else { coneEntries = Util.queryConeExact("persons", query, organizationalUnit); } Node coneNode = coneEntries.getFirstChild().getFirstChild(); if (coneNode != null) { Node currentNode = coneNode.getFirstChild(); boolean first = true; while (currentNode != null) { if (currentNode.getNodeType() == Node.ELEMENT_NODE && first) { first = false; noConeAuthorFound = false; Node coneEntry = currentNode; String coneId = coneEntry.getAttributes() .getNamedItem("rdf:about").getNodeValue(); personVO.setIdentifier( new IdentifierVO(IdType.CONE, coneId)); for (int i = 0; i < coneEntry.getChildNodes() .getLength(); i++) { Node posNode = coneEntry.getChildNodes().item(i); if ("escidoc:position" .equals(posNode.getNodeName())) { String from = null; String until = null; String name = null; String id = null; Node node = posNode.getFirstChild() .getFirstChild(); while 
(node != null) { if ("eprints:affiliatedInstitution" .equals(node.getNodeName())) { name = node.getFirstChild() .getNodeValue(); } else if ("escidoc:start-date" .equals(node.getNodeName())) { from = node.getFirstChild() .getNodeValue(); } else if ("escidoc:end-date" .equals(node.getNodeName())) { until = node.getFirstChild() .getNodeValue(); } else if ("dc:identifier" .equals(node.getNodeName())) { id = node.getFirstChild() .getNodeValue(); } node = node.getNextSibling(); } if (smaller(from, dateString) && smaller(dateString, until)) { OrganizationVO org = new OrganizationVO(); org.setName(name); org.setIdentifier(id); personVO.getOrganizations().add(org); } } } } else if (currentNode.getNodeType() == Node.ELEMENT_NODE) { throw new RuntimeException( "Ambigous CoNE entries for " + query); } currentNode = currentNode.getNextSibling(); } } else { throw new RuntimeException("Missing CoNE entry for " + query); } } } } /* * Case for MPI-Microstructure Physics with affiliation identifier in brackets and * affiliations to adopt from CoNE for each author (also in brackets) */ else if (configuration != null && "true".equals(configuration.get("CoNE")) && ("affiliation id in brackets" .equals(configuration.get("CurlyBracketsForCoNEAuthors"))) && (author.getTags().get("identifier") != null)) { String identifier = author.getTags().get("identifier"); String query = personVO.getFamilyName() + ", " + personVO.getGivenName(); if (!("extern".equals(identifier))) { Node coneEntries = null; coneEntries = Util.queryConeExact("persons", query, (configuration.get("OrganizationalUnit") != null ? 
configuration.get("OrganizationalUnit") : "")); Node coneNode = coneEntries.getFirstChild().getFirstChild(); if (coneNode != null) { Node currentNode = coneNode.getFirstChild(); boolean first = true; while (currentNode != null) { if (currentNode.getNodeType() == Node.ELEMENT_NODE && first) { first = false; noConeAuthorFound = false; Node coneEntry = currentNode; String coneId = coneEntry.getAttributes() .getNamedItem("rdf:about").getNodeValue(); personVO.setIdentifier( new IdentifierVO(IdType.CONE, coneId)); if (identifier != null && !("".equals(identifier))) { try { String ouSubTitle = identifier.substring(0, identifier.indexOf(",")); Document document = Util.queryFramework( "/oum/organizational-units?query=" + URLEncoder.encode("\"/title\"=\"" + ouSubTitle + "\"", "UTF-8")); NodeList ouList = document.getElementsByTagNameNS( "http://www.escidoc.de/schemas/organizationalunit/0.8", "organizational-unit"); Element ou = (Element) ouList.item(0); String href = ou.getAttribute("xlink:href"); String ouId = href .substring(href.lastIndexOf("/") + 1); OrganizationVO org = new OrganizationVO(); org.setName(identifier); org.setIdentifier(ouId); personVO.getOrganizations().add(org); } catch (Exception e) { logger.error("Error getting OUs", e); throw new RuntimeException( "Error getting Organizational Unit for " + identifier); } } } else if (currentNode.getNodeType() == Node.ELEMENT_NODE) { throw new RuntimeException( "Ambigous CoNE entries for " + query); } currentNode = currentNode.getNextSibling(); } } else { throw new RuntimeException("Missing CoNE entry for " + query); } } } else if (configuration != null && "true".equals(configuration.get("CoNE")) && ("empty brackets" .equals(configuration.get("CurlyBracketsForCoNEAuthors")) && (author.getTags().get("brackets") != null))) { String query = personVO.getFamilyName() + ", " + personVO.getGivenName(); Node coneEntries = Util.queryConeExact("persons", query, (configuration.get("OrganizationalUnit") != null ? 
configuration.get("OrganizationalUnit") : "")); Node coneNode = coneEntries.getFirstChild().getFirstChild(); if (coneNode != null) { Node currentNode = coneNode.getFirstChild(); boolean first = true; while (currentNode != null) { if (currentNode.getNodeType() == Node.ELEMENT_NODE && first) { first = false; noConeAuthorFound = false; Node coneEntry = currentNode; String coneId = coneEntry.getAttributes() .getNamedItem("rdf:about").getNodeValue(); personVO.setIdentifier(new IdentifierVO(IdType.CONE, coneId)); for (int i = 0; i < coneEntry.getChildNodes() .getLength(); i++) { Node posNode = coneEntry.getChildNodes().item(i); if ("escidoc:position".equals(posNode.getNodeName())) { String from = null; String until = null; String name = null; String id = null; Node node = posNode.getFirstChild().getFirstChild(); while (node != null) { if ("eprints:affiliatedInstitution" .equals(node.getNodeName())) { name = node.getFirstChild().getNodeValue(); } else if ("escidoc:start-date" .equals(node.getNodeName())) { from = node.getFirstChild().getNodeValue(); } else if ("escidoc:end-date" .equals(node.getNodeName())) { until = node.getFirstChild().getNodeValue(); } else if ("dc:identifier" .equals(node.getNodeName())) { id = node.getFirstChild().getNodeValue(); } node = node.getNextSibling(); } if (smaller(from, dateString) && smaller(dateString, until)) { OrganizationVO org = new OrganizationVO(); org.setName(name); org.setIdentifier(id); personVO.getOrganizations().add(org); } } } } else if (currentNode.getNodeType() == Node.ELEMENT_NODE) { throw new RuntimeException( "Ambigous CoNE entries for " + query); } currentNode = currentNode.getNextSibling(); } } else { throw new RuntimeException("Missing CoNE entry for " + query); } } else if (configuration != null && "true".equals(configuration.get("CoNE")) && ("no".equals(configuration.get("CurlyBracketsForCoNEAuthors")))) { String query = personVO.getFamilyName() + ", " + personVO.getGivenName(); Node coneEntries = 
Util.queryConeExact("persons", query, (configuration.get("OrganizationalUnit") != null ? configuration.get("OrganizationalUnit") : "")); Node coneNode = coneEntries.getFirstChild().getFirstChild(); if (coneNode != null) { Node currentNode = coneNode.getFirstChild(); boolean first = true; while (currentNode != null) { if (currentNode.getNodeType() == Node.ELEMENT_NODE && first) { first = false; noConeAuthorFound = false; Node coneEntry = currentNode; String coneId = coneEntry.getAttributes() .getNamedItem("rdf:about").getNodeValue(); personVO.setIdentifier(new IdentifierVO(IdType.CONE, coneId)); for (int i = 0; i < coneEntry.getChildNodes() .getLength(); i++) { Node posNode = coneEntry.getChildNodes().item(i); if ("escidoc:position".equals(posNode.getNodeName())) { String from = null; String until = null; String name = null; String id = null; Node node = posNode.getFirstChild().getFirstChild(); while (node != null) { if ("eprints:affiliatedInstitution" .equals(node.getNodeName())) { name = node.getFirstChild().getNodeValue(); } else if ("escidoc:start-date" .equals(node.getNodeName())) { from = node.getFirstChild().getNodeValue(); } else if ("escidoc:end-date" .equals(node.getNodeName())) { until = node.getFirstChild().getNodeValue(); } else if ("dc:identifier" .equals(node.getNodeName())) { id = node.getFirstChild().getNodeValue(); } node = node.getNextSibling(); } if (smaller(from, dateString) && smaller(dateString, until)) { OrganizationVO org = new OrganizationVO(); org.setName(name); org.setIdentifier(id); personVO.getOrganizations().add(org); } } } } else if (currentNode.getNodeType() == Node.ELEMENT_NODE) { throw new RuntimeException( "Ambigous CoNE entries for " + query); } currentNode = currentNode.getNextSibling(); } } } if (affiliation != null) { OrganizationVO organization = new OrganizationVO(); organization.setIdentifier(PropertyReader .getProperty("escidoc.pubman.external.organisation.id")); organization.setName(affiliation); 
organization.setAddress(affiliationAddress); personVO.getOrganizations().add(organization); } CreatorVO creatorVO = new CreatorVO(personVO, CreatorVO.CreatorRole.AUTHOR); mds.getCreators().add(creatorVO); } } if (!teams.isEmpty()) { mds.getCreators().addAll(teams); } } catch (Exception e) { this.logger.error("An error occured while getting field 'author'.", e); throw new RuntimeException(e); } } } // editor boolean noConeEditorFound = false; if (fields.get("editor") != null) { this.logger.debug("fields.get(\"editor\"): " + fields.get("editor").getClass()); if (fields.get("editor") instanceof BibtexPersonList) { BibtexPersonList editors = (BibtexPersonList) fields.get("editor"); for (Object editor : editors.getList()) { if (editor instanceof BibtexPerson) { addCreator(mds, (BibtexPerson) editor, CreatorVO.CreatorRole.EDITOR, affiliation, affiliationAddress); } else { this.logger.warn("Entry in BibtexPersonList not a BibtexPerson: [" + editor + "] in [" + editors + "]"); } } } else if (fields.get("editor") instanceof BibtexPerson) { BibtexPerson editor = (BibtexPerson) fields.get("editor"); addCreator(mds, (BibtexPerson) editor, CreatorVO.CreatorRole.EDITOR, affiliation, affiliationAddress); } else if (fields.get("editor") instanceof BibtexString) { AuthorDecoder decoder; try { String editorString = BibTexUtil.bibtexDecode(fields.get("editor").toString(), false); List<CreatorVO> teams = new ArrayList<CreatorVO>(); if (editorString.contains("Team")) { // set pattern for finding Teams (leaded or followed by [and|,|;|{|}|^|$]) Pattern pattern = Pattern.compile( "(?<=(and|,|;|\\{|^))([\\w|\\s]*?Team[\\w|\\s]*?)(?=(and|,|;|\\}|$))", Pattern.DOTALL); Matcher matcher = pattern.matcher(editorString); String matchedGroup; while (matcher.find()) { matchedGroup = matcher.group(); // remove matchedGroup (and prefix/suffix) from authorString if (editorString.startsWith(matchedGroup)) { editorString = editorString.replaceAll(matchedGroup + "(and|,|;|\\})", ""); } else { 
editorString = editorString.replaceAll("(and|,|;|\\{)" + matchedGroup, ""); } // set matchedGroup as Organisation Author OrganizationVO team = new OrganizationVO(); team.setName(matchedGroup.trim()); CreatorVO creatorVO = new CreatorVO(team, CreatorVO.CreatorRole.EDITOR); teams.add(creatorVO); } } decoder = new AuthorDecoder(editorString, false); if (decoder.getBestFormat() != null) { List<Author> editors = decoder.getAuthorListList().get(0); for (Author editor : editors) { PersonVO personVO = new PersonVO(); personVO.setFamilyName(editor.getSurname()); if (editor.getGivenName() != null) { personVO.setGivenName(editor.getGivenName()); } else { personVO.setGivenName(editor.getInitial()); } /* * Case for MPI-KYB (Biological Cybernetics) with CoNE identifier in brackets and * affiliations to adopt from CoNE for each author (also in brackets) */ if (configuration != null && "true".equals(configuration.get("CoNE")) && ("identifier and affiliation in brackets" .equals(configuration.get("CurlyBracketsForCoNEAuthors"))) && (editor.getTags().get("identifier") != null)) { String query = editor.getTags().get("identifier"); int affiliationsCount = Integer .parseInt(editor.getTags().get("affiliationsCount")); if (affiliationsCount > 0 || configuration.get("OrganizationalUnit") != null) { for (int ouCount = 0; ouCount < (affiliationsCount > 0 ? affiliationsCount : 1); ouCount++) // 1 // is // for // the // case // configuration.get("OrganizationalUnit") // != // null { String organizationalUnit = (editor.getTags().get( "affiliation" + new Integer(ouCount).toString()) != null ? editor.getTags() .get("affiliation" + new Integer(ouCount).toString()) : (configuration.get("OrganizationalUnit") != null ? 
configuration.get("OrganizationalUnit") : "")); Node coneEntries = null; if (query.equals(editor.getTags().get("identifier"))) { coneEntries = Util.queryConeExactWithIdentifier("persons", query, organizationalUnit); // for MPIKYB due to OUs which do not occur in CoNE if (coneEntries.getFirstChild().getFirstChild() == null) { logger.error("No Person with Identifier (" + editor.getTags().get("identifier") + ") and OU (" + organizationalUnit + ") found in CoNE for Publication \"" + fields.get("title") + "\""); } } else { coneEntries = Util.queryConeExact("persons", query, organizationalUnit); } Node coneNode = coneEntries.getFirstChild().getFirstChild(); if (coneNode != null) { Node currentNode = coneNode.getFirstChild(); boolean first = true; while (currentNode != null) { if (currentNode.getNodeType() == Node.ELEMENT_NODE && first) { first = false; noConeEditorFound = false; Node coneEntry = currentNode; String coneId = coneEntry.getAttributes() .getNamedItem("rdf:about").getNodeValue(); personVO.setIdentifier( new IdentifierVO(IdType.CONE, coneId)); for (int i = 0; i < coneEntry.getChildNodes() .getLength(); i++) { Node posNode = coneEntry.getChildNodes().item(i); if ("escidoc:position" .equals(posNode.getNodeName())) { String from = null; String until = null; String name = null; String id = null; Node node = posNode.getFirstChild() .getFirstChild(); while (node != null) { if ("eprints:affiliatedInstitution" .equals(node.getNodeName())) { name = node.getFirstChild() .getNodeValue(); } else if ("escidoc:start-date" .equals(node.getNodeName())) { from = node.getFirstChild() .getNodeValue(); } else if ("escidoc:end-date" .equals(node.getNodeName())) { until = node.getFirstChild() .getNodeValue(); } else if ("dc:identifier" .equals(node.getNodeName())) { id = node.getFirstChild() .getNodeValue(); } node = node.getNextSibling(); } if (smaller(from, dateString) && smaller(dateString, until)) { OrganizationVO org = new OrganizationVO(); org.setName(name); 
org.setIdentifier(id); personVO.getOrganizations().add(org); } } } } else if (currentNode.getNodeType() == Node.ELEMENT_NODE) { throw new RuntimeException( "Ambigous CoNE entries for " + query); } currentNode = currentNode.getNextSibling(); } } else { throw new RuntimeException("Missing CoNE entry for " + query); } } } } /* * Case for MPI-Microstructure Physics with affiliation identifier in brackets and * affiliations to adopt from CoNE for each author (also in brackets) */ else if (configuration != null && "true".equals(configuration.get("CoNE")) && ("affiliation id in brackets" .equals(configuration.get("CurlyBracketsForCoNEAuthors"))) && (editor.getTags().get("identifier") != null)) { String identifier = editor.getTags().get("identifier"); String query = personVO.getFamilyName() + ", " + personVO.getGivenName(); if (!("extern".equals(identifier))) { Node coneEntries = null; coneEntries = Util.queryConeExact("persons", query, (configuration.get("OrganizationalUnit") != null ? configuration.get("OrganizationalUnit") : "")); Node coneNode = coneEntries.getFirstChild().getFirstChild(); if (coneNode != null) { Node currentNode = coneNode.getFirstChild(); boolean first = true; while (currentNode != null) { if (currentNode.getNodeType() == Node.ELEMENT_NODE && first) { first = false; noConeAuthorFound = false; Node coneEntry = currentNode; String coneId = coneEntry.getAttributes() .getNamedItem("rdf:about").getNodeValue(); personVO.setIdentifier( new IdentifierVO(IdType.CONE, coneId)); if (identifier != null && !("".equals(identifier))) { try { String ouSubTitle = identifier.substring(0, identifier.indexOf(",")); Document document = Util.queryFramework( "/oum/organizational-units?query=" + URLEncoder.encode("\"/title\"=\"" + ouSubTitle + "\"", "UTF-8")); NodeList ouList = document.getElementsByTagNameNS( "http://www.escidoc.de/schemas/organizationalunit/0.8", "organizational-unit"); Element ou = (Element) ouList.item(0); String href = ou.getAttribute("xlink:href"); 
String ouId = href .substring(href.lastIndexOf("/") + 1); OrganizationVO org = new OrganizationVO(); org.setName(identifier); org.setIdentifier(ouId); personVO.getOrganizations().add(org); } catch (Exception e) { logger.error("Error getting OUs", e); throw new RuntimeException( "Error getting Organizational Unit for " + identifier); } } } else if (currentNode.getNodeType() == Node.ELEMENT_NODE) { throw new RuntimeException( "Ambigous CoNE entries for " + query); } currentNode = currentNode.getNextSibling(); } } else { throw new RuntimeException("Missing CoNE entry for " + query); } } } else if (configuration != null && "true".equals(configuration.get("CoNE")) && ("empty brackets" .equals(configuration.get("CurlyBracketsForCoNEAuthors")) && (editor.getTags().get("brackets") != null))) { String query = personVO.getFamilyName() + ", " + personVO.getGivenName(); Node coneEntries = Util.queryConeExact("persons", query, (configuration.get("OrganizationalUnit") != null ? configuration.get("OrganizationalUnit") : "")); Node coneNode = coneEntries.getFirstChild().getFirstChild(); if (coneNode != null) { Node currentNode = coneNode.getFirstChild(); boolean first = true; while (currentNode != null) { if (currentNode.getNodeType() == Node.ELEMENT_NODE && first) { first = false; noConeEditorFound = false; Node coneEntry = currentNode; String coneId = coneEntry.getAttributes() .getNamedItem("rdf:about").getNodeValue(); personVO.setIdentifier(new IdentifierVO(IdType.CONE, coneId)); for (int i = 0; i < coneEntry.getChildNodes() .getLength(); i++) { Node posNode = coneEntry.getChildNodes().item(i); if ("escidoc:position".equals(posNode.getNodeName())) { String from = null; String until = null; String name = null; String id = null; Node node = posNode.getFirstChild().getFirstChild(); while (node != null) { if ("eprints:affiliatedInstitution" .equals(node.getNodeName())) { name = node.getFirstChild().getNodeValue(); } else if ("escidoc:start-date" .equals(node.getNodeName())) { from 
= node.getFirstChild().getNodeValue(); } else if ("escidoc:end-date" .equals(node.getNodeName())) { until = node.getFirstChild().getNodeValue(); } else if ("dc:identifier" .equals(node.getNodeName())) { id = node.getFirstChild().getNodeValue(); } node = node.getNextSibling(); } if (smaller(from, dateString) && smaller(dateString, until)) { OrganizationVO org = new OrganizationVO(); org.setName(name); org.setIdentifier(id); personVO.getOrganizations().add(org); } } } } else if (currentNode.getNodeType() == Node.ELEMENT_NODE) { throw new RuntimeException( "Ambigous CoNE entries for " + query); } currentNode = currentNode.getNextSibling(); } } else { throw new RuntimeException("Missing CoNE entry for " + query); } } else if (configuration != null && "true".equals(configuration.get("CoNE")) && ("no".equals(configuration.get("CurlyBracketsForCoNEAuthors")))) { String query = personVO.getFamilyName() + ", " + personVO.getGivenName(); Node coneEntries = Util.queryConeExact("persons", query, (configuration.get("OrganizationalUnit") != null ? 
configuration.get("OrganizationalUnit") : "")); Node coneNode = coneEntries.getFirstChild().getFirstChild(); if (coneNode != null) { Node currentNode = coneNode.getFirstChild(); boolean first = true; while (currentNode != null) { if (currentNode.getNodeType() == Node.ELEMENT_NODE && first) { first = false; noConeEditorFound = false; Node coneEntry = currentNode; String coneId = coneEntry.getAttributes() .getNamedItem("rdf:about").getNodeValue(); personVO.setIdentifier(new IdentifierVO(IdType.CONE, coneId)); for (int i = 0; i < coneEntry.getChildNodes() .getLength(); i++) { Node posNode = coneEntry.getChildNodes().item(i); if ("escidoc:position".equals(posNode.getNodeName())) { String from = null; String until = null; String name = null; String id = null; Node node = posNode.getFirstChild().getFirstChild(); while (node != null) { if ("eprints:affiliatedInstitution" .equals(node.getNodeName())) { name = node.getFirstChild().getNodeValue(); } else if ("escidoc:start-date" .equals(node.getNodeName())) { from = node.getFirstChild().getNodeValue(); } else if ("escidoc:end-date" .equals(node.getNodeName())) { until = node.getFirstChild().getNodeValue(); } else if ("dc:identifier" .equals(node.getNodeName())) { id = node.getFirstChild().getNodeValue(); } node = node.getNextSibling(); } if (smaller(from, dateString) && smaller(dateString, until)) { OrganizationVO org = new OrganizationVO(); org.setName(name); org.setIdentifier(id); personVO.getOrganizations().add(org); } } } } else if (currentNode.getNodeType() == Node.ELEMENT_NODE) { throw new RuntimeException( "Ambigous CoNE entries for " + query); } currentNode = currentNode.getNextSibling(); } } } if (affiliation != null) { OrganizationVO organization = new OrganizationVO(); organization.setIdentifier(PropertyReader .getProperty("escidoc.pubman.external.organisation.id")); organization.setName(affiliation); organization.setAddress(affiliationAddress); personVO.getOrganizations().add(organization); } CreatorVO creatorVO 
= new CreatorVO(personVO, CreatorVO.CreatorRole.EDITOR); if ((bibGenre == BibTexUtil.Genre.article || bibGenre == BibTexUtil.Genre.inbook || bibGenre == BibTexUtil.Genre.inproceedings || bibGenre == BibTexUtil.Genre.conference || bibGenre == BibTexUtil.Genre.incollection) && (sourceVO.getTitle() != null || sourceVO.getTitle() == null)) { sourceVO.getCreators().add(creatorVO); } else { mds.getCreators().add(creatorVO); } } } if (!teams.isEmpty()) { mds.getCreators().addAll(teams); } } catch (Exception e) { this.logger.error("An error occured while getting field 'editor'.", e); throw new RuntimeException(e); } } } // No CoNE Author or Editor Found if (noConeAuthorFound == true && noConeEditorFound == true && configuration != null && "true".equals(configuration.get("CoNE"))) { throw new RuntimeException("No CoNE-Author and no CoNE-Editor was found"); } // If no affiliation is given, set the first author to "external" boolean affiliationFound = false; for (CreatorVO creator : mds.getCreators()) { if (creator.getPerson() != null && creator.getPerson().getOrganizations() != null) { for (OrganizationVO organization : creator.getPerson().getOrganizations()) { if (organization.getIdentifier() != null) { affiliationFound = true; break; } } } } if (!affiliationFound && mds.getCreators().size() > 0) { OrganizationVO externalOrganization = new OrganizationVO(); externalOrganization.setName("External Organizations"); try { externalOrganization.setIdentifier( PropertyReader.getProperty("escidoc.pubman.external.organisation.id")); } catch (Exception e) { throw new RuntimeException("Property escidoc.pubman.external.organisation.id not found", e); } if (mds.getCreators().get(0).getPerson() != null) { mds.getCreators().get(0).getPerson().getOrganizations().add(externalOrganization); } } // Mapping of "common" (maybe relevant), non standard BibTeX Entries // abstract if (fields.get("abstract") != null) { mds.getAbstracts().add(new AbstractVO(BibTexUtil 
.stripBraces(BibTexUtil.bibtexDecode(fields.get("abstract").toString()), false))); } // contents if (fields.get("contents") != null) { mds.setTableOfContents(BibTexUtil .stripBraces(BibTexUtil.bibtexDecode(fields.get("contents").toString()), false)); } // isbn if (fields.get("isbn") != null) { if (bibGenre == BibTexUtil.Genre.inproceedings || bibGenre == BibTexUtil.Genre.inbook || bibGenre == BibTexUtil.Genre.incollection || bibGenre == BibTexUtil.Genre.conference) { if (sourceVO != null) { sourceVO.getIdentifiers().add(new IdentifierVO(IdentifierVO.IdType.ISBN, BibTexUtil .stripBraces(BibTexUtil.bibtexDecode(fields.get("isbn").toString()), false))); } } else { mds.getIdentifiers().add(new IdentifierVO(IdentifierVO.IdType.ISBN, BibTexUtil .stripBraces(BibTexUtil.bibtexDecode(fields.get("isbn").toString()), false))); } } // issn if (fields.get("issn") != null) { if (bibGenre == BibTexUtil.Genre.inproceedings || bibGenre == BibTexUtil.Genre.inbook || bibGenre == BibTexUtil.Genre.incollection || bibGenre == BibTexUtil.Genre.conference) { if (sourceVO.getSources() != null && !sourceVO.getSources().isEmpty()) { sourceVO.getSources().get(0).getIdentifiers() .add(new IdentifierVO(IdentifierVO.IdType.ISSN, BibTexUtil.stripBraces( BibTexUtil.bibtexDecode(fields.get("issn").toString()), false))); } } else if (bibGenre == BibTexUtil.Genre.article) { if (sourceVO != null) { sourceVO.getIdentifiers().add(new IdentifierVO(IdentifierVO.IdType.ISSN, BibTexUtil .stripBraces(BibTexUtil.bibtexDecode(fields.get("issn").toString()), false))); } } else { mds.getIdentifiers().add(new IdentifierVO(IdentifierVO.IdType.ISSN, BibTexUtil .stripBraces(BibTexUtil.bibtexDecode(fields.get("issn").toString()), false))); } } // keywords if (fields.get("keywords") != null) { mds.setFreeKeywords(BibTexUtil .stripBraces(BibTexUtil.bibtexDecode(fields.get("keywords").toString()), false)); } // language /* * if (fields.get("language") != null) { * mds.getLanguages().add(BibTexUtil.stripBraces(BibTexUtil 
* .bibtexDecode(fields.get("language").toString ()), false)); } */ // subtitle if (fields.get("subtitle") != null) { mds.getAlternativeTitles().add(new AlternativeTitleVO(BibTexUtil .stripBraces(BibTexUtil.bibtexDecode(fields.get("subtitle").toString()), false))); } // url is now mapped to locator if (fields.get("url") != null) { // mds.getIdentifiers().add( // new IdentifierVO( // IdentifierVO.IdType.URI, // BibTexUtil.stripBraces(BibTexUtil.bibtexDecode(fields.get("url").toString()), false))); FileVO locator = new FileVO(); locator.setContent( BibTexUtil.stripBraces(BibTexUtil.bibtexDecode(fields.get("url").toString()), false)); locator.setName("Link"); locator.setStorage(FileVO.Storage.EXTERNAL_URL); locator.setVisibility(FileVO.Visibility.PUBLIC); locator.setContentCategory( "http://purl.org/escidoc/metadata/ves/content-categories/any-fulltext"); MdsFileVO metadata = new MdsFileVO(); metadata.setContentCategory( "http://purl.org/escidoc/metadata/ves/content-categories/any-fulltext"); metadata.setTitle("Link"); locator.getMetadataSets().add(metadata); itemVO.getFiles().add(locator); } // web_url as URI-Identifier else if (fields.get("web_url") != null) { mds.getIdentifiers().add(new IdentifierVO(IdentifierVO.IdType.URI, BibTexUtil .stripBraces(BibTexUtil.bibtexDecode(fields.get("web_url").toString()), false))); } // Prevent the creation of an empty source if (sourceVO.getTitle() != null && sourceVO.getTitle() != null && sourceVO.getTitle() != "" && sourceVO.getGenre() != null) { mds.getSources().add(sourceVO); // Prevent the creation of an empty second if (sourceVO.getSources() != null && !sourceVO.getSources().isEmpty() && sourceVO.getSources().get(0) != null && sourceVO.getSources().get(0).getTitle() != null && sourceVO.getSources().get(0).getTitle() != null && sourceVO.getSources().get(0).getTitle() != "") { mds.getSources().add(sourceVO.getSources().get(0)); } } // Prevent the creation of an empty second source if (secondSourceVO.getTitle() != null && 
secondSourceVO.getTitle() != null && secondSourceVO.getTitle() != "" && secondSourceVO.getGenre() != null) { mds.getSources().add(secondSourceVO); // Prevent the creation of an empty second if (secondSourceVO.getSources() != null && !secondSourceVO.getSources().isEmpty() && secondSourceVO.getSources().get(0) != null && secondSourceVO.getSources().get(0).getTitle() != null && secondSourceVO.getSources().get(0).getTitle() != null && secondSourceVO.getSources().get(0).getTitle() != "") { mds.getSources().add(secondSourceVO.getSources().get(0)); } } // New mapping for MPIS // DOI if (fields.get("doi") != null) { mds.getIdentifiers().add(new IdentifierVO(IdentifierVO.IdType.DOI, BibTexUtil.stripBraces(BibTexUtil.bibtexDecode(fields.get("doi").toString()), false))); } // eid if (fields.get("eid") != null) { if (mds.getSources().size() == 1) { mds.getSources().get(0).setSequenceNumber(BibTexUtil .stripBraces(BibTexUtil.bibtexDecode(fields.get("eid").toString()), false)); } } // rev if (fields.get("rev") != null) { if ("Peer".equals( BibTexUtil.stripBraces(BibTexUtil.bibtexDecode(fields.get("rev").toString()), false))) { mds.setReviewMethod(ReviewMethod.PEER); } else if ("No review".equals( BibTexUtil.stripBraces(BibTexUtil.bibtexDecode(fields.get("rev").toString()), false))) { mds.setReviewMethod(ReviewMethod.NO_REVIEW); } } // MPG-Affil if (fields.get("MPG-Affil") != null) { if ("Peer".equals(BibTexUtil .stripBraces(BibTexUtil.bibtexDecode(fields.get("MPG-Affil").toString()), false))) { // TODO } } // MPIS Groups if (fields.get("group") != null) { String[] groups = BibTexUtil .stripBraces(BibTexUtil.bibtexDecode(fields.get("group").toString()), false).split(","); for (String group : groups) { group = group.trim(); if (!"".equals(group)) { if (groupSet == null) { try { groupSet = loadGroupSet(); } catch (Exception e) { throw new RuntimeException(e); } } if (!groupSet.contains(group)) { throw new RuntimeException("Group '" + group + "' not found."); } mds.getSubjects() 
.add(new SubjectVO(group, null, SubjectClassification.MPIS_GROUPS.toString())); } } } // MPIS Projects if (fields.get("project") != null) { String[] projects = BibTexUtil .stripBraces(BibTexUtil.bibtexDecode(fields.get("project").toString()), false) .split(","); for (String project : projects) { project = project.trim(); if (!"".equals(project)) { if (projectSet == null) { try { projectSet = loadProjectSet(); } catch (Exception e) { throw new RuntimeException(e); } } if (!projectSet.contains(project)) { throw new RuntimeException("Project '" + project + "' not found."); } mds.getSubjects().add( new SubjectVO(project, null, SubjectClassification.MPIS_PROJECTS.toString())); } } } // Cite Key mds.getIdentifiers().add(new IdentifierVO(IdType.BIBTEX_CITEKEY, entry.getEntryKey())); } else if (object instanceof BibtexToplevelComment) { this.logger.debug("Comment found: " + ((BibtexToplevelComment) object).getContent()); } } XmlTransforming xmlTransforming = new XmlTransformingBean(); try { if (entryFound) { return xmlTransforming.transformToItem(itemVO); } else { this.logger.warn("No entry found in BibTex record."); throw new RuntimeException(); } } catch (TechnicalException e) { this.logger.error("An error ocurred while transforming the item."); throw new RuntimeException(e); } }
From source file:de.mpg.escidoc.services.transformation.transformations.commonPublicationFormats.Bibtex.java
/** * @param bibtex * @return eSciDoc-publication item XML representation of this BibTeX entry * @throws RuntimeException */ public String getBibtex(String bibtex) throws RuntimeException { // Remove Math '$' from the whole BibTex-String Pattern mathPattern = Pattern.compile("(?sm)\\$(\\\\.*?)(?<!\\\\)\\$"); Matcher mathMatcher = mathPattern.matcher(bibtex); StringBuffer sb = new StringBuffer(); while (mathMatcher.find()) { mathMatcher.appendReplacement(sb, "$1"); } mathMatcher.appendTail(sb); bibtex = sb.toString(); BibtexParser parser = new BibtexParser(true); BibtexFile file = new BibtexFile(); try { parser.parse(file, new StringReader(bibtex)); } catch (Exception e) { this.logger.error("Error parsing BibTex record."); throw new RuntimeException(e); } PubItemVO itemVO = new PubItemVO(); MdsPublicationVO mds = new MdsPublicationVO(); itemVO.setMetadata(mds); List entries = file.getEntries(); boolean entryFound = false; if (entries == null || entries.size() == 0) { this.logger.warn("No entry found in BibTex record."); throw new RuntimeException(); } for (Object object : entries) { if (object instanceof BibtexEntry) { if (entryFound) { this.logger.error("Multiple entries in BibTex record."); throw new RuntimeException(); } entryFound = true; BibtexEntry entry = (BibtexEntry) object; // genre BibTexUtil.Genre bibGenre; try { bibGenre = BibTexUtil.Genre.valueOf(entry.getEntryType()); } catch (IllegalArgumentException iae) { bibGenre = BibTexUtil.Genre.misc; this.logger.warn("Unrecognized genre: " + entry.getEntryType()); } MdsPublicationVO.Genre itemGenre = BibTexUtil.getGenreMapping().get(bibGenre); mds.setGenre(itemGenre); SourceVO sourceVO = new SourceVO(new TextVO()); SourceVO secondSourceVO = new SourceVO(new TextVO()); Map fields = entry.getFields(); // Mapping of BibTeX Standard Entries // title if (fields.get("title") != null) { if (fields.get("chapter") != null) { mds.setTitle(new TextVO(BibTexUtil.stripBraces( 
BibTexUtil.bibtexDecode(fields.get("chapter").toString()), false) + " - " + BibTexUtil.stripBraces(BibTexUtil.bibtexDecode(fields.get("title").toString()), false))); } else { mds.setTitle(new TextVO(BibTexUtil .stripBraces(BibTexUtil.bibtexDecode(fields.get("title").toString()), false))); } } // booktitle if (fields.get("booktitle") != null) { if (bibGenre == BibTexUtil.Genre.book) { mds.setTitle(new TextVO(BibTexUtil .stripBraces(BibTexUtil.bibtexDecode(fields.get("booktitle").toString()), false))); } else if (bibGenre == BibTexUtil.Genre.conference || bibGenre == BibTexUtil.Genre.inbook || bibGenre == BibTexUtil.Genre.incollection || bibGenre == BibTexUtil.Genre.inproceedings) { sourceVO.setTitle(new TextVO(BibTexUtil .stripBraces(BibTexUtil.bibtexDecode(fields.get("booktitle").toString()), false))); if (bibGenre == BibTexUtil.Genre.conference || bibGenre == BibTexUtil.Genre.inproceedings) { sourceVO.setGenre(Genre.PROCEEDINGS); } else if (bibGenre == BibTexUtil.Genre.inbook || bibGenre == BibTexUtil.Genre.incollection) { sourceVO.setGenre(Genre.BOOK); } } } // fjournal, journal if (fields.get("fjournal") != null) { if (bibGenre == BibTexUtil.Genre.article || bibGenre == BibTexUtil.Genre.misc || bibGenre == BibTexUtil.Genre.unpublished) { sourceVO.setTitle(new TextVO(BibTexUtil .stripBraces(BibTexUtil.bibtexDecode(fields.get("fjournal").toString()), false))); sourceVO.setGenre(SourceVO.Genre.JOURNAL); if (fields.get("journal") != null) { sourceVO.getAlternativeTitles().add(new TextVO(BibTexUtil.stripBraces( BibTexUtil.bibtexDecode(fields.get("journal").toString()), false))); } } } else if (fields.get("journal") != null) { if (bibGenre == BibTexUtil.Genre.article || bibGenre == BibTexUtil.Genre.misc || bibGenre == BibTexUtil.Genre.unpublished || bibGenre == BibTexUtil.Genre.inproceedings) { sourceVO.setTitle(new TextVO(BibTexUtil .stripBraces(BibTexUtil.bibtexDecode(fields.get("journal").toString()), false))); sourceVO.setGenre(SourceVO.Genre.JOURNAL); } } // 
number if (fields.get("number") != null && bibGenre != BibTexUtil.Genre.techreport) { sourceVO.setIssue(BibTexUtil .stripBraces(BibTexUtil.bibtexDecode(fields.get("number").toString()), false)); } else if (fields.get("number") != null && bibGenre == BibTexUtil.Genre.techreport) { { mds.getIdentifiers().add(new IdentifierVO(IdentifierVO.IdType.REPORT_NR, BibTexUtil .stripBraces(BibTexUtil.bibtexDecode(fields.get("number").toString()), false))); } } // pages if (fields.get("pages") != null) { if (bibGenre == BibTexUtil.Genre.book || bibGenre == BibTexUtil.Genre.proceedings) { mds.setTotalNumberOfPages(BibTexUtil .stripBraces(BibTexUtil.bibtexDecode(fields.get("pages").toString()), false)); } else { BibTexUtil.fillSourcePages(BibTexUtil.stripBraces( BibTexUtil.bibtexDecode(fields.get("pages").toString()), false), sourceVO); if (bibGenre == BibTexUtil.Genre.inproceedings && (fields.get("booktitle") == null || fields.get("booktitle").toString() == "") && (fields.get("event_name") != null && fields.get("event_name").toString() != "")) { sourceVO.setTitle( new TextVO(BibTexUtil.stripBraces(fields.get("event_name").toString(), false))); sourceVO.setGenre(Genre.PROCEEDINGS); } } } // Publishing info PublishingInfoVO publishingInfoVO = new PublishingInfoVO(); mds.setPublishingInfo(publishingInfoVO); // address if (fields.get("address") != null) { if (!(bibGenre == BibTexUtil.Genre.article || bibGenre == BibTexUtil.Genre.inbook || bibGenre == BibTexUtil.Genre.inproceedings || bibGenre == BibTexUtil.Genre.conference || bibGenre == BibTexUtil.Genre.incollection) && (sourceVO.getTitle() == null || sourceVO.getTitle().getValue() == null)) { publishingInfoVO.setPlace(BibTexUtil .stripBraces(BibTexUtil.bibtexDecode(fields.get("address").toString()), false)); } else { if (sourceVO.getPublishingInfo() == null) { PublishingInfoVO sourcePublishingInfoVO = new PublishingInfoVO(); sourceVO.setPublishingInfo(sourcePublishingInfoVO); } sourceVO.getPublishingInfo().setPlace(BibTexUtil 
.stripBraces(BibTexUtil.bibtexDecode(fields.get("address").toString()), false)); } } // edition if (fields.get("edition") != null) { publishingInfoVO.setEdition(BibTexUtil .stripBraces(BibTexUtil.bibtexDecode(fields.get("edition").toString()), false)); } // publisher if (!(bibGenre == BibTexUtil.Genre.article || bibGenre == BibTexUtil.Genre.inbook || bibGenre == BibTexUtil.Genre.inproceedings || bibGenre == BibTexUtil.Genre.conference || bibGenre == BibTexUtil.Genre.incollection) && (sourceVO.getTitle() == null || sourceVO.getTitle().getValue() == null)) { if (fields.get("publisher") != null) { publishingInfoVO.setPublisher(BibTexUtil .stripBraces(BibTexUtil.bibtexDecode(fields.get("publisher").toString()), false)); } else if (fields.get("school") != null && (bibGenre == BibTexUtil.Genre.mastersthesis || bibGenre == BibTexUtil.Genre.phdthesis || bibGenre == BibTexUtil.Genre.techreport)) { publishingInfoVO.setPublisher(BibTexUtil .stripBraces(BibTexUtil.bibtexDecode(fields.get("school").toString()), false)); } else if (fields.get("institution") != null) { publishingInfoVO.setPublisher(BibTexUtil .stripBraces(BibTexUtil.bibtexDecode(fields.get("institution").toString()), false)); } else if (fields.get("publisher") == null && fields.get("school") == null && fields.get("institution") == null && fields.get("address") != null) { publishingInfoVO.setPublisher("ANY PUBLISHER"); } } else { if (sourceVO.getPublishingInfo() == null) { PublishingInfoVO sourcePublishingInfoVO = new PublishingInfoVO(); sourceVO.setPublishingInfo(sourcePublishingInfoVO); } if (fields.get("publisher") != null) { sourceVO.getPublishingInfo().setPublisher(BibTexUtil .stripBraces(BibTexUtil.bibtexDecode(fields.get("publisher").toString()), false)); } else if (fields.get("school") != null && (bibGenre == BibTexUtil.Genre.mastersthesis || bibGenre == BibTexUtil.Genre.phdthesis || bibGenre == BibTexUtil.Genre.techreport)) { sourceVO.getPublishingInfo().setPublisher(BibTexUtil 
.stripBraces(BibTexUtil.bibtexDecode(fields.get("school").toString()), false)); } else if (fields.get("institution") != null) { sourceVO.getPublishingInfo().setPublisher(BibTexUtil .stripBraces(BibTexUtil.bibtexDecode(fields.get("institution").toString()), false)); } else if (fields.get("publisher") == null && fields.get("school") == null && fields.get("institution") == null && fields.get("address") != null) { sourceVO.getPublishingInfo().setPublisher("ANY PUBLISHER"); } } // series if (fields.get("series") != null) { if (bibGenre == BibTexUtil.Genre.book || bibGenre == BibTexUtil.Genre.misc || bibGenre == BibTexUtil.Genre.techreport) { sourceVO.setTitle(new TextVO(BibTexUtil .stripBraces(BibTexUtil.bibtexDecode(fields.get("series").toString()), false))); sourceVO.setGenre(SourceVO.Genre.SERIES); } else if (bibGenre == BibTexUtil.Genre.inbook || bibGenre == BibTexUtil.Genre.incollection || bibGenre == BibTexUtil.Genre.inproceedings || bibGenre == BibTexUtil.Genre.conference) { secondSourceVO.setTitle(new TextVO(BibTexUtil .stripBraces(BibTexUtil.bibtexDecode(fields.get("series").toString()), false))); secondSourceVO.setGenre(SourceVO.Genre.SERIES); } } // type --> degree if (fields.get("type") != null && bibGenre == BibTexUtil.Genre.mastersthesis) { if (fields.get("type").toString().toLowerCase().contains("master") || fields.get("type").toString().toLowerCase().contains("m.a.") || fields.get("type").toString().toLowerCase().contains("m.s.") || fields.get("type").toString().toLowerCase().contains("m.sc.")) { mds.setDegree(MdsPublicationVO.DegreeType.MASTER); } else if (fields.get("type").toString().toLowerCase().contains("bachelor")) { mds.setDegree(MdsPublicationVO.DegreeType.BACHELOR); } else if (fields.get("type").toString().toLowerCase().contains("magister")) { mds.setDegree(MdsPublicationVO.DegreeType.MAGISTER); } else if (fields.get("type").toString().toLowerCase().contains("diplom")) // covers also the english // version (diploma) { 
mds.setDegree(MdsPublicationVO.DegreeType.DIPLOMA); } else if (fields.get("type").toString().toLowerCase().contains("statsexamen") || fields.get("type").toString().toLowerCase().contains("state examination")) { mds.setDegree(MdsPublicationVO.DegreeType.DIPLOMA); } } else if (fields.get("type") != null && bibGenre == BibTexUtil.Genre.phdthesis) { if (fields.get("type").toString().toLowerCase().contains("phd") || fields.get("type").toString().toLowerCase().contains("dissertation") || fields.get("type").toString().toLowerCase().contains("doktor") || fields.get("type").toString().toLowerCase().contains("doctor")) { mds.setDegree(MdsPublicationVO.DegreeType.PHD); } else if (fields.get("type").toString().toLowerCase().contains("habilitation")) { mds.setDegree(MdsPublicationVO.DegreeType.HABILITATION); } } // volume if (fields.get("volume") != null) { if (bibGenre == BibTexUtil.Genre.article || bibGenre == BibTexUtil.Genre.book) { sourceVO.setVolume(BibTexUtil .stripBraces(BibTexUtil.bibtexDecode(fields.get("volume").toString()), false)); } else if (bibGenre == BibTexUtil.Genre.inbook || bibGenre == BibTexUtil.Genre.inproceedings || bibGenre == BibTexUtil.Genre.incollection || bibGenre == BibTexUtil.Genre.conference) { if (sourceVO.getSources() != null && !sourceVO.getSources().isEmpty()) { sourceVO.getSources().get(0).setVolume(BibTexUtil .stripBraces(BibTexUtil.bibtexDecode(fields.get("volume").toString()), false)); } else { sourceVO.setVolume(BibTexUtil .stripBraces(BibTexUtil.bibtexDecode(fields.get("volume").toString()), false)); } } } // event infos if (bibGenre != null && (bibGenre.equals(BibTexUtil.Genre.inproceedings) || bibGenre.equals(BibTexUtil.Genre.proceedings) || bibGenre.equals(BibTexUtil.Genre.conference) || bibGenre.equals(BibTexUtil.Genre.poster) || bibGenre.equals(BibTexUtil.Genre.talk))) { EventVO event = new EventVO(); boolean eventNotEmpty = false; // event location if (fields.get("location") != null) { event.setPlace(new TextVO(BibTexUtil 
.stripBraces(BibTexUtil.bibtexDecode(fields.get("location").toString()), false))); eventNotEmpty = true; } // event place else if (fields.get("event_place") != null) { event.setPlace(new TextVO(BibTexUtil.stripBraces( BibTexUtil.bibtexDecode(fields.get("event_place").toString()), false))); eventNotEmpty = true; } // event name/title if (fields.get("event_name") != null) { event.setTitle(new TextVO(BibTexUtil .stripBraces(BibTexUtil.bibtexDecode(fields.get("event_name").toString()), false))); eventNotEmpty = true; } // event will be set only it's not empty if (eventNotEmpty == true) { if (event.getTitle() == null) { event.setTitle(new TextVO()); } mds.setEvent(event); } } // year, month String dateString = null; if (fields.get("year") != null) { dateString = BibTexUtil.stripBraces(BibTexUtil.bibtexDecode(fields.get("year").toString()), false); if (fields.get("month") != null) { String month = BibTexUtil.parseMonth(fields.get("month").toString()); dateString += "-" + month; } if (bibGenre == BibTexUtil.Genre.unpublished) { mds.setDateCreated(dateString); } else { mds.setDatePublishedInPrint(dateString); } } String affiliation = null; String affiliationAddress = null; // affiliation if (fields.get("affiliation") != null) { affiliation = BibTexUtil .stripBraces(BibTexUtil.bibtexDecode(fields.get("affiliation").toString()), false); } // affiliationaddress if (fields.get("affiliationaddress") != null) { affiliationAddress = BibTexUtil.stripBraces( BibTexUtil.bibtexDecode(fields.get("affiliationaddress").toString()), false); } // author boolean noConeAuthorFound = true; if (fields.get("author") != null) { if (fields.get("author") instanceof BibtexPersonList) { BibtexPersonList authors = (BibtexPersonList) fields.get("author"); for (Object author : authors.getList()) { if (author instanceof BibtexPerson) { addCreator(mds, (BibtexPerson) author, CreatorVO.CreatorRole.AUTHOR, affiliation, affiliationAddress); } else { this.logger.warn("Entry in BibtexPersonList not a 
BibtexPerson: [" + author + "] in [" + author + "]"); } } } else if (fields.get("author") instanceof BibtexPerson) { BibtexPerson author = (BibtexPerson) fields.get("author"); addCreator(mds, (BibtexPerson) author, CreatorVO.CreatorRole.AUTHOR, affiliation, affiliationAddress); } else if (fields.get("author") instanceof BibtexString) { AuthorDecoder decoder; try { String authorString = BibTexUtil.bibtexDecode(fields.get("author").toString(), false); List<CreatorVO> teams = new ArrayList<CreatorVO>(); if (authorString.contains("Team")) { // set pattern for finding Teams (leaded or followed by [and|,|;|{|}|^|$]) Pattern pattern = Pattern.compile( "(?<=(and|,|;|\\{|^))([\\w|\\s]*?Team[\\w|\\s]*?)(?=(and|,|;|\\}|$))", Pattern.DOTALL); Matcher matcher = pattern.matcher(authorString); String matchedGroup; while (matcher.find()) { matchedGroup = matcher.group(); // remove matchedGroup (and prefix/suffix) from authorString if (authorString.startsWith(matchedGroup)) { authorString = authorString.replaceAll(matchedGroup + "(and|,|;|\\})", ""); } else { authorString = authorString.replaceAll("(and|,|;|\\{)" + matchedGroup, ""); } // set matchedGroup as Organisation Author OrganizationVO team = new OrganizationVO(); team.setName(new TextVO(matchedGroup.trim())); CreatorVO creatorVO = new CreatorVO(team, CreatorVO.CreatorRole.AUTHOR); teams.add(creatorVO); } } decoder = new AuthorDecoder(authorString, false); if (decoder.getBestFormat() != null) { List<Author> authors = decoder.getAuthorListList().get(0); for (Author author : authors) { PersonVO personVO = new PersonVO(); personVO.setFamilyName(author.getSurname()); if (author.getGivenName() != null) { personVO.setGivenName(author.getGivenName()); } else { personVO.setGivenName(author.getInitial()); } /* * Case for MPI-KYB (Biological Cybernetics) with CoNE identifier in brackets and * affiliations to adopt from CoNE for each author (also in brackets) */ if (configuration != null && "true".equals(configuration.get("CoNE")) && 
("identifier and affiliation in brackets" .equals(configuration.get("CurlyBracketsForCoNEAuthors"))) && (author.getTags().get("identifier") != null)) { String query = author.getTags().get("identifier"); int affiliationsCount = Integer .parseInt(author.getTags().get("affiliationsCount")); if (affiliationsCount > 0 || configuration.get("OrganizationalUnit") != null) { for (int ouCount = 0; ouCount < (affiliationsCount > 0 ? affiliationsCount : 1); ouCount++) // 1 // is // for // the // case // configuration.get("OrganizationalUnit") // != // null { String organizationalUnit = (author.getTags().get( "affiliation" + new Integer(ouCount).toString()) != null ? author.getTags() .get("affiliation" + new Integer(ouCount).toString()) : (configuration.get("OrganizationalUnit") != null ? configuration.get("OrganizationalUnit") : "")); Node coneEntries = null; if (query.equals(author.getTags().get("identifier"))) { coneEntries = Util.queryConeExactWithIdentifier("persons", query, organizationalUnit); // for MPIKYB due to OUs which do not occur in CoNE if (coneEntries.getFirstChild().getFirstChild() == null) { logger.error("No Person with Identifier (" + author.getTags().get("identifier") + ") and OU (" + organizationalUnit + ") found in CoNE for Publication \"" + fields.get("title") + "\""); } } else { coneEntries = Util.queryConeExact("persons", query, organizationalUnit); } Node coneNode = coneEntries.getFirstChild().getFirstChild(); if (coneNode != null) { Node currentNode = coneNode.getFirstChild(); boolean first = true; while (currentNode != null) { if (currentNode.getNodeType() == Node.ELEMENT_NODE && first) { first = false; noConeAuthorFound = false; Node coneEntry = currentNode; String coneId = coneEntry.getAttributes() .getNamedItem("rdf:about").getNodeValue(); personVO.setIdentifier( new IdentifierVO(IdType.CONE, coneId)); for (int i = 0; i < coneEntry.getChildNodes() .getLength(); i++) { Node posNode = coneEntry.getChildNodes().item(i); if ("escidoc:position" 
.equals(posNode.getNodeName())) { String from = null; String until = null; String name = null; String id = null; Node node = posNode.getFirstChild() .getFirstChild(); while (node != null) { if ("eprints:affiliatedInstitution" .equals(node.getNodeName())) { name = node.getFirstChild() .getNodeValue(); } else if ("escidoc:start-date" .equals(node.getNodeName())) { from = node.getFirstChild() .getNodeValue(); } else if ("escidoc:end-date" .equals(node.getNodeName())) { until = node.getFirstChild() .getNodeValue(); } else if ("dc:identifier" .equals(node.getNodeName())) { id = node.getFirstChild() .getNodeValue(); } node = node.getNextSibling(); } if (smaller(from, dateString) && smaller(dateString, until)) { OrganizationVO org = new OrganizationVO(); org.setName(new TextVO(name)); org.setIdentifier(id); personVO.getOrganizations().add(org); } } } } else if (currentNode.getNodeType() == Node.ELEMENT_NODE) { throw new RuntimeException( "Ambigous CoNE entries for " + query); } currentNode = currentNode.getNextSibling(); } } else { throw new RuntimeException("Missing CoNE entry for " + query); } } } } /* * Case for MPI-Microstructure Physics with affiliation identifier in brackets and * affiliations to adopt from CoNE for each author (also in brackets) */ else if (configuration != null && "true".equals(configuration.get("CoNE")) && ("affiliation id in brackets" .equals(configuration.get("CurlyBracketsForCoNEAuthors"))) && (author.getTags().get("identifier") != null)) { String identifier = author.getTags().get("identifier"); String query = personVO.getFamilyName() + ", " + personVO.getGivenName(); if (!("extern".equals(identifier))) { Node coneEntries = null; coneEntries = Util.queryConeExact("persons", query, (configuration.get("OrganizationalUnit") != null ? 
configuration.get("OrganizationalUnit") : "")); Node coneNode = coneEntries.getFirstChild().getFirstChild(); if (coneNode != null) { Node currentNode = coneNode.getFirstChild(); boolean first = true; while (currentNode != null) { if (currentNode.getNodeType() == Node.ELEMENT_NODE && first) { first = false; noConeAuthorFound = false; Node coneEntry = currentNode; String coneId = coneEntry.getAttributes() .getNamedItem("rdf:about").getNodeValue(); personVO.setIdentifier( new IdentifierVO(IdType.CONE, coneId)); if (identifier != null && !("".equals(identifier))) { try { String ouSubTitle = identifier.substring(0, identifier.indexOf(",")); Document document = Util.queryFramework( "/oum/organizational-units?query=" + URLEncoder.encode("\"/title\"=\"" + ouSubTitle + "\"", "UTF-8")); NodeList ouList = document.getElementsByTagNameNS( "http://www.escidoc.de/schemas/organizationalunit/0.8", "organizational-unit"); Element ou = (Element) ouList.item(0); String href = ou.getAttribute("xlink:href"); String ouId = href .substring(href.lastIndexOf("/") + 1); OrganizationVO org = new OrganizationVO(); org.setName(new TextVO(identifier)); org.setIdentifier(ouId); personVO.getOrganizations().add(org); } catch (Exception e) { logger.error("Error getting OUs", e); throw new RuntimeException( "Error getting Organizational Unit for " + identifier); } } } else if (currentNode.getNodeType() == Node.ELEMENT_NODE) { throw new RuntimeException( "Ambigous CoNE entries for " + query); } currentNode = currentNode.getNextSibling(); } } else { throw new RuntimeException("Missing CoNE entry for " + query); } } } else if (configuration != null && "true".equals(configuration.get("CoNE")) && ("empty brackets" .equals(configuration.get("CurlyBracketsForCoNEAuthors")) && (author.getTags().get("brackets") != null))) { String query = personVO.getFamilyName() + ", " + personVO.getGivenName(); Node coneEntries = Util.queryConeExact("persons", query, (configuration.get("OrganizationalUnit") != null ? 
configuration.get("OrganizationalUnit") : "")); Node coneNode = coneEntries.getFirstChild().getFirstChild(); if (coneNode != null) { Node currentNode = coneNode.getFirstChild(); boolean first = true; while (currentNode != null) { if (currentNode.getNodeType() == Node.ELEMENT_NODE && first) { first = false; noConeAuthorFound = false; Node coneEntry = currentNode; String coneId = coneEntry.getAttributes() .getNamedItem("rdf:about").getNodeValue(); personVO.setIdentifier(new IdentifierVO(IdType.CONE, coneId)); for (int i = 0; i < coneEntry.getChildNodes() .getLength(); i++) { Node posNode = coneEntry.getChildNodes().item(i); if ("escidoc:position".equals(posNode.getNodeName())) { String from = null; String until = null; String name = null; String id = null; Node node = posNode.getFirstChild().getFirstChild(); while (node != null) { if ("eprints:affiliatedInstitution" .equals(node.getNodeName())) { name = node.getFirstChild().getNodeValue(); } else if ("escidoc:start-date" .equals(node.getNodeName())) { from = node.getFirstChild().getNodeValue(); } else if ("escidoc:end-date" .equals(node.getNodeName())) { until = node.getFirstChild().getNodeValue(); } else if ("dc:identifier" .equals(node.getNodeName())) { id = node.getFirstChild().getNodeValue(); } node = node.getNextSibling(); } if (smaller(from, dateString) && smaller(dateString, until)) { OrganizationVO org = new OrganizationVO(); org.setName(new TextVO(name)); org.setIdentifier(id); personVO.getOrganizations().add(org); } } } } else if (currentNode.getNodeType() == Node.ELEMENT_NODE) { throw new RuntimeException( "Ambigous CoNE entries for " + query); } currentNode = currentNode.getNextSibling(); } } else { throw new RuntimeException("Missing CoNE entry for " + query); } } else if (configuration != null && "true".equals(configuration.get("CoNE")) && ("no".equals(configuration.get("CurlyBracketsForCoNEAuthors")))) { String query = personVO.getFamilyName() + ", " + personVO.getGivenName(); Node coneEntries = 
Util.queryConeExact("persons", query, (configuration.get("OrganizationalUnit") != null ? configuration.get("OrganizationalUnit") : "")); Node coneNode = coneEntries.getFirstChild().getFirstChild(); if (coneNode != null) { Node currentNode = coneNode.getFirstChild(); boolean first = true; while (currentNode != null) { if (currentNode.getNodeType() == Node.ELEMENT_NODE && first) { first = false; noConeAuthorFound = false; Node coneEntry = currentNode; String coneId = coneEntry.getAttributes() .getNamedItem("rdf:about").getNodeValue(); personVO.setIdentifier(new IdentifierVO(IdType.CONE, coneId)); for (int i = 0; i < coneEntry.getChildNodes() .getLength(); i++) { Node posNode = coneEntry.getChildNodes().item(i); if ("escidoc:position".equals(posNode.getNodeName())) { String from = null; String until = null; String name = null; String id = null; Node node = posNode.getFirstChild().getFirstChild(); while (node != null) { if ("eprints:affiliatedInstitution" .equals(node.getNodeName())) { name = node.getFirstChild().getNodeValue(); } else if ("escidoc:start-date" .equals(node.getNodeName())) { from = node.getFirstChild().getNodeValue(); } else if ("escidoc:end-date" .equals(node.getNodeName())) { until = node.getFirstChild().getNodeValue(); } else if ("dc:identifier" .equals(node.getNodeName())) { id = node.getFirstChild().getNodeValue(); } node = node.getNextSibling(); } if (smaller(from, dateString) && smaller(dateString, until)) { OrganizationVO org = new OrganizationVO(); org.setName(new TextVO(name)); org.setIdentifier(id); personVO.getOrganizations().add(org); } } } } else if (currentNode.getNodeType() == Node.ELEMENT_NODE) { throw new RuntimeException( "Ambigous CoNE entries for " + query); } currentNode = currentNode.getNextSibling(); } } } /* * Case for MPI-RA (Radio Astronomy) with identifier and affiliation in brackets * This Case is using NO CoNE! 
*/ if (configuration != null && "false".equals(configuration.get("CoNE")) && ("identifier and affiliation in brackets" .equals(configuration.get("CurlyBracketsForCoNEAuthors"))) && (author.getTags().get("identifier") != null)) { String identifier = author.getTags().get("identifier"); String authoAffiliation = author.getTags().get("affiliation0"); OrganizationVO org = new OrganizationVO(); org.setName(new TextVO(authoAffiliation)); org.setIdentifier(identifier); personVO.getOrganizations().add(org); } if (affiliation != null) { OrganizationVO organization = new OrganizationVO(); organization.setIdentifier(PropertyReader .getProperty("escidoc.pubman.external.organisation.id")); organization.setName(new TextVO(affiliation)); organization.setAddress(affiliationAddress); personVO.getOrganizations().add(organization); } CreatorVO creatorVO = new CreatorVO(personVO, CreatorVO.CreatorRole.AUTHOR); mds.getCreators().add(creatorVO); } } if (!teams.isEmpty()) { mds.getCreators().addAll(teams); } } catch (Exception e) { this.logger.error("An error occured while getting field 'author'.", e); throw new RuntimeException(e); } } } // editor boolean noConeEditorFound = false; if (fields.get("editor") != null) { this.logger.debug("fields.get(\"editor\"): " + fields.get("editor").getClass()); if (fields.get("editor") instanceof BibtexPersonList) { BibtexPersonList editors = (BibtexPersonList) fields.get("editor"); for (Object editor : editors.getList()) { if (editor instanceof BibtexPerson) { addCreator(mds, (BibtexPerson) editor, CreatorVO.CreatorRole.EDITOR, affiliation, affiliationAddress); } else { this.logger.warn("Entry in BibtexPersonList not a BibtexPerson: [" + editor + "] in [" + editors + "]"); } } } else if (fields.get("editor") instanceof BibtexPerson) { BibtexPerson editor = (BibtexPerson) fields.get("editor"); addCreator(mds, (BibtexPerson) editor, CreatorVO.CreatorRole.EDITOR, affiliation, affiliationAddress); } else if (fields.get("editor") instanceof BibtexString) { 
AuthorDecoder decoder; try { String editorString = BibTexUtil.bibtexDecode(fields.get("editor").toString(), false); List<CreatorVO> teams = new ArrayList<CreatorVO>(); if (editorString.contains("Team")) { // set pattern for finding Teams (leaded or followed by [and|,|;|{|}|^|$]) Pattern pattern = Pattern.compile( "(?<=(and|,|;|\\{|^))([\\w|\\s]*?Team[\\w|\\s]*?)(?=(and|,|;|\\}|$))", Pattern.DOTALL); Matcher matcher = pattern.matcher(editorString); String matchedGroup; while (matcher.find()) { matchedGroup = matcher.group(); // remove matchedGroup (and prefix/suffix) from authorString if (editorString.startsWith(matchedGroup)) { editorString = editorString.replaceAll(matchedGroup + "(and|,|;|\\})", ""); } else { editorString = editorString.replaceAll("(and|,|;|\\{)" + matchedGroup, ""); } // set matchedGroup as Organisation Author OrganizationVO team = new OrganizationVO(); team.setName(new TextVO(matchedGroup.trim())); CreatorVO creatorVO = new CreatorVO(team, CreatorVO.CreatorRole.EDITOR); teams.add(creatorVO); } } decoder = new AuthorDecoder(editorString, false); if (decoder.getBestFormat() != null) { List<Author> editors = decoder.getAuthorListList().get(0); for (Author editor : editors) { PersonVO personVO = new PersonVO(); personVO.setFamilyName(editor.getSurname()); if (editor.getGivenName() != null) { personVO.setGivenName(editor.getGivenName()); } else { personVO.setGivenName(editor.getInitial()); } /* * Case for MPI-KYB (Biological Cybernetics) with CoNE identifier in brackets and * affiliations to adopt from CoNE for each author (also in brackets) */ if (configuration != null && "true".equals(configuration.get("CoNE")) && ("identifier and affiliation in brackets" .equals(configuration.get("CurlyBracketsForCoNEAuthors"))) && (editor.getTags().get("identifier") != null)) { String query = editor.getTags().get("identifier"); int affiliationsCount = Integer .parseInt(editor.getTags().get("affiliationsCount")); if (affiliationsCount > 0 || 
configuration.get("OrganizationalUnit") != null) { for (int ouCount = 0; ouCount < (affiliationsCount > 0 ? affiliationsCount : 1); ouCount++) // 1 // is // for // the // case // configuration.get("OrganizationalUnit") // != // null { String organizationalUnit = (editor.getTags().get( "affiliation" + new Integer(ouCount).toString()) != null ? editor.getTags() .get("affiliation" + new Integer(ouCount).toString()) : (configuration.get("OrganizationalUnit") != null ? configuration.get("OrganizationalUnit") : "")); Node coneEntries = null; if (query.equals(editor.getTags().get("identifier"))) { coneEntries = Util.queryConeExactWithIdentifier("persons", query, organizationalUnit); // for MPIKYB due to OUs which do not occur in CoNE if (coneEntries.getFirstChild().getFirstChild() == null) { logger.error("No Person with Identifier (" + editor.getTags().get("identifier") + ") and OU (" + organizationalUnit + ") found in CoNE for Publication \"" + fields.get("title") + "\""); } } else { coneEntries = Util.queryConeExact("persons", query, organizationalUnit); } Node coneNode = coneEntries.getFirstChild().getFirstChild(); if (coneNode != null) { Node currentNode = coneNode.getFirstChild(); boolean first = true; while (currentNode != null) { if (currentNode.getNodeType() == Node.ELEMENT_NODE && first) { first = false; noConeEditorFound = false; Node coneEntry = currentNode; String coneId = coneEntry.getAttributes() .getNamedItem("rdf:about").getNodeValue(); personVO.setIdentifier( new IdentifierVO(IdType.CONE, coneId)); for (int i = 0; i < coneEntry.getChildNodes() .getLength(); i++) { Node posNode = coneEntry.getChildNodes().item(i); if ("escidoc:position" .equals(posNode.getNodeName())) { String from = null; String until = null; String name = null; String id = null; Node node = posNode.getFirstChild() .getFirstChild(); while (node != null) { if ("eprints:affiliatedInstitution" .equals(node.getNodeName())) { name = node.getFirstChild() .getNodeValue(); } else if 
("escidoc:start-date" .equals(node.getNodeName())) { from = node.getFirstChild() .getNodeValue(); } else if ("escidoc:end-date" .equals(node.getNodeName())) { until = node.getFirstChild() .getNodeValue(); } else if ("dc:identifier" .equals(node.getNodeName())) { id = node.getFirstChild() .getNodeValue(); } node = node.getNextSibling(); } if (smaller(from, dateString) && smaller(dateString, until)) { OrganizationVO org = new OrganizationVO(); org.setName(new TextVO(name)); org.setIdentifier(id); personVO.getOrganizations().add(org); } } } } else if (currentNode.getNodeType() == Node.ELEMENT_NODE) { throw new RuntimeException( "Ambigous CoNE entries for " + query); } currentNode = currentNode.getNextSibling(); } } else { throw new RuntimeException("Missing CoNE entry for " + query); } } } } /* * Case for MPI-Microstructure Physics with affiliation identifier in brackets and * affiliations to adopt from CoNE for each author (also in brackets) */ else if (configuration != null && "true".equals(configuration.get("CoNE")) && ("affiliation id in brackets" .equals(configuration.get("CurlyBracketsForCoNEAuthors"))) && (editor.getTags().get("identifier") != null)) { String identifier = editor.getTags().get("identifier"); String query = personVO.getFamilyName() + ", " + personVO.getGivenName(); if (!("extern".equals(identifier))) { Node coneEntries = null; coneEntries = Util.queryConeExact("persons", query, (configuration.get("OrganizationalUnit") != null ? 
configuration.get("OrganizationalUnit") : "")); Node coneNode = coneEntries.getFirstChild().getFirstChild(); if (coneNode != null) { Node currentNode = coneNode.getFirstChild(); boolean first = true; while (currentNode != null) { if (currentNode.getNodeType() == Node.ELEMENT_NODE && first) { first = false; noConeAuthorFound = false; Node coneEntry = currentNode; String coneId = coneEntry.getAttributes() .getNamedItem("rdf:about").getNodeValue(); personVO.setIdentifier( new IdentifierVO(IdType.CONE, coneId)); if (identifier != null && !("".equals(identifier))) { try { String ouSubTitle = identifier.substring(0, identifier.indexOf(",")); Document document = Util.queryFramework( "/oum/organizational-units?query=" + URLEncoder.encode("\"/title\"=\"" + ouSubTitle + "\"", "UTF-8")); NodeList ouList = document.getElementsByTagNameNS( "http://www.escidoc.de/schemas/organizationalunit/0.8", "organizational-unit"); Element ou = (Element) ouList.item(0); String href = ou.getAttribute("xlink:href"); String ouId = href .substring(href.lastIndexOf("/") + 1); OrganizationVO org = new OrganizationVO(); org.setName(new TextVO(identifier)); org.setIdentifier(ouId); personVO.getOrganizations().add(org); } catch (Exception e) { logger.error("Error getting OUs", e); throw new RuntimeException( "Error getting Organizational Unit for " + identifier); } } } else if (currentNode.getNodeType() == Node.ELEMENT_NODE) { throw new RuntimeException( "Ambigous CoNE entries for " + query); } currentNode = currentNode.getNextSibling(); } } else { throw new RuntimeException("Missing CoNE entry for " + query); } } } else if (configuration != null && "true".equals(configuration.get("CoNE")) && ("empty brackets" .equals(configuration.get("CurlyBracketsForCoNEAuthors")) && (editor.getTags().get("brackets") != null))) { String query = personVO.getFamilyName() + ", " + personVO.getGivenName(); Node coneEntries = Util.queryConeExact("persons", query, (configuration.get("OrganizationalUnit") != null ? 
configuration.get("OrganizationalUnit") : "")); Node coneNode = coneEntries.getFirstChild().getFirstChild(); if (coneNode != null) { Node currentNode = coneNode.getFirstChild(); boolean first = true; while (currentNode != null) { if (currentNode.getNodeType() == Node.ELEMENT_NODE && first) { first = false; noConeEditorFound = false; Node coneEntry = currentNode; String coneId = coneEntry.getAttributes() .getNamedItem("rdf:about").getNodeValue(); personVO.setIdentifier(new IdentifierVO(IdType.CONE, coneId)); for (int i = 0; i < coneEntry.getChildNodes() .getLength(); i++) { Node posNode = coneEntry.getChildNodes().item(i); if ("escidoc:position".equals(posNode.getNodeName())) { String from = null; String until = null; String name = null; String id = null; Node node = posNode.getFirstChild().getFirstChild(); while (node != null) { if ("eprints:affiliatedInstitution" .equals(node.getNodeName())) { name = node.getFirstChild().getNodeValue(); } else if ("escidoc:start-date" .equals(node.getNodeName())) { from = node.getFirstChild().getNodeValue(); } else if ("escidoc:end-date" .equals(node.getNodeName())) { until = node.getFirstChild().getNodeValue(); } else if ("dc:identifier" .equals(node.getNodeName())) { id = node.getFirstChild().getNodeValue(); } node = node.getNextSibling(); } if (smaller(from, dateString) && smaller(dateString, until)) { OrganizationVO org = new OrganizationVO(); org.setName(new TextVO(name)); org.setIdentifier(id); personVO.getOrganizations().add(org); } } } } else if (currentNode.getNodeType() == Node.ELEMENT_NODE) { throw new RuntimeException( "Ambigous CoNE entries for " + query); } currentNode = currentNode.getNextSibling(); } } else { throw new RuntimeException("Missing CoNE entry for " + query); } } else if (configuration != null && "true".equals(configuration.get("CoNE")) && ("no".equals(configuration.get("CurlyBracketsForCoNEAuthors")))) { String query = personVO.getFamilyName() + ", " + personVO.getGivenName(); Node coneEntries = 
Util.queryConeExact("persons", query, (configuration.get("OrganizationalUnit") != null ? configuration.get("OrganizationalUnit") : "")); Node coneNode = coneEntries.getFirstChild().getFirstChild(); if (coneNode != null) { Node currentNode = coneNode.getFirstChild(); boolean first = true; while (currentNode != null) { if (currentNode.getNodeType() == Node.ELEMENT_NODE && first) { first = false; noConeEditorFound = false; Node coneEntry = currentNode; String coneId = coneEntry.getAttributes() .getNamedItem("rdf:about").getNodeValue(); personVO.setIdentifier(new IdentifierVO(IdType.CONE, coneId)); for (int i = 0; i < coneEntry.getChildNodes() .getLength(); i++) { Node posNode = coneEntry.getChildNodes().item(i); if ("escidoc:position".equals(posNode.getNodeName())) { String from = null; String until = null; String name = null; String id = null; Node node = posNode.getFirstChild().getFirstChild(); while (node != null) { if ("eprints:affiliatedInstitution" .equals(node.getNodeName())) { name = node.getFirstChild().getNodeValue(); } else if ("escidoc:start-date" .equals(node.getNodeName())) { from = node.getFirstChild().getNodeValue(); } else if ("escidoc:end-date" .equals(node.getNodeName())) { until = node.getFirstChild().getNodeValue(); } else if ("dc:identifier" .equals(node.getNodeName())) { id = node.getFirstChild().getNodeValue(); } node = node.getNextSibling(); } if (smaller(from, dateString) && smaller(dateString, until)) { OrganizationVO org = new OrganizationVO(); org.setName(new TextVO(name)); org.setIdentifier(id); personVO.getOrganizations().add(org); } } } } else if (currentNode.getNodeType() == Node.ELEMENT_NODE) { throw new RuntimeException( "Ambigous CoNE entries for " + query); } currentNode = currentNode.getNextSibling(); } } } /* * Case for MPI-RA (Radio Astronomy) with identifier and affiliation in brackets * This Case is using NO CoNE! 
*/ if (configuration != null && "false".equals(configuration.get("CoNE")) && ("identifier and affiliation in brackets" .equals(configuration.get("CurlyBracketsForCoNEAuthors"))) && (editor.getTags().get("identifier") != null)) { String identifier = editor.getTags().get("identifier"); String authoAffiliation = editor.getTags().get("affiliation0"); OrganizationVO org = new OrganizationVO(); org.setName(new TextVO(authoAffiliation)); org.setIdentifier(identifier); personVO.getOrganizations().add(org); } if (affiliation != null) { OrganizationVO organization = new OrganizationVO(); organization.setIdentifier(PropertyReader .getProperty("escidoc.pubman.external.organisation.id")); organization.setName(new TextVO(affiliation)); organization.setAddress(affiliationAddress); personVO.getOrganizations().add(organization); } CreatorVO creatorVO = new CreatorVO(personVO, CreatorVO.CreatorRole.EDITOR); if ((bibGenre == BibTexUtil.Genre.article || bibGenre == BibTexUtil.Genre.inbook || bibGenre == BibTexUtil.Genre.inproceedings || bibGenre == BibTexUtil.Genre.conference || bibGenre == BibTexUtil.Genre.incollection) && (sourceVO.getTitle() != null || sourceVO.getTitle().getValue() == null)) { sourceVO.getCreators().add(creatorVO); } else { mds.getCreators().add(creatorVO); } } } if (!teams.isEmpty()) { mds.getCreators().addAll(teams); } } catch (Exception e) { this.logger.error("An error occured while getting field 'editor'.", e); throw new RuntimeException(e); } } } // No CoNE Author or Editor Found if (noConeAuthorFound == true && noConeEditorFound == true && configuration != null && "true".equals(configuration.get("CoNE"))) { throw new RuntimeException("No CoNE-Author and no CoNE-Editor was found"); } // If no affiliation is given, set the first author to "external" boolean affiliationFound = false; for (CreatorVO creator : mds.getCreators()) { if (creator.getPerson() != null && creator.getPerson().getOrganizations() != null) { for (OrganizationVO organization : 
creator.getPerson().getOrganizations()) { if (organization.getIdentifier() != null) { affiliationFound = true; break; } } } } if (!affiliationFound && mds.getCreators().size() > 0) { OrganizationVO externalOrganization = new OrganizationVO(); externalOrganization.setName(new TextVO("External Organizations")); try { externalOrganization.setIdentifier( PropertyReader.getProperty("escidoc.pubman.external.organisation.id")); } catch (Exception e) { throw new RuntimeException("Property escidoc.pubman.external.organisation.id not found", e); } if (mds.getCreators().get(0).getPerson() != null) { mds.getCreators().get(0).getPerson().getOrganizations().add(externalOrganization); } } // Mapping of "common" (maybe relevant), non standard BibTeX Entries // abstract if (fields.get("abstract") != null) { mds.getAbstracts().add(new TextVO(BibTexUtil .stripBraces(BibTexUtil.bibtexDecode(fields.get("abstract").toString()), false))); } // contents if (fields.get("contents") != null) { mds.setTableOfContents(new TextVO(BibTexUtil .stripBraces(BibTexUtil.bibtexDecode(fields.get("contents").toString()), false))); } // isbn if (fields.get("isbn") != null) { if (bibGenre == BibTexUtil.Genre.inproceedings || bibGenre == BibTexUtil.Genre.inbook || bibGenre == BibTexUtil.Genre.incollection || bibGenre == BibTexUtil.Genre.conference) { if (sourceVO != null) { sourceVO.getIdentifiers().add(new IdentifierVO(IdentifierVO.IdType.ISBN, BibTexUtil .stripBraces(BibTexUtil.bibtexDecode(fields.get("isbn").toString()), false))); } } else { mds.getIdentifiers().add(new IdentifierVO(IdentifierVO.IdType.ISBN, BibTexUtil .stripBraces(BibTexUtil.bibtexDecode(fields.get("isbn").toString()), false))); } } // issn if (fields.get("issn") != null) { if (bibGenre == BibTexUtil.Genre.inproceedings || bibGenre == BibTexUtil.Genre.inbook || bibGenre == BibTexUtil.Genre.incollection || bibGenre == BibTexUtil.Genre.conference) { if (sourceVO.getSources() != null && !sourceVO.getSources().isEmpty()) { 
sourceVO.getSources().get(0).getIdentifiers() .add(new IdentifierVO(IdentifierVO.IdType.ISSN, BibTexUtil.stripBraces( BibTexUtil.bibtexDecode(fields.get("issn").toString()), false))); } } else if (bibGenre == BibTexUtil.Genre.article) { if (sourceVO != null) { sourceVO.getIdentifiers().add(new IdentifierVO(IdentifierVO.IdType.ISSN, BibTexUtil .stripBraces(BibTexUtil.bibtexDecode(fields.get("issn").toString()), false))); } } else { mds.getIdentifiers().add(new IdentifierVO(IdentifierVO.IdType.ISSN, BibTexUtil .stripBraces(BibTexUtil.bibtexDecode(fields.get("issn").toString()), false))); } } // keywords if (fields.get("keywords") != null) { mds.setFreeKeywords(new TextVO(BibTexUtil .stripBraces(BibTexUtil.bibtexDecode(fields.get("keywords").toString()), false))); } // language /* * if (fields.get("language") != null) { * mds.getLanguages().add(BibTexUtil.stripBraces(BibTexUtil.bibtexDecode(fields.get("language").toString * ()), false)); } */ // subtitle if (fields.get("subtitle") != null) { mds.getAlternativeTitles().add(new TextVO(BibTexUtil .stripBraces(BibTexUtil.bibtexDecode(fields.get("subtitle").toString()), false))); } // url is now mapped to locator if (fields.get("url") != null) { // mds.getIdentifiers().add( // new IdentifierVO( // IdentifierVO.IdType.URI, // BibTexUtil.stripBraces(BibTexUtil.bibtexDecode(fields.get("url").toString()), false))); FileVO locator = new FileVO(); locator.setContent( BibTexUtil.stripBraces(BibTexUtil.bibtexDecode(fields.get("url").toString()), false)); locator.setName("Link"); locator.setStorage(FileVO.Storage.EXTERNAL_URL); locator.setVisibility(FileVO.Visibility.PUBLIC); locator.setContentCategory( "http://purl.org/escidoc/metadata/ves/content-categories/any-fulltext"); MdsFileVO metadata = new MdsFileVO(); metadata.setContentCategory( "http://purl.org/escidoc/metadata/ves/content-categories/any-fulltext"); metadata.setTitle(new TextVO("Link")); locator.getMetadataSets().add(metadata); itemVO.getFiles().add(locator); } // 
web_url as URI-Identifier else if (fields.get("web_url") != null) { mds.getIdentifiers().add(new IdentifierVO(IdentifierVO.IdType.URI, BibTexUtil .stripBraces(BibTexUtil.bibtexDecode(fields.get("web_url").toString()), false))); } // Prevent the creation of an empty source if (sourceVO.getTitle() != null && sourceVO.getTitle().getValue() != null && sourceVO.getTitle().getValue() != "" && sourceVO.getGenre() != null) { mds.getSources().add(sourceVO); // Prevent the creation of an empty second if (sourceVO.getSources() != null && !sourceVO.getSources().isEmpty() && sourceVO.getSources().get(0) != null && sourceVO.getSources().get(0).getTitle() != null && sourceVO.getSources().get(0).getTitle().getValue() != null && sourceVO.getSources().get(0).getTitle().getValue() != "") { mds.getSources().add(sourceVO.getSources().get(0)); } } // Prevent the creation of an empty second source if (secondSourceVO.getTitle() != null && secondSourceVO.getTitle().getValue() != null && secondSourceVO.getTitle().getValue() != "" && secondSourceVO.getGenre() != null) { mds.getSources().add(secondSourceVO); // Prevent the creation of an empty second if (secondSourceVO.getSources() != null && !secondSourceVO.getSources().isEmpty() && secondSourceVO.getSources().get(0) != null && secondSourceVO.getSources().get(0).getTitle() != null && secondSourceVO.getSources().get(0).getTitle().getValue() != null && secondSourceVO.getSources().get(0).getTitle().getValue() != "") { mds.getSources().add(secondSourceVO.getSources().get(0)); } } // New mapping for MPIS // DOI if (fields.get("doi") != null) { mds.getIdentifiers().add(new IdentifierVO(IdentifierVO.IdType.DOI, BibTexUtil.stripBraces(BibTexUtil.bibtexDecode(fields.get("doi").toString()), false))); } // eid if (fields.get("eid") != null) { if (mds.getSources().size() == 1) { mds.getSources().get(0).setSequenceNumber(BibTexUtil .stripBraces(BibTexUtil.bibtexDecode(fields.get("eid").toString()), false)); } } // rev if (fields.get("rev") != null) { if 
("Peer".equals( BibTexUtil.stripBraces(BibTexUtil.bibtexDecode(fields.get("rev").toString()), false))) { mds.setReviewMethod(ReviewMethod.PEER); } else if ("No review".equals( BibTexUtil.stripBraces(BibTexUtil.bibtexDecode(fields.get("rev").toString()), false))) { mds.setReviewMethod(ReviewMethod.NO_REVIEW); } } // MPG-Affil if (fields.get("MPG-Affil") != null) { if ("Peer".equals(BibTexUtil .stripBraces(BibTexUtil.bibtexDecode(fields.get("MPG-Affil").toString()), false))) { // TODO } } // MPIS Groups if (fields.get("group") != null) { String[] groups = BibTexUtil .stripBraces(BibTexUtil.bibtexDecode(fields.get("group").toString()), false).split(","); for (String group : groups) { group = group.trim(); if (!"".equals(group)) { if (groupSet == null) { try { groupSet = loadGroupSet(); } catch (Exception e) { throw new RuntimeException(e); } } if (!groupSet.contains(group)) { throw new RuntimeException("Group '" + group + "' not found."); } mds.getSubjects() .add(new TextVO(group, null, SubjectClassification.MPIS_GROUPS.toString())); } } } // MPIS Projects if (fields.get("project") != null) { String[] projects = BibTexUtil .stripBraces(BibTexUtil.bibtexDecode(fields.get("project").toString()), false) .split(","); for (String project : projects) { project = project.trim(); if (!"".equals(project)) { if (projectSet == null) { try { projectSet = loadProjectSet(); } catch (Exception e) { throw new RuntimeException(e); } } if (!projectSet.contains(project)) { throw new RuntimeException("Project '" + project + "' not found."); } mds.getSubjects() .add(new TextVO(project, null, SubjectClassification.MPIS_PROJECTS.toString())); } } } // Cite Key mds.getIdentifiers().add(new IdentifierVO(IdType.BIBTEX_CITEKEY, entry.getEntryKey())); } else if (object instanceof BibtexToplevelComment) { this.logger.debug("Comment found: " + ((BibtexToplevelComment) object).getContent()); } } XmlTransforming xmlTransforming = new XmlTransformingBean(); try { if (entryFound) { return 
xmlTransforming.transformToItem(itemVO); } else { this.logger.warn("No entry found in BibTex record."); throw new RuntimeException(); } } catch (TechnicalException e) { this.logger.error("An error ocurred while transforming the item."); throw new RuntimeException(e); } }
From source file:com.ikanow.infinit.e.harvest.enrichment.custom.UnstructuredAnalysisHarvester.java
/** * executeHarvest For single-feed calls (note exception handling happens in * SAH)// www.j av a 2 s . c o m * * @param source * @param doc * @return * @throws ExtractorDocumentLevelException */ public boolean executeHarvest(HarvestContext context, SourcePojo source, DocumentPojo doc, boolean bFirstTime, boolean bMoreDocs) throws ExtractorDocumentLevelException { regexDuplicates = new HashSet<String>(); cleaner = null; boolean bGetRawDoc = source.getExtractType().equalsIgnoreCase("feed") && (null == doc.getFullText()); // (ie don't have full text and will need to go fetch it from network) if (bFirstTime) { nBetweenDocs_ms = -1; // (reset eg bewteen searchConfig and SAH) } if ((-1 == nBetweenDocs_ms) && bGetRawDoc && (bMoreDocs || bFirstTime)) { // (don't bother if not using it...) // Can override the default (feed) wait time from within the source // (eg for sites that we know // don't get upset about getting hammered) if (null != source.getRssConfig()) { if (null != source.getRssConfig().getWaitTimeOverride_ms()) { nBetweenDocs_ms = source.getRssConfig().getWaitTimeOverride_ms(); } } if (-1 == nBetweenDocs_ms) { // (ie not overridden so use default) PropertiesManager props = new PropertiesManager(); nBetweenDocs_ms = props.getWebCrawlWaitTime(); } } // TESTED (overridden and using system default) _context = context; securityManager = _context.getSecurityManager(); UnstructuredAnalysisConfigPojo uap = source.getUnstructuredAnalysisConfig(); int nChanges = 0; if (null != doc.getMetaData()) { nChanges = doc.getMetaData().size(); } boolean bFetchedUrl = false; if (bGetRawDoc) { if (null == source.getRssConfig()) { source.setRssConfig(new SourceRssConfigPojo()); // (makes logic easier down the road) } try { // Workaround for observed twitter bug (first access after the // RSS was gzipped) if (bFirstTime) { // (first time through, sleep following a URL/RSS access) if (null != source.getUrl()) { // (have already made a call to RSS (or "searchConfig" URL) try { 
Thread.sleep(nBetweenDocs_ms); } catch (InterruptedException e) { } } // TESTED } if ((null != source.useTextExtractor()) && source.useTextExtractor().equalsIgnoreCase("tika")) { // Special case: if tika enabled then do that first if (null == tikaExtractor) { tikaExtractor = new TextExtractorTika(); tikaExtractor.extractText(doc); } } else { getRawTextFromUrlIfNeeded(doc, source.getRssConfig()); } bFetchedUrl = true; } catch (SecurityException e) { // This seems worthy of actually logging, even though it's a lowly doc error _context.getHarvestStatus().logMessage(e.getMessage(), true); throw new ExtractorDocumentLevelException(e.getMessage()); } //TESTED catch (Exception e) { // Failed to get full text twice... remove doc and carry on throw new ExtractorDocumentLevelException(e.getMessage()); } } long nTime_ms = System.currentTimeMillis(); // ^^^ (end slight hack to get raw text to the UAH for RSS feeds) if (uap != null) { List<metaField> meta = uap.getMeta(); if (savedUap != uap) { String headerRegEx = uap.getHeaderRegEx(); String footerRegEx = uap.getFooterRegEx(); if (headerRegEx != null) headerPattern = Pattern.compile(headerRegEx, Pattern.DOTALL); if (footerRegEx != null) footerPattern = Pattern.compile(footerRegEx, Pattern.DOTALL); savedUap = uap; } try { processBody(doc, meta, true, source, uap); } catch (Exception e) { this._context.getHarvestStatus().logMessage("processBody1: " + e.getMessage(), true); //DEBUG (don't output log messages per doc) //logger.error("processBody1: " + e.getMessage(), e); } try { if (uap.getSimpleTextCleanser() != null) { cleanseText(uap.getSimpleTextCleanser(), doc); } } catch (Exception e) { this._context.getHarvestStatus().logMessage("cleanseText: " + e.getMessage(), true); //DEBUG (don't output log messages per doc) //logger.error("cleanseText: " + e.getMessage(), e); } try { processHeader(headerPattern, doc, meta, source, uap); processFooter(footerPattern, doc, meta, source, uap); } catch (Exception e) { 
this._context.getHarvestStatus().logMessage("header/footerPattern: " + e.getMessage(), true); //DEBUG (don't output log messages per doc) //logger.error("header/footerPattern: " + e.getMessage(), e); } try { processBody(doc, meta, false, source, uap); } catch (Exception e) { this._context.getHarvestStatus().logMessage("processBody2: " + e.getMessage(), true); //DEBUG (don't output log messages per doc) //logger.error("processBody2: " + e.getMessage(), e); } } if (bMoreDocs && bFetchedUrl) { nTime_ms = nBetweenDocs_ms - (System.currentTimeMillis() - nTime_ms); // (ie delay time - processing time) if (nTime_ms > 0) { try { Thread.sleep(nTime_ms); } catch (InterruptedException e) { } } } // (end politeness delay for URL getting from a single source (likely site) if (null != doc.getMetaData()) { if (nChanges != doc.getMetaData().size()) { return true; } } return false; }
From source file:org.eclipse.birt.report.engine.emitter.docx.writer.BasicComponent.java
/**
 * Emits the MIME part header for an HTML foreign content and prepares its markup.
 *
 * <p>If the whole raw value matches {@code validHtml}, the text is split into its three
 * captured groups (leading head information, attributes of the {@code <html>} tag, and
 * the remaining text); otherwise the raw value is used unchanged. The opening
 * {@code <html>} tag is assembled in a buffer that is then handed to
 * {@code buildStyleClass} and {@code buildHtmlBody}, and the closing tag is printed.
 *
 * @param foreignContent content whose raw value is the HTML text to write
 * @throws EncoderException declared by this method; not thrown directly here
 * @throws UnsupportedEncodingException declared by this method; not thrown directly here
 */
private void writeHtmlText(IForeignContent foreignContent)
        throws EncoderException, UnsupportedEncodingException {
    // Part header, terminated by an empty line.
    mhtPartWriter.println();
    mhtPartWriter.println("--" + BOUNDARY);
    mhtPartWriter.println("Content-Type: text/html; charset=\"gb2312\"");
    mhtPartWriter.println("Content-Transfer-Encoding: quoted-printable");
    mhtPartWriter.println();

    StringBuffer markup = new StringBuffer();
    String bodyText = foreignContent.getRawValue().toString();
    String headPart = null;
    String htmlTagAttributes = null;

    int regexFlags = Pattern.CASE_INSENSITIVE | Pattern.MULTILINE | Pattern.DOTALL;
    Matcher htmlMatcher = Pattern.compile(validHtml, regexFlags).matcher(bodyText);
    // Only decompose the text when the first match spans the entire raw value.
    boolean coversWholeText = htmlMatcher.find() && htmlMatcher.group(0).length() == bodyText.length();
    if (coversWholeText) {
        headPart = htmlMatcher.group(1);
        htmlTagAttributes = htmlMatcher.group(2);
        bodyText = htmlMatcher.group(3);
    }

    // Quoted-printable form of the bytes EF BB BF.
    mhtPartWriter.print("=EF=BB=BF");

    if (headPart != null) {
        markup.append(headPart).append(" ");
    }
    markup.append("<html");
    if (htmlTagAttributes != null) {
        markup.append(" ").append(htmlTagAttributes);
    }
    markup.append(">");

    IStyle computedStyle = foreignContent.getComputedStyle();
    buildStyleClass(computedStyle, markup);
    buildHtmlBody(foreignContent, bodyText, computedStyle, markup);
    mhtPartWriter.print("</html>");
}
From source file:org.geoserver.wms.wms_1_3.GetFeatureInfoIntegrationTest.java
/**
 * Issues the same GetFeatureInfo request against a coverage layer twice - once as
 * VERSION=1.1.1 and once as VERSION=1.3.0 with the BBOX ordinates swapped - and checks
 * that both plain-text responses report the same red, green and blue band values.
 */
@Test
public void testXYCoverage() throws Exception {
    final String layerId = getLayerId(MockData.USA_WORLDIMG);
    final String baseUrl = "wms?styles=&format=jpeg&info_format=text/plain&request=GetFeatureInfo&layers="
            + layerId + "&query_layers=" + layerId + "&WIDTH=512&HEIGHT=408&X=75&Y=132&srs=epsg:4326";

    // One pattern serves both responses; DOTALL lets ".*" run across line breaks.
    final Pattern bandValues = Pattern.compile(
            ".*RED_BAND = (\\d+\\.\\d+).*GREEN_BAND = (\\d+\\.\\d+).*BLUE_BAND = (\\d+\\.\\d+).*",
            Pattern.DOTALL);

    // Reference values from the 1.1.1 request.
    String response111 = getAsString(baseUrl + "&VERSION=1.1.1&BBOX=-180,-143.4375,180,143.4375");
    Matcher bands111 = bandValues.matcher(response111);
    assertTrue(bands111.matches());
    double expectedRed = Double.parseDouble(bands111.group(1));
    double expectedGreen = Double.parseDouble(bands111.group(2));
    double expectedBlue = Double.parseDouble(bands111.group(3));

    // Same query as 1.3.0 must yield the same band values.
    String response130 = getAsString(baseUrl + "&VERSION=1.3.0&BBOX=-143.4375,-180,143.4375,180");
    Matcher bands130 = bandValues.matcher(response130);
    assertTrue(bands130.matches());
    assertEquals(expectedRed, Double.parseDouble(bands130.group(1)), 0.0000001);
    assertEquals(expectedGreen, Double.parseDouble(bands130.group(2)), 0.0000001);
    assertEquals(expectedBlue, Double.parseDouble(bands130.group(3)), 0.0000001);
}