// Java tutorial
/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.marmotta.ucuenca.wk.provider.orcid;

import org.apache.commons.lang3.StringUtils;
import com.google.common.base.Preconditions;
import com.google.common.collect.Lists;
import java.io.IOException;
import org.apache.marmotta.ldclient.api.endpoint.Endpoint;
import org.apache.marmotta.ldclient.exception.DataRetrievalException;
import org.apache.marmotta.ldclient.services.provider.AbstractHttpProvider;
import org.openrdf.model.Model;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.InputStream;
import java.io.UnsupportedEncodingException;
import java.net.URLDecoder;
import java.net.URLEncoder;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.Locale;
import java.util.concurrent.ConcurrentHashMap;
import org.jdom2.Document;
import org.jdom2.Element;
import org.jdom2.JDOMException;
import org.jdom2.Namespace;
import org.jdom2.filter.ElementFilter;
import org.jdom2.input.SAXBuilder;
import org.jdom2.input.sax.XMLReaders;
import org.jdom2.xpath.XPathFactory;
import org.openrdf.model.URI;
import org.openrdf.model.Value;
import org.openrdf.model.ValueFactory;
import org.openrdf.model.impl.ValueFactoryImpl;
import org.openrdf.model.vocabulary.OWL;

/**
 * Support ORCID Author information as RDF
 * <p/>
 * The provider handles three kinds of request URL (see
 * {@link #parseResponse}): an author search, an author record and a single
 * work. Search and record responses return follow-up URLs for the next level.
 * <p/>
 * Author: Jose Ortiz
 */
public class ORCIDRawProvider extends AbstractHttpProvider {

    private static final Logger log = LoggerFactory.getLogger(ORCIDRawProvider.class);

    public static final String NAME = "ORCID Raw Provider";
    public static final String PATTERN = "https://orcid\\.org/search/.*";
    public static final String SEARCHAPI = "https://pub.orcid.org/v2.1/search/?q=%s";
    public static final String DESCRIBEAPI = "https://pub.orcid.org/v2.0/%s/record";
    public static final String ORCIDPREFIX = "https://orcid.org/ontology/";

    /** Charset name used for URL decoding/encoding, independent of the platform default. */
    private static final String UTF_8 = "UTF-8";

    /** SAX feature that makes the parser reject any document containing a DOCTYPE. */
    private static final String DISALLOW_DOCTYPE = "http://apache.org/xml/features/disallow-doctype-decl";

    /** Namespace prefixes available to every XPath expression passed to {@link #queryElements}. */
    private static final Namespace[] ORCID_NAMESPACES = {
        Namespace.getNamespace("external-identifier", "http://www.orcid.org/ns/external-identifier"),
        Namespace.getNamespace("email", "http://www.orcid.org/ns/email"),
        Namespace.getNamespace("other-name", "http://www.orcid.org/ns/other-name"),
        Namespace.getNamespace("personal-details", "http://www.orcid.org/ns/personal-details"),
        Namespace.getNamespace("person", "http://www.orcid.org/ns/person"),
        Namespace.getNamespace("record", "http://www.orcid.org/ns/record"),
        Namespace.getNamespace("search", "http://www.orcid.org/ns/search"),
        Namespace.getNamespace("keyword", "http://www.orcid.org/ns/keyword"),
        Namespace.getNamespace("employment", "http://www.orcid.org/ns/employment"),
        Namespace.getNamespace("researcher-url", "http://www.orcid.org/ns/researcher-url"),
        Namespace.getNamespace("activities", "http://www.orcid.org/ns/activities"),
        Namespace.getNamespace("education", "http://www.orcid.org/ns/education"),
        Namespace.getNamespace("work", "http://www.orcid.org/ns/work"),
        Namespace.getNamespace("common", "http://www.orcid.org/ns/common")
    };

    /**
     * Maps every resource passed to {@link #buildRequestUrl} to {@code 0}.
     * NOTE(review): nothing in this class reads the map and entries are never
     * removed - confirm whether it is still needed.
     */
    private final ConcurrentHashMap<String, Integer> stats = new ConcurrentHashMap<String, Integer>();

    /**
     * Return the name of this data provider. To be used e.g. in the
     * configuration and in log messages.
     *
     * @return {@link #NAME}
     */
    @Override
    public String getName() {
        return NAME;
    }

    /**
     * Return the list of mime types accepted by this data provider.
     *
     * @return a single-element array containing {@code text/xml}
     */
    @Override
    public String[] listMimeTypes() {
        return new String[] { "text/xml" };
    }

    /**
     * Build the URL to use to call the webservice in order to retrieve the data
     * for the resource passed as argument.
     * <p/>
     * The last path segment of {@code resource} is URL-decoded, turned into a
     * search query by {@link #createSolrQuery(String)}, URL-encoded again and
     * inserted into {@link #SEARCHAPI}.
     *
     * @param resource the resource URI; must not be blank
     * @param endpoint endpoint configuration for the data provider (optional, not used here)
     * @return a single-element list holding the search URL
     * @throws IllegalStateException if {@code resource} is blank
     */
    @Override
    public List<String> buildRequestUrl(String resource, Endpoint endpoint) {
        Preconditions.checkState(StringUtils.isNotBlank(resource));
        try {
            // Explicit UTF-8: the one-argument decode/encode overloads are deprecated
            // because they silently use the platform default charset.
            String id = URLDecoder.decode(resource.substring(resource.lastIndexOf('/') + 1), UTF_8);
            String url = String.format(SEARCHAPI, URLEncoder.encode(createSolrQuery(id), UTF_8));
            stats.put(resource, 0);
            return Collections.singletonList(url);
        } catch (UnsupportedEncodingException e) {
            // Every Java platform is required to support UTF-8.
            throw new IllegalStateException("UTF-8 encoding is not available", e);
        }
    }

    /**
     * Dispatch the response to the matching parser, chosen from the shape of
     * {@code requestUrl}: search results, an author record, or a single work.
     *
     * @param resource    the resource originally requested
     * @param requestUrl  the URL that produced {@code input}
     * @param triples     model that receives the extracted statements
     * @param input       the XML response body
     * @param contentType content type of the response (not used here)
     * @return follow-up URLs to retrieve; empty when the URL matches no known shape
     * @throws DataRetrievalException if the response cannot be read or parsed as XML
     */
    @Override
    public List<String> parseResponse(String resource, String requestUrl, Model triples, InputStream input, String contentType) throws DataRetrievalException {
        // SLF4J substitutes "{}" only; the previous "{0}" was printed literally.
        log.debug("Request Successful to {}", requestUrl);
        if (requestUrl.startsWith("https://pub.orcid.org/v2.1/search")) {
            return parseSearchAuthors(input, resource, triples);
        } else if (requestUrl.startsWith("https://pub.orcid.org/v2.0/") && requestUrl.endsWith("/record")) {
            return parseAuthor(input, requestUrl, triples);
        } else if (requestUrl.startsWith("https://pub.orcid.org/v2.1/") && requestUrl.contains("/work/")) {
            return parseDocs(input, requestUrl, triples);
        }
        return Collections.emptyList();
    }

    /**
     * Parse the response body as XML, refusing documents that carry a DOCTYPE
     * so that external entities in a remote response are never resolved.
     *
     * @param input the XML response body
     * @return the parsed document
     * @throws DataRetrievalException if the body cannot be read or is not acceptable XML
     */
    private Document parseXml(InputStream input) throws DataRetrievalException {
        SAXBuilder builder = new SAXBuilder(XMLReaders.NONVALIDATING);
        builder.setFeature(DISALLOW_DOCTYPE, true);
        builder.setExpandEntities(false);
        try {
            return builder.build(input);
        } catch (JDOMException e) {
            throw new DataRetrievalException("could not parse XML response. It is not in proper XML format", e);
        } catch (IOException e) {
            throw new DataRetrievalException("I/O error while parsing XML response", e);
        }
    }

    /**
     * Map a single work document to statements whose subject is {@code requestUrl}.
     *
     * @return an empty list; a work produces no follow-up requests
     */
    private List<String> parseDocs(InputStream input, String requestUrl, Model triples) throws DataRetrievalException {
        Document doc = parseXml(input);
        mapProp(true, requestUrl, ORCIDPREFIX + "title", triples, doc, "/work:work/work:title/common:title");
        mapProp(true, requestUrl, ORCIDPREFIX + "url", triples, doc, "/work:work/work:url");
        mapPropRe(requestUrl, ORCIDPREFIX + "creator", ORCIDPREFIX + "contributors", ORCIDPREFIX + "name", triples, doc, "/work:work/work:contributors/work:contributor/work:credit-name");
        mapProp(true, requestUrl, ORCIDPREFIX + "date", triples, doc, "/work:work/common:publication-date/common:year");
        mapProp(true, requestUrl, ORCIDPREFIX + "type", triples, doc, "/work:work/work:type");
        mapProp(true, requestUrl, ORCIDPREFIX + "collection", triples, doc, "/work:work/work:journal-title");
        mapProp(true, requestUrl, ORCIDPREFIX + "doi", triples, doc, "/work:work/common:external-ids/common:external-id[common:external-id-type='doi']/common:external-id-value");
        mapProp(true, requestUrl, ORCIDPREFIX + "issn", triples, doc, "/work:work/common:external-ids/common:external-id[common:external-id-type='issn']/common:external-id-value");
        mapProp(true, requestUrl, ORCIDPREFIX + "isbn", triples, doc, "/work:work/common:external-ids/common:external-id[common:external-id-type='isbn']/common:external-id-value");
        return new ArrayList<>();
    }

    /**
     * Map an author record to statements whose subject is {@code requestUrl}
     * and link the author to each of the works listed in the record.
     *
     * @return one v2.1 work URL per {@code put-code} found in the record
     */
    private List<String> parseAuthor(InputStream input, String requestUrl, Model triples) throws DataRetrievalException {
        Document doc = parseXml(input);
        mapProp(true, requestUrl, ORCIDPREFIX + "bio", triples, doc, "/record:record/person:person/person:biography/personal-details:content");
        mapProp(true, requestUrl, ORCIDPREFIX + "familyName", triples, doc, "/record:record/person:person/person:name/personal-details:family-name");
        mapProp(true, requestUrl, ORCIDPREFIX + "givenName", triples, doc, "/record:record/person:person/person:name/personal-details:given-names");
        mapProp(true, requestUrl, ORCIDPREFIX + "name", triples, doc, "/record:record/person:person/person:name/personal-details:credit-name");
        mapProp(true, requestUrl, ORCIDPREFIX + "name", triples, doc, "/record:record/person:person/other-name:other-names/other-name:other-name/other-name:content");
        mapProp(true, requestUrl, ORCIDPREFIX + "mail", triples, doc, "/record:record/person:person/email:emails/email:email/email:email");
        mapProp(true, requestUrl, ORCIDPREFIX + "account", triples, doc, "/record:record/person:person/external-identifier:external-identifiers/external-identifier:external-identifier/common:external-id-url");
        mapProp(true, requestUrl, ORCIDPREFIX + "topics", triples, doc, "/record:record/person:person/keyword:keywords/keyword:keyword/keyword:content");
        mapProp(true, requestUrl, ORCIDPREFIX + "account", triples, doc, "/record:record/person:person/researcher-url:researcher-urls/researcher-url:researcher-url/researcher-url:url");
        mapProp(true, requestUrl, ORCIDPREFIX + "aff", triples, doc, "/record:record/activities:activities-summary/activities:educations/education:education-summary/education:organization/common:name");
        mapProp(true, requestUrl, ORCIDPREFIX + "aff", triples, doc, "/record:record/activities:activities-summary/activities:works/activities:group/work:work-summary".equals("") ? "" : "/record:record/activities:activities-summary/activities:employments/employment:employment-summary/employment:organization/common:name");

        // Literal replace (not regex replaceAll, where '.' matches any character), and
        // plain concatenation instead of String.format on a URL-derived template.
        String workUrlPrefix = requestUrl.replace("/record", "/work/").replace("/v2.0", "/v2.1");

        ValueFactory factory = ValueFactoryImpl.getInstance();
        URI author = factory.createURI(requestUrl);
        URI publications = factory.createURI(ORCIDPREFIX + "publications");
        List<String> workUrls = new ArrayList<>();
        List<String> putCodes = getAtt(doc, "/record:record/activities:activities-summary/activities:works/activities:group/work:work-summary", "put-code");
        for (String putCode : putCodes) {
            String workUrl = workUrlPrefix + putCode;
            workUrls.add(workUrl);
            triples.add(factory.createStatement(author, publications, factory.createURI(workUrl)));
        }
        return workUrls;
    }

    /**
     * Turn every ORCID path in a search response into an author-record URL
     * built from {@link #DESCRIBEAPI}, and relate that URL to {@code resource}.
     *
     * @return the author-record URLs, in document order
     */
    private List<String> parseSearchAuthors(InputStream input, String resource, Model triples) throws DataRetrievalException {
        Document doc = parseXml(input);
        ValueFactory factory = ValueFactoryImpl.getInstance();
        List<String> recordUrls = new ArrayList<>();
        for (Element path : queryElements(doc, "/search:search/search:result/common:orcid-identifier/common:path")) {
            String orcid = path.getText();
            String recordUrl = String.format(DESCRIBEAPI, orcid);
            URI record = factory.createURI(recordUrl);
            triples.add(factory.createStatement(record, OWL.ONEOF, factory.createURI(resource)));
            triples.add(factory.createStatement(record, factory.createURI(ORCIDPREFIX + "orcid"), factory.createLiteral(orcid)));
            recordUrls.add(recordUrl);
        }
        return recordUrls;
    }

    /**
     * Build the search query for an identifier of the form
     * {@code given-names_family-names}.
     * <p/>
     * The identifier is split on {@code _}: the first part supplies the
     * {@code given-names} term and the last of any further parts supplies the
     * {@code family-name} term. Within a part, {@code -} separates words; each
     * word is trimmed, lower-cased and joined with single spaces. When the
     * identifier contains no {@code _}, the {@code family-name} term is empty.
     *
     * @param id the decoded identifier taken from the resource URI
     * @return the query string, always ending with the fixed Ecuador text filter
     */
    private String createSolrQuery(String id) {
        String givenNames = "";
        String familyNames = "";
        String[] parts = id.split("_");
        for (int i = 0; i < parts.length; i++) {
            String words = toQueryWords(parts[i]);
            if (i == 0) {
                givenNames = words;
            } else {
                familyNames = words;
            }
        }
        return "family-name:" + familyNames + " AND given-names:" + givenNames + " AND (text:ec OR text:ecuador OR text:ecuadorian OR text:ecuatoriano OR text:ecuatoriana)";
    }

    /** Split {@code part} on {@code -} and join the trimmed, lower-cased words with single spaces. */
    private static String toQueryWords(String part) {
        String[] tokens = part.split("-");
        StringBuilder words = new StringBuilder();
        for (int j = 0; j < tokens.length; j++) {
            if (j > 0) {
                words.append(' ');
            }
            // Locale.ROOT keeps the result independent of the JVM default locale.
            words.append(tokens[j].trim().toLowerCase(Locale.ROOT));
        }
        return words.toString();
    }

    /**
     * Evaluate an XPath expression against a document and return the matching elements.
     *
     * @param n     the document to query
     * @param query XPath expression; may use any prefix declared in {@code ORCID_NAMESPACES}
     * @return the matching elements
     */
    protected static List<Element> queryElements(Document n, String query) {
        return XPathFactory.instance()
                .compile(query, new ElementFilter(), null, ORCID_NAMESPACES)
                .evaluate(n);
    }

    /**
     * Add one statement {@code (rs, prop, value)} for every element matched by {@code qr}.
     *
     * @param literal {@code true} to use the element text as a literal, {@code false} to use it as a URI
     * @param rs      subject URI
     * @param prop    predicate URI
     * @param tr      model that receives the statements
     * @param dc      document to query
     * @param qr      XPath expression selecting the value elements
     */
    private void mapProp(boolean literal, String rs, String prop, Model tr, Document dc, String qr) {
        ValueFactory factory = ValueFactoryImpl.getInstance();
        URI subject = factory.createURI(rs);
        URI predicate = factory.createURI(prop);
        for (Element e : queryElements(dc, qr)) {
            Value object;
            if (literal) {
                object = factory.createLiteral(e.getText());
            } else {
                object = factory.createURI(e.getText());
            }
            tr.add(subject, predicate, object);
        }
    }

    /**
     * Collect the value of attribute {@code fl} from every element matched by {@code qr}.
     * Elements that do not carry the attribute are skipped.
     *
     * @param dc document to query
     * @param qr XPath expression selecting the elements
     * @param fl attribute name
     * @return the attribute values, in document order
     */
    private List<String> getAtt(Document dc, String qr, String fl) {
        List<String> values = new ArrayList<>();
        for (Element e : queryElements(dc, qr)) {
            // getAttributeValue returns null when the attribute is absent,
            // whereas getAttribute(fl).getValue() would throw a NullPointerException.
            String value = e.getAttributeValue(fl);
            if (value != null) {
                values.add(value);
            }
        }
        return values;
    }

    /**
     * Create one intermediate node per element matched by {@code qr}. The node
     * for the i-th match has the URI {@code rs + "_" + i}.
     *
     * @param rs    subject URI
     * @param prop0 predicate linking {@code rs} to the first node only
     * @param prop  predicate linking {@code rs} to every node
     * @param prop2 predicate linking each node to the matched element's text
     * @param tr    model that receives the statements
     * @param dc    document to query
     * @param qr    XPath expression selecting the elements
     */
    private void mapPropRe(String rs, String prop0, String prop, String prop2, Model tr, Document dc, String qr) {
        ValueFactory factory = ValueFactoryImpl.getInstance();
        URI subject = factory.createURI(rs);
        URI everyNode = factory.createURI(prop);
        URI nodeText = factory.createURI(prop2);
        URI firstNode = factory.createURI(prop0);
        int i = 0;
        for (Element e : queryElements(dc, qr)) {
            URI node = factory.createURI(rs + "_" + i);
            tr.add(subject, everyNode, node);
            if (i == 0) {
                tr.add(subject, firstNode, node);
            }
            tr.add(node, nodeText, factory.createLiteral(e.getText()));
            i++;
        }
    }
}