Java tutorial
/* * S4 Java client library * Copyright 2016 Ontotext AD * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package com.ontotext.s4.service; import java.io.File; import java.io.FileInputStream; import java.io.FileOutputStream; import java.io.IOException; import java.io.InputStream; import java.net.MalformedURLException; import java.net.URL; import java.nio.ByteBuffer; import java.nio.charset.Charset; import java.nio.file.Files; import java.nio.file.Path; import java.util.Properties; import org.apache.commons.io.IOUtils; import com.fasterxml.jackson.core.type.TypeReference; import com.fasterxml.jackson.databind.JsonNode; import com.ontotext.s4.catalog.ServiceDescriptor; import com.ontotext.s4.catalog.ServicesCatalog; import com.ontotext.s4.client.HttpClient; import com.ontotext.s4.client.HttpClientException; import com.ontotext.s4.common.Parameters; /** * Main entry point for the S4 text analytics APIs to send individual documents for annotation * by a particular pipeline and receive the results immediately. */ public class S4ServiceClient { private HttpClient client; /** * Constructs a <code>S4ServiceClient</code> for accessing a specific processing * pipeline on the s4.ontotext.com platform using the given credentials. * * @param item the {@link ServiceDescriptor} which represents the processing pipeline which will be used * @param apiKeyId API key ID for authentication * @param apiPassword corresponding password */ public S4ServiceClient(ServiceDescriptor item, String apiKeyId, String apiPassword) { URL endpoint; try { endpoint = new URL(item.getServiceUrl()); } catch (MalformedURLException murle) { throw new IllegalArgumentException("Invalid ServiceDescriptor specified. No API endpoint found.", murle); } this.client = new HttpClient(endpoint, apiKeyId, apiPassword); } /** * Constructs a <code>S4ServiceClient</code> for accessing a specific processing * pipeline on the s4.ontotext.com platform using the given credentials. * * @param endpoint the URL of the pipeline which will be used for processing * @param apiKeyId apiKeyId API key ID for authentication * @param apiPassword corresponding password */ public S4ServiceClient(URL endpoint, String apiKeyId, String apiPassword) { this.client = new HttpClient(endpoint, apiKeyId, apiPassword); } /** * Annotates a single document with the specified MIME type. Returns an object which allows * for convenient access to the annotations in the annotated document. * * @param documentText the document content to annotate * @param documentMimeType the MIME type of the document which will be annotated * @return an {@link AnnotatedDocument} containing the original content as well as the annotations produced * @throws S4ServiceClientException */ public AnnotatedDocument annotateDocument(String documentText, SupportedMimeType documentMimeType) throws S4ServiceClientException { ServiceRequest rq = new ServiceRequest(documentText, documentMimeType); return processRequest(rq, true); } /** * Classifies a single document with the specified MIME type. Returns an object which allows * for convenient access to the classification information of the document. * * @param documentText the document content to classify * @param documentMimeType the MIME type of the document which will be classified * @return an {@link ClassifiedDocument} containing the original content as well as the classifications produced * @throws S4ServiceClientException */ public ClassifiedDocument classifyDocument(String documentText, SupportedMimeType documentMimeType) throws S4ServiceClientException { ServiceRequest rq = new ServiceRequest(documentText, documentMimeType); return classifyRequest(rq, false); } /** * Annotates the contents of a single file with the specified MIME type. Returns an object which allows * for convenient access to the annotations in the annotated document. * * @param documentContent the file whose contents will be annotated * @param documentEncoding the encoding of the document file * @param documentMimeType the MIME type of the document to annotated content as well as the annotations produced * @throws IOException * @throws S4ServiceClientException */ public AnnotatedDocument annotateFileContents(File documentContent, Charset documentEncoding, SupportedMimeType documentMimeType) throws IOException, S4ServiceClientException { Path documentPath = documentContent.toPath(); if (!Files.isReadable(documentPath)) { throw new IOException("File " + documentPath.toString() + " is not readable."); } ByteBuffer buff; buff = ByteBuffer.wrap(Files.readAllBytes(documentPath)); String content = documentEncoding.decode(buff).toString(); return annotateDocument(content, documentMimeType); } /** * Classifies the contents of a single file with the specified MIME type. Returns an object which allows * for convenient access to the classification information for the document. * * @param documentContent the file whose contents will be classified * @param documentEncoding the encoding of the document file * @param documentMimeType the MIME type of the document to classified content as well as the classifications produced * @throws IOException * @throws S4ServiceClientException */ public ClassifiedDocument classifyFileContents(File documentContent, Charset documentEncoding, SupportedMimeType documentMimeType) throws IOException, S4ServiceClientException { Path documentPath = documentContent.toPath(); if (!Files.isReadable(documentPath)) { throw new IOException("File " + documentPath.toString() + " is not readable."); } ByteBuffer buff; buff = ByteBuffer.wrap(Files.readAllBytes(documentPath)); String content = documentEncoding.decode(buff).toString(); return classifyDocument(content, documentMimeType); } /** * Annotates a single document publicly available under a given URL. Returns an object which allows * for convenient access to the annotations in the annotated document * * @param documentUrl the publicly accessible URL from where the document will be downloaded * @param documentMimeType the MIME type of the document which will be annotated * @return an {@link AnnotatedDocument} which allows for convenient programmatic access to the annotated document * @throws S4ServiceClientException */ public AnnotatedDocument annotateDocumentFromUrl(URL documentUrl, SupportedMimeType documentMimeType) throws S4ServiceClientException { ServiceRequest rq = new ServiceRequest(documentUrl, documentMimeType); return processRequest(rq, true); } /** * Classifies a single document publicly available under a given URL. Returns an object which allows * for convenient access to the classifications in the classified document * * @param documentUrl the publicly accessible URL from where the document will be downloaded * @param documentMimeType the MIME type of the document which will be classified * @return an {@link ClassifiedDocument} which allows for convenient programmatic access to the classified document * @throws S4ServiceClientException */ public ClassifiedDocument classifyDocumentFromUrl(URL documentUrl, SupportedMimeType documentMimeType) throws S4ServiceClientException { ServiceRequest rq = new ServiceRequest(documentUrl, documentMimeType); return classifyRequest(rq, true); } /** * Annotates a single document and returns an {@link InputStream} from * which the contents of the serialized annotated document can be read * * @param documentText the contents of the document which will be annotated * @param documentMimeType the MIME type of the file which will be annotated * @param serializationFormat the format which will be used for serialization of the annotated document * @return an {@link InputStream} from which the serialization of the annotated document can be read * @throws S4ServiceClientException */ public InputStream annotateDocumentAsStream(String documentText, SupportedMimeType documentMimeType, ResponseFormat serializationFormat) throws S4ServiceClientException { ServiceRequest rq = new ServiceRequest(documentText, documentMimeType); try { return client.requestForStream("", "POST", rq, "Accept", serializationFormat.acceptHeader); } catch (HttpClientException e) { JsonNode response = e.getResponse(); if (response == null) { throw new S4ServiceClientException(e.getMessage(), e); } JsonNode msg = response.get("message"); throw new S4ServiceClientException(msg == null ? e.getMessage() : msg.asText(), e); } } /** * Classifies a single document and returns an {@link InputStream} from * which the contents of the serialized annotated document can be read * * @param documentText the contents of the document which will be classified * @param documentMimeType the MIME type of the file which will be classified * @return an {@link InputStream} from which the serialization of the classified document can be read * @throws S4ServiceClientException */ public InputStream classifyDocumentAsStream(String documentText, SupportedMimeType documentMimeType) throws S4ServiceClientException { ServiceRequest rq = new ServiceRequest(documentText, documentMimeType); try { return client.requestForStream("", "POST", rq, "Accept", ResponseFormat.JSON.acceptHeader); } catch (HttpClientException e) { JsonNode response = e.getResponse(); if (response == null) { throw new S4ServiceClientException(e.getMessage(), e); } JsonNode msg = response.get("message"); throw new S4ServiceClientException(msg == null ? e.getMessage() : msg.asText(), e); } } /** * Annotates the contents of a single file returning an * {@link InputStream} from which the annotated content can be read * * @param documentContent the file which will be annotated * @param documentEncoding the encoding of the file which will be annotated * @param documentMimeType the MIME type of the file which will be annotated * @param serializationFormat the serialization format used for the annotated content * * @throws IOException if there are problems reading the contents of the file * @throws S4ServiceClientException */ public InputStream annotateFileContentsAsStream(File documentContent, Charset documentEncoding, SupportedMimeType documentMimeType, ResponseFormat serializationFormat) throws IOException, S4ServiceClientException { Path documentPath = documentContent.toPath(); if (!Files.isReadable(documentPath)) { throw new IOException("File " + documentPath.toString() + " is not readable."); } ByteBuffer buff; buff = ByteBuffer.wrap(Files.readAllBytes(documentPath)); String content = documentEncoding.decode(buff).toString(); return annotateDocumentAsStream(content, documentMimeType, serializationFormat); } /** * Classifies the contents of a single file returning an * {@link InputStream} from which the classification information can be read * * @param documentContent the file which will be classified * @param documentEncoding the encoding of the file which will be classified * @param documentMimeType the MIME type of the file which will be classified * * @return Service response raw content * * @throws IOException if there are problems reading the contents of the file * @throws S4ServiceClientException */ public InputStream classifyFileContentsAsStream(File documentContent, Charset documentEncoding, SupportedMimeType documentMimeType) throws IOException, S4ServiceClientException { Path documentPath = documentContent.toPath(); if (!Files.isReadable(documentPath)) { throw new IOException("File " + documentPath.toString() + " is not readable."); } ByteBuffer buff; buff = ByteBuffer.wrap(Files.readAllBytes(documentPath)); String content = documentEncoding.decode(buff).toString(); return classifyDocumentAsStream(content, documentMimeType); } /** * Annotates a single document publicly available under a given URL. * Returns the annotated document serialized into the specified format * * @param documentUrl the publicly accessible URL from where the document will be downloaded * @param documentMimeType the MIME type of the document which will be annotated * @param serializationFormat the serialization format of the output * @return an {@link InputStream} from where the serialized output can be read * @throws S4ServiceClientException */ public InputStream annotateDocumentFromUrlAsStream(URL documentUrl, SupportedMimeType documentMimeType, ResponseFormat serializationFormat) throws S4ServiceClientException { ServiceRequest rq = new ServiceRequest(documentUrl, documentMimeType); try { return client.requestForStream("", "POST", rq, "Accept", serializationFormat.acceptHeader); } catch (HttpClientException e) { JsonNode response = e.getResponse(); if (response == null) { throw new S4ServiceClientException(e.getMessage(), e); } JsonNode msg = response.get("message"); throw new S4ServiceClientException(msg == null ? e.getMessage() : msg.asText(), e); } } /** * Classifies a single document publicly available under a given URL. * Returns the classified document serialized into the specified format * * @param documentUrl the publicly accessible URL from where the document will be downloaded * @param documentMimeType the MIME type of the document which will be classified * @return an {@link InputStream} from where the serialized output can be read * @throws S4ServiceClientException */ public InputStream classifyDocumentFromUrlAsStream(URL documentUrl, SupportedMimeType documentMimeType) throws S4ServiceClientException { ServiceRequest rq = new ServiceRequest(documentUrl, documentMimeType); try { return client.requestForStream("", "POST", rq, "Accept", ResponseFormat.JSON.acceptHeader); } catch (HttpClientException e) { JsonNode response = e.getResponse(); if (response == null) { throw new S4ServiceClientException(e.getMessage(), e); } JsonNode msg = response.get("message"); throw new S4ServiceClientException(msg == null ? e.getMessage() : msg.asText(), e); } } /** * This low level method allows the user to explicitly specify all the parameters sent to the service. * This is done by constructing the appropriate ServiceRequest object. * Returns the contents of the annotated document * * @param rq the request which will be sent * @param serializationFormat the format in which to output the annotated document * @param requestCompression whether to allow GZIP compression for large documents * @return an{@link InputStream} for the serialization of the annotated document in the specified format * @throws S4ServiceClientException */ public InputStream processRequestForStream(ServiceRequest rq, ResponseFormat serializationFormat, boolean requestCompression) throws S4ServiceClientException { try { if (requestCompression) { return client.requestForStream("", "POST", rq, "Accept", serializationFormat.acceptHeader, "Accept-Encoding", "gzip"); } else { return client.requestForStream("", "POST", rq, "Accept", serializationFormat.acceptHeader); } } catch (HttpClientException e) { JsonNode response = e.getResponse(); if (response == null) { throw new S4ServiceClientException(e.getMessage(), e); } JsonNode msg = response.get("message"); throw new S4ServiceClientException(msg == null ? e.getMessage() : msg.asText(), e); } } /** * This low level method allows the user to specify every parameter explicitly by setting the properties * of the OnlineService request object. Returns an object which wraps the annotated document. * * @param rq the request which will be sent to the service * @param requestCompression whether to allow GZIP compression for large documents * @return an {@link AnnotatedDocument} containing the original content as well as the annotations produced * @throws S4ServiceClientException */ public AnnotatedDocument processRequest(ServiceRequest rq, boolean requestCompression) throws S4ServiceClientException { try { if (requestCompression) { return client.request("", "POST", new TypeReference<AnnotatedDocument>() { }, rq, "Accept", ResponseFormat.GATE_JSON.acceptHeader, "Accept-Encoding", "gzip"); } else { return client.request("", "POST", new TypeReference<AnnotatedDocument>() { }, rq, "Accept", ResponseFormat.GATE_JSON.acceptHeader); } } catch (HttpClientException e) { JsonNode response = e.getResponse(); if (response == null) { throw new S4ServiceClientException(e.getMessage(), e); } JsonNode msg = response.get("message"); throw new S4ServiceClientException(msg == null ? e.getMessage() : msg.asText(), e); } } /** * This low level method allows the user to specify every parameter explicitly by setting the properties * of the OnlineService request object. Returns an object which wraps the classified document. * * @param rq the request which will be sent * @param requestCompression whether to allow GZIP compression for large documents * @return a {@link ClassifiedDocument} containing the original content as well as the annotations produced * @throws S4ServiceClientException */ public ClassifiedDocument classifyRequest(ServiceRequest rq, boolean requestCompression) throws S4ServiceClientException { try { if (requestCompression) { return client.request("", "POST", new TypeReference<ClassifiedDocument>() { }, rq, "Accept", ResponseFormat.JSON.acceptHeader, "Accept-Encoding", "gzip"); } else { return client.request("", "POST", new TypeReference<ClassifiedDocument>() { }, rq, "Accept", ResponseFormat.JSON.acceptHeader); } } catch (HttpClientException e) { JsonNode response = e.getResponse(); if (response == null) { throw new S4ServiceClientException(e.getMessage(), e); } JsonNode msg = response.get("message"); throw new S4ServiceClientException(msg == null ? e.getMessage() : msg.asText(), e); } } public static void main(String... args) { if (args == null || args.length == 0) { printUsageAndTerminate(null); } Parameters params = new Parameters(args); String serviceID = params.getValue("service"); if (serviceID == null) { printUsageAndTerminate("No service name provided"); } ServiceDescriptor service = null; try { service = ServicesCatalog.getItem(serviceID); } catch (UnsupportedOperationException uoe) { printUsageAndTerminate("Unsupported service '" + serviceID + '\''); } SupportedMimeType mimetype = SupportedMimeType.PLAINTEXT; if (params.getValue("dtype") != null) { try { mimetype = SupportedMimeType.valueOf(params.getValue("dtype")); } catch (IllegalArgumentException iae) { printUsageAndTerminate("Unsupported document type (dtype) : " + params.getValue("dtype")); } } String inFile = params.getValue("file"); String url = params.getValue("url"); String outFile = params.getValue("out", "result.json"); if (inFile != null) { if (!new File(inFile).exists()) { printUsageAndTerminate("Input file is not found : " + inFile); } } else { if (url == null) { printUsageAndTerminate("Neither input file, nor remote URL provided"); } } Properties creds = readCredentials(params); if (!creds.containsKey("apikey") || !creds.containsKey("secret")) { printUsageAndTerminate("No credentials details found"); } S4ServiceClient client = new S4ServiceClient(service, creds.getProperty("apikey"), creds.getProperty("secret")); try { InputStream resultData = null; if (service.getName().equals("news-classifier")) { resultData = (inFile != null) ? client.classifyFileContentsAsStream(new File(inFile), Charset.forName("UTF-8"), mimetype) : client.classifyDocumentFromUrlAsStream(new URL(url), mimetype); } else { resultData = (inFile != null) ? client.annotateFileContentsAsStream(new File(inFile), Charset.forName("UTF-8"), mimetype, ResponseFormat.JSON) : client.annotateDocumentFromUrlAsStream(new URL(url), mimetype, ResponseFormat.JSON); } FileOutputStream outStream = new FileOutputStream(outFile); IOUtils.copy(resultData, outStream); outStream.close(); resultData.close(); } catch (IOException ioe) { System.out.println(ioe.getMessage()); System.exit(1); } } private static void printUsageAndTerminate(String error) { if (error != null) { System.out.println(error); } System.out.println("Usage: S4ClientService parameter1=value1 parameter2=value2 ..."); System.out.println("Parameters:"); System.out.println( " service - the service id to be used (one of: 'TwitIE', 'SBT', 'news' and 'news-classifier')"); System.out.println(" file - input file path"); System.out.println(" url - input document URL"); System.out.println( " dtype - the type of the document (one of:'PLAINTEXT', 'HTML', 'XML_APPLICATION', 'XML_TEXT', 'PUBMED', 'COCHRANE', 'MEDIAWIKI', 'TWITTER_JSON')"); System.out.println(" out - result file name. Defaults to 'result.json'"); System.out.println(" apikey - the api key if credentials file is not used"); System.out.println(" secret - the api secret if credentials file is not used"); System.out.println(" creds - credentails file path (if apikey and secret parameters are not used)"); System.exit(1); } private static Properties readCredentials(Parameters params) { Properties props = new Properties(); if (params.getValue("apikey") != null) { if (params.getValue("secret") == null) { printUsageAndTerminate("API key secret not provided"); } props.setProperty("apikey", params.getValue("apikey")); props.setProperty("secret", params.getValue("secret")); return props; } String credsFile = "s4credentials.properties"; if (params.getValue("creds") != null) { credsFile = params.getValue("creds"); } if (new File(credsFile).exists()) { try { props.load(new FileInputStream(credsFile)); } catch (IOException ex) { printUsageAndTerminate("Error reading credentials file: " + ex.getMessage()); } } else { InputStream inStr = Thread.currentThread().getContextClassLoader().getResourceAsStream(credsFile); if (inStr != null) { try { props.load(inStr); } catch (IOException ioe) { printUsageAndTerminate("Error reading credentials file: " + ioe.getMessage()); } } } return props; } }