Java tutorial
package org.swissbib.docproc.flink.plugins;

import com.mongodb.*;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.IOException;
import java.io.InputStream;
import java.net.*;
import java.text.MessageFormat;
import java.text.Normalizer;
import java.util.*;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Document-processing plugin that looks up a GND identifier in a MongoDB
 * collection and returns the values stored under the configured tags
 * (optionally merged with MACS values) as one separator-delimited string.
 * <p/>
 * All configuration and connection state is kept in static fields and is
 * therefore shared by every instance of this class.
 * <p/>
 * Configuration keys read by this class:
 * <ul>
 * <li>{@code PLUGINS.IN.PRODUCTIONMODE} - must contain this class name, otherwise the plugin stays inactive</li>
 * <li>{@code ID.PATTERN.TO.REPLACE}</li>
 * <li>{@code MONGO.CLIENT} - {@code host###port}</li>
 * <li>{@code MONGO.AUTHENTICATION} (optional) - {@code database###user###password}</li>
 * <li>{@code MONGO.DB} - {@code database###collection###searchField###responseField[###responseFieldMACS]}</li>
 * <li>{@code TAGS.TO.USE} / {@code TAGS.TO.USE.FOR.MACS} - tag names separated by {@code ###}</li>
 * <li>{@code PROXYSERVER} / {@code PATTERN.FOR.ID} - only used by the (currently not wired in) HTTP lookup</li>
 * </ul>
 * <p/>
 * Copyright (C) project swissbib, University Library Basel, Switzerland
 * http://www.swissbib.org / http://www.swissbib.ch / http://www.ub.unibas.ch
 * <p/>
 * Date: 12/19/12
 * Time: 7:18 AM
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2,
 * as published by the Free Software Foundation.
 * <p/>
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 * <p/>
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 * <p/>
 * license: http://opensource.org/licenses/gpl-2.0.php GNU General Public License
 *
 * @author Guenter Hipler <guenter.hipler@unibas.ch>
 * @link http://www.swissbib.org
 * @link https://github.com/swissbib/xml2SearchDoc
 */
public class GNDContentEnrichment implements IDocProcPlugin {

    /** Separator placed between the individual values of a concatenated result. */
    private static final String VALUE_SEPARATOR = "##xx##";

    /** Separator between the parts of a multi-part configuration value. */
    private static final String CONFIG_SEPARATOR = "###";

    private static Logger gndProcessing;
    private static Logger macsProcessing;
    private static Logger gndProcessingError;

    private static Proxy proxy;
    private static Pattern idPattern;
    private static String urlToSource;
    private static ArrayList<String> simpleTagsToUse;
    private static ArrayList<String> simpleTagsToUseForMACS;
    private static String idPatternForReplacement;

    // set as soon as initPlugin has run for a plugin that is in production mode
    private static boolean initialized;
    // an error occurred while trying to initialize -> processing must not take place
    private static boolean errorInitializing;
    private static boolean gndLogOpen;

    private static DB nativeSource = null;
    private static MongoClient mClient = null;
    private static DBCollection searchCollection = null;
    private static String searchField = "";
    private static String responseField = "";
    private static String responseFieldMACS = null;
    private static RemoveDuplicates duplicateDetection;
    private static boolean inProductionMode = false;

    static {
        GNDContentEnrichment.gndProcessing = LoggerFactory.getLogger("gndProcessing");
        GNDContentEnrichment.gndProcessingError = LoggerFactory.getLogger("gndProcessingError");
        GNDContentEnrichment.macsProcessing = LoggerFactory.getLogger("macsProcessing");

        simpleTagsToUse = new ArrayList<String>();
        simpleTagsToUseForMACS = new ArrayList<String>();
        initialized = false;
        errorInitializing = false;
        gndLogOpen = false;
    }

    //http://www.oxygenxml.com/doc/ug-editor/tasks/generate-certificate.html

    /**
     * Initializes the shared plugin state from the given configuration.
     * <p/>
     * Nothing happens unless {@code PLUGINS.IN.PRODUCTIONMODE} contains the
     * name of this class. Once that check passes the plugin is marked as
     * initialized in any case; a failure during the actual setup is recorded
     * separately (and logged) so that later lookups return an empty result.
     *
     * @param configuration plugin configuration (see the class documentation for the keys)
     */
    @Override
    public void initPlugin(HashMap<String, String> configuration) {
        String className = this.getClass().getName();
        String productionPlugins = configuration.get("PLUGINS.IN.PRODUCTIONMODE");
        if (productionPlugins == null || !productionPlugins.contains(className)) {
            return;
        }
        inProductionMode = true;
        initialized = true;

        try {
            idPatternForReplacement = configuration.get("ID.PATTERN.TO.REPLACE");
            initializeMongoConnection(configuration);
            initializeTagsToUse(configuration);

            duplicateDetection = new RemoveDuplicates();
            duplicateDetection.initPlugin(configuration);
        } catch (Exception ex) {
            errorInitializing = true;
            // previously swallowed silently; without this entry a broken setup is invisible
            gndProcessingError.error(
                    "initialization of GNDContentEnrichment failed - no GND enrichment will take place", ex);
        }
    }

    /**
     * Closes the shared Mongo client if one was created.
     */
    @Override
    public void finalizePlugIn() {
        if (mClient != null) {
            mClient.close();
        }
    }

    /**
     * Looks up {@code gndID} in the configured Mongo collection and returns all
     * values found under the configured GND tags (and, if a MACS response field
     * is configured, the configured MACS tags), NFC-normalized, joined with
     * {@code ##xx##} and passed through the duplicate removal.
     * <p/>
     * An earlier implementation fetched every record through the SRU interface
     * of DNB per request; it was replaced by this Mongo lookup.
     *
     * @param gndID identifier to search for in the configured search field
     * @return the concatenated references; an empty string if the plugin is not
     *         in production mode or failed to initialize
     */
    public String getReferencesConcatenated(String gndID) {
        String toReturn = "";
        if (!inProductionMode) {
            return toReturn;
        }
        if (!initialized) {
            initDefaultValues();
            writeLog("late initialized");
        }
        if (errorInitializing) {
            return toReturn;
        }

        StringBuilder concatReferences = new StringBuilder();
        DBCursor cursor = null;
        try {
            cursor = searchCollection.find(new BasicDBObject(searchField, gndID));
            while (cursor.hasNext()) {
                DBObject dbObject = cursor.next();
                appendGndValues(dbObject, concatReferences);
                if (responseFieldMACS != null) {
                    appendMacsValues(dbObject, gndID, concatReferences);
                }
            }
            toReturn = stripTrailingSeparator(concatReferences.toString());
        } catch (Exception excep) {
            gndProcessingError.error(
                    "getReferencesConcatenated: error while collecting references for gndID " + gndID, excep);
        } finally {
            if (cursor != null) {
                cursor.close();
            }
        }

        // suppressing duplicates makes sense because values from GND and MACS are
        // merged together, which might produce duplicates
        toReturn = duplicateDetection.removeDuplicatesFromMultiValuedField(toReturn);
        gndProcessing.debug("getReferencesConcatenated: gndID: " + gndID + " / references: " + toReturn);
        return toReturn;
    }

    /**
     * Fetches the record for {@code gndID} over HTTP and returns the response body.
     * <p/>
     * The id pattern and the source URL are only available when they have been
     * configured; at present their initialization is not wired into
     * {@link #initPlugin(HashMap)}, in which case this method logs the problem
     * and returns an empty string instead of failing with a NullPointerException.
     *
     * @param gndID identifier; group 2 of the configured id pattern is inserted into the source URL
     * @return the response body decoded as UTF-8, or an empty string if nothing could be fetched
     */
    public String getReferencesAsXML(String gndID) {
        String referencesAsXML = "";
        if (!initialized) {
            initDefaultValues();
        }
        if (gndID == null || idPattern == null || urlToSource == null) {
            gndProcessingError.error(
                    "getReferencesAsXML: id pattern or source url not configured - nothing fetched for gndID " + gndID);
            return referencesAsXML;
        }

        //1. get the id we can use to fetch GND record
        Matcher matcher = idPattern.matcher(gndID);
        if (!matcher.find()) {
            return referencesAsXML;
        }

        HttpURLConnection connection = null;
        InputStream is = null;
        try {
            String tgndid = matcher.group(2);
            //2. build the url to fetch gnd record
            String url = MessageFormat.format(urlToSource, tgndid);
            connection = getHTTPConnection(url);
            is = connection.getInputStream();
            // "\\Z" makes the scanner deliver the whole stream as one token;
            // an empty body yields no token at all, hence the hasNext() guard
            Scanner scanner = new Scanner(is, "UTF-8").useDelimiter("\\Z");
            if (scanner.hasNext()) {
                referencesAsXML = scanner.next();
            }
        } catch (IOException ioEx) {
            gndProcessingError.error("getReferencesAsXML: IOException for gndID " + gndID, ioEx);
        } catch (Throwable thr) {
            // kept this broad on purpose: the original never let anything escape to the caller
            gndProcessingError.error("getReferencesAsXML: unexpected error for gndID " + gndID, thr);
        } finally {
            //3. finally close the stream and the connection
            if (is != null) {
                try {
                    is.close();
                } catch (IOException ignored) {
                    // nothing useful can be done if closing fails
                }
            }
            if (connection != null) {
                connection.disconnect();
            }
        }
        return referencesAsXML;
    }

    /**
     * Appends the values of all configured GND tags of one result document.
     */
    private void appendGndValues(DBObject dbObject, StringBuilder target) {
        BasicDBObject gndFields = subDocument(dbObject, responseField);
        if (gndFields == null) {
            return;
        }
        for (Map.Entry<String, Object> entry : gndFields.entrySet()) {
            if (simpleTagsToUse.contains(entry.getKey())) {
                appendNormalizedValues(entry.getValue(), target, null);
            }
        }
    }

    /**
     * Appends the values of all configured MACS tags of one result document
     * and writes one log entry per tag that contributed values.
     */
    private void appendMacsValues(DBObject dbObject, String gndID, StringBuilder target) {
        BasicDBObject macsFields = subDocument(dbObject, responseFieldMACS);
        if (macsFields == null) {
            return;
        }
        for (Map.Entry<String, Object> entry : macsFields.entrySet()) {
            if (!simpleTagsToUseForMACS.contains(entry.getKey())) {
                continue;
            }
            // second builder collects the values of this tag only - used for logging
            StringBuilder macsReferences = new StringBuilder();
            appendNormalizedValues(entry.getValue(), target, macsReferences);
            if (macsReferences.length() > 0) {
                String macsValuesForLogging = stripTrailingSeparator(macsReferences.toString());
                macsValuesForLogging = duplicateDetection
                        .removeDuplicatesFromMultiValuedField(macsValuesForLogging);
                macsProcessing.info("additional MACS values for GND " + gndID + " : " + macsValuesForLogging);
            }
        }
    }

    /**
     * Returns the sub-document stored under {@code field}, or null if the field
     * is missing or holds something else.
     */
    private static BasicDBObject subDocument(DBObject parent, String field) {
        Object raw = parent.get(field);
        return (raw instanceof BasicDBObject) ? (BasicDBObject) raw : null;
    }

    /**
     * NFC-normalizes every string of {@code rawValues} (expected to be a
     * {@link BasicDBList}) and appends it, followed by the separator, to
     * {@code primary} and - if given - to {@code secondary}. Anything that is
     * not a list, and list elements that are not strings, are skipped.
     */
    private static void appendNormalizedValues(Object rawValues, StringBuilder primary, StringBuilder secondary) {
        if (!(rawValues instanceof BasicDBList)) {
            return;
        }
        for (Object raw : (BasicDBList) rawValues) {
            if (!(raw instanceof String)) {
                continue;
            }
            String composedValue = Normalizer.normalize((String) raw, Normalizer.Form.NFC);
            primary.append(composedValue).append(VALUE_SEPARATOR);
            if (secondary != null) {
                secondary.append(composedValue).append(VALUE_SEPARATOR);
            }
        }
    }

    /**
     * Removes the separator that the append helpers leave after the last value.
     */
    private static String stripTrailingSeparator(String concatenated) {
        if (concatenated.endsWith(VALUE_SEPARATOR)) {
            return concatenated.substring(0, concatenated.length() - VALUE_SEPARATOR.length());
        }
        return concatenated;
    }

    /**
     * Creates the HTTP proxy from {@code PROXYSERVER} ({@code host} or
     * {@code host:port}; the port defaults to 80). On any error the proxy is
     * reset to null and the problem is logged.
     */
    private void initializeProxy(HashMap<String, String> configuration) {
        String proxyProp = configuration.get("PROXYSERVER");
        if (proxyProp == null || proxyProp.length() == 0) {
            return;
        }
        try {
            // without a port the whole property is the host name (it used to be
            // passed as null, which InetSocketAddress rejects)
            String proxyServer = proxyProp;
            int proxyPort = 80;
            if (proxyProp.contains(":")) {
                String[] serverParts = proxyProp.split(":");
                proxyServer = serverParts[0];
                if (serverParts.length > 1) {
                    proxyPort = Integer.parseInt(serverParts[1].trim());
                }
            }
            proxy = new Proxy(Proxy.Type.HTTP, new InetSocketAddress(proxyServer, proxyPort));
            gndProcessing.info("proxy - object: " + proxyProp + " initialized");
        } catch (Exception ex) {
            gndProcessingError.error("\n => Error while creating Proxy", ex);
            proxy = null;
        }
    }

    /**
     * Compiles the id pattern from {@code PATTERN.FOR.ID} if it is configured.
     */
    private void initializeIDPattern(HashMap<String, String> configuration) {
        String configuredPattern = configuration.get("PATTERN.FOR.ID");
        if (configuredPattern != null) {
            idPattern = Pattern.compile(configuredPattern);
        }
    }

    /**
     * Reads the tag lists for GND and MACS from the configuration.
     * Tags are stored verbatim (e.g. {@code 450_a}); the Mongo based lookup
     * does not need them split into data field and subfield.
     */
    private void initializeTagsToUse(HashMap<String, String> configuration) {
        addTags(configuration.get("TAGS.TO.USE"), simpleTagsToUse);
        addTags(configuration.get("TAGS.TO.USE.FOR.MACS"), simpleTagsToUseForMACS);
    }

    /**
     * Splits a {@code ###}-separated tag list and adds every tag to {@code target}.
     */
    private static void addTags(String configuredTags, List<String> target) {
        if (configuredTags == null || configuredTags.length() == 0) {
            return;
        }
        target.addAll(Arrays.asList(configuredTags.split(CONFIG_SEPARATOR)));
    }

    /**
     * Opens the Mongo connection and resolves collection and field names from
     * {@code MONGO.CLIENT}, {@code MONGO.AUTHENTICATION} (optional) and
     * {@code MONGO.DB}.
     *
     * @throws Exception if the configuration is incomplete, the connection
     *                   cannot be established or the database statistics
     *                   command does not succeed
     */
    private void initializeMongoConnection(HashMap<String, String> configuration) throws Exception {
        try {
            String[] mongoClient = configuration.get("MONGO.CLIENT").split(CONFIG_SEPARATOR);
            String[] mongoAuthentication = null;
            if (configuration.containsKey("MONGO.AUTHENTICATION")) {
                mongoAuthentication = configuration.get("MONGO.AUTHENTICATION").split(CONFIG_SEPARATOR);
            }

            ServerAddress server = new ServerAddress(mongoClient[0], Integer.valueOf(mongoClient[1]));
            String[] mongoDB = configuration.get("MONGO.DB").split(CONFIG_SEPARATOR);

            DB db = null;
            if (mongoAuthentication != null) {
                // argument order of the driver: user name, database, password
                MongoCredential credential = MongoCredential.createMongoCRCredential(mongoAuthentication[1],
                        mongoAuthentication[0], mongoAuthentication[2].toCharArray());
                mClient = new MongoClient(server, Arrays.asList(credential));
                db = mClient.getDB(mongoAuthentication[0]);
            } else {
                mClient = new MongoClient(server);
                db = mClient.getDB(mongoDB[0]);
            }

            //simple test if authentication was successful
            CommandResult cR = db.getStats();
            if (cR != null && !cR.ok()) {
                throw new Exception(
                        "authentication against database wasn't possible - no GND Processing will take place when type is called from XSLT templates");
            }

            nativeSource = mClient.getDB(mongoDB[0]);
            searchCollection = nativeSource.getCollection(mongoDB[1]);
            searchField = mongoDB[2];
            responseField = mongoDB[3];
            if (mongoDB.length > 4) {
                responseFieldMACS = mongoDB[4];
            }
        } catch (UnknownHostException uHE) {
            // the explanatory text used to be passed as a format argument and was never printed
            gndProcessingError.error("MongoError: Mongo Connection couldn't be established", uHE);
            throw uHE;
        } catch (Exception ex) {
            gndProcessingError.error("MongoError: General Exception while trying to connect to Mongo", ex);
            throw ex;
        }
    }

    /**
     * Opens (and connects) an HTTP connection to {@code url}, through the
     * configured proxy if there is one, with a read timeout of one second.
     */
    private HttpURLConnection getHTTPConnection(String url) throws IOException {
        URL u = new URL(url);
        URLConnection opened = (proxy != null) ? u.openConnection(proxy) : u.openConnection();
        HttpURLConnection uc = (HttpURLConnection) opened;
        uc.setReadTimeout(1000);
        uc.connect();
        return uc;
    }

    /**
     * Runs {@link #initPlugin(HashMap)} with a minimal built-in configuration.
     */
    private void initDefaultValues() {
        HashMap<String, String> configuration = new HashMap<String, String>();
        //configuration.put("SOURCE.TO.FETCH.GND","https://portal.dnb.de/opac.htm?method=requestMarcXml&idn={0}");
        configuration.put("ID.PATTERN.TO.REPLACE", "?##?");
        //configuration.put("PATTERN.FOR.ID","^(\\(.*?\\))(.*)$");
        configuration.put("TAGS.TO.USE", "450_a");

        initPlugin(configuration);
    }

    /**
     * Placeholder for the former file based debug log; currently does nothing.
     */
    private void writeLog(String message) {
        // intentionally empty
    }
}