List of usage examples for org.w3c.dom Element normalize
public void normalize();
Puts all Text nodes in the full depth of the sub-tree underneath this Node, including attribute nodes, into a "normal" form where only structure (e.g., elements, comments, processing instructions, CDATA sections, and entity references) separates Text nodes, i.e., there are neither adjacent Text nodes nor empty Text nodes.
From source file:org.apache.manifoldcf.crawler.connectors.meridio.MeridioConnector.java
/** Close the connection. Call this before discarding the repository connector. *//* w ww . ja v a2s .c om*/ @Override public void disconnect() throws ManifoldCFException { Logging.connectors.debug("Meridio: Entering 'disconnect' method"); try { if (meridio_ != null) { meridio_.logout(); } } catch (org.apache.axis.AxisFault e) { long currentTime = System.currentTimeMillis(); if (e.getFaultCode().equals(new javax.xml.namespace.QName("http://xml.apache.org/axis/", "HTTP"))) { org.w3c.dom.Element elem = e.lookupFaultDetail( new javax.xml.namespace.QName("http://xml.apache.org/axis/", "HttpErrorCode")); if (elem != null) { elem.normalize(); String httpErrorCode = elem.getFirstChild().getNodeValue().trim(); Logging.connectors.warn( "Unexpected http error code " + httpErrorCode + " logging out: " + e.getMessage()); return; } Logging.connectors.warn("Unknown http error occurred while logging out: " + e.getMessage()); return; } if (e.getFaultCode().equals(new javax.xml.namespace.QName("http://schemas.xmlsoap.org/soap/envelope/", "Server.userException"))) { String exceptionName = e.getFaultString(); if (exceptionName.equals("java.lang.InterruptedException")) throw new ManifoldCFException("Interrupted", ManifoldCFException.INTERRUPTED); } if (e.getFaultCode() .equals(new javax.xml.namespace.QName("http://schemas.xmlsoap.org/soap/envelope/", "Server"))) { if (e.getFaultString().indexOf(" 23031#") != -1) { // This means that the session has expired, so reset it and retry meridio_ = null; return; } } Logging.connectors.warn("Meridio: Got an unknown remote exception logging out - axis fault = " + e.getFaultCode().getLocalPart() + ", detail = " + e.getFaultString(), e); return; } catch (RemoteException remoteException) { Logging.connectors.warn( "Meridio: A remote exception occurred while " + "logging out: " + remoteException.getMessage(), remoteException); } finally { super.disconnect(); meridio_ = null; urlBase = null; urlVersionBase = null; DmwsURL = null; RmwsURL = null; 
mySSLFactory = null; Logging.connectors.debug("Meridio: Exiting 'disconnect' method"); } }
From source file:org.apache.manifoldcf.crawler.connectors.meridio.MeridioConnector.java
/** Returns the categories set up in the Meridio system; these are used by the UI for two * purposes/*w ww. j ava2s .c o m*/ * * 1) To populate the "SearchCategory" * Use "getPROP_title()" on the list of CATEGORIES object in * the return ArrayList * 2) To assist with population of the metadata values to return. The * available metadata depends on the chosen category * *@return Sorted array of strings containing the category names */ public String[] getMeridioCategories() throws ManifoldCFException, ServiceInterruption { Logging.connectors.debug("Entering 'getMeridioCategories' method"); while (true) { getSession(); ArrayList returnCategories = new ArrayList(); try { CATEGORIES[] categories = meridio_.getCategories().getCATEGORIES(); for (int i = 0; i < categories.length; i++) { if (categories[i].getPROP_categoryId() == 4 || // Global Document Category categories[i].getPROP_categoryId() == 5 || // Mail Message categories[i].getPROP_categoryId() > 100) // Custom Document Category { if (!categories[i].getPROP_title().equals("<None>")) { Logging.connectors.debug("Adding category <" + categories[i].getPROP_title() + ">"); returnCategories.add(categories[i].getPROP_title()); } } } String[] returnStringArray = new String[returnCategories.size()]; Iterator it = returnCategories.iterator(); for (int i = 0; it.hasNext(); i++) { returnStringArray[i] = (String) it.next(); } java.util.Arrays.sort(returnStringArray); Logging.connectors.debug("Exiting 'getMeridioCategories' method"); return returnStringArray; } catch (org.apache.axis.AxisFault e) { long currentTime = System.currentTimeMillis(); if (e.getFaultCode().equals(new javax.xml.namespace.QName("http://xml.apache.org/axis/", "HTTP"))) { org.w3c.dom.Element elem = e.lookupFaultDetail( new javax.xml.namespace.QName("http://xml.apache.org/axis/", "HttpErrorCode")); if (elem != null) { elem.normalize(); String httpErrorCode = elem.getFirstChild().getNodeValue().trim(); throw new ManifoldCFException("Unexpected http error code 
" + httpErrorCode + " getting categories: " + e.getMessage()); } throw new ManifoldCFException( "Unknown http error occurred while getting categories: " + e.getMessage(), e); } if (e.getFaultCode().equals(new javax.xml.namespace.QName( "http://schemas.xmlsoap.org/soap/envelope/", "Server.userException"))) { String exceptionName = e.getFaultString(); if (exceptionName.equals("java.lang.InterruptedException")) throw new ManifoldCFException("Interrupted", ManifoldCFException.INTERRUPTED); } if (e.getFaultCode().equals( new javax.xml.namespace.QName("http://schemas.xmlsoap.org/soap/envelope/", "Server"))) { if (e.getFaultString().indexOf(" 23031#") != -1) { // This means that the session has expired, so reset it and retry meridio_ = null; continue; } } throw new ManifoldCFException( "Meridio: Got an unknown remote exception getting categories - axis fault = " + e.getFaultCode().getLocalPart() + ", detail = " + e.getFaultString(), e); } catch (RemoteException remoteException) { throw new ManifoldCFException("Meridio: A Remote Exception occurred while " + "retrieving the Meridio categories: " + remoteException.getMessage(), remoteException); } catch (MeridioDataSetException meridioDataSetException) { throw new ManifoldCFException( "Meridio: DataSet Exception occurred retrieving the Meridio categories: " + meridioDataSetException.getMessage(), meridioDataSetException); } } }
From source file:org.apache.manifoldcf.crawler.connectors.meridio.MeridioConnector.java
/** Test the connection. Returns a string describing the connection integrity. *@return the connection's status as a displayable string. *///from w w w . j ava 2s. c o m @Override public String check() throws ManifoldCFException { Logging.connectors.debug("Meridio: Entering 'check' method"); try { // Force a relogin meridio_ = null; getSession(); } catch (ServiceInterruption e) { return "Meridio temporarily unavailable: " + e.getMessage(); } catch (ManifoldCFException e) { return e.getMessage(); } try { /*================================================================= * Call a method in the Web Services API to get the Meridio system * name back - just something simple to test the connection * end-to-end *================================================================*/ DMDataSet ds = meridio_.getStaticData(); if (null == ds) { Logging.connectors.debug("Meridio: DM DataSet returned was null in 'check' method"); return "Connection failed - null DM DataSet"; } if (Logging.connectors.isDebugEnabled()) Logging.connectors.debug("Meridio System Name is [" + ds.getSYSTEMINFO().getSystemName() + "] and the comment is [" + ds.getSYSTEMINFO().getComment() + "]"); /*================================================================= * For completeness, we also call a method in the RM Web * Service API *================================================================*/ RMDataSet rmws = meridio_.getConfiguration(); if (null == rmws) { Logging.connectors.warn("Meridio: RM DataSet returned was null in 'check' method"); return "Connection failed - null RM DataSet returned"; } return super.check(); } catch (org.apache.axis.AxisFault e) { long currentTime = System.currentTimeMillis(); if (e.getFaultCode().equals(new javax.xml.namespace.QName("http://xml.apache.org/axis/", "HTTP"))) { org.w3c.dom.Element elem = e.lookupFaultDetail( new javax.xml.namespace.QName("http://xml.apache.org/axis/", "HttpErrorCode")); if (elem != null) { elem.normalize(); String httpErrorCode = 
elem.getFirstChild().getNodeValue().trim(); return "Unexpected http error code " + httpErrorCode + " accessing Meridio: " + e.getMessage(); } return "Unknown http error occurred while checking: " + e.getMessage(); } if (e.getFaultCode().equals(new javax.xml.namespace.QName("http://schemas.xmlsoap.org/soap/envelope/", "Server.userException"))) { String exceptionName = e.getFaultString(); if (exceptionName.equals("java.lang.InterruptedException")) throw new ManifoldCFException("Interrupted", ManifoldCFException.INTERRUPTED); } if (Logging.connectors.isDebugEnabled()) Logging.connectors.debug("Meridio: Got an unknown remote exception checking - axis fault = " + e.getFaultCode().getLocalPart() + ", detail = " + e.getFaultString() + " - retrying", e); return "Axis fault: " + e.getMessage(); } catch (RemoteException remoteException) { /*================================================================= * Log the exception because we will then discard it * * Potentially attempting to re-login may resolve this error but * if it is being called soon after a successful login, then that * is unlikely. * * A RemoteException could be a transient network error *================================================================*/ if (Logging.connectors.isDebugEnabled()) Logging.connectors.debug("Meridio: Unknown remote exception occurred during 'check' method: " + remoteException.getMessage(), remoteException); return "Connection failed - Remote exception: " + remoteException.getMessage(); } catch (MeridioDataSetException meridioDataSetException) { /*================================================================= * Log the exception because we will then discard it * * If it is a DataSet exception it means that we could not marshal * or unmarshall the XML returned from the Web Service call. 
This * means there is either a problem with the code, or perhaps the * connector is pointing at an incorrect/unsupported version of * Meridio *================================================================*/ if (Logging.connectors.isDebugEnabled()) Logging.connectors.debug("Meridio: DataSet exception occurred during 'check' method: " + meridioDataSetException.getMessage(), meridioDataSetException); return "Connection failed - DataSet exception: " + meridioDataSetException.getMessage(); } finally { Logging.connectors.debug("Meridio: Exiting 'check' method"); } }
From source file:org.apache.manifoldcf.crawler.connectors.meridio.MeridioConnector.java
/** Queue "seed" documents. Seed documents are the starting places for crawling activity. Documents * are seeded when this method calls appropriate methods in the passed in ISeedingActivity object. * * This method can choose to find repository changes that happen only during the specified time interval. * The seeds recorded by this method will be viewed by the framework based on what the * getConnectorModel() method returns./*from ww w .j a va2 s.c o m*/ * * It is not a big problem if the connector chooses to create more seeds than are * strictly necessary; it is merely a question of overall work required. * * The end time and seeding version string passed to this method may be interpreted for greatest efficiency. * For continuous crawling jobs, this method will * be called once, when the job starts, and at various periodic intervals as the job executes. * * When a job's specification is changed, the framework automatically resets the seeding version string to null. The * seeding version string may also be set to null on each job run, depending on the connector model returned by * getConnectorModel(). * * Note that it is always ok to send MORE documents rather than less to this method. * The connector will be connected before this method can be called. *@param activities is the interface this method should use to perform whatever framework actions are desired. *@param spec is a document specification (that comes from the job). *@param seedTime is the end of the time range of documents to consider, exclusive. *@param lastSeedVersionString is the last seeding version string for this job, or null if the job has no previous seeding version string. *@param jobMode is an integer describing how the job is being run, whether continuous or once-only. *@return an updated seeding version string, to be stored with the job. 
*/ @Override public String addSeedDocuments(ISeedingActivity activities, Specification spec, String lastSeedVersion, long seedTime, int jobMode) throws ManifoldCFException, ServiceInterruption { Logging.connectors.debug("Meridio: Entering 'addSeedDocuments' method"); long startTime; if (lastSeedVersion == null) startTime = 0L; else { // Unpack seed time from seed version string startTime = new Long(lastSeedVersion).longValue(); } // Adjust start time so that we don't miss documents that squeeze in with earlier timestamps after we've already scanned that interval. // Chose an interval of 15 minutes, but I've never seen this effect take place over a time interval even 1/10 of that. long timeAdjust = 15L * 60000L; if (startTime > timeAdjust) startTime -= timeAdjust; else startTime = 0L; while (true) { getSession(); try { DMSearchResults searchResults; int numResultsReturnedByStream = 0; while (true) { searchResults = documentSpecificationSearch(spec, startTime, seedTime, numResultsReturnedByStream + 1, maxHitsToReturn); for (int i = 0; i < searchResults.returnedHitsCount; i++) { long documentId = searchResults.dsDM.getSEARCHRESULTS_DOCUMENTS()[i].getDocId(); String strDocumentId = new Long(documentId).toString(); activities.addSeedDocument(strDocumentId); } numResultsReturnedByStream += searchResults.returnedHitsCount; if (numResultsReturnedByStream == searchResults.totalHitsCount) break; } return new Long(seedTime).toString(); } catch (org.apache.axis.AxisFault e) { long currentTime = System.currentTimeMillis(); if (e.getFaultCode().equals(new javax.xml.namespace.QName("http://xml.apache.org/axis/", "HTTP"))) { org.w3c.dom.Element elem = e.lookupFaultDetail( new javax.xml.namespace.QName("http://xml.apache.org/axis/", "HttpErrorCode")); if (elem != null) { elem.normalize(); String httpErrorCode = elem.getFirstChild().getNodeValue().trim(); throw new ManifoldCFException("Unexpected http error code " + httpErrorCode + " accessing Meridio: " + e.getMessage(), e); } 
throw new ManifoldCFException( "Unknown http error occurred while performing search: " + e.getMessage(), e); } if (e.getFaultCode().equals(new javax.xml.namespace.QName( "http://schemas.xmlsoap.org/soap/envelope/", "Server.userException"))) { String exceptionName = e.getFaultString(); if (exceptionName.equals("java.lang.InterruptedException")) throw new ManifoldCFException("Interrupted", ManifoldCFException.INTERRUPTED); } if (e.getFaultCode().equals( new javax.xml.namespace.QName("http://schemas.xmlsoap.org/soap/envelope/", "Server"))) { if (e.getFaultString().indexOf(" 23031#") != -1) { // This means that the session has expired, so reset it and retry meridio_ = null; continue; } } if (Logging.connectors.isDebugEnabled()) Logging.connectors.debug( "Meridio: Got an unknown remote exception while performing search - axis fault = " + e.getFaultCode().getLocalPart() + ", detail = " + e.getFaultString() + " - retrying", e); throw new ServiceInterruption("Remote procedure exception: " + e.getMessage(), e, currentTime + 300000L, currentTime + 3 * 60 * 60000L, -1, false); } catch (RemoteException remoteException) { throw new ManifoldCFException("Meridio: A Remote Exception occurred while " + "performing a search: " + remoteException.getMessage(), remoteException); } catch (MeridioDataSetException meridioDataSetException) { throw new ManifoldCFException("Meridio: A problem occurred manipulating the Web " + "Service XML: " + meridioDataSetException.getMessage(), meridioDataSetException); } } }
From source file:org.apache.manifoldcf.crawler.connectors.meridio.MeridioConnector.java
public MeridioClassContents[] getClassOrFolderContents(int classOrFolderId) throws ManifoldCFException, ServiceInterruption { Logging.connectors.debug("Entering 'getClassOrFolderContents' method"); while (true) { getSession();/* w w w . j a v a 2s .c o m*/ ArrayList meridioContainers = new ArrayList(); try { RMDataSet ds = meridio_.getClassContents(classOrFolderId, false, false, false); if (ds == null) { Logging.connectors.debug("No classes or folders in returned DataSet"); return new MeridioClassContents[] {}; } Rm2vClass[] classes = ds.getRm2vClass(); Rm2vFolder[] folders = ds.getRm2vFolder(); for (int i = 0; i < classes.length; i++) { if (classes[i].getHomePage() == null || classes[i].getHomePage().length() == 0) // Not a federated link { MeridioClassContents classContents = new MeridioClassContents(); classContents.containerType = MeridioClassContents.CLASS; classContents.classOrFolderId = classes[i].getId(); classContents.classOrFolderName = classes[i].getName(); meridioContainers.add(classContents); } } for (int i = 0; i < folders.length; i++) { MeridioClassContents classContents = new MeridioClassContents(); classContents.containerType = MeridioClassContents.FOLDER; classContents.classOrFolderId = folders[i].getId(); classContents.classOrFolderName = folders[i].getName(); meridioContainers.add(classContents); } MeridioClassContents[] classArray = new MeridioClassContents[meridioContainers.size()]; Iterator it = meridioContainers.iterator(); for (int i = 0; it.hasNext(); i++) { classArray[i] = (MeridioClassContents) it.next(); } Logging.connectors.debug("Exiting 'getClassOrFolderContents' method"); return classArray; } catch (org.apache.axis.AxisFault e) { long currentTime = System.currentTimeMillis(); if (e.getFaultCode().equals(new javax.xml.namespace.QName("http://xml.apache.org/axis/", "HTTP"))) { org.w3c.dom.Element elem = e.lookupFaultDetail( new javax.xml.namespace.QName("http://xml.apache.org/axis/", "HttpErrorCode")); if (elem != null) { 
elem.normalize(); String httpErrorCode = elem.getFirstChild().getNodeValue().trim(); throw new ManifoldCFException("Unexpected http error code " + httpErrorCode + " getting class or folder contents: " + e.getMessage()); } throw new ManifoldCFException( "Unknown http error occurred while getting class or folder contents: " + e.getMessage(), e); } if (e.getFaultCode().equals(new javax.xml.namespace.QName( "http://schemas.xmlsoap.org/soap/envelope/", "Server.userException"))) { String exceptionName = e.getFaultString(); if (exceptionName.equals("java.lang.InterruptedException")) throw new ManifoldCFException("Interrupted", ManifoldCFException.INTERRUPTED); } if (e.getFaultCode().equals( new javax.xml.namespace.QName("http://schemas.xmlsoap.org/soap/envelope/", "Server"))) { if (e.getFaultString().indexOf(" 23031#") != -1) { // This means that the session has expired, so reset it and retry meridio_ = null; continue; } } throw new ManifoldCFException( "Meridio: Got an unknown remote exception getting class or folder contents - axis fault = " + e.getFaultCode().getLocalPart() + ", detail = " + e.getFaultString(), e); } catch (RemoteException remoteException) { throw new ManifoldCFException("Meridio: A Remote Exception occurred while " + "retrieving class or folder contents: " + remoteException.getMessage(), remoteException); } catch (MeridioDataSetException meridioDataSetException) { throw new ManifoldCFException("Meridio: A problem occurred manipulating the Web " + "Service XML: " + meridioDataSetException.getMessage(), meridioDataSetException); } } }
From source file:org.apache.manifoldcf.crawler.connectors.meridio.MeridioConnector.java
public String[] getMeridioDocumentProperties() throws ManifoldCFException, ServiceInterruption { Logging.connectors.debug("Entering 'getMeridioDocumentProperties' method"); while (true) { getSession();//from ww w . java2 s .c o m ArrayList meridioDocumentProperties = new ArrayList(); try { CATEGORIES[] categories = meridio_.getCategories().getCATEGORIES(); PROPERTYDEFS[] propertyDefs = meridio_.getStaticData().getPROPERTYDEFS(); for (int i = 0; i < propertyDefs.length; i++) { if (propertyDefs[i].getTableName() == null) { continue; } if (propertyDefs[i].getTableName().compareTo("DOCUMENTS") == 0) { meridioDocumentProperties.add(propertyDefs[i].getDisplayName()); } if ((propertyDefs[i].getCategoryId() == 4 || // Global Document Category propertyDefs[i].getCategoryId() == 5 || // Mail Message propertyDefs[i].getCategoryId() > 100) && // Custom Category propertyDefs[i].getTableName().compareTo("DOCUMENT_CUSTOMPROPS") == 0) { for (int j = 0; j < categories.length; j++) { if (categories[j].getPROP_categoryId() == propertyDefs[i].getCategoryId()) { meridioDocumentProperties.add( categories[j].getPROP_title() + "." + propertyDefs[i].getDisplayName()); Logging.connectors.debug("Prop: <" + categories[j].getPROP_title() + "." 
+ propertyDefs[i].getDisplayName() + "> Column <" + propertyDefs[i].getColumnName() + ">"); break; } } } } String[] returnStringArray = new String[meridioDocumentProperties.size()]; Iterator it = meridioDocumentProperties.iterator(); for (int i = 0; it.hasNext(); i++) { returnStringArray[i] = (String) it.next(); } java.util.Arrays.sort(returnStringArray); Logging.connectors.debug("Exiting 'getMeridioDocumentProperties' method"); return returnStringArray; } catch (org.apache.axis.AxisFault e) { long currentTime = System.currentTimeMillis(); if (e.getFaultCode().equals(new javax.xml.namespace.QName("http://xml.apache.org/axis/", "HTTP"))) { org.w3c.dom.Element elem = e.lookupFaultDetail( new javax.xml.namespace.QName("http://xml.apache.org/axis/", "HttpErrorCode")); if (elem != null) { elem.normalize(); String httpErrorCode = elem.getFirstChild().getNodeValue().trim(); throw new ManifoldCFException("Unexpected http error code " + httpErrorCode + " getting document properties: " + e.getMessage()); } throw new ManifoldCFException( "Unknown http error occurred while getting document properties: " + e.getMessage(), e); } if (e.getFaultCode().equals(new javax.xml.namespace.QName( "http://schemas.xmlsoap.org/soap/envelope/", "Server.userException"))) { String exceptionName = e.getFaultString(); if (exceptionName.equals("java.lang.InterruptedException")) throw new ManifoldCFException("Interrupted", ManifoldCFException.INTERRUPTED); } if (e.getFaultCode().equals( new javax.xml.namespace.QName("http://schemas.xmlsoap.org/soap/envelope/", "Server"))) { if (e.getFaultString().indexOf(" 23031#") != -1) { // This means that the session has expired, so reset it and retry meridio_ = null; continue; } } throw new ManifoldCFException( "Meridio: Got an unknown remote exception getting document properties - axis fault = " + e.getFaultCode().getLocalPart() + ", detail = " + e.getFaultString(), e); } catch (RemoteException remoteException) { throw new ManifoldCFException( "Meridio: A 
Remote Exception occurred while " + "retrieving the Meridio document properties: " + remoteException.getMessage(), remoteException); } catch (MeridioDataSetException meridioDataSetException) { throw new ManifoldCFException( "Meridio: DataSet Exception occurred retrieving the Meridio document properties: " + meridioDataSetException.getMessage(), meridioDataSetException); } } }
From source file:org.apache.manifoldcf.crawler.connectors.meridio.MeridioConnector.java
/** Set up the session with Meridio */ protected void getSession() throws ManifoldCFException, ServiceInterruption { if (meridio_ == null) { // Do the first part (which used to be in connect() itself) try {/*from w ww .j a v a2 s. c o m*/ /*================================================================= * Construct the URL strings from the parameters *================================================================*/ String DMWSProtocol = params.getParameter("DMWSServerProtocol"); String DMWSPort = params.getParameter("DMWSServerPort"); if (DMWSPort == null || DMWSPort.length() == 0) DMWSPort = ""; else DMWSPort = ":" + DMWSPort; String Url = DMWSProtocol + "://" + params.getParameter("DMWSServerName") + DMWSPort + params.getParameter("DMWSLocation"); Logging.connectors.debug("Meridio: Document Management Web Service (DMWS) URL is [" + Url + "]"); DmwsURL = new URL(Url); String RMWSProtocol = params.getParameter("RMWSServerProtocol"); String RMWSPort = params.getParameter("RMWSServerPort"); if (RMWSPort == null || RMWSPort.length() == 0) RMWSPort = ""; else RMWSPort = ":" + RMWSPort; Url = RMWSProtocol + "://" + params.getParameter("RMWSServerName") + RMWSPort + params.getParameter("RMWSLocation"); Logging.connectors.debug("Meridio: Record Management Web Service (RMWS) URL is [" + Url + "]"); RmwsURL = new URL(Url); // Set up ssl if indicated String keystoreData = params.getParameter("MeridioKeystore"); if (keystoreData != null) mySSLFactory = KeystoreManagerFactory.make("", keystoreData).getSecureSocketFactory(); else mySSLFactory = null; // Put together the url base String clientProtocol = params.getParameter("MeridioWebClientProtocol"); String clientPort = params.getParameter("MeridioWebClientServerPort"); if (clientPort == null || clientPort.length() == 0) clientPort = ""; else clientPort = ":" + clientPort; urlVersionBase = clientProtocol + "://" + params.getParameter("MeridioWebClientServerName") + clientPort + 
params.getParameter("MeridioWebClientDocDownloadLocation"); urlBase = urlVersionBase + "?launchMode=1&launchAs=0&documentId="; } catch (MalformedURLException malformedURLException) { throw new ManifoldCFException( "Meridio: Could not construct the URL for either " + "the DM or RM Meridio Web Service", malformedURLException, ManifoldCFException.REPOSITORY_CONNECTION_ERROR); } // Do the second part (where we actually try to connect to the system) try { /*================================================================= * Now try and login to Meridio; the wrapper's constructor can be * used as it calls the Meridio login method *================================================================*/ meridio_ = new MeridioWrapper(Logging.connectors, DmwsURL, RmwsURL, null, params.getParameter("DMWSProxyHost"), params.getParameter("DMWSProxyPort"), params.getParameter("RMWSProxyHost"), params.getParameter("RMWSProxyPort"), null, null, params.getParameter("UserName"), params.getObfuscatedParameter("Password"), InetAddress.getLocalHost().getHostName(), mySSLFactory, org.apache.manifoldcf.connectorcommon.common.CommonsHTTPSender.class, "client-config.wsdd"); } catch (NumberFormatException e) { throw new ManifoldCFException("Meridio: bad number: " + e.getMessage(), e); } catch (UnknownHostException unknownHostException) { throw new ManifoldCFException("Meridio: A Unknown Host Exception occurred while " + "connecting - is a network software and hardware configuration: " + unknownHostException.getMessage(), unknownHostException); } catch (org.apache.axis.AxisFault e) { long currentTime = System.currentTimeMillis(); if (e.getFaultCode().equals(new javax.xml.namespace.QName("http://xml.apache.org/axis/", "HTTP"))) { org.w3c.dom.Element elem = e.lookupFaultDetail( new javax.xml.namespace.QName("http://xml.apache.org/axis/", "HttpErrorCode")); if (elem != null) { elem.normalize(); String httpErrorCode = elem.getFirstChild().getNodeValue().trim(); throw new 
ManifoldCFException("Unexpected http error code " + httpErrorCode + " accessing Meridio: " + e.getMessage(), e); } throw new ManifoldCFException("Unknown http error occurred while connecting: " + e.getMessage(), e); } if (e.getFaultCode().equals(new javax.xml.namespace.QName( "http://schemas.xmlsoap.org/soap/envelope/", "Server.userException"))) { String exceptionName = e.getFaultString(); if (exceptionName.equals("java.lang.InterruptedException")) throw new ManifoldCFException("Interrupted", ManifoldCFException.INTERRUPTED); } if (Logging.connectors.isDebugEnabled()) Logging.connectors.debug("Meridio: Got an unknown remote exception connecting - axis fault = " + e.getFaultCode().getLocalPart() + ", detail = " + e.getFaultString() + " - retrying", e); throw new ServiceInterruption("Remote procedure exception: " + e.getMessage(), e, currentTime + 300000L, currentTime + 3 * 60 * 60000L, -1, false); } catch (RemoteException remoteException) { throw new ManifoldCFException("Meridio: An unknown remote exception occurred while " + "connecting: " + remoteException.getMessage(), remoteException); } } }
From source file:org.apache.manifoldcf.crawler.connectors.meridio.MeridioConnector.java
/** Process a set of documents. * This is the method that should cause each document to be fetched, processed, and the results either added * to the queue of documents for the current job, and/or entered into the incremental ingestion manager. * The document specification allows this class to filter what is done based on the job. * The connector will be connected before this method can be called. *@param documentIdentifiers is the set of document identifiers to process. *@param statuses are the currently-stored document versions for each document in the set of document identifiers * passed in above.//w w w . j av a 2 s . c o m *@param activities is the interface this method should use to queue up new document references * and ingest documents. *@param jobMode is an integer describing how the job is being run, whether continuous or once-only. *@param usesDefaultAuthority will be true only if the authority in use for these documents is the default one. */ @Override public void processDocuments(String[] documentIdentifiers, IExistingVersions statuses, Specification spec, IProcessActivity activities, int jobMode, boolean usesDefaultAuthority) throws ManifoldCFException, ServiceInterruption { // Get forced acls/security enable/disable String[] acls = getAcls(spec); // Sort it, in case it is needed. if (acls != null) java.util.Arrays.sort(acls); // Look at the metadata attributes. // So that the version strings are comparable, we will put them in an array first, and sort them. 
Set<String> holder = new HashSet<String>(); String pathAttributeName = null; MatchMap matchMap = new MatchMap(); boolean allMetadata = false; int i = 0; while (i < spec.getChildCount()) { SpecificationNode n = spec.getChild(i++); if (n.getType().equals("ReturnedMetadata")) { String category = n.getAttributeValue("category"); String attributeName = n.getAttributeValue("property"); String metadataName; if (category == null || category.length() == 0) metadataName = attributeName; else metadataName = category + "." + attributeName; holder.add(metadataName); } else if (n.getType().equals("AllMetadata")) { String value = n.getAttributeValue("value"); if (value != null && value.equals("true")) { allMetadata = true; } } else if (n.getType().equals("pathnameattribute")) pathAttributeName = n.getAttributeValue("value"); else if (n.getType().equals("pathmap")) { // Path mapping info also needs to be looked at, because it affects what is // ingested. String pathMatch = n.getAttributeValue("match"); String pathReplace = n.getAttributeValue("replace"); matchMap.appendMatchPair(pathMatch, pathReplace); } } while (true) { getSession(); // The version string returned must include everything that could affect what is ingested. In meridio's // case, this includes the date stamp, but it also includes the part of the specification that describes // the metadata desired. // The code here relies heavily on the search method to do it's thing. The search method originally // used the document specification to determine what metadata to return, which was problematic because that // meant this method had to modify the specification (not good practice), and was also wrong from the point // of view that we need to get the metadata specification appended to the version string in some way, and // use THAT data in processDocuments(). So I've broken all that up. 
try { // Put into an array ReturnMetadata[] categoryPropertyValues; String[] categoryPropertyStringValues; String[] sortArray; if (allMetadata) { categoryPropertyStringValues = getMeridioDocumentProperties(); } else { categoryPropertyStringValues = new String[holder.size()]; i = 0; for (String value : holder) { categoryPropertyStringValues[i++] = value; } } // Sort! java.util.Arrays.sort(categoryPropertyStringValues); categoryPropertyValues = new ReturnMetadata[categoryPropertyStringValues.length]; i = 0; for (String value : categoryPropertyStringValues) { int dotIndex = value.indexOf("."); String categoryName = null; String propertyName; if (dotIndex == -1) propertyName = value; else { categoryName = value.substring(0, dotIndex); propertyName = value.substring(dotIndex + 1); } categoryPropertyValues[i++] = new ReturnMetadata(categoryName, propertyName); } // Prepare the part of the version string that is decodeable StringBuilder decodeableString = new StringBuilder(); // Add the metadata piece first packList(decodeableString, categoryPropertyStringValues, '+'); // Now, put in the forced acls. // The version string needs only to contain the forced acls, since the version date captures changes // made to the acls that are actually associated with the document. if (acls == null) decodeableString.append('-'); else { decodeableString.append('+'); packList(decodeableString, acls, '+'); decodeableString.append('+'); pack(decodeableString, defaultAuthorityDenyToken, '+'); } // Calculate the part of the version string that comes from path name and mapping. 
if (pathAttributeName != null) { decodeableString.append("+"); pack(decodeableString, pathAttributeName, '+'); pack(decodeableString, matchMap.toString(), '+'); } else decodeableString.append("-"); long[] docIds = new long[documentIdentifiers.length]; for (i = 0; i < documentIdentifiers.length; i++) { docIds[i] = new Long(documentIdentifiers[i]).longValue(); } /*================================================================= * Call the search, with the document specification and the list of * document ids - the search will never return more than exactly * one match per document id * * We are assuming that the maximum number of hits to return * should never be more than the maximum batch size set up for this * class * * We are just making one web service call (to the search API) * rather than iteratively calling a web service method for each * document passed in as part of the document array * * Additionally, re-using the same search method as for the * "getDocumentIdentifiers" method ensures that we are not * duplicating any logic which ensures that the document/records * in question match the search criteria or not. *================================================================*/ DMSearchResults searchResults = documentSpecificationSearch(spec, 0, 0, 1, this.getMaxDocumentRequest(), docIds, null); if (Logging.connectors.isDebugEnabled()) Logging.connectors.debug("Found a total of <" + searchResults.totalHitsCount + "> hit(s) " + "and <" + searchResults.returnedHitsCount + "> were returned by the method call"); // If we are searching based on document identifier, then it is possible that we will not // find a document we are looking for, if it was removed from the system between the time // it was put in the queue and when it's version is obtained. Documents where this happens // should return a version string of null. // Let's go through the search results and build a hash based on the document identifier. 
Map<Long, SEARCHRESULTS_DOCUMENTS> documentMap = new HashMap<Long, SEARCHRESULTS_DOCUMENTS>(); if (searchResults.dsDM != null) { SEARCHRESULTS_DOCUMENTS[] srd = searchResults.dsDM.getSEARCHRESULTS_DOCUMENTS(); for (i = 0; i < srd.length; i++) { documentMap.put(new Long(srd[i].getDocId()), srd[i]); } } // Now, walk through the individual documents. Map<Long, String> versionStrings = new HashMap<Long, String>(); for (int j = 0; j < docIds.length; j++) { String documentIdentifier = documentIdentifiers[j]; long docId = docIds[j]; Long docKey = new Long(docId); // Look up the record. SEARCHRESULTS_DOCUMENTS doc = documentMap.get(docKey); if (doc != null) { // Set the version string. The parseable stuff goes first, so parsing is easy. String version = doc.getStr_value(); StringBuilder composedVersion = new StringBuilder(); composedVersion.append(decodeableString); composedVersion.append(version); // Added 9/7/2007 composedVersion.append("_").append(urlVersionBase); // String versionString = composedVersion.toString(); if (Logging.connectors.isDebugEnabled()) Logging.connectors .debug("Meridio: Document " + docKey + " has version " + versionString); if (activities.checkDocumentNeedsReindexing(documentIdentifier, versionString)) versionStrings.put(docKey, versionString); } else { if (Logging.connectors.isDebugEnabled()) Logging.connectors.debug("Meridio: Document " + docKey + " is no longer in the search set, or has been deleted - removing."); activities.deleteDocument(documentIdentifier); } } // Now submit search requests for all the documents requiring fetch. Map<Long, Map<String, String>> documentPropertyMap = new HashMap<Long, Map<String, String>>(); // Only look up metadata if we need some! 
if (versionStrings.size() > 0 && categoryPropertyValues.length > 0) { long[] fetchIds = new long[versionStrings.size()]; i = 0; for (Long docKey : versionStrings.keySet()) { fetchIds[i++] = docKey; } /*================================================================= * Call the search, with the document specification and the list of * document ids - the search will never return more than exactly * one match per document id * * This call will return all the metadata that was specified in the * document specification for all the documents and * records in one call. *================================================================*/ searchResults = documentSpecificationSearch(spec, 0, 0, 1, fetchIds.length, fetchIds, categoryPropertyValues); // If we ask for a document and it is no longer there, we should treat this as a deletion. // The activity in that case is to delete the document. A similar thing should happen if // any of the other methods (like getting the document's content) also fail to find the // document. // Let's build a hash which contains all the document metadata returned. The form of // the hash will be: key = the document identifier, value = another hash, which is keyed // by the metadata category/property, and which has a value that is the metadata value. 
Map<Long, MutableInteger> counterMap = new HashMap<Long, MutableInteger>(); if (searchResults.dsDM != null) { SEARCHRESULTS_DOCUMENTS[] searchResultsDocuments = searchResults.dsDM .getSEARCHRESULTS_DOCUMENTS(); for (SEARCHRESULTS_DOCUMENTS searchResultsDocument : searchResultsDocuments) { long docId = searchResultsDocument.getDocId(); Long docKey = new Long(docId); MutableInteger counterMapItem = counterMap.get(docKey); if (counterMapItem == null) { counterMapItem = new MutableInteger(); counterMap.put(docKey, counterMapItem); } String propertyName = categoryPropertyStringValues[counterMapItem.getValue()]; counterMapItem.increment(); String propertyValue = searchResultsDocuments[i].getStr_value(); Map<String, String> propertyMap = documentPropertyMap.get(docKey); if (propertyMap == null) { propertyMap = new HashMap<String, String>(); documentPropertyMap.put(docKey, propertyMap); } if (propertyValue != null && propertyValue.length() > 0) propertyMap.put(propertyName, propertyValue); } } } // Okay, we are ready now to go through the individual documents and do the ingestion or deletion. for (String documentIdentifier : documentIdentifiers) { Long docKey = new Long(documentIdentifier); long docId = docKey.longValue(); String docVersion = versionStrings.get(docKey); if (docVersion != null) { if (Logging.connectors.isDebugEnabled()) Logging.connectors.debug("Processing document identifier '" + documentIdentifier + "' " + "with version string '" + docVersion + "'"); // For each document, be sure the job is still allowed to run. 
activities.checkJobStillActive(); RepositoryDocument repositoryDocument = new RepositoryDocument(); // Load the metadata items into the ingestion document object Map<String, String> docMetadataMap = documentPropertyMap.get(docKey); if (docMetadataMap != null) { for (String categoryPropertyName : categoryPropertyStringValues) { String propertyValue = docMetadataMap.get(categoryPropertyName); if (propertyValue != null && propertyValue.length() > 0) repositoryDocument.addField(categoryPropertyName, propertyValue); } } /*================================================================= * Construct the URL to the object * * HTTP://HOST:PORT/meridio/browse/downloadcontent.aspx?documentId=<docId>&launchMode=1&launchAs=0 * * I expect we need to add additional parameters to the configuration * specification *================================================================*/ String fileURL = urlBase + new Long(docId).toString(); if (Logging.connectors.isDebugEnabled()) Logging.connectors.debug( "URL for document '" + new Long(docId).toString() + "' is '" + fileURL + "'"); /*================================================================= * Get the object's ACLs and owner information *================================================================*/ DMDataSet documentData = null; documentData = meridio_.getDocumentData((int) docId, true, true, false, false, DmVersionInfo.LATEST, false, false, false); if (null == documentData) { if (Logging.connectors.isDebugEnabled()) Logging.connectors .debug("Meridio: Could not retrieve document data for document id '" + new Long(docId).toString() + "' in processDocuments method - deleting document."); activities.noDocument(documentIdentifier, docVersion); continue; } if (null == documentData.getDOCUMENTS() || documentData.getDOCUMENTS().length != 1) { if (Logging.connectors.isDebugEnabled()) Logging.connectors .debug("Meridio: Could not retrieve document owner for document id '" + new Long(docId).toString() + "' in processDocuments 
method. No information or incorrect amount " + "of information was returned"); activities.noDocument(documentIdentifier, docVersion); continue; } // Do path metadata if (pathAttributeName != null && pathAttributeName.length() > 0) { if (Logging.connectors.isDebugEnabled()) Logging.connectors.debug("Meridio: Path attribute name is " + pathAttributeName); RMDataSet partList; int recordType = documentData.getDOCUMENTS()[0].getPROP_recordType(); if (recordType == 0 || recordType == 4 || recordType == 19) partList = meridio_.getRecordPartList((int) docId, false, false); else partList = meridio_.getDocumentPartList((int) docId); if (partList != null) { if (Logging.connectors.isDebugEnabled()) Logging.connectors.debug("Meridio: Document '" + new Long(docId).toString() + "' has a part list with " + Integer.toString(partList.getRm2vPart().length) + " values"); for (int k = 0; k < partList.getRm2vPart().length; k++) { repositoryDocument.addField(pathAttributeName, matchMap.translate(partList.getRm2vPart()[k].getParentTitlePath())); } } else { if (Logging.connectors.isDebugEnabled()) Logging.connectors.debug("Meridio: Document '" + new Long(docId).toString() + "' has no part list, so no path attribute"); } } // Process acls. If there are forced acls, use those, otherwise get them from Meridio. String[] allowAcls; String[] denyAcls; // forcedAcls will be null if security is off, or nonzero length if security is on but hard-wired if (acls != null && acls.length == 0) { ACCESSCONTROL[] documentAcls = documentData.getACCESSCONTROL(); List<String> allowAclsArrayList = new ArrayList<String>(); List<String> denyAclsArrayList = new ArrayList<String>(); // Allow a broken authority to disable all Meridio documents, even if the document is 'wide open', because // Meridio does not permit viewing of the document if the user does not exist (at least, I don't know of a way). 
denyAclsArrayList.add(defaultAuthorityDenyToken); if (documentAcls != null) { for (int j = 0; j < documentAcls.length; j++) { if (Logging.connectors.isDebugEnabled()) Logging.connectors.debug("Object Id '" + documentAcls[j].getObjectId() + "' " + "Object Type '" + documentAcls[j].getObjectType() + "' " + "Permission '" + documentAcls[j].getPermission() + "' " + "User Id '" + documentAcls[j].getUserId() + "' " + "Group Id '" + documentAcls[j].getGroupId() + "'"); if (documentAcls[j].getPermission() == 0) // prohibit permission { if (documentAcls[j].getGroupId() > 0) { denyAclsArrayList.add("G" + documentAcls[j].getGroupId()); } else if (documentAcls[j].getUserId() > 0) { denyAclsArrayList.add("U" + documentAcls[j].getUserId()); } } else // read, amend or manage { if (documentAcls[j].getGroupId() > 0) { allowAclsArrayList.add("G" + documentAcls[j].getGroupId()); } else if (documentAcls[j].getUserId() > 0) { allowAclsArrayList.add("U" + documentAcls[j].getUserId()); } } } } DOCUMENTS document = documentData.getDOCUMENTS()[0]; if (Logging.connectors.isDebugEnabled()) Logging.connectors.debug("Document id '" + new Long(docId).toString() + "' is owned by owner id '" + document.getPROP_ownerId() + "' having the owner name '" + document.getPROP_ownerName() + "' Record Type is '" + document.getPROP_recordType() + "'"); if (document.getPROP_recordType() == 4 || document.getPROP_recordType() == 19) { RMDataSet rmds = meridio_.getRecord((int) docId, false, false, false); Rm2vRecord record = rmds.getRm2vRecord()[0]; if (Logging.connectors.isDebugEnabled()) Logging.connectors.debug("Record User Id Owner is '" + record.getOwnerID() + "' Record Group Owner Id is '" + record.getGroupOwnerID() + "'"); /*================================================================= * Either a group or a user owns a record, cannot be both and the * group takes priority if it is set *================================================================*/ if (record.getGroupOwnerID() > 0) { 
allowAclsArrayList.add("G" + record.getGroupOwnerID()); } else if (record.getOwnerID() > 0) { allowAclsArrayList.add("U" + record.getOwnerID()); } } else { allowAclsArrayList.add("U" + document.getPROP_ownerId()); } /*================================================================= * Set up the string arrays and then set the ACLs in the * repository document *================================================================*/ allowAcls = new String[allowAclsArrayList.size()]; for (int j = 0; j < allowAclsArrayList.size(); j++) { allowAcls[j] = allowAclsArrayList.get(j); if (Logging.connectors.isDebugEnabled()) Logging.connectors .debug("Meridio: Adding '" + allowAcls[j] + "' to allow ACLs"); } denyAcls = new String[denyAclsArrayList.size()]; for (int j = 0; j < denyAclsArrayList.size(); j++) { denyAcls[j] = denyAclsArrayList.get(j); if (Logging.connectors.isDebugEnabled()) Logging.connectors.debug("Meridio: Adding '" + denyAcls[j] + "' to deny ACLs"); } } else { allowAcls = acls; if (allowAcls == null) denyAcls = null; else denyAcls = new String[] { defaultAuthorityDenyToken }; } repositoryDocument.setSecurity(RepositoryDocument.SECURITY_TYPE_DOCUMENT, allowAcls, denyAcls); /*================================================================= * Get the object's content, and ingest the document *================================================================*/ try { AttachmentPart ap = meridio_.getLatestVersionFile((int) docId); if (null == ap) { if (Logging.connectors.isDebugEnabled()) Logging.connectors.debug("Meridio: Failed to get content for document '" + new Long(docId).toString() + "'"); // No document. Delete what's there activities.noDocument(documentIdentifier, docVersion); continue; } try { // Get the file name. String fileName = ap.getDataHandler().getName(); // Log what we are about to do. 
if (Logging.connectors.isDebugEnabled()) Logging.connectors.debug("Meridio: File data is supposedly in " + fileName); File theTempFile = new File(fileName); if (theTempFile.isFile()) { long fileSize = theTempFile.length(); // ap.getSize(); if (activities.checkLengthIndexable(fileSize)) { InputStream is = new FileInputStream(theTempFile); // ap.getDataHandler().getInputStream(); try { repositoryDocument.setBinary(is, fileSize); if (null != activities) { activities.ingestDocumentWithException(documentIdentifier, docVersion, fileURL, repositoryDocument); } } finally { is.close(); } } else { activities.noDocument(documentIdentifier, docVersion); continue; } } else { if (Logging.connectors.isDebugEnabled()) Logging.connectors.debug( "Meridio: Expected temporary file was not present - skipping document '" + new Long(docId).toString() + "'"); activities.deleteDocument(documentIdentifier); continue; } } finally { ap.dispose(); } } catch (java.net.SocketTimeoutException ioex) { throw new ManifoldCFException("Socket timeout exception: " + ioex.getMessage(), ioex); } catch (ConnectTimeoutException ioex) { throw new ManifoldCFException("Connect timeout exception: " + ioex.getMessage(), ioex); } catch (InterruptedIOException e) { throw new ManifoldCFException("Interrupted: " + e.getMessage(), e, ManifoldCFException.INTERRUPTED); } catch (org.apache.axis.AxisFault e) { throw e; } catch (RemoteException e) { throw e; } catch (SOAPException soapEx) { throw new ManifoldCFException( "SOAP Exception encountered while retrieving document content: " + soapEx.getMessage(), soapEx); } catch (IOException ioex) { throw new ManifoldCFException("Input stream failure: " + ioex.getMessage(), ioex); } } } Logging.connectors.debug("Meridio: Exiting 'processDocuments' method"); return; } catch (org.apache.axis.AxisFault e) { long currentTime = System.currentTimeMillis(); if (e.getFaultCode().equals(new javax.xml.namespace.QName("http://xml.apache.org/axis/", "HTTP"))) { org.w3c.dom.Element elem = 
e.lookupFaultDetail( new javax.xml.namespace.QName("http://xml.apache.org/axis/", "HttpErrorCode")); if (elem != null) { elem.normalize(); String httpErrorCode = elem.getFirstChild().getNodeValue().trim(); throw new ManifoldCFException("Unexpected http error code " + httpErrorCode + " accessing Meridio: " + e.getMessage(), e); } throw new ManifoldCFException( "Unknown http error occurred while getting doc versions: " + e.getMessage(), e); } if (e.getFaultCode().equals(new javax.xml.namespace.QName( "http://schemas.xmlsoap.org/soap/envelope/", "Server.userException"))) { String exceptionName = e.getFaultString(); if (exceptionName.equals("java.lang.InterruptedException")) throw new ManifoldCFException("Interrupted", ManifoldCFException.INTERRUPTED); } if (e.getFaultCode().equals( new javax.xml.namespace.QName("http://schemas.xmlsoap.org/soap/envelope/", "Server"))) { if (e.getFaultString().indexOf(" 23031#") != -1) { // This means that the session has expired, so reset it and retry meridio_ = null; continue; } } if (Logging.connectors.isDebugEnabled()) Logging.connectors .debug("Meridio: Got an unknown remote exception getting doc versions - axis fault = " + e.getFaultCode().getLocalPart() + ", detail = " + e.getFaultString() + " - retrying", e); throw new ServiceInterruption("Remote procedure exception: " + e.getMessage(), e, currentTime + 300000L, currentTime + 3 * 60 * 60000L, -1, false); } catch (RemoteException remoteException) { throw new ManifoldCFException("Meridio: A remote exception occurred while getting doc versions: " + remoteException.getMessage(), remoteException); } catch (MeridioDataSetException meridioDataSetException) { throw new ManifoldCFException("Meridio: A problem occurred manipulating the Web " + "Service XML: " + meridioDataSetException.getMessage(), meridioDataSetException); } } }
From source file:net.wastl.webmail.xml.XMLCommon.java
/** * Return the node value of a single node selected by the given xpath * expression./* w w w. java2s .c om*/ */ public static String getValueXPath(Element root, String path) { root.normalize(); try { Node n = XPathAPI.selectSingleNode(root, path); if (n != null) { return n.getNodeValue(); } else { return null; } } catch (Exception ex) { log.error("Xpath query failed. Continuing as if no node found.", ex); return null; } }
From source file:net.wastl.webmail.xml.XMLCommon.java
/** * Set the node value of the node selected by the given xpath expression. *///from w ww. j a v a 2s. c om public static void setValueXPath(Element root, String path, String value) { root.normalize(); try { Node n = XPathAPI.selectSingleNode(root, path); if (n != null) { n.setNodeValue(value); } else { addNodeXPath(root, getParentXPath(path), root.getOwnerDocument().createTextNode(value)); } } catch (TransformerException ex) { addNodeXPath(root, getParentXPath(path), root.getOwnerDocument().createTextNode(value)); } catch (Exception ex) { log.error("Failed to set value '" + value + "' for path '" + path + "'. Continuing, but should not.", ex); // TODO: Throw here. } }