Java tutorial
/* $Id: SharePointRepository.java 996524 2010-09-13 13:38:01Z kwright $ */ /** * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. * The ASF licenses this file to You under the Apache License, Version 2.0 * (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.apache.manifoldcf.crawler.connectors.sharepoint; import org.apache.manifoldcf.core.interfaces.*; import org.apache.manifoldcf.agents.interfaces.*; import org.apache.manifoldcf.crawler.interfaces.*; import org.apache.manifoldcf.crawler.system.Logging; import org.apache.manifoldcf.crawler.system.ManifoldCF; import org.apache.manifoldcf.core.common.*; import org.apache.manifoldcf.connectorcommon.interfaces.*; import org.apache.manifoldcf.connectorcommon.extmimemap.ExtensionMimeMap; import org.apache.manifoldcf.core.util.URLEncoder; import org.apache.manifoldcf.core.util.URLDecoder; import java.io.*; import java.util.Date; import java.util.ArrayList; import java.util.HashMap; import java.util.Iterator; import java.util.Map; import java.util.Locale; import java.util.List; import java.util.Set; import java.util.HashSet; import java.util.concurrent.TimeUnit; import java.net.*; import org.apache.log4j.Logger; import org.apache.log4j.Level; import org.apache.http.conn.HttpClientConnectionManager; import org.apache.http.client.HttpClient; import org.apache.http.impl.conn.PoolingHttpClientConnectionManager; import 
org.apache.http.impl.client.HttpClientBuilder; import org.apache.http.protocol.HttpRequestExecutor; import org.apache.http.impl.client.HttpClients; import org.apache.http.client.config.RequestConfig; import org.apache.http.client.CredentialsProvider; import org.apache.http.impl.client.BasicCredentialsProvider; import org.apache.http.config.SocketConfig; import org.apache.http.conn.ssl.SSLConnectionSocketFactory; import org.apache.http.conn.ssl.BrowserCompatHostnameVerifier; import org.apache.http.HttpEntity; import org.apache.http.HttpResponse; import org.apache.http.auth.AuthScope; import org.apache.http.auth.NTCredentials; import org.apache.http.client.methods.HttpGet; import org.apache.http.impl.client.DefaultRedirectStrategy; import org.apache.http.util.EntityUtils; import org.apache.http.client.HttpRequestRetryHandler; import org.apache.http.protocol.HttpContext; import org.apache.http.HttpHost; /** This is the "repository connector" for Microsoft SharePoint. * Document identifiers for this connector come in three forms: * (1) An "S" followed by the encoded subsite/library path, which represents the encoded relative path from the root site to a library. [deprecated and no longer supported]; * (2) A "D" followed by a subsite/library/folder/file path, which represents the relative path from the root site to a file. [deprecated and no longer supported] * (3) Six different kinds of unencoded path, each of which starts with a "/" at the beginning, where the "/" represents the root site of the connection, as follows: * /sitepath/ - the relative path to a site. The path MUST both begin and end with a single "/". * /sitepath/libraryname// - the relative path to a library. The path MUST begin with a single "/" and end with "//". * /sitepath/libraryname//folderfilepath - the relative path to a file. The path MUST begin with a single "/" and MUST include a "//" after the library, and must NOT end with a "/". * /sitepath/listname/// - the relative path to a list. 
The path MUST begin with a single "/" and end with "///". * /sitepath/listname///rowid - the relative path to a list item. The path MUST begin with a single "/" and MUST include a "///" after the list name, and must NOT end in a "/". * /sitepath/listname///rowid//attachment_filename - the relative path to a list attachment. The path MUST begin with a single "/", MUST include a "///" after the list name, and * MUST include a "//" separating the rowid from the filename. */ public class SharePointRepository extends org.apache.manifoldcf.crawler.connectors.BaseRepositoryConnector { public static final String _rcsid = "@(#)$Id: SharePointRepository.java 996524 2010-09-13 13:38:01Z kwright $"; // Properties we need public final static String wsddPathProperty = "org.apache.manifoldcf.sharepoint.wsddpath"; // Activities we log public final static String ACTIVITY_FETCH = "fetch"; protected final static long sessionExpirationInterval = 300000L; private boolean supportsItemSecurity = false; private boolean dspStsWorks = true; private boolean attachmentsSupported = false; private boolean activeDirectoryAuthority = true; private String serverProtocol = null; private String serverUrl = null; private String fileBaseUrl = null; private String userName = null; private String strippedUserName = null; private String password = null; private String ntlmDomain = null; private String serverName = null; private String serverLocation = null; private String encodedServerLocation = null; private int serverPort = -1; private SPSProxyHelper proxy = null; private long sessionTimeout; // SSL support private String keystoreData = null; private IKeystoreManager keystoreManager = null; private HttpClientConnectionManager connectionManager = null; private HttpClient httpClient = null; // Current host name private static String currentHost = null; static { // Find the current host name try { java.net.InetAddress addr = java.net.InetAddress.getLocalHost(); // Get hostname currentHost = 
addr.getHostName(); } catch (UnknownHostException e) { } } // Turn off AXIS debug output that we don't want static { Logger logger = Logger.getLogger("org.apache.axis.ConfigurationException"); logger.setLevel(Level.INFO); } /** Deny access token for default authority */ private final static String defaultAuthorityDenyToken = GLOBAL_DENY_TOKEN; /** Constructor. */ public SharePointRepository() { } /** Set up a session */ protected void getSession() throws ManifoldCFException { if (proxy == null) { String serverVersion = params.getParameter(SharePointConfig.PARAM_SERVERVERSION); if (serverVersion == null) serverVersion = "4.0"; supportsItemSecurity = !serverVersion.equals("2.0"); dspStsWorks = serverVersion.equals("2.0") || serverVersion.equals("3.0"); attachmentsSupported = !serverVersion.equals("2.0"); String authorityType = params.getParameter(SharePointConfig.PARAM_AUTHORITYTYPE); if (authorityType == null) authorityType = "ActiveDirectory"; activeDirectoryAuthority = authorityType.equals("ActiveDirectory"); serverProtocol = params.getParameter(SharePointConfig.PARAM_SERVERPROTOCOL); if (serverProtocol == null) serverProtocol = "http"; try { String serverPort = params.getParameter(SharePointConfig.PARAM_SERVERPORT); if (serverPort == null || serverPort.length() == 0) { if (serverProtocol.equals("https")) this.serverPort = 443; else this.serverPort = 80; } else this.serverPort = Integer.parseInt(serverPort); } catch (NumberFormatException e) { throw new ManifoldCFException(e.getMessage(), e); } serverLocation = params.getParameter(SharePointConfig.PARAM_SERVERLOCATION); if (serverLocation == null) serverLocation = ""; if (serverLocation.endsWith("/")) serverLocation = serverLocation.substring(0, serverLocation.length() - 1); if (serverLocation.length() > 0 && !serverLocation.startsWith("/")) serverLocation = "/" + serverLocation; encodedServerLocation = serverLocation; serverLocation = decodePath(serverLocation); userName = 
params.getParameter(SharePointConfig.PARAM_SERVERUSERNAME); password = params.getObfuscatedParameter(SharePointConfig.PARAM_SERVERPASSWORD); int index = userName.indexOf("\\"); if (index != -1) { strippedUserName = userName.substring(index + 1); ntlmDomain = userName.substring(0, index); } else { strippedUserName = null; ntlmDomain = null; } String proxyHost = params.getParameter(SharePointConfig.PARAM_PROXYHOST); String proxyPortString = params.getParameter(SharePointConfig.PARAM_PROXYPORT); int proxyPort = 8080; if (proxyPortString != null && proxyPortString.length() > 0) { try { proxyPort = Integer.parseInt(proxyPortString); } catch (NumberFormatException e) { throw new ManifoldCFException(e.getMessage(), e); } } String proxyUsername = params.getParameter(SharePointConfig.PARAM_PROXYUSER); String proxyPassword = params.getParameter(SharePointConfig.PARAM_PROXYPASSWORD); String proxyDomain = params.getParameter(SharePointConfig.PARAM_PROXYDOMAIN); serverUrl = serverProtocol + "://" + serverName; if (serverProtocol.equals("https")) { if (serverPort != 443) serverUrl += ":" + Integer.toString(serverPort); } else { if (serverPort != 80) serverUrl += ":" + Integer.toString(serverPort); } fileBaseUrl = serverUrl + encodedServerLocation; // Set up ssl if indicated keystoreData = params.getParameter(SharePointConfig.PARAM_SERVERKEYSTORE); int connectionTimeout = 60000; int socketTimeout = 900000; connectionManager = new PoolingHttpClientConnectionManager(); CredentialsProvider credentialsProvider = new BasicCredentialsProvider(); SSLConnectionSocketFactory myFactory = null; if (keystoreData != null) { keystoreManager = KeystoreManagerFactory.make("", keystoreData); myFactory = new SSLConnectionSocketFactory(keystoreManager.getSecureSocketFactory(), new BrowserCompatHostnameVerifier()); } if (strippedUserName != null) { credentialsProvider.setCredentials(new AuthScope(serverName, serverPort), new NTCredentials(strippedUserName, password, currentHost, ntlmDomain)); } 
RequestConfig.Builder requestBuilder = RequestConfig.custom().setCircularRedirectsAllowed(true) .setSocketTimeout(socketTimeout).setStaleConnectionCheckEnabled(true) .setExpectContinueEnabled(false).setConnectTimeout(connectionTimeout) .setConnectionRequestTimeout(socketTimeout); // If there's a proxy, set that too. if (proxyHost != null && proxyHost.length() > 0) { // Configure proxy authentication if (proxyUsername != null && proxyUsername.length() > 0) { if (proxyPassword == null) proxyPassword = ""; if (proxyDomain == null) proxyDomain = ""; credentialsProvider.setCredentials(new AuthScope(proxyHost, proxyPort), new NTCredentials(proxyUsername, proxyPassword, currentHost, proxyDomain)); } HttpHost proxy = new HttpHost(proxyHost, proxyPort); requestBuilder.setProxy(proxy); } HttpClientBuilder builder = HttpClients.custom().setConnectionManager(connectionManager) .setMaxConnTotal(1).disableAutomaticRetries().setDefaultRequestConfig(requestBuilder.build()) .setDefaultSocketConfig( SocketConfig.custom().setTcpNoDelay(true).setSoTimeout(socketTimeout).build()) .setDefaultCredentialsProvider(credentialsProvider); if (myFactory != null) builder.setSSLSocketFactory(myFactory); builder.setRequestExecutor(new HttpRequestExecutor(socketTimeout)) .setRedirectStrategy(new DefaultRedirectStrategy()); httpClient = builder.build(); proxy = new SPSProxyHelper(serverUrl, encodedServerLocation, serverLocation, userName, password, org.apache.manifoldcf.connectorcommon.common.CommonsHTTPSender.class, "client-config.wsdd", httpClient); } sessionTimeout = System.currentTimeMillis() + sessionExpirationInterval; } protected void expireSession() throws ManifoldCFException { serverUrl = null; fileBaseUrl = null; userName = null; strippedUserName = null; password = null; ntlmDomain = null; serverLocation = null; encodedServerLocation = null; serverPort = -1; keystoreData = null; keystoreManager = null; proxy = null; httpClient = null; if (connectionManager != null) 
connectionManager.shutdown(); connectionManager = null; } /** Return the list of activities that this connector supports (i.e. writes into the log). *@return the list. */ @Override public String[] getActivitiesList() { return new String[] { ACTIVITY_FETCH }; } /** Connect. *@param configParameters is the set of configuration parameters, which * in this case describe the root directory. */ @Override public void connect(ConfigParams configParameters) { super.connect(configParameters); // This is needed by getBins() serverName = configParameters.getParameter(SharePointConfig.PARAM_SERVERNAME); } /** Close the connection. Call this before discarding the repository connector. */ @Override public void disconnect() throws ManifoldCFException { serverUrl = null; fileBaseUrl = null; userName = null; strippedUserName = null; password = null; ntlmDomain = null; serverName = null; serverLocation = null; encodedServerLocation = null; serverPort = -1; keystoreData = null; keystoreManager = null; proxy = null; httpClient = null; if (connectionManager != null) connectionManager.shutdown(); connectionManager = null; super.disconnect(); } /** Get the bin name string for a document identifier. The bin name describes the queue to which the * document will be assigned for throttling purposes. Throttling controls the rate at which items in a * given queue are fetched; it does not say anything about the overall fetch rate, which may operate on * multiple queues or bins. * For example, if you implement a web crawler, a good choice of bin name would be the server name, since * that is likely to correspond to a real resource that will need real throttle protection. *@param documentIdentifier is the document identifier. *@return the bin name. */ @Override public String[] getBinNames(String documentIdentifier) { return new String[] { serverName }; } /** Get the maximum number of documents to amalgamate together into one batch, for this connector. *@return the maximum number. 
0 indicates "unlimited". */ @Override public int getMaxDocumentRequest() { // Since we went to a carrydown-based implementation, having this greater than 1 does not help. return 1; } /** Test the connection. Returns a string describing the connection integrity. *@return the connection's status as a displayable string. */ @Override public String check() throws ManifoldCFException { getSession(); try { URL urlServer = new URL(serverUrl); } catch (MalformedURLException e) { return "Illegal SharePoint url: " + e.getMessage(); } try { proxy.checkConnection("/", supportsItemSecurity); } catch (ServiceInterruption e) { return "SharePoint temporarily unavailable: " + e.getMessage(); } catch (ManifoldCFException e) { return e.getMessage(); } return super.check(); } /** This method is periodically called for all connectors that are connected but not * in active use. */ @Override public void poll() throws ManifoldCFException { if (proxy != null && System.currentTimeMillis() >= sessionTimeout) expireSession(); if (connectionManager != null) connectionManager.closeIdleConnections(60000L, TimeUnit.MILLISECONDS); } /** This method is called to assess whether to count this connector instance should * actually be counted as being connected. *@return true if the connector instance is actually connected. */ @Override public boolean isConnected() { return connectionManager != null; } /** Request arbitrary connector information. * This method is called directly from the API in order to allow API users to perform any one of several connector-specific * queries. *@param output is the response object, to be filled in by this method. *@param command is the command, which is taken directly from the API request. *@return true if the resource is found, false if not. In either case, output may be filled in. 
*/
  @Override
  public boolean requestInfo(Configuration output, String command) throws ManifoldCFException {
    // Supported commands: fields/, listfields/, sites/, libraries/ and lists/.  Anything else is
    // delegated to the superclass.  For the supported commands, a ServiceInterruption or
    // ManifoldCFException is reported as a node in the output and the method still returns true.
    if (command.startsWith("fields/")) {
      // Everything up to the first "/" is the library name; the rest, if any, is the site path.
      String remainder = command.substring("fields/".length());
      int index = remainder.indexOf("/");
      String library = (index == -1) ? remainder : remainder.substring(0, index);
      String sitePath = (index == -1) ? "" : remainder.substring(index + 1);
      try {
        addRequestInfoFieldNodes(output, getLibFieldList(sitePath, library));
      } catch (ServiceInterruption e) {
        ManifoldCF.createServiceInterruptionNode(output, e);
      } catch (ManifoldCFException e) {
        ManifoldCF.createErrorNode(output, e);
      }
    } else if (command.startsWith("listfields/")) {
      // Everything up to the first "/" is the list name; the rest, if any, is the site path.
      String remainder = command.substring("listfields/".length());
      int index = remainder.indexOf("/");
      String listName = (index == -1) ? remainder : remainder.substring(0, index);
      String sitePath = (index == -1) ? "" : remainder.substring(index + 1);
      try {
        addRequestInfoFieldNodes(output, getListFieldList(sitePath, listName));
      } catch (ServiceInterruption e) {
        ManifoldCF.createServiceInterruptionNode(output, e);
      } catch (ManifoldCFException e) {
        ManifoldCF.createErrorNode(output, e);
      }
    } else if (command.startsWith("sites/")) {
      String sitePath = command.substring("sites/".length());
      try {
        addRequestInfoNameValueNodes(output, "site", getSites(sitePath));
      } catch (ServiceInterruption e) {
        ManifoldCF.createServiceInterruptionNode(output, e);
      } catch (ManifoldCFException e) {
        ManifoldCF.createErrorNode(output, e);
      }
    } else if (command.startsWith("libraries/")) {
      String sitePath = command.substring("libraries/".length());
      try {
        addRequestInfoNameValueNodes(output, "library", getDocLibsBySite(sitePath));
      } catch (ServiceInterruption e) {
        ManifoldCF.createServiceInterruptionNode(output, e);
      } catch (ManifoldCFException e) {
        ManifoldCF.createErrorNode(output, e);
      }
    } else if (command.startsWith("lists/")) {
      String sitePath = command.substring("lists/".length());
      try {
        addRequestInfoNameValueNodes(output, "list", getListsBySite(sitePath));
      } catch (ServiceInterruption e) {
        ManifoldCF.createServiceInterruptionNode(output, e);
      } catch (ManifoldCFException e) {
        ManifoldCF.createErrorNode(output, e);
      }
    } else {
      return super.requestInfo(output, command);
    }
    return true;
  }

  /** Append one node of the given type, holding a "name" child and a "display_name" child, to the output. */
  private static void addRequestInfoNode(Configuration output, String nodeType, String name,
      String displayName) throws ManifoldCFException {
    ConfigurationNode node = new ConfigurationNode(nodeType);
    ConfigurationNode child = new ConfigurationNode("name");
    child.setValue(name);
    node.addChild(node.getChildCount(), child);
    child = new ConfigurationNode("display_name");
    child.setValue(displayName);
    node.addChild(node.getChildCount(), child);
    output.addChild(output.getChildCount(), node);
  }

  /** Append one "field" node per map entry (key = field name, value = display name), in the map's iteration order. */
  private static void addRequestInfoFieldNodes(Configuration output, Map<String, String> fieldSet)
      throws ManifoldCFException {
    for (Map.Entry<String, String> field : fieldSet.entrySet()) {
      addRequestInfoNode(output, "field", field.getKey(), field.getValue());
    }
  }

  /** Append one node of the given type per NameValue (value = name, pretty name = display name), in list order. */
  private static void addRequestInfoNameValueNodes(Configuration output, String nodeType,
      List<NameValue> entries) throws ManifoldCFException {
    for (NameValue entry : entries) {
      addRequestInfoNode(output, nodeType, entry.getValue(), entry.getPrettyName());
    }
  }

  /** Queue "seed" documents.  Seed documents are the starting places for crawling activity.  Documents
  * are seeded when this method calls appropriate methods in the passed in ISeedingActivity object.
  *
  * This method can choose to find repository changes that happen only during the specified time interval.
  * The seeds recorded by this method will be viewed by the framework based on what the
  * getConnectorModel() method returns.
  *
  * It is not a big problem if the connector chooses to create more seeds than are
  * strictly necessary; it is merely a question of overall work required.
  *
  * The end time and seeding version string passed to this method may be interpreted for greatest efficiency.
  * For continuous crawling jobs, this method will
  * be called once, when the job starts, and at various periodic intervals as the job executes.
  *
  * When a job's specification is changed, the framework automatically resets the seeding version string to null.  The
  * seeding version string may also be set to null on each job run, depending on the connector model returned by
  * getConnectorModel().
  *
  * Note that it is always ok to send MORE documents rather than less to this method.
  * The connector will be connected before this method can be called.
  *@param activities is the interface this method should use to perform whatever framework actions are desired.
*@param spec is a document specification (that comes from the job). *@param seedTime is the end of the time range of documents to consider, exclusive. *@param lastSeedVersionString is the last seeding version string for this job, or null if the job has no previous seeding version string. *@param jobMode is an integer describing how the job is being run, whether continuous or once-only. *@return an updated seeding version string, to be stored with the job. */ @Override public String addSeedDocuments(ISeedingActivity activities, Specification spec, String lastSeedVersion, long seedTime, int jobMode) throws ManifoldCFException, ServiceInterruption { // Check the session getSession(); // Add just the root. activities.addSeedDocument("/"); return ""; } protected static final String[] attachmentDataNames = new String[] { "createdDate", "modifiedDate", "accessTokens", "denyTokens", "url", "guids" }; /** Process a set of documents. * This is the method that should cause each document to be fetched, processed, and the results either added * to the queue of documents for the current job, and/or entered into the incremental ingestion manager. * The document specification allows this class to filter what is done based on the job. * The connector will be connected before this method can be called. *@param documentIdentifiers is the set of document identifiers to process. *@param statuses are the currently-stored document versions for each document in the set of document identifiers * passed in above. *@param activities is the interface this method should use to queue up new document references * and ingest documents. *@param jobMode is an integer describing how the job is being run, whether continuous or once-only. *@param usesDefaultAuthority will be true only if the authority in use for these documents is the default one. 
*/ @Override public void processDocuments(String[] documentIdentifiers, IExistingVersions statuses, Specification spec, IProcessActivity activities, int jobMode, boolean usesDefaultAuthority) throws ManifoldCFException, ServiceInterruption { // Get the forced acls. (We need this only for the case where documents have their own acls) String[] forcedAcls = getAcls(spec); SystemMetadataDescription sDesc = new SystemMetadataDescription(spec); // Look at the metadata attributes. // So that the version strings are comparable, we will put them in an array first, and sort them. String pathAttributeName = null; MatchMap matchMap = new MatchMap(); int i = 0; while (i < spec.getChildCount()) { SpecificationNode n = spec.getChild(i++); if (n.getType().equals("pathnameattribute")) pathAttributeName = n.getAttributeValue("value"); else if (n.getType().equals("pathmap")) { // Path mapping info also needs to be looked at, because it affects what is // ingested. String pathMatch = n.getAttributeValue("match"); String pathReplace = n.getAttributeValue("replace"); matchMap.appendMatchPair(pathMatch, pathReplace); } } // Calculate the part of the version string that comes from path name and mapping. // This starts with = since ; is used by another optional component (the forced acls) StringBuilder pathNameAttributeVersion = new StringBuilder(); if (pathAttributeName != null) pathNameAttributeVersion.append("=").append(pathAttributeName).append(":").append(matchMap); for (String documentIdentifier : documentIdentifiers) { // Check if we should abort activities.checkJobStillActive(); getSession(); if (Logging.connectors.isDebugEnabled()) Logging.connectors.debug("SharePoint: Getting version of '" + documentIdentifier + "'"); if (documentIdentifier.startsWith("D") || documentIdentifier.startsWith("S")) { // Old-style document identifier. We don't recognize these anymore, so signal deletion. 
if (Logging.connectors.isDebugEnabled()) Logging.connectors.debug( "SharePoint: Removing old-style document identifier '" + documentIdentifier + "'"); activities.deleteDocument(documentIdentifier); continue; } else if (documentIdentifier.startsWith("/")) { // New-style document identifier. A double-slash marks the separation between the library and folder/file levels. // A triple-slash marks the separation between a list name and list row ID. int dListSeparatorIndex = documentIdentifier.indexOf("///"); int dLibSeparatorIndex = documentIdentifier.indexOf("//"); if (dListSeparatorIndex != -1) { // === List-style identifier === if (dListSeparatorIndex == documentIdentifier.length() - 3) { // == List path! == if (!checkIncludeList(documentIdentifier.substring(0, documentIdentifier.length() - 3), spec)) { if (Logging.connectors.isDebugEnabled()) Logging.connectors.debug("SharePoint: List specification no longer includes list '" + documentIdentifier + "' - removing"); activities.deleteDocument(documentIdentifier); continue; } // Version string for a list String versionString = ""; // Chained connectors always scan parent nodes, so they don't bother setting a version String siteListPath = documentIdentifier.substring(0, documentIdentifier.length() - 3); int listCutoff = siteListPath.lastIndexOf("/"); String site = siteListPath.substring(0, listCutoff); String listName = siteListPath.substring(listCutoff + 1); if (Logging.connectors.isDebugEnabled()) Logging.connectors .debug("SharePoint: Document identifier is a list: '" + siteListPath + "'"); String listID = proxy.getListID(encodePath(site), site, listName); if (listID == null) { if (Logging.connectors.isDebugEnabled()) Logging.connectors.debug("SharePoint: GUID lookup failed for list '" + siteListPath + "' - deleting"); activities.deleteDocument(documentIdentifier); continue; } String encodedSitePath = encodePath(site); // Get the list's fields Map<String, String> fieldNames = proxy.getFieldList(encodedSitePath, 
listID); if (fieldNames == null) { if (Logging.connectors.isDebugEnabled()) Logging.connectors.debug("SharePoint: Field list lookup failed for list '" + siteListPath + "' - deleting"); activities.deleteDocument(documentIdentifier); continue; } String[] fields = new String[fieldNames.size()]; int j = 0; for (String field : fieldNames.keySet()) { fields[j++] = field; } String[] accessTokens; String[] denyTokens; if (forcedAcls == null) { // Security is off accessTokens = new String[0]; denyTokens = new String[0]; } else if (forcedAcls.length != 0) { // Forced security accessTokens = forcedAcls; denyTokens = new String[0]; } else { // Security enabled, native security accessTokens = proxy.getACLs(encodedSitePath, listID, activeDirectoryAuthority); denyTokens = new String[] { defaultAuthorityDenyToken }; } if (accessTokens == null) { if (Logging.connectors.isDebugEnabled()) Logging.connectors.debug("SharePoint: Access token lookup failed for list '" + siteListPath + "' - deleting"); activities.deleteDocument(documentIdentifier); continue; } ListItemStream fs = new ListItemStream(activities, encodedServerLocation, site, siteListPath, spec, documentIdentifier, accessTokens, denyTokens, listID, fields); boolean success = proxy.getChildren(fs, encodedSitePath, listID, dspStsWorks); if (!success) { // Site/list no longer exists, so delete entry if (Logging.connectors.isDebugEnabled()) Logging.connectors.debug( "SharePoint: No list found for list '" + siteListPath + "' - deleting"); activities.deleteDocument(documentIdentifier); continue; } activities.noDocument(documentIdentifier, versionString); } else { // == List item or attachment path! == // Convert the modified document path to an unmodified one, plus a library path. 
String decodedListPath = documentIdentifier.substring(0, dListSeparatorIndex); String itemAndAttachment = documentIdentifier.substring(dListSeparatorIndex + 2); String decodedItemPath = decodedListPath + itemAndAttachment; int cutoff = decodedListPath.lastIndexOf("/"); String sitePath = decodedListPath.substring(0, cutoff); String list = decodedListPath.substring(cutoff + 1); String encodedSitePath = encodePath(sitePath); int attachmentSeparatorIndex = itemAndAttachment.indexOf("//", 1); if (attachmentSeparatorIndex == -1) { // == List item path! == if (!checkIncludeListItem(decodedItemPath, spec)) { if (Logging.connectors.isDebugEnabled()) Logging.connectors.debug("SharePoint: List item '" + documentIdentifier + "' is no longer included - removing"); activities.deleteDocument(documentIdentifier); continue; } // This file is included, so calculate a version string. This will include metadata info, so get that first. MetadataInformation metadataInfo = getMetadataSpecification(decodedItemPath, spec); String[] accessTokens = activities.retrieveParentData(documentIdentifier, "accessTokens"); String[] denyTokens = activities.retrieveParentData(documentIdentifier, "denyTokens"); String[] listIDs = activities.retrieveParentData(documentIdentifier, "guids"); String[] listFields = activities.retrieveParentData(documentIdentifier, "fields"); String[] displayURLs = activities.retrieveParentData(documentIdentifier, "displayURLs"); String listID; if (listIDs.length >= 1) listID = listIDs[0]; else listID = null; String displayURL; if (displayURLs.length >= 1) displayURL = displayURLs[0]; else displayURL = null; if (listID == null) { if (Logging.connectors.isDebugEnabled()) Logging.connectors.debug("SharePoint: Can't get version of '" + documentIdentifier + "' because list '" + decodedListPath + "' does not exist - removing"); activities.deleteDocument(documentIdentifier); continue; } String[] sortedMetadataFields = getInterestingFieldSetSorted(metadataInfo, listFields); // Sort 
access tokens so they are comparable in the version string java.util.Arrays.sort(accessTokens); java.util.Arrays.sort(denyTokens); // Next, get the actual timestamp field for the file. List<String> metadataDescription = new ArrayList<String>(); metadataDescription.add("Modified"); metadataDescription.add("Created"); metadataDescription.add("ID"); metadataDescription.add("GUID"); // The document path includes the library, with no leading slash, and is decoded. String decodedItemPathWithoutSite = decodedItemPath.substring(cutoff + 1); Map<String, String> values = proxy.getFieldValues( metadataDescription.toArray(new String[0]), encodedSitePath, listID, "/Lists/" + decodedItemPathWithoutSite, dspStsWorks); String modifiedDate = values.get("Modified"); String createdDate = values.get("Created"); String id = values.get("ID"); String guid = values.get("GUID"); if (modifiedDate == null) { if (Logging.connectors.isDebugEnabled()) Logging.connectors.debug("SharePoint: Can't get version of '" + documentIdentifier + "' because it has no modify date"); activities.deleteDocument(documentIdentifier); continue; } // Item has a modified date so we presume it exists. 
Date modifiedDateValue = DateParser.parseISO8601Date(modifiedDate); Date createdDateValue = DateParser.parseISO8601Date(createdDate); // Build version string String versionToken = modifiedDate; // Revamped version string on 9/21/2013 to make parseability better StringBuilder sb = new StringBuilder(); packList(sb, sortedMetadataFields, '+'); packList(sb, accessTokens, '+'); packList(sb, denyTokens, '+'); packDate(sb, modifiedDateValue); packDate(sb, createdDateValue); pack(sb, id, '+'); pack(sb, guid, '+'); pack(sb, displayURL, '+'); // The rest of this is unparseable sb.append(versionToken); sb.append(pathNameAttributeVersion); // Added 9/7/07 sb.append("_").append(fileBaseUrl); // String versionString = sb.toString(); if (Logging.connectors.isDebugEnabled()) Logging.connectors.debug("SharePoint: Complete version string for '" + documentIdentifier + "': " + versionString); // Before we index, we queue up any attachments // Now, do any queuing that is needed. if (attachmentsSupported) { String itemNumber = id; List<NameValue> attachmentNames = proxy.getAttachmentNames(sitePath, listID, itemNumber); // Now, queue up each attachment as a separate entry for (NameValue attachmentName : attachmentNames) { // For attachments, we use the carry-down feature to get the data where we need it. That's why // we unpacked the version information early above. 
// No check for inclusion; if the list item is included, so is this String[][] dataValues = new String[attachmentDataNames.length][]; if (createdDateValue == null) dataValues[0] = new String[0]; else dataValues[0] = new String[] { new Long(createdDateValue.getTime()).toString() }; if (modifiedDateValue == null) dataValues[1] = new String[0]; else dataValues[1] = new String[] { new Long(modifiedDateValue.getTime()).toString() }; if (accessTokens == null) dataValues[2] = new String[0]; else dataValues[2] = accessTokens; if (denyTokens == null) dataValues[3] = new String[0]; else dataValues[3] = denyTokens; dataValues[4] = new String[] { attachmentName.getPrettyName() }; dataValues[5] = new String[] { guid }; activities.addDocumentReference( documentIdentifier + "//" + attachmentName.getValue(), documentIdentifier, null, attachmentDataNames, dataValues); } } if (!activities.checkDocumentNeedsReindexing(documentIdentifier, versionString)) continue; // Convert the modified document path to an unmodified one, plus a library path. 
String encodedItemPath = encodePath(decodedListPath.substring(0, cutoff) + "/Lists/" + decodedItemPath.substring(cutoff + 1)); // Generate the URL we are going to use String itemUrl = serverUrl + displayURL; //fileBaseUrl + encodedItemPath; if (Logging.connectors.isDebugEnabled()) Logging.connectors.debug("SharePoint: Processing list item '" + documentIdentifier + "'; url: '" + itemUrl + "'"); // Fetch the metadata we will be indexing Map<String, String> metadataValues = null; if (sortedMetadataFields.length > 0) { metadataValues = proxy.getFieldValues(sortedMetadataFields, encodePath(sitePath), listID, "/Lists/" + decodedItemPath.substring(cutoff + 1), dspStsWorks); if (metadataValues == null) { // Item has vanished if (Logging.connectors.isDebugEnabled()) Logging.connectors.debug( "SharePoint: Item metadata fetch failure indicated that item is gone: '" + documentIdentifier + "' - removing"); activities.recordActivity(null, ACTIVITY_FETCH, null, documentIdentifier, "NOMETADATA", "List item metadata is missing", null); activities.noDocument(documentIdentifier, versionString); continue; } } if (!activities.checkLengthIndexable(0L)) { // Document too long (should never happen; length is 0) activities.recordActivity(null, ACTIVITY_FETCH, null, documentIdentifier, activities.EXCLUDED_LENGTH, "List item excluded due to content length (0)", null); activities.noDocument(documentIdentifier, versionString); continue; } InputStream is = new ByteArrayInputStream(new byte[0]); try { RepositoryDocument data = new RepositoryDocument(); data.setBinary(is, 0L); if (modifiedDateValue != null) data.setModifiedDate(modifiedDateValue); if (createdDateValue != null) data.setCreatedDate(createdDateValue); setDataACLs(data, accessTokens, denyTokens); setPathAttribute(data, sDesc, documentIdentifier); if (metadataValues != null) { Iterator<String> iter = metadataValues.keySet().iterator(); while (iter.hasNext()) { String fieldName = iter.next(); String fieldData = 
metadataValues.get(fieldName); data.addField(fieldName, fieldData); } } data.addField("GUID", guid); try { activities.ingestDocumentWithException(documentIdentifier, versionString, itemUrl, data); } catch (IOException e) { handleIOException(e, "reading document"); } } finally { try { is.close(); } catch (IOException e) { handleIOException(e, "closing stream"); } } } else { // == List item attachment path! == if (!checkIncludeListItemAttachment(decodedItemPath, spec)) { if (Logging.connectors.isDebugEnabled()) Logging.connectors.debug("SharePoint: List item attachment '" + documentIdentifier + "' is no longer included - removing"); activities.deleteDocument(documentIdentifier); continue; } // To save work, we retrieve most of what we need in version info from the parent. // Retrieve modified and created dates String[] modifiedDateSet = activities.retrieveParentData(documentIdentifier, "modifiedDate"); String[] createdDateSet = activities.retrieveParentData(documentIdentifier, "createdDate"); String[] accessTokens = activities.retrieveParentData(documentIdentifier, "accessTokens"); String[] denyTokens = activities.retrieveParentData(documentIdentifier, "denyTokens"); String[] urlSet = activities.retrieveParentData(documentIdentifier, "url"); // Only one modifiedDate and createdDate can be used. If there's more than one, just pick one - the item will be reindexed // anyhow. String modifiedDate; if (modifiedDateSet.length >= 1) modifiedDate = modifiedDateSet[0]; else modifiedDate = null; String createdDate; if (createdDateSet.length >= 1) createdDate = createdDateSet[0]; else createdDate = null; String url; if (urlSet.length >= 1) url = urlSet[0]; else url = null; // If we have no modified or created date, it means that the parent has gone away, so we go away too. 
if (modifiedDate == null || url == null) { // Can't look up list ID, which means the list is gone, so delete if (Logging.connectors.isDebugEnabled()) Logging.connectors .debug("SharePoint: Can't get version of '" + documentIdentifier + "' because modified date or attachment url not found"); activities.deleteDocument(documentIdentifier); continue; } // Item has a modified date so we presume it exists. Date modifiedDateValue; if (modifiedDate != null) modifiedDateValue = new Date(new Long(modifiedDate).longValue()); else modifiedDateValue = null; Date createdDateValue; if (createdDate != null) createdDateValue = new Date(new Long(createdDate).longValue()); else createdDateValue = null; // Build version string String versionToken = modifiedDate; StringBuilder sb = new StringBuilder(); // Pack the URL to get the data from pack(sb, url, '+'); // Do the acls. If we get this far, we are guaranteed to have them, but we need to sort. java.util.Arrays.sort(accessTokens); java.util.Arrays.sort(denyTokens); packList(sb, accessTokens, '+'); packList(sb, denyTokens, '+'); packDate(sb, modifiedDateValue); packDate(sb, createdDateValue); // The rest of this is unparseable sb.append(versionToken); sb.append(pathNameAttributeVersion); sb.append("_").append(fileBaseUrl); // String versionString = sb.toString(); if (Logging.connectors.isDebugEnabled()) Logging.connectors.debug("SharePoint: Complete version string for '" + documentIdentifier + "': " + versionString); if (!activities.checkDocumentNeedsReindexing(documentIdentifier, versionString)) continue; // We need the list ID, which we've already fetched, so grab that from the parent data. 
String[] guids = activities.retrieveParentData(documentIdentifier, "guids"); String guid; if (guids.length >= 1) guid = guids[0]; else guid = null; if (guid == null) { if (Logging.connectors.isDebugEnabled()) Logging.connectors.debug("SharePoint: Skipping attachment '" + documentIdentifier + "' because no parent guid found"); activities.recordActivity(null, ACTIVITY_FETCH, null, documentIdentifier, "NOGUID", "List item attachment GUID is missing", null); activities.noDocument(documentIdentifier, versionString); continue; } int lastIndex = url.lastIndexOf("/"); guid = guid + ":" + url.substring(lastIndex + 1); // Fetch and index. This also filters documents based on output connector restrictions. String fileUrl = serverUrl + encodePath(url); String fetchUrl = fileUrl; fetchAndIndexFile(activities, documentIdentifier, versionString, fileUrl, fetchUrl, accessTokens, denyTokens, createdDateValue, modifiedDateValue, null, guid, sDesc); } } } else if (dLibSeparatorIndex != -1) { // === Library-style identifier === if (dLibSeparatorIndex == documentIdentifier.length() - 2) { // Library path! 
if (!checkIncludeLibrary(documentIdentifier.substring(0, documentIdentifier.length() - 2), spec)) { if (Logging.connectors.isDebugEnabled()) Logging.connectors .debug("SharePoint: Library specification no longer includes library '" + documentIdentifier + "' - removing"); activities.deleteDocument(documentIdentifier); continue; } // This is the path for the library: No versioning String versionString = ""; // Chained document parents are always rescanned String siteLibPath = documentIdentifier.substring(0, documentIdentifier.length() - 2); int libCutoff = siteLibPath.lastIndexOf("/"); String site = siteLibPath.substring(0, libCutoff); String libName = siteLibPath.substring(libCutoff + 1); if (Logging.connectors.isDebugEnabled()) Logging.connectors .debug("SharePoint: Document identifier is a library: '" + siteLibPath + "'"); String libID = proxy.getDocLibID(encodePath(site), site, libName); if (libID == null) { if (Logging.connectors.isDebugEnabled()) Logging.connectors.debug("SharePoint: GUID lookup failed for library '" + siteLibPath + "' - deleting"); activities.deleteDocument(documentIdentifier); continue; } String encodedSitePath = encodePath(site); // Get the lib's fields Map<String, String> fieldNames = proxy.getFieldList(encodedSitePath, libID); if (fieldNames == null) { if (Logging.connectors.isDebugEnabled()) Logging.connectors.debug("SharePoint: Field list lookup failed for library '" + siteLibPath + "' - deleting"); activities.deleteDocument(documentIdentifier); continue; } String[] fields = new String[fieldNames.size()]; int j = 0; for (String field : fieldNames.keySet()) { fields[j++] = field; } String[] accessTokens; String[] denyTokens; if (forcedAcls == null) { // Security is off accessTokens = new String[0]; denyTokens = new String[0]; } else if (forcedAcls.length != 0) { // Forced security accessTokens = forcedAcls; denyTokens = new String[0]; } else { // Security enabled, native security accessTokens = proxy.getACLs(encodedSitePath, libID, 
activeDirectoryAuthority); denyTokens = new String[] { defaultAuthorityDenyToken }; } if (accessTokens == null) { if (Logging.connectors.isDebugEnabled()) Logging.connectors.debug("SharePoint: Access token lookup failed for library '" + siteLibPath + "' - deleting"); activities.deleteDocument(documentIdentifier); continue; } FileStream fs = new FileStream(activities, encodedServerLocation, site, siteLibPath, spec, documentIdentifier, accessTokens, denyTokens, libID, fields); boolean success = proxy.getChildren(fs, encodedSitePath, libID, dspStsWorks); if (!success) { // Site/library no longer exists, so delete entry if (Logging.connectors.isDebugEnabled()) Logging.connectors.debug( "SharePoint: No list found for library '" + siteLibPath + "' - deleting"); activities.deleteDocument(documentIdentifier); continue; } activities.noDocument(documentIdentifier, versionString); } else { // == Document path == // Convert the modified document path to an unmodified one, plus a library path. String decodedLibPath = documentIdentifier.substring(0, dLibSeparatorIndex); String decodedDocumentPath = decodedLibPath + documentIdentifier.substring(dLibSeparatorIndex + 1); if (!checkIncludeFile(decodedDocumentPath, spec)) { if (Logging.connectors.isDebugEnabled()) Logging.connectors.debug("SharePoint: Document '" + documentIdentifier + "' is no longer included - removing"); activities.deleteDocument(documentIdentifier); continue; } // This file is included, so calculate a version string. This will include metadata info, so get that first. MetadataInformation metadataInfo = getMetadataSpecification(decodedDocumentPath, spec); int lastIndex = decodedLibPath.lastIndexOf("/"); String sitePath = decodedLibPath.substring(0, lastIndex); String lib = decodedLibPath.substring(lastIndex + 1); // Retrieve the carry-down data we will be using. 
// Note well: for sharepoint versions that include document/folder acls, these access tokens will be ignored, // but they will still be carried down nonetheless, in case someone switches versions on us. String[] accessTokens = activities.retrieveParentData(documentIdentifier, "accessTokens"); String[] denyTokens = activities.retrieveParentData(documentIdentifier, "denyTokens"); String[] libIDs = activities.retrieveParentData(documentIdentifier, "guids"); String[] libFields = activities.retrieveParentData(documentIdentifier, "fields"); String libID; if (libIDs.length >= 1) libID = libIDs[0]; else libID = null; if (libID == null) { if (Logging.connectors.isDebugEnabled()) Logging.connectors.debug("SharePoint: Can't get version of '" + documentIdentifier + "' because library '" + decodedLibPath + "' does not exist - removing"); activities.deleteDocument(documentIdentifier); continue; } String encodedSitePath = encodePath(sitePath); String[] sortedMetadataFields = getInterestingFieldSetSorted(metadataInfo, libFields); // Sort access tokens java.util.Arrays.sort(accessTokens); java.util.Arrays.sort(denyTokens); // Next, get the actual timestamp field for the file. List<String> metadataDescription = new ArrayList<String>(); metadataDescription.add("Last_x0020_Modified"); metadataDescription.add("Modified"); metadataDescription.add("Created"); metadataDescription.add("GUID"); // The document path includes the library, with no leading slash, and is decoded. 
int cutoff = decodedLibPath.lastIndexOf("/"); String decodedDocumentPathWithoutSite = decodedDocumentPath.substring(cutoff); Map<String, String> values = proxy.getFieldValues( metadataDescription.toArray(new String[0]), encodedSitePath, libID, decodedDocumentPathWithoutSite, dspStsWorks); String modifiedDate = values.get("Modified"); String createdDate = values.get("Created"); String guid = values.get("GUID"); String modifyDate = values.get("Last_x0020_Modified"); if (modifyDate == null) { if (Logging.connectors.isDebugEnabled()) Logging.connectors.debug("SharePoint: Can't get version of '" + documentIdentifier + "' because it has no modify date"); activities.deleteDocument(documentIdentifier); continue; } // Item has a modified date, so we presume it exists Date modifiedDateValue = DateParser.parseISO8601Date(modifiedDate); Date createdDateValue = DateParser.parseISO8601Date(createdDate); // Build version string String versionToken = modifyDate; if (supportsItemSecurity) { // Do the acls. 
if (forcedAcls == null) { // Security is off accessTokens = new String[0]; denyTokens = new String[0]; } else if (forcedAcls.length > 0) { // Security on, forced acls accessTokens = forcedAcls; denyTokens = new String[0]; } else { // Security on, is native accessTokens = proxy.getDocumentACLs(encodedSitePath, encodePath(decodedDocumentPath), activeDirectoryAuthority); denyTokens = new String[] { defaultAuthorityDenyToken }; } } if (accessTokens == null) { if (Logging.connectors.isDebugEnabled()) Logging.connectors.debug( "SharePoint: Couldn't get access tokens for item '" + decodedDocumentPath + "'; removing document '" + documentIdentifier + "'"); activities.deleteDocument(documentIdentifier); continue; } // Revamped version string on 9/21/2013 to make parseability better StringBuilder sb = new StringBuilder(); packList(sb, sortedMetadataFields, '+'); packList(sb, accessTokens, '+'); packList(sb, denyTokens, '+'); packDate(sb, modifiedDateValue); packDate(sb, createdDateValue); pack(sb, guid, '+'); // The rest of this is unparseable sb.append(versionToken); sb.append(pathNameAttributeVersion); // Added 9/7/07 sb.append("_").append(fileBaseUrl); // String versionString = sb.toString(); if (Logging.connectors.isDebugEnabled()) Logging.connectors.debug("SharePoint: Complete version string for '" + documentIdentifier + "': " + versionString); if (!activities.checkDocumentNeedsReindexing(documentIdentifier, versionString)) continue; // Convert the modified document path to an unmodified one, plus a library path. String encodedDocumentPath = encodePath(decodedDocumentPath); // Parse what we need out of version string. // Generate the URL we are going to use String fileUrl = fileBaseUrl + encodedDocumentPath; if (Logging.connectors.isDebugEnabled()) Logging.connectors.debug("SharePoint: Processing file '" + documentIdentifier + "'; url: '" + fileUrl + "'"); // First, fetch the metadata we plan to index. 
Map<String, String> metadataValues = null; if (sortedMetadataFields.length > 0) { metadataValues = proxy.getFieldValues(sortedMetadataFields, encodePath(sitePath), libID, decodedDocumentPath.substring(cutoff), dspStsWorks); if (metadataValues == null) { // Document has vanished if (Logging.connectors.isDebugEnabled()) Logging.connectors.debug( "SharePoint: Document metadata fetch failure indicated that document is gone: '" + documentIdentifier + "' - removing"); activities.recordActivity(null, ACTIVITY_FETCH, null, documentIdentifier, "NOMETADATA", "Document metadata is missing", null); activities.noDocument(documentIdentifier, versionString); continue; } } // Fetch and index. This also filters documents based on output connector restrictions. fetchAndIndexFile(activities, documentIdentifier, versionString, fileUrl, serverUrl + encodedServerLocation + encodedDocumentPath, accessTokens, denyTokens, createdDateValue, modifiedDateValue, metadataValues, guid, sDesc); } } else { // === Site-style identifier === String sitePath = documentIdentifier.substring(0, documentIdentifier.length() - 1); if (sitePath.length() == 0) sitePath = "/"; if (!checkIncludeSite(sitePath, spec)) { if (Logging.connectors.isDebugEnabled()) Logging.connectors.debug("SharePoint: Site specification no longer includes site '" + documentIdentifier + "' - removing"); activities.deleteDocument(documentIdentifier); continue; } String versionString = ""; activities.noDocument(documentIdentifier, versionString); // Strip off the trailing "/" to get the site name. 
String decodedSitePath = documentIdentifier.substring(0, documentIdentifier.length() - 1); if (Logging.connectors.isDebugEnabled()) Logging.connectors .debug("SharePoint: Document identifier is a site: '" + decodedSitePath + "'"); // Look at subsites List<NameValue> subsites = proxy.getSites(encodePath(decodedSitePath)); if (subsites != null) { for (NameValue subSiteName : subsites) { String newPath = decodedSitePath + "/" + subSiteName.getValue(); String encodedNewPath = encodePath(newPath); if (checkIncludeSite(newPath, spec)) activities.addDocumentReference(newPath + "/"); } } else { if (Logging.connectors.isDebugEnabled()) Logging.connectors.debug("SharePoint: No permissions to access subsites of '" + decodedSitePath + "' - skipping"); } // Look at libraries List<NameValue> libraries = proxy.getDocumentLibraries(encodePath(decodedSitePath), decodedSitePath); if (libraries != null) { for (NameValue library : libraries) { String newPath = decodedSitePath + "/" + library.getValue(); if (checkIncludeLibrary(newPath, spec)) activities.addDocumentReference(newPath + "//"); } } else { if (Logging.connectors.isDebugEnabled()) Logging.connectors.debug("SharePoint: No permissions to access libraries of '" + decodedSitePath + "' - skipping"); } // Look at lists List<NameValue> lists = proxy.getLists(encodePath(decodedSitePath), decodedSitePath); if (lists != null) { for (NameValue list : lists) { String newPath = decodedSitePath + "/" + list.getValue(); if (checkIncludeList(newPath, spec)) activities.addDocumentReference(newPath + "///"); } } else { if (Logging.connectors.isDebugEnabled()) Logging.connectors.debug("SharePoint: No permissions to access lists of '" + decodedSitePath + "' - skipping"); } } } else throw new ManifoldCFException( "Invalid document identifier discovered: '" + documentIdentifier + "'"); } } protected static void packDate(StringBuilder sb, Date dateValue) { if (dateValue != null) { sb.append("+"); pack(sb, new Long(dateValue.getTime()).toString(), 
'+'); } else sb.append("-"); } protected static int unpackDate(String value, int index, Date theDate) { if (value.length() > index) { if (value.charAt(index++) == '+') { StringBuilder sb = new StringBuilder(); index = unpack(sb, value, index, '+'); if (sb.length() > 0) { theDate.setTime(new Long(sb.toString()).longValue()); } } } return index; } protected String[] getInterestingFieldSetSorted(MetadataInformation metadataInfo, String[] allFields) { Set<String> metadataFields = new HashSet<String>(); // Figure out the actual metadata fields we will request if (metadataInfo.getAllMetadata()) { for (String field : allFields) { metadataFields.add(field); } } else { String[] fields = metadataInfo.getMetadataFields(); for (String field : fields) { metadataFields.add(field); } } // Convert the hashtable to an array and sort it. String[] sortedMetadataFields = new String[metadataFields.size()]; int z = 0; for (String field : metadataFields) { sortedMetadataFields[z++] = field; } java.util.Arrays.sort(sortedMetadataFields); return sortedMetadataFields; } /** Method that fetches and indexes a file fetched from a SharePoint URL, with appropriate error handling * etc. 
*/ protected void fetchAndIndexFile(IProcessActivity activities, String documentIdentifier, String version, String fileUrl, String fetchUrl, String[] accessTokens, String[] denyTokens, Date createdDate, Date modifiedDate, Map<String, String> metadataValues, String guid, SystemMetadataDescription sDesc) throws ManifoldCFException, ServiceInterruption { String errorCode = null; String errorDesc = null; long startTime = System.currentTimeMillis(); Long fileLengthLong = null; try { // Before we fetch, confirm that the output connector will accept the document if (!activities.checkURLIndexable(fileUrl)) { // URL failed errorCode = activities.EXCLUDED_URL; errorDesc = "Document rejected because of URL (" + fileUrl + ")"; if (Logging.connectors.isDebugEnabled()) Logging.connectors.debug("SharePoint: Skipping document '" + documentIdentifier + "' because output connector says URL '" + fileUrl + "' is not indexable"); activities.noDocument(documentIdentifier, version); return; } // Also check mime type String contentType = mapExtensionToMimeType(documentIdentifier); if (!activities.checkMimeTypeIndexable(contentType)) { // Mime type failed errorCode = activities.EXCLUDED_MIMETYPE; errorDesc = "Document rejected because of mime type (" + contentType + ")"; if (Logging.connectors.isDebugEnabled()) Logging.connectors.debug("SharePoint: Skipping document '" + documentIdentifier + "' because output connector says mime type '" + ((contentType == null) ? "null" : contentType) + "' is not indexable"); activities.noDocument(documentIdentifier, version); return; } // Now check date stamp if (!activities.checkDateIndexable(modifiedDate)) { // Date failed errorCode = activities.EXCLUDED_DATE; errorDesc = "Document rejected because of date (" + modifiedDate + ")"; if (Logging.connectors.isDebugEnabled()) Logging.connectors.debug("SharePoint: Skipping document '" + documentIdentifier + "' because output connector says date '" + ((modifiedDate == null) ? 
"null" : modifiedDate) + "' is not indexable"); activities.noDocument(documentIdentifier, version); return; } // Set stuff up for fetch activity logging try { // Read the document into a local temporary file, so I get a reliable length. File tempFile = File.createTempFile("__shp__", ".tmp"); try { // Open the output stream OutputStream os = new FileOutputStream(tempFile); try { // Catch all exceptions having to do with reading the document try { ExecuteMethodThread emt = new ExecuteMethodThread(httpClient, fetchUrl, os); emt.start(); int returnCode = emt.finishUp(); if (returnCode == 404 || returnCode == 401 || returnCode == 400 || returnCode == 415) { // Well, sharepoint thought the document was there, but it really isn't, so delete it. errorCode = "DOCUMENTNOTFOUND"; errorDesc = "Document not found; HTTP code " + returnCode; if (Logging.connectors.isDebugEnabled()) Logging.connectors.debug( "SharePoint: Document at '" + fileUrl + "' failed to fetch with code " + Integer.toString(returnCode) + ", deleting"); activities.noDocument(documentIdentifier, version); return; } else if (returnCode != 200) { errorCode = "UNKNOWNHTTPCODE"; errorDesc = "Unknown HTTP return code " + returnCode; throw new ManifoldCFException("Error fetching document '" + fileUrl + "': " + Integer.toString(returnCode)); } } catch (InterruptedException e) { throw new ManifoldCFException("Interrupted: " + e.getMessage(), e, ManifoldCFException.INTERRUPTED); } catch (java.net.SocketTimeoutException e) { errorCode = e.getClass().getSimpleName().toUpperCase(Locale.ROOT); errorDesc = e.getMessage(); Logging.connectors.warn("SharePoint: SocketTimeoutException thrown: " + e.getMessage(), e); long currentTime = System.currentTimeMillis(); throw new ServiceInterruption( "SharePoint is down attempting to read '" + fileUrl + "', retrying: " + e.getMessage(), e, currentTime + 300000L, currentTime + 12 * 60 * 60000L, -1, true); } catch (org.apache.http.conn.ConnectTimeoutException e) { errorCode = 
e.getClass().getSimpleName().toUpperCase(Locale.ROOT); errorDesc = e.getMessage(); Logging.connectors.warn("SharePoint: ConnectTimeoutException thrown: " + e.getMessage(), e); long currentTime = System.currentTimeMillis(); throw new ServiceInterruption( "SharePoint is down attempting to read '" + fileUrl + "', retrying: " + e.getMessage(), e, currentTime + 300000L, currentTime + 12 * 60 * 60000L, -1, true); } catch (InterruptedIOException e) { throw new ManifoldCFException("Interrupted: " + e.getMessage(), e, ManifoldCFException.INTERRUPTED); } catch (IllegalArgumentException e) { errorCode = e.getClass().getSimpleName().toUpperCase(Locale.ROOT); errorDesc = e.getMessage(); Logging.connectors.error("SharePoint: Illegal argument: " + e.getMessage(), e); throw new ManifoldCFException("SharePoint: Illegal argument: " + e.getMessage(), e); } catch (org.apache.http.HttpException e) { errorCode = e.getClass().getSimpleName().toUpperCase(Locale.ROOT); errorDesc = e.getMessage(); Logging.connectors.warn("SharePoint: HttpException thrown: " + e.getMessage(), e); long currentTime = System.currentTimeMillis(); throw new ServiceInterruption( "SharePoint is down attempting to read '" + fileUrl + "', retrying: " + e.getMessage(), e, currentTime + 300000L, currentTime + 12 * 60 * 60000L, -1, true); } catch (IOException e) { errorCode = e.getClass().getSimpleName().toUpperCase(Locale.ROOT); errorDesc = e.getMessage(); Logging.connectors.warn("SharePoint: IOException thrown: " + e.getMessage(), e); long currentTime = System.currentTimeMillis(); throw new ServiceInterruption( "SharePoint is down attempting to read '" + fileUrl + "', retrying: " + e.getMessage(), e, currentTime + 300000L, currentTime + 12 * 60 * 60000L, -1, true); } } finally { os.close(); } // Ingest the document long documentLength = tempFile.length(); if (!activities.checkLengthIndexable(documentLength)) { // Document too long errorCode = activities.EXCLUDED_LENGTH; errorDesc = "Document excluded due to length (" 
+ documentLength + ")"; if (Logging.connectors.isDebugEnabled()) Logging.connectors.debug("SharePoint: Document '" + documentIdentifier + "' was too long, according to output connector"); activities.noDocument(documentIdentifier, version); return; } InputStream is = new FileInputStream(tempFile); try { RepositoryDocument data = new RepositoryDocument(); data.setBinary(is, documentLength); data.setFileName(mapToFileName(documentIdentifier)); if (contentType != null) data.setMimeType(contentType); setDataACLs(data, accessTokens, denyTokens); setPathAttribute(data, sDesc, documentIdentifier); if (modifiedDate != null) data.setModifiedDate(modifiedDate); if (createdDate != null) data.setCreatedDate(createdDate); if (metadataValues != null) { Iterator<String> iter = metadataValues.keySet().iterator(); while (iter.hasNext()) { String fieldName = iter.next(); String fieldData = metadataValues.get(fieldName); data.addField(fieldName, fieldData); } } data.addField("GUID", guid); try { activities.ingestDocumentWithException(documentIdentifier, version, fileUrl, data); errorCode = "OK"; fileLengthLong = new Long(documentLength); } catch (IOException e) { handleIOException(e, "reading document"); } return; } finally { try { is.close(); } catch (java.net.SocketTimeoutException e) { // This is not fatal Logging.connectors.debug("SharePoint: Timeout before read could finish for '" + fileUrl + "': " + e.getMessage(), e); } catch (org.apache.http.conn.ConnectTimeoutException e) { // This is not fatal Logging.connectors.debug("SharePoint: Connect timeout before read could finish for '" + fileUrl + "': " + e.getMessage(), e); } catch (InterruptedIOException e) { throw new ManifoldCFException("Interrupted: " + e.getMessage(), e, ManifoldCFException.INTERRUPTED); } catch (IOException e) { // This is not fatal Logging.connectors .debug("SharePoint: Server closed connection before read could finish for '" + fileUrl + "': " + e.getMessage(), e); } } } finally { tempFile.delete(); } } 
catch (java.net.SocketTimeoutException e) { throw new ManifoldCFException( "Socket timeout error writing '" + fileUrl + "' to temporary file: " + e.getMessage(), e); } catch (org.apache.http.conn.ConnectTimeoutException e) { throw new ManifoldCFException( "Connect timeout error writing '" + fileUrl + "' to temporary file: " + e.getMessage(), e); } catch (InterruptedIOException e) { throw new ManifoldCFException("Interrupted: " + e.getMessage(), e, ManifoldCFException.INTERRUPTED); } catch (IOException e) { throw new ManifoldCFException( "IO error writing '" + fileUrl + "' to temporary file: " + e.getMessage(), e); } } catch (ManifoldCFException e) { if (e.getErrorCode() == ManifoldCFException.INTERRUPTED) errorCode = null; throw e; } finally { if (errorCode != null) activities.recordActivity(new Long(startTime), ACTIVITY_FETCH, fileLengthLong, documentIdentifier, errorCode, errorDesc, null); } } protected static void handleIOException(IOException e, String context) throws ManifoldCFException, ServiceInterruption { if (e instanceof java.net.SocketTimeoutException) { long currentTime = System.currentTimeMillis(); throw new ServiceInterruption( "SharePoint is down attempting to " + context + ", retrying: " + e.getMessage(), e, currentTime + 300000L, currentTime + 12 * 60 * 60000L, -1, true); } else if (e instanceof org.apache.http.conn.ConnectTimeoutException) { long currentTime = System.currentTimeMillis(); throw new ServiceInterruption( "SharePoint is down attempting to " + context + ", retrying: " + e.getMessage(), e, currentTime + 300000L, currentTime + 12 * 60 * 60000L, -1, true); } else if (e instanceof InterruptedIOException) throw new ManifoldCFException(e.getMessage(), e, ManifoldCFException.INTERRUPTED); else throw new ManifoldCFException(e.getMessage(), e); } /** Map an extension to a mime type */ protected static String mapExtensionToMimeType(String fileName) { int slashIndex = fileName.lastIndexOf("/"); if (slashIndex != -1) fileName = 
fileName.substring(slashIndex + 1);
    // Everything after the last dot is the extension; no dot means no mime type can be derived.
    final int extensionStart = fileName.lastIndexOf(".") + 1;
    return (extensionStart == 0) ? null : ExtensionMimeMap.mapToMimeType(fileName.substring(extensionStart));
  }

  /**
   * Map document identifier to file name: returns the text after the last "/", or the whole
   * identifier when it contains no "/".
   */
  protected static String mapToFileName(String fileName) {
    // lastIndexOf yields -1 when there is no slash, so the substring then starts at 0.
    return fileName.substring(fileName.lastIndexOf("/") + 1);
  }

  /**
   * Attaches document-level allow and deny tokens to a repository document. A null array is skipped
   * entirely; when debug logging is enabled, the tokens being attached are logged first.
   *
   * @param data the document receiving the security information
   * @param acls the access tokens, or null to leave them unset
   * @param denyAcls the deny tokens, or null to leave them unset
   */
  protected static void setDataACLs(RepositoryDocument data, String[] acls, String[] denyAcls) {
    if (acls != null) {
      if (Logging.connectors.isDebugEnabled()) {
        StringBuilder message = new StringBuilder("SharePoint: Acls: [ ");
        for (int i = 0; i < acls.length; i++) {
          message.append(acls[i]).append(" ");
        }
        Logging.connectors.debug(message.append("]").toString());
      }
      data.setSecurityACL(RepositoryDocument.SECURITY_TYPE_DOCUMENT, acls);
    }

    if (denyAcls != null) {
      if (Logging.connectors.isDebugEnabled()) {
        StringBuilder message = new StringBuilder("SharePoint: DenyAcls: [ ");
        for (int i = 0; i < denyAcls.length; i++) {
          message.append(denyAcls[i]).append(" ");
        }
        Logging.connectors.debug(message.append("]").toString());
      }
      data.setSecurityDenyACL(RepositoryDocument.SECURITY_TYPE_DOCUMENT, denyAcls);
    }
  }

  /**
   * Adds the path metadata field to a document when a path attribute name is configured.
   *
   * @param data the document receiving the field
   * @param sDesc supplies the attribute name and the value for the identifier
   * @param documentIdentifier the identifier the path value is computed for
   * @throws ManifoldCFException declared by the method; presumably raised by the value lookup
   */
  protected static void setPathAttribute(RepositoryDocument data, SystemMetadataDescription sDesc,
      String documentIdentifier) throws ManifoldCFException {
    // Add the path metadata item into the mix, if enabled
    final String attributeName = sDesc.getPathAttributeName();
    if (attributeName == null || attributeName.length() == 0) {
      Logging.connectors.debug("SharePoint: Path attribute name is null");
      return;
    }

    if (Logging.connectors.isDebugEnabled())
      Logging.connectors.debug("SharePoint: Path attribute name is '" + attributeName + "'");
    final String attributeValue = sDesc.getPathAttributeValue(documentIdentifier);
    if (Logging.connectors.isDebugEnabled())
      Logging.connectors.debug("SharePoint: Path attribute value is '" + attributeValue + "'");
    data.addField(attributeName, attributeValue);
  }

  protected final static String[]
fileStreamDataNames = new String[] { "accessTokens", "denyTokens", "guids", "fields" }; protected class FileStream implements IFileStream { protected final IProcessActivity activities; protected final Specification spec; protected final String rootPath; protected final String sitePath; protected final String siteLibPath; // For carry-down protected final String documentIdentifier; protected final String[][] dataValues; public FileStream(IProcessActivity activities, String rootPath, String sitePath, String siteLibPath, Specification spec, String documentIdentifier, String[] accessTokens, String denyTokens[], String libID, String[] fields) { this.activities = activities; this.spec = spec; this.rootPath = rootPath; this.sitePath = sitePath; this.siteLibPath = siteLibPath; this.documentIdentifier = documentIdentifier; this.dataValues = new String[fileStreamDataNames.length][]; this.dataValues[0] = accessTokens; this.dataValues[1] = denyTokens; this.dataValues[2] = new String[] { libID }; this.dataValues[3] = fields; } @Override public void addFile(String relPath, String displayURL) throws ManifoldCFException { // First, convert the relative path to a full path if (!relPath.startsWith("/")) { relPath = rootPath + sitePath + "/" + relPath; } // Now, strip away what we don't want - namely, the root path. This makes the path relative to the root. if (relPath.startsWith(rootPath)) { relPath = relPath.substring(rootPath.length()); if (checkIncludeFile(relPath, spec)) { // Since the processing for a file needs to know the library path, we need a way to signal the cutoff between library and folder levels. // The way I've chosen to do this is to use a double slash at that point, as a separator. 
if (relPath.startsWith(siteLibPath)) { // Split at the libpath/file boundary String modifiedPath = siteLibPath + "/" + relPath.substring(siteLibPath.length()); activities.addDocumentReference(modifiedPath, documentIdentifier, null, fileStreamDataNames, dataValues); } else { Logging.connectors.warn("SharePoint: Unexpected relPath structure; path is '" + relPath + "', but expected to see something beginning with '" + siteLibPath + "'"); } } } else { Logging.connectors.warn("SharePoint: Unexpected relPath structure; path is '" + relPath + "', but expected to see something beginning with '" + rootPath + "'"); } } } protected final static String[] listItemStreamDataNames = new String[] { "accessTokens", "denyTokens", "guids", "fields", "displayURLs" }; protected class ListItemStream implements IFileStream { protected final IProcessActivity activities; protected final Specification spec; protected final String rootPath; protected final String sitePath; protected final String siteListPath; // For carry-down protected final String documentIdentifier; protected final String[][] dataValues; public ListItemStream(IProcessActivity activities, String rootPath, String sitePath, String siteListPath, Specification spec, String documentIdentifier, String[] accessTokens, String denyTokens[], String listID, String[] fields) { this.activities = activities; this.spec = spec; this.rootPath = rootPath; this.sitePath = sitePath; this.siteListPath = siteListPath; this.documentIdentifier = documentIdentifier; this.dataValues = new String[listItemStreamDataNames.length][]; this.dataValues[0] = accessTokens; this.dataValues[1] = denyTokens; this.dataValues[2] = new String[] { listID }; this.dataValues[3] = fields; } @Override public void addFile(String relPath, String displayURL) throws ManifoldCFException { // First, convert the relative path to a full path if (!relPath.startsWith("/")) { relPath = rootPath + sitePath + "/" + relPath; } String fullPath = relPath; // Now, strip away what we 
don't want - namely, the root path. This makes the path relative to the root. if (relPath.startsWith(rootPath)) { relPath = relPath.substring(rootPath.length()); if (relPath.startsWith(sitePath)) { relPath = relPath.substring(sitePath.length()); // Now, strip "Lists" from relPath. If it doesn't start with /Lists/, ignore it. if (relPath.startsWith("/Lists/")) { relPath = sitePath + relPath.substring("/Lists".length()); if (checkIncludeListItem(relPath, spec)) { if (relPath.startsWith(siteListPath)) { // Since the processing for a item needs to know the list path, we need a way to signal the cutoff between list and item levels. // The way I've chosen to do this is to use a triple slash at that point, as a separator. String modifiedPath = relPath.substring(0, siteListPath.length()) + "//" + relPath.substring(siteListPath.length()); if (displayURL != null) dataValues[4] = new String[] { displayURL }; else dataValues[4] = new String[] { fullPath }; activities.addDocumentReference(modifiedPath, documentIdentifier, null, listItemStreamDataNames, dataValues); } else { Logging.connectors.warn("SharePoint: Unexpected relPath structure; site path is '" + relPath + "', but expected to see something beginning with '" + siteListPath + "'"); } } } else { Logging.connectors.warn("SharePoint: Unexpected relPath structure; rel path is '" + relPath + "', but expected to see something beginning with '/Lists/'"); } } else { Logging.connectors.warn("SharePoint: Unexpected relPath structure; site path is '" + relPath + "', but expected to see something beginning with '" + sitePath + "'"); } } else { Logging.connectors.warn("SharePoint: Unexpected relPath structure; path is '" + relPath + "', but expected to see something beginning with '" + rootPath + "'"); } } } // UI support methods. // // These support methods come in two varieties. The first bunch is involved in setting up connection configuration information. 
The second bunch // is involved in presenting and editing document specification information for a job. The two kinds of methods are accordingly treated differently, // in that the first bunch cannot assume that the current connector object is connected, while the second bunch can. That is why the first bunch // receives a thread context argument for all UI methods, while the second bunch does not need one (since it has already been applied via the connect() // method, above). /** Output the configuration header section. * This method is called in the head section of the connector's configuration page. Its purpose is to add the required tabs to the list, and to output any * javascript methods that might be needed by the configuration editing HTML. *@param threadContext is the local thread context. *@param out is the output to which any HTML should be sent. *@param parameters are the configuration parameters, as they currently exist, for this connection being configured. *@param tabsArray is an array of tab names. Add to this array any tab names that are specific to the connector. */ @Override public void outputConfigurationHeader(IThreadContext threadContext, IHTTPOutput out, Locale locale, ConfigParams parameters, List<String> tabsArray) throws ManifoldCFException, IOException { tabsArray.add(Messages.getString(locale, "SharePointRepository.Server")); tabsArray.add(Messages.getString(locale, "SharePointRepository.AuthorityType")); Messages.outputResourceWithVelocity(out, locale, "editConfiguration.js", null); } /** Output the configuration body section. * This method is called in the body section of the connector's configuration page. Its purpose is to present the required form elements for editing. * The coder can presume that the HTML that is output from this configuration will be within appropriate <html>, <body>, and <form> tags. The name of the * form is "editconnection". *@param threadContext is the local thread context. 
*@param out is the output to which any HTML should be sent. *@param parameters are the configuration parameters, as they currently exist, for this connection being configured. *@param tabName is the current tab name. */ @Override public void outputConfigurationBody(IThreadContext threadContext, IHTTPOutput out, Locale locale, ConfigParams parameters, String tabName) throws ManifoldCFException, IOException { Map<String, Object> velocityContext = new HashMap<String, Object>(); velocityContext.put("TabName", tabName); fillInServerTab(velocityContext, out, parameters); fillInAuthorityTypeTab(velocityContext, out, parameters); Messages.outputResourceWithVelocity(out, locale, "editConfiguration_Server.html", velocityContext); Messages.outputResourceWithVelocity(out, locale, "editConfiguration_AuthorityType.html", velocityContext); } /** Process a configuration post. * This method is called at the start of the connector's configuration page, whenever there is a possibility that form data for a connection has been * posted. Its purpose is to gather form information and modify the configuration parameters accordingly. * The name of the posted form is "editconnection". *@param threadContext is the local thread context. *@param variableContext is the set of variables available from the post, including binary file post information. *@param parameters are the configuration parameters, as they currently exist, for this connection being configured. *@return null if all is well, or a string error message if there is an error that should prevent saving of the connection (and cause a redirection to an error page). 
*/ @Override public String processConfigurationPost(IThreadContext threadContext, IPostParameters variableContext, Locale locale, ConfigParams parameters) throws ManifoldCFException { String serverVersion = variableContext.getParameter("serverVersion"); if (serverVersion != null) parameters.setParameter(SharePointConfig.PARAM_SERVERVERSION, serverVersion); String serverProtocol = variableContext.getParameter("serverProtocol"); if (serverProtocol != null) parameters.setParameter(SharePointConfig.PARAM_SERVERPROTOCOL, serverProtocol); String serverName = variableContext.getParameter("serverName"); if (serverName != null) parameters.setParameter(SharePointConfig.PARAM_SERVERNAME, serverName); String serverPort = variableContext.getParameter("serverPort"); if (serverPort != null) parameters.setParameter(SharePointConfig.PARAM_SERVERPORT, serverPort); String serverLocation = variableContext.getParameter("serverLocation"); if (serverLocation != null) parameters.setParameter(SharePointConfig.PARAM_SERVERLOCATION, serverLocation); String userName = variableContext.getParameter("serverUserName"); if (userName != null) parameters.setParameter(SharePointConfig.PARAM_SERVERUSERNAME, userName); String password = variableContext.getParameter("serverPassword"); if (password != null) parameters.setObfuscatedParameter(SharePointConfig.PARAM_SERVERPASSWORD, variableContext.mapKeyToPassword(password)); String proxyHost = variableContext.getParameter("proxyhost"); if (proxyHost != null) parameters.setParameter(SharePointConfig.PARAM_PROXYHOST, proxyHost); String proxyPort = variableContext.getParameter("proxyport"); if (proxyPort != null) parameters.setParameter(SharePointConfig.PARAM_PROXYPORT, proxyPort); String proxyUser = variableContext.getParameter("proxyuser"); if (proxyUser != null) parameters.setParameter(SharePointConfig.PARAM_PROXYUSER, proxyUser); String proxyPassword = variableContext.getParameter("proxypassword"); if (proxyPassword != null) 
parameters.setObfuscatedParameter(SharePointConfig.PARAM_PROXYPASSWORD, variableContext.mapKeyToPassword(proxyPassword)); String proxyDomain = variableContext.getParameter("proxydomain"); if (proxyDomain != null) parameters.setParameter(SharePointConfig.PARAM_PROXYDOMAIN, proxyDomain); String keystoreValue = variableContext.getParameter("keystoredata"); if (keystoreValue != null) parameters.setParameter(SharePointConfig.PARAM_SERVERKEYSTORE, keystoreValue); String configOp = variableContext.getParameter("configop"); if (configOp != null) { if (configOp.equals("Delete")) { String alias = variableContext.getParameter("shpkeystorealias"); keystoreValue = parameters.getParameter(SharePointConfig.PARAM_SERVERKEYSTORE); IKeystoreManager mgr; if (keystoreValue != null) mgr = KeystoreManagerFactory.make("", keystoreValue); else mgr = KeystoreManagerFactory.make(""); mgr.remove(alias); parameters.setParameter(SharePointConfig.PARAM_SERVERKEYSTORE, mgr.getString()); } else if (configOp.equals("Add")) { String alias = IDFactory.make(threadContext); byte[] certificateValue = variableContext.getBinaryBytes("shpcertificate"); keystoreValue = parameters.getParameter(SharePointConfig.PARAM_SERVERKEYSTORE); IKeystoreManager mgr; if (keystoreValue != null) mgr = KeystoreManagerFactory.make("", keystoreValue); else mgr = KeystoreManagerFactory.make(""); java.io.InputStream is = new java.io.ByteArrayInputStream(certificateValue); String certError = null; try { mgr.importCertificate(alias, is); } catch (Throwable e) { certError = e.getMessage(); } finally { try { is.close(); } catch (IOException e) { // Don't report anything } } if (certError != null) { // Redirect to error page return "Illegal certificate: " + certError; } parameters.setParameter(SharePointConfig.PARAM_SERVERKEYSTORE, mgr.getString()); } } String authorityType = variableContext.getParameter("authorityType"); if (authorityType != null) parameters.setParameter(SharePointConfig.PARAM_AUTHORITYTYPE, authorityType); return 
null; } /** View configuration. * This method is called in the body section of the connector's view configuration page. Its purpose is to present the connection information to the user. * The coder can presume that the HTML that is output from this configuration will be within appropriate <html> and <body> tags. *@param threadContext is the local thread context. *@param out is the output to which any HTML should be sent. *@param parameters are the configuration parameters, as they currently exist, for this connection being configured. */ @Override public void viewConfiguration(IThreadContext threadContext, IHTTPOutput out, Locale locale, ConfigParams parameters) throws ManifoldCFException, IOException { Map<String, Object> velocityContext = new HashMap<String, Object>(); fillInServerTab(velocityContext, out, parameters); fillInAuthorityTypeTab(velocityContext, out, parameters); Messages.outputResourceWithVelocity(out, locale, "viewConfiguration.html", velocityContext); } protected static void fillInAuthorityTypeTab(Map<String, Object> velocityContext, IHTTPOutput out, ConfigParams parameters) throws ManifoldCFException { // Default to Active Directory, for backwards compatibility String authorityType = parameters.getParameter(SharePointConfig.PARAM_AUTHORITYTYPE); if (authorityType == null) authorityType = "ActiveDirectory"; velocityContext.put("AUTHORITYTYPE", authorityType); } protected static void fillInServerTab(Map<String, Object> velocityContext, IHTTPOutput out, ConfigParams parameters) throws ManifoldCFException { String serverVersion = parameters.getParameter(SharePointConfig.PARAM_SERVERVERSION); if (serverVersion == null) serverVersion = "4.0"; String serverProtocol = parameters.getParameter(SharePointConfig.PARAM_SERVERPROTOCOL); if (serverProtocol == null) serverProtocol = "http"; String serverName = parameters.getParameter(SharePointConfig.PARAM_SERVERNAME); if (serverName == null) serverName = "localhost"; String serverPort = 
parameters.getParameter(SharePointConfig.PARAM_SERVERPORT); if (serverPort == null) serverPort = ""; String serverLocation = parameters.getParameter(SharePointConfig.PARAM_SERVERLOCATION); if (serverLocation == null) serverLocation = ""; String userName = parameters.getParameter(SharePointConfig.PARAM_SERVERUSERNAME); if (userName == null) userName = ""; String password = parameters.getObfuscatedParameter(SharePointConfig.PARAM_SERVERPASSWORD); if (password == null) password = ""; else password = out.mapPasswordToKey(password); String keystore = parameters.getParameter(SharePointConfig.PARAM_SERVERKEYSTORE); IKeystoreManager localKeystore; if (keystore == null) localKeystore = KeystoreManagerFactory.make(""); else localKeystore = KeystoreManagerFactory.make("", keystore); List<Map<String, String>> certificates = new ArrayList<Map<String, String>>(); String[] contents = localKeystore.getContents(); for (String alias : contents) { String description = localKeystore.getDescription(alias); if (description.length() > 128) description = description.substring(0, 125) + "..."; Map<String, String> certificate = new HashMap<String, String>(); certificate.put("ALIAS", alias); certificate.put("DESCRIPTION", description); certificates.add(certificate); } String proxyHost = parameters.getParameter(SharePointConfig.PARAM_PROXYHOST); if (proxyHost == null) proxyHost = ""; String proxyPort = parameters.getParameter(SharePointConfig.PARAM_PROXYPORT); if (proxyPort == null) proxyPort = ""; String proxyUser = parameters.getParameter(SharePointConfig.PARAM_PROXYUSER); if (proxyUser == null) proxyUser = ""; String proxyPassword = parameters.getParameter(SharePointConfig.PARAM_PROXYPASSWORD); if (proxyPassword == null) proxyPassword = ""; else proxyPassword = out.mapPasswordToKey(proxyPassword); String proxyDomain = parameters.getParameter(SharePointConfig.PARAM_PROXYDOMAIN); if (proxyDomain == null) proxyDomain = ""; // Fill in context velocityContext.put("SERVERVERSION", 
serverVersion); velocityContext.put("SERVERPROTOCOL", serverProtocol); velocityContext.put("SERVERNAME", serverName); velocityContext.put("SERVERPORT", serverPort); velocityContext.put("SERVERLOCATION", serverLocation); velocityContext.put("SERVERUSERNAME", userName); velocityContext.put("SERVERPASSWORD", password); if (keystore != null) velocityContext.put("KEYSTORE", keystore); velocityContext.put("CERTIFICATELIST", certificates); velocityContext.put("PROXYHOST", proxyHost); velocityContext.put("PROXYPORT", proxyPort); velocityContext.put("PROXYUSER", proxyUser); velocityContext.put("PROXYPASSWORD", proxyPassword); velocityContext.put("PROXYDOMAIN", proxyDomain); } /** Output the specification header section. * This method is called in the head section of a job page which has selected a repository connection of the * current type. Its purpose is to add the required tabs to the list, and to output any javascript methods * that might be needed by the job editing HTML. * The connector will be connected before this method can be called. *@param out is the output to which any HTML should be sent. *@param locale is the locale the output is preferred to be in. *@param ds is the current document specification for this job. *@param connectionSequenceNumber is the unique number of this connection within the job. *@param tabsArray is an array of tab names. Add to this array any tab names that are specific to the connector. 
*/
  @Override
  public void outputSpecificationHeader(IHTTPOutput out, Locale locale, Specification ds,
    int connectionSequenceNumber, List<String> tabsArray) throws ManifoldCFException, IOException {
    // Tabs are appended in this fixed order: Paths, Security, Metadata.
    final String[] tabKeys = new String[] {
      "SharePointRepository.Paths",
      "SharePointRepository.Security",
      "SharePointRepository.Metadata"
    };
    for (String tabKey : tabKeys)
      tabsArray.add(Messages.getString(locale, tabKey));

    // The script template only needs the connection's sequence number.
    final Map<String, Object> context = new HashMap<String, Object>();
    context.put("SeqNum", Integer.toString(connectionSequenceNumber));
    Messages.outputResourceWithVelocity(out, locale, "editSpecification.js", context);
  }

  /** Output the specification body section.
  * This method is called in the body section of a job page which has selected a repository connection of the
  * current type.  Its purpose is to present the required form elements for editing.
  * The coder can presume that the HTML that is output from this configuration will be within appropriate
  * <html>, <body>, and <form> tags.  The name of the form is always "editjob".
  * The connector will be connected before this method can be called.
  *@param out is the output to which any HTML should be sent.
  *@param locale is the locale the output is preferred to be in.
  *@param ds is the current document specification for this job.
  *@param connectionSequenceNumber is the unique number of this connection within the job.
  *@param actualSequenceNumber is the connection within the job that has currently been selected.
  *@param tabName is the current tab name.  (actualSequenceNumber, tabName) form a unique tuple within
  * the job.
*/ @Override public void outputSpecificationBody(IHTTPOutput out, Locale locale, Specification ds, int connectionSequenceNumber, int actualSequenceNumber, String tabName) throws ManifoldCFException, IOException { Map<String, Object> velocityContext = new HashMap<String, Object>(); velocityContext.put("TabName", tabName); velocityContext.put("SeqNum", Integer.toString(connectionSequenceNumber)); velocityContext.put("SelectedNum", Integer.toString(actualSequenceNumber)); fillInSecurityTab(velocityContext, out, ds); fillInPathsTab(velocityContext, out, ds); fillInMetadataTab(velocityContext, out, ds); // Now, do the part of the tabs that requires context logic if (tabName.equals(Messages.getString(locale, "SharePointRepository.Paths"))) fillInTransientPathsInfo(velocityContext, connectionSequenceNumber); else if (tabName.equals(Messages.getString(locale, "SharePointRepository.Metadata"))) fillInTransientMetadataInfo(velocityContext, connectionSequenceNumber); Messages.outputResourceWithVelocity(out, locale, "editSpecification_Security.html", velocityContext); Messages.outputResourceWithVelocity(out, locale, "editSpecification_Paths.html", velocityContext); Messages.outputResourceWithVelocity(out, locale, "editSpecification_Metadata.html", velocityContext); } /** Fill in metadata tab */ protected static void fillInMetadataTab(Map<String, Object> velocityContext, IHTTPOutput out, Specification ds) { // Find the path-value metadata attribute name String pathNameAttribute = ""; MatchMap matchMap = new MatchMap(); List<Map<String, Object>> metadataRules = new ArrayList<Map<String, Object>>(); for (int i = 0; i < ds.getChildCount(); i++) { SpecificationNode sn = ds.getChild(i); if (sn.getType().equals("pathnameattribute")) { pathNameAttribute = sn.getAttributeValue("value"); } else if (sn.getType().equals("pathmap")) { String pathMatch = sn.getAttributeValue("match"); String pathReplace = sn.getAttributeValue("replace"); matchMap.appendMatchPair(pathMatch, pathReplace); } 
else if (sn.getType().equals("startpoint")) { String site = sn.getAttributeValue("site"); String lib = sn.getAttributeValue("lib"); String path = site + "/" + lib + "/*"; String allmetadata = sn.getAttributeValue("allmetadata"); StringBuilder metadataFieldList = new StringBuilder(); List<String> metadataFieldArray = new ArrayList<String>(); if (allmetadata == null || !allmetadata.equals("true")) { for (int j = 0; j < sn.getChildCount(); j++) { SpecificationNode node = sn.getChild(j); if (node.getType().equals("metafield")) { if (metadataFieldList.length() > 0) metadataFieldList.append(", "); String val = node.getAttributeValue("value"); metadataFieldList.append(val); metadataFieldArray.add(val); } } allmetadata = "false"; } if (allmetadata.equals("true") || metadataFieldList.length() > 0) { Map<String, Object> item = new HashMap<String, Object>(); item.put("THEPATH", path); item.put("THEACTION", "include"); item.put("ALLFLAG", allmetadata); item.put("FIELDLIST", metadataFieldArray); item.put("FIELDS", metadataFieldList.toString()); metadataRules.add(item); } } else if (sn.getType().equals("metadatarule")) { String path = sn.getAttributeValue("match"); String action = sn.getAttributeValue("action"); String allmetadata = sn.getAttributeValue("allmetadata"); StringBuilder metadataFieldList = new StringBuilder(); List<String> metadataFieldArray = new ArrayList<String>(); if (action.equals("include")) { if (allmetadata == null || !allmetadata.equals("true")) { for (int j = 0; j < sn.getChildCount(); j++) { SpecificationNode node = sn.getChild(j); if (node.getType().equals("metafield")) { String val = node.getAttributeValue("value"); if (metadataFieldList.length() > 0) metadataFieldList.append(", "); metadataFieldList.append(val); metadataFieldArray.add(val); } } allmetadata = "false"; } } else allmetadata = ""; Map<String, Object> item = new HashMap<String, Object>(); item.put("THEPATH", path); item.put("THEACTION", action); item.put("ALLFLAG", allmetadata); 
item.put("FIELDLIST", metadataFieldArray); item.put("FIELDS", metadataFieldList.toString()); metadataRules.add(item); } } List<Map<String, String>> mapList = new ArrayList<Map<String, String>>(); for (int i = 0; i < matchMap.getMatchCount(); i++) { String matchString = matchMap.getMatchString(i); String replaceString = matchMap.getReplaceString(i); Map<String, String> item = new HashMap<String, String>(); item.put("MATCH", matchString); item.put("REPLACE", replaceString); mapList.add(item); } velocityContext.put("PATHNAMEATTRIBUTE", pathNameAttribute); velocityContext.put("MAPLIST", mapList); velocityContext.put("METADATARULES", metadataRules); } /** Fill in transient metadata info */ protected void fillInTransientMetadataInfo(Map<String, Object> velocityContext, int connectionSequenceNumber) { String seqPrefix = "s" + connectionSequenceNumber + "_"; // The following variables may be in the thread context because postspec.jsp put them there: // (1) "metapath", which contains the rule path as it currently stands; // (2) "metapathstate", which describes what the current path represents. Values are "unknown", "site", "library". // (3) "metapathlibrary" is the library or list path (if this is known yet). 
// Once the widget is in the state "unknown", it can only be reset, and cannot be further modified String metaPathSoFar = (String) currentContext.get(seqPrefix + "metapath"); String metaPathState = (String) currentContext.get(seqPrefix + "metapathstate"); String metaPathLibrary = (String) currentContext.get(seqPrefix + "metapathlibrary"); if (metaPathState == null) metaPathState = "unknown"; if (metaPathSoFar == null) { metaPathSoFar = "/"; metaPathState = "site"; } String message = null; List<NameValue> fieldList = null; if (metaPathLibrary != null) { // Look up metadata fields int index = metaPathLibrary.lastIndexOf("/"); String site = metaPathLibrary.substring(0, index); String libOrList = metaPathLibrary.substring(index + 1); Map<String, String> metaFieldList = null; try { if (metaPathState.equals("library") || metaPathState.equals("file")) metaFieldList = getLibFieldList(site, libOrList); else if (metaPathState.equals("list")) metaFieldList = getListFieldList(site, libOrList); } catch (ManifoldCFException e) { e.printStackTrace(); message = e.getMessage(); } catch (ServiceInterruption e) { message = "SharePoint unavailable: " + e.getMessage(); } if (metaFieldList != null) { String[] fields = new String[metaFieldList.size()]; int j = 0; Iterator<String> iter = metaFieldList.keySet().iterator(); while (iter.hasNext()) { fields[j++] = iter.next(); } java.util.Arrays.sort(fields); fieldList = new ArrayList<NameValue>(); for (String field : fields) { fieldList.add(new NameValue(field, metaFieldList.get(field))); } } } // Grab next site list and lib list List<NameValue> childSiteList = null; List<NameValue> childLibList = null; List<NameValue> childListList = null; if (message == null && metaPathState.equals("site")) { try { String queryPath = metaPathSoFar; if (queryPath.equals("/")) queryPath = ""; childSiteList = getSites(queryPath); if (childSiteList == null) { if (queryPath.length() == 0) throw new ManifoldCFException("Root site is unreachable, or user has no 
permissions"); // Illegal path - state becomes "unknown". metaPathState = "unknown"; metaPathLibrary = null; } childLibList = getDocLibsBySite(queryPath); if (childLibList == null) { // Illegal path - state becomes "unknown" if (queryPath.length() == 0) throw new ManifoldCFException("Root site is unreachable, or user has no permissions"); metaPathState = "unknown"; metaPathLibrary = null; } childListList = getListsBySite(queryPath); if (childListList == null) { // Illegal path - state becomes "unknown" if (queryPath.length() == 0) throw new ManifoldCFException("Root site is unreachable, or user has no permissions"); metaPathState = "unknown"; metaPathLibrary = null; } } catch (ManifoldCFException e) { Logging.connectors.warn(e.getMessage(), e); message = e.getMessage(); } catch (ServiceInterruption e) { message = "SharePoint unavailable: " + e.getMessage(); } } if (metaPathSoFar != null) velocityContext.put("METAPATHSOFAR", metaPathSoFar); if (metaPathState != null) velocityContext.put("METAPATHSTATE", metaPathState); if (metaPathLibrary != null) velocityContext.put("METAPATHLIBRARY", metaPathLibrary); if (message != null) velocityContext.put("METAMESSAGE", message); if (fieldList != null) velocityContext.put("METAFIELDLIST", fieldList); if (childSiteList != null) velocityContext.put("METACHILDSITELIST", childSiteList); if (childLibList != null) velocityContext.put("METACHILDLIBLIST", childLibList); if (childListList != null) velocityContext.put("METACHILDLISTLIST", childListList); } /** Fill in paths tab */ protected static void fillInPathsTab(Map<String, Object> velocityContext, IHTTPOutput out, Specification ds) { List<Map<String, String>> rules = new ArrayList<Map<String, String>>(); for (int i = 0; i < ds.getChildCount(); i++) { SpecificationNode sn = ds.getChild(i); if (sn.getType().equals("startpoint")) { String site = sn.getAttributeValue("site"); String lib = sn.getAttributeValue("lib"); String siteLib = site + "/" + lib + "/"; // Go through all the 
file/folder rules for the startpoint, and generate new "rules" corresponding to each. for (int j = 0; j < sn.getChildCount(); j++) { SpecificationNode node = sn.getChild(j); if (node.getType().equals("include") || node.getType().equals("exclude")) { String matchPart = node.getAttributeValue("match"); String ruleType = node.getAttributeValue("type"); String theFlavor = node.getType(); String thePath = siteLib + matchPart; Map<String, String> item = new HashMap<String, String>(); item.put("THEPATH", thePath); item.put("THETYPE", "file"); item.put("THEACTION", theFlavor); rules.add(item); if (ruleType.equals("file") && !matchPart.startsWith("*")) { thePath = siteLib + "*/" + matchPart; item = new HashMap<String, String>(); item.put("THEPATH", thePath); item.put("THETYPE", "file"); item.put("THEACTION", theFlavor); rules.add(item); } } } } else if (sn.getType().equals("pathrule")) { String match = sn.getAttributeValue("match"); String type = sn.getAttributeValue("type"); String action = sn.getAttributeValue("action"); Map<String, String> item = new HashMap<String, String>(); item.put("THEPATH", match); item.put("THETYPE", type); item.put("THEACTION", action); rules.add(item); } } velocityContext.put("RULES", rules); } /** Fill in the transient portion of the Paths tab */ protected void fillInTransientPathsInfo(Map<String, Object> velocityContext, int connectionSequenceNumber) { String seqPrefix = "s" + connectionSequenceNumber + "_"; // The following variables may be in the thread context because postspec.jsp put them there: // (1) "specpath", which contains the rule path as it currently stands; // (2) "specpathstate", which describes what the current path represents. Values are "unknown", "site", "library", "list". 
// Once the widget is in the state "unknown", it can only be reset, and cannot be further modified // specsitepath may be in the thread context, put there by postspec.jsp String pathSoFar = (String) currentContext.get(seqPrefix + "specpath"); String pathState = (String) currentContext.get(seqPrefix + "specpathstate"); String pathLibrary = (String) currentContext.get(seqPrefix + "specpathlibrary"); if (pathState == null) { pathState = "unknown"; pathLibrary = null; } if (pathSoFar == null) { pathSoFar = "/"; pathState = "site"; pathLibrary = null; } // Grab next site list and lib list List<NameValue> childSiteList = null; List<NameValue> childLibList = null; List<NameValue> childListList = null; String message = null; if (pathState.equals("site")) { try { String queryPath = pathSoFar; if (queryPath.equals("/")) queryPath = ""; childSiteList = getSites(queryPath); if (childSiteList == null) { // Illegal path - state becomes "unknown". if (queryPath.length() == 0) throw new ManifoldCFException("Root site is unreachable, or user has no permissions"); pathState = "unknown"; pathLibrary = null; } childLibList = getDocLibsBySite(queryPath); if (childLibList == null) { // Illegal path - state becomes "unknown" if (queryPath.length() == 0) throw new ManifoldCFException("Root site is unreachable, or user has no permissions"); pathState = "unknown"; pathLibrary = null; } childListList = getListsBySite(queryPath); if (childListList == null) { // Illegal path - state becomes "unknown" if (queryPath.length() == 0) throw new ManifoldCFException("Root site is unreachable, or user has no permissions"); pathState = "unknown"; pathLibrary = null; } } catch (ManifoldCFException e) { Logging.connectors.warn(e.getMessage(), e); message = e.getMessage(); } catch (ServiceInterruption e) { message = "SharePoint unavailable: " + e.getMessage(); } } if (pathSoFar != null) velocityContext.put("PATHSOFAR", pathSoFar); if (pathState != null) velocityContext.put("PATHSTATE", pathState); if 
(pathLibrary != null) velocityContext.put("PATHLIBRARY", pathLibrary); if (message != null) velocityContext.put("MESSAGE", message); if (childSiteList != null) velocityContext.put("CHILDSITELIST", childSiteList); if (childLibList != null) velocityContext.put("CHILDLIBLIST", childLibList); if (childListList != null) velocityContext.put("CHILDLISTLIST", childListList); } /** Fill in security tab */ protected static void fillInSecurityTab(Map<String, Object> velocityContext, IHTTPOutput out, Specification ds) { // Security tab String security = "on"; List<String> accessTokens = new ArrayList<String>(); for (int i = 0; i < ds.getChildCount(); i++) { SpecificationNode sn = ds.getChild(i); if (sn.getType().equals("security")) { security = sn.getAttributeValue("value"); } else if (sn.getType().equals("access")) { String token = sn.getAttributeValue("token"); accessTokens.add(token); } } velocityContext.put("SECURITY", security); velocityContext.put("ACCESSTOKENS", accessTokens); } /** Process a specification post. * This method is called at the start of job's edit or view page, whenever there is a possibility that form * data for a connection has been posted. Its purpose is to gather form information and modify the * document specification accordingly. The name of the posted form is always "editjob". * The connector will be connected before this method can be called. *@param variableContext contains the post data, including binary file-upload information. *@param locale is the locale the output is preferred to be in. *@param ds is the current document specification for this job. *@param connectionSequenceNumber is the unique number of this connection within the job. *@return null if all is well, or a string error message if there is an error that should prevent saving of * the job (and cause a redirection to an error page). 
*/ @Override public String processSpecificationPost(IPostParameters variableContext, Locale locale, Specification ds, int connectionSequenceNumber) throws ManifoldCFException { String seqPrefix = "s" + connectionSequenceNumber + "_"; // Remove old-style rules, but only if the information would not be lost if (variableContext.getParameter(seqPrefix + "specpathcount") != null && variableContext.getParameter(seqPrefix + "metapathcount") != null) { int i = 0; while (i < ds.getChildCount()) { SpecificationNode sn = ds.getChild(i); if (sn.getType().equals("startpoint")) ds.removeChild(i); else i++; } } String x = variableContext.getParameter(seqPrefix + "specpathcount"); if (x != null) { // Delete all path rule entries first int i = 0; while (i < ds.getChildCount()) { SpecificationNode sn = ds.getChild(i); if (sn.getType().equals("pathrule")) ds.removeChild(i); else i++; } // Find out how many children were sent int pathCount = Integer.parseInt(x); // Gather up these i = 0; while (i < pathCount) { String pathDescription = "_" + Integer.toString(i); String pathOpName = seqPrefix + "specop" + pathDescription; x = variableContext.getParameter(pathOpName); if (x != null && x.equals("Delete")) { // Skip to the next i++; continue; } // Get the stored information for this rule. 
String path = variableContext.getParameter(seqPrefix + "specpath" + pathDescription); String type = variableContext.getParameter(seqPrefix + "spectype" + pathDescription); String action = variableContext.getParameter(seqPrefix + "specflav" + pathDescription); SpecificationNode node = new SpecificationNode("pathrule"); node.setAttribute("match", path); node.setAttribute("action", action); node.setAttribute("type", type); // If there was an insert operation, do it now if (x != null && x.equals("Insert Here")) { // The global parameters are what are used to create the rule path = variableContext.getParameter(seqPrefix + "specpath"); type = variableContext.getParameter(seqPrefix + "spectype"); action = variableContext.getParameter(seqPrefix + "specflavor"); SpecificationNode sn = new SpecificationNode("pathrule"); sn.setAttribute("match", path); sn.setAttribute("action", action); sn.setAttribute("type", type); ds.addChild(ds.getChildCount(), sn); } ds.addChild(ds.getChildCount(), node); i++; } // See if there's a global path rule operation String op = variableContext.getParameter(seqPrefix + "specop"); if (op != null) { if (op.equals("Add")) { String match = variableContext.getParameter(seqPrefix + "specpath"); String action = variableContext.getParameter(seqPrefix + "specflavor"); String type = variableContext.getParameter(seqPrefix + "spectype"); SpecificationNode node = new SpecificationNode("pathrule"); node.setAttribute("match", match); node.setAttribute("action", action); node.setAttribute("type", type); ds.addChild(ds.getChildCount(), node); } } // See if there's a global pathbuilder operation String pathop = variableContext.getParameter(seqPrefix + "specpathop"); if (pathop != null) { if (pathop.equals("Reset")) { currentContext.save(seqPrefix + "specpath", "/"); currentContext.save(seqPrefix + "specpathstate", "site"); currentContext.save(seqPrefix + "specpathlibrary", null); } else if (pathop.equals("AppendSite")) { String path = 
variableContext.getParameter(seqPrefix + "specpath"); String addon = variableContext.getParameter(seqPrefix + "specsite"); if (addon != null && addon.length() > 0) { if (path.equals("/")) path = path + addon; else path = path + "/" + addon; } currentContext.save(seqPrefix + "specpath", path); currentContext.save(seqPrefix + "specpathstate", "site"); currentContext.save(seqPrefix + "specpathlibrary", null); } else if (pathop.equals("AppendLibrary")) { String path = variableContext.getParameter(seqPrefix + "specpath"); String addon = variableContext.getParameter(seqPrefix + "speclibrary"); if (addon != null && addon.length() > 0) { if (path.equals("/")) path = path + addon; else path = path + "/" + addon; currentContext.save(seqPrefix + "specpathstate", "library"); currentContext.save(seqPrefix + "specpathlibrary", path); } currentContext.save(seqPrefix + "specpath", path); } else if (pathop.equals("AppendList")) { String path = variableContext.getParameter(seqPrefix + "specpath"); String addon = variableContext.getParameter(seqPrefix + "speclist"); if (addon != null && addon.length() > 0) { if (path.equals("/")) path = path + addon; else path = path + "/" + addon; currentContext.save(seqPrefix + "specpathstate", "list"); currentContext.save(seqPrefix + "specpathlibrary", path); } currentContext.save(seqPrefix + "specpath", path); } else if (pathop.equals("AppendText")) { String path = variableContext.getParameter(seqPrefix + "specpath"); String library = variableContext.getParameter(seqPrefix + "specpathlibrary"); String addon = variableContext.getParameter(seqPrefix + "specmatch"); if (addon != null && addon.length() > 0) { if (path.equals("/")) path = path + addon; else path = path + "/" + addon; currentContext.save(seqPrefix + "specpathstate", "unknown"); } currentContext.save(seqPrefix + "specpath", path); currentContext.save(seqPrefix + "specpathlibrary", library); } else if (pathop.equals("Remove")) { // Strip off end String path = 
variableContext.getParameter(seqPrefix + "specpath"); int index = path.lastIndexOf("/"); path = path.substring(0, index); if (path.length() == 0) path = "/"; currentContext.save(seqPrefix + "specpath", path); // Now, adjust state. String pathState = variableContext.getParameter(seqPrefix + "specpathstate"); if (pathState.equals("library") || pathState.equals("list")) pathState = "site"; currentContext.save(seqPrefix + "specpathstate", pathState); } } } x = variableContext.getParameter(seqPrefix + "metapathcount"); if (x != null) { // Delete all metadata rule entries first int i = 0; while (i < ds.getChildCount()) { SpecificationNode sn = ds.getChild(i); if (sn.getType().equals("metadatarule")) ds.removeChild(i); else i++; } // Find out how many children were sent int pathCount = Integer.parseInt(x); // Gather up these i = 0; while (i < pathCount) { String pathDescription = "_" + Integer.toString(i); String pathOpName = seqPrefix + "metaop" + pathDescription; x = variableContext.getParameter(pathOpName); if (x != null && x.equals("Delete")) { // Skip to the next i++; continue; } // Get the stored information for this rule. 
String path = variableContext.getParameter(seqPrefix + "metapath" + pathDescription); String action = variableContext.getParameter(seqPrefix + "metaflav" + pathDescription); String allmetadata = variableContext.getParameter(seqPrefix + "metaall" + pathDescription); String[] metadataFields = variableContext .getParameterValues(seqPrefix + "metafields" + pathDescription); SpecificationNode node = new SpecificationNode("metadatarule"); node.setAttribute("match", path); node.setAttribute("action", action); if (action.equals("include")) { if (allmetadata != null) node.setAttribute("allmetadata", allmetadata); if (metadataFields != null) { int j = 0; while (j < metadataFields.length) { SpecificationNode sn = new SpecificationNode("metafield"); sn.setAttribute("value", metadataFields[j]); node.addChild(j++, sn); } } } if (x != null && x.equals("Insert Here")) { // Insert the new global rule information now path = variableContext.getParameter(seqPrefix + "metapath"); action = variableContext.getParameter(seqPrefix + "metaflavor"); allmetadata = variableContext.getParameter(seqPrefix + "metaall"); metadataFields = variableContext.getParameterValues(seqPrefix + "metafields"); SpecificationNode sn = new SpecificationNode("metadatarule"); sn.setAttribute("match", path); sn.setAttribute("action", action); if (action.equals("include")) { if (allmetadata != null) node.setAttribute("allmetadata", allmetadata); if (metadataFields != null) { int j = 0; while (j < metadataFields.length) { SpecificationNode node2 = new SpecificationNode("metafield"); node2.setAttribute("value", metadataFields[j]); sn.addChild(j++, node2); } } } ds.addChild(ds.getChildCount(), sn); } ds.addChild(ds.getChildCount(), node); i++; } // See if there's a global path rule operation String op = variableContext.getParameter(seqPrefix + "metaop"); if (op != null) { if (op.equals("Add")) { String match = variableContext.getParameter(seqPrefix + "metapath"); String action = variableContext.getParameter(seqPrefix + 
"metaflavor"); SpecificationNode node = new SpecificationNode("metadatarule"); node.setAttribute("match", match); node.setAttribute("action", action); if (action.equals("include")) { String allmetadata = variableContext.getParameter(seqPrefix + "metaall"); String[] metadataFields = variableContext.getParameterValues(seqPrefix + "metafields"); if (allmetadata != null) node.setAttribute("allmetadata", allmetadata); if (metadataFields != null) { int j = 0; while (j < metadataFields.length) { SpecificationNode sn = new SpecificationNode("metafield"); sn.setAttribute("value", metadataFields[j]); node.addChild(j++, sn); } } } ds.addChild(ds.getChildCount(), node); } } // See if there's a global pathbuilder operation String pathop = variableContext.getParameter(seqPrefix + "metapathop"); if (pathop != null) { if (pathop.equals("Reset")) { currentContext.save(seqPrefix + "metapath", "/"); currentContext.save(seqPrefix + "metapathstate", "site"); currentContext.save(seqPrefix + "metapathlibrary", null); } else if (pathop.equals("AppendSite")) { String path = variableContext.getParameter(seqPrefix + "metapath"); String addon = variableContext.getParameter(seqPrefix + "metasite"); if (addon != null && addon.length() > 0) { if (path.equals("/")) path = path + addon; else path = path + "/" + addon; } currentContext.save(seqPrefix + "metapath", path); currentContext.save(seqPrefix + "metapathstate", "site"); currentContext.save(seqPrefix + "metapathlibrary", null); } else if (pathop.equals("AppendLibrary")) { String path = variableContext.getParameter(seqPrefix + "metapath"); String addon = variableContext.getParameter(seqPrefix + "metalibrary"); if (addon != null && addon.length() > 0) { if (path.equals("/")) path = path + addon; else path = path + "/" + addon; currentContext.save(seqPrefix + "metapathstate", "library"); currentContext.save(seqPrefix + "metapathlibrary", path); } currentContext.save(seqPrefix + "metapath", path); } else if (pathop.equals("AppendList")) { String 
path = variableContext.getParameter(seqPrefix + "metapath"); String addon = variableContext.getParameter(seqPrefix + "metalist"); if (addon != null && addon.length() > 0) { if (path.equals("/")) path = path + addon; else path = path + "/" + addon; currentContext.save(seqPrefix + "metapathstate", "list"); currentContext.save(seqPrefix + "metapathlibrary", path); // Automatically add on wildcard for list item part of the match path += "/*"; } currentContext.save(seqPrefix + "metapath", path); } else if (pathop.equals("AppendText")) { String path = variableContext.getParameter(seqPrefix + "metapath"); String library = variableContext.getParameter(seqPrefix + "metapathlibrary"); String addon = variableContext.getParameter(seqPrefix + "metamatch"); if (addon != null && addon.length() > 0) { if (path.equals("/")) path = path + addon; else path = path + "/" + addon; if (library != null) currentContext.save(seqPrefix + "metapathstate", "file"); else currentContext.save(seqPrefix + "metapathstate", "unknown"); } currentContext.save(seqPrefix + "metapath", path); currentContext.save(seqPrefix + "metapathlibrary", library); } else if (pathop.equals("Remove")) { String pathState = variableContext.getParameter(seqPrefix + "metapathstate"); String path; if (pathState.equals("file")) { pathState = "library"; path = variableContext.getParameter(seqPrefix + "metapathlibrary"); } else if (pathState.equals("list") || pathState.equals("library")) { pathState = "site"; path = variableContext.getParameter(seqPrefix + "metapathlibrary"); int index = path.lastIndexOf("/"); path = path.substring(0, index); if (path.length() == 0) path = "/"; currentContext.save(seqPrefix + "metapathlibrary", null); } else { path = variableContext.getParameter(seqPrefix + "metapath"); int index = path.lastIndexOf("/"); path = path.substring(0, index); if (path.length() == 0) path = "/"; } currentContext.save(seqPrefix + "metapathstate", pathState); currentContext.save(seqPrefix + "metapath", path); } } } 
String xc = variableContext.getParameter(seqPrefix + "specsecurity"); if (xc != null) { // Delete all security entries first int i = 0; while (i < ds.getChildCount()) { SpecificationNode sn = ds.getChild(i); if (sn.getType().equals("security")) ds.removeChild(i); else i++; } SpecificationNode node = new SpecificationNode("security"); node.setAttribute("value", xc); ds.addChild(ds.getChildCount(), node); } xc = variableContext.getParameter(seqPrefix + "tokencount"); if (xc != null) { // Delete all file specs first int i = 0; while (i < ds.getChildCount()) { SpecificationNode sn = ds.getChild(i); if (sn.getType().equals("access")) ds.removeChild(i); else i++; } int accessCount = Integer.parseInt(xc); i = 0; while (i < accessCount) { String accessDescription = "_" + Integer.toString(i); String accessOpName = seqPrefix + "accessop" + accessDescription; xc = variableContext.getParameter(accessOpName); if (xc != null && xc.equals("Delete")) { // Next row i++; continue; } // Get the stuff we need String accessSpec = variableContext.getParameter(seqPrefix + "spectoken" + accessDescription); SpecificationNode node = new SpecificationNode("access"); node.setAttribute("token", accessSpec); ds.addChild(ds.getChildCount(), node); i++; } String op = variableContext.getParameter(seqPrefix + "accessop"); if (op != null && op.equals("Add")) { String accessspec = variableContext.getParameter(seqPrefix + "spectoken"); SpecificationNode node = new SpecificationNode("access"); node.setAttribute("token", accessspec); ds.addChild(ds.getChildCount(), node); } } xc = variableContext.getParameter(seqPrefix + "specpathnameattribute"); if (xc != null) { // Delete old one int i = 0; while (i < ds.getChildCount()) { SpecificationNode sn = ds.getChild(i); if (sn.getType().equals("pathnameattribute")) ds.removeChild(i); else i++; } if (xc.length() > 0) { SpecificationNode node = new SpecificationNode("pathnameattribute"); node.setAttribute("value", xc); ds.addChild(ds.getChildCount(), node); } } 
xc = variableContext.getParameter(seqPrefix + "specmappingcount"); if (xc != null) { // Delete old spec int i = 0; while (i < ds.getChildCount()) { SpecificationNode sn = ds.getChild(i); if (sn.getType().equals("pathmap")) ds.removeChild(i); else i++; } // Now, go through the data and assemble a new list. int mappingCount = Integer.parseInt(xc); // Gather up these i = 0; while (i < mappingCount) { String pathDescription = "_" + Integer.toString(i); String pathOpName = seqPrefix + "specmappingop" + pathDescription; xc = variableContext.getParameter(pathOpName); if (xc != null && xc.equals("Delete")) { // Skip to the next i++; continue; } // Inserts won't happen until the very end String match = variableContext.getParameter(seqPrefix + "specmatch" + pathDescription); String replace = variableContext.getParameter(seqPrefix + "specreplace" + pathDescription); SpecificationNode node = new SpecificationNode("pathmap"); node.setAttribute("match", match); node.setAttribute("replace", replace); ds.addChild(ds.getChildCount(), node); i++; } // Check for add xc = variableContext.getParameter(seqPrefix + "specmappingop"); if (xc != null && xc.equals("Add")) { String match = variableContext.getParameter(seqPrefix + "specmatch"); String replace = variableContext.getParameter(seqPrefix + "specreplace"); SpecificationNode node = new SpecificationNode(seqPrefix + "pathmap"); node.setAttribute("match", match); node.setAttribute("replace", replace); ds.addChild(ds.getChildCount(), node); } } return null; } /** View specification. * This method is called in the body section of a job's view page. Its purpose is to present the document * specification information to the user. The coder can presume that the HTML that is output from * this configuration will be within appropriate <html> and <body> tags. * The connector will be connected before this method can be called. *@param out is the output to which any HTML should be sent. 
*@param locale is the locale the output is preferred to be in. *@param ds is the current document specification for this job. *@param connectionSequenceNumber is the unique number of this connection within the job. */ @Override public void viewSpecification(IHTTPOutput out, Locale locale, Specification ds, int connectionSequenceNumber) throws ManifoldCFException, IOException { Map<String, Object> velocityContext = new HashMap<String, Object>(); velocityContext.put("SeqNum", Integer.toString(connectionSequenceNumber)); fillInSecurityTab(velocityContext, out, ds); fillInPathsTab(velocityContext, out, ds); fillInMetadataTab(velocityContext, out, ds); Messages.outputResourceWithVelocity(out, locale, "viewSpecification.html", velocityContext); } protected static class ExecuteMethodThread extends Thread { protected final HttpClient httpClient; protected final String url; protected final OutputStream os; protected Throwable exception = null; protected int returnCode = 0; public ExecuteMethodThread(HttpClient httpClient, String url, OutputStream os) { super(); setDaemon(true); this.httpClient = httpClient; this.url = url; this.os = os; } public void run() { try { HttpGet method = new HttpGet(url); // Try block to insure that the connection gets cleaned up try { // Begin the fetch HttpResponse response = httpClient.execute(method); returnCode = response.getStatusLine().getStatusCode(); if (returnCode == 200) { // Process the data HttpEntity entity = response.getEntity(); if (entity != null) { InputStream is = entity.getContent(); // Figure out what to do with the data. 
byte[] transferBuffer = new byte[65536]; while (true) { int amt = is.read(transferBuffer); if (amt == -1) break; os.write(transferBuffer, 0, amt); } } } } finally { // Consumes and closes the stream, releasing the connection method.abort(); } } catch (Throwable e) { this.exception = e; } } public int finishUp() throws InterruptedException, IOException, org.apache.http.HttpException { join(); if (exception != null) { if (exception instanceof IOException) throw (IOException) exception; else if (exception instanceof Error) throw (Error) exception; else if (exception instanceof org.apache.http.HttpException) throw (org.apache.http.HttpException) exception; else if (exception instanceof RuntimeException) throw (RuntimeException) exception; else throw new RuntimeException( "Unexpected exception type thrown: " + exception.getClass().getName()); } return returnCode; } } /** * Gets a list of field names of the given document library or list. * @param parentSite - parent site path * @param docLibrary name * @return list of the fields */ public Map<String, String> getLibFieldList(String parentSite, String docLibrary) throws ServiceInterruption, ManifoldCFException { getSession(); return proxy.getFieldList(encodePath(parentSite), proxy.getDocLibID(encodePath(parentSite), parentSite, docLibrary)); } /** * Gets a list of field names of the given document library or list. * @param parentSite - parent site path * @param docLibrary name * @return list of the fields */ public Map<String, String> getListFieldList(String parentSite, String listName) throws ServiceInterruption, ManifoldCFException { getSession(); return proxy.getFieldList(encodePath(parentSite), proxy.getListID(encodePath(parentSite), parentSite, listName)); } /** * Gets a list of sites/subsites of the given parent site * @param parentSite the unencoded parent site path to search for subsites, empty for root. 
* @return list of the sites */ public List<NameValue> getSites(String parentSite) throws ServiceInterruption, ManifoldCFException { getSession(); return proxy.getSites(encodePath(parentSite)); } /** * Gets a list of document libraries of the given parent site * @param parentSite the unencoded parent site to search for libraries, empty for root. * @return list of the libraries */ public List<NameValue> getDocLibsBySite(String parentSite) throws ManifoldCFException, ServiceInterruption { getSession(); return proxy.getDocumentLibraries(encodePath(parentSite), parentSite); } /** * Gets a list of lists of the given parent site * @param parentSite the unencoded parent site to search for lists, empty for root. * @return list of the lists */ public List<NameValue> getListsBySite(String parentSite) throws ManifoldCFException, ServiceInterruption { getSession(); return proxy.getLists(encodePath(parentSite), parentSite); } // Protected static methods /** Check if a library should be included, given a document specification. *@param libraryPath is the unencoded canonical library name (including site path from root site), without any starting slash. *@param documentSpecification is the specification. *@return true if it should be included. */ protected boolean checkIncludeLibrary(String libraryPath, Specification documentSpecification) { if (Logging.connectors.isDebugEnabled()) Logging.connectors.debug("SharePoint: Checking whether to include library '" + libraryPath + "'"); // Scan the specification, looking for the old-style "startpoint" matches and the new-style "libraryrule" matches. int i = 0; while (i < documentSpecification.getChildCount()) { SpecificationNode sn = documentSpecification.getChild(i++); if (sn.getType().equals("startpoint")) { // Old style rule! String site = sn.getAttributeValue("site"); String lib = sn.getAttributeValue("lib"); // Both site and lib are unencoded. 
See if they match the library path String pathStart = site + "/" + lib; // Old-style matches have a preceding "/" when there's no subsite... if (libraryPath.equals(pathStart)) { // Hey, the startpoint rule matches! It's an implicit inclusion, so we don't need to do anything else except return. if (Logging.connectors.isDebugEnabled()) Logging.connectors.debug("SharePoint: Library path '" + libraryPath + "' matched old-style startpoint with site '" + site + "' and library '" + lib + "' - including"); return true; } } else if (sn.getType().equals("pathrule")) { // New-style rule. // Here's the trick: We do what the first matching rule tells us to do. String pathMatch = sn.getAttributeValue("match"); String action = sn.getAttributeValue("action"); String ruleType = sn.getAttributeValue("type"); // First, find out if we match EXACTLY. if (checkMatch(libraryPath, 0, pathMatch)) { // If this is true, the type also has to match if the rule is to apply. if (ruleType.equals("library")) { if (Logging.connectors.isDebugEnabled()) Logging.connectors.debug("SharePoint: Library '" + libraryPath + "' exactly matched rule path '" + pathMatch + "'"); if (action.equals("include")) { // For include rules, partial match is good enough to proceed. 
if (Logging.connectors.isDebugEnabled()) Logging.connectors.debug("SharePoint: Including library '" + libraryPath + "'"); return true; } if (Logging.connectors.isDebugEnabled()) Logging.connectors.debug("SharePoint: Excluding library '" + libraryPath + "'"); return false; } } else if (ruleType.equals("file") && checkPartialPathMatch(libraryPath, 0, pathMatch, 1) && action.equals("include")) { if (Logging.connectors.isDebugEnabled()) Logging.connectors.debug("SharePoint: Library '" + libraryPath + "' partially matched file rule path '" + pathMatch + "' - including"); return true; } else if (ruleType.equals("folder") && checkPartialPathMatch(libraryPath, 0, pathMatch, 1) && action.equals("include")) { if (Logging.connectors.isDebugEnabled()) Logging.connectors.debug("SharePoint: Library '" + libraryPath + "' partially matched folder rule path '" + pathMatch + "' - including"); return true; } } } if (Logging.connectors.isDebugEnabled()) Logging.connectors .debug("SharePoint: Not including library '" + libraryPath + "' because no matching rule"); return false; } /** Check if a list should be included, given a document specification. *@param listPath is the unencoded canonical list name (including site path from root site), without any starting slash. *@param documentSpecification is the specification. *@return true if it should be included. */ protected boolean checkIncludeList(String listPath, Specification documentSpecification) { if (Logging.connectors.isDebugEnabled()) Logging.connectors.debug("SharePoint: Checking whether to include list '" + listPath + "'"); // Scan the specification, looking for new-style "pathrule" matches. int i = 0; while (i < documentSpecification.getChildCount()) { SpecificationNode sn = documentSpecification.getChild(i++); if (sn.getType().equals("pathrule")) { // New-style rule. // Here's the trick: We do what the first matching rule tells us to do. 
String pathMatch = sn.getAttributeValue("match"); String action = sn.getAttributeValue("action"); String ruleType = sn.getAttributeValue("type"); // First, find out if we match EXACTLY. if (checkMatch(listPath, 0, pathMatch)) { // If this is true, the type also has to match if the rule is to apply. if (ruleType.equals("list")) { if (Logging.connectors.isDebugEnabled()) Logging.connectors.debug("SharePoint: List '" + listPath + "' exactly matched rule path '" + pathMatch + "'"); if (action.equals("include")) { // For include rules, partial match is good enough to proceed. if (Logging.connectors.isDebugEnabled()) Logging.connectors.debug("SharePoint: Including list '" + listPath + "'"); return true; } if (Logging.connectors.isDebugEnabled()) Logging.connectors.debug("SharePoint: Excluding list '" + listPath + "'"); return false; } } } } if (Logging.connectors.isDebugEnabled()) Logging.connectors.debug("SharePoint: Not including list '" + listPath + "' because no matching rule"); return false; } /** Check if a site should be included, given a document specification. *@param sitePath is the unencoded canonical site path name from the root site level, without any starting slash. *@param documentSpecification is the specification. *@return true if it should be included. */ protected boolean checkIncludeSite(String sitePath, Specification documentSpecification) { if (Logging.connectors.isDebugEnabled()) Logging.connectors.debug("SharePoint: Checking whether to include site '" + sitePath + "'"); // Scan the specification, looking for the old-style "startpoint" matches and the new-style "libraryrule" matches. int i = 0; while (i < documentSpecification.getChildCount()) { SpecificationNode sn = documentSpecification.getChild(i++); if (sn.getType().equals("startpoint")) { // Old style rule! String site = sn.getAttributeValue("site"); // Both site and lib are unencoded. See if they match part of the site path. // Note well: We want a complete subsection match! 
That is, what's left in the path after the match must // either start with "/" or be empty. if (!site.startsWith("/")) site = "/" + site; // Old-style matches have a preceding "/" when there's no subsite... if (site.startsWith(sitePath)) { if (sitePath.length() == 1 || site.length() == sitePath.length() || site.charAt(sitePath.length()) == '/') { // Hey, the startpoint rule matches! It's an implicit inclusion, so we don't need to do anything else except return. if (Logging.connectors.isDebugEnabled()) Logging.connectors.debug("SharePoint: Site path '" + sitePath + "' matched old-style startpoint with site '" + site + "' - including"); return true; } } } else if (sn.getType().equals("pathrule")) { // New-style rule. String pathMatch = sn.getAttributeValue("match"); String action = sn.getAttributeValue("action"); String ruleType = sn.getAttributeValue("type"); // First, find out if we match EXACTLY. if (checkMatch(sitePath, 0, pathMatch)) { // If this is true, the type also has to match if the rule is to apply. if (ruleType.equals("site")) { if (Logging.connectors.isDebugEnabled()) Logging.connectors.debug("SharePoint: Site '" + sitePath + "' exactly matched rule path '" + pathMatch + "'"); if (action.equals("include")) { // For include rules, partial match is good enough to proceed. 
if (Logging.connectors.isDebugEnabled()) Logging.connectors.debug("SharePoint: Including site '" + sitePath + "'"); return true; } if (Logging.connectors.isDebugEnabled()) Logging.connectors.debug("SharePoint: Excluding site '" + sitePath + "'"); return false; } } else if (ruleType.equals("library") && checkPartialPathMatch(sitePath, 0, pathMatch, 1) && action.equals("include")) { if (Logging.connectors.isDebugEnabled()) Logging.connectors.debug("SharePoint: Site '" + sitePath + "' partially matched library rule path '" + pathMatch + "' - including"); return true; } else if (ruleType.equals("list") && checkPartialPathMatch(sitePath, 0, pathMatch, 1) && action.equals("include")) { if (Logging.connectors.isDebugEnabled()) Logging.connectors.debug("SharePoint: Site '" + sitePath + "' partially matched list rule path '" + pathMatch + "' - including"); return true; } else if (ruleType.equals("site") && checkPartialPathMatch(sitePath, 0, pathMatch, 0) && action.equals("include")) { if (Logging.connectors.isDebugEnabled()) Logging.connectors.debug("SharePoint: Site '" + sitePath + "' partially matched site rule path '" + pathMatch + "' - including"); return true; } else if (ruleType.equals("file") && checkPartialPathMatch(sitePath, 0, pathMatch, 2) && action.equals("include")) { if (Logging.connectors.isDebugEnabled()) Logging.connectors.debug("SharePoint: Site '" + sitePath + "' partially matched file rule path '" + pathMatch + "' - including"); return true; } else if (ruleType.equals("folder") && checkPartialPathMatch(sitePath, 0, pathMatch, 2) && action.equals("include")) { if (Logging.connectors.isDebugEnabled()) Logging.connectors.debug("SharePoint: Site '" + sitePath + "' partially matched folder rule path '" + pathMatch + "' - including"); return true; } } } if (Logging.connectors.isDebugEnabled()) Logging.connectors.debug("SharePoint: Not including site '" + sitePath + "' because no matching rule"); return false; } /** Get a file or item's metadata specification, 
given a path and a document specification. *@param filePath is the unencoded path to a file or item, including sites and library/list, beneath the root site. *@param documentSpecification is the document specification. *@return the metadata description appropriate to the file. */ protected MetadataInformation getMetadataSpecification(String filePath, Specification documentSpecification) { if (Logging.connectors.isDebugEnabled()) Logging.connectors .debug("SharePoint: Finding metadata to include for document/item '" + filePath + "'."); MetadataInformation rval = new MetadataInformation(); // Scan the specification, looking for the old-style "startpoint" matches and the new-style "metadatarule" matches. int i = 0; while (i < documentSpecification.getChildCount()) { SpecificationNode sn = documentSpecification.getChild(i++); if (sn.getType().equals("startpoint")) { // Old style rule! String site = sn.getAttributeValue("site"); String lib = sn.getAttributeValue("lib"); // Both site and lib are unencoded. See if they match the first part of the filepath String pathStart = site + "/" + lib + "/"; // Old-style matches have a preceding "/" when there's no subsite... if (filePath.startsWith(pathStart)) { // Hey, the startpoint rule matches! It's an implicit inclusion, so this is where we get the metadata from (and then return) String allmetadata = sn.getAttributeValue("allmetadata"); if (allmetadata != null && allmetadata.equals("true")) rval.setAllMetadata(); else { // Scan children looking for metadata nodes int j = 0; while (j < sn.getChildCount()) { SpecificationNode node = sn.getChild(j++); if (node.getType().equals("metafield")) rval.addMetadataField(node.getAttributeValue("value")); } } return rval; } } else if (sn.getType().equals("metadatarule")) { // New-style rule. // Here's the trick: We do what the first matching rule tells us to do. String pathMatch = sn.getAttributeValue("match"); // First, find out if we match... 
if (checkMatch(filePath, 0, pathMatch)) { // The rule "fired". Now, do what it tells us to. String action = sn.getAttributeValue("action"); if (action.equals("include")) { // Include: Process the metadata specification, then return String allMetadata = sn.getAttributeValue("allmetadata"); if (allMetadata != null && allMetadata.equals("true")) rval.setAllMetadata(); else { // Scan children looking for metadata nodes int j = 0; while (j < sn.getChildCount()) { SpecificationNode node = sn.getChild(j++); if (node.getType().equals("metafield")) rval.addMetadataField(node.getAttributeValue("value")); } } } return rval; } } } return rval; } /** Check if a file should be included. *@param filePath is the path to the file, including sites and library, beneath the root site. *@param documentSpecification is the document specification. *@return true if file should be included. */ protected boolean checkIncludeFile(String filePath, Specification documentSpecification) { if (Logging.connectors.isDebugEnabled()) Logging.connectors.debug("SharePoint: Checking whether to include document '" + filePath + "'"); // Break up the file/folder part of the path int lastSlash = filePath.lastIndexOf("/"); String pathPart = filePath.substring(0, lastSlash); String filePart = filePath.substring(lastSlash + 1); // Scan the spec rules looking for a library match, and extract the information if found. // We need to understand both the old-style rules (startpoints), and the new style (matchrules) int i = 0; while (i < documentSpecification.getChildCount()) { SpecificationNode sn = documentSpecification.getChild(i++); if (sn.getType().equals("startpoint")) { // Old style rule! String site = sn.getAttributeValue("site"); String lib = sn.getAttributeValue("lib"); // Both site and lib are unencoded. The string we are matching starts with "/" if the site is empty. String pathMatch = site + "/" + lib + "/"; if (filePath.startsWith(pathMatch)) { // Hey, it matched! 
if (Logging.connectors.isDebugEnabled()) Logging.connectors.debug( "SharePoint: File path '" + filePath + "' matched old-style startpoint with site '" + site + "' and library '" + lib + "'"); int restOfPathIndex = pathMatch.length(); // We need to walk through the subrules and see whether it's in or out. int j = 0; while (j < sn.getChildCount()) { SpecificationNode node = sn.getChild(j++); String flavor = node.getType(); if (flavor.equals("include") || flavor.equals("exclude")) { String match = node.getAttributeValue("match"); String type = node.getAttributeValue("type"); String sourceMatch; int sourceIndex; if (type.equals("file")) { sourceMatch = filePart; sourceIndex = 0; } else { sourceMatch = pathPart; sourceIndex = restOfPathIndex; } if (checkMatch(sourceMatch, sourceIndex, match)) { // Our file path matched the rule. if (flavor.equals("include")) { if (Logging.connectors.isDebugEnabled()) Logging.connectors.debug("SharePoint: File path '" + filePath + "' matched old-style inclusion rule '" + match + "' - including"); return true; } if (Logging.connectors.isDebugEnabled()) Logging.connectors.debug("SharePoint: File path '" + filePath + "' matched old-style exclusion rule '" + match + "' - excluding"); return false; } } } // Didn't match any of the file rules; therefore exclude. if (Logging.connectors.isDebugEnabled()) Logging.connectors.debug("SharePoint: File path '" + filePath + "' did not match any old-style inclusion/exclusion rules - excluding"); return false; } } else if (sn.getType().equals("pathrule")) { // New style rule! String pathMatch = sn.getAttributeValue("match"); String action = sn.getAttributeValue("action"); String ruleType = sn.getAttributeValue("type"); // Find out if we match EXACTLY. There are no "partial matches" for files. if (checkMatch(filePath, 0, pathMatch)) { // If this is true, the type also has to match if the rule is to apply. 
if (ruleType.equals("file")) { if (Logging.connectors.isDebugEnabled()) Logging.connectors.debug("SharePoint: File '" + filePath + "' exactly matched rule path '" + pathMatch + "'"); if (action.equals("include")) { // For include rules, partial match is good enough to proceed. if (Logging.connectors.isDebugEnabled()) Logging.connectors.debug("SharePoint: Including file '" + filePath + "'"); return true; } if (Logging.connectors.isDebugEnabled()) Logging.connectors.debug("SharePoint: Excluding file '" + filePath + "'"); return false; } } } } if (Logging.connectors.isDebugEnabled()) Logging.connectors .debug("SharePoint: File path '" + filePath + "' does not match any rules - excluding"); return false; } /** Check if a list item attachment should be included. *@param attachmentPath is the path to the attachment, including sites and list name, beneath the root site. *@param documentSpecification is the document specification. *@return true if file should be included. */ protected boolean checkIncludeListItemAttachment(String attachmentPath, Specification documentSpecification) { if (Logging.connectors.isDebugEnabled()) Logging.connectors .debug("SharePoint: Checking whether to include list item attachment '" + attachmentPath + "'"); // There are no attachment rules, so they are always included return true; } /** Check if a list item should be included. *@param itemPath is the path to the item, including sites and list name, beneath the root site. *@param documentSpecification is the document specification. *@return true if file should be included. */ protected boolean checkIncludeListItem(String itemPath, Specification documentSpecification) { if (Logging.connectors.isDebugEnabled()) Logging.connectors.debug("SharePoint: Checking whether to include list item '" + itemPath + "'"); // There are no item rules, so they are always included return true; } /** Match a sub-path. The sub-path must match the complete starting part of the full path, in a path * sense. 
The returned value should point into the file name beyond the end of the matched path, or * be -1 if there is no match. *@param subPath is the sub path. *@param fullPath is the full path. *@return the index of the start of the remaining part of the full path, or -1. */ protected static int matchSubPath(String subPath, String fullPath) { if (subPath.length() > fullPath.length()) return -1; if (fullPath.startsWith(subPath) == false) return -1; int rval = subPath.length(); if (fullPath.length() == rval) return rval; char x = fullPath.charAt(rval); if (x == '/') rval++; return rval; } /** Check for a partial path match between two strings with wildcards. * Match allowance also must be made for the minimum path components in the rest of the path. */ protected static boolean checkPartialPathMatch(String sourceMatch, int sourceIndex, String match, int requiredExtraPathSections) { // The partial match must be of a complete path, with at least a specified number of trailing path components possible in what remains. // Path components can include everything but the "/" character itself. // // The match string is the one containing the wildcards. Both the "*" wildcard and the "?" wildcard will match a "/", which is intended but is why this // matcher is a little tricky to write. // // Note also that it is OK to return "true" more than strictly necessary, but it is never OK to return "false" incorrectly. // This is a partial path match. That means that we don't have to completely use up the match string, but what's left on the match string after the source // string is used up MUST either be capable of being null, or be capable of starting with a "/"integral path sections, and MUST include at least n of these sections. 
// boolean caseSensitive = true; if (!sourceMatch.endsWith("/")) sourceMatch = sourceMatch + "/"; return processPartialPathCheck(caseSensitive, sourceMatch, sourceIndex, match, 0, requiredExtraPathSections); } /** Recursive worker method for checkPartialPathMatch. Returns 'true' if there is a path that consumes the source string entirely, * and leaves the remainder of the match string able to match the required followup. *@param caseSensitive is true if file names are case sensitive. *@param sourceMatch is the source string (w/o wildcards) *@param sourceIndex is the current point in the source string. *@param match is the match string (w/wildcards) *@param matchIndex is the current point in the match string. *@return true if there is a match. */ protected static boolean processPartialPathCheck(boolean caseSensitive, String sourceMatch, int sourceIndex, String match, int matchIndex, int requiredExtraPathSections) { // Match up through the next * we encounter while (true) { // If we've reached the end of the source, verify that it's a match. if (sourceMatch.length() == sourceIndex) { // The "correct" way to code this is to recursively attempt to generate all different paths that correspond to the required extra sections. However, // that's computationally very nasty. In practice, we'll simply distinguish between "some" and "none". // If we've reached the end of the match string too, then it passes (or fails, if we need extra sections) if (match.length() == matchIndex) return (requiredExtraPathSections == 0); // We can match a path separator, so we win return true; } // If we have reached the end of the match (but not the source), match fails if (match.length() == matchIndex) return false; char x = sourceMatch.charAt(sourceIndex); char y = match.charAt(matchIndex); if (!caseSensitive) { if (x >= 'A' && x <= 'Z') x -= 'A' - 'a'; if (y >= 'A' && y <= 'Z') y -= 'A' - 'a'; } if (y == '*') { // Wildcard! // We will recurse at this point. 
// Basically, we want to combine the results for leaving the "*" in the match string // at this point and advancing the source index, with skipping the "*" and leaving the source // string alone. return processPartialPathCheck(caseSensitive, sourceMatch, sourceIndex + 1, match, matchIndex, requiredExtraPathSections) || processPartialPathCheck(caseSensitive, sourceMatch, sourceIndex, match, matchIndex + 1, requiredExtraPathSections); } if (y == '?' || x == y) { sourceIndex++; matchIndex++; } else return false; } } /** Check a match between two strings with wildcards. *@param sourceMatch is the expanded string (no wildcards) *@param sourceIndex is the starting point in the expanded string. *@param match is the wildcard-based string. *@return true if there is a match. */ protected static boolean checkMatch(String sourceMatch, int sourceIndex, String match) { // Note: The java regex stuff looks pretty heavyweight for this purpose. // I've opted to try and do a simple recursive version myself, which is not compiled. // Basically, the match proceeds by recursive descent through the string, so that all *'s cause // recursion. boolean caseSensitive = true; return processCheck(caseSensitive, sourceMatch, sourceIndex, match, 0); } /** Recursive worker method for checkMatch. Returns 'true' if there is a path that consumes both * strings in their entirety in a matched way. *@param caseSensitive is true if file names are case sensitive. *@param sourceMatch is the source string (w/o wildcards) *@param sourceIndex is the current point in the source string. *@param match is the match string (w/wildcards) *@param matchIndex is the current point in the match string. *@return true if there is a match. */ protected static boolean processCheck(boolean caseSensitive, String sourceMatch, int sourceIndex, String match, int matchIndex) { // Match up through the next * we encounter while (true) { // If we've reached the end, it's a match. 
if (sourceMatch.length() == sourceIndex && match.length() == matchIndex) return true; // If one has reached the end but the other hasn't, no match if (match.length() == matchIndex) return false; if (sourceMatch.length() == sourceIndex) { if (match.charAt(matchIndex) != '*') return false; matchIndex++; continue; } char x = sourceMatch.charAt(sourceIndex); char y = match.charAt(matchIndex); if (!caseSensitive) { if (x >= 'A' && x <= 'Z') x -= 'A' - 'a'; if (y >= 'A' && y <= 'Z') y -= 'A' - 'a'; } if (y == '*') { // Wildcard! // We will recurse at this point. // Basically, we want to combine the results for leaving the "*" in the match string // at this point and advancing the source index, with skipping the "*" and leaving the source // string alone. return processCheck(caseSensitive, sourceMatch, sourceIndex + 1, match, matchIndex) || processCheck(caseSensitive, sourceMatch, sourceIndex, match, matchIndex + 1); } if (y == '?' || x == y) { sourceIndex++; matchIndex++; } else return false; } } /** Grab forced acl out of document specification. *@param spec is the document specification. *@return the acls. */ protected static String[] getAcls(Specification spec) { HashMap map = new HashMap(); int i = 0; boolean securityOn = true; while (i < spec.getChildCount()) { SpecificationNode sn = spec.getChild(i++); if (sn.getType().equals("access")) { String token = sn.getAttributeValue("token"); map.put(token, token); } else if (sn.getType().equals("security")) { String value = sn.getAttributeValue("value"); if (value.equals("on")) securityOn = true; else if (value.equals("off")) securityOn = false; } } if (!securityOn) return null; String[] rval = new String[map.size()]; Iterator iter = map.keySet().iterator(); i = 0; while (iter.hasNext()) { rval[i++] = (String) iter.next(); } return rval; } /** Decode a path item. */ public static String pathItemDecode(String pathItem) { return URLDecoder.decode(pathItem.replaceAll("\\%20", "+")); } /** Encode a path item. 
*/ public static String pathItemEncode(String pathItem) { String output = URLEncoder.encode(pathItem); return output.replaceAll("\\+", "%20"); } /** Given a path that is /-separated, and otherwise encoded, decode properly to convert to * unencoded form. */ public static String decodePath(String relPath) { StringBuilder sb = new StringBuilder(); String[] pathEntries = relPath.split("/"); int k = 0; boolean isFirst = true; while (k < pathEntries.length) { if (isFirst) isFirst = false; else sb.append("/"); sb.append(pathItemDecode(pathEntries[k++])); } return sb.toString(); } /** Given a path that is /-separated, and otherwise unencoded, encode properly for an actual * URI */ public static String encodePath(String relPath) { StringBuilder sb = new StringBuilder(); String[] pathEntries = relPath.split("/"); int k = 0; boolean isFirst = true; while (k < pathEntries.length) { if (isFirst) isFirst = false; else sb.append("/"); sb.append(pathItemEncode(pathEntries[k++])); } return sb.toString(); } /** Metadata information gleaned from document paths and specification. */ protected static class MetadataInformation { protected boolean allMetadata = false; protected HashMap metadataFields = new HashMap(); /** Constructor */ public MetadataInformation() { } /** Set "all metadata" */ public void setAllMetadata() { allMetadata = true; } /** Add a metadata field */ public void addMetadataField(String fieldName) { metadataFields.put(fieldName, fieldName); } /** Get whether "all metadata" is to be used */ public boolean getAllMetadata() { return allMetadata; } /** Get the set of metadata fields to use */ public String[] getMetadataFields() { String[] rval = new String[metadataFields.size()]; Iterator iter = metadataFields.keySet().iterator(); int i = 0; while (iter.hasNext()) { rval[i++] = (String) iter.next(); } return rval; } } /** Class that tracks paths associated with id's, and the name * of the metadata attribute to use for the path. 
* Both are read from the specification handed to the constructor: the "value" of a "pathnameattribute"
   * node supplies the attribute name, and every "pathmap" node adds a match/replace pair to the path map.
   */
  protected class SystemMetadataDescription {
    // The path attribute name
    protected final String pathAttributeName;

    // The path name map
    protected final MatchMap matchMap = new MatchMap();

    /** Constructor.
     *@param spec is the specification to read the attribute name and path mappings from.
     */
    public SystemMetadataDescription(Specification spec) throws ManifoldCFException {
      // When several "pathnameattribute" nodes are present, the last one wins.
      String configuredName = null;
      int index = 0;
      while (index < spec.getChildCount()) {
        SpecificationNode child = spec.getChild(index++);
        String childType = child.getType();
        if (childType.equals("pathnameattribute")) {
          configuredName = child.getAttributeValue("value");
        } else if (childType.equals("pathmap")) {
          matchMap.appendMatchPair(child.getAttributeValue("match"), child.getAttributeValue("replace"));
        }
      }
      this.pathAttributeName = configuredName;
    }

    /** Get the path attribute name.
     *@return the path attribute name, or null if none specified.
     */
    public String getPathAttributeName() {
      return pathAttributeName;
    }

    /** Given an identifier, get the translated string that goes into the metadata.
     *@param documentIdentifier is the document identifier.
     *@return the identifier's path string after translation through the path map.
     */
    public String getPathAttributeValue(String documentIdentifier) throws ManifoldCFException {
      return matchMap.translate(getPathString(documentIdentifier));
    }

    /** For a given id, get the portion of its path which the mapping and ingestion
     * should go against.  Effectively this should include the whole identifer, so this
     * is easy to calculate.
     *@param documentIdentifier is the document identifier.
     *@return the identifier with the first "//" collapsed to a single "/", or the identifier
     * unchanged when it contains no "//".
     */
    public String getPathString(String documentIdentifier) throws ManifoldCFException {
      // There will be a "//" somewhere in the string.  Remove it!
      int doubleSlash = documentIdentifier.indexOf("//");
      if (doubleSlash < 0)
        return documentIdentifier;
      // Dropping the first slash of the pair leaves a single "/" in its place.
      return new StringBuilder(documentIdentifier).deleteCharAt(doubleSlash).toString();
    }
  }

}