Usage examples for java.lang.InterruptedException#getClass()
@HotSpotIntrinsicCandidate public final native Class<?> getClass();
From source file:com.searchcode.app.jobs.IndexGitRepoJob.java
/** * Indexes all the documents in the path provided. Will also remove anything from the index if not on disk * Generally this is a slow update used only for the inital clone of a repository * NB this can be used for updates but it will be much slower as it needs to to walk the contents of the disk *///from w w w .j av a 2 s. c o m public void indexDocsByPath(Path path, String repoName, String repoLocations, String repoRemoteLocation, boolean existingRepo) { SearchcodeLib scl = Singleton.getSearchCodeLib(); // Should have data object by this point List<String> fileLocations = new ArrayList<>(); Queue<CodeIndexDocument> codeIndexDocumentQueue = Singleton.getCodeIndexQueue(); // Convert once outside the main loop String fileRepoLocations = FilenameUtils.separatorsToUnix(repoLocations); boolean lowMemory = this.LOWMEMORY; boolean useSystemGit = this.USESYSTEMGIT; try { Files.walkFileTree(path, new SimpleFileVisitor<Path>() { @Override public FileVisitResult visitFile(Path file, BasicFileAttributes attrs) throws IOException { while (CodeIndexer.shouldPauseAdding()) { Singleton.getLogger().info("Pausing parser."); try { Thread.sleep(SLEEPTIME); } catch (InterruptedException ex) { } } // Convert Path file to unix style that way everything is easier to reason about String fileParent = FilenameUtils.separatorsToUnix(file.getParent().toString()); String fileToString = FilenameUtils.separatorsToUnix(file.toString()); String fileName = file.getFileName().toString(); String md5Hash = Values.EMPTYSTRING; if (fileParent.endsWith("/.git") || fileParent.contains("/.git/")) { return FileVisitResult.CONTINUE; } List<String> codeLines; try { codeLines = Helpers.readFileLines(fileToString, MAXFILELINEDEPTH); } catch (IOException ex) { return FileVisitResult.CONTINUE; } try { FileInputStream fis = new FileInputStream(new File(fileToString)); md5Hash = org.apache.commons.codec.digest.DigestUtils.md5Hex(fis); fis.close(); } catch (IOException ex) { Singleton.getLogger().warning("Unable 
to generate MD5 for " + fileToString); } // is the file minified? if (scl.isMinified(codeLines)) { Singleton.getLogger().info("Appears to be minified will not index " + fileToString); return FileVisitResult.CONTINUE; } String languageName = scl.languageGuesser(fileName, codeLines); String fileLocation = fileToString.replace(fileRepoLocations, Values.EMPTYSTRING) .replace(fileName, Values.EMPTYSTRING); String fileLocationFilename = fileToString.replace(fileRepoLocations, Values.EMPTYSTRING); String repoLocationRepoNameLocationFilename = fileToString; String newString = getBlameFilePath(fileLocationFilename); List<CodeOwner> owners; if (useSystemGit) { owners = getBlameInfoExternal(codeLines.size(), repoName, fileRepoLocations, newString); } else { owners = getBlameInfo(codeLines.size(), repoName, fileRepoLocations, newString); } String codeOwner = scl.codeOwner(owners); // If low memory don't add to the queue, just index it directly if (lowMemory) { CodeIndexer.indexDocument(new CodeIndexDocument(repoLocationRepoNameLocationFilename, repoName, fileName, fileLocation, fileLocationFilename, md5Hash, languageName, codeLines.size(), StringUtils.join(codeLines, " "), repoRemoteLocation, codeOwner)); } else { Singleton.incrementCodeIndexLinesCount(codeLines.size()); codeIndexDocumentQueue.add(new CodeIndexDocument(repoLocationRepoNameLocationFilename, repoName, fileName, fileLocation, fileLocationFilename, md5Hash, languageName, codeLines.size(), StringUtils.join(codeLines, " "), repoRemoteLocation, codeOwner)); } fileLocations.add(fileLocationFilename); return FileVisitResult.CONTINUE; } }); } catch (IOException ex) { Singleton.getLogger().warning("ERROR - caught a " + ex.getClass() + " in " + this.getClass() + "\n with message: " + ex.getMessage()); } if (existingRepo) { CodeSearcher cs = new CodeSearcher(); List<String> indexLocations = cs.getRepoDocuments(repoName); for (String file : indexLocations) { if (!fileLocations.contains(file)) { 
Singleton.getLogger().info("Missing from disk, removing from index " + file); try { CodeIndexer.deleteByFileLocationFilename(file); } catch (IOException ex) { Singleton.getLogger().warning("ERROR - caught a " + ex.getClass() + " in " + this.getClass() + "\n with message: " + ex.getMessage()); } } } } }
From source file:org.apache.manifoldcf.crawler.connectors.sharepoint.SharePointRepository.java
/** Method that fetches and indexes a file fetched from a SharePoint URL, with appropriate error handling * etc.// w w w. j av a 2 s . c o m */ protected void fetchAndIndexFile(IProcessActivity activities, String documentIdentifier, String version, String fileUrl, String fetchUrl, String[] accessTokens, String[] denyTokens, Date createdDate, Date modifiedDate, Map<String, String> metadataValues, String guid, SystemMetadataDescription sDesc) throws ManifoldCFException, ServiceInterruption { String errorCode = null; String errorDesc = null; long startTime = System.currentTimeMillis(); Long fileLengthLong = null; try { // Before we fetch, confirm that the output connector will accept the document if (!activities.checkURLIndexable(fileUrl)) { // URL failed errorCode = activities.EXCLUDED_URL; errorDesc = "Document rejected because of URL (" + fileUrl + ")"; if (Logging.connectors.isDebugEnabled()) Logging.connectors.debug("SharePoint: Skipping document '" + documentIdentifier + "' because output connector says URL '" + fileUrl + "' is not indexable"); activities.noDocument(documentIdentifier, version); return; } // Also check mime type String contentType = mapExtensionToMimeType(documentIdentifier); if (!activities.checkMimeTypeIndexable(contentType)) { // Mime type failed errorCode = activities.EXCLUDED_MIMETYPE; errorDesc = "Document rejected because of mime type (" + contentType + ")"; if (Logging.connectors.isDebugEnabled()) Logging.connectors.debug("SharePoint: Skipping document '" + documentIdentifier + "' because output connector says mime type '" + ((contentType == null) ? 
"null" : contentType) + "' is not indexable"); activities.noDocument(documentIdentifier, version); return; } // Now check date stamp if (!activities.checkDateIndexable(modifiedDate)) { // Date failed errorCode = activities.EXCLUDED_DATE; errorDesc = "Document rejected because of date (" + modifiedDate + ")"; if (Logging.connectors.isDebugEnabled()) Logging.connectors.debug("SharePoint: Skipping document '" + documentIdentifier + "' because output connector says date '" + ((modifiedDate == null) ? "null" : modifiedDate) + "' is not indexable"); activities.noDocument(documentIdentifier, version); return; } // Set stuff up for fetch activity logging try { // Read the document into a local temporary file, so I get a reliable length. File tempFile = File.createTempFile("__shp__", ".tmp"); try { // Open the output stream OutputStream os = new FileOutputStream(tempFile); try { // Catch all exceptions having to do with reading the document try { ExecuteMethodThread emt = new ExecuteMethodThread(httpClient, fetchUrl, os); emt.start(); int returnCode = emt.finishUp(); if (returnCode == 404 || returnCode == 401 || returnCode == 400 || returnCode == 415) { // Well, sharepoint thought the document was there, but it really isn't, so delete it. 
errorCode = "DOCUMENTNOTFOUND"; errorDesc = "Document not found; HTTP code " + returnCode; if (Logging.connectors.isDebugEnabled()) Logging.connectors.debug( "SharePoint: Document at '" + fileUrl + "' failed to fetch with code " + Integer.toString(returnCode) + ", deleting"); activities.noDocument(documentIdentifier, version); return; } else if (returnCode != 200) { errorCode = "UNKNOWNHTTPCODE"; errorDesc = "Unknown HTTP return code " + returnCode; throw new ManifoldCFException("Error fetching document '" + fileUrl + "': " + Integer.toString(returnCode)); } } catch (InterruptedException e) { throw new ManifoldCFException("Interrupted: " + e.getMessage(), e, ManifoldCFException.INTERRUPTED); } catch (java.net.SocketTimeoutException e) { errorCode = e.getClass().getSimpleName().toUpperCase(Locale.ROOT); errorDesc = e.getMessage(); Logging.connectors.warn("SharePoint: SocketTimeoutException thrown: " + e.getMessage(), e); long currentTime = System.currentTimeMillis(); throw new ServiceInterruption( "SharePoint is down attempting to read '" + fileUrl + "', retrying: " + e.getMessage(), e, currentTime + 300000L, currentTime + 12 * 60 * 60000L, -1, true); } catch (org.apache.http.conn.ConnectTimeoutException e) { errorCode = e.getClass().getSimpleName().toUpperCase(Locale.ROOT); errorDesc = e.getMessage(); Logging.connectors.warn("SharePoint: ConnectTimeoutException thrown: " + e.getMessage(), e); long currentTime = System.currentTimeMillis(); throw new ServiceInterruption( "SharePoint is down attempting to read '" + fileUrl + "', retrying: " + e.getMessage(), e, currentTime + 300000L, currentTime + 12 * 60 * 60000L, -1, true); } catch (InterruptedIOException e) { throw new ManifoldCFException("Interrupted: " + e.getMessage(), e, ManifoldCFException.INTERRUPTED); } catch (IllegalArgumentException e) { errorCode = e.getClass().getSimpleName().toUpperCase(Locale.ROOT); errorDesc = e.getMessage(); Logging.connectors.error("SharePoint: Illegal argument: " + e.getMessage(), 
e); throw new ManifoldCFException("SharePoint: Illegal argument: " + e.getMessage(), e); } catch (org.apache.http.HttpException e) { errorCode = e.getClass().getSimpleName().toUpperCase(Locale.ROOT); errorDesc = e.getMessage(); Logging.connectors.warn("SharePoint: HttpException thrown: " + e.getMessage(), e); long currentTime = System.currentTimeMillis(); throw new ServiceInterruption( "SharePoint is down attempting to read '" + fileUrl + "', retrying: " + e.getMessage(), e, currentTime + 300000L, currentTime + 12 * 60 * 60000L, -1, true); } catch (IOException e) { errorCode = e.getClass().getSimpleName().toUpperCase(Locale.ROOT); errorDesc = e.getMessage(); Logging.connectors.warn("SharePoint: IOException thrown: " + e.getMessage(), e); long currentTime = System.currentTimeMillis(); throw new ServiceInterruption( "SharePoint is down attempting to read '" + fileUrl + "', retrying: " + e.getMessage(), e, currentTime + 300000L, currentTime + 12 * 60 * 60000L, -1, true); } } finally { os.close(); } // Ingest the document long documentLength = tempFile.length(); if (!activities.checkLengthIndexable(documentLength)) { // Document too long errorCode = activities.EXCLUDED_LENGTH; errorDesc = "Document excluded due to length (" + documentLength + ")"; if (Logging.connectors.isDebugEnabled()) Logging.connectors.debug("SharePoint: Document '" + documentIdentifier + "' was too long, according to output connector"); activities.noDocument(documentIdentifier, version); return; } InputStream is = new FileInputStream(tempFile); try { RepositoryDocument data = new RepositoryDocument(); data.setBinary(is, documentLength); data.setFileName(mapToFileName(documentIdentifier)); if (contentType != null) data.setMimeType(contentType); setDataACLs(data, accessTokens, denyTokens); setPathAttribute(data, sDesc, documentIdentifier); if (modifiedDate != null) data.setModifiedDate(modifiedDate); if (createdDate != null) data.setCreatedDate(createdDate); if (metadataValues != null) { 
Iterator<String> iter = metadataValues.keySet().iterator(); while (iter.hasNext()) { String fieldName = iter.next(); String fieldData = metadataValues.get(fieldName); data.addField(fieldName, fieldData); } } data.addField("GUID", guid); try { activities.ingestDocumentWithException(documentIdentifier, version, fileUrl, data); errorCode = "OK"; fileLengthLong = new Long(documentLength); } catch (IOException e) { handleIOException(e, "reading document"); } return; } finally { try { is.close(); } catch (java.net.SocketTimeoutException e) { // This is not fatal Logging.connectors.debug("SharePoint: Timeout before read could finish for '" + fileUrl + "': " + e.getMessage(), e); } catch (org.apache.http.conn.ConnectTimeoutException e) { // This is not fatal Logging.connectors.debug("SharePoint: Connect timeout before read could finish for '" + fileUrl + "': " + e.getMessage(), e); } catch (InterruptedIOException e) { throw new ManifoldCFException("Interrupted: " + e.getMessage(), e, ManifoldCFException.INTERRUPTED); } catch (IOException e) { // This is not fatal Logging.connectors .debug("SharePoint: Server closed connection before read could finish for '" + fileUrl + "': " + e.getMessage(), e); } } } finally { tempFile.delete(); } } catch (java.net.SocketTimeoutException e) { throw new ManifoldCFException( "Socket timeout error writing '" + fileUrl + "' to temporary file: " + e.getMessage(), e); } catch (org.apache.http.conn.ConnectTimeoutException e) { throw new ManifoldCFException( "Connect timeout error writing '" + fileUrl + "' to temporary file: " + e.getMessage(), e); } catch (InterruptedIOException e) { throw new ManifoldCFException("Interrupted: " + e.getMessage(), e, ManifoldCFException.INTERRUPTED); } catch (IOException e) { throw new ManifoldCFException( "IO error writing '" + fileUrl + "' to temporary file: " + e.getMessage(), e); } } catch (ManifoldCFException e) { if (e.getErrorCode() == ManifoldCFException.INTERRUPTED) errorCode = null; throw e; } finally 
{ if (errorCode != null) activities.recordActivity(new Long(startTime), ACTIVITY_FETCH, fileLengthLong, documentIdentifier, errorCode, errorDesc, null); } }
From source file:org.apache.manifoldcf.crawler.connectors.livelink.LivelinkConnector.java
/** * Connects to the specified Livelink document using HTTP protocol * @param documentIdentifier is the document identifier (as far as the crawler knows). * @param activities is the process activity structure, so we can ingest *//* www .j a va2s. com*/ protected void ingestFromLiveLink(LivelinkContext llc, String documentIdentifier, String version, String[] actualAcls, String[] denyAcls, String[] categoryPaths, IProcessActivity activities, MetadataDescription desc, SystemMetadataDescription sDesc) throws ManifoldCFException, ServiceInterruption { String contextMsg = "for '" + documentIdentifier + "'"; // Fetch logging long startTime = System.currentTimeMillis(); String resultCode = null; String resultDescription = null; Long readSize = null; int objID; int vol; int colonPos = documentIdentifier.indexOf(":", 1); if (colonPos == -1) { objID = new Integer(documentIdentifier.substring(1)).intValue(); vol = LLENTWK_VOL; } else { objID = new Integer(documentIdentifier.substring(colonPos + 1)).intValue(); vol = new Integer(documentIdentifier.substring(1, colonPos)).intValue(); } // Try/finally for fetch logging try { String viewHttpAddress = convertToViewURI(documentIdentifier); if (viewHttpAddress == null) { if (Logging.connectors.isDebugEnabled()) Logging.connectors.debug("Livelink: No view URI " + contextMsg + " - not ingesting"); resultCode = "NOVIEWURI"; resultDescription = "Document had no view URI"; activities.noDocument(documentIdentifier, version); return; } // Check URL first if (!activities.checkURLIndexable(viewHttpAddress)) { // Document not ingestable due to URL resultCode = activities.EXCLUDED_URL; resultDescription = "URL (" + viewHttpAddress + ") was rejected by output connector"; if (Logging.connectors.isDebugEnabled()) Logging.connectors.debug("Livelink: Excluding document " + documentIdentifier + " because its URL (" + viewHttpAddress + ") was rejected by output connector"); activities.noDocument(documentIdentifier, version); return; } // Add general 
metadata ObjectInformation objInfo = llc.getObjectInformation(vol, objID); VersionInformation versInfo = llc.getVersionInformation(vol, objID, 0); if (!objInfo.exists()) { resultCode = "OBJECTNOTFOUND"; resultDescription = "Object was not found in Livelink"; Logging.connectors.debug("Livelink: No object " + contextMsg + ": not ingesting"); activities.noDocument(documentIdentifier, version); return; } if (!versInfo.exists()) { resultCode = "VERSIONNOTFOUND"; resultDescription = "Version was not found in Livelink"; Logging.connectors.debug("Livelink: No version data " + contextMsg + ": not ingesting"); activities.noDocument(documentIdentifier, version); return; } String mimeType = versInfo.getMimeType(); if (!activities.checkMimeTypeIndexable(mimeType)) { // Document not indexable because of its mime type resultCode = activities.EXCLUDED_MIMETYPE; resultDescription = "Mime type (" + mimeType + ") was rejected by output connector"; if (Logging.connectors.isDebugEnabled()) Logging.connectors.debug("Livelink: Excluding document " + documentIdentifier + " because its mime type (" + mimeType + ") was rejected by output connector"); activities.noDocument(documentIdentifier, version); return; } Long dataSize = versInfo.getDataSize(); if (dataSize == null) { // Document had no length resultCode = "DOCUMENTNOLENGTH"; resultDescription = "Document had no length in Livelink"; if (Logging.connectors.isDebugEnabled()) Logging.connectors.debug( "Livelink: Excluding document " + documentIdentifier + " because it had no length"); activities.noDocument(documentIdentifier, version); return; } if (!activities.checkLengthIndexable(dataSize.longValue())) { // Document not indexable because of its length resultCode = activities.EXCLUDED_LENGTH; resultDescription = "Document length (" + dataSize + ") was rejected by output connector"; if (Logging.connectors.isDebugEnabled()) Logging.connectors.debug("Livelink: Excluding document " + documentIdentifier + " because its length (" + dataSize + 
") was rejected by output connector"); activities.noDocument(documentIdentifier, version); return; } Date modifyDate = versInfo.getModifyDate(); if (!activities.checkDateIndexable(modifyDate)) { // Document not indexable because of its date resultCode = activities.EXCLUDED_DATE; resultDescription = "Document date (" + modifyDate + ") was rejected by output connector"; if (Logging.connectors.isDebugEnabled()) Logging.connectors.debug("Livelink: Excluding document " + documentIdentifier + " because its date (" + modifyDate + ") was rejected by output connector"); activities.noDocument(documentIdentifier, version); return; } String fileName = versInfo.getFileName(); Date creationDate = objInfo.getCreationDate(); Integer parentID = objInfo.getParentId(); RepositoryDocument rd = new RepositoryDocument(); // Add general data we need for the output connector if (mimeType != null) rd.setMimeType(mimeType); if (fileName != null) rd.setFileName(fileName); if (creationDate != null) rd.setCreatedDate(creationDate); if (modifyDate != null) rd.setModifiedDate(modifyDate); rd.addField(GENERAL_NAME_FIELD, objInfo.getName()); rd.addField(GENERAL_DESCRIPTION_FIELD, objInfo.getComments()); if (creationDate != null) rd.addField(GENERAL_CREATIONDATE_FIELD, DateParser.formatISO8601Date(creationDate)); if (modifyDate != null) rd.addField(GENERAL_MODIFYDATE_FIELD, DateParser.formatISO8601Date(modifyDate)); if (parentID != null) rd.addField(GENERAL_PARENTID, parentID.toString()); UserInformation owner = llc.getUserInformation(objInfo.getOwnerId().intValue()); UserInformation creator = llc.getUserInformation(objInfo.getCreatorId().intValue()); UserInformation modifier = llc.getUserInformation(versInfo.getOwnerId().intValue()); if (owner != null) rd.addField(GENERAL_OWNER, owner.getName()); if (creator != null) rd.addField(GENERAL_CREATOR, creator.getName()); if (modifier != null) rd.addField(GENERAL_MODIFIER, modifier.getName()); // Iterate over the metadata items. 
These are organized by category // for speed of lookup. Iterator<MetadataItem> catIter = desc.getItems(categoryPaths); while (catIter.hasNext()) { MetadataItem item = catIter.next(); MetadataPathItem pathItem = item.getPathItem(); if (pathItem != null) { int catID = pathItem.getCatID(); // grab the associated catversion LLValue catVersion = getCatVersion(objID, catID); if (catVersion != null) { // Go through attributes now Iterator<String> attrIter = item.getAttributeNames(); while (attrIter.hasNext()) { String attrName = attrIter.next(); // Create a unique metadata name String metadataName = pathItem.getCatName() + ":" + attrName; // Fetch the metadata and stuff it into the RepositoryData structure String[] metadataValue = getAttributeValue(catVersion, attrName); if (metadataValue != null) rd.addField(metadataName, metadataValue); else Logging.connectors.warn("Livelink: Metadata attribute '" + metadataName + "' does not seem to exist; please correct the job"); } } } } if (actualAcls != null && denyAcls != null) rd.setSecurity(RepositoryDocument.SECURITY_TYPE_DOCUMENT, actualAcls, denyAcls); // Add the path metadata item into the mix, if enabled String pathAttributeName = sDesc.getPathAttributeName(); if (pathAttributeName != null && pathAttributeName.length() > 0) { String pathString = sDesc.getPathAttributeValue(documentIdentifier); if (pathString != null) { if (Logging.connectors.isDebugEnabled()) Logging.connectors.debug("Livelink: Path attribute name is '" + pathAttributeName + "'" + contextMsg + ", value is '" + pathString + "'"); rd.addField(pathAttributeName, pathString); } } if (ingestProtocol != null) { // Use HTTP to fetch document! 
String ingestHttpAddress = convertToIngestURI(documentIdentifier); if (ingestHttpAddress == null) { if (Logging.connectors.isDebugEnabled()) Logging.connectors.debug("Livelink: No fetch URI " + contextMsg + " - not ingesting"); resultCode = "NOURI"; resultDescription = "Document had no fetch URI"; activities.noDocument(documentIdentifier, version); return; } // Set up connection HttpClient client = getInitializedClient(contextMsg); long currentTime; if (Logging.connectors.isInfoEnabled()) Logging.connectors.info("Livelink: " + ingestHttpAddress); HttpGet method = new HttpGet(getHost().toURI() + ingestHttpAddress); method.setHeader(new BasicHeader("Accept", "*/*")); boolean wasInterrupted = false; ExecuteMethodThread methodThread = new ExecuteMethodThread(client, method); methodThread.start(); try { int statusCode = methodThread.getResponseCode(); switch (statusCode) { case 500: case 502: Logging.connectors.warn("Livelink: Service interruption during fetch " + contextMsg + " with Livelink HTTP Server, retrying..."); resultCode = "FETCHFAILED"; resultDescription = "HTTP error code " + statusCode + " fetching document"; throw new ServiceInterruption("Service interruption during fetch", new ManifoldCFException(Integer.toString(statusCode) + " error while fetching"), System.currentTimeMillis() + 60000L, System.currentTimeMillis() + 600000L, -1, true); case HttpStatus.SC_UNAUTHORIZED: Logging.connectors.warn("Livelink: Document fetch unauthorized for " + ingestHttpAddress + " (" + contextMsg + ")"); // Since we logged in, we should fail here if the ingestion user doesn't have access to the // the document, but if we do, don't fail hard. 
resultCode = "UNAUTHORIZED"; resultDescription = "Document fetch was unauthorized by IIS"; activities.noDocument(documentIdentifier, version); return; case HttpStatus.SC_OK: if (Logging.connectors.isDebugEnabled()) Logging.connectors .debug("Livelink: Created http document connection to Livelink " + contextMsg); // A non-existent content length will cause a value of -1 to be returned. This seems to indicate that the session login did not work right. if (methodThread.getResponseContentLength() < 0) { resultCode = "SESSIONLOGINFAILED"; resultDescription = "Response content length was -1, which usually means session login did not succeed"; activities.noDocument(documentIdentifier, version); return; } try { InputStream is = methodThread.getSafeInputStream(); try { rd.setBinary(is, dataSize); activities.ingestDocumentWithException(documentIdentifier, version, viewHttpAddress, rd); resultCode = "OK"; readSize = dataSize; if (Logging.connectors.isDebugEnabled()) Logging.connectors.debug("Livelink: Ingesting done " + contextMsg); } finally { // Close stream via thread, since otherwise this can hang is.close(); } } catch (InterruptedException e) { wasInterrupted = true; throw new ManifoldCFException("Interrupted: " + e.getMessage(), e, ManifoldCFException.INTERRUPTED); } catch (HttpException e) { resultCode = e.getClass().getSimpleName().toUpperCase(Locale.ROOT); resultDescription = e.getMessage(); handleHttpException(contextMsg, e); } catch (IOException e) { resultCode = e.getClass().getSimpleName().toUpperCase(Locale.ROOT); resultDescription = e.getMessage(); handleIOException(contextMsg, e); } break; case HttpStatus.SC_BAD_REQUEST: case HttpStatus.SC_USE_PROXY: case HttpStatus.SC_GONE: resultCode = "HTTPERROR"; resultDescription = "Http request returned status " + Integer.toString(statusCode); throw new ManifoldCFException( "Unrecoverable request failure; error = " + Integer.toString(statusCode)); default: resultCode = "UNKNOWNHTTPCODE"; resultDescription = "Http request 
returned status " + Integer.toString(statusCode); Logging.connectors.warn("Livelink: Attempt to retrieve document from '" + ingestHttpAddress + "' received a response of " + Integer.toString(statusCode) + "; retrying in one minute"); currentTime = System.currentTimeMillis(); throw new ServiceInterruption("Fetch failed; retrying in 1 minute", new ManifoldCFException( "Fetch failed with unknown code " + Integer.toString(statusCode)), currentTime + 60000L, currentTime + 600000L, -1, true); } } catch (InterruptedException e) { // Drop the connection on the floor methodThread.interrupt(); methodThread = null; throw new ManifoldCFException("Interrupted: " + e.getMessage(), e, ManifoldCFException.INTERRUPTED); } catch (HttpException e) { resultCode = e.getClass().getSimpleName().toUpperCase(Locale.ROOT); resultDescription = e.getMessage(); handleHttpException(contextMsg, e); } catch (IOException e) { resultCode = e.getClass().getSimpleName().toUpperCase(Locale.ROOT); resultDescription = e.getMessage(); handleIOException(contextMsg, e); } finally { if (methodThread != null) { methodThread.abort(); try { if (!wasInterrupted) methodThread.finishUp(); } catch (InterruptedException e) { throw new ManifoldCFException(e.getMessage(), e, ManifoldCFException.INTERRUPTED); } } } } else { // Use FetchVersion instead long currentTime; // Fire up the document reading thread DocumentReadingThread t = new DocumentReadingThread(vol, objID, 0); boolean wasInterrupted = false; t.start(); try { try { InputStream is = t.getSafeInputStream(); try { // Can only index while background thread is running! rd.setBinary(is, dataSize); activities.ingestDocumentWithException(documentIdentifier, version, viewHttpAddress, rd); resultCode = "OK"; readSize = dataSize; } finally { is.close(); } } catch (java.net.SocketTimeoutException e) { throw e; } catch (InterruptedIOException e) { wasInterrupted = true; throw e; } finally { if (!wasInterrupted) t.finishUp(); } // No errors. 
Record the fact that we made it. } catch (InterruptedException e) { t.interrupt(); throw new ManifoldCFException("Interrupted: " + e.getMessage(), e, ManifoldCFException.INTERRUPTED); } catch (IOException e) { resultCode = e.getClass().getSimpleName().toUpperCase(Locale.ROOT); resultDescription = e.getMessage(); handleIOException(contextMsg, e); } catch (RuntimeException e) { resultCode = e.getClass().getSimpleName().toUpperCase(Locale.ROOT); resultDescription = e.getMessage(); handleLivelinkRuntimeException(e, 0, true); } } } catch (ManifoldCFException e) { if (e.getErrorCode() == ManifoldCFException.INTERRUPTED) resultCode = null; throw e; } finally { if (resultCode != null) activities.recordActivity(new Long(startTime), ACTIVITY_FETCH, readSize, vol + ":" + objID, resultCode, resultDescription, null); } }
From source file:org.apache.manifoldcf.crawler.connectors.dropbox.DropboxRepositoryConnector.java
/** Process a set of documents. * This is the method that should cause each document to be fetched, processed, and the results either added * to the queue of documents for the current job, and/or entered into the incremental ingestion manager. * The document specification allows this class to filter what is done based on the job. * The connector will be connected before this method can be called. *@param documentIdentifiers is the set of document identifiers to process. *@param statuses are the currently-stored document versions for each document in the set of document identifiers * passed in above.// w w w .j av a2s . c o m *@param activities is the interface this method should use to queue up new document references * and ingest documents. *@param jobMode is an integer describing how the job is being run, whether continuous or once-only. *@param usesDefaultAuthority will be true only if the authority in use for these documents is the default one. */ @Override public void processDocuments(String[] documentIdentifiers, IExistingVersions statuses, Specification spec, IProcessActivity activities, int jobMode, boolean usesDefaultAuthority) throws ManifoldCFException, ServiceInterruption { Logging.connectors.debug("DROPBOX: Inside processDocuments"); // Forced acls String[] acls = getAcls(spec); // Sort it, java.util.Arrays.sort(acls); for (String documentIdentifier : documentIdentifiers) { getSession(); String versionString; GetObjectThread objt = new GetObjectThread(documentIdentifier); objt.start(); try { objt.finishUp(); } catch (InterruptedException e) { objt.interrupt(); throw new ManifoldCFException("Interrupted: " + e.getMessage(), e, ManifoldCFException.INTERRUPTED); } catch (DropboxException e) { Logging.connectors.warn("DROPBOX: Error getting object: " + e.getMessage(), e); handleDropboxException(e); } DropboxAPI.Entry dropboxObject = objt.getResponse(); if (dropboxObject.isDir) { //a folder will always be processed versionString = StringUtils.EMPTY; // 
adding all the children + subdirs for a folder List<DropboxAPI.Entry> children = dropboxObject.contents; for (DropboxAPI.Entry child : children) { activities.addDocumentReference(child.path, documentIdentifier, RELATIONSHIP_CHILD); } activities.noDocument(documentIdentifier, versionString); continue; } if (dropboxObject.isDeleted) { activities.deleteDocument(documentIdentifier); continue; } if (StringUtils.isEmpty(dropboxObject.rev)) { //a document that doesn't contain versioning information will never be processed activities.deleteDocument(documentIdentifier); continue; } StringBuilder sb = new StringBuilder(); // Acls packList(sb, acls, '+'); if (acls.length > 0) { sb.append('+'); pack(sb, defaultAuthorityDenyToken, '+'); } else sb.append('-'); sb.append(dropboxObject.rev); versionString = sb.toString(); if (!activities.checkDocumentNeedsReindexing(documentIdentifier, versionString)) continue; long startTime = System.currentTimeMillis(); String errorCode = null; String errorDesc = null; Long fileSize = null; String nodeId = documentIdentifier; String version = versionString; try { // Length in bytes long fileLength = dropboxObject.bytes; if (!activities.checkLengthIndexable(fileLength)) { errorCode = activities.EXCLUDED_LENGTH; errorDesc = "Document excluded because of length (" + fileLength + ")"; activities.noDocument(documentIdentifier, versionString); continue; } //documentURI String documentURI = dropboxObject.path; if (!activities.checkURLIndexable(documentURI)) { errorCode = activities.EXCLUDED_URL; errorDesc = "Document excluded because of URL ('" + documentURI + "')"; activities.noDocument(documentIdentifier, versionString); continue; } //Modified date Date modifiedDate; if (dropboxObject.modified != null) modifiedDate = com.dropbox.client2.RESTUtility.parseDate(dropboxObject.modified); else modifiedDate = null; if (!activities.checkDateIndexable(modifiedDate)) { errorCode = activities.EXCLUDED_DATE; errorDesc = "Document excluded because of date (" + 
modifiedDate + ")"; activities.noDocument(documentIdentifier, versionString); continue; } // Mime type String mimeType = dropboxObject.mimeType; if (!activities.checkMimeTypeIndexable(mimeType)) { errorCode = activities.EXCLUDED_MIMETYPE; errorDesc = "Document excluded because of mime type ('" + mimeType + "')"; activities.noDocument(documentIdentifier, versionString); continue; } // content ingestion RepositoryDocument rd = new RepositoryDocument(); if (acls.length > 0) { rd.setSecurityACL(RepositoryDocument.SECURITY_TYPE_DOCUMENT, acls); String[] denyAclArray = new String[] { defaultAuthorityDenyToken }; rd.setSecurityDenyACL(RepositoryDocument.SECURITY_TYPE_DOCUMENT, denyAclArray); } if (dropboxObject.path != null) rd.setFileName(dropboxObject.path); if (dropboxObject.mimeType != null) rd.setMimeType(dropboxObject.mimeType); if (dropboxObject.modified != null) rd.setModifiedDate(modifiedDate); // There doesn't appear to be a created date... rd.addField("Modified", dropboxObject.modified); rd.addField("Size", dropboxObject.size); rd.addField("Path", dropboxObject.path); rd.addField("Root", dropboxObject.root); rd.addField("ClientMtime", dropboxObject.clientMtime); rd.addField("mimeType", dropboxObject.mimeType); rd.addField("rev", dropboxObject.rev); getSession(); BackgroundStreamThread t = new BackgroundStreamThread(nodeId); t.start(); try { boolean wasInterrupted = false; try { InputStream is = t.getSafeInputStream(); try { rd.setBinary(is, fileLength); activities.ingestDocumentWithException(nodeId, version, documentURI, rd); // No errors. Record the fact that we made it. 
errorCode = "OK"; fileSize = new Long(fileLength); } finally { is.close(); } } catch (java.net.SocketTimeoutException e) { throw e; } catch (InterruptedIOException e) { wasInterrupted = true; throw e; } catch (ManifoldCFException e) { if (e.getErrorCode() == ManifoldCFException.INTERRUPTED) wasInterrupted = true; throw e; } finally { if (!wasInterrupted) // This does a join t.finishUp(); } } catch (InterruptedException e) { // We were interrupted out of the join, most likely. Before we abandon the thread, // send a courtesy interrupt. t.interrupt(); throw new ManifoldCFException("Interrupted: " + e.getMessage(), e, ManifoldCFException.INTERRUPTED); } catch (java.net.SocketTimeoutException e) { errorCode = e.getClass().getSimpleName().toUpperCase(Locale.ROOT); errorDesc = e.getMessage(); handleIOException(e); } catch (InterruptedIOException e) { t.interrupt(); throw new ManifoldCFException("Interrupted: " + e.getMessage(), e, ManifoldCFException.INTERRUPTED); } catch (IOException e) { errorCode = e.getClass().getSimpleName().toUpperCase(Locale.ROOT); errorDesc = e.getMessage(); handleIOException(e); } catch (DropboxException e) { Logging.connectors.warn("DROPBOX: Error getting stream: " + e.getMessage(), e); errorCode = e.getClass().getSimpleName().toUpperCase(Locale.ROOT); errorDesc = e.getMessage(); handleDropboxException(e); } } catch (ManifoldCFException e) { if (e.getErrorCode() == ManifoldCFException.INTERRUPTED) errorCode = null; throw e; } finally { if (errorCode != null) activities.recordActivity(new Long(startTime), ACTIVITY_READ, fileSize, nodeId, errorCode, errorDesc, null); } } }