List of usage examples for java.security NoSuchAlgorithmException getMessage
public String getMessage()
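Before the project excerpts below, here is a minimal, self-contained sketch (not taken from any of the listed projects) showing where a NoSuchAlgorithmException comes from and how its getMessage() text is typically used. The algorithm name "NoSuchHash" is deliberately invalid so the exception is actually thrown.

import java.security.MessageDigest;
import java.security.NoSuchAlgorithmException;

public class GetMessageDemo {
    public static void main(String[] args) {
        try {
            // "NoSuchHash" is not a registered digest algorithm, so this throws
            MessageDigest.getInstance("NoSuchHash");
        } catch (NoSuchAlgorithmException e) {
            // getMessage() (inherited from Throwable) names the missing algorithm,
            // e.g. "NoSuchHash MessageDigest not available"
            System.err.println("Digest unavailable: " + e.getMessage());
        }
    }
}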
From source file: org.dasein.cloud.aws.storage.S3Method.java
S3Response invoke(@Nullable String bucket, @Nullable String object, @Nullable String temporaryEndpoint)
        throws S3Exception, CloudException, InternalException {
    if (wire.isDebugEnabled()) {
        wire.debug("");
        wire.debug("----------------------------------------------------------------------------------");
    }
    HttpClient client = null;
    boolean leaveOpen = false;
    try {
        StringBuilder url = new StringBuilder();
        HttpRequestBase method;
        int status;

        // Sanitise the parameters as they may have spaces and who knows what else
        if (bucket != null) {
            bucket = AWSCloud.encode(bucket, false);
        }
        if (object != null && !"?location".equalsIgnoreCase(object) && !"?acl".equalsIgnoreCase(object)
                && !"?tagging".equalsIgnoreCase(object)) {
            object = AWSCloud.encode(object, false);
        }
        if (temporaryEndpoint != null) {
            temporaryEndpoint = AWSCloud.encode(temporaryEndpoint, false);
        }
        if (provider.getEC2Provider().isAWS()) {
            url.append("https://");
            String regionId = provider.getContext().getRegionId();
            if (temporaryEndpoint == null) {
                boolean validDomainName = isValidDomainName(bucket);
                if (bucket != null && validDomainName) {
                    url.append(bucket);
                    if (regionId != null && !regionId.isEmpty() && !"us-east-1".equals(regionId)) {
                        url.append(".s3-");
                        url.append(regionId);
                        url.append(".amazonaws.com/");
                    } else {
                        url.append(".s3.amazonaws.com/");
                    }
                } else {
                    if (regionId != null && !regionId.isEmpty() && !"us-east-1".equals(regionId)) {
                        url.append("s3-");
                        url.append(regionId);
                        url.append(".amazonaws.com/");
                    } else {
                        url.append("s3.amazonaws.com/");
                    }
                }
                if (bucket != null && !validDomainName) {
                    url.append(bucket);
                    url.append("/");
                }
            } else {
                url.append(temporaryEndpoint);
                url.append("/");
            }
        } else if (provider.getEC2Provider().isStorage()
                && "google".equalsIgnoreCase(provider.getProviderName())) {
            url.append("https://");
            if (temporaryEndpoint == null) {
                if (bucket != null) {
                    url.append(bucket);
                    url.append(".");
                }
                url.append("commondatastorage.googleapis.com/");
            } else {
                url.append(temporaryEndpoint);
                url.append("/");
            }
        } else {
            int idx = 0;
            if (!provider.getContext().getEndpoint().startsWith("http")) {
                url.append("https://");
            } else {
                idx = provider.getContext().getEndpoint().indexOf("https://");
                if (idx == -1) {
                    idx = "http://".length();
                    url.append("http://");
                } else {
                    idx = "https://".length();
                    url.append("https://");
                }
            }
            String service = "";
            if (provider.getEC2Provider().isEucalyptus()) {
                service = "Walrus/";
            }
            if (temporaryEndpoint == null) {
                url.append(provider.getContext().getEndpoint().substring(idx));
                if (!provider.getContext().getEndpoint().endsWith("/")) {
                    url.append("/").append(service);
                } else {
                    url.append(service);
                }
            } else {
                url.append(temporaryEndpoint);
                url.append("/");
                url.append(service);
            }
            if (bucket != null) {
                url.append(bucket);
                url.append("/");
            }
        }
        if (object != null) {
            url.append(object);
        } else if (parameters != null) {
            boolean first = true;
            if (object != null && object.indexOf('?') != -1) {
                first = false;
            }
            for (Map.Entry<String, String> entry : parameters.entrySet()) {
                String key = entry.getKey();
                String val = entry.getValue();
                if (first) {
                    url.append("?");
                    first = false;
                } else {
                    url.append("&");
                }
                if (val != null) {
                    url.append(AWSCloud.encode(key, false));
                    url.append("=");
                    url.append(AWSCloud.encode(val, false));
                } else {
                    url.append(AWSCloud.encode(key, false));
                }
            }
        }
        if (provider.getEC2Provider().isStorage() && provider.getProviderName().equalsIgnoreCase("Google")) {
            headers.put(AWSCloud.P_GOOG_DATE, getDate());
        } else {
            headers.put(AWSCloud.P_AWS_DATE, provider.getV4HeaderDate(null));
        }
        if (contentType == null && body != null) {
            contentType = "application/xml";
            headers.put("Content-Type", contentType);
        } else if (contentType != null) {
            headers.put("Content-Type", contentType);
        }
        method = action.getMethod(url.toString());
        String host = method.getURI().getHost();
        headers.put("host", host);
        if (action.equals(S3Action.PUT_BUCKET_TAG)) {
            try {
                headers.put("Content-MD5", toBase64(computeMD5Hash(body)));
            } catch (NoSuchAlgorithmException e) {
                logger.error(e);
            } catch (IOException e) {
                logger.error(e);
            }
        }
        if (headers != null) {
            for (Map.Entry<String, String> entry : headers.entrySet()) {
                method.addHeader(entry.getKey(), entry.getValue());
            }
        }
        if (body != null) {
            ((HttpEntityEnclosingRequestBase) method).setEntity(new StringEntity(body, APPLICATION_XML));
        } else if (uploadFile != null) {
            ((HttpEntityEnclosingRequestBase) method).setEntity(new FileEntity(uploadFile, contentType));
        }
        try {
            String hash = null;
            if (method instanceof HttpEntityEnclosingRequestBase) {
                try {
                    hash = provider.getRequestBodyHash(
                            EntityUtils.toString(((HttpEntityEnclosingRequestBase) method).getEntity()));
                } catch (IOException e) {
                    throw new InternalException(e);
                }
            } else {
                hash = provider.getRequestBodyHash("");
            }
            String signature;
            if (provider.getEC2Provider().isAWS()) {
                // Sign v4 for AWS
                signature = provider.getV4Authorization(new String(provider.getAccessKey()[0]),
                        new String(provider.getAccessKey()[1]), method.getMethod(), url.toString(), SERVICE_ID,
                        headers, hash);
                if (hash != null) {
                    method.addHeader(AWSCloud.P_AWS_CONTENT_SHA256, hash);
                }
            } else {
                // Eucalyptus et al use v2
                signature = provider.signS3(new String(provider.getAccessKey()[0], "utf-8"),
                        provider.getAccessKey()[1], method.getMethod(), null, contentType, headers, bucket,
                        object);
            }
            method.addHeader(AWSCloud.P_CFAUTH, signature);
        } catch (UnsupportedEncodingException e) {
            logger.error(e);
        }
        if (wire.isDebugEnabled()) {
            wire.debug("[" + url.toString() + "]");
            wire.debug(method.getRequestLine().toString());
            for (Header header : method.getAllHeaders()) {
                wire.debug(header.getName() + ": " + header.getValue());
            }
            wire.debug("");
            if (body != null) {
                try {
                    wire.debug(EntityUtils.toString(((HttpEntityEnclosingRequestBase) method).getEntity()));
                } catch (IOException ignore) {
                }
                wire.debug("");
            } else if (uploadFile != null) {
                wire.debug("-- file upload --");
                wire.debug("");
            }
        }
        attempts++;
        client = provider.getClient(body == null && uploadFile == null);
        S3Response response = new S3Response();
        HttpResponse httpResponse;
        try {
            APITrace.trace(provider, action.toString());
            httpResponse = client.execute(method);
            if (wire.isDebugEnabled()) {
                wire.debug(httpResponse.getStatusLine().toString());
                for (Header header : httpResponse.getAllHeaders()) {
                    wire.debug(header.getName() + ": " + header.getValue());
                }
                wire.debug("");
            }
            status = httpResponse.getStatusLine().getStatusCode();
        } catch (IOException e) {
            logger.error(url + ": " + e.getMessage());
            throw new InternalException(e);
        }
        response.headers = httpResponse.getAllHeaders();
        HttpEntity entity = httpResponse.getEntity();
        InputStream input = null;
        if (entity != null) {
            try {
                input = entity.getContent();
            } catch (IOException e) {
                throw new CloudException(e);
            }
        }
        try {
            if (status == HttpStatus.SC_OK || status == HttpStatus.SC_CREATED
                    || status == HttpStatus.SC_ACCEPTED) {
                Header clen = httpResponse.getFirstHeader("Content-Length");
                long len = -1L;
                if (clen != null) {
                    len = Long.parseLong(clen.getValue());
                }
                if (len != 0L) {
                    try {
                        Header ct = httpResponse.getFirstHeader("Content-Type");
                        if ((ct != null && (ct.getValue().startsWith("application/xml")
                                || ct.getValue().startsWith("text/xml")))
                                || (action.equals(S3Action.GET_BUCKET_TAG) && input != null)) {
                            try {
                                response.document = parseResponse(input);
                                return response;
                            } finally {
                                input.close();
                            }
                        } else if (ct != null && ct.getValue().startsWith("application/octet-stream")
                                && len < 1) {
                            return null;
                        } else {
                            response.contentLength = len;
                            if (ct != null) {
                                response.contentType = ct.getValue();
                            }
                            response.input = input;
                            response.method = method;
                            leaveOpen = true;
                            return response;
                        }
                    } catch (IOException e) {
                        logger.error(e);
                        throw new CloudException(e);
                    }
                } else {
                    return response;
                }
            } else if (status == HttpStatus.SC_NO_CONTENT) {
                return response;
            }
            if (status == HttpStatus.SC_FORBIDDEN) {
                throw new S3Exception(status, "", "AccessForbidden",
                        "Access was denied : " + (url != null ? url.toString() : ""));
            } else if (status == HttpStatus.SC_NOT_FOUND) {
                throw new S3Exception(status, null, null, "Object not found.");
            } else {
                if (status == HttpStatus.SC_SERVICE_UNAVAILABLE
                        || status == HttpStatus.SC_INTERNAL_SERVER_ERROR) {
                    if (attempts >= 5) {
                        String msg;
                        if (status == HttpStatus.SC_SERVICE_UNAVAILABLE) {
                            msg = "Cloud service is currently unavailable.";
                        } else {
                            msg = "The cloud service encountered a server error while processing your request.";
                        }
                        logger.error(msg);
                        throw new CloudException(msg);
                    } else {
                        leaveOpen = true;
                        if (input != null) {
                            try {
                                input.close();
                            } catch (IOException ignore) {
                            }
                        }
                        try {
                            Thread.sleep(5000L);
                        } catch (InterruptedException ignore) {
                        }
                        return invoke(bucket, object);
                    }
                }
                try {
                    Document doc;
                    try {
                        logger.warn("Received error code: " + status);
                        doc = parseResponse(input);
                    } finally {
                        if (input != null) {
                            input.close();
                        }
                    }
                    if (doc != null) {
                        String endpoint = null, code = null, message = null, requestId = null;
                        NodeList blocks = doc.getElementsByTagName("Error");
                        if (blocks.getLength() > 0) {
                            Node error = blocks.item(0);
                            NodeList attrs;
                            attrs = error.getChildNodes();
                            for (int i = 0; i < attrs.getLength(); i++) {
                                Node attr = attrs.item(i);
                                if (attr.getNodeName().equals("Code") && attr.hasChildNodes()) {
                                    code = attr.getFirstChild().getNodeValue().trim();
                                } else if (attr.getNodeName().equals("Message") && attr.hasChildNodes()) {
                                    message = attr.getFirstChild().getNodeValue().trim();
                                } else if (attr.getNodeName().equals("RequestId") && attr.hasChildNodes()) {
                                    requestId = attr.getFirstChild().getNodeValue().trim();
                                } else if (attr.getNodeName().equals("Endpoint") && attr.hasChildNodes()) {
                                    endpoint = attr.getFirstChild().getNodeValue().trim();
                                }
                            }
                        }
                        if (endpoint != null && code.equals("TemporaryRedirect")) {
                            if (temporaryEndpoint != null) {
                                throw new CloudException("Too deep redirect to " + endpoint);
                            } else {
                                return invoke(bucket, object, endpoint);
                            }
                        } else {
                            if (message == null) {
                                throw new CloudException("Unable to identify error condition: " + status + "/"
                                        + requestId + "/" + code);
                            }
                            throw new S3Exception(status, requestId, code, message);
                        }
                    } else {
                        throw new CloudException("Unable to parse error.");
                    }
                } catch (IOException e) {
                    if (status == HttpStatus.SC_FORBIDDEN) {
                        throw new S3Exception(status, "", "AccessForbidden",
                                "Access was denied without explanation.");
                    }
                    throw new CloudException(e);
                } catch (RuntimeException e) {
                    throw new CloudException(e);
                } catch (Error e) {
                    throw new CloudException(e);
                }
            }
        } finally {
            if (!leaveOpen) {
                if (input != null) {
                    try {
                        input.close();
                    } catch (IOException ignore) {
                    }
                }
            }
        }
    } finally {
        if (!leaveOpen && client != null) {
            client.getConnectionManager().shutdown();
        }
        if (wire.isDebugEnabled()) {
            wire.debug("----------------------------------------------------------------------------------");
            wire.debug("");
        }
    }
}
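The computeMD5Hash and toBase64 helpers are not shown in the excerpt above. A plausible reconstruction (an assumption, not the actual Dasein source) clarifies why the Content-MD5 block catches both NoSuchAlgorithmException and IOException:

import java.io.IOException;
import java.security.MessageDigest;
import java.security.NoSuchAlgorithmException;
import java.util.Base64;

final class Md5Helpers { // hypothetical helpers, for illustration only
    // MessageDigest.getInstance declares NoSuchAlgorithmException; getBytes("UTF-8")
    // declares UnsupportedEncodingException (an IOException subclass) - hence the two catches.
    static byte[] computeMD5Hash(String body) throws NoSuchAlgorithmException, IOException {
        MessageDigest md = MessageDigest.getInstance("MD5");
        return md.digest(body.getBytes("UTF-8"));
    }

    static String toBase64(byte[] bytes) {
        return Base64.getEncoder().encodeToString(bytes);
    }
}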
From source file: org.opendatakit.briefcase.util.FileSystemUtils.java
private static boolean decryptSubmissionFiles(String base64EncryptedSymmetricKey, FormInstanceMetadata fim,
        List<String> mediaNames, String encryptedSubmissionFile, String base64EncryptedElementSignature,
        PrivateKey rsaPrivateKey, File instanceDir, File unencryptedDir)
        throws FileSystemException, CryptoException, ParsingException {
    EncryptionInformation ei = new EncryptionInformation(base64EncryptedSymmetricKey, fim.instanceId,
            rsaPrivateKey);
    byte[] elementDigest;
    try {
        // construct the base64-encoded RSA-encrypted symmetric key
        Cipher pkCipher;
        pkCipher = Cipher.getInstance(ASYMMETRIC_ALGORITHM);
        // extract digest
        pkCipher.init(Cipher.DECRYPT_MODE, rsaPrivateKey);
        byte[] encryptedElementSignature = Base64.decodeBase64(base64EncryptedElementSignature);
        elementDigest = pkCipher.doFinal(encryptedElementSignature);
    } catch (NoSuchAlgorithmException e) {
        e.printStackTrace();
        throw new CryptoException("Error decrypting base64EncryptedElementSignature Cause: " + e.toString());
    } catch (NoSuchPaddingException e) {
        e.printStackTrace();
        throw new CryptoException("Error decrypting base64EncryptedElementSignature Cause: " + e.toString());
    } catch (InvalidKeyException e) {
        e.printStackTrace();
        throw new CryptoException("Error decrypting base64EncryptedElementSignature Cause: " + e.toString());
    } catch (IllegalBlockSizeException e) {
        e.printStackTrace();
        throw new CryptoException("Error decrypting base64EncryptedElementSignature Cause: " + e.toString());
    } catch (BadPaddingException e) {
        e.printStackTrace();
        throw new CryptoException("Error decrypting base64EncryptedElementSignature Cause: " + e.toString());
    }

    // NOTE: will decrypt only the files in the media list, plus the encryptedSubmissionFile
    File[] allFiles = instanceDir.listFiles();
    List<File> filesToProcess = new ArrayList<File>();
    for (File f : allFiles) {
        if (mediaNames.contains(f.getName())) {
            filesToProcess.add(f);
        } else if (encryptedSubmissionFile.equals(f.getName())) {
            filesToProcess.add(f);
        }
    }
    // should have all media files plus one submission.xml.enc file
    if (filesToProcess.size() != mediaNames.size() + 1) {
        // figure out what we're missing...
        int lostFileCount = 0;
        List<String> missing = new ArrayList<String>();
        for (String name : mediaNames) {
            if (name == null) {
                // this was lost due to a pre-ODK Aggregate 1.4.5 mark-as-complete action
                ++lostFileCount;
                continue;
            }
            File f = new File(instanceDir, name);
            if (!filesToProcess.contains(f)) {
                missing.add(name);
            }
        }
        StringBuilder b = new StringBuilder();
        for (String name : missing) {
            b.append(" ").append(name);
        }
        if (!filesToProcess.contains(new File(instanceDir, encryptedSubmissionFile))) {
            b.append(" ").append(encryptedSubmissionFile);
            throw new FileSystemException(
                    "Error decrypting: " + instanceDir.getName() + " Missing files:" + b.toString());
        } else {
            // ignore the fact that we don't have the lost files
            if (filesToProcess.size() + lostFileCount != mediaNames.size() + 1) {
                throw new FileSystemException(
                        "Error decrypting: " + instanceDir.getName() + " Missing files:" + b.toString());
            }
        }
    }
    // decrypt the media files IN ORDER.
    for (String mediaName : mediaNames) {
        String displayedName = (mediaName == null) ? "<missing .enc file>" : mediaName;
        File f = (mediaName == null) ? null : new File(instanceDir, mediaName);
        try {
            decryptFile(ei, f, unencryptedDir);
        } catch (InvalidKeyException e) {
            e.printStackTrace();
            throw new CryptoException("Error decrypting:" + displayedName + " Cause: " + e.toString());
        } catch (NoSuchAlgorithmException e) {
            e.printStackTrace();
            throw new CryptoException("Error decrypting:" + displayedName + " Cause: " + e.toString());
        } catch (InvalidAlgorithmParameterException e) {
            e.printStackTrace();
            throw new CryptoException("Error decrypting:" + displayedName + " Cause: " + e.toString());
        } catch (NoSuchPaddingException e) {
            e.printStackTrace();
            throw new CryptoException("Error decrypting:" + displayedName + " Cause: " + e.toString());
        } catch (IOException e) {
            e.printStackTrace();
            throw new FileSystemException("Error decrypting:" + displayedName + " Cause: " + e.toString());
        }
    }
    // decrypt the submission file
    File f = new File(instanceDir, encryptedSubmissionFile);
    try {
        decryptFile(ei, f, unencryptedDir);
    } catch (InvalidKeyException e) {
        e.printStackTrace();
        throw new CryptoException("Error decrypting:" + f.getName() + " Cause: " + e.toString());
    } catch (NoSuchAlgorithmException e) {
        e.printStackTrace();
        throw new CryptoException("Error decrypting:" + f.getName() + " Cause: " + e.toString());
    } catch (InvalidAlgorithmParameterException e) {
        e.printStackTrace();
        throw new CryptoException("Error decrypting:" + f.getName() + " Cause: " + e.toString());
    } catch (NoSuchPaddingException e) {
        e.printStackTrace();
        throw new CryptoException("Error decrypting:" + f.getName() + " Cause: " + e.toString());
    } catch (IOException e) {
        e.printStackTrace();
        throw new FileSystemException("Error decrypting:" + f.getName() + " Cause: " + e.toString());
    }
    // get the FIM for the decrypted submission file
    File submissionFile = new File(unencryptedDir,
            encryptedSubmissionFile.substring(0, encryptedSubmissionFile.lastIndexOf(".enc")));
    FormInstanceMetadata submissionFim;
    try {
        Document subDoc = XmlManipulationUtils.parseXml(submissionFile);
        submissionFim = XmlManipulationUtils.getFormInstanceMetadata(subDoc.getRootElement());
    } catch (ParsingException e) {
        e.printStackTrace();
        throw new FileSystemException(
                "Error decrypting: " + submissionFile.getName() + " Cause: " + e.toString());
    } catch (FileSystemException e) {
        e.printStackTrace();
        throw new FileSystemException(
                "Error decrypting: " + submissionFile.getName() + " Cause: " + e.getMessage());
    }
    boolean same = submissionFim.xparam.formId.equals(fim.xparam.formId);
    if (!same) {
        throw new FileSystemException("Error decrypting:" + unencryptedDir.getName()
                + " Cause: form instance metadata differs from that in manifest");
    }
    // Construct the element signature string
    StringBuilder b = new StringBuilder();
    appendElementSignatureSource(b, fim.xparam.formId);
    if (fim.xparam.modelVersion != null) {
        appendElementSignatureSource(b, Long.toString(fim.xparam.modelVersion));
    }
    appendElementSignatureSource(b, base64EncryptedSymmetricKey);
    appendElementSignatureSource(b, fim.instanceId);
    boolean missingFile = false;
    for (String encFilename : mediaNames) {
        if (encFilename == null) {
            missingFile = true;
            continue;
        }
        File decryptedFile = new File(unencryptedDir,
                encFilename.substring(0, encFilename.lastIndexOf(".enc")));
        if (decryptedFile.getName().endsWith(".missing")) {
            // this is a missing file -- we will not be able to
            // confirm the signature of the submission.
            missingFile = true;
            continue;
        }
        String md5 = FileSystemUtils.getMd5Hash(decryptedFile);
        appendElementSignatureSource(b, decryptedFile.getName() + "::" + md5);
    }
    String md5 = FileSystemUtils.getMd5Hash(submissionFile);
    appendElementSignatureSource(b, submissionFile.getName() + "::" + md5);
    // compute the digest of the element signature string
    byte[] messageDigest;
    try {
        MessageDigest md = MessageDigest.getInstance("MD5");
        md.update(b.toString().getBytes("UTF-8"));
        messageDigest = md.digest();
    } catch (NoSuchAlgorithmException e) {
        e.printStackTrace();
        throw new CryptoException("Error computing xml signature Cause: " + e.toString());
    } catch (UnsupportedEncodingException e) {
        e.printStackTrace();
        throw new CryptoException("Error computing xml signature Cause: " + e.toString());
    }
    same = true;
    for (int i = 0; i < messageDigest.length; ++i) {
        if (messageDigest[i] != elementDigest[i]) {
            same = false;
            break;
        }
    }
    return same;
}
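Each decrypt step above repeats five near-identical catch blocks. On Java 7+ the same behavior can be expressed with multi-catch; this is a sketch of the signature-decryption block only (not how Briefcase itself is written), reusing the identifiers of the surrounding method (ASYMMETRIC_ALGORITHM, rsaPrivateKey, base64EncryptedElementSignature, CryptoException):

byte[] elementDigest;
try {
    Cipher pkCipher = Cipher.getInstance(ASYMMETRIC_ALGORITHM);
    pkCipher.init(Cipher.DECRYPT_MODE, rsaPrivateKey);
    byte[] encryptedElementSignature = Base64.decodeBase64(base64EncryptedElementSignature);
    elementDigest = pkCipher.doFinal(encryptedElementSignature);
} catch (NoSuchAlgorithmException | NoSuchPaddingException | InvalidKeyException
        | IllegalBlockSizeException | BadPaddingException e) {
    e.printStackTrace(); // preserved from the original handlers
    throw new CryptoException("Error decrypting base64EncryptedElementSignature Cause: " + e.toString());
}

Similarly, the manual byte-by-byte comparison at the end of the method could be replaced by MessageDigest.isEqual(messageDigest, elementDigest), which also compares in constant time.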
From source file: org.loklak.data.DAO.java
/**
 * initialize the DAO
 * @param configMap
 * @param dataPath the path to the data directory
 */
public static void init(Map<String, String> configMap, Path dataPath) throws Exception {
    log("initializing loklak DAO");
    config = configMap;
    conf_dir = new File("conf");
    bin_dir = new File("bin");
    html_dir = new File("html");

    // wake up susi
    File susiinitpath = new File(conf_dir, "susi");
    File sudiwatchpath = new File(new File("data"), "susi");
    susi = new SusiMind(susiinitpath, sudiwatchpath);
    String susi_boilerplate_name = "susi_cognition_boilerplate.json";
    File susi_boilerplate_file = new File(sudiwatchpath, susi_boilerplate_name);
    if (!susi_boilerplate_file.exists())
        Files.copy(new File(conf_dir, "susi/" + susi_boilerplate_name + ".example"), susi_boilerplate_file);

    // initialize public and private keys
    public_settings = new Settings(new File("data/settings/public.settings.json"));
    File private_file = new File("data/settings/private.settings.json");
    private_settings = new Settings(private_file);
    OS.protectPath(private_file.toPath());
    if (!private_settings.loadPrivateKey() || !public_settings.loadPublicKey()) {
        log("Can't load key pair. Creating new one");
        // create new key pair
        KeyPairGenerator keyGen;
        try {
            String algorithm = "RSA";
            keyGen = KeyPairGenerator.getInstance(algorithm);
            keyGen.initialize(2048);
            KeyPair keyPair = keyGen.genKeyPair();
            private_settings.setPrivateKey(keyPair.getPrivate(), algorithm);
            public_settings.setPublicKey(keyPair.getPublic(), algorithm);
        } catch (NoSuchAlgorithmException e) {
            throw e;
        }
        log("Key creation finished. Peer hash: " + public_settings.getPeerHashAlgorithm() + " "
                + public_settings.getPeerHash());
    } else {
        log("Key pair loaded from file. Peer hash: " + public_settings.getPeerHashAlgorithm() + " "
                + public_settings.getPeerHash());
    }

    File datadir = dataPath.toFile();
    // check if elasticsearch shall be accessed as external cluster
    String transport = configMap.get("elasticsearch_transport.enabled");
    if (transport != null && "true".equals(transport)) {
        String cluster_name = configMap.get("elasticsearch_transport.cluster.name");
        String transport_addresses_string = configMap.get("elasticsearch_transport.addresses");
        if (transport_addresses_string != null && transport_addresses_string.length() > 0) {
            String[] transport_addresses = transport_addresses_string.split(",");
            elasticsearch_client = new ElasticsearchClient(transport_addresses, cluster_name);
        }
    } else {
        // use all config attributes with a key starting with "elasticsearch." to set elasticsearch settings
        ESLoggerFactory.setDefaultFactory(new Slf4jESLoggerFactory());
        org.elasticsearch.common.settings.Settings.Builder settings = org.elasticsearch.common.settings.Settings
                .builder();
        for (Map.Entry<String, String> entry : config.entrySet()) {
            String key = entry.getKey();
            if (key.startsWith("elasticsearch."))
                settings.put(key.substring(14), entry.getValue());
        }
        // patch the home path
        settings.put("path.home", datadir.getAbsolutePath());
        settings.put("path.data", datadir.getAbsolutePath());
        settings.build();
        // start elasticsearch
        elasticsearch_client = new ElasticsearchClient(settings);
    }

    // open AAA storage
    Path settings_dir = dataPath.resolve("settings");
    settings_dir.toFile().mkdirs();
    Path authentication_path = settings_dir.resolve("authentication.json");
    authentication = new JsonTray(authentication_path.toFile(), 10000);
    OS.protectPath(authentication_path);
    Path authorization_path = settings_dir.resolve("authorization.json");
    authorization = new JsonTray(authorization_path.toFile(), 10000);
    OS.protectPath(authorization_path);
    Path passwordreset_path = settings_dir.resolve("passwordreset.json");
    passwordreset = new JsonTray(passwordreset_path.toFile(), 10000);
    OS.protectPath(passwordreset_path);
    Path accounting_path = settings_dir.resolve("accounting.json");
    accounting = new JsonTray(accounting_path.toFile(), 10000);
    OS.protectPath(accounting_path);
    Path login_keys_path = settings_dir.resolve("login-keys.json");
    login_keys = new JsonFile(login_keys_path.toFile());
    OS.protectPath(login_keys_path);

    Log.getLog().info("Initializing user roles");
    Path userRoles_path = settings_dir.resolve("userRoles.json");
    userRoles = new UserRoles(new JsonFile(userRoles_path.toFile()));
    OS.protectPath(userRoles_path);
    try {
        userRoles.loadUserRolesFromObject();
        Log.getLog().info("Loaded user roles from file");
    } catch (IllegalArgumentException e) {
        Log.getLog().info("Load default user roles");
        userRoles.loadDefaultUserRoles();
    }

    // open index
    Path index_dir = dataPath.resolve("index");
    if (index_dir.toFile().exists())
        OS.protectPath(index_dir); // no other permissions to this path

    // define the index factories
    messages = new MessageFactory(elasticsearch_client, IndexName.messages.name(), CACHE_MAXSIZE, EXIST_MAXSIZE);
    messages_hour = new MessageFactory(elasticsearch_client, IndexName.messages_hour.name(), CACHE_MAXSIZE,
            EXIST_MAXSIZE);
    messages_day = new MessageFactory(elasticsearch_client, IndexName.messages_day.name(), CACHE_MAXSIZE,
            EXIST_MAXSIZE);
    messages_week = new MessageFactory(elasticsearch_client, IndexName.messages_week.name(), CACHE_MAXSIZE,
            EXIST_MAXSIZE);
    users = new UserFactory(elasticsearch_client, IndexName.users.name(), CACHE_MAXSIZE, EXIST_MAXSIZE);
    accounts = new AccountFactory(elasticsearch_client, IndexName.accounts.name(), CACHE_MAXSIZE, EXIST_MAXSIZE);
    queries = new QueryFactory(elasticsearch_client, IndexName.queries.name(), CACHE_MAXSIZE, EXIST_MAXSIZE);
    importProfiles = new ImportProfileFactory(elasticsearch_client, IndexName.import_profiles.name(),
            CACHE_MAXSIZE, EXIST_MAXSIZE);

    // create indices and set mapping (that shows how 'elastic' elasticsearch is: it's always good to define data types)
    File mappingsDir = new File(new File(conf_dir, "elasticsearch"), "mappings");
    int shards = Integer.parseInt(configMap.get("elasticsearch.index.number_of_shards"));
    int replicas = Integer.parseInt(configMap.get("elasticsearch.index.number_of_replicas"));
    for (IndexName index : IndexName.values()) {
        log("initializing index '" + index.name() + "'...");
        try {
            elasticsearch_client.createIndexIfNotExists(index.name(), shards, replicas);
        } catch (Throwable e) {
            Log.getLog().warn(e);
        }
        try {
            elasticsearch_client.setMapping(index.name(), new File(mappingsDir, index.getSchemaFilename()));
        } catch (Throwable e) {
            Log.getLog().warn(e);
        }
    }

    // elasticsearch will probably take some time until it is started up. We do some other stuff meanwhile..
    // create and document the data dump dir
    assets = new File(datadir, "assets");
    external_data = new File(datadir, "external");
    dictionaries = new File(external_data, "dictionaries");
    dictionaries.mkdirs();

    // create message dump dir
    String message_dump_readme = "This directory contains dump files for messages which arrived the platform.\n"
            + "There are three subdirectories for dump files:\n"
            + "- own: for messages received with this peer. There is one file for each month.\n"
            + "- import: hand-over directory for message dumps to be imported. Drop dumps here and they are imported.\n"
            + "- imported: dump files which had been processed from the import directory are moved here.\n"
            + "You can import dump files from other peers by dropping them into the import directory.\n"
            + "Each dump file must start with the prefix '" + MESSAGE_DUMP_FILE_PREFIX + "' to be recognized.\n";
    message_dump_dir = dataPath.resolve("dump");
    message_dump = new JsonRepository(message_dump_dir.toFile(), MESSAGE_DUMP_FILE_PREFIX, message_dump_readme,
            JsonRepository.COMPRESSED_MODE, true, Runtime.getRuntime().availableProcessors());

    account_dump_dir = dataPath.resolve("accounts");
    account_dump_dir.toFile().mkdirs();
    OS.protectPath(account_dump_dir); // no other permissions to this path
    account_dump = new JsonRepository(account_dump_dir.toFile(), ACCOUNT_DUMP_FILE_PREFIX, null,
            JsonRepository.REWRITABLE_MODE, false, Runtime.getRuntime().availableProcessors());

    File user_dump_dir = new File(datadir, "accounts");
    user_dump_dir.mkdirs();
    user_dump = new JsonDataset(user_dump_dir, USER_DUMP_FILE_PREFIX,
            new JsonDataset.Column[] { new JsonDataset.Column("id_str", false),
                    new JsonDataset.Column("screen_name", true) },
            "retrieval_date", DateParser.PATTERN_ISO8601MILLIS, JsonRepository.REWRITABLE_MODE, false,
            Integer.MAX_VALUE);
    followers_dump = new JsonDataset(user_dump_dir, FOLLOWERS_DUMP_FILE_PREFIX,
            new JsonDataset.Column[] { new JsonDataset.Column("screen_name", true) }, "retrieval_date",
            DateParser.PATTERN_ISO8601MILLIS, JsonRepository.REWRITABLE_MODE, false, Integer.MAX_VALUE);
    following_dump = new JsonDataset(user_dump_dir, FOLLOWING_DUMP_FILE_PREFIX,
            new JsonDataset.Column[] { new JsonDataset.Column("screen_name", true) }, "retrieval_date",
            DateParser.PATTERN_ISO8601MILLIS, JsonRepository.REWRITABLE_MODE, false, Integer.MAX_VALUE);

    Path log_dump_dir = dataPath.resolve("log");
    log_dump_dir.toFile().mkdirs();
    OS.protectPath(log_dump_dir); // no other permissions to this path
    access = new AccessTracker(log_dump_dir.toFile(), ACCESS_DUMP_FILE_PREFIX, 60000, 3000);
    access.start(); // start monitor

    import_profile_dump_dir = dataPath.resolve("import-profiles");
    import_profile_dump = new JsonRepository(import_profile_dump_dir.toFile(), IMPORT_PROFILE_FILE_PREFIX, null,
            JsonRepository.COMPRESSED_MODE, false, Runtime.getRuntime().availableProcessors());

    // load schema folder
    conv_schema_dir = new File("conf/conversion");
    schema_dir = new File("conf/schema");

    // load dictionaries if they are embedded here
    // read the file allCountries.zip from http://download.geonames.org/export/dump/allCountries.zip
    //File allCountries = new File(dictionaries, "allCountries.zip");
    File cities1000 = new File(dictionaries, "cities1000.zip");
    if (!cities1000.exists()) {
        // download this file
        ClientConnection.download("http://download.geonames.org/export/dump/cities1000.zip", cities1000);
    }
    if (cities1000.exists()) {
        try {
            geoNames = new GeoNames(cities1000, new File(conf_dir, "iso3166.json"), 1);
        } catch (IOException e) {
            Log.getLog().warn(e.getMessage());
            cities1000.delete();
            geoNames = null;
        }
    }

    // finally wait for healthy status of elasticsearch shards
    ClusterHealthStatus required_status = ClusterHealthStatus
            .fromString(config.get("elasticsearch_requiredClusterHealthStatus"));
    boolean ok;
    do {
        log("Waiting for elasticsearch " + required_status.name() + " status");
        ok = elasticsearch_client.wait_ready(60000l, required_status);
    } while (!ok);
    /**
    do {
        log("Waiting for elasticsearch green status");
        health = elasticsearch_client.admin().cluster().prepareHealth().setWaitForGreenStatus().execute().actionGet();
    } while (health.isTimedOut());
    **/
    log("elasticsearch has started up!");

    // start the classifier
    new Thread() {
        public void run() {
            log("initializing the classifier...");
            try {
                Classifier.init(10000, 1000);
            } catch (Throwable e) {
                Log.getLog().warn(e);
            }
            log("classifier initialized!");
        }
    }.start();

    log("initializing queries...");
    File harvestingPath = new File(datadir, "queries");
    if (!harvestingPath.exists())
        harvestingPath.mkdirs();
    String[] list = harvestingPath.list();
    for (String queryfile : list) {
        if (queryfile.startsWith(".") || queryfile.endsWith("~"))
            continue;
        try {
            BufferedReader reader = new BufferedReader(
                    new InputStreamReader(new FileInputStream(new File(harvestingPath, queryfile))));
            String line;
            List<IndexEntry<QueryEntry>> bulkEntries = new ArrayList<>();
            while ((line = reader.readLine()) != null) {
                line = line.trim().toLowerCase();
                if (line.length() == 0)
                    continue;
                if (line.charAt(0) <= '9') {
                    // truncate statistic
                    int p = line.indexOf(' ');
                    if (p < 0)
                        continue;
                    line = line.substring(p + 1).trim();
                }
                // write line into query database
                if (!existQuery(line)) {
                    bulkEntries.add(new IndexEntry<QueryEntry>(line, SourceType.TWITTER,
                            new QueryEntry(line, 0, 60000, SourceType.TWITTER, false)));
                }
                if (bulkEntries.size() > 1000) {
                    queries.writeEntries(bulkEntries);
                    bulkEntries.clear();
                }
            }
            queries.writeEntries(bulkEntries);
            reader.close();
        } catch (IOException e) {
            Log.getLog().warn(e);
        }
    }
    log("queries initialized.");
    log("finished DAO initialization");
}
From source file: com.ikanow.infinit.e.harvest.extraction.document.file.FileHarvester.java
private void parse(InfiniteFile f, SourcePojo source) throws MalformedURLException, URISyntaxException {
    // NOTE: we only ever break out of here because of max docs in standalone mode
    // (because we don't know how to continue reading)
    DocumentPojo doc = null;

    // Determine File Extension
    String fileName = f.getName().toString();
    int mid = fileName.lastIndexOf(".");
    String extension = fileName.substring(mid + 1, fileName.length());

    // Checked to save processing time
    long fileTimestamp = (f.getDate() / 1000) * 1000;
    // (ensure truncated to seconds, since some operation somewhere here does this...)
    Date modDate = new Date(fileTimestamp);

    // XML Data gets placed into MetaData
    boolean bIsXml = false;
    boolean bIsJson = false;
    boolean bIsLineOriented = false;
    if ((null != source.getFileConfig()) && (null != source.getFileConfig().type)) {
        extension = source.getFileConfig().type;
    }
    bIsXml = extension.equalsIgnoreCase("xml");
    bIsJson = extension.equalsIgnoreCase("json");
    bIsLineOriented = extension.endsWith("sv");

    if (bIsXml || bIsJson || bIsLineOriented) {
        int debugMaxDocs = Integer.MAX_VALUE; // by default don't set this, it's only for debug mode
        if (_context.isStandalone()) { // debug mode
            debugMaxDocs = maxDocsPerCycle;
        }
        // fast check to see if the file has changed before processing (or if it never existed)
        if (needsUpdated_SourceUrl(modDate, f.getUrlString(), source)) {
            if (0 != modDate.getTime()) {
                // if it ==0 then sourceUrl doesn't exist at all, no need to delete
                // This file already exists - in normal/managed mode will re-create
                // In streaming mode, simply skip over
                if (_streaming) {
                    return;
                } //TESTED
                DocumentPojo docRepresentingSrcUrl = new DocumentPojo();
                docRepresentingSrcUrl.setSourceUrl(f.getUrlString());
                docRepresentingSrcUrl.setSourceKey(source.getKey());
                docRepresentingSrcUrl.setCommunityId(source.getCommunityIds().iterator().next());
                sourceUrlsGettingUpdated.add(docRepresentingSrcUrl.getSourceUrl());
                this.docsToRemove.add(docRepresentingSrcUrl);
                // (can add documents with just source URL, are treated differently in the core libraries)
            }
            SourceFileConfigPojo fileSystem = source.getFileConfig();
            if ((null == fileSystem) && (bIsXml || bIsJson)) {
                fileSystem = new SourceFileConfigPojo();
            }
            XmlToMetadataParser xmlParser = null;
            JsonToMetadataParser jsonParser = null;
            String urlType = extension;
            if (bIsXml) {
                xmlParser = new XmlToMetadataParser(fileSystem.XmlRootLevelValues, fileSystem.XmlIgnoreValues,
                        fileSystem.XmlSourceName, fileSystem.XmlPrimaryKey, fileSystem.XmlAttributePrefix,
                        fileSystem.XmlPreserveCase, debugMaxDocs);
            } //TESTED
            else if (bIsJson) {
                jsonParser = new JsonToMetadataParser(fileSystem.XmlSourceName, fileSystem.XmlRootLevelValues,
                        fileSystem.XmlPrimaryKey, fileSystem.XmlIgnoreValues, debugMaxDocs);
            } //TESTED
            List<DocumentPojo> partials = null;
            try {
                if (bIsXml) {
                    XMLStreamReader xmlStreamReader = null;
                    XMLInputFactory factory = XMLInputFactory.newInstance();
                    factory.setProperty(XMLInputFactory.IS_COALESCING, true);
                    factory.setProperty(XMLInputFactory.SUPPORT_DTD, false);
                    try {
                        xmlStreamReader = factory.createXMLStreamReader(f.getInputStream());
                        partials = xmlParser.parseDocument(xmlStreamReader);
                        long memUsage = xmlParser.getMemUsage();
                        _memUsage += memUsage;
                        _totalMemUsage.addAndGet(memUsage);
                    } finally {
                        if (null != xmlStreamReader)
                            xmlStreamReader.close();
                    }
                } //TESTED
                else if (bIsJson) {
                    JsonReader jsonReader = null;
                    try {
                        jsonReader = new JsonReader(new InputStreamReader(f.getInputStream(), "UTF-8"));
                        jsonReader.setLenient(true);
                        partials = jsonParser.parseDocument(jsonReader);
                        long memUsage = jsonParser.getMemUsage();
                        _memUsage += memUsage;
                        _totalMemUsage.addAndGet(memUsage);
                    } finally {
                        if (null != jsonReader)
                            jsonReader.close();
                    }
                } //TESTED
                else if (bIsLineOriented) {
                    // Just generate a document for every line
                    BufferedReader lineReader = null;
                    try {
                        lineReader = new BufferedReader(new InputStreamReader(f.getInputStream(), "UTF-8"));
                        CsvToMetadataParser lineParser = new CsvToMetadataParser(debugMaxDocs);
                        partials = lineParser.parseDocument(lineReader, source);
                        long memUsage = lineParser.getMemUsage();
                        _memUsage += memUsage;
                        _totalMemUsage.addAndGet(memUsage);
                    } finally {
                        if (null != lineReader)
                            lineReader.close();
                    }
                } //TESTED

                MessageDigest md5 = null; // (used to generate unique urls if the user doesn't set one below)
                try {
                    md5 = MessageDigest.getInstance("MD5");
                } catch (NoSuchAlgorithmException e) {
                    // Do nothing, unlikely to happen...
                }
                int nIndex = 0;
                int numPartials = partials.size();
                for (DocumentPojo doctoAdd : partials) {
                    nIndex++;
                    doctoAdd.setSource(source.getTitle());
                    doctoAdd.setSourceKey(source.getKey());
                    doctoAdd.setMediaType(source.getMediaType());
                    doctoAdd.setModified(new Date(fileTimestamp));
                    doctoAdd.setCreated(new Date());
                    if (null == doctoAdd.getUrl()) { // Can be set in the parser or here
                        doctoAdd.setHasDefaultUrl(true); // (ie cannot occur in a different src URL)
                        if (1 == numPartials) {
                            String urlString = f.getUrlString();
                            if (urlString.endsWith(urlType)) {
                                doctoAdd.setUrl(urlString);
                            } else {
                                doctoAdd.setUrl(
                                        new StringBuffer(urlString).append('.').append(urlType).toString());
                            }
                            // (we always set sourceUrl as the true url of the file, so want to differentiate
                            // the URL with some useful information)
                        } else if (null == doctoAdd.getMetadata()) { // Line oriented case
                            doctoAdd.setUrl(new StringBuffer(f.getUrlString()).append("/").append(nIndex)
                                    .append('.').append(urlType).toString());
                        } else {
                            if (null == md5) { // Will never happen, MD5 always exists
                                doctoAdd.setUrl(new StringBuffer(f.getUrlString()).append("/")
                                        .append(doctoAdd.getMetadata().hashCode()).append('.').append(urlType)
                                        .toString());
                            } else {
                                // This is the standard call if the XML parser has not been configured to build the URL
                                doctoAdd.setUrl(new StringBuffer(f.getUrlString()).append("/")
                                        .append(DigestUtils.md5Hex(doctoAdd.getMetadata().toString()))
                                        .append('.').append(urlType).toString());
                            }
                        } //TESTED
                    }
                    doctoAdd.setTitle(f.getName().toString());
                    doctoAdd.setPublishedDate(new Date(fileTimestamp));
                    doctoAdd.setSourceUrl(f.getUrlString());
                    // Always add to files because I'm deleting the source URL
                    files.add(doctoAdd);
                } //TESTED
            } catch (XMLStreamException e1) {
                errors++;
                _context.getHarvestStatus()
                        .logMessage(HarvestExceptionUtils.createExceptionMessage(e1).toString(), true);
            } catch (FactoryConfigurationError e1) {
                errors++;
                _context.getHarvestStatus().logMessage(e1.getMessage(), true);
            } catch (IOException e1) {
                errors++;
                _context.getHarvestStatus()
                        .logMessage(HarvestExceptionUtils.createExceptionMessage(e1).toString(), true);
            } catch (Exception e1) {
                errors++;
                _context.getHarvestStatus()
                        .logMessage(HarvestExceptionUtils.createExceptionMessage(e1).toString(), true);
            }
        } // (end if needs updated)
    } else { // Tika supports Excel, Word, Powerpoint, Visio, & Outlook Documents
        // (This dedup tells me if it's an add/update vs ignore - qr.isDuplicate higher up tells me if I need to add or update)
        if (needsUpdated_Url(modDate, f.getUrlString(), source)) {
            Metadata metadata = null;
            InputStream in = null;
            try {
                doc = new DocumentPojo();
                // Create a tika object (first time only)
                if (null == _tika) {
                    this.initializeTika(_context, source);
                }
                // BUGGERY
                // NEED TO LIKELY SET LIMIT TO BE 30MB or 50MB and BYPASS ANYTHING OVER THAT BELOW IS THE CODE TO DO THAT
                // tika.setMaxStringLength(30*1024*1024);
                // Disable the string length limit
                _tika.setMaxStringLength(-1);
                //input = new FileInputStream(new File(resourceLocation));
                // Create a metadata object to contain the metadata
                metadata = new Metadata();
                // Parse the file and get the text of the file
                doc.setSource(source.getTitle());
                doc.setSourceKey(source.getKey());
                doc.setMediaType(source.getMediaType());
                String fullText = "";
                in = f.getInputStream();
                try {
                    if (null == _tikaOutputFormat) { // text only
                        fullText = _tika.parseToString(in, metadata);
                    } //TESTED
                    else { // XML/HTML
                        _tika.getParser().parse(in, _tikaOutputFormat, metadata, _tikaOutputParseContext);
                        fullText = _tikaXmlFormatWriter.toString();
                        _tikaXmlFormatWriter.getBuffer().setLength(0);
                    } //TESTED
                } finally {
                    if (null != in)
                        in.close();
                }
                int descCap = 500;
                doc.setFullText(fullText);
                if (descCap > fullText.length()) {
                    descCap = fullText.length();
                }
                doc.setDescription(fullText.substring(0, descCap));
                doc.setModified(new Date(fileTimestamp));
                doc.setCreated(new Date());
                doc.setUrl(f.getUrlString());
                doc.setTitle(f.getName().toString());
                doc.setPublishedDate(new Date(fileTimestamp));
                long memUsage = (250L * (doc.getFullText().length() + doc.getDescription().length())) / 100L;
                // 25% overhead, 2x for string->byte
                _memUsage += memUsage;
                _totalMemUsage.addAndGet(memUsage);
                // If the metadata contains a more plausible date then use that
                try {
                    String title = metadata.get(Metadata.TITLE);
                    if (null != title) {
                        doc.setTitle(title);
                    }
                } catch (Exception e) {
                    // Fine just carry on
                }
                try {
                    Date date = metadata.getDate(Metadata.CREATION_DATE); // MS Word
                    if (null != date) {
                        doc.setPublishedDate(date);
                    } else {
                        date = metadata.getDate(Metadata.DATE); // Dublin
                        if (null != date) {
                            doc.setPublishedDate(date);
                        } else {
                            date = metadata.getDate(Metadata.ORIGINAL_DATE);
                            if (null != date) {
                                doc.setPublishedDate(date);
                            }
                        }
                    }
                } catch (Exception e) {
                    // Fine just carry on
                } //TESTED
                // If the metadata contains a geotag then apply that:
                try {
                    String lat = metadata.get(Metadata.LATITUDE);
                    String lon = metadata.get(Metadata.LONGITUDE);
                    if ((null != lat) && (null != lon)) {
                        GeoPojo gt = new GeoPojo();
                        gt.lat = Double.parseDouble(lat);
                        gt.lon = Double.parseDouble(lon);
                        doc.setDocGeo(gt);
                    }
                } catch (Exception e) {
                    // Fine just carry on
                }
                // Save the entire metadata:
                doc.addToMetadata("_FILE_METADATA_", metadata);
                for (ObjectId communityId : source.getCommunityIds()) {
                    doc.setCommunityId(communityId);
                }
                files.add(doc);
                // Close the input stream
                in.close();
                in = null;
                //TESTED
            } catch (SmbException e) {
                errors++;
                _context.getHarvestStatus()
                        .logMessage(HarvestExceptionUtils.createExceptionMessage(e).toString(), true);
            } catch (MalformedURLException e) {
                errors++;
                _context.getHarvestStatus()
                        .logMessage(HarvestExceptionUtils.createExceptionMessage(e).toString(), true);
            } catch (UnknownHostException e) {
                errors++;
                _context.getHarvestStatus()
                        .logMessage(HarvestExceptionUtils.createExceptionMessage(e).toString(), true);
            } catch (IOException e) {
                errors++;
                _context.getHarvestStatus().logMessage(e.getMessage(), true);
            } catch (TikaException e) {
                errors++;
                _context.getHarvestStatus().logMessage(e.getMessage(), true);
            } catch (Exception e) {
                errors++;
                _context.getHarvestStatus()
                        .logMessage(HarvestExceptionUtils.createExceptionMessage(e).toString(), true);
            } finally {
                // Close the input stream if an error occurs
                if (null != in) {
                    try {
                        in.close();
                    } catch (IOException e) {
                        // All good, do nothing
                    }
                }
            } // end exception handling
        } // end dedup check
    } // end XML vs "office" app
    //DEBUG
    //System.out.println("FILE=" + files.size() + " / MEM=" + _memUsage + " VS " + Runtime.getRuntime().totalMemory());
}
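The URL-generation logic above boils down to deriving a stable, metadata-based suffix for each document split out of one source file. A compact sketch of just that idea, using commons-codec's DigestUtils as the excerpt does (uniqueDocUrl is a hypothetical helper, not part of the Ikanow source):

import org.apache.commons.codec.digest.DigestUtils;

final class DocUrlBuilder {
    // Appends a deterministic, metadata-derived suffix so documents split out of
    // one source file each get a unique, repeatable URL (mirrors the md5Hex branch above).
    static String uniqueDocUrl(String sourceUrl, Object metadata, String urlType) {
        return sourceUrl + "/" + DigestUtils.md5Hex(metadata.toString()) + "." + urlType;
    }
}

Note the design difference between the two MD5 paths in the excerpt: MessageDigest.getInstance("MD5") forces the caller to handle the checked NoSuchAlgorithmException (here swallowed, since MD5 is guaranteed on standard JDKs), whereas DigestUtils.md5Hex handles it internally, so the call site stays clean.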