List of usage examples for the java.util.zip.GZIPOutputStream constructor
public GZIPOutputStream(OutputStream out, int size) throws IOException
Creates a new output stream with the specified buffer size. All of the examples below pass an explicit buffer size as the second argument.
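For orientation, here is a minimal, self-contained sketch of the constructor on its own; the file name, buffer size, and payload are arbitrary placeholders rather than values taken from the examples below.

import java.io.FileOutputStream;
import java.io.IOException;
import java.io.OutputStream;
import java.nio.charset.StandardCharsets;
import java.util.zip.GZIPOutputStream;

public class GzipWriteSketch {
    public static void main(String[] args) throws IOException {
        // The second constructor argument sets the size of the internal deflater buffer (8 KB here).
        try (OutputStream out = new FileOutputStream("example.txt.gz");      // placeholder file name
             GZIPOutputStream gzip = new GZIPOutputStream(out, 8 * 1024)) {
            gzip.write("hello, gzip".getBytes(StandardCharsets.UTF_8));      // placeholder payload
        } // closing the stream finishes the deflater and writes the GZIP trailer
    }
}

Note that closing the GZIPOutputStream (or calling finish()) is what writes the GZIP trailer; flush() alone does not, which is why the examples below close the wrapping stream once the payload has been written.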
From source file:com.fdwills.external.http.JsonStreamerEntity.java
@Override
public void writeTo(final OutputStream out) throws IOException {
    if (out == null) {
        throw new IllegalStateException("Output stream cannot be null.");
    }

    // Record the time when uploading started.
    long now = System.currentTimeMillis();

    // Use GZIP compression when sending streams, otherwise just use
    // a buffered output stream to speed things up a bit.
    OutputStream os = null != contentEncoding ? new GZIPOutputStream(out, BUFFER_SIZE) : out;

    // Always send a JSON object.
    os.write('{');

    // Keys used by the HashMaps.
    Set<String> keys = jsonParams.keySet();

    boolean isFileWrapper;

    // Go over all keys and handle each's value.
    for (String key : keys) {
        // Evaluate the value (which cannot be null).
        Object value = jsonParams.get(key);

        // Bail out prematurely if value's null.
        if (value == null) {
            continue;
        }

        // Write the JSON object's key.
        os.write(escape(key));
        os.write(':');

        // Check if this is a FileWrapper.
        isFileWrapper = value instanceof RequestParams.FileWrapper;

        // If a file should be uploaded.
        if (isFileWrapper || value instanceof RequestParams.StreamWrapper) {
            // All uploads are sent as an object containing the file's details.
            os.write('{');

            // Determine how to handle this entry.
            if (isFileWrapper) {
                writeToFromFile(os, (RequestParams.FileWrapper) value);
            } else {
                writeToFromStream(os, (RequestParams.StreamWrapper) value);
            }

            // End the file's object and prepare for next one.
            os.write('}');
        } else if (value instanceof JsonValueInterface) {
            os.write(((JsonValueInterface) value).getEscapedJsonValue());
        } else if (value instanceof org.json.JSONObject) {
            os.write(((org.json.JSONObject) value).toString().getBytes());
        } else if (value instanceof org.json.JSONArray) {
            os.write(((org.json.JSONArray) value).toString().getBytes());
        } else if (value instanceof Boolean) {
            os.write((Boolean) value ? JSON_TRUE : JSON_FALSE);
        } else if (value instanceof Long) {
            os.write((((Number) value).longValue() + "").getBytes());
        } else if (value instanceof Double) {
            os.write((((Number) value).doubleValue() + "").getBytes());
        } else if (value instanceof Float) {
            os.write((((Number) value).floatValue() + "").getBytes());
        } else if (value instanceof Integer) {
            os.write((((Number) value).intValue() + "").getBytes());
        } else {
            os.write(escape(value.toString()));
        }

        os.write(',');
    }

    // Include the elapsed time taken to upload everything.
    // This might be useful for somebody, but it serves us well since
    // there will almost always be a ',' as the last sent character.
    os.write(STREAM_ELAPSED);
    os.write(':');
    long elapsedTime = System.currentTimeMillis() - now;
    os.write((elapsedTime + "}").getBytes());

    Log.i(LOG_TAG, "Uploaded JSON in " + Math.floor(elapsedTime / 1000) + " seconds");

    // Flush the contents up the stream.
    os.flush();
    AsyncHttpClient.silentCloseOutputStream(os);
}
From source file:com.lfrj.diancan.http.JsonStreamerEntity.java
(The writeTo(OutputStream) implementation in this file is identical to the previous example from com.fdwills.external.http.JsonStreamerEntity.java.)
From source file:com.wen.security.http.JsonStreamerEntity.java
@Override
public void writeTo(final OutputStream outstream) throws IOException {
    if (outstream == null) {
        throw new IllegalStateException("Output stream cannot be null.");
    }

    // Record the time when uploading started.
    long now = System.currentTimeMillis();

    // Keys used by the HashMaps.
    Set<String> keys;

    // Use GZIP compression when sending streams, otherwise just use
    // a buffered output stream to speed things up a bit.
    OutputStream upload;
    if (null != contentEncoding) {
        upload = new GZIPOutputStream(new BufferedOutputStream(outstream), BUFFER_SIZE);
    } else {
        upload = new BufferedOutputStream(outstream);
    }

    // Always send a JSON object.
    upload.write('{');

    // Send the K/V values.
    keys = kvParams.keySet();
    for (String key : keys) {
        // Write the JSON object's key.
        upload.write(escape(key));
        upload.write(':');

        // Evaluate the value (which cannot be null).
        Object value = kvParams.get(key);

        if (value instanceof Boolean) {
            upload.write((Boolean) value ? JSON_TRUE : JSON_FALSE);
        } else if (value instanceof Long) {
            upload.write((((Number) value).longValue() + "").getBytes());
        } else if (value instanceof Double) {
            upload.write((((Number) value).doubleValue() + "").getBytes());
        } else if (value instanceof Float) {
            upload.write((((Number) value).floatValue() + "").getBytes());
        } else if (value instanceof Integer) {
            upload.write((((Number) value).intValue() + "").getBytes());
        } else {
            upload.write(value.toString().getBytes());
        }

        upload.write(',');
    }

    // Buffer used for reading from input streams.
    byte[] buffer = new byte[BUFFER_SIZE];

    // Send the stream params.
    keys = streamParams.keySet();
    for (String key : keys) {
        RequestParams.StreamWrapper entry = streamParams.get(key);

        // Write the JSON object's key.
        upload.write(escape(key));

        // All uploads are sent as an object containing the file's details.
        upload.write(':');
        upload.write('{');

        // Send the streams's name.
        upload.write(STREAM_NAME);
        upload.write(':');
        upload.write(escape(entry.name));
        upload.write(',');

        // Send the streams's content type.
        upload.write(STREAM_TYPE);
        upload.write(':');
        upload.write(escape(entry.contentType));
        upload.write(',');

        // Prepare the file content's key.
        upload.write(STREAM_CONTENTS);
        upload.write(':');
        upload.write('"');

        // Upload the file's contents in Base64.
        Base64OutputStream outputStream = new Base64OutputStream(upload, Base64.NO_CLOSE | Base64.NO_WRAP);

        // Read from input stream until no more data's left to read.
        int bytesRead;
        while ((bytesRead = entry.inputStream.read(buffer)) != -1) {
            outputStream.write(buffer, 0, bytesRead);
        }

        // Close the Base64 output stream.
        outputStream.close();

        // End the file's object and prepare for next one.
        upload.write('"');
        upload.write('}');
        upload.write(',');
    }

    // Include the elapsed time taken to upload everything.
    // This might be useful for somebody, but it serves us well since
    // there will almost always be a ',' as the last sent character.
    upload.write(STREAM_ELAPSED);
    upload.write(':');
    long elapsedTime = System.currentTimeMillis() - now;
    upload.write((elapsedTime + "}").getBytes());

    // Flush the contents up the stream.
    upload.flush();
    upload.close();
}
From source file:com.aoeng.degu.utils.net.asyncthhpclient.JsonStreamerEntity.java
(The writeTo(OutputStream) implementation in this file matches the previous example from com.wen.security.http.JsonStreamerEntity.java, with one addition: before flushing, it logs the elapsed upload time via Log.i(LOG_TAG, "Uploaded JSON in " + Math.floor(elapsedTime / 1000) + " seconds").)
From source file:io.ecarf.core.compress.NxGzipProcessor.java
/**
 * Get inflated output stream from the provided output stream
 * @param output
 * @return
 * @throws IOException
 */
private OutputStream getInflatedOutputStream(OutputStream output) throws IOException {
    OutputStream inflated = output;

    // gzip
    if (GzipUtils.isCompressedFilename(this.inputFile)) {
        inflated = new GZIPOutputStream(output, Constants.GZIP_BUF_SIZE);

    // bz2
    } else if (BZip2Utils.isCompressedFilename(this.inputFile)) {
        inflated = new BZip2CompressorOutputStream(new BufferedOutputStream(output));
    }

    return inflated;
}
From source file:io.ecarf.core.cloud.task.processor.reason.phase0.DoReasonTask3.java
@Override
public void run() throws IOException {
    GoogleCloudService cloud = (GoogleCloudService) this.getCloudService();
    //String table = metadata.getValue(EcarfMetaData.ECARF_TABLE);
    //Set<String> terms = metadata.getTerms();
    //String schemaFile = metadata.getValue(EcarfMetaData.ECARF_SCHEMA);
    //String bucket = metadata.getBucket();
    Set<String> termsSet;

    if (terms == null) {
        // too large, probably saved as a file
        //String termsFile = metadata.getValue(EcarfMetaData.ECARF_TERMS_FILE);
        log.info("Using json file for terms: " + termsFile);
        Validate.notNull(termsFile);

        String localTermsFile = Utils.TEMP_FOLDER + termsFile;
        cloud.downloadObjectFromCloudStorage(termsFile, localTermsFile, bucket);

        // convert from JSON
        termsSet = FileUtils.jsonFileToSet(localTermsFile);
    } else {
        termsSet = ObjectUtils.csvToSet(terms);
    }

    String localSchemaFile = Utils.TEMP_FOLDER + schemaFile;
    // download the file from the cloud storage
    cloud.downloadObjectFromCloudStorage(schemaFile, localSchemaFile, bucket);

    // uncompress if compressed
    if (GzipUtils.isCompressedFilename(schemaFile)) {
        localSchemaFile = GzipUtils.getUncompressedFilename(localSchemaFile);
    }

    Map<String, Set<Triple>> allSchemaTriples = TripleUtils.getRelevantSchemaNTriples(localSchemaFile,
            TermUtils.RDFS_TBOX);

    // get all the triples we care about
    Map<Term, Set<Triple>> schemaTerms = new HashMap<>();
    for (String term : termsSet) {
        if (allSchemaTriples.containsKey(term)) {
            schemaTerms.put(new Term(term), allSchemaTriples.get(term));
        }
    }

    String decoratedTable = table;
    int emptyRetries = 0;
    int totalInferredTriples = 0;
    int maxRetries = Config.getIntegerProperty(Constants.REASON_RETRY_KEY, 6);

    // timestamp loop
    do {
        //List<String> inferredFiles = new ArrayList<>();
        // First of all run all the queries asynchronously and remember the jobId and filename for each term
        for (Entry<Term, Set<Triple>> entry : schemaTerms.entrySet()) {
            Term term = entry.getKey();

            // add table decoration to table name
            String query = GenericRule.getQuery(entry.getValue(), decoratedTable);
            log.info("\nQuery: " + query);

            String jobId = cloud.startBigDataQuery(query);
            String encodedTerm = FileUtils.encodeFilename(term.getTerm());
            String filename = Utils.TEMP_FOLDER + encodedTerm + Constants.DOT_TERMS;

            // remember the filename and the jobId for this query
            term.setFilename(filename).setJobId(jobId).setEncodedTerm(encodedTerm);
        }

        long start = System.currentTimeMillis();
        String inferredTriplesFile = Utils.TEMP_FOLDER + start + Constants.DOT_INF;
        List<String> productiveTerms = new ArrayList<>();
        int interimInferredTriples = 0;

        try (PrintWriter writer = new PrintWriter(
                new GZIPOutputStream(new FileOutputStream(inferredTriplesFile), Constants.GZIP_BUF_SIZE))) {

            // now loop through the queries
            for (Entry<Term, Set<Triple>> entry : schemaTerms.entrySet()) {
                Term term = entry.getKey();
                log.info("Reasoning for Term: " + term);

                Set<Triple> schemaTriples = entry.getValue();
                log.info("Schema Triples: " + Joiner.on('\n').join(schemaTriples));

                List<String> select = GenericRule.getSelect(schemaTriples);

                // block and wait for each job to complete then save results to a file
                BigInteger rows = BigInteger.ZERO;
                try {
                    rows = cloud.saveBigQueryResultsToFile(term.getJobId(), term.getFilename()).getTotalRows();
                } catch (IOException ioe) {
                    // transient backend errors
                    log.warn("failed to save query results to file, jobId: " + term.getJobId());
                }

                log.info("Query found " + rows + ", rows");

                // only process if triples are found matching this term
                if (!BigInteger.ZERO.equals(rows)) {
                    int inferredTriplesCount = this.inferAndSaveTriplesToFile(term, select, schemaTriples, rows,
                            decoratedTable, writer);
                    productiveTerms.add(term.getTerm());
                    interimInferredTriples += inferredTriplesCount;
                }
            }
        }

        totalInferredTriples += interimInferredTriples;

        if (interimInferredTriples > 0) {
            //TODO stream smaller numbers of inferred triples
            //TODO try uploading from cloud storage
            int streamingThreshold = Config.getIntegerProperty("ecarf.io.reasoning.streaming.threshold", 100000);

            log.info("Inserting " + interimInferredTriples + ", inferred triples into Big Data table for "
                    + productiveTerms.size() + " productive terms. Filename: " + inferredTriplesFile);

            if (interimInferredTriples <= streamingThreshold) {
                // stream the data
                Set<Triple> inferredTriples = TripleUtils.loadCompressedCSVTriples(inferredTriplesFile, false);
                log.info("Total triples to stream into Big Data: " + inferredTriples.size());
                cloud.streamObjectsIntoBigData(inferredTriples, TableUtils.getBigQueryTripleTable(table));
                log.info("All inferred triples are streamed into Big Data table");
            } else {
                // directly upload the data
                List<String> jobIds = cloud.loadLocalFilesIntoBigData(Lists.newArrayList(inferredTriplesFile),
                        TableUtils.getBigQueryTripleTable(table), false);
                log.info("All inferred triples are directly loaded into Big Data table, completed jobIds: " + jobIds);
            }

            // reset empty retries
            emptyRetries = 0;
        } else {
            log.info("No new inferred triples");
            // increment empty retries
            emptyRetries++;
        }

        log.info("Total inferred triples so far = " + totalInferredTriples + ", current retry count: " + emptyRetries);

        ApiUtils.block(Config.getIntegerProperty(Constants.REASON_SLEEP_KEY, 20));

        // FIXME move into the particular cloud implementation service
        long elapsed = System.currentTimeMillis() - start;
        decoratedTable = "[" + table + "@-" + elapsed + "-]";

        log.info("Using table decorator: " + decoratedTable + ". Empty retries count: " + emptyRetries);

    } while (!(emptyRetries == maxRetries));
    // end timestamp loop

    log.info("Finished reasoning, total inferred triples = " + totalInferredTriples);
}
From source file:io.ecarf.core.cloud.task.processor.reason.phase0.DoReasonTask4.java
@Override
public void run() throws IOException {
    GoogleCloudService cloud = (GoogleCloudService) this.getCloudService();
    //String table = metadata.getValue(EcarfMetaData.ECARF_TABLE);
    //Set<String> terms = metadata.getTerms();
    //String schemaFile = metadata.getValue(EcarfMetaData.ECARF_SCHEMA);
    //String bucket = metadata.getBucket();
    Set<String> termsSet;

    if (terms == null) {
        // too large, probably saved as a file
        //String termsFile = metadata.getValue(EcarfMetaData.ECARF_TERMS_FILE);
        log.info("Using json file for terms: " + termsFile);
        Validate.notNull(termsFile);

        String localTermsFile = Utils.TEMP_FOLDER + termsFile;
        cloud.downloadObjectFromCloudStorage(termsFile, localTermsFile, bucket);

        // convert from JSON
        termsSet = FileUtils.jsonFileToSet(localTermsFile);
    } else {
        termsSet = ObjectUtils.csvToSet(terms);
    }

    String localSchemaFile = Utils.TEMP_FOLDER + schemaFile;
    // download the file from the cloud storage
    cloud.downloadObjectFromCloudStorage(schemaFile, localSchemaFile, bucket);

    // uncompress if compressed
    if (GzipUtils.isCompressedFilename(schemaFile)) {
        localSchemaFile = GzipUtils.getUncompressedFilename(localSchemaFile);
    }

    Map<String, Set<Triple>> allSchemaTriples = TripleUtils.getRelevantSchemaNTriples(localSchemaFile,
            TermUtils.RDFS_TBOX);

    // get all the triples we care about
    Map<Term, Set<Triple>> schemaTerms = new HashMap<>();
    for (String term : termsSet) {
        if (allSchemaTriples.containsKey(term)) {
            schemaTerms.put(new Term(term), allSchemaTriples.get(term));
        }
    }

    String decoratedTable = table;
    int emptyRetries = 0;
    int totalInferredTriples = 0;
    int maxRetries = Config.getIntegerProperty(Constants.REASON_RETRY_KEY, 6);
    String instanceId = cloud.getInstanceId();

    // timestamp loop
    do {
        //List<String> inferredFiles = new ArrayList<>();
        // First of all run all the queries asynchronously and remember the jobId and filename for each term
        for (Entry<Term, Set<Triple>> entry : schemaTerms.entrySet()) {
            Term term = entry.getKey();

            // add table decoration to table name
            String query = GenericRule.getQuery(entry.getValue(), decoratedTable);
            log.info("\nQuery: " + query);

            String jobId = cloud.startBigDataQuery(query);
            String encodedTerm = FileUtils.encodeFilename(term.getTerm());
            String filename = Utils.TEMP_FOLDER + encodedTerm + Constants.DOT_TERMS;

            // remember the filename and the jobId for this query
            term.setFilename(filename).setJobId(jobId).setEncodedTerm(encodedTerm);
        }

        long start = System.currentTimeMillis();
        String inferredTriplesFile = Utils.TEMP_FOLDER + instanceId + '_' + start + Constants.DOT_INF;
        List<String> productiveTerms = new ArrayList<>();
        int interimInferredTriples = 0;

        try (PrintWriter writer = new PrintWriter(
                new GZIPOutputStream(new FileOutputStream(inferredTriplesFile), Constants.GZIP_BUF_SIZE))) {

            // now loop through the queries
            for (Entry<Term, Set<Triple>> entry : schemaTerms.entrySet()) {
                Term term = entry.getKey();
                log.info("Reasoning for Term: " + term);

                Set<Triple> schemaTriples = entry.getValue();
                log.info("Schema Triples: " + Joiner.on('\n').join(schemaTriples));

                List<String> select = GenericRule.getSelect(schemaTriples);

                // block and wait for each job to complete then save results to a file
                BigInteger rows = BigInteger.ZERO;
                try {
                    rows = cloud.saveBigQueryResultsToFile(term.getJobId(), term.getFilename()).getTotalRows();
                } catch (IOException ioe) {
                    // transient backend errors
                    log.warn("failed to save query results to file, jobId: " + term.getJobId());
                }

                log.info("Query found " + rows + ", rows");

                // only process if triples are found matching this term
                if (!BigInteger.ZERO.equals(rows)) {
                    int inferredTriplesCount = this.inferAndSaveTriplesToFile(term, select, schemaTriples, rows,
                            decoratedTable, writer);
                    productiveTerms.add(term.getTerm());
                    interimInferredTriples += inferredTriplesCount;
                    this.totalRows = this.totalRows.add(rows);
                }
            }
        }

        totalInferredTriples += interimInferredTriples;

        if (interimInferredTriples > 0) {
            //TODO stream smaller numbers of inferred triples
            //TODO try uploading from cloud storage
            int streamingThreshold = Config.getIntegerProperty("ecarf.io.reasoning.streaming.threshold", 100000);

            log.info("Inserting " + interimInferredTriples + ", inferred triples into Big Data table for "
                    + productiveTerms.size() + " productive terms. Filename: " + inferredTriplesFile);

            if (interimInferredTriples <= streamingThreshold) {
                // stream the data
                Set<Triple> inferredTriples = TripleUtils.loadCompressedCSVTriples(inferredTriplesFile, false);
                log.info("Total triples to stream into Big Data: " + inferredTriples.size());
                cloud.streamObjectsIntoBigData(inferredTriples, TableUtils.getBigQueryTripleTable(table));
                log.info("All inferred triples are streamed into Big Data table");
            } else {
                // load the data through cloud storage
                // upload the file to cloud storage
                log.info("Uploading inferred triples file into cloud storage: " + inferredTriplesFile);
                StorageObject file = cloud.uploadFileToCloudStorage(inferredTriplesFile, bucket);
                log.info("File " + file + ", uploaded successfully. Now loading it into big data.");

                String jobId = cloud.loadCloudStorageFilesIntoBigData(Lists.newArrayList(file.getUri()),
                        TableUtils.getBigQueryTripleTable(table), false);

                log.info("All inferred triples are loaded into Big Data table through cloud storage, completed jobId: "
                        + jobId);
            }

            // reset empty retries
            emptyRetries = 0;
        } else {
            log.info("No new inferred triples");
            // increment empty retries
            emptyRetries++;
        }

        log.info("Total inferred triples so far = " + totalInferredTriples + ", current retry count: " + emptyRetries);

        ApiUtils.block(Config.getIntegerProperty(Constants.REASON_SLEEP_KEY, 20));

        // FIXME move into the particular cloud implementation service
        long elapsed = System.currentTimeMillis() - start;
        decoratedTable = "[" + table + "@-" + elapsed + "-]";

        log.info("Using table decorator: " + decoratedTable + ". Empty retries count: " + emptyRetries);

    } while (!(emptyRetries == maxRetries));
    // end timestamp loop

    log.info("Finished reasoning, total inferred triples = " + totalInferredTriples);
    log.info("Number of avoided duplicate terms = " + this.duplicates);
    log.info("Total rows retrieved from big data = " + this.totalRows);
}
From source file:io.ecarf.core.cloud.task.processor.reason.phase1.DoReasonTask5.java
@Override
public void run() throws IOException {
    GoogleCloudService cloud = (GoogleCloudService) this.getCloudService();
    //String table = metadata.getValue(EcarfMetaData.ECARF_TABLE);
    //Set<String> terms = metadata.getTerms();
    //String schemaFile = metadata.getValue(EcarfMetaData.ECARF_SCHEMA);
    //String bucket = metadata.getBucket();
    Stopwatch stopwatch1 = Stopwatch.createUnstarted();
    Stopwatch stopwatch2 = Stopwatch.createUnstarted();
    Set<String> termsSet;

    if (terms == null) {
        // too large, probably saved as a file
        //String termsFile = metadata.getValue(EcarfMetaData.ECARF_TERMS_FILE);
        log.info("Using json file for terms: " + termsFile);
        Validate.notNull(termsFile);

        String localTermsFile = Utils.TEMP_FOLDER + termsFile;
        cloud.downloadObjectFromCloudStorage(termsFile, localTermsFile, bucket);

        // convert from JSON
        termsSet = io.cloudex.framework.utils.FileUtils.jsonFileToSet(localTermsFile);
    } else {
        termsSet = ObjectUtils.csvToSet(terms);
    }

    String localSchemaFile = Utils.TEMP_FOLDER + schemaFile;
    // download the file from the cloud storage
    cloud.downloadObjectFromCloudStorage(schemaFile, localSchemaFile, bucket);

    // uncompress if compressed
    if (GzipUtils.isCompressedFilename(schemaFile)) {
        localSchemaFile = GzipUtils.getUncompressedFilename(localSchemaFile);
    }

    Map<String, Set<Triple>> allSchemaTriples = TripleUtils.getRelevantSchemaNTriples(localSchemaFile,
            TermUtils.RDFS_TBOX);

    // get all the triples we care about
    Map<Term, Set<Triple>> schemaTerms = new HashMap<>();
    for (String term : termsSet) {
        if (allSchemaTriples.containsKey(term)) {
            schemaTerms.put(new Term(term), allSchemaTriples.get(term));
        }
    }

    String decoratedTable = table;
    int emptyRetries = 0;
    int totalInferredTriples = 0;
    int maxRetries = Config.getIntegerProperty(Constants.REASON_RETRY_KEY, 6);
    String instanceId = cloud.getInstanceId();

    // timestamp loop
    do {
        List<String> productiveTerms = new ArrayList<>();
        int interimInferredTriples = 0;

        // First of all run all the queries asynchronously and remember the jobId and filename for each term
        List<Callable<Void>> queryTasks = new ArrayList<>();
        List<Callable<Void>> saveTasks = new ArrayList<>();

        for (Entry<Term, Set<Triple>> entry : schemaTerms.entrySet()) {
            Term term = entry.getKey();
            Set<Triple> triples = entry.getValue();

            QuerySubTask queryTask = new QuerySubTask(term, triples, decoratedTable, cloud);
            queryTasks.add(queryTask);

            SaveResultsSubTask saveTask = new SaveResultsSubTask(term, cloud);
            saveTasks.add(saveTask);
        }

        // invoke all the queries in parallel
        this.invokeAll(queryTasks);

        long start = System.currentTimeMillis();
        String inferredTriplesFile = Utils.TEMP_FOLDER + instanceId + '_' + start + Constants.DOT_INF;

        // save all the query results in files in parallel
        this.invokeAll(saveTasks);

        try (PrintWriter writer = new PrintWriter(
                new GZIPOutputStream(new FileOutputStream(inferredTriplesFile), Constants.GZIP_BUF_SIZE))) {

            // now loop through the queries
            for (Entry<Term, Set<Triple>> entry : schemaTerms.entrySet()) {
                Term term = entry.getKey();
                BigInteger rows = term.getRows();
                this.totalBytes = this.totalBytes + term.getBytes();

                // only process if triples are found matching this term
                if (!BigInteger.ZERO.equals(rows)) {
                    stopwatch1.start();
                    log.info("Reasoning for Term: " + term);

                    Set<Triple> schemaTriples = entry.getValue();
                    log.info("Schema Triples: " + Joiner.on('\n').join(schemaTriples));

                    List<String> select = GenericRule.getSelect(schemaTriples);

                    int inferredTriplesCount = this.inferAndSaveTriplesToFile(term, select, schemaTriples, rows,
                            decoratedTable, writer);
                    productiveTerms.add(term.getTerm());
                    interimInferredTriples += inferredTriplesCount;
                    this.totalRows = this.totalRows.add(rows);
                    stopwatch1.stop();
                } else {
                    log.info("Skipping term as no data found: " + term);
                }
            }
        }

        totalInferredTriples += interimInferredTriples;

        if (interimInferredTriples > 0) {
            // stream smaller numbers of inferred triples
            // try uploading from cloud storage
            int streamingThreshold = Config.getIntegerProperty("ecarf.io.reasoning.streaming.threshold", 100000);

            log.info("Inserting " + interimInferredTriples + ", inferred triples into Big Data table for "
                    + productiveTerms.size() + " productive terms. Filename: " + inferredTriplesFile);

            if (interimInferredTriples <= streamingThreshold) {
                // stream the data
                Set<Triple> inferredTriples = TripleUtils.loadCompressedCSVTriples(inferredTriplesFile, false);
                log.info("Total triples to stream into Big Data: " + inferredTriples.size());
                cloud.streamObjectsIntoBigData(inferredTriples, TableUtils.getBigQueryTripleTable(table));
                log.info("All inferred triples are streamed into Big Data table");
            } else {
                // load the data through cloud storage
                // upload the file to cloud storage
                log.info("Uploading inferred triples file into cloud storage: " + inferredTriplesFile);
                StorageObject file = cloud.uploadFileToCloudStorage(inferredTriplesFile, bucket);
                log.info("File " + file + ", uploaded successfully. Now loading it into big data.");

                String jobId = cloud.loadCloudStorageFilesIntoBigData(Lists.newArrayList(file.getUri()),
                        TableUtils.getBigQueryTripleTable(table), false);

                log.info("All inferred triples are loaded into Big Data table through cloud storage, completed jobId: "
                        + jobId);
            }

            // reset empty retries
            emptyRetries = 0;
            stopwatch2.reset();
        } else {
            log.info("No new inferred triples");
            // increment empty retries
            emptyRetries++;
            if (!stopwatch2.isRunning()) {
                stopwatch2.start();
            }
        }

        log.info("Total inferred triples so far = " + totalInferredTriples + ", current retry count: " + emptyRetries);

        if (emptyRetries < maxRetries) {
            ApiUtils.block(Config.getIntegerProperty(Constants.REASON_SLEEP_KEY, 20));

            // FIXME move into the particular cloud implementation service
            long elapsed = System.currentTimeMillis() - start;
            decoratedTable = "[" + table + "@-" + elapsed + "-]";

            log.info("Using table decorator: " + decoratedTable + ". Empty retries count: " + emptyRetries);
        }

    } while (emptyRetries < maxRetries);
    // end timestamp loop

    executor.shutdown();

    log.info("Finished reasoning, total inferred triples = " + totalInferredTriples);
    log.info("Number of avoided duplicate terms = " + this.duplicates);
    log.info("Total rows retrieved from big data = " + this.totalRows);
    log.info("Total processed GBytes = " + ((double) this.totalBytes / FileUtils.ONE_GB));
    log.info("Total process reasoning time (serialization in inf file) = " + stopwatch1);
    log.info("Total time spent in empty inference cycles = " + stopwatch2);
}
From source file:com.thoughtworks.go.server.websocket.ConsoleLogSender.java
byte[] maybeGzipIfLargeEnough(byte[] input) {
    if (input.length < 512) {
        return input;
    }

    // To avoid having to re-allocate the internal byte array, allocate an initial buffer
    // assuming a safe 10:1 compression ratio
    final ByteArrayOutputStream gzipBytes = new ByteArrayOutputStream(input.length / 10);

    try {
        final GZIPOutputStream gzipOutputStream = new GZIPOutputStream(gzipBytes, 1024 * 8);
        gzipOutputStream.write(input);
        gzipOutputStream.close();
    } catch (IOException e) {
        LOGGER.error("Could not gzip {}", input);
    }

    return gzipBytes.toByteArray();
}