List of usage examples for java.io.File.length()
public long length()
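Before the longer examples, a minimal sketch of the call itself (the file name example.txt is hypothetical). Per the Javadoc, length() returns the file size in bytes, 0L if the file does not exist, and an unspecified value if the pathname denotes a directory:

import java.io.File;

public class LengthDemo {
    public static void main(String[] args) {
        File f = new File("example.txt"); // hypothetical path
        if (f.isFile()) {
            System.out.println("File size: " + f.length() + " Bytes");
        } else {
            // missing file: length() returns 0L; directory: value is unspecified
            System.out.println("Not a regular file; size not meaningful");
        }
    }
}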
From source file:MainClass.java
public static void main(String args[]) {
    File f1 = new File("MainClass.java");
    System.out.println("File Name: " + f1.getName());
    System.out.println("Path: " + f1.getPath());
    System.out.println("Abs Path: " + f1.getAbsolutePath());
    System.out.println("Parent: " + f1.getParent());
    System.out.println(f1.exists() ? "exists" : "does not exist");
    System.out.println(f1.canWrite() ? "is writeable" : "is not writeable");
    System.out.println(f1.canRead() ? "is readable" : "is not readable");
    System.out.println(f1.isDirectory() ? "is a directory" : "is not a directory");
    System.out.println(f1.isFile() ? "is normal file" : "might be a named pipe");
    System.out.println(f1.isAbsolute() ? "is absolute" : "is not absolute");
    System.out.println("File last modified: " + f1.lastModified());
    System.out.println("File size: " + f1.length() + " Bytes");
}
From source file:FileDemo.java
public static void main(String args[]) {
    File f1 = new File("/java/COPYRIGHT");
    System.out.println("File Name: " + f1.getName());
    System.out.println("Path: " + f1.getPath());
    System.out.println("Abs Path: " + f1.getAbsolutePath());
    System.out.println("Parent: " + f1.getParent());
    System.out.println(f1.exists() ? "exists" : "does not exist");
    System.out.println(f1.canWrite() ? "is writeable" : "is not writeable");
    System.out.println(f1.canRead() ? "is readable" : "is not readable");
    // note the parentheses: the original grouped the concatenation so that a
    // directory printed just "is "; this prints "is a directory" / "is not a directory"
    System.out.println("is " + (f1.isDirectory() ? "" : "not ") + "a directory");
    System.out.println(f1.isFile() ? "is normal file" : "might be a named pipe");
    System.out.println(f1.isAbsolute() ? "is absolute" : "is not absolute");
    System.out.println("File last modified: " + f1.lastModified());
    System.out.println("File size: " + f1.length() + " Bytes");
}
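Both examples above print length() directly, which silently yields 0 when the file is missing. If a missing file should be an error rather than a zero, java.nio.file.Files.size is an alternative; a minimal sketch reusing the first example's file name:

import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;

public class NioSizeDemo {
    public static void main(String[] args) throws IOException {
        Path p = Paths.get("MainClass.java");
        // Files.size throws NoSuchFileException instead of returning 0
        System.out.println("File size: " + Files.size(p) + " Bytes");
    }
}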
From source file:com.aerospike.load.AerospikeLoad.java
public static void main(String[] args) throws IOException {
    Thread statPrinter = new Thread(new PrintStat(counters));
    try {
        log.info("Aerospike loader started");
        Options options = new Options();
        options.addOption("h", "host", true, "Server hostname (default: localhost)");
        options.addOption("p", "port", true, "Server port (default: 3000)");
        options.addOption("n", "namespace", true, "Namespace (default: test)");
        options.addOption("s", "set", true, "Set name. (default: null)");
        options.addOption("c", "config", true, "Column definition file name");
        options.addOption("wt", "write-threads", true, "Number of writer threads (default: Number of cores * 5)");
        options.addOption("rt", "read-threads", true, "Number of reader threads (default: Number of cores * 1)");
        options.addOption("l", "rw-throttle", true, "Throttling of reader to writer (default: 10k)");
        options.addOption("tt", "transaction-timeout", true, "Write transaction timeout in milliseconds (default: no timeout)");
        options.addOption("et", "expiration-time", true, "Expiration time of records in seconds (default: never expire)");
        options.addOption("T", "timezone", true, "Timezone of source where data dump is taken (default: local timezone)");
        options.addOption("ec", "abort-error-count", true, "Error count to abort (default: 0)");
        options.addOption("wa", "write-action", true, "Write action if key already exists (default: update)");
        options.addOption("v", "verbose", false, "Logging all");
        options.addOption("u", "usage", false, "Print usage.");

        CommandLineParser parser = new PosixParser();
        CommandLine cl = parser.parse(options, args, false);
        if (args.length == 0 || cl.hasOption("u")) {
            logUsage(options);
            return;
        }
        if (cl.hasOption("l")) {
            rwThrottle = Integer.parseInt(cl.getOptionValue("l"));
        } else {
            rwThrottle = Constants.READLOAD;
        }

        // Get all command line options
        params = Utils.parseParameters(cl);

        // Get client instance
        AerospikeClient client = new AerospikeClient(params.host, params.port);
        if (!client.isConnected()) {
            log.error("Client is not able to connect: " + params.host + ":" + params.port);
            return;
        }
        if (params.verbose) {
            log.setLevel(Level.DEBUG);
        }

        // Get available processors to calculate default number of threads
        int cpus = Runtime.getRuntime().availableProcessors();
        nWriterThreads = cpus * scaleFactor;
        nReaderThreads = cpus;

        // Get writer thread count, clamped to [1, Constants.MAX_THREADS]
        if (cl.hasOption("wt")) {
            nWriterThreads = Integer.parseInt(cl.getOptionValue("wt"));
            nWriterThreads = (nWriterThreads > 0
                    ? (nWriterThreads > Constants.MAX_THREADS ? Constants.MAX_THREADS : nWriterThreads)
                    : 1);
            log.debug("Using writer threads: " + nWriterThreads);
        }
        writerPool = Executors.newFixedThreadPool(nWriterThreads);

        // Get reader thread count, clamped to [1, Constants.MAX_THREADS]
        if (cl.hasOption("rt")) {
            nReaderThreads = Integer.parseInt(cl.getOptionValue("rt"));
            nReaderThreads = (nReaderThreads > 0
                    ? (nReaderThreads > Constants.MAX_THREADS ? Constants.MAX_THREADS : nReaderThreads)
                    : 1);
            log.debug("Using reader threads: " + nReaderThreads);
        }

        String columnDefinitionFileName = cl.getOptionValue("c", null);
        log.debug("Column definition files/directory: " + columnDefinitionFileName);
        if (columnDefinitionFileName == null) {
            log.error("Column definition files/directory not specified. Use -c <file name>");
            return;
        }
        File columnDefinitionFile = new File(columnDefinitionFileName);
        if (!columnDefinitionFile.exists()) {
            log.error("Column definition files/directory does not exist: "
                    + Utils.getFileName(columnDefinitionFileName));
            return;
        }

        // Get data file list
        String[] files = cl.getArgs();
        if (files.length == 0) {
            log.error("No data file specified: add <file/dir name> to end of the command");
            return;
        }
        List<String> allFileNames = Utils.getFileNames(files);
        if (allFileNames.size() == 0) {
            log.error("Given data files/directory does not exist");
            return;
        }
        for (int i = 0; i < allFileNames.size(); i++) {
            log.debug("File names: " + Utils.getFileName(allFileNames.get(i)));
            File file = new File(allFileNames.get(i));
            // progress is tracked in bytes: recordTotal accumulates file sizes, not record counts
            counters.write.recordTotal = counters.write.recordTotal + file.length();
        }

        // Remove column definition file from list
        allFileNames.remove(columnDefinitionFileName);
        log.info("Number of data files: " + allFileNames.size());

        /*
         * Process column definition file to get metadata and bin mapping.
         */
        metadataColumnDefs = new ArrayList<ColumnDefinition>();
        binColumnDefs = new ArrayList<ColumnDefinition>();
        metadataConfigs = new HashMap<String, String>();
        if (Parser.processJSONColumnDefinitions(columnDefinitionFile, metadataConfigs, metadataColumnDefs,
                binColumnDefs, params)) {
            log.info("Config file processed.");
        } else {
            throw new Exception("Config file parsing error");
        }

        // Add metadata of config to parameters
        String metadata;
        if ((metadata = metadataConfigs.get(Constants.INPUT_TYPE)) != null) {
            params.fileType = metadata;
            if (params.fileType.equals(Constants.CSV_FILE)) {
                // Version check
                metadata = metadataConfigs.get(Constants.VERSION);
                String[] vNumber = metadata.split("\\.");
                int v1 = Integer.parseInt(vNumber[0]);
                int v2 = Integer.parseInt(vNumber[1]);
                if ((v1 <= Constants.MajorV) && (v2 <= Constants.MinorV)) {
                    log.debug("Config version used: " + metadata);
                } else
                    throw new Exception("\"" + Constants.VERSION + ":" + metadata + "\" is not supported");

                // Set delimiter
                if ((metadata = metadataConfigs.get(Constants.DELIMITER)) != null && metadata.length() == 1) {
                    params.delimiter = metadata.charAt(0);
                } else {
                    log.warn("\"" + Constants.DELIMITER + ":" + metadata
                            + "\" is not properly specified in config file. Default is ','");
                }

                if ((metadata = metadataConfigs.get(Constants.IGNORE_FIRST_LINE)) != null) {
                    params.ignoreFirstLine = metadata.equals("true");
                } else {
                    log.warn("\"" + Constants.IGNORE_FIRST_LINE + ":" + metadata
                            + "\" is not properly specified in config file. Default is false");
                }

                if ((metadata = metadataConfigs.get(Constants.COLUMNS)) != null) {
                    counters.write.colTotal = Integer.parseInt(metadata);
                } else {
                    throw new Exception("\"" + Constants.COLUMNS + ":" + metadata
                            + "\" is not properly specified in config file");
                }
            } else {
                throw new Exception("\"" + params.fileType + "\" is not supported in config file");
            }
        } else {
            throw new Exception("\"" + Constants.INPUT_TYPE + "\" is not specified in config file");
        }

        // Add config input to column definitions
        if (params.fileType.equals(Constants.CSV_FILE)) {
            List<String> binName = null;
            if (params.ignoreFirstLine) {
                String line;
                BufferedReader br = new BufferedReader(
                        new InputStreamReader(new FileInputStream(allFileNames.get(0)), "UTF8"));
                if ((line = br.readLine()) != null) {
                    binName = Parser.getCSVRawColumns(line, params.delimiter);
                    br.close();
                    if (binName.size() != counters.write.colTotal) {
                        throw new Exception("Number of columns in config file and data file do not match."
                                + " Datafile: " + Utils.getFileName(allFileNames.get(0))
                                + " Configfile: " + Utils.getFileName(columnDefinitionFileName));
                    }
                }
            }

            // Update column definitions for metadata
            for (int i = 0; i < metadataColumnDefs.size(); i++) {
                if (!metadataColumnDefs.get(i).staticValue) {
                    if (metadataColumnDefs.get(i).binValuePos < 0) {
                        if (metadataColumnDefs.get(i).columnName == null) {
                            if (metadataColumnDefs.get(i).jsonPath == null) {
                                log.error("Dynamic metadata has improper info: " + metadataColumnDefs.toString()); // TODO
                            } else {
                                // TODO check for json_path
                            }
                        } else {
                            if (params.ignoreFirstLine) {
                                if (binName.indexOf(metadataColumnDefs.get(i).binValueHeader) != -1) {
                                    metadataColumnDefs.get(i).binValuePos = binName
                                            .indexOf(metadataColumnDefs.get(i).binValueHeader);
                                } else {
                                    throw new Exception("binName missing in data file: "
                                            + metadataColumnDefs.get(i).binValueHeader);
                                }
                            }
                        }
                    } else {
                        if (params.ignoreFirstLine)
                            metadataColumnDefs.get(i).binValueHeader = binName
                                    .get(metadataColumnDefs.get(i).binValuePos);
                    }
                }
                if ((!metadataColumnDefs.get(i).staticValue) && (metadataColumnDefs.get(i).binValuePos < 0)) {
                    throw new Exception("Information for bin mapping is missing in config file: "
                            + metadataColumnDefs.get(i));
                }
                if (metadataColumnDefs.get(i).srcType == null) {
                    throw new Exception("Source data type is not properly mentioned: " + metadataColumnDefs.get(i));
                }
                // the original compared binNameHeader to Constants.SET with ==; .equals is intended
                if (Constants.SET.equals(metadataColumnDefs.get(i).binNameHeader)
                        && !metadataColumnDefs.get(i).srcType.equals(SrcColumnType.STRING)) {
                    throw new Exception("Set name should be string type: " + metadataColumnDefs.get(i));
                }
                if (metadataColumnDefs.get(i).binNameHeader.equalsIgnoreCase(Constants.SET)
                        && params.set != null) {
                    throw new Exception("Set name is given both in config file and command line. Provide only once.");
                }
            }

            // Update column definitions for bins
            for (int i = 0; i < binColumnDefs.size(); i++) {
                if (!binColumnDefs.get(i).staticName) {
                    if (binColumnDefs.get(i).binNamePos < 0) {
                        if (binColumnDefs.get(i).columnName == null) {
                            if (binColumnDefs.get(i).jsonPath == null) {
                                log.error("Dynamic bin has improper info"); // TODO
                            } else {
                                // TODO check for json_path
                            }
                        } else {
                            if (params.ignoreFirstLine) {
                                if (binName.indexOf(binColumnDefs.get(i).binNameHeader) != -1) {
                                    binColumnDefs.get(i).binNamePos = binName
                                            .indexOf(binColumnDefs.get(i).binNameHeader);
                                } else {
                                    throw new Exception("binName missing in data file: "
                                            + binColumnDefs.get(i).binNameHeader);
                                }
                            }
                        }
                    } else {
                        if (params.ignoreFirstLine)
                            binColumnDefs.get(i).binNameHeader = binName.get(binColumnDefs.get(i).binNamePos);
                    }
                }

                if (!binColumnDefs.get(i).staticValue) {
                    if (binColumnDefs.get(i).binValuePos < 0) {
                        if (binColumnDefs.get(i).columnName == null) {
                            if (binColumnDefs.get(i).jsonPath == null) {
                                log.error("Dynamic bin has improper info"); // TODO
                            } else {
                                // TODO check for json_path
                            }
                        } else {
                            if (params.ignoreFirstLine) {
                                if (binName.contains(binColumnDefs.get(i).binValueHeader)) {
                                    binColumnDefs.get(i).binValuePos = binName
                                            .indexOf(binColumnDefs.get(i).binValueHeader);
                                } else if (!binColumnDefs.get(i).binValueHeader.toLowerCase()
                                        .equals(Constants.SYSTEM_TIME)) {
                                    throw new Exception("Wrong column name mentioned in config file: "
                                            + binColumnDefs.get(i).binValueHeader);
                                }
                            }
                        }
                    } else {
                        if (params.ignoreFirstLine)
                            binColumnDefs.get(i).binValueHeader = binName.get(binColumnDefs.get(i).binValuePos);
                    }

                    // check for missing entries in config file
                    if (binColumnDefs.get(i).binValuePos < 0 && binColumnDefs.get(i).binValueHeader == null) {
                        throw new Exception("Information missing (value header or bin mapping) in config file: "
                                + binColumnDefs.get(i));
                    }
                    // check for proper data type in config file
                    if (binColumnDefs.get(i).srcType == null) {
                        throw new Exception("Source data type is not properly mentioned: " + binColumnDefs.get(i));
                    }
                    // check for valid destination type
                    if ((binColumnDefs.get(i).srcType.equals(SrcColumnType.TIMESTAMP)
                            || binColumnDefs.get(i).srcType.equals(SrcColumnType.BLOB))
                            && binColumnDefs.get(i).dstType == null) {
                        throw new Exception("Destination type is not mentioned: " + binColumnDefs.get(i));
                    }
                    // check for encoding
                    if (binColumnDefs.get(i).dstType != null && binColumnDefs.get(i).encoding == null) {
                        throw new Exception("Encoding is not given for src-dst type conversion: "
                                + binColumnDefs.get(i));
                    }
                    // check for valid encoding
                    if (binColumnDefs.get(i).srcType.equals(SrcColumnType.BLOB)
                            && !binColumnDefs.get(i).encoding.equals(Constants.HEX_ENCODING)) {
                        throw new Exception("Wrong encoding for blob data: " + binColumnDefs.get(i));
                    }
                }

                // check static bin name mapped to dynamic bin value
                if ((binColumnDefs.get(i).binNamePos == binColumnDefs.get(i).binValuePos)
                        && (binColumnDefs.get(i).binNamePos != -1)) {
                    throw new Exception("Static bin name mapped to dynamic bin value: " + binColumnDefs.get(i));
                }
                // check for a missing or over-long bin name
                // (the original used &&, which either never fires or throws a NullPointerException; || is intended)
                if (binColumnDefs.get(i).binNameHeader == null
                        || binColumnDefs.get(i).binNameHeader.length() > Constants.BIN_NAME_LENGTH) {
                    throw new Exception("Information missing binName or large binName in config file: "
                            + binColumnDefs.get(i));
                }
            }
        }

        log.info(params.toString());
        log.debug("MetadataConfig: " + metadataColumnDefs);
        log.debug("BinColumnDefs: " + binColumnDefs);

        // Start PrintStat thread
        statPrinter.start();

        // Reader pool size
        ExecutorService readerPool = Executors.newFixedThreadPool(
                nReaderThreads > allFileNames.size() ? allFileNames.size() : nReaderThreads);
        log.info("Reader pool size: " + nReaderThreads);

        // Submit all tasks to reader thread pool
        for (String aFile : allFileNames) {
            log.debug("Submitting task for: " + aFile);
            readerPool.submit(new AerospikeLoad(aFile, client, params));
        }

        // Wait for reader pool to complete
        readerPool.shutdown();
        log.info("Shutdown down reader thread pool");
        while (!readerPool.isTerminated())
            ; // busy-wait; readerPool.awaitTermination(20, TimeUnit.MINUTES) would be gentler
        log.info("Reader thread pool terminated");

        // Wait for writer pool to complete after getting all tasks from reader pool
        writerPool.shutdown();
        log.info("Shutdown down writer thread pool");
        while (!writerPool.isTerminated())
            ;
        log.info("Writer thread pool terminated");

        // Print final statistics of aerospike-loader
        log.info("Final statistics of importer: (Successful writes = " + counters.write.writeCount.get()
                + ", Errors = "
                + (counters.write.writeErrors.get() + counters.write.readErrors.get()
                        + counters.write.processingErrors.get())
                + " (" + counters.write.writeErrors.get() + "-Write, " + counters.write.readErrors.get()
                + "-Read, " + counters.write.processingErrors.get() + "-Processing))");
    } catch (Exception e) {
        log.error(e);
        if (log.isDebugEnabled()) {
            e.printStackTrace();
        }
    } finally {
        // Stop statistic printer thread
        statPrinter.interrupt();
        log.info("Aerospike loader completed");
    }
}
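The loader above seeds its progress counter by summing file.length() over every input file, so progress is measured in bytes rather than records. A standalone sketch of that idiom (the data directory name is hypothetical):

import java.io.File;

public class TotalBytesDemo {
    public static void main(String[] args) {
        File dir = new File("data"); // hypothetical directory of input files
        File[] entries = dir.listFiles();
        long totalBytes = 0;
        int fileCount = 0;
        if (entries != null) {
            for (File f : entries) {
                if (f.isFile()) {
                    totalBytes += f.length(); // bytes, not records
                    fileCount++;
                }
            }
        }
        System.out.println("Total input size: " + totalBytes + " bytes across " + fileCount + " files");
    }
}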
From source file:de.unileipzig.ub.indexer.App.java
public static void main(String[] args) throws IOException {
    // create Options object
    Options options = new Options();
    options.addOption("h", "help", false, "display this help");
    options.addOption("f", "filename", true, "name of the JSON file whose content should be indexed");
    options.addOption("i", "index", true, "the name of the target index");
    options.addOption("d", "doctype", true, "the name of the doctype (title, local, ...)");
    options.addOption("t", "host", true, "elasticsearch hostname (default: 0.0.0.0)");
    options.addOption("p", "port", true, "transport port (that's NOT the http port, default: 9300)");
    options.addOption("c", "cluster", true, "cluster name (default: elasticsearch_mdma)");
    options.addOption("b", "bulksize", true, "number of docs sent in one request (default: 3000)");
    options.addOption("v", "verbose", false, "show processing speed while indexing");
    options.addOption("s", "status", false, "only show status of index for file");
    options.addOption("r", "repair", false, "attempt to repair recoverable inconsistencies on the go");
    options.addOption("e", "debug", false, "set logging level to debug");
    options.addOption("l", "logfile", true, "logfile - if not specified, only log to stdout");
    options.addOption("m", "memcached", true, "host and port of memcached (default: localhost:11211)");
    options.addOption("z", "latest-flag-on", true, "enable latest flag according to field (within content, e.g. 001)");
    options.addOption("a", "flat", false, "flat-mode: do not check for inconsistencies");

    CommandLineParser parser = new PosixParser();
    CommandLine cmd = null;
    try {
        cmd = parser.parse(options, args);
    } catch (ParseException ex) {
        logger.error(ex);
        System.exit(1);
    }

    // setup logging
    Properties systemProperties = System.getProperties();
    systemProperties.put("net.spy.log.LoggerImpl", "net.spy.memcached.compat.log.Log4JLogger");
    System.setProperties(systemProperties);
    Logger.getLogger("net.spy.memcached").setLevel(Level.ERROR);

    Properties props = new Properties();
    props.load(props.getClass().getResourceAsStream("/log4j.properties"));
    if (cmd.hasOption("debug")) {
        props.setProperty("log4j.logger.de.unileipzig", "DEBUG");
    }
    if (cmd.hasOption("logfile")) {
        props.setProperty("log4j.rootLogger", "INFO, stdout, F");
        props.setProperty("log4j.appender.F", "org.apache.log4j.FileAppender");
        props.setProperty("log4j.appender.F.File", cmd.getOptionValue("logfile"));
        props.setProperty("log4j.appender.F.layout", "org.apache.log4j.PatternLayout");
        props.setProperty("log4j.appender.F.layout.ConversionPattern", "%5p | %d | %F | %L | %m%n");
    }
    PropertyConfigurator.configure(props);

    InetAddress addr = InetAddress.getLocalHost();
    String memcachedHostAndPort = addr.getHostAddress() + ":11211";
    if (cmd.hasOption("m")) {
        memcachedHostAndPort = cmd.getOptionValue("m");
    }

    // setup caching
    try {
        if (memcachedClient == null) {
            // note: the original hardcoded "0.0.0.0:11211" here, ignoring memcachedHostAndPort
            memcachedClient = new MemcachedClient(
                    new ConnectionFactoryBuilder().setFailureMode(FailureMode.Cancel).build(),
                    AddrUtil.getAddresses(memcachedHostAndPort));
            try {
                // give client and server a moment to connect
                Thread.sleep(300);
            } catch (InterruptedException ex) {
            }
            Collection availableServers = memcachedClient.getAvailableServers();
            logger.info(availableServers);
            if (availableServers.size() == 0) {
                logger.info("no memcached servers found");
                memcachedClient.shutdown();
                memcachedClient = null;
            } else {
                logger.info(availableServers.size() + " memcached server(s) detected, fine.");
            }
        }
    } catch (IOException ex) {
        logger.warn("couldn't create a connection, bailing out: " + ex.getMessage());
    }

    // process options
    if (cmd.hasOption("h")) {
        HelpFormatter formatter = new HelpFormatter();
        formatter.printHelp("indexer", options, true);
        quit(0);
    }

    boolean verbose = false;
    if (cmd.hasOption("verbose")) {
        verbose = true;
    }

    // ES options
    String[] hosts = new String[] { "0.0.0.0" };
    int port = 9300;
    String clusterName = "elasticsearch_mdma";
    int bulkSize = 3000;

    if (cmd.hasOption("host")) {
        hosts = cmd.getOptionValues("host");
    }
    if (cmd.hasOption("port")) {
        port = Integer.parseInt(cmd.getOptionValue("port"));
    }
    if (cmd.hasOption("cluster")) {
        clusterName = cmd.getOptionValue("cluster");
    }
    if (cmd.hasOption("bulksize")) {
        bulkSize = Integer.parseInt(cmd.getOptionValue("bulksize"));
        if (bulkSize < 1 || bulkSize > 100000) {
            logger.error("bulksize must be between 1 and 100,000");
            quit(1);
        }
    }

    // ES client
    // note: the original passed the literal "elasticsearch_mdma" here instead of clusterName
    final Settings settings = ImmutableSettings.settingsBuilder().put("cluster.name", clusterName).build();
    final TransportClient client = new TransportClient(settings);
    for (String host : hosts) {
        client.addTransportAddress(new InetSocketTransportAddress(host, port));
    }

    if (cmd.hasOption("filename") && cmd.hasOption("index") && cmd.hasOption("doctype")) {

        final String filename = cmd.getOptionValue("filename");
        final File _file = new File(filename);
        if (_file.length() == 0) {
            logger.info(_file.getAbsolutePath() + " is empty, skipping");
            quit(0); // file is empty
        }

        // for flat mode: leave a stampfile beside the json to
        // indicate previous successful processing
        File directory = new File(filename).getParentFile();
        File stampfile = new File(directory, DigestUtils.shaHex(filename) + ".indexed");

        long start = System.currentTimeMillis();
        long lineCount = 0;

        final String indexName = cmd.getOptionValue("index");
        final String docType = cmd.getOptionValue("doctype");
        BulkRequestBuilder bulkRequest = client.prepareBulk();

        try {
            if (cmd.hasOption("flat")) {
                // flat mode
                if (stampfile.exists()) {
                    logger.info("SKIPPING, since it seems this file has already been imported (found: "
                            + stampfile.getAbsolutePath() + ")");
                    quit(0);
                }
            } else {
                final String srcSHA1 = extractSrcSHA1(filename);
                logger.debug(filename + " srcsha1: " + srcSHA1);

                long docsInIndex = getIndexedRecordCount(client, indexName, srcSHA1);
                logger.debug(filename + " indexed: " + docsInIndex);

                long docsInFile = getLineCount(filename);
                logger.debug(filename + " lines: " + docsInFile);

                // in non-flat-mode, indexing would take care of inconsistencies
                if (docsInIndex == docsInFile) {
                    logger.info("UP-TO-DATE: " + filename + " (" + docsInIndex + ", " + srcSHA1 + ")");
                    client.close();
                    quit(0);
                }

                if (docsInIndex > 0) {
                    logger.warn("INCONSISTENCY DETECTED: " + filename + ": indexed:" + docsInIndex
                            + " lines:" + docsInFile);
                    if (!cmd.hasOption("r")) {
                        logger.warn("Please re-run indexer with --repair flag or delete residues first with: "
                                + "$ curl -XDELETE " + hosts[0] + ":9200/" + indexName
                                + "/_query -d ' {\"term\" : { \"meta.srcsha1\" : \"" + srcSHA1 + "\" }}'");
                        client.close();
                        quit(1);
                    } else {
                        logger.info("Attempting to clear residues...");
                        // attempt to repair once
                        DeleteByQueryResponse dbqr = client.prepareDeleteByQuery(indexName)
                                .setQuery(termQuery("meta.srcsha1", srcSHA1)).execute().actionGet();
                        Iterator<IndexDeleteByQueryResponse> it = dbqr.iterator();
                        long deletions = 0;
                        while (it.hasNext()) {
                            IndexDeleteByQueryResponse response = it.next();
                            deletions += 1;
                        }
                        logger.info("Deleted residues of " + filename);
                        logger.info("Refreshing [" + indexName + "]");
                        RefreshResponse refreshResponse = client.admin().indices()
                                .refresh(new RefreshRequest(indexName)).actionGet();

                        long indexedAfterDelete = getIndexedRecordCount(client, indexName, srcSHA1);
                        logger.info(indexedAfterDelete + " docs remained");
                        if (indexedAfterDelete > 0) {
                            logger.warn("Not all residues cleaned. Try to fix this manually: $ curl -XDELETE "
                                    + hosts[0] + ":9200/" + indexName
                                    + "/_query -d ' {\"term\" : { \"meta.srcsha1\" : \"" + srcSHA1 + "\" }}'");
                            quit(1);
                        } else {
                            logger.info("Residues are gone. Now trying to reindex: " + filename);
                        }
                    }
                }
            }

            logger.info("INDEXING-REQUIRED: " + filename);
            if (cmd.hasOption("status")) {
                quit(0);
            }

            HashSet idsInBatch = new HashSet();
            String idField = null;
            if (cmd.hasOption("z")) {
                idField = cmd.getOptionValue("z");
            }

            final FileReader fr = new FileReader(filename);
            final BufferedReader br = new BufferedReader(fr);
            String line;

            // one line is one document
            while ((line = br.readLine()) != null) {

                // "Latest-Flag" machine; this gets obsolete with a "flat" index
                if (cmd.hasOption("z")) {
                    // flag that indicates whether the document
                    // about to be indexed will be the latest
                    boolean willBeLatest = true;

                    // check if there is a previous (lower meta.timestamp) document with
                    // the same identifier (whatever that may be - queried under "content")
                    final String contentIdentifier = getContentIdentifier(line, idField);
                    idsInBatch.add(contentIdentifier);

                    // assumed in meta.timestamp
                    final Long timestamp = Long.parseLong(getTimestamp(line));

                    logger.debug("Checking whether record is latest (line: " + lineCount + ")");
                    logger.debug(contentIdentifier + ", " + timestamp);

                    // get all docs which match the contentIdentifier,
                    // by filter, which doesn't score
                    final TermFilterBuilder idFilter = new TermFilterBuilder("content." + idField,
                            contentIdentifier);
                    final TermFilterBuilder kindFilter = new TermFilterBuilder("meta.kind", docType);
                    final AndFilterBuilder afb = new AndFilterBuilder();
                    afb.add(idFilter).add(kindFilter);
                    final FilteredQueryBuilder fb = filteredQuery(matchAllQuery(), afb);

                    final SearchResponse searchResponse = client.prepareSearch(indexName)
                            .setSearchType(SearchType.DFS_QUERY_THEN_FETCH).setQuery(fb).setFrom(0)
                            .setSize(1200) // 3 years and 105 days assuming daily updates at the most
                            .setExplain(false).execute().actionGet();

                    final SearchHits searchHits = searchResponse.getHits();
                    logger.debug("docs with this id in the index: " + searchHits.getTotalHits());

                    for (final SearchHit hit : searchHits.getHits()) {
                        final String docId = hit.id();
                        final Map<String, Object> source = hit.sourceAsMap();
                        final Map meta = (Map) source.get("meta");
                        final Long docTimestamp = Long.parseLong(meta.get("timestamp").toString());

                        // if the indexed doc's timestamp is not newer than the current one,
                        // remove its latest flag
                        if (timestamp >= docTimestamp) {
                            source.remove("latest");
                            final ObjectMapper mapper = new ObjectMapper();
                            // put the updated doc back
                            client.prepareIndex(indexName, docType).setCreate(false).setId(docId)
                                    .setSource(mapper.writeValueAsBytes(source))
                                    .execute(new ActionListener<IndexResponse>() {
                                        public void onResponse(IndexResponse rspns) {
                                            logger.debug("Removed latest flag from " + contentIdentifier + ", "
                                                    + docTimestamp + ", " + hit.id() + " since (" + timestamp
                                                    + " >= " + docTimestamp + ")");
                                        }

                                        public void onFailure(Throwable thrwbl) {
                                            logger.error("Could not remove flag from " + hit.id() + ", "
                                                    + contentIdentifier);
                                        }
                                    });
                        } else {
                            logger.debug("Doc " + hit.id() + " is newer (" + docTimestamp + ")");
                            willBeLatest = false;
                        }
                    }

                    if (willBeLatest) {
                        line = setLatestFlag(line);
                        logger.info("Setting latest flag on " + contentIdentifier + ", " + timestamp);
                    }

                    // end of latest-flag machine
                    // beware - this will be correct as long as there are no dups within one bulk!
                }

                bulkRequest.add(client.prepareIndex(indexName, docType).setSource(line));
                lineCount++;
                logger.debug("Added line " + lineCount + " to BULK");
                logger.debug(line);

                if (lineCount % bulkSize == 0) {
                    if (idsInBatch.size() != bulkSize && cmd.hasOption("z")) {
                        logger.error("This batch has duplications in the ID. That's not bad for the index, "
                                + "just makes the latest flag fuzzy");
                        logger.error("Bulk size was: " + bulkSize + ", but " + idsInBatch.size() + " IDs (only)");
                    }
                    idsInBatch.clear();

                    logger.debug("Issuing BULK request");

                    final long actionCount = bulkRequest.numberOfActions();
                    final BulkResponse bulkResponse = bulkRequest.execute().actionGet();
                    final long tookInMillis = bulkResponse.getTookInMillis();

                    if (bulkResponse.hasFailures()) {
                        logger.fatal("FAILED, bulk not indexed. exiting now.");
                        Iterator<BulkItemResponse> it = bulkResponse.iterator();
                        while (it.hasNext()) {
                            BulkItemResponse bir = it.next();
                            if (bir.isFailed()) {
                                Failure failure = bir.getFailure();
                                logger.fatal("id: " + failure.getId() + ", message: " + failure.getMessage()
                                        + ", type: " + failure.getType() + ", index: " + failure.getIndex());
                            }
                        }
                        quit(1);
                    } else {
                        if (verbose) {
                            final double elapsed = System.currentTimeMillis() - start;
                            final double speed = (lineCount / elapsed * 1000);
                            logger.info("OK (" + filename + ") " + lineCount + " docs indexed (" + actionCount
                                    + "/" + tookInMillis + "ms" + "/" + String.format("%.2f", speed) + "r/s)");
                        }
                    }
                    bulkRequest = client.prepareBulk();
                }
            }

            // handle the remaining items
            final long actionCount = bulkRequest.numberOfActions();
            if (actionCount > 0) {
                final BulkResponse bulkResponse = bulkRequest.execute().actionGet();
                final long tookInMillis = bulkResponse.getTookInMillis();

                if (bulkResponse.hasFailures()) {
                    logger.fatal("FAILED, bulk not indexed. exiting now.");
                    Iterator<BulkItemResponse> it = bulkResponse.iterator();
                    while (it.hasNext()) {
                        BulkItemResponse bir = it.next();
                        if (bir.isFailed()) {
                            Failure failure = bir.getFailure();
                            logger.fatal("id: " + failure.getId() + ", message: " + failure.getMessage()
                                    + ", type: " + failure.getType() + ", index: " + failure.getIndex());
                        }
                    }
                    quit(1);
                } else {
                    // trigger update now
                    RefreshResponse refreshResponse = client.admin().indices()
                            .refresh(new RefreshRequest(indexName)).actionGet();
                    if (verbose) {
                        final double elapsed = System.currentTimeMillis() - start;
                        final double speed = (lineCount / elapsed * 1000);
                        logger.info("OK (" + filename + ") " + lineCount + " docs indexed (" + actionCount
                                + "/" + tookInMillis + "ms" + "/" + String.format("%.2f", speed) + "r/s)");
                    }
                }
            }

            br.close();
            client.close();

            final double elapsed = (System.currentTimeMillis() - start) / 1000;
            final double speed = (lineCount / elapsed);
            logger.info("indexing (" + filename + ") " + lineCount + " docs took " + elapsed + "s (speed: "
                    + String.format("%.2f", speed) + "r/s)");

            if (cmd.hasOption("flat")) {
                try {
                    FileUtils.touch(stampfile);
                } catch (IOException ioe) {
                    logger.warn(".indexed file not created. Will reindex everything every time.");
                }
            }
        } catch (IOException e) {
            client.close();
            logger.error(e);
            quit(1);
        } finally {
            client.close();
        }
    }
    quit(0);
}
From source file:com.linkedin.pinot.perf.MultiValueReaderWriterBenchmark.java
public static void main(String[] args) throws Exception {
    List<String> lines = IOUtils.readLines(new FileReader(new File(args[0])));
    int totalDocs = lines.size();
    int max = Integer.MIN_VALUE;
    int maxNumberOfMultiValues = Integer.MIN_VALUE;
    int totalNumValues = 0;
    int data[][] = new int[totalDocs][];

    for (int i = 0; i < lines.size(); i++) {
        String line = lines.get(i);
        String[] split = line.split(",");
        totalNumValues = totalNumValues + split.length;
        if (split.length > maxNumberOfMultiValues) {
            maxNumberOfMultiValues = split.length;
        }
        data[i] = new int[split.length];
        for (int j = 0; j < split.length; j++) {
            String token = split[j];
            int val = Integer.parseInt(token);
            data[i][j] = val;
            if (val > max) {
                max = val;
            }
        }
    }

    int maxBitsNeeded = (int) Math.ceil(Math.log(max) / Math.log(2));
    int size = 2048;
    int[] offsets = new int[size];
    int bitMapSize = 0;
    File outputFile = new File("output.mv.fwd");
    FixedBitSkipListSCMVWriter fixedBitSkipListSCMVWriter = new FixedBitSkipListSCMVWriter(outputFile,
            totalDocs, totalNumValues, maxBitsNeeded);

    for (int i = 0; i < totalDocs; i++) {
        fixedBitSkipListSCMVWriter.setIntArray(i, data[i]);
        if (i % size == size - 1) {
            MutableRoaringBitmap rr1 = MutableRoaringBitmap.bitmapOf(offsets);
            ByteArrayOutputStream bos = new ByteArrayOutputStream();
            DataOutputStream dos = new DataOutputStream(bos);
            rr1.serialize(dos);
            dos.close();
            //System.out.println("Chunk " + i / size + " bitmap size:" + bos.size());
            bitMapSize += bos.size();
        } else if (i == totalDocs - 1) {
            MutableRoaringBitmap rr1 = MutableRoaringBitmap.bitmapOf(Arrays.copyOf(offsets, i % size));
            ByteArrayOutputStream bos = new ByteArrayOutputStream();
            DataOutputStream dos = new DataOutputStream(bos);
            rr1.serialize(dos);
            dos.close();
            //System.out.println("Chunk " + i / size + " bitmap size:" + bos.size());
            bitMapSize += bos.size();
        }
    }
    fixedBitSkipListSCMVWriter.close();

    System.out.println("Output file size:" + outputFile.length());
    System.out.println("totalNumberOfDoc\t\t\t:" + totalDocs);
    System.out.println("totalNumberOfValues\t\t\t:" + totalNumValues);
    System.out.println("chunk size\t\t\t\t:" + size);
    System.out.println("Num chunks\t\t\t\t:" + totalDocs / size);
    int numChunks = totalDocs / size + 1;
    int totalBits = (totalNumValues * maxBitsNeeded);
    int dataSizeinBytes = (totalBits + 7) / 8;
    System.out.println("Raw data size with fixed bit encoding\t:" + dataSizeinBytes);
    System.out.println("\nPer encoding size");
    System.out.println();
    System.out.println("size (offset + length)\t\t\t:" + ((totalDocs * (4 + 4)) + dataSizeinBytes));
    System.out.println();
    System.out.println("size (offset only)\t\t\t:" + ((totalDocs * (4)) + dataSizeinBytes));
    System.out.println();
    System.out.println("bitMapSize\t\t\t\t:" + bitMapSize);
    System.out.println("size (with bitmap)\t\t\t:" + (bitMapSize + (numChunks * 4) + dataSizeinBytes));
    System.out.println();
    System.out.println("Custom Bitset\t\t\t\t:" + (totalNumValues + 7) / 8);
    System.out.println("size (with custom bitset)\t\t\t:"
            + (((totalNumValues + 7) / 8) + (numChunks * 4) + dataSizeinBytes));
}
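The benchmark's first printout compares outputFile.length() against sizes it computes analytically. A minimal sketch of that verification pattern (the expected value here is a hypothetical placeholder for the computed estimate):

import java.io.File;

public class SizeCheckDemo {
    public static void main(String[] args) {
        File outputFile = new File("output.mv.fwd"); // written by the benchmark above
        long expectedBytes = 1 << 20; // hypothetical analytic estimate
        long actualBytes = outputFile.length();
        System.out.println("Output file size: " + actualBytes + " (estimated: " + expectedBytes + ")");
    }
}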
From source file:edu.stanford.epad.common.pixelmed.SegmentationObjectsFileWriter.java
/**
 * This demo gets segmentation maps from map_file, then inserts the maps twice as two segments.
 *
 * @param args: java SegmentationObjectsFileWriter dicom_file map_file output_file mode
 *        mode is "BINARY" or "FRACTIONAL".
 */
public static void main(String[] args) {
    String input_file = args[0]; // e.g. "./DicomFiles/CT0010"
    String map_file = args[1]; // e.g. "./TEMP/CT0010.mapbin"
    String output_file = args[2]; // e.g. "./TEMP/CT0010.sobin" or "segmentation_test_out.dcm"
    // String mode = args[3]; // "BINARY"
    byte[] pixels = null;
    SegmentationObjectsFileWriter obj = null;
    short image_width = 0, image_height = 0, image_frames = 0;

    // Read pixel array from the map_file.
    // Note: the (int) cast truncates for files over 2 GB.
    File file = new File(map_file);
    DataInputStream dis = null;
    pixels = new byte[(int) file.length()];
    try {
        dis = new DataInputStream(new FileInputStream(file));
        dis.readFully(pixels);
        dis.close();
    } catch (Exception e1) {
        e1.printStackTrace();
    } finally {
        IOUtils.closeQuietly(dis);
    }

    try {
        DicomInputStream i_stream = new DicomInputStream(new FileInputStream(input_file));
        AttributeList list = new AttributeList();
        list.read(i_stream);
        { // Get sequence format.
            image_width = (short) Attribute.getSingleIntegerValueOrDefault(list, TagFromName.Columns, 1);
            image_height = (short) Attribute.getSingleIntegerValueOrDefault(list, TagFromName.Rows, 1);
            image_frames = (short) Attribute.getSingleIntegerValueOrDefault(list, TagFromName.NumberOfFrames, 1);
        }
        short[] orientation = new short[] { 1, 0, 0, 0, 0, 1 };
        double[] spacing = new double[] { 0.65, 0.8 };
        double thickness = 0.5;

        AttributeList[] lists = new AttributeList[1];
        lists[0] = list;
        obj = new SegmentationObjectsFileWriter(lists, orientation, spacing, thickness);

        CodedConcept category = new CodedConcept("C0085089" /* conceptUniqueIdentifier */,
                "260787004" /* SNOMED CID */, "SRT" /* codingSchemeDesignator */,
                "SNM3" /* legacyCodingSchemeDesignator */, null /* codingSchemeVersion */,
                "A-00004" /* codeValue */, "Physical Object" /* codeMeaning */,
                null /* codeStringEquivalent */, null /* synonyms */);
        CodedConcept type = new CodedConcept("C0018787" /* conceptUniqueIdentifier */,
                "80891009" /* SNOMED CID */, "SRT" /* codingSchemeDesignator */,
                null /* legacyCodingSchemeDesignator */, null /* codingSchemeVersion */,
                "T-32000" /* codeValue */, "Heart" /* codeMeaning */,
                null /* codeStringEquivalent */, null /* synonyms */);

        double[][] positions = new double[image_frames][3];

        /*
         * void AddAllFrames(byte[] frames, int frame_num, int image_width, int image_height, String type,
         * double[][] positions, short[] patient_orientation, double slice_thickness,
         * double[] pixel_spacing, short stack_id)
         */
        // Segment 1
        obj.addOneSegment("Segment No.1 is for ...", category, type);
        obj.addAllFrames(pixels, image_frames, image_width, image_height, "binary", (short) 0, positions);
        short stack_id = 2;
        positions[0][0] = 0.2;
        obj.addAllFrames(pixels, image_frames, image_width, image_height, "binary", stack_id, positions);
        // Segment 2
        // obj.AddOneSegment(null, null, null);
        obj.addOneSegment("Segment No.2 is for ...", category, type);
        obj.addAllFrames(pixels, image_frames, image_width, image_height, "binary", (short) 1, positions);
        obj.addAllFrames(pixels, image_frames, image_width, image_height, "binary", stack_id, positions);

        obj.saveDicomFile(output_file);
    } catch (Exception e) {
        System.err.println(e);
        e.printStackTrace(System.err);
        System.exit(0);
    }
}
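The demo above sizes its read buffer with new byte[(int) file.length()], which truncates silently for files over 2 GB. A minimal sketch of a whole-file read that fails loudly instead (the file name reuses the demo's sample map file and is hypothetical):

import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Paths;

public class ReadAllBytesDemo {
    public static void main(String[] args) throws IOException {
        // Files.readAllBytes fails (rather than truncating) if the file
        // cannot fit in a single byte array
        byte[] pixels = Files.readAllBytes(Paths.get("CT0010.mapbin")); // hypothetical map file
        System.out.println("Read " + pixels.length + " bytes");
    }
}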
From source file:Main.java
public static void append(String fileName, String text) throws Exception {
    File f = new File(fileName);
    long fileLength = f.length();
    RandomAccessFile raf = new RandomAccessFile(f, "rw");
    raf.seek(fileLength);
    raf.writeBytes(text);
    raf.close();
}
From source file:Main.java
public static void append(String fileName, byte[] bytes) throws Exception {
    File f = new File(fileName);
    long fileLength = f.length();
    RandomAccessFile raf = new RandomAccessFile(f, "rw");
    raf.seek(fileLength);
    raf.write(bytes);
    raf.close();
}
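Both append helpers seek to f.length() before writing, but leak the RandomAccessFile if the write throws. A try-with-resources variant of the same idiom (Java 7+), with a hypothetical target file in the usage line:

import java.io.File;
import java.io.IOException;
import java.io.RandomAccessFile;

public class AppendDemo {
    public static void append(String fileName, byte[] bytes) throws IOException {
        File f = new File(fileName);
        try (RandomAccessFile raf = new RandomAccessFile(f, "rw")) {
            raf.seek(f.length()); // position at the current end of file
            raf.write(bytes);
        } // closed even if write() throws
    }

    public static void main(String[] args) throws IOException {
        append("append-demo.txt", "hello\n".getBytes()); // hypothetical target file
    }
}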
From source file:Main.java
// Note: despite the name, this converts bytes to kilobytes; SIZE_KB is assumed
// to be a float constant (presumably 1024f) defined elsewhere in Main.
public static float getSizeBytes(File file) {
    return file.length() / SIZE_KB;
}
From source file:Main.java
// Caches file.length() in the static field lengthFile (a long defined elsewhere in Main).
static private void setLengthFile(File file) {
    lengthFile = file.length();
}