Example usage for java.io File length

Introduction

This page collects usage examples for java.io.File.length().

Prototype

public long length() 

Document

Returns the length of the file denoted by this abstract pathname.
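
Two edge cases are worth keeping in mind before the longer examples: the return value is unspecified if the pathname denotes a directory, and it is 0L if the file does not exist, which makes a zero result ambiguous. A minimal sketch of a guarded call (the path is hypothetical):

public static void printLength(String path) {
    File f = new File(path);
    // length() returns 0L both for an empty file and for a missing one,
    // so check exists() and isFile() first to disambiguate
    if (f.exists() && f.isFile()) {
        System.out.println(f.getPath() + ": " + f.length() + " bytes");
    } else {
        System.out.println(f.getPath() + " is not an existing regular file");
    }
}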

Usage

From source file:MainClass.java

public static void main(String args[]) {
    File f1 = new File("MainClass.java");
    System.out.println("File Name:" + f1.getName());
    System.out.println("Path:" + f1.getPath());
    System.out.println("Abs Path:" + f1.getAbsolutePath());
    System.out.println("Parent:" + f1.getParent());
    System.out.println(f1.exists() ? "exists" : "does not exist");
    System.out.println(f1.canWrite() ? "is writeable" : "is not writeable");
    System.out.println(f1.canRead() ? "is readable" : "is not readable");
    System.out.println("is a directory" + f1.isDirectory());
    System.out.println(f1.isFile() ? "is normal file" : "might be a named pipe");
    System.out.println(f1.isAbsolute() ? "is absolute" : "is not absolute");
    System.out.println("File last modified:" + f1.lastModified());
    System.out.println("File size:" + f1.length() + " Bytes");
}

From source file:FileDemo.java

public static void main(String args[]) {
    File f1 = new File("/java/COPYRIGHT");
    System.out.println("File Name: " + f1.getName());
    System.out.println("Path: " + f1.getPath());
    System.out.println("Abs Path: " + f1.getAbsolutePath());
    System.out.println("Parent: " + f1.getParent());
    System.out.println(f1.exists() ? "exists" : "does not exist");
    System.out.println(f1.canWrite() ? "is writeable" : "is not writeable");
    System.out.println(f1.canRead() ? "is readable" : "is not readable");
    System.out.println("is " + (f1.isDirectory() ? "" : "not" + " a directory"));
    System.out.println(f1.isFile() ? "is normal file" : "might be a named pipe");
    System.out.println(f1.isAbsolute() ? "is absolute" : "is not absolute");
    System.out.println("File last modified: " + f1.lastModified());
    System.out.println("File size: " + f1.length() + " Bytes");
}

From source file:com.aerospike.load.AerospikeLoad.java

public static void main(String[] args) throws IOException {

    Thread statPrinter = new Thread(new PrintStat(counters));
    try {
        log.info("Aerospike loader started");
        Options options = new Options();
        options.addOption("h", "host", true, "Server hostname (default: localhost)");
        options.addOption("p", "port", true, "Server port (default: 3000)");
        options.addOption("n", "namespace", true, "Namespace (default: test)");
        options.addOption("s", "set", true, "Set name. (default: null)");
        options.addOption("c", "config", true, "Column definition file name");
        options.addOption("wt", "write-threads", true,
                "Number of writer threads (default: Number of cores * 5)");
        options.addOption("rt", "read-threads", true,
                "Number of reader threads (default: Number of cores * 1)");
        options.addOption("l", "rw-throttle", true, "Throttling of reader to writer(default: 10k) ");
        options.addOption("tt", "transaction-timeout", true,
                "write transaction timeout in miliseconds(default: No timeout)");
        options.addOption("et", "expiration-time", true,
                "Expiration time of records in seconds (default: never expire)");
        options.addOption("T", "timezone", true,
                "Timezone of source where data dump is taken (default: local timezone)");
        options.addOption("ec", "abort-error-count", true, "Error count to abort (default: 0)");
        options.addOption("wa", "write-action", true, "Write action if key already exists (default: update)");
        options.addOption("v", "verbose", false, "Logging all");
        options.addOption("u", "usage", false, "Print usage.");

        CommandLineParser parser = new PosixParser();
        CommandLine cl = parser.parse(options, args, false);

        if (args.length == 0 || cl.hasOption("u")) {
            logUsage(options);
            return;
        }

        if (cl.hasOption("l")) {
            rwThrottle = Integer.parseInt(cl.getOptionValue("l"));
        } else {
            rwThrottle = Constants.READLOAD;
        }
        // Get all command line options
        params = Utils.parseParameters(cl);

        //Get client instance
        AerospikeClient client = new AerospikeClient(params.host, params.port);
        if (!client.isConnected()) {
            log.error("Client is not able to connect:" + params.host + ":" + params.port);
            return;
        }

        if (params.verbose) {
            log.setLevel(Level.DEBUG);
        }

        // Get available processors to calculate default number of threads
        int cpus = Runtime.getRuntime().availableProcessors();
        nWriterThreads = cpus * scaleFactor;
        nReaderThreads = cpus;

        // Get writer thread count
        if (cl.hasOption("wt")) {
            nWriterThreads = Integer.parseInt(cl.getOptionValue("wt"));
            nWriterThreads = (nWriterThreads > 0
                    ? (nWriterThreads > Constants.MAX_THREADS ? Constants.MAX_THREADS : nWriterThreads)
                    : 1);
            log.debug("Using writer Threads: " + nWriterThreads);
        }
        writerPool = Executors.newFixedThreadPool(nWriterThreads);

        // Get reader thread count
        if (cl.hasOption("rt")) {
            nReaderThreads = Integer.parseInt(cl.getOptionValue("rt"));
            nReaderThreads = (nReaderThreads > 0
                    ? (nReaderThreads > Constants.MAX_THREADS ? Constants.MAX_THREADS : nReaderThreads)
                    : 1);
            log.debug("Using reader Threads: " + nReaderThreads);
        }

        String columnDefinitionFileName = cl.getOptionValue("c", null);

        log.debug("Column definition files/directory: " + columnDefinitionFileName);
        if (columnDefinitionFileName == null) {
            log.error("Column definition files/directory not specified. use -c <file name>");
            return;
        }

        File columnDefinitionFile = new File(columnDefinitionFileName);
        if (!columnDefinitionFile.exists()) {
            log.error("Column definition files/directory does not exist: "
                    + Utils.getFileName(columnDefinitionFileName));
            return;
        }

        // Get data file list
        String[] files = cl.getArgs();
        if (files.length == 0) {
            log.error("No data file Specified: add <file/dir name> to end of the command ");
            return;
        }
        List<String> allFileNames = new ArrayList<String>();
        allFileNames = Utils.getFileNames(files);
        if (allFileNames.size() == 0) {
            log.error("Given datafiles/directory does not exist");
            return;
        }
        for (int i = 0; i < allFileNames.size(); i++) {
            log.debug("File names:" + Utils.getFileName(allFileNames.get(i)));
            File file = new File(allFileNames.get(i));
            // accumulate the combined size in bytes of all data files
            counters.write.recordTotal = counters.write.recordTotal + file.length();
        }

        //remove column definition file from list
        allFileNames.remove(columnDefinitionFileName);

        log.info("Number of data files:" + allFileNames.size());

        /**
         * Process column definition file to get meta data and bin mapping.
         */
        metadataColumnDefs = new ArrayList<ColumnDefinition>();
        binColumnDefs = new ArrayList<ColumnDefinition>();
        metadataConfigs = new HashMap<String, String>();

        if (Parser.processJSONColumnDefinitions(columnDefinitionFile, metadataConfigs, metadataColumnDefs,
                binColumnDefs, params)) {
            log.info("Config file processed.");
        } else {
            throw new Exception("Config file parsing Error");
        }

        // Add metadata of config to parameters
        String metadata;
        if ((metadata = metadataConfigs.get(Constants.INPUT_TYPE)) != null) {
            params.fileType = metadata;
            if (params.fileType.equals(Constants.CSV_FILE)) {

                // Version check
                metadata = metadataConfigs.get(Constants.VERSION);
                String[] vNumber = metadata.split("\\.");
                int v1 = Integer.parseInt(vNumber[0]);
                int v2 = Integer.parseInt(vNumber[1]);
                if ((v1 <= Constants.MajorV) && (v2 <= Constants.MinorV)) {
                    log.debug("Config version used:" + metadata);
                } else
                    throw new Exception("\"" + Constants.VERSION + ":" + metadata + "\" is not Supported");

                // Set delimiter 
                if ((metadata = metadataConfigs.get(Constants.DELIMITER)) != null && metadata.length() == 1) {
                    params.delimiter = metadata.charAt(0);
                } else {
                    log.warn("\"" + Constants.DELIMITER + ":" + metadata
                            + "\" is not properly specified in config file. Default is ','");
                }

                if ((metadata = metadataConfigs.get(Constants.IGNORE_FIRST_LINE)) != null) {
                    params.ignoreFirstLine = metadata.equals("true");
                } else {
                    log.warn("\"" + Constants.IGNORE_FIRST_LINE + ":" + metadata
                            + "\" is not properly specified in config file. Default is false");
                }

                if ((metadata = metadataConfigs.get(Constants.COLUMNS)) != null) {
                    counters.write.colTotal = Integer.parseInt(metadata);
                } else {
                    throw new Exception("\"" + Constants.COLUMNS + ":" + metadata
                            + "\" is not properly specified in config file");
                }
            } else {
                throw new Exception("\"" + params.fileType + "\" is not supported in config file");
            }
        } else {
            throw new Exception("\"" + Constants.INPUT_TYPE + "\" is not specified in config file");
        }

        // add config input to column definitions
        if (params.fileType.equals(Constants.CSV_FILE)) {
            List<String> binName = null;
            if (params.ignoreFirstLine) {
                String line;
                BufferedReader br = new BufferedReader(
                        new InputStreamReader(new FileInputStream(allFileNames.get(0)), "UTF8"));
                if ((line = br.readLine()) != null) {
                    binName = Parser.getCSVRawColumns(line, params.delimiter);
                    br.close();
                    if (binName.size() != counters.write.colTotal) {
                        throw new Exception("Number of column in config file and datafile are mismatch."
                                + " Datafile: " + Utils.getFileName(allFileNames.get(0)) + " Configfile: "
                                + Utils.getFileName(columnDefinitionFileName));
                    }
                }
            }

            //update columndefs for metadata
            for (int i = 0; i < metadataColumnDefs.size(); i++) {
                if (!metadataColumnDefs.get(i).staticValue) {
                    if (metadataColumnDefs.get(i).binValuePos < 0) {
                        if (metadataColumnDefs.get(i).columnName == null) {
                            if (metadataColumnDefs.get(i).jsonPath == null) {
                                log.error("dynamic metadata having improper info"
                                        + metadataColumnDefs.toString()); //TODO
                            } else {
                                //TODO check for json_path   
                            }
                        } else {
                            if (params.ignoreFirstLine) {
                                if (binName.indexOf(metadataColumnDefs.get(i).binValueHeader) != -1) {
                                    metadataColumnDefs.get(i).binValuePos = binName
                                            .indexOf(metadataColumnDefs.get(i).binValueHeader);
                                } else {
                                    throw new Exception("binName missing in data file:"
                                            + metadataColumnDefs.get(i).binValueHeader);
                                }
                            }
                        }
                    } else {
                        if (params.ignoreFirstLine)
                            metadataColumnDefs.get(i).binValueHeader = binName
                                    .get(metadataColumnDefs.get(i).binValuePos);
                    }
                }
                if ((!metadataColumnDefs.get(i).staticValue) && (metadataColumnDefs.get(i).binValuePos < 0)) {
                    throw new Exception("Information for bin mapping is missing in config file:"
                            + metadataColumnDefs.get(i));
                }

                if (metadataColumnDefs.get(i).srcType == null) {
                    throw new Exception(
                            "Source data type is not properly mentioned:" + metadataColumnDefs.get(i));
                }

                if (Constants.SET.equals(metadataColumnDefs.get(i).binNameHeader)
                        && !metadataColumnDefs.get(i).srcType.equals(SrcColumnType.STRING)) {
                    throw new Exception("Set name should be string type:" + metadataColumnDefs.get(i));
                }

                if (metadataColumnDefs.get(i).binNameHeader.equalsIgnoreCase(Constants.SET)
                        && params.set != null) {
                    throw new Exception(
                            "Set name is given both in config file and commandline. Provide only once.");
                }
            }

            //update columndefs for bins
            for (int i = 0; i < binColumnDefs.size(); i++) {
                if (!binColumnDefs.get(i).staticName) {
                    if (binColumnDefs.get(i).binNamePos < 0) {
                        if (binColumnDefs.get(i).columnName == null) {
                            if (binColumnDefs.get(i).jsonPath == null) {
                                log.error("dynamic bin having improper info"); //TODO
                            } else {
                                //TODO check for json_path
                            }
                        } else {
                            if (params.ignoreFirstLine) {
                                if (binName.indexOf(binColumnDefs.get(i).binNameHeader) != -1) {
                                    binColumnDefs.get(i).binNamePos = binName
                                            .indexOf(binColumnDefs.get(i).binNameHeader);
                                } else {
                                    throw new Exception("binName missing in data file:"
                                            + binColumnDefs.get(i).binNameHeader);
                                }
                            }
                        }
                    } else {
                        if (params.ignoreFirstLine)
                            binColumnDefs.get(i).binNameHeader = binName.get(binColumnDefs.get(i).binNamePos);
                    }
                }

                if (!binColumnDefs.get(i).staticValue) {
                    if (binColumnDefs.get(i).binValuePos < 0) {
                        if (binColumnDefs.get(i).columnName == null) {
                            if (binColumnDefs.get(i).jsonPath == null) {
                                log.error("dynamic bin having improper info"); //TODO
                            } else {
                                //TODO check for json_path
                            }
                        } else {
                            if (params.ignoreFirstLine) {
                                if (binName.contains(binColumnDefs.get(i).binValueHeader)) {
                                    binColumnDefs.get(i).binValuePos = binName
                                            .indexOf(binColumnDefs.get(i).binValueHeader);
                                } else if (!binColumnDefs.get(i).binValueHeader.toLowerCase()
                                        .equals(Constants.SYSTEM_TIME)) {
                                    throw new Exception("Wrong column name mentioned in config file:"
                                            + binColumnDefs.get(i).binValueHeader);
                                }
                            }
                        }
                    } else {
                        if (params.ignoreFirstLine)
                            binColumnDefs.get(i).binValueHeader = binName.get(binColumnDefs.get(i).binValuePos);
                    }

                    //check for missing entries in config file
                    if (binColumnDefs.get(i).binValuePos < 0 && binColumnDefs.get(i).binValueHeader == null) {
                        throw new Exception("Information missing(Value header or bin mapping) in config file:"
                                + binColumnDefs.get(i));
                    }

                    //check for proper data type in config file.
                    if (binColumnDefs.get(i).srcType == null) {
                        throw new Exception(
                                "Source data type is not properly mentioned:" + binColumnDefs.get(i));
                    }

                    //check for valid destination type
                    if ((binColumnDefs.get(i).srcType.equals(SrcColumnType.TIMESTAMP)
                            || binColumnDefs.get(i).srcType.equals(SrcColumnType.BLOB))
                            && binColumnDefs.get(i).dstType == null) {
                        throw new Exception("Destination type is not mentioned: " + binColumnDefs.get(i));
                    }

                    //check for encoding
                    if (binColumnDefs.get(i).dstType != null && binColumnDefs.get(i).encoding == null) {
                        throw new Exception(
                                "Encoding is not given for src-dst type conversion:" + binColumnDefs.get(i));
                    }

                    //check for valid encoding
                    if (binColumnDefs.get(i).srcType.equals(SrcColumnType.BLOB)
                            && !binColumnDefs.get(i).encoding.equals(Constants.HEX_ENCODING)) {
                        throw new Exception("Wrong encoding for blob data:" + binColumnDefs.get(i));
                    }
                }

                //Check static bin name mapped to dynamic bin value
                if ((binColumnDefs.get(i).binNamePos == binColumnDefs.get(i).binValuePos)
                        && (binColumnDefs.get(i).binNamePos != -1)) {
                    throw new Exception("Static bin name mapped to dynamic bin value:" + binColumnDefs.get(i));
                }

                //check for missing entries in config file
                if (binColumnDefs.get(i).binNameHeader == null
                        || binColumnDefs.get(i).binNameHeader.length() > Constants.BIN_NAME_LENGTH) {
                    throw new Exception("Information missing binName or large binName in config file:"
                            + binColumnDefs.get(i));
                }
            }
        }

        log.info(params.toString());
        log.debug("MetadataConfig:" + metadataColumnDefs);
        log.debug("BinColumnDefs:" + binColumnDefs);

        // Start PrintStat thread
        statPrinter.start();

        // Reader pool size
        ExecutorService readerPool = Executors.newFixedThreadPool(
                nReaderThreads > allFileNames.size() ? allFileNames.size() : nReaderThreads);
        log.info("Reader pool size : " + nReaderThreads);

        // Submit one reader task per data file to the reader pool.
        for (String aFile : allFileNames) {
            log.debug("Submitting task for: " + aFile);
            readerPool.submit(new AerospikeLoad(aFile, client, params));
        }

        // Wait for reader pool to complete
        readerPool.shutdown();
        log.info("Shutdown down reader thread pool");

        while (!readerPool.isTerminated())
            ;
        //readerPool.awaitTermination(20, TimeUnit.MINUTES);
        log.info("Reader thread pool terminated");

        // Wait for writer pool to complete after getting all tasks from reader pool
        writerPool.shutdown();
        log.info("Shutdown down writer thread pool");

        while (!writerPool.isTerminated())
            ;
        log.info("Writer thread pool terminated");

        // Print final statistic of aerospike-loader.
        log.info("Final Statistics of importer: (Succesfull Writes = " + counters.write.writeCount.get() + ", "
                + "Errors="
                + (counters.write.writeErrors.get() + counters.write.readErrors.get()
                        + counters.write.processingErrors.get())
                + "(" + (counters.write.writeErrors.get()) + "-Write," + counters.write.readErrors.get()
                + "-Read," + counters.write.processingErrors.get() + "-Processing)");
    } catch (Exception e) {
        log.error(e);
        if (log.isDebugEnabled()) {
            e.printStackTrace();
        }
    } finally {
        // Stop statistic printer thread.
        statPrinter.interrupt();
        log.info("Aerospike loader completed");
    }
}

From source file:de.unileipzig.ub.indexer.App.java

public static void main(String[] args) throws IOException {

    // create Options object
    Options options = new Options();

    options.addOption("h", "help", false, "display this help");

    options.addOption("f", "filename", true, "name of the JSON file whose content should be indexed");
    options.addOption("i", "index", true, "the name of the target index");
    options.addOption("d", "doctype", true, "the name of the doctype (title, local, ...)");

    options.addOption("t", "host", true, "elasticsearch hostname (default: 0.0.0.0)");
    options.addOption("p", "port", true, "transport port (that's NOT the http port, default: 9300)");
    options.addOption("c", "cluster", true, "cluster name (default: elasticsearch_mdma)");

    options.addOption("b", "bulksize", true, "number of docs sent in one request (default: 3000)");
    options.addOption("v", "verbose", false, "show processing speed while indexing");
    options.addOption("s", "status", false, "only show status of index for file");

    options.addOption("r", "repair", false, "attempt to repair recoverable inconsistencies on the go");
    options.addOption("e", "debug", false, "set logging level to debug");
    options.addOption("l", "logfile", true, "logfile - in not specified only log to stdout");

    options.addOption("m", "memcached", true, "host and port of memcached (default: localhost:11211)");
    options.addOption("z", "latest-flag-on", true,
            "enable latest flag according to field (within content, e.g. 001)");
    options.addOption("a", "flat", false, "flat-mode: do not check for inconsistencies");

    CommandLineParser parser = new PosixParser();
    CommandLine cmd = null;

    try {
        cmd = parser.parse(options, args);
    } catch (ParseException ex) {
        logger.error(ex);
        System.exit(1);
    }

    // setup logging
    Properties systemProperties = System.getProperties();
    systemProperties.put("net.spy.log.LoggerImpl", "net.spy.memcached.compat.log.Log4JLogger");
    System.setProperties(systemProperties);
    Logger.getLogger("net.spy.memcached").setLevel(Level.ERROR);

    Properties props = new Properties();
    props.load(props.getClass().getResourceAsStream("/log4j.properties"));

    if (cmd.hasOption("debug")) {
        props.setProperty("log4j.logger.de.unileipzig", "DEBUG");
    }

    if (cmd.hasOption("logfile")) {
        props.setProperty("log4j.rootLogger", "INFO, stdout, F");
        props.setProperty("log4j.appender.F", "org.apache.log4j.FileAppender");
        props.setProperty("log4j.appender.F.File", cmd.getOptionValue("logfile"));
        props.setProperty("log4j.appender.F.layout", "org.apache.log4j.PatternLayout");
        props.setProperty("log4j.appender.F.layout.ConversionPattern", "%5p | %d | %F | %L | %m%n");
    }

    PropertyConfigurator.configure(props);

    InetAddress addr = InetAddress.getLocalHost();
    String memcachedHostAndPort = addr.getHostAddress() + ":11211";
    if (cmd.hasOption("m")) {
        memcachedHostAndPort = cmd.getOptionValue("m");
    }

    // setup caching
    try {
        if (memcachedClient == null) {
            memcachedClient = new MemcachedClient(
                    new ConnectionFactoryBuilder().setFailureMode(FailureMode.Cancel).build(),
                    AddrUtil.getAddresses(memcachedHostAndPort));
            try {
                // give client and server 300ms to establish the connection
                Thread.sleep(300);
            } catch (InterruptedException ex) {
            }

            Collection availableServers = memcachedClient.getAvailableServers();
            logger.info(availableServers);
            if (availableServers.size() == 0) {
                logger.info("no memcached servers found");
                memcachedClient.shutdown();
                memcachedClient = null;
            } else {
                logger.info(availableServers.size() + " memcached server(s) detected, fine.");
            }
        }
    } catch (IOException ex) {
        logger.warn("couldn't create a connection, bailing out: " + ex.getMessage());
    }

    // process options

    if (cmd.hasOption("h")) {
        HelpFormatter formatter = new HelpFormatter();
        formatter.printHelp("indexer", options, true);
        quit(0);
    }

    boolean verbose = false;
    if (cmd.hasOption("verbose")) {
        verbose = true;
    }

    // ES options
    String[] hosts = new String[] { "0.0.0.0" };
    int port = 9300;
    String clusterName = "elasticsearch_mdma";
    int bulkSize = 3000;

    if (cmd.hasOption("host")) {
        hosts = cmd.getOptionValues("host");
    }
    if (cmd.hasOption("port")) {
        port = Integer.parseInt(cmd.getOptionValue("port"));
    }
    if (cmd.hasOption("cluster")) {
        clusterName = cmd.getOptionValue("cluster");
    }
    if (cmd.hasOption("bulksize")) {
        bulkSize = Integer.parseInt(cmd.getOptionValue("bulksize"));
        if (bulkSize < 1 || bulkSize > 100000) {
            logger.error("bulksize must be between 1 and 100,000");
            quit(1);
        }
    }

    // ES Client
    final Settings settings = ImmutableSettings.settingsBuilder().put("cluster.name", clusterName)
            .build();
    final TransportClient client = new TransportClient(settings);
    for (String host : hosts) {
        client.addTransportAddress(new InetSocketTransportAddress(host, port));
    }

    if (cmd.hasOption("filename") && cmd.hasOption("index") && cmd.hasOption("doctype")) {

        final String filename = cmd.getOptionValue("filename");

        final File _file = new File(filename);
        if (_file.length() == 0) {
            logger.info(_file.getAbsolutePath() + " is empty, skipping");
            quit(0); // file is empty
        }

        // for flat mode: leave a stampfile beside the json to 
        // indicate previous successful processing
        File directory = new File(filename).getParentFile();
        File stampfile = new File(directory, DigestUtils.shaHex(filename) + ".indexed");

        long start = System.currentTimeMillis();
        long lineCount = 0;

        final String indexName = cmd.getOptionValue("index");
        final String docType = cmd.getOptionValue("doctype");
        BulkRequestBuilder bulkRequest = client.prepareBulk();

        try {
            if (cmd.hasOption("flat")) {
                // flat mode
                // .........
                if (stampfile.exists()) {
                    logger.info("SKIPPING, since it seems this file has already " + "been imported (found: "
                            + stampfile.getAbsolutePath() + ")");
                    quit(0);
                }
            } else {

                final String srcSHA1 = extractSrcSHA1(filename);

                logger.debug(filename + " srcsha1: " + srcSHA1);

                long docsInIndex = getIndexedRecordCount(client, indexName, srcSHA1);
                logger.debug(filename + " indexed: " + docsInIndex);

                long docsInFile = getLineCount(filename);
                logger.debug(filename + " lines: " + docsInFile);

                // in non-flat-mode, indexing would take care
                // of inconsistencies
                if (docsInIndex == docsInFile) {
                    logger.info("UP-TO DATE: " + filename + " (" + docsInIndex + ", " + srcSHA1 + ")");
                    client.close();
                    quit(0);
                }

                if (docsInIndex > 0) {
                    logger.warn("INCONSISTENCY DETECTED: " + filename + ": indexed:" + docsInIndex + " lines:"
                            + docsInFile);

                    if (!cmd.hasOption("r")) {
                        logger.warn(
                                "Please re-run indexer with --repair flag or delete residues first with: $ curl -XDELETE "
                                        + hosts[0] + ":9200/" + indexName
                                        + "/_query -d ' {\"term\" : { \"meta.srcsha1\" : \"" + srcSHA1
                                        + "\" }}'");
                        client.close();
                        quit(1);
                    } else {
                        logger.info("Attempting to clear residues...");
                        // attempt to repair once
                        DeleteByQueryResponse dbqr = client.prepareDeleteByQuery(indexName)
                                .setQuery(termQuery("meta.srcsha1", srcSHA1)).execute().actionGet();

                        Iterator<IndexDeleteByQueryResponse> it = dbqr.iterator();
                        long deletions = 0;
                        while (it.hasNext()) {
                            IndexDeleteByQueryResponse response = it.next();
                            deletions += 1;
                        }
                        logger.info("Deleted residues of " + filename);
                        logger.info("Refreshing [" + indexName + "]");
                        RefreshResponse refreshResponse = client.admin().indices()
                                .refresh(new RefreshRequest(indexName)).actionGet();

                        long indexedAfterDelete = getIndexedRecordCount(client, indexName, srcSHA1);
                        logger.info(indexedAfterDelete + " docs remained");
                        if (indexedAfterDelete > 0) {
                            logger.warn("Not all residues cleaned. Try to fix this manually: $ curl -XDELETE "
                                    + hosts[0] + ":9200/" + indexName
                                    + "/_query -d ' {\"term\" : { \"meta.srcsha1\" : \"" + srcSHA1 + "\" }}'");
                            quit(1);
                        } else {
                            logger.info("Residues are gone. Now trying to reindex: " + filename);
                        }
                    }
                }
            }

            logger.info("INDEXING-REQUIRED: " + filename);
            if (cmd.hasOption("status")) {
                quit(0);
            }

            final Set<String> idsInBatch = new HashSet<String>();

            String idField = null;
            if (cmd.hasOption("z")) {
                idField = cmd.getOptionValue("z");
            }

            final FileReader fr = new FileReader(filename);
            final BufferedReader br = new BufferedReader(fr);

            String line;
            // one line is one document
            while ((line = br.readLine()) != null) {

                // "Latest-Flag" machine
                // This gets obsolete with a "flat" index
                if (cmd.hasOption("z")) {
                    // flag that indicates, whether the document
                    // about to be indexed will be the latest
                    boolean willBeLatest = true;

                    // check if there is a previous (lower meta.timestamp) document with 
                    // the same identifier (whatever that may be - queried under "content")
                    final String contentIdentifier = getContentIdentifier(line, idField);
                    idsInBatch.add(contentIdentifier);

                    // assumed in meta.timestamp
                    final Long timestamp = Long.parseLong(getTimestamp(line));

                    logger.debug("Checking whether record is latest (line: " + lineCount + ")");
                    logger.debug(contentIdentifier + ", " + timestamp);

                    // get all docs, which match the contentIdentifier
                    // by filter, which doesn't score
                    final TermFilterBuilder idFilter = new TermFilterBuilder("content." + idField,
                            contentIdentifier);
                    final TermFilterBuilder kindFilter = new TermFilterBuilder("meta.kind", docType);
                    final AndFilterBuilder afb = new AndFilterBuilder();
                    afb.add(idFilter).add(kindFilter);
                    final FilteredQueryBuilder fb = filteredQuery(matchAllQuery(), afb);

                    final SearchResponse searchResponse = client.prepareSearch(indexName)
                            .setSearchType(SearchType.DFS_QUERY_THEN_FETCH).setQuery(fb).setFrom(0)
                            .setSize(1200) // 3 years and 105 days assuming daily updates at the most
                            .setExplain(false).execute().actionGet();

                    final SearchHits searchHits = searchResponse.getHits();

                    logger.debug("docs with this id in the index: " + searchHits.getTotalHits());

                    for (final SearchHit hit : searchHits.getHits()) {
                        final String docId = hit.id();
                        final Map<String, Object> source = hit.sourceAsMap();
                        final Map meta = (Map) source.get("meta");
                        final Long docTimestamp = Long.parseLong(meta.get("timestamp").toString());
                        // if the indexed doc's timestamp is not newer than the current one,
                        // remove its latest flag
                        if (timestamp >= docTimestamp) {
                            source.remove("latest");
                            final ObjectMapper mapper = new ObjectMapper();
                            // put the updated doc back
                            // IndexResponse response = 
                            client.prepareIndex(indexName, docType).setCreate(false).setId(docId)
                                    .setSource(mapper.writeValueAsBytes(source))
                                    .execute(new ActionListener<IndexResponse>() {
                                        public void onResponse(IndexResponse rspns) {
                                            logger.debug("Removed latest flag from " + contentIdentifier + ", "
                                                    + docTimestamp + ", " + hit.id() + " since (" + timestamp
                                                    + " > " + docTimestamp + ")");
                                        }

                                        public void onFailure(Throwable thrwbl) {
                                            logger.error("Could not remove flag from " + hit.id() + ", "
                                                    + contentIdentifier);
                                        }
                                    });
                            // .execute()
                            //.actionGet();
                        } else {
                            logger.debug("Doc " + hit.id() + " is newer (" + docTimestamp + ")");
                            willBeLatest = false;
                        }
                    }

                    if (willBeLatest) {
                        line = setLatestFlag(line);
                        logger.info("Setting latest flag on " + contentIdentifier + ", " + timestamp);
                    }

                    // end of latest-flag machine
                    // beware - this will be correct as long as there
                    // are no dups within one bulk!
                }

                bulkRequest.add(client.prepareIndex(indexName, docType).setSource(line));
                lineCount++;
                logger.debug("Added line " + lineCount + " to BULK");
                logger.debug(line);

                if (lineCount % bulkSize == 0) {

                    if (idsInBatch.size() != bulkSize && cmd.hasOption("z")) {
                        logger.error(
                                "This batch has duplications in the ID. That's not bad for the index, just makes the latest flag fuzzy");
                        logger.error(
                                "Bulk size was: " + bulkSize + ", but only " + idsInBatch.size() + " unique IDs");
                    }
                    idsInBatch.clear();

                    logger.debug("Issuing BULK request");

                    final long actionCount = bulkRequest.numberOfActions();
                    final BulkResponse bulkResponse = bulkRequest.execute().actionGet();
                    final long tookInMillis = bulkResponse.getTookInMillis();

                    if (bulkResponse.hasFailures()) {
                        logger.fatal("FAILED, bulk not indexed. exiting now.");
                        Iterator<BulkItemResponse> it = bulkResponse.iterator();
                        while (it.hasNext()) {
                            BulkItemResponse bir = it.next();
                            if (bir.isFailed()) {
                                Failure failure = bir.getFailure();
                                logger.fatal("id: " + failure.getId() + ", message: " + failure.getMessage()
                                        + ", type: " + failure.getType() + ", index: " + failure.getIndex());
                            }
                        }
                        quit(1);
                    } else {
                        if (verbose) {
                            final double elapsed = System.currentTimeMillis() - start;
                            final double speed = (lineCount / elapsed * 1000);
                            logger.info("OK (" + filename + ") " + lineCount + " docs indexed (" + actionCount
                                    + "/" + tookInMillis + "ms" + "/" + String.format("%.2f", speed) + "r/s)");
                        }
                    }
                    bulkRequest = client.prepareBulk();
                }
            }

            // handle the remaining items
            final long actionCount = bulkRequest.numberOfActions();
            if (actionCount > 0) {
                final BulkResponse bulkResponse = bulkRequest.execute().actionGet();
                final long tookInMillis = bulkResponse.getTookInMillis();

                if (bulkResponse.hasFailures()) {
                    logger.fatal("FAILED, bulk not indexed. exiting now.");
                    Iterator<BulkItemResponse> it = bulkResponse.iterator();
                    while (it.hasNext()) {
                        BulkItemResponse bir = it.next();
                        if (bir.isFailed()) {
                            Failure failure = bir.getFailure();
                            logger.fatal("id: " + failure.getId() + ", message: " + failure.getMessage()
                                    + ", type: " + failure.getType() + ", index: " + failure.getIndex());
                        }
                    }
                    quit(1);
                } else {

                    // trigger update now
                    RefreshResponse refreshResponse = client.admin().indices()
                            .refresh(new RefreshRequest(indexName)).actionGet();

                    if (verbose) {
                        final double elapsed = System.currentTimeMillis() - start;
                        final double speed = (lineCount / elapsed * 1000);
                        logger.info("OK (" + filename + ") " + lineCount + " docs indexed (" + actionCount + "/"
                                + tookInMillis + "ms" + "/" + String.format("%.2f", speed) + "r/s)");
                    }

                }

            }

            br.close();
            client.close();
            final double elapsed = (System.currentTimeMillis() - start) / 1000.0;
            final double speed = (lineCount / elapsed);
            logger.info("indexing (" + filename + ") " + lineCount + " docs took " + elapsed + "s (speed: "
                    + String.format("%.2f", speed) + "r/s)");
            if (cmd.hasOption("flat")) {
                try {
                    FileUtils.touch(stampfile);
                } catch (IOException ioe) {
                    logger.warn(".indexed files not created. Will reindex everything everytime.");
                }
            }
        } catch (IOException e) {
            client.close();
            logger.error(e);
            quit(1);
        } finally {
            client.close();
        }
    }
    quit(0);
}

From source file:com.linkedin.pinot.perf.MultiValueReaderWriterBenchmark.java

public static void main(String[] args) throws Exception {
    List<String> lines = IOUtils.readLines(new FileReader(new File(args[0])));
    int totalDocs = lines.size();
    int max = Integer.MIN_VALUE;
    int maxNumberOfMultiValues = Integer.MIN_VALUE;
    int totalNumValues = 0;
    int[][] data = new int[totalDocs][];
    for (int i = 0; i < lines.size(); i++) {
        String line = lines.get(i);
        String[] split = line.split(",");
        totalNumValues = totalNumValues + split.length;
        if (split.length > maxNumberOfMultiValues) {
            maxNumberOfMultiValues = split.length;
        }
        data[i] = new int[split.length];
        for (int j = 0; j < split.length; j++) {
            String token = split[j];
            int val = Integer.parseInt(token);
            data[i][j] = val;
            if (val > max) {
                max = val;
            }
        }
    }
    // bits needed to encode values up to max (note: this underestimates by
    // one bit when max is an exact power of two)
    int maxBitsNeeded = (int) Math.ceil(Math.log(max) / Math.log(2));
    int size = 2048;
    int[] offsets = new int[size];
    int bitMapSize = 0;
    File outputFile = new File("output.mv.fwd");

    FixedBitSkipListSCMVWriter fixedBitSkipListSCMVWriter = new FixedBitSkipListSCMVWriter(outputFile,
            totalDocs, totalNumValues, maxBitsNeeded);

    for (int i = 0; i < totalDocs; i++) {
        fixedBitSkipListSCMVWriter.setIntArray(i, data[i]);
        if (i % size == size - 1) {
            MutableRoaringBitmap rr1 = MutableRoaringBitmap.bitmapOf(offsets);
            ByteArrayOutputStream bos = new ByteArrayOutputStream();
            DataOutputStream dos = new DataOutputStream(bos);
            rr1.serialize(dos);
            dos.close();
            //System.out.println("Chunk " + i / size + " bitmap size:" + bos.size());
            bitMapSize += bos.size();
        } else if (i == totalDocs - 1) {
            MutableRoaringBitmap rr1 = MutableRoaringBitmap.bitmapOf(Arrays.copyOf(offsets, i % size));
            ByteArrayOutputStream bos = new ByteArrayOutputStream();
            DataOutputStream dos = new DataOutputStream(bos);
            rr1.serialize(dos);
            dos.close();
            //System.out.println("Chunk " + i / size + " bitmap size:" + bos.size());
            bitMapSize += bos.size();
        }
    }
    fixedBitSkipListSCMVWriter.close();
    System.out.println("Output file size:" + outputFile.length());
    System.out.println("totalNumberOfDoc\t\t\t:" + totalDocs);
    System.out.println("totalNumberOfValues\t\t\t:" + totalNumValues);
    System.out.println("chunk size\t\t\t\t:" + size);
    System.out.println("Num chunks\t\t\t\t:" + totalDocs / size);
    int numChunks = totalDocs / size + 1;
    int totalBits = (totalNumValues * maxBitsNeeded);
    int dataSizeinBytes = (totalBits + 7) / 8;

    System.out.println("Raw data size with fixed bit encoding\t:" + dataSizeinBytes);
    System.out.println("\nPer encoding size");
    System.out.println();
    System.out.println("size (offset + length)\t\t\t:" + ((totalDocs * (4 + 4)) + dataSizeinBytes));
    System.out.println();
    System.out.println("size (offset only)\t\t\t:" + ((totalDocs * (4)) + dataSizeinBytes));
    System.out.println();
    System.out.println("bitMapSize\t\t\t\t:" + bitMapSize);
    System.out.println("size (with bitmap)\t\t\t:" + (bitMapSize + (numChunks * 4) + dataSizeinBytes));

    System.out.println();
    System.out.println("Custom Bitset\t\t\t\t:" + (totalNumValues + 7) / 8);
    System.out.println("size (with custom bitset)\t\t\t:"
            + (((totalNumValues + 7) / 8) + (numChunks * 4) + dataSizeinBytes));

}

From source file:edu.stanford.epad.common.pixelmed.SegmentationObjectsFileWriter.java

/**
 * This demo gets segmentation maps from map_file, then inserts the maps twice as two segments.
 *
 * @param args java SegmentationObjectsFileWriter dicom_file map_file output_file mode, where mode is
 *          "BINARY" or "FRACTIONAL".
 */
public static void main(String[] args) {
    String input_file = args[0]; // e.g. "./DicomFiles/CT0010"
    String map_file = args[1]; // e.g. "./TEMP/CT0010.mapbin"
    String output_file = args[2]; // e.g. "./TEMP/CT0010.sobin" or "segmentation_test_out.dcm"
    // String mode = args[3]; // "BINARY" or "FRACTIONAL"

    byte[] pixels = null;
    SegmentationObjectsFileWriter obj = null;
    short image_width = 0, image_height = 0, image_frames = 0;

    // Read pixel array from the map_file.
    File file = new File(map_file);
    DataInputStream dis = null;
    pixels = new byte[(int) file.length()];
    try {
        dis = new DataInputStream((new FileInputStream(file)));
        dis.readFully(pixels);
        dis.close();
    } catch (Exception e1) {
        e1.printStackTrace();
    } finally {
        IOUtils.closeQuietly(dis);
    }

    try {
        DicomInputStream i_stream = new DicomInputStream(new FileInputStream(input_file));
        AttributeList list = new AttributeList();
        list.read(i_stream);

        { // Get sequence format.
            image_width = (short) Attribute.getSingleIntegerValueOrDefault(list, TagFromName.Columns, 1);
            image_height = (short) Attribute.getSingleIntegerValueOrDefault(list, TagFromName.Rows, 1);
            image_frames = (short) Attribute.getSingleIntegerValueOrDefault(list, TagFromName.NumberOfFrames,
                    1);
        }

        short[] orientation = new short[] { 1, 0, 0, 0, 0, 1 };
        double[] spacing = new double[] { 0.65, 0.8 };
        double thickness = 0.5;
        AttributeList[] lists = new AttributeList[1];
        lists[0] = list;
        obj = new SegmentationObjectsFileWriter(lists, orientation, spacing, thickness);

        CodedConcept category = new CodedConcept("C0085089" /* conceptUniqueIdentifier */,
                "260787004" /* SNOMED CID */, "SRT" /* codingSchemeDesignator */,
                "SNM3" /* legacyCodingSchemeDesignator */, null /* codingSchemeVersion */,
                "A-00004" /* codeValue */, "Physical Object" /* codeMeaning */, null /* codeStringEquivalent */,
                null /* synonyms */);
        CodedConcept type = new CodedConcept("C0018787" /* conceptUniqueIdentifier */,
                "80891009" /* SNOMED CID */, "SRT" /* codingSchemeDesignator */,
                null /* legacyCodingSchemeDesignator */, null /* codingSchemeVersion */,
                "T-32000" /* codeValue */, "Heart" /* codeMeaning */, null /* codeStringEquivalent */,
                null /* synonyms */);
        double[][] positions = new double[image_frames][3];
        /*
         * void AddAllFrames(byte [] frames, int frame_num, int image_width, int image_height, String type, double [][]
         * positions, short [] patient_orientation, double slice_thickness, double [] pixel_spacing, short stack_id)
         */
        // Segment 1
        obj.addOneSegment("Segment No.1 is for ...", category, type);
        obj.addAllFrames(pixels, image_frames, image_width, image_height, "binary", (short) 0, positions);
        short stack_id = 2;
        positions[0][0] = 0.2;
        obj.addAllFrames(pixels, image_frames, image_width, image_height, "binary", stack_id, positions);
        // Segment 2
        // obj.AddOneSegment(null, null, null);
        obj.addOneSegment("Segment No.2 is for ...", category, type);
        obj.addAllFrames(pixels, image_frames, image_width, image_height, "binary", (short) 1, positions);
        obj.addAllFrames(pixels, image_frames, image_width, image_height, "binary", stack_id, positions);

        obj.saveDicomFile(output_file);

    } catch (Exception e) {
        System.err.println(e);
        e.printStackTrace(System.err);
        System.exit(1); // exit nonzero on failure
    }
}

From source file:Main.java

public static void append(String fileName, String text) throws Exception {
    File f = new File(fileName);
    long fileLength = f.length();
    RandomAccessFile raf = new RandomAccessFile(f, "rw");
    raf.seek(fileLength);
    raf.writeBytes(text);
    raf.close();
}

From source file:Main.java

public static void append(String fileName, byte[] bytes) throws Exception {
    File f = new File(fileName);
    long fileLength = f.length();
    RandomAccessFile raf = new RandomAccessFile(f, "rw");
    raf.seek(fileLength);
    raf.write(bytes);
    raf.close();
}
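
A quick usage sketch for the two append helpers above (assuming both overloads live in the same class; file name and content are hypothetical). Both work the same way: length() gives the current end of the file, seek() moves there, and the write lands after the existing content:

public static void main(String[] args) throws Exception {
    append("notes.txt", "first line\n"); // writeBytes() keeps only the low byte of each char
    append("notes.txt", "second line\n".getBytes("UTF-8")); // byte-level variant
}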

From source file:Main.java

public static float getSizeBytes(File file) {
    // NOTE: length() reports bytes, but dividing by a SIZE_KB constant
    // (assumed to be defined elsewhere in the class) yields kilobytes,
    // so the method name is misleading.
    return file.length() / SIZE_KB;
}

From source file:Main.java

static private void setLengthFile(File file) {
    // Caches the file's length in a static field (lengthFile) assumed to be
    // declared elsewhere in the class.
    lengthFile = file.length();
}
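
A closing caveat that applies to all of the examples above: File.length() signals trouble by returning 0L rather than throwing, so a missing file and an empty file look the same. On Java 7 and later, java.nio.file.Files.size is an alternative that fails loudly; a minimal sketch (the path is hypothetical):

import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Paths;

public class FileSizeNio {
    public static void main(String[] args) throws IOException {
        // Files.size throws NoSuchFileException (an IOException subclass)
        // for a missing file instead of returning 0L like File.length()
        long size = Files.size(Paths.get("example.txt"));
        System.out.println(size + " bytes");
    }
}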