List of usage examples for java.io.BufferedReader.readLine()
public String readLine() throws IOException
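readLine() returns the contents of one line of text, without its line terminator, and returns null once the end of the stream is reached — which is why every example below loops on (line = reader.readLine()) != null. Before the full examples, a minimal self-contained sketch of the idiom (the file name is a placeholder):

import java.io.BufferedReader;
import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Paths;

public class ReadLineExample {
    public static void main(String[] args) throws IOException {
        // try-with-resources closes the reader even if an exception is thrown
        try (BufferedReader reader = Files.newBufferedReader(Paths.get("input.txt"), StandardCharsets.UTF_8)) {
            String line;
            while ((line = reader.readLine()) != null) { // null signals end of stream
                System.out.println(line);
            }
        }
    }
}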
From source file:drmaas.sandbox.http.LoginTest.java
public static void main(String[] args) throws Exception {
    // 1. For SSL: a trust manager and hostname verifier that accept everything (test code only)
    DefaultHttpClient base = new DefaultHttpClient();
    SSLContext ctx = SSLContext.getInstance("TLS");
    X509TrustManager tm = new X509TrustManager() {
        public void checkClientTrusted(X509Certificate[] xcs, String string) throws CertificateException {
        }

        public void checkServerTrusted(X509Certificate[] xcs, String string) throws CertificateException {
        }

        public X509Certificate[] getAcceptedIssuers() {
            return null;
        }
    };
    X509HostnameVerifier verifier = new X509HostnameVerifier() {
        @Override
        public void verify(String string, SSLSocket ssls) throws IOException {
        }

        @Override
        public void verify(String string, X509Certificate xc) throws SSLException {
        }

        @Override
        public void verify(String string, String[] strings, String[] strings1) throws SSLException {
        }

        @Override
        public boolean verify(String string, SSLSession ssls) {
            return true;
        }
    };
    ctx.init(null, new TrustManager[] { tm }, null);
    SSLSocketFactory ssf = new SSLSocketFactory(ctx, verifier);
    ClientConnectionManager ccm = base.getConnectionManager();
    SchemeRegistry sr = ccm.getSchemeRegistry();
    sr.register(new Scheme("https", 443, ssf));
    DefaultHttpClient httpclient = new DefaultHttpClient(ccm, base.getParams());
    httpclient.setRedirectStrategy(new LaxRedirectStrategy());
    try {
        HttpPost httpost;
        HttpResponse response;
        BufferedReader rd;
        String line;

        // log in
        httpost = new HttpPost("myloginurl");
        List<NameValuePair> nvps = new ArrayList<NameValuePair>();
        nvps.add(new BasicNameValuePair("login", "Log In"));
        nvps.add(new BasicNameValuePair("os_username", "foo"));
        nvps.add(new BasicNameValuePair("os_password", "foobar"));
        nvps.add(new BasicNameValuePair("os_cookie", "true"));
        nvps.add(new BasicNameValuePair("os_destination", ""));
        httpost.setEntity(new UrlEncodedFormEntity(nvps));
        response = httpclient.execute(httpost);
        System.out.println(response.toString());

        // read the response body line by line
        rd = new BufferedReader(new InputStreamReader(response.getEntity().getContent()));
        while ((line = rd.readLine()) != null) {
            System.out.println(line);
        }
    } finally {
        // When the HttpClient instance is no longer needed,
        // shut down the connection manager to ensure
        // immediate deallocation of all system resources
        httpclient.getConnectionManager().shutdown();
    }
}
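Stripped of the HttpClient and SSL setup, this example reduces to draining an InputStream line by line. A minimal sketch of the same readLine() pattern using only the JDK (the URL is a placeholder):

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.net.HttpURLConnection;
import java.net.URL;
import java.nio.charset.StandardCharsets;

public class ReadResponseBody {
    public static void main(String[] args) throws IOException {
        // hypothetical endpoint; replace with a real URL
        HttpURLConnection conn = (HttpURLConnection) new URL("https://example.com/").openConnection();
        try (BufferedReader rd = new BufferedReader(
                new InputStreamReader(conn.getInputStream(), StandardCharsets.UTF_8))) {
            String line;
            while ((line = rd.readLine()) != null) { // null signals end of the response body
                System.out.println(line);
            }
        } finally {
            conn.disconnect();
        }
    }
}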
From source file:DIA_Umpire_Quant.DIA_Umpire_IntLibSearch.java
/**
 * @param args the command line arguments
 */
public static void main(String[] args) throws FileNotFoundException, IOException, Exception {
    System.out.println("=================================================================================================");
    System.out.println("DIA-Umpire targeted re-extraction analysis using internal library (version: " + UmpireInfo.GetInstance().Version + ")");
    if (args.length != 1) {
        System.out.println("command format error, the correct format should be : java -jar -Xmx10G DIA_Umpire_IntLibSearch.jar diaumpire_module.params");
        return;
    }
    try {
        ConsoleLogger.SetConsoleLogger(Level.INFO);
        ConsoleLogger.SetFileLogger(Level.DEBUG, FilenameUtils.getFullPath(args[0]) + "diaumpire_intlibsearch.log");
    } catch (Exception e) {
    }
    Logger.getRootLogger().info("Version: " + UmpireInfo.GetInstance().Version);
    Logger.getRootLogger().info("Parameter file:" + args[0]);
    BufferedReader reader = new BufferedReader(new FileReader(args[0]));
    String line = "";
    String WorkFolder = "";
    int NoCPUs = 2;
    String InternalLibID = "";
    float ProbThreshold = 0.99f;
    float RTWindow_Int = -1f;
    float Freq = 0f;
    int TopNFrag = 6;
    TandemParam tandemPara = new TandemParam(DBSearchParam.SearchInstrumentType.TOF5600);
    HashMap<String, File> AssignFiles = new HashMap<>();

    //<editor-fold defaultstate="collapsed" desc="Reading parameter file">
    while ((line = reader.readLine()) != null) {
        line = line.trim();
        Logger.getRootLogger().info(line);
        if (!"".equals(line) && !line.startsWith("#")) {
            //System.out.println(line);
            if (line.equals("==File list begin")) {
                do {
                    line = reader.readLine();
                    line = line.trim();
                    if (line.equals("==File list end")) {
                        continue;
                    } else if (!"".equals(line)) {
                        File newfile = new File(line);
                        if (newfile.exists()) {
                            AssignFiles.put(newfile.getAbsolutePath(), newfile);
                        } else {
                            Logger.getRootLogger().info("File: " + newfile + " does not exist.");
                        }
                    }
                } while (!line.equals("==File list end"));
            }
            if (line.split("=").length < 2) {
                continue;
            }
            String type = line.split("=")[0].trim();
            String value = line.split("=")[1].trim();
            switch (type) {
                case "Path": { WorkFolder = value; break; }
                case "path": { WorkFolder = value; break; }
                case "Thread": { NoCPUs = Integer.parseInt(value); break; }
                case "InternalLibID": { InternalLibID = value; break; }
                case "RTWindow_Int": { RTWindow_Int = Float.parseFloat(value); break; }
                case "ProbThreshold": { ProbThreshold = Float.parseFloat(value); break; }
                case "TopNFrag": { TopNFrag = Integer.parseInt(value); break; }
                case "Freq": { Freq = Float.parseFloat(value); break; }
                case "Fasta": { tandemPara.FastaPath = value; break; }
            }
        }
    }
    //</editor-fold>

    //Initialize PTM manager using compomics library
    PTMManager.GetInstance();

    //Check if the fasta file can be found
    if (!new File(tandemPara.FastaPath).exists()) {
        Logger.getRootLogger().info("Fasta file :" + tandemPara.FastaPath + " cannot be found, the process will be terminated, please check.");
        System.exit(1);
    }

    //Generate DIA file list
    ArrayList<DIAPack> FileList = new ArrayList<>();
    try {
        File folder = new File(WorkFolder);
        if (!folder.exists()) {
            Logger.getRootLogger().info("The path : " + WorkFolder + " cannot be found.");
            System.exit(1);
        }
        for (final File fileEntry : folder.listFiles()) {
            if (fileEntry.isFile()
                    && (fileEntry.getAbsolutePath().toLowerCase().endsWith(".mzxml")
                            | fileEntry.getAbsolutePath().toLowerCase().endsWith(".mzml"))
                    && !fileEntry.getAbsolutePath().toLowerCase().endsWith("q1.mzxml")
                    && !fileEntry.getAbsolutePath().toLowerCase().endsWith("q2.mzxml")
                    && !fileEntry.getAbsolutePath().toLowerCase().endsWith("q3.mzxml")) {
                AssignFiles.put(fileEntry.getAbsolutePath(), fileEntry);
            }
            if (fileEntry.isDirectory()) {
                for (final File fileEntry2 : fileEntry.listFiles()) {
                    if (fileEntry2.isFile()
                            && (fileEntry2.getAbsolutePath().toLowerCase().endsWith(".mzxml")
                                    | fileEntry2.getAbsolutePath().toLowerCase().endsWith(".mzml"))
                            && !fileEntry2.getAbsolutePath().toLowerCase().endsWith("q1.mzxml")
                            && !fileEntry2.getAbsolutePath().toLowerCase().endsWith("q2.mzxml")
                            && !fileEntry2.getAbsolutePath().toLowerCase().endsWith("q3.mzxml")) {
                        AssignFiles.put(fileEntry2.getAbsolutePath(), fileEntry2);
                    }
                }
            }
        }
        Logger.getRootLogger().info("No. of files assigned :" + AssignFiles.size());
        for (File fileEntry : AssignFiles.values()) {
            Logger.getRootLogger().info(fileEntry.getAbsolutePath());
        }
        for (File fileEntry : AssignFiles.values()) {
            String mzXMLFile = fileEntry.getAbsolutePath();
            if (mzXMLFile.toLowerCase().endsWith(".mzxml") | mzXMLFile.toLowerCase().endsWith(".mzml")) {
                DIAPack DiaFile = new DIAPack(mzXMLFile, NoCPUs);
                Logger.getRootLogger().info("=================================================================================================");
                Logger.getRootLogger().info("Processing " + mzXMLFile);
                if (!DiaFile.LoadDIASetting()) {
                    Logger.getRootLogger().info("Loading DIA setting failed, job is incomplete");
                    System.exit(1);
                }
                if (!DiaFile.LoadParams()) {
                    Logger.getRootLogger().info("Loading parameters failed, job is incomplete");
                    System.exit(1);
                }
                Logger.getRootLogger().info("Loading identification results " + mzXMLFile + "....");
                //If the serialization file for ID file existed
                if (DiaFile.ReadSerializedLCMSID()) {
                    DiaFile.IDsummary.ReduceMemoryUsage();
                    DiaFile.IDsummary.FastaPath = tandemPara.FastaPath;
                    FileList.add(DiaFile);
                }
            }
        }

        //<editor-fold defaultstate="collapsed" desc="Targeted re-extraction using internal library">
        Logger.getRootLogger().info("=================================================================================================");
        if (FileList.size() > 1) {
            Logger.getRootLogger().info("Targeted re-extraction using internal library");
            FragmentLibManager libManager = FragmentLibManager.ReadFragmentLibSerialization(WorkFolder, InternalLibID);
            if (libManager == null) {
                Logger.getRootLogger().info("Building internal spectral library");
                libManager = new FragmentLibManager(InternalLibID);
                ArrayList<LCMSID> LCMSIDList = new ArrayList<>();
                for (DIAPack dia : FileList) {
                    LCMSIDList.add(dia.IDsummary);
                }
                libManager.ImportFragLibTopFrag(LCMSIDList, Freq, TopNFrag);
                libManager.WriteFragmentLibSerialization(WorkFolder);
            }
            libManager.ReduceMemoryUsage();
            Logger.getRootLogger().info("Building retention time prediction model and generate candidate peptide list");
            for (int i = 0; i < FileList.size(); i++) {
                FileList.get(i).IDsummary.ClearMappedPep();
            }
            for (int i = 0; i < FileList.size(); i++) {
                for (int j = i + 1; j < FileList.size(); j++) {
                    RTAlignedPepIonMapping alignment = new RTAlignedPepIonMapping(WorkFolder,
                            FileList.get(i).GetParameter(), FileList.get(i).IDsummary, FileList.get(j).IDsummary);
                    alignment.GenerateModel();
                    alignment.GenerateMappedPepIon();
                }
                FileList.get(i).ExportID();
                FileList.get(i).IDsummary = null;
            }
            Logger.getRootLogger().info("Targeted matching........");
            for (DIAPack diafile : FileList) {
                if (diafile.IDsummary == null) {
                    diafile.ReadSerializedLCMSID();
                }
                if (!diafile.IDsummary.GetMappedPepIonList().isEmpty()) {
                    diafile.UseMappedIon = true;
                    diafile.FilterMappedIonByProb = false;
                    diafile.BuildStructure();
                    diafile.MS1FeatureMap.ReadPeakCluster();
                    diafile.MS1FeatureMap.ClearMonoisotopicPeakOfCluster();
                    diafile.GenerateMassCalibrationRTMap();
                    diafile.TargetedExtractionQuant(false, libManager, ProbThreshold, RTWindow_Int);
                    diafile.MS1FeatureMap.ClearAllPeaks();
                    diafile.IDsummary.ReduceMemoryUsage();
                    diafile.IDsummary.RemoveLowProbMappedIon(ProbThreshold);
                    diafile.ExportID();
                    Logger.getRootLogger().info("Peptide ions: " + diafile.IDsummary.GetPepIonList().size()
                            + " Mapped ions: " + diafile.IDsummary.GetMappedPepIonList().size());
                    diafile.ClearStructure();
                }
                diafile.IDsummary = null;
                System.gc();
            }
            Logger.getRootLogger().info("=================================================================================================");
        }
        //</editor-fold>

        Logger.getRootLogger().info("Job done");
        Logger.getRootLogger().info("=================================================================================================");
    } catch (Exception e) {
        Logger.getRootLogger().error(ExceptionUtils.getStackTrace(e));
        throw e;
    }
}
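The readLine() work in this example is the key=value parameter-file loop at the top. Isolated as a sketch (class and method names are hypothetical; file format assumed from the example):

import java.io.BufferedReader;
import java.io.FileReader;
import java.io.IOException;
import java.util.HashMap;
import java.util.Map;

public class ParamFileReader {
    public static Map<String, String> read(String path) throws IOException {
        Map<String, String> params = new HashMap<>();
        try (BufferedReader reader = new BufferedReader(new FileReader(path))) {
            String line;
            while ((line = reader.readLine()) != null) {
                line = line.trim();
                // skip blank lines and comments
                if (line.isEmpty() || line.startsWith("#")) continue;
                String[] kv = line.split("=", 2);
                if (kv.length == 2) params.put(kv[0].trim(), kv[1].trim());
            }
        }
        return params;
    }
}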
From source file:gov.nih.nci.ncicb.tcga.dcc.dam.util.TempClinicalDataLoader.java
public static void main(String[] args) {
    // first get the db connection properties
    String url = urlSet.get(args[1]);
    String user = args[2];
    String word = args[3];
    // make sure we have the Oracle driver somewhere
    try {
        Class.forName("oracle.jdbc.OracleDriver");
        Class.forName("org.postgresql.Driver");
    } catch (Exception x) {
        System.out.println("Unable to load the driver class!");
        System.exit(0);
    }
    // connect to the database
    try {
        dbConnection = DriverManager.getConnection(url, user, word);
        ClinicalBean.setDBConnection(dbConnection);
    } catch (SQLException x) {
        x.printStackTrace();
        System.exit(1);
    }
    final String xmlList = args[0];
    BufferedReader br = null;
    try {
        final Map<String, String> clinicalFiles = new HashMap<String, String>();
        final Map<String, String> biospecimenFiles = new HashMap<String, String>();
        final Map<String, String> fullFiles = new HashMap<String, String>();
        //noinspection IOResourceOpenedButNotSafelyClosed
        br = new BufferedReader(new FileReader(xmlList));
        // read the file list to get all the files to load
        while (br.ready()) {
            final String[] in = br.readLine().split("\\t");
            String xmlfile = in[0];
            String archive = in[1];
            if (xmlfile.contains("_clinical")) {
                clinicalFiles.put(xmlfile, archive);
            } else if (xmlfile.contains("_biospecimen")) {
                biospecimenFiles.put(xmlfile, archive);
            } else {
                fullFiles.put(xmlfile, archive);
            }
        }
        Date dateAdded = Calendar.getInstance().getTime();
        // NOTE!!! This deletes all data before the load starts, assuming we are re-loading everything.
        // a better way would be to figure out what has changed and load that, or to be able to load multiple versions of the data in the schema
        emptyClinicalTables(user);
        // load any "full" files first -- in case some archives aren't split yet
        for (final String file : fullFiles.keySet()) {
            String archive = fullFiles.get(file);
            System.out.println("Full file " + file + " in " + archive);
            // need to re-instantiate the disease-specific beans for each file
            createDiseaseSpecificBeans(xmlList);
            String disease = getDiseaseName(archive);
            processFullXmlFile(file, archive, disease, dateAdded);
            // memory leak or something... have to commit and close all connections and re-get connection
            // after each file to keep from using too much heap space. this troubles me, but I have never had
            // the time to figure out why it happens
            resetConnections(url, user, word);
        }
        // now process all clinical files, and insert patients and clinical data
        for (final String clinicalFile : clinicalFiles.keySet()) {
            createDiseaseSpecificBeans(xmlList);
            String archive = clinicalFiles.get(clinicalFile);
            System.out.println("Clinical file " + clinicalFile + " in " + archive);
            String disease = getDiseaseName(archive);
            processClinicalXmlFile(clinicalFile, archive, disease, dateAdded);
            resetConnections(url, user, word);
        }
        // now process biospecimen files
        for (final String biospecimenFile : biospecimenFiles.keySet()) {
            createDiseaseSpecificBeans(xmlList);
            String archive = biospecimenFiles.get(biospecimenFile);
            String disease = getDiseaseName(archive);
            System.out.println("Biospecimen file " + biospecimenFile);
            processBiospecimenXmlFile(biospecimenFile, archive, disease, dateAdded);
            resetConnections(url, user, word);
        }
        // this sets relationships between these clinical tables and data browser tables, since we delete
        // and reload every time
        setForeignKeys();
        dbConnection.commit();
        dbConnection.close();
    } catch (Exception e) {
        e.printStackTrace();
        System.exit(-1);
    } finally {
        IOUtils.closeQuietly(br);
    }
}
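The file-list parsing above is a readLine()-plus-split pattern over a tab-separated manifest. A compact sketch of just that part (manifest format assumed from the example; note the null-check loop is generally preferred over br.ready() for detecting end of file):

import java.io.BufferedReader;
import java.io.FileReader;
import java.io.IOException;
import java.util.HashMap;
import java.util.Map;

public class FileManifest {
    public static Map<String, String> load(String manifestPath) throws IOException {
        Map<String, String> fileToArchive = new HashMap<>();
        try (BufferedReader br = new BufferedReader(new FileReader(manifestPath))) {
            String line;
            while ((line = br.readLine()) != null) { // null at end of file
                String[] fields = line.split("\\t");
                if (fields.length >= 2) {
                    fileToArchive.put(fields[0], fields[1]); // xml file -> archive
                }
            }
        }
        return fileToArchive;
    }
}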
From source file:com.aerospike.load.AerospikeLoad.java
public static void main(String[] args) throws IOException {
    Thread statPrinter = new Thread(new PrintStat(counters));
    try {
        log.info("Aerospike loader started");
        Options options = new Options();
        options.addOption("h", "host", true, "Server hostname (default: localhost)");
        options.addOption("p", "port", true, "Server port (default: 3000)");
        options.addOption("n", "namespace", true, "Namespace (default: test)");
        options.addOption("s", "set", true, "Set name. (default: null)");
        options.addOption("c", "config", true, "Column definition file name");
        options.addOption("wt", "write-threads", true, "Number of writer threads (default: Number of cores * 5)");
        options.addOption("rt", "read-threads", true, "Number of reader threads (default: Number of cores * 1)");
        options.addOption("l", "rw-throttle", true, "Throttling of reader to writer(default: 10k) ");
        options.addOption("tt", "transaction-timeout", true, "write transaction timeout in miliseconds(default: No timeout)");
        options.addOption("et", "expiration-time", true, "Expiration time of records in seconds (default: never expire)");
        options.addOption("T", "timezone", true, "Timezone of source where data dump is taken (default: local timezone)");
        options.addOption("ec", "abort-error-count", true, "Error count to abort (default: 0)");
        options.addOption("wa", "write-action", true, "Write action if key already exists (default: update)");
        options.addOption("v", "verbose", false, "Logging all");
        options.addOption("u", "usage", false, "Print usage.");
        CommandLineParser parser = new PosixParser();
        CommandLine cl = parser.parse(options, args, false);
        if (args.length == 0 || cl.hasOption("u")) {
            logUsage(options);
            return;
        }
        if (cl.hasOption("l")) {
            rwThrottle = Integer.parseInt(cl.getOptionValue("l"));
        } else {
            rwThrottle = Constants.READLOAD;
        }
        // Get all command line options
        params = Utils.parseParameters(cl);
        // Get client instance
        AerospikeClient client = new AerospikeClient(params.host, params.port);
        if (!client.isConnected()) {
            log.error("Client is not able to connect:" + params.host + ":" + params.port);
            return;
        }
        if (params.verbose) {
            log.setLevel(Level.DEBUG);
        }
        // Get available processors to calculate default number of threads
        int cpus = Runtime.getRuntime().availableProcessors();
        nWriterThreads = cpus * scaleFactor;
        nReaderThreads = cpus;
        // Get writer thread count
        if (cl.hasOption("wt")) {
            nWriterThreads = Integer.parseInt(cl.getOptionValue("wt"));
            nWriterThreads = (nWriterThreads > 0
                    ? (nWriterThreads > Constants.MAX_THREADS ? Constants.MAX_THREADS : nWriterThreads) : 1);
            log.debug("Using writer Threads: " + nWriterThreads);
        }
        writerPool = Executors.newFixedThreadPool(nWriterThreads);
        // Get reader thread count
        if (cl.hasOption("rt")) {
            nReaderThreads = Integer.parseInt(cl.getOptionValue("rt"));
            nReaderThreads = (nReaderThreads > 0
                    ? (nReaderThreads > Constants.MAX_THREADS ? Constants.MAX_THREADS : nReaderThreads) : 1);
            log.debug("Using reader Threads: " + nReaderThreads);
        }
        String columnDefinitionFileName = cl.getOptionValue("c", null);
        log.debug("Column definition files/directory: " + columnDefinitionFileName);
        if (columnDefinitionFileName == null) {
            log.error("Column definition files/directory not specified. use -c <file name>");
            return;
        }
        File columnDefinitionFile = new File(columnDefinitionFileName);
        if (!columnDefinitionFile.exists()) {
            log.error("Column definition files/directory does not exist: " + Utils.getFileName(columnDefinitionFileName));
            return;
        }
        // Get data file list
        String[] files = cl.getArgs();
        if (files.length == 0) {
            log.error("No data file Specified: add <file/dir name> to end of the command ");
            return;
        }
        List<String> allFileNames = new ArrayList<String>();
        allFileNames = Utils.getFileNames(files);
        if (allFileNames.size() == 0) {
            log.error("Given datafiles/directory does not exist");
            return;
        }
        for (int i = 0; i < allFileNames.size(); i++) {
            log.debug("File names:" + Utils.getFileName(allFileNames.get(i)));
            File file = new File(allFileNames.get(i));
            counters.write.recordTotal = counters.write.recordTotal + file.length();
        }
        // remove column definition file from list
        allFileNames.remove(columnDefinitionFileName);
        log.info("Number of data files:" + allFileNames.size());

        /**
         * Process column definition file to get meta data and bin mapping.
         */
        metadataColumnDefs = new ArrayList<ColumnDefinition>();
        binColumnDefs = new ArrayList<ColumnDefinition>();
        metadataConfigs = new HashMap<String, String>();
        if (Parser.processJSONColumnDefinitions(columnDefinitionFile, metadataConfigs, metadataColumnDefs, binColumnDefs, params)) {
            log.info("Config file processed.");
        } else {
            throw new Exception("Config file parsing Error");
        }
        // Add metadata of config to parameters
        String metadata;
        if ((metadata = metadataConfigs.get(Constants.INPUT_TYPE)) != null) {
            params.fileType = metadata;
            if (params.fileType.equals(Constants.CSV_FILE)) {
                // Version check
                metadata = metadataConfigs.get(Constants.VERSION);
                String[] vNumber = metadata.split("\\.");
                int v1 = Integer.parseInt(vNumber[0]);
                int v2 = Integer.parseInt(vNumber[1]);
                if ((v1 <= Constants.MajorV) && (v2 <= Constants.MinorV)) {
                    log.debug("Config version used:" + metadata);
                } else
                    throw new Exception("\"" + Constants.VERSION + ":" + metadata + "\" is not Supported");
                // Set delimiter
                if ((metadata = metadataConfigs.get(Constants.DELIMITER)) != null && metadata.length() == 1) {
                    params.delimiter = metadata.charAt(0);
                } else {
                    log.warn("\"" + Constants.DELIMITER + ":" + metadata + "\" is not properly specified in config file. Default is ','");
                }
                if ((metadata = metadataConfigs.get(Constants.IGNORE_FIRST_LINE)) != null) {
                    params.ignoreFirstLine = metadata.equals("true");
                } else {
                    log.warn("\"" + Constants.IGNORE_FIRST_LINE + ":" + metadata + "\" is not properly specified in config file. Default is false");
                }
                if ((metadata = metadataConfigs.get(Constants.COLUMNS)) != null) {
                    counters.write.colTotal = Integer.parseInt(metadata);
                } else {
                    throw new Exception("\"" + Constants.COLUMNS + ":" + metadata + "\" is not properly specified in config file");
                }
            } else {
                throw new Exception("\"" + params.fileType + "\" is not supported in config file");
            }
        } else {
            throw new Exception("\"" + Constants.INPUT_TYPE + "\" is not specified in config file");
        }
        // add config input to column definitions
        if (params.fileType.equals(Constants.CSV_FILE)) {
            List<String> binName = null;
            if (params.ignoreFirstLine) {
                String line;
                BufferedReader br = new BufferedReader(
                        new InputStreamReader(new FileInputStream(allFileNames.get(0)), "UTF8"));
                if ((line = br.readLine()) != null) {
                    binName = Parser.getCSVRawColumns(line, params.delimiter);
                    br.close();
                    if (binName.size() != counters.write.colTotal) {
                        throw new Exception("Number of column in config file and datafile are mismatch."
                                + " Datafile: " + Utils.getFileName(allFileNames.get(0)) + " Configfile: "
                                + Utils.getFileName(columnDefinitionFileName));
                    }
                }
            }
            // update columndefs for metadata
            for (int i = 0; i < metadataColumnDefs.size(); i++) {
                if (metadataColumnDefs.get(i).staticValue) {
                } else {
                    if (metadataColumnDefs.get(i).binValuePos < 0) {
                        if (metadataColumnDefs.get(i).columnName == null) {
                            if (metadataColumnDefs.get(i).jsonPath == null) {
                                log.error("dynamic metadata having improper info" + metadataColumnDefs.toString()); //TODO
                            } else {
                                //TODO check for json_path
                            }
                        } else {
                            if (params.ignoreFirstLine) {
                                if (binName.indexOf(metadataColumnDefs.get(i).binValueHeader) != -1) {
                                    metadataColumnDefs.get(i).binValuePos = binName.indexOf(metadataColumnDefs.get(i).binValueHeader);
                                } else {
                                    throw new Exception("binName missing in data file:" + metadataColumnDefs.get(i).binValueHeader);
                                }
                            }
                        }
                    } else {
                        if (params.ignoreFirstLine)
                            metadataColumnDefs.get(i).binValueHeader = binName.get(metadataColumnDefs.get(i).binValuePos);
                    }
                }
                if ((!metadataColumnDefs.get(i).staticValue) && (metadataColumnDefs.get(i).binValuePos < 0)) {
                    throw new Exception("Information for bin mapping is missing in config file:" + metadataColumnDefs.get(i));
                }
                if (metadataColumnDefs.get(i).srcType == null) {
                    throw new Exception("Source data type is not properly mentioned:" + metadataColumnDefs.get(i));
                }
                if (metadataColumnDefs.get(i).binNameHeader == Constants.SET
                        && !metadataColumnDefs.get(i).srcType.equals(SrcColumnType.STRING)) {
                    throw new Exception("Set name should be string type:" + metadataColumnDefs.get(i));
                }
                if (metadataColumnDefs.get(i).binNameHeader.equalsIgnoreCase(Constants.SET) && params.set != null) {
                    throw new Exception("Set name is given both in config file and commandline. Provide only once.");
                }
            }
            // update columndefs for bins
            for (int i = 0; i < binColumnDefs.size(); i++) {
                if (binColumnDefs.get(i).staticName) {
                } else {
                    if (binColumnDefs.get(i).binNamePos < 0) {
                        if (binColumnDefs.get(i).columnName == null) {
                            if (binColumnDefs.get(i).jsonPath == null) {
                                log.error("dynamic bin having improper info"); //TODO
                            } else {
                                //TODO check for json_path
                            }
                        } else {
                            if (params.ignoreFirstLine) {
                                if (binName.indexOf(binColumnDefs.get(i).binNameHeader) != -1) {
                                    binColumnDefs.get(i).binNamePos = binName.indexOf(binColumnDefs.get(i).binNameHeader);
                                } else {
                                    throw new Exception("binName missing in data file:" + binColumnDefs.get(i).binNameHeader);
                                }
                            }
                        }
                    } else {
                        if (params.ignoreFirstLine)
                            binColumnDefs.get(i).binNameHeader = binName.get(binColumnDefs.get(i).binNamePos);
                    }
                }
                if (binColumnDefs.get(i).staticValue) {
                } else {
                    if (binColumnDefs.get(i).binValuePos < 0) {
                        if (binColumnDefs.get(i).columnName == null) {
                            if (binColumnDefs.get(i).jsonPath == null) {
                                log.error("dynamic bin having improper info"); //TODO
                            } else {
                                //TODO check for json_path
                            }
                        } else {
                            if (params.ignoreFirstLine) {
                                if (binName.contains(binColumnDefs.get(i).binValueHeader)) {
                                    binColumnDefs.get(i).binValuePos = binName.indexOf(binColumnDefs.get(i).binValueHeader);
                                } else if (!binColumnDefs.get(i).binValueHeader.toLowerCase().equals(Constants.SYSTEM_TIME)) {
                                    throw new Exception("Wrong column name mentioned in config file:" + binColumnDefs.get(i).binValueHeader);
                                }
                            }
                        }
                    } else {
                        if (params.ignoreFirstLine)
                            binColumnDefs.get(i).binValueHeader = binName.get(binColumnDefs.get(i).binValuePos);
                    }
                    //check for missing entries in config file
                    if (binColumnDefs.get(i).binValuePos < 0 && binColumnDefs.get(i).binValueHeader == null) {
                        throw new Exception("Information missing(Value header or bin mapping) in config file:" + binColumnDefs.get(i));
                    }
                    //check for proper data type in config file.
                    if (binColumnDefs.get(i).srcType == null) {
                        throw new Exception("Source data type is not properly mentioned:" + binColumnDefs.get(i));
                    }
                    //check for valid destination type
                    if ((binColumnDefs.get(i).srcType.equals(SrcColumnType.TIMESTAMP)
                            || binColumnDefs.get(i).srcType.equals(SrcColumnType.BLOB))
                            && binColumnDefs.get(i).dstType == null) {
                        throw new Exception("Destination type is not mentioned: " + binColumnDefs.get(i));
                    }
                    //check for encoding
                    if (binColumnDefs.get(i).dstType != null && binColumnDefs.get(i).encoding == null) {
                        throw new Exception("Encoding is not given for src-dst type conversion:" + binColumnDefs.get(i));
                    }
                    //check for valid encoding
                    if (binColumnDefs.get(i).srcType.equals(SrcColumnType.BLOB)
                            && !binColumnDefs.get(i).encoding.equals(Constants.HEX_ENCODING)) {
                        throw new Exception("Wrong encoding for blob data:" + binColumnDefs.get(i));
                    }
                }
                //Check static bin name mapped to dynamic bin value
                if ((binColumnDefs.get(i).binNamePos == binColumnDefs.get(i).binValuePos)
                        && (binColumnDefs.get(i).binNamePos != -1)) {
                    throw new Exception("Static bin name mapped to dynamic bin value:" + binColumnDefs.get(i));
                }
                //check for missing entries in config file
                if (binColumnDefs.get(i).binNameHeader == null
                        && binColumnDefs.get(i).binNameHeader.length() > Constants.BIN_NAME_LENGTH) {
                    throw new Exception("Information missing binName or large binName in config file:" + binColumnDefs.get(i));
                }
            }
        }
        log.info(params.toString());
        log.debug("MetadataConfig:" + metadataColumnDefs);
        log.debug("BinColumnDefs:" + binColumnDefs);
        // Start PrintStat thread
        statPrinter.start();
        // Reader pool size
        ExecutorService readerPool = Executors.newFixedThreadPool(
                nReaderThreads > allFileNames.size() ? allFileNames.size() : nReaderThreads);
        log.info("Reader pool size : " + nReaderThreads);
        // Submit all tasks to writer threadpool.
        for (String aFile : allFileNames) {
            log.debug("Submitting task for: " + aFile);
            readerPool.submit(new AerospikeLoad(aFile, client, params));
        }
        // Wait for reader pool to complete
        readerPool.shutdown();
        log.info("Shutdown down reader thread pool");
        while (!readerPool.isTerminated());
        //readerPool.awaitTermination(20, TimeUnit.MINUTES);
        log.info("Reader thread pool terminated");
        // Wait for writer pool to complete after getting all tasks from reader pool
        writerPool.shutdown();
        log.info("Shutdown down writer thread pool");
        while (!writerPool.isTerminated());
        log.info("Writer thread pool terminated");
        // Print final statistic of aerospike-loader.
        log.info("Final Statistics of importer: (Succesfull Writes = " + counters.write.writeCount.get() + ", "
                + "Errors=" + (counters.write.writeErrors.get() + counters.write.readErrors.get()
                        + counters.write.processingErrors.get())
                + "(" + (counters.write.writeErrors.get()) + "-Write," + counters.write.readErrors.get()
                + "-Read," + counters.write.processingErrors.get() + "-Processing)");
    } catch (Exception e) {
        log.error(e);
        if (log.isDebugEnabled()) {
            e.printStackTrace();
        }
    } finally {
        // Stop statistic printer thread.
        statPrinter.interrupt();
        log.info("Aerospike loader completed");
    }
}
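Much of the configuration logic above keys bin positions off the first line of the CSV. The underlying readLine() idiom is simply consuming one header line and mapping column names to indices. A sketch (delimiter assumed to be ','; class name hypothetical):

import java.io.BufferedReader;
import java.io.FileReader;
import java.io.IOException;
import java.util.HashMap;
import java.util.Map;

public class CsvHeader {
    public static Map<String, Integer> columnPositions(String csvPath) throws IOException {
        Map<String, Integer> positions = new HashMap<>();
        try (BufferedReader br = new BufferedReader(new FileReader(csvPath))) {
            String header = br.readLine(); // only the first line is consumed
            if (header != null) {
                String[] names = header.split(",");
                for (int i = 0; i < names.length; i++) {
                    positions.put(names[i].trim(), i); // column name -> position
                }
            }
        }
        return positions;
    }
}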
From source file:EchoClient.java
public static void main(String[] args) throws IOException {
    Socket kkSocket = null;
    PrintWriter out = null;
    BufferedReader in = null;
    try {
        kkSocket = new Socket("taranis", 4444);
        out = new PrintWriter(kkSocket.getOutputStream(), true);
        in = new BufferedReader(new InputStreamReader(kkSocket.getInputStream()));
    } catch (UnknownHostException e) {
        System.err.println("Don't know about host: taranis.");
        System.exit(1);
    } catch (IOException e) {
        System.err.println("Couldn't get I/O for the connection to: taranis.");
        System.exit(1);
    }
    BufferedReader stdIn = new BufferedReader(new InputStreamReader(System.in));
    String fromServer;
    String fromUser;
    while ((fromServer = in.readLine()) != null) {
        System.out.println("Server: " + fromServer);
        if (fromServer.equals("Bye."))
            break;
        fromUser = stdIn.readLine();
        if (fromUser != null) {
            System.out.println("Client: " + fromUser);
            out.println(fromUser);
        }
    }
    out.close();
    in.close();
    stdIn.close();
    kkSocket.close();
}
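For completeness, a minimal counterpart server — not part of the original example — showing readLine() on the accepting side of a line-oriented socket protocol; it echoes each line back until the peer closes the connection (port 4444 assumed from the client above):

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.PrintWriter;
import java.net.ServerSocket;
import java.net.Socket;

public class EchoServer {
    public static void main(String[] args) throws IOException {
        try (ServerSocket server = new ServerSocket(4444);
             Socket client = server.accept();
             BufferedReader in = new BufferedReader(new InputStreamReader(client.getInputStream()));
             PrintWriter out = new PrintWriter(client.getOutputStream(), true)) {
            String line;
            while ((line = in.readLine()) != null) { // null when the client disconnects
                out.println(line); // echo the line back
            }
        }
    }
}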
From source file:mx.com.pixup.portal.demo.DemoDisqueraUpdate.java
public static void main(String[] args) { System.out.println("BIENVENIDO A PIXUP"); System.out.println("Mantenimiento catlogo disquera"); System.out.println("Actualizacin de Disquera"); InputStreamReader isr = new InputStreamReader(System.in); BufferedReader br = new BufferedReader(isr); Connection connection = null; Statement statement = null;/*from w w w . j av a 2 s . c om*/ ResultSet resultSet = null; try { BasicDataSource dataSource = new BasicDataSource(); dataSource.setDriverClassName("com.mysql.jdbc.Driver"); dataSource.setUsername("root"); dataSource.setPassword("admin"); dataSource.setUrl("jdbc:mysql://127.0.0.1:3306/pixup"); connection = dataSource.getConnection(); statement = connection.createStatement(); String sql = "select id, nombre from disquera order by nombre"; resultSet = statement.executeQuery(sql); System.out.println("Id Disquera: \t Nombre Disquera"); while (resultSet.next()) { System.out.println(resultSet.getInt("id") + " \t " + resultSet.getString("nombre")); } System.out.println("Proporcione el id de la disquera a actualizar: "); String idDisquera = br.readLine(); System.out.println("Proporcione el nuevo nombre de la disquera: "); String nombreDisquera = br.readLine(); sql = "update disquera set nombre = '" + nombreDisquera + "' where id = " + idDisquera; statement.execute(sql); System.out.println("Disqueras Actualizadas:"); sql = "select id, nombre from disquera order by nombre desc"; resultSet = statement.executeQuery(sql); System.out.println("Id Disquera: \t Nombre Disquera"); while (resultSet.next()) { System.out.println(resultSet.getInt("id") + " \t " + resultSet.getString("nombre")); } } catch (Exception e) { System.out.println("Error en el sistema, intente ms tarde!!"); } finally { if (resultSet != null) { try { resultSet.close(); } catch (Exception e) { } } if (statement != null) { try { statement.close(); } catch (Exception e) { } } if (connection != null) { try { connection.close(); } catch (Exception e) { } } } }
From source file:edu.nyu.tandon.tool.BinnedRawHits.java
@SuppressWarnings("unchecked") public static void main(final String[] arg) throws Exception { SimpleJSAP jsap = new SimpleJSAP(BinnedRawHits.class.getName(), "Loads indices relative to a collection, possibly loads the collection, and answers to queries.", new Parameter[] { new FlaggedOption("collection", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.NOT_REQUIRED, 'c', "collection", "The collection of documents indexed by the given indices."), new FlaggedOption("objectCollection", new ObjectParser(DocumentCollection.class, MG4JClassParser.PACKAGE), JSAP.NO_DEFAULT, JSAP.NOT_REQUIRED, 'o', "object-collection", "An object specification describing a document collection."), new FlaggedOption("titleList", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.NOT_REQUIRED, 't', "title-list", "A serialized big list of titles (will override collection titles if specified)."), new FlaggedOption("titleFile", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.NOT_REQUIRED, 'T', "title-file", "A file of newline-separated, UTF-8 titles (will override collection titles if specified)."), new FlaggedOption("input", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.REQUIRED, 'I', "input", "A file containing the input."), new Switch("noSizes", 'n', "no-sizes", "Disable loading document sizes (they are necessary for BM25 scoring)."), new Switch("http", 'h', "http", "Starts an HTTP query server."), new Switch("verbose", 'v', "verbose", "Print full exception stack traces."), new FlaggedOption("itemClass", MG4JClassParser.getParser(), JSAP.NO_DEFAULT, JSAP.NOT_REQUIRED, 'i', "item-class", "The class that will handle item display in the HTTP server."), new FlaggedOption("itemMimeType", JSAP.STRING_PARSER, "text/html", JSAP.NOT_REQUIRED, 'm', "item-mime-type", "A MIME type suggested to the class handling item display in the HTTP server."), new FlaggedOption("port", JSAP.INTEGER_PARSER, "4242", JSAP.NOT_REQUIRED, 'p', "port", "The port on localhost where the server will appear."), new UnflaggedOption("basenameWeight", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.REQUIRED, JSAP.GREEDY, "The indices that the servlet will use. Indices are specified using their basename, optionally followed by a colon and a double representing the weight used to score results from that index. Indices without a specified weight are weighted 1."), new Switch("noMplex", 'P', "noMplex", "Starts with multiplex disabled."), new FlaggedOption("results", JSAP.INTEGER_PARSER, "1000", JSAP.NOT_REQUIRED, 'r', "results", "The # of results to display"), new FlaggedOption("mode", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.NOT_REQUIRED, 'M', "time", "The results display mode"), new FlaggedOption("divert", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.NOT_REQUIRED, 'd', "divert", "output file"), new FlaggedOption("dumpsize", JSAP.INTEGER_PARSER, JSAP.NO_DEFAULT, JSAP.NOT_REQUIRED, 'D', "dumpsize", "number of queries before dumping") });//from ww w.j a v a2 s . com final JSAPResult jsapResult = jsap.parse(arg); if (jsap.messagePrinted()) return; final DocumentCollection documentCollection = (DocumentCollection) (jsapResult.userSpecified("collection") ? AbstractDocumentSequence.load(jsapResult.getString("collection")) : jsapResult.userSpecified("objectCollection") ? jsapResult.getObject("objectCollection") : null); final BigList<? extends CharSequence> titleList = (BigList<? extends CharSequence>) (jsapResult .userSpecified("titleList") ? BinIO.loadObject(jsapResult.getString("titleList")) : jsapResult.userSpecified("titleFile") ? 
new FileLinesBigList(jsapResult.getString("titleFile"), "UTF-8") : null); final String[] basenameWeight = jsapResult.getStringArray("basenameWeight"); final Object2ReferenceLinkedOpenHashMap<String, Index> indexMap = new Object2ReferenceLinkedOpenHashMap<String, Index>( Hash.DEFAULT_INITIAL_SIZE, .5f); final Reference2DoubleOpenHashMap<Index> index2Weight = new Reference2DoubleOpenHashMap<Index>(); final boolean verbose = jsapResult.getBoolean("verbose"); final boolean loadSizes = !jsapResult.getBoolean("noSizes"); BinnedRawHits.loadIndicesFromSpec(basenameWeight, loadSizes, documentCollection, indexMap, index2Weight); final long numberOfDocuments = indexMap.values().iterator().next().numberOfDocuments; if (titleList != null && titleList.size64() != numberOfDocuments) throw new IllegalArgumentException("The number of titles (" + titleList.size64() + " and the number of documents (" + numberOfDocuments + ") do not match"); final Object2ObjectOpenHashMap<String, TermProcessor> termProcessors = new Object2ObjectOpenHashMap<String, TermProcessor>( indexMap.size()); for (String alias : indexMap.keySet()) termProcessors.put(alias, indexMap.get(alias).termProcessor); final SimpleParser simpleParser = new SimpleParser(indexMap.keySet(), indexMap.firstKey(), termProcessors); final Reference2ReferenceMap<Index, Object> index2Parser = new Reference2ReferenceOpenHashMap<Index, Object>(); /* // Fetch parsers for payload-based fields. for( Index index: indexMap.values() ) if ( index.hasPayloads ) { if ( index.payload.getClass() == DatePayload.class ) index2Parser.put( index, DateFormat.getDateInstance( DateFormat.SHORT, Locale.UK ) ); } */ final HitsQueryEngine queryEngine = new HitsQueryEngine(simpleParser, new DocumentIteratorBuilderVisitor( indexMap, index2Parser, indexMap.get(indexMap.firstKey()), MAX_STEMMING), indexMap); queryEngine.setWeights(index2Weight); queryEngine.score(new Scorer[] { new BM25Scorer(), new VignaScorer() }, new double[] { 1, 1 }); queryEngine.multiplex = !jsapResult.userSpecified("moPlex") || jsapResult.getBoolean("noMplex"); queryEngine.intervalSelector = null; queryEngine.equalize(1000); BinnedRawHits query = new BinnedRawHits(queryEngine); // start docHits with at least 10K results query.interpretCommand("$score BM25Scorer"); query.interpretCommand("$mode time"); if (jsapResult.userSpecified("divert")) query.interpretCommand("$divert " + jsapResult.getObject("divert")); query.displayMode = OutputType.DOCHHITS; query.maxOutput = jsapResult.getInt("results", 10000); String q; int n = 0; int dumpsize = jsapResult.userSpecified("dumpsize") ? 
jsapResult.getInt("dumpsize", 10000) : 10000; buildBins(query.maxOutput, (int) numberOfDocuments); String lastQ = ""; try { final BufferedReader br = new BufferedReader( new InputStreamReader(new FileInputStream(jsapResult.getString("input")))); final ObjectArrayList<DocumentScoreInfo<ObjectArrayList<Byte>>> results = new ObjectArrayList<DocumentScoreInfo<ObjectArrayList<Byte>>>(); for (;;) { q = br.readLine(); if (q == null) { System.err.println(); break; // CTRL-D } if (q.length() == 0) continue; if (q.charAt(0) == '$') { if (!query.interpretCommand(q)) break; continue; } queryCount++; long time = -System.nanoTime(); if (q.compareTo(lastQ) != 0) { try { n = queryEngine.process(q, 0, query.maxOutput, results); } catch (QueryParserException e) { if (verbose) e.getCause().printStackTrace(System.err); else System.err.println(e.getCause()); continue; } catch (Exception e) { if (verbose) e.printStackTrace(System.err); else System.err.println(e); continue; } lastQ = q; } time += System.nanoTime(); query.output(results, documentCollection, titleList, TextMarker.TEXT_BOLDFACE); // dump batch if (queryCount % dumpsize == 0) { dumpBatch(query, numberOfDocuments, false); } // check postHits if (query.postHits.size() > 100000000) dumpPosthits(query, numberOfDocuments, false); } } finally { dumpBatch(query, numberOfDocuments, true); dumpPosthits(query, numberOfDocuments, true); if (query.outputDH != System.out) query.outputDH.close(); } }
From source file:net.iiit.siel.analysis.lang.LanguageIdentifier.java
/**
 * The main method.
 *
 * @param args the arguments
 */
public static void main(String args[]) {
    String usage = "Usage: LanguageIdentifier " + "[-identifyrows filename maxlines] "
            + "[-identifyfile charset filename] " + "[-identifyfileset charset files] "
            + "[-identifytext text] " + "[-identifyurl url]";
    int command = 0;
    final int IDFILE = 1;
    final int IDTEXT = 2;
    final int IDURL = 3;
    final int IDFILESET = 4;
    final int IDROWS = 5;
    Vector fileset = new Vector();
    String filename = "";
    String charset = "";
    String url = "";
    String text = "";
    int max = 0;
    // TODO niket writing test args here..
    /*
    args = new String[2];
    args[0] = "-identifyurl";
    args[1] = "file:/home1/niket/TamilSamplePage.html";
    //args[2] = "/home1/niket/nutch-clia/input.txt";
    */
    // TODO niket end here
    if (args.length == 0) {
        System.err.println(usage);
        System.exit(-1);
    }
    for (int i = 0; i < args.length; i++) { // parse command line
        if (args[i].equals("-identifyfile")) {
            command = IDFILE;
            charset = args[++i];
            filename = args[++i];
        }
        if (args[i].equals("-identifyurl")) {
            command = IDURL;
            filename = args[++i];
        }
        if (args[i].equals("-identifyrows")) {
            command = IDROWS;
            filename = args[++i];
            max = Integer.parseInt(args[++i]);
        }
        if (args[i].equals("-identifytext")) {
            command = IDTEXT;
            for (i++; i < args.length - 1; i++)
                text += args[i] + " ";
        }
        if (args[i].equals("-identifyfileset")) {
            command = IDFILESET;
            charset = args[++i];
            for (i++; i < args.length; i++) {
                File[] files = null;
                File f = new File(args[i]);
                if (f.isDirectory()) {
                    files = f.listFiles();
                } else {
                    files = new File[] { f };
                }
                for (int j = 0; j < files.length; j++) {
                    fileset.add(files[j].getAbsolutePath());
                }
            }
        }
    }
    Configuration conf = NutchConfiguration.create();
    String lang = null;
    LanguageIdentifier idfr = new LanguageIdentifier(conf);
    File f;
    FileInputStream fis;
    try {
        switch (command) {
        case IDTEXT:
            lang = idfr.identify(text);
            System.out.println("Lang :" + lang);
            break;
        case IDFILE:
            f = new File(filename);
            fis = new FileInputStream(f);
            lang = idfr.identify(fis, charset);
            fis.close();
            break;
        case IDURL:
            lang = LangIdentifierUtility.IdentifyLangFromURLDirectly(filename);
            /*
             * our url identifier is confused or couldn't identify lang from
             * URL
             */
            if (lang == null || lang.equalsIgnoreCase("en")) {
                System.out.println("Ambuguity in identifying language from URL");
            } else {
                System.out.println("Lang was identified(using URL) as: " + lang);
            }
            break;
        case IDROWS:
            f = new File(filename);
            BufferedReader br = new BufferedReader(new InputStreamReader(new FileInputStream(f)));
            String line;
            while (max > 0 && (line = br.readLine()) != null) {
                line = line.trim();
                if (line.length() > 2) {
                    max--;
                    lang = idfr.identify(line);
                    System.out.println("R=" + lang + ":" + line);
                }
            }
            br.close();
            System.exit(0);
            break;
        case IDFILESET:
            /*
             * used for benchs for (int j=128; j<=524288; j*=2) { long start
             * = System.currentTimeMillis(); idfr.analyzeLength = j;
             */
            System.out.println("FILESET");
            Iterator i = fileset.iterator();
            while (i.hasNext()) {
                try {
                    filename = (String) i.next();
                    f = new File(filename);
                    fis = new FileInputStream(f);
                    lang = idfr.identify(fis, charset);
                    fis.close();
                } catch (Exception e) {
                    System.out.println(e);
                }
                System.out.println(filename + " was identified as " + lang);
            }
            /*
             * used for benchs System.out.println(j + "/" +
             * (System.currentTimeMillis()-start)); }
             */
            System.exit(0);
            break;
        }
    } catch (Exception e) {
        System.out.println(e);
        System.out.println("lang could not be identified properly");
        e.printStackTrace();
    }
    System.out.println("text was identified as " + lang);
    /*
     * DONOT delete the next few lines, they should be enabled, when a lang.
     * mapping map needs to be generated. TODO this is for printing
     * the hashMapRangeLangIDTable only
     *
     * idfr.langMarkerObject.printHashmapTableWithFormatting();
     *
     * System.out.println("\n\n\n Printing english text contents in this file:\n");
     * System.out.println(idfr.langMarkerObject.getLangCharacters(
     * LanguageIdentifierConstants.LangShortNames.ENGLISH.langShortName()).toString());
     *
     * System.out.println("\n\n\n Printing telugu text contents in this file:\n");
     * System.out.println(idfr.langMarkerObject.getLangCharacters(
     * LanguageIdentifierConstants.LangShortNames.TELUGU.langShortName()).toString());
     *
     * System.out.println("\n\n\n Printing unknown text contents in this file:\n");
     * System.out.println(idfr.langMarkerObject.getLangCharacters(
     * LanguageIdentifierConstants.LangShortNames.UNKNOWN_LANG.langShortName()).toString());
     */
}
From source file:de.unileipzig.ub.indexer.App.java
public static void main(String[] args) throws IOException {
    // create Options object
    Options options = new Options();
    options.addOption("h", "help", false, "display this help");
    options.addOption("f", "filename", true, "name of the JSON file whose content should be indexed");
    options.addOption("i", "index", true, "the name of the target index");
    options.addOption("d", "doctype", true, "the name of the doctype (title, local, ...)");
    options.addOption("t", "host", true, "elasticsearch hostname (default: 0.0.0.0)");
    options.addOption("p", "port", true, "transport port (that's NOT the http port, default: 9300)");
    options.addOption("c", "cluster", true, "cluster name (default: elasticsearch_mdma)");
    options.addOption("b", "bulksize", true, "number of docs sent in one request (default: 3000)");
    options.addOption("v", "verbose", false, "show processing speed while indexing");
    options.addOption("s", "status", false, "only show status of index for file");
    options.addOption("r", "repair", false, "attempt to repair recoverable inconsistencies on the go");
    options.addOption("e", "debug", false, "set logging level to debug");
    options.addOption("l", "logfile", true, "logfile - in not specified only log to stdout");
    options.addOption("m", "memcached", true, "host and port of memcached (default: localhost:11211)");
    options.addOption("z", "latest-flag-on", true, "enable latest flag according to field (within content, e.g. 001)");
    options.addOption("a", "flat", false, "flat-mode: do not check for inconsistencies");
    CommandLineParser parser = new PosixParser();
    CommandLine cmd = null;
    try {
        cmd = parser.parse(options, args);
    } catch (ParseException ex) {
        logger.error(ex);
        System.exit(1);
    }
    // setup logging
    Properties systemProperties = System.getProperties();
    systemProperties.put("net.spy.log.LoggerImpl", "net.spy.memcached.compat.log.Log4JLogger");
    System.setProperties(systemProperties);
    Logger.getLogger("net.spy.memcached").setLevel(Level.ERROR);
    Properties props = new Properties();
    props.load(props.getClass().getResourceAsStream("/log4j.properties"));
    if (cmd.hasOption("debug")) {
        props.setProperty("log4j.logger.de.unileipzig", "DEBUG");
    }
    if (cmd.hasOption("logfile")) {
        props.setProperty("log4j.rootLogger", "INFO, stdout, F");
        props.setProperty("log4j.appender.F", "org.apache.log4j.FileAppender");
        props.setProperty("log4j.appender.F.File", cmd.getOptionValue("logfile"));
        props.setProperty("log4j.appender.F.layout", "org.apache.log4j.PatternLayout");
        props.setProperty("log4j.appender.F.layout.ConversionPattern", "%5p | %d | %F | %L | %m%n");
    }
    PropertyConfigurator.configure(props);
    InetAddress addr = InetAddress.getLocalHost();
    String memcachedHostAndPort = addr.getHostAddress() + ":11211";
    if (cmd.hasOption("m")) {
        memcachedHostAndPort = cmd.getOptionValue("m");
    }
    // setup caching
    try {
        if (memcachedClient == null) {
            memcachedClient = new MemcachedClient(
                    new ConnectionFactoryBuilder().setFailureMode(FailureMode.Cancel).build(),
                    AddrUtil.getAddresses("0.0.0.0:11211"));
            try {
                // give client and server 500ms
                Thread.sleep(300);
            } catch (InterruptedException ex) {
            }
            Collection availableServers = memcachedClient.getAvailableServers();
            logger.info(availableServers);
            if (availableServers.size() == 0) {
                logger.info("no memcached servers found");
                memcachedClient.shutdown();
                memcachedClient = null;
            } else {
                logger.info(availableServers.size() + " memcached server(s) detected, fine.");
            }
        }
    } catch (IOException ex) {
        logger.warn("couldn't create a connection, bailing out: " + ex.getMessage());
    }
    // process options
    if (cmd.hasOption("h")) {
        HelpFormatter formatter = new HelpFormatter();
        formatter.printHelp("indexer", options, true);
        quit(0);
    }
    boolean verbose = false;
    if (cmd.hasOption("verbose")) {
        verbose = true;
    }
    // ES options
    String[] hosts = new String[] { "0.0.0.0" };
    int port = 9300;
    String clusterName = "elasticsearch_mdma";
    int bulkSize = 3000;
    if (cmd.hasOption("host")) {
        hosts = cmd.getOptionValues("host");
    }
    if (cmd.hasOption("port")) {
        port = Integer.parseInt(cmd.getOptionValue("port"));
    }
    if (cmd.hasOption("cluster")) {
        clusterName = cmd.getOptionValue("cluster");
    }
    if (cmd.hasOption("bulksize")) {
        bulkSize = Integer.parseInt(cmd.getOptionValue("bulksize"));
        if (bulkSize < 1 || bulkSize > 100000) {
            logger.error("bulksize must be between 1 and 100,000");
            quit(1);
        }
    }
    // ES Client
    final Settings settings = ImmutableSettings.settingsBuilder().put("cluster.name", "elasticsearch_mdma").build();
    final TransportClient client = new TransportClient(settings);
    for (String host : hosts) {
        client.addTransportAddress(new InetSocketTransportAddress(host, port));
    }
    if (cmd.hasOption("filename") && cmd.hasOption("index") && cmd.hasOption("doctype")) {
        final String filename = cmd.getOptionValue("filename");
        final File _file = new File(filename);
        if (_file.length() == 0) {
            logger.info(_file.getAbsolutePath() + " is empty, skipping");
            quit(0); // file is empty
        }
        // for flat mode: leave a stampfile beside the json to
        // indicate previous successful processing
        File directory = new File(filename).getParentFile();
        File stampfile = new File(directory, DigestUtils.shaHex(filename) + ".indexed");
        long start = System.currentTimeMillis();
        long lineCount = 0;
        final String indexName = cmd.getOptionValue("index");
        final String docType = cmd.getOptionValue("doctype");
        BulkRequestBuilder bulkRequest = client.prepareBulk();
        try {
            if (cmd.hasOption("flat")) {
                // flat mode
                // .........
                if (stampfile.exists()) {
                    logger.info("SKIPPING, since it seems this file has already "
                            + "been imported (found: " + stampfile.getAbsolutePath() + ")");
                    quit(0);
                }
            } else {
                final String srcSHA1 = extractSrcSHA1(filename);
                logger.debug(filename + " srcsha1: " + srcSHA1);
                long docsInIndex = getIndexedRecordCount(client, indexName, srcSHA1);
                logger.debug(filename + " indexed: " + docsInIndex);
                long docsInFile = getLineCount(filename);
                logger.debug(filename + " lines: " + docsInFile);
                // in non-flat-mode, indexing would take care
                // of inconsistencies
                if (docsInIndex == docsInFile) {
                    logger.info("UP-TO DATE: " + filename + " (" + docsInIndex + ", " + srcSHA1 + ")");
                    client.close();
                    quit(0);
                }
                if (docsInIndex > 0) {
                    logger.warn("INCONSISTENCY DETECTED: " + filename + ": indexed:" + docsInIndex + " lines:" + docsInFile);
                    if (!cmd.hasOption("r")) {
                        logger.warn("Please re-run indexer with --repair flag or delete residues first with: $ curl -XDELETE "
                                + hosts[0] + ":9200/" + indexName
                                + "/_query -d ' {\"term\" : { \"meta.srcsha1\" : \"" + srcSHA1 + "\" }}'");
                        client.close();
                        quit(1);
                    } else {
                        logger.info("Attempting to clear residues...");
                        // attempt to repair once
                        DeleteByQueryResponse dbqr = client.prepareDeleteByQuery(indexName)
                                .setQuery(termQuery("meta.srcsha1", srcSHA1)).execute().actionGet();
                        Iterator<IndexDeleteByQueryResponse> it = dbqr.iterator();
                        long deletions = 0;
                        while (it.hasNext()) {
                            IndexDeleteByQueryResponse response = it.next();
                            deletions += 1;
                        }
                        logger.info("Deleted residues of " + filename);
                        logger.info("Refreshing [" + indexName + "]");
                        RefreshResponse refreshResponse = client.admin().indices()
                                .refresh(new RefreshRequest(indexName)).actionGet();
                        long indexedAfterDelete = getIndexedRecordCount(client, indexName, srcSHA1);
                        logger.info(indexedAfterDelete + " docs remained");
                        if (indexedAfterDelete > 0) {
                            logger.warn("Not all residues cleaned. Try to fix this manually: $ curl -XDELETE "
                                    + hosts[0] + ":9200/" + indexName
                                    + "/_query -d ' {\"term\" : { \"meta.srcsha1\" : \"" + srcSHA1 + "\" }}'");
                            quit(1);
                        } else {
                            logger.info("Residues are gone. Now trying to reindex: " + filename);
                        }
                    }
                }
            }
            logger.info("INDEXING-REQUIRED: " + filename);
            if (cmd.hasOption("status")) {
                quit(0);
            }
            HashSet idsInBatch = new HashSet();
            String idField = null;
            if (cmd.hasOption("z")) {
                idField = cmd.getOptionValue("z");
            }
            final FileReader fr = new FileReader(filename);
            final BufferedReader br = new BufferedReader(fr);
            String line;
            // one line is one document
            while ((line = br.readLine()) != null) {
                // "Latest-Flag" machine
                // This gets obsolete with a "flat" index
                if (cmd.hasOption("z")) {
                    // flag that indicates, whether the document
                    // about to be indexed will be the latest
                    boolean willBeLatest = true;
                    // check if there is a previous (lower meta.timestamp) document with
                    // the same identifier (whatever that may be - queried under "content")
                    final String contentIdentifier = getContentIdentifier(line, idField);
                    idsInBatch.add(contentIdentifier);
                    // assumed in meta.timestamp
                    final Long timestamp = Long.parseLong(getTimestamp(line));
                    logger.debug("Checking whether record is latest (line: " + lineCount + ")");
                    logger.debug(contentIdentifier + ", " + timestamp);
                    // get all docs, which match the contentIdentifier
                    // by filter, which doesn't score
                    final TermFilterBuilder idFilter = new TermFilterBuilder("content." + idField, contentIdentifier);
                    final TermFilterBuilder kindFilter = new TermFilterBuilder("meta.kind", docType);
                    final AndFilterBuilder afb = new AndFilterBuilder();
                    afb.add(idFilter).add(kindFilter);
                    final FilteredQueryBuilder fb = filteredQuery(matchAllQuery(), afb);
                    final SearchResponse searchResponse = client.prepareSearch(indexName)
                            .setSearchType(SearchType.DFS_QUERY_THEN_FETCH).setQuery(fb).setFrom(0)
                            .setSize(1200) // 3 years and 105 days assuming daily updates at the most
                            .setExplain(false).execute().actionGet();
                    final SearchHits searchHits = searchResponse.getHits();
                    logger.debug("docs with this id in the index: " + searchHits.getTotalHits());
                    for (final SearchHit hit : searchHits.getHits()) {
                        final String docId = hit.id();
                        final Map<String, Object> source = hit.sourceAsMap();
                        final Map meta = (Map) source.get("meta");
                        final Long docTimestamp = Long.parseLong(meta.get("timestamp").toString());
                        // if the indexed doc timestamp is lower the the current one,
                        // remove any latest flag
                        if (timestamp >= docTimestamp) {
                            source.remove("latest");
                            final ObjectMapper mapper = new ObjectMapper();
                            // put the updated doc back
                            // IndexResponse response =
                            client.prepareIndex(indexName, docType).setCreate(false).setId(docId)
                                    .setSource(mapper.writeValueAsBytes(source))
                                    .execute(new ActionListener<IndexResponse>() {
                                        public void onResponse(IndexResponse rspns) {
                                            logger.debug("Removed latest flag from " + contentIdentifier + ", "
                                                    + docTimestamp + ", " + hit.id() + " since (" + timestamp
                                                    + " > " + docTimestamp + ")");
                                        }

                                        public void onFailure(Throwable thrwbl) {
                                            logger.error("Could not remove flag from " + hit.id() + ", " + contentIdentifier);
                                        }
                                    });
                            // .execute() //.actionGet();
                        } else {
                            logger.debug("Doc " + hit.id() + " is newer (" + docTimestamp + ")");
                            willBeLatest = false;
                        }
                    }
                    if (willBeLatest) {
                        line = setLatestFlag(line);
                        logger.info("Setting latest flag on " + contentIdentifier + ", " + timestamp);
                    }
                    // end of latest-flag machine
                    // beware - this will be correct as long as there
                    // are no dups within one bulk!
                }
                bulkRequest.add(client.prepareIndex(indexName, docType).setSource(line));
                lineCount++;
                logger.debug("Added line " + lineCount + " to BULK");
                logger.debug(line);
                if (lineCount % bulkSize == 0) {
                    if (idsInBatch.size() != bulkSize && cmd.hasOption("z")) {
                        logger.error("This batch has duplications in the ID. That's not bad for the index, just makes the latest flag fuzzy");
                        logger.error("Bulk size was: " + bulkSize + ", but " + idsInBatch.size() + " IDs (only)");
                    }
                    idsInBatch.clear();
                    logger.debug("Issuing BULK request");
                    final long actionCount = bulkRequest.numberOfActions();
                    final BulkResponse bulkResponse = bulkRequest.execute().actionGet();
                    final long tookInMillis = bulkResponse.getTookInMillis();
                    if (bulkResponse.hasFailures()) {
                        logger.fatal("FAILED, bulk not indexed. exiting now.");
                        Iterator<BulkItemResponse> it = bulkResponse.iterator();
                        while (it.hasNext()) {
                            BulkItemResponse bir = it.next();
                            if (bir.isFailed()) {
                                Failure failure = bir.getFailure();
                                logger.fatal("id: " + failure.getId() + ", message: " + failure.getMessage()
                                        + ", type: " + failure.getType() + ", index: " + failure.getIndex());
                            }
                        }
                        quit(1);
                    } else {
                        if (verbose) {
                            final double elapsed = System.currentTimeMillis() - start;
                            final double speed = (lineCount / elapsed * 1000);
                            logger.info("OK (" + filename + ") " + lineCount + " docs indexed (" + actionCount
                                    + "/" + tookInMillis + "ms" + "/" + String.format("%.2f", speed) + "r/s)");
                        }
                    }
                    bulkRequest = client.prepareBulk();
                }
            }
            // handle the remaining items
            final long actionCount = bulkRequest.numberOfActions();
            if (actionCount > 0) {
                final BulkResponse bulkResponse = bulkRequest.execute().actionGet();
                final long tookInMillis = bulkResponse.getTookInMillis();
                if (bulkResponse.hasFailures()) {
                    logger.fatal("FAILED, bulk not indexed. exiting now.");
                    Iterator<BulkItemResponse> it = bulkResponse.iterator();
                    while (it.hasNext()) {
                        BulkItemResponse bir = it.next();
                        if (bir.isFailed()) {
                            Failure failure = bir.getFailure();
                            logger.fatal("id: " + failure.getId() + ", message: " + failure.getMessage()
                                    + ", type: " + failure.getType() + ", index: " + failure.getIndex());
                        }
                    }
                    quit(1);
                } else {
                    // trigger update now
                    RefreshResponse refreshResponse = client.admin().indices()
                            .refresh(new RefreshRequest(indexName)).actionGet();
                    if (verbose) {
                        final double elapsed = System.currentTimeMillis() - start;
                        final double speed = (lineCount / elapsed * 1000);
                        logger.info("OK (" + filename + ") " + lineCount + " docs indexed (" + actionCount
                                + "/" + tookInMillis + "ms" + "/" + String.format("%.2f", speed) + "r/s)");
                    }
                }
            }
            br.close();
            client.close();
            final double elapsed = (System.currentTimeMillis() - start) / 1000;
            final double speed = (lineCount / elapsed);
            logger.info("indexing (" + filename + ") " + lineCount + " docs took " + elapsed + "s (speed: "
                    + String.format("%.2f", speed) + "r/s)");
            if (cmd.hasOption("flat")) {
                try {
                    FileUtils.touch(stampfile);
                } catch (IOException ioe) {
                    logger.warn(".indexed files not created. Will reindex everything everytime.");
                }
            }
        } catch (IOException e) {
            client.close();
            logger.error(e);
            quit(1);
        } finally {
            client.close();
        }
    }
    quit(0);
}
From source file:org.fcrepo.client.test.PerformanceTests.java
public static void main(String[] args) throws Exception {
    if (args.length < 8 || args.length > 9) {
        usage();
    }
    String host = args[0];
    String port = args[1];
    String username = args[2];
    String password = args[3];
    String itr = args[4];
    String thrds = args[5];
    String output = args[6];
    String name = args[7];
    String context = Constants.FEDORA_DEFAULT_APP_CONTEXT;
    if (args.length == 9 && !args[8].equals("")) {
        context = args[8];
    }
    if (host == null || host.startsWith("$") || port == null || port.startsWith("$") || username == null
            || username.startsWith("$") || password == null || password.startsWith("$") || itr == null
            || itr.startsWith("$") || thrds == null || thrds.startsWith("$") || output == null
            || output.startsWith("$") || name == null || name.startsWith("$")) {
        usage();
    }
    name = name.replaceAll(",", ";");
    iterations = Integer.parseInt(itr);
    threads = Integer.parseInt(thrds);

    boolean newFile = true;
    File outputFile = new File(output);
    File tempFile = null;
    BufferedReader reader = null;
    String line = "";
    if (outputFile.exists()) {
        newFile = false;
        // Create a copy of the file to read from
        tempFile = File.createTempFile("performance-test", "tmp");
        BufferedReader input = new BufferedReader(new FileReader(outputFile));
        PrintStream tempOut = new PrintStream(tempFile);
        while ((line = input.readLine()) != null) {
            tempOut.println(line);
        }
        input.close();
        tempOut.close();
        reader = new BufferedReader(new FileReader(tempFile));
    }
    PrintStream out = new PrintStream(outputFile);
    if (newFile) {
        out.println("--------------------------------------------------------------"
                + " Performance Test Results "
                + "--------------------------------------------------------------");
    }

    PerformanceTests tests = new PerformanceTests();
    tests.init(host, port, context, username, password);
    System.out.println("Running Ingest Round-Trip Test...");
    long ingestResults = tests.runIngestTest();
    System.out.println("Running AddDatastream Round-Trip Test...");
    long addDsResults = tests.runAddDatastreamTest();
    System.out.println("Running ModifyDatastreamByReference Round-Trip Test...");
    long modifyRefResults = tests.runModifyDatastreamByRefTest();
    System.out.println("Running ModifyDatastreamByValue Round-Trip Test...");
    long modifyValResults = tests.runModifyDatastreamByValueTest();
    System.out.println("Running PurgeDatastream Round-Trip Test...");
    long purgeDsResults = tests.runPurgeDatastreamTest();
    System.out.println("Running PurgeObject Round-Trip Test...");
    long purgeObjectResults = tests.runPurgeObjectTest();
    System.out.println("Running GetDatastream Round-Trip Test...");
    long getDatastreamResults = tests.runGetDatastreamTest();
    System.out.println("Running GetDatastreamREST Round-Trip Test...");
    long getDatastreamRestResults = tests.runGetDatastreamRestTest();
    System.out.println("Running Throughput Tests...");
    long[] tpResults = tests.runThroughputTests();
    System.out.println("Running Threaded Throughput Tests...");
    long[] tptResults = tests.runThreadedThroughputTests();

    if (newFile) {
        out.println("1. Test performing each operation in isolation. Time (in ms) is the average required to perform each operation.");
        out.println("test name, ingest, addDatastream, modifyDatastreamByReference, modifyDatastreamByValue, purgeDatastream, purgeObject, getDatastream, getDatastreamREST");
    } else {
        line = reader.readLine();
        while (line != null && line.length() > 2) {
            out.println(line);
            line = reader.readLine();
        }
    }
    out.println(name + ", " + ingestResults + ", " + addDsResults + ", " + modifyRefResults + ", "
            + modifyValResults + ", " + purgeDsResults + ", " + purgeObjectResults + ", "
            + getDatastreamResults / iterations + ", " + getDatastreamRestResults / iterations);
    out.println();

    if (newFile) {
        out.println("2. Operations-Per-Second based on results listed in item 1.");
        out.println("test name, ingest, addDatastream, modifyDatastreamByReference, modifyDatastreamByValue, purgeDatastream, purgeObject, getDatastream, getDatastreamREST");
    } else {
        line = reader.readLine();
        while (line != null && line.length() > 2) {
            out.println(line);
            line = reader.readLine();
        }
    }
    double ingestPerSecond = 1000 / (double) ingestResults;
    double addDsPerSecond = 1000 / (double) addDsResults;
    double modifyRefPerSecond = 1000 / (double) modifyRefResults;
    double modifyValPerSecond = 1000 / (double) modifyValResults;
    double purgeDsPerSecond = 1000 / (double) purgeDsResults;
    double purgeObjPerSecond = 1000 / (double) purgeObjectResults;
    double getDatastreamPerSecond = 1000 / ((double) getDatastreamResults / iterations);
    double getDatastreamRestPerSecond = 1000 / ((double) getDatastreamRestResults / iterations);
    out.println(name + ", " + round(ingestPerSecond) + ", " + round(addDsPerSecond) + ", "
            + round(modifyRefPerSecond) + ", " + round(modifyValPerSecond) + ", " + round(purgeDsPerSecond)
            + ", " + round(purgeObjPerSecond) + ", " + round(getDatastreamPerSecond) + ", "
            + round(getDatastreamRestPerSecond));
    out.println();

    if (newFile) {
        out.println("3. Test performing operations back-to-back. Time (in ms) is that required to perform all iterations.");
        out.println("test name, ingest, addDatastream, modifyDatastreamByReference, modifyDatastreamByValue, purgeDatastream, purgeObject, getDatastream, getDatastreamREST");
    } else {
        line = reader.readLine();
        while (line != null && line.length() > 2) {
            out.println(line);
            line = reader.readLine();
        }
    }
    out.println(name + ", " + tpResults[0] + ", " + tpResults[1] + ", " + tpResults[2] + ", " + tpResults[3]
            + ", " + tpResults[4] + ", " + tpResults[5] + ", " + getDatastreamResults + ", "
            + getDatastreamRestResults);
    out.println();

    if (newFile) {
        out.println("4. Operations-Per-Second based on results listed in item 3.");
        out.println("test name, ingest, addDatastream, modifyDatastreamByReference, modifyDatastreamByValue, purgeDatastream, purgeObject, getDatastream, getDatastreamREST");
    } else {
        line = reader.readLine();
        while (line != null && line.length() > 2) {
            out.println(line);
            line = reader.readLine();
        }
    }
    double ingestItPerSecond = (double) (iterations * 1000) / tpResults[0];
    double addDsItPerSecond = (double) (iterations * 1000) / tpResults[1];
    double modifyRefItPerSecond = (double) (iterations * 1000) / tpResults[2];
    double modifyValItPerSecond = (double) (iterations * 1000) / tpResults[3];
    double purgeDsItPerSecond = (double) (iterations * 1000) / tpResults[4];
    double purgeObjItPerSecond = (double) (iterations * 1000) / tpResults[5];
    double getDsItPerSecond = (double) (iterations * 1000) / getDatastreamResults;
    double getDsRestItPerSecond = (double) (iterations * 1000) / getDatastreamRestResults;
    out.println(name + ", " + round(ingestItPerSecond) + ", " + round(addDsItPerSecond) + ", "
            + round(modifyRefItPerSecond) + ", " + round(modifyValItPerSecond) + ", "
            + round(purgeDsItPerSecond) + ", " + round(purgeObjItPerSecond) + ", " + round(getDsItPerSecond)
            + ", " + round(getDsRestItPerSecond));
    out.println();

    if (newFile) {
        out.println("5. Test performing operations using a thread pool. Time (in ms) is that required to perform all iterations.");
        out.println("test name, ingest, addDatastream, modifyDatastreamByReference, modifyDatastreamByValue, purgeDatastream, purgeObject, getDatastream, getDatastreamREST");
    } else {
        line = reader.readLine();
        while (line != null && line.length() > 2) {
            out.println(line);
            line = reader.readLine();
        }
    }
    out.println(name + ", " + tptResults[0] + ", " + tptResults[1] + ", " + tptResults[2] + ", "
            + tptResults[3] + ", " + tptResults[4] + ", " + tptResults[5] + ", " + tptResults[6] + ", "
            + tptResults[7]);
    out.println();

    if (newFile) {
        out.println("6. Operations-Per-Second based on results listed in item 5.");
        out.println("test name, ingest, addDatastream, modifyDatastreamByReference, modifyDatastreamByValue, purgeDatastream, purgeObject, getDatastream, getDatastreamREST");
    } else {
        line = reader.readLine();
        while (line != null && line.length() > 2) {
            out.println(line);
            line = reader.readLine();
        }
    }
    double thrdIngestItPerSecond = (double) (iterations * 1000) / tptResults[0];
    double thrdAddDsItPerSecond = (double) (iterations * 1000) / tptResults[1];
    double thrdModifyRefItPerSecond = (double) (iterations * 1000) / tptResults[2];
    double thrdModifyValItPerSecond = (double) (iterations * 1000) / tptResults[3];
    double thrdPurgeDsItPerSecond = (double) (iterations * 1000) / tptResults[4];
    double thrdPurgeObjItPerSecond = (double) (iterations * 1000) / tptResults[5];
    double thrdGetDsItPerSecond = (double) (iterations * 1000) / tptResults[6];
    double thrdGetDsRestItPerSecond = (double) (iterations * 1000) / tptResults[7];
    out.println(name + ", " + round(thrdIngestItPerSecond) + ", " + round(thrdAddDsItPerSecond) + ", "
            + round(thrdModifyRefItPerSecond) + ", " + round(thrdModifyValItPerSecond) + ", "
            + round(thrdPurgeDsItPerSecond) + ", " + round(thrdPurgeObjItPerSecond) + ", "
            + round(thrdGetDsItPerSecond) + ", " + round(thrdGetDsRestItPerSecond));

    if (!newFile) {
        reader.close();
        tempFile.delete();
    }
    out.close();
    System.out.println("Performance Tests Complete.");
}
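A pattern worth noting in this example: because the results file is rewritten in place, the code first copies it to a temp file and then re-reads the old content with readLine() while writing the new report. A minimal sketch of that copy-then-replay idiom (the file name and appended row are hypothetical, and unlike the real test, which interleaves new rows into each section, this sketch simply appends at the end; it assumes results.txt already exists):

import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;
import java.io.IOException;
import java.io.PrintStream;
import java.nio.file.Files;
import java.nio.file.StandardCopyOption;

public class RewriteWithHistory {
    public static void main(String[] args) throws IOException {
        File outputFile = new File("results.txt"); // hypothetical results file
        File tempFile = File.createTempFile("results", ".tmp");
        // copy the current contents aside so we can read them back
        Files.copy(outputFile.toPath(), tempFile.toPath(), StandardCopyOption.REPLACE_EXISTING);

        try (BufferedReader reader = new BufferedReader(new FileReader(tempFile));
                PrintStream out = new PrintStream(outputFile)) { // truncates the original
            String line;
            // replay the old lines, then append the new row
            while ((line = reader.readLine()) != null) {
                out.println(line);
            }
            out.println("new-row, 1, 2, 3"); // hypothetical new result row
        }
        tempFile.delete();
    }
}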