List of usage examples for java.lang.String.toLowerCase()
public String toLowerCase()
From source file:Main.java
public static void main(String[] args) { String url = "jdbc:mySubprotocol:myDataSource"; Connection con;//from ww w .j av a2s.co m Statement stmt; try { Class.forName("myDriver.ClassName"); } catch (java.lang.ClassNotFoundException e) { System.err.print("ClassNotFoundException: "); System.err.println(e.getMessage()); } try { con = DriverManager.getConnection(url, "myLogin", "myPassword"); stmt = con.createStatement(); Vector dataTypes = getDataTypes(con); String tableName; String columnName; String sqlType; String prompt = "Enter the new table name and hit Return: "; tableName = getInput(prompt); String createTableString = "create table " + tableName + " ("; String commaAndSpace = ", "; boolean firstTime = true; while (true) { System.out.println(""); prompt = "Enter a column name " + "(or nothing when finished) \nand hit Return: "; columnName = getInput(prompt); if (firstTime) { if (columnName.length() == 0) { System.out.print("Need at least one column;"); System.out.println(" please try again"); continue; } else { createTableString += columnName + " "; firstTime = false; } } else if (columnName.length() == 0) { break; } else { createTableString += commaAndSpace + columnName + " "; } String localTypeName = null; String paramString = ""; while (true) { System.out.println(""); System.out.println("LIST OF TYPES YOU MAY USE: "); boolean firstPrinted = true; int length = 0; for (int i = 0; i < dataTypes.size(); i++) { DataType dataType = (DataType) dataTypes.get(i); if (!dataType.needsToBeSet()) { if (!firstPrinted) System.out.print(commaAndSpace); else firstPrinted = false; System.out.print(dataType.getSQLType()); length += dataType.getSQLType().length(); if (length > 50) { System.out.println(""); length = 0; firstPrinted = true; } } } System.out.println(""); int index; prompt = "Enter a column type " + "from the list and hit Return: "; sqlType = getInput(prompt); for (index = 0; index < dataTypes.size(); index++) { DataType dataType = (DataType) dataTypes.get(index); 
if (dataType.getSQLType().equalsIgnoreCase(sqlType) && !dataType.needsToBeSet()) { break; } } localTypeName = null; paramString = ""; if (index < dataTypes.size()) { // there was a match String params; DataType dataType = (DataType) dataTypes.get(index); params = dataType.getParams(); localTypeName = dataType.getLocalType(); if (params != null) { prompt = "Enter " + params + ": "; paramString = "(" + getInput(prompt) + ")"; } break; } else { // use the name as given prompt = "Are you sure? " + "Enter 'y' or 'n' and hit Return: "; String check = getInput(prompt) + " "; check = check.toLowerCase().substring(0, 1); if (check.equals("n")) continue; else { localTypeName = sqlType; break; } } } createTableString += localTypeName + paramString; } createTableString += ")"; System.out.println(""); System.out.print("Your CREATE TABLE statement as "); System.out.println("sent to your DBMS: "); System.out.println(createTableString); System.out.println(""); stmt.executeUpdate(createTableString); stmt.close(); con.close(); } catch (SQLException ex) { System.err.println("SQLException: " + ex.getMessage()); } }
From source file:com.seavus.wordcountermaven.WordCounter.java
/** * @param args the command line arguments * @throws java.io.FileNotFoundException *//*w w w. j ava2 s . c om*/ public static void main(String[] args) throws FileNotFoundException { InputStream fileStream = WordCounter.class.getClassLoader().getResourceAsStream("test.txt"); BufferedReader br = new BufferedReader(new InputStreamReader(fileStream)); Map<String, Integer> wordMap = new HashMap<>(); String line; boolean tokenFound = false; try { while ((line = br.readLine()) != null) { String[] tokens = line.trim().split("\\s+"); //trims surrounding whitespaces and splits lines into tokens for (String token : tokens) { for (Map.Entry<String, Integer> entry : wordMap.entrySet()) { if (StringUtils.equalsIgnoreCase(token, entry.getKey())) { wordMap.put(entry.getKey(), (wordMap.get(entry.getKey()) + 1)); tokenFound = true; } } if (!token.equals("") && !tokenFound) { wordMap.put(token.toLowerCase(), 1); } tokenFound = false; } } br.close(); } catch (IOException ex) { Logger.getLogger(WordCounter.class.getName()).log(Level.SEVERE, null, ex); } System.out.println("string : " + "frequency\r\n" + "-------------------"); //prints out each unique word (i.e. case-insensitive string token) and its frequency to the console for (Map.Entry<String, Integer> entry : wordMap.entrySet()) { System.out.println(entry.getKey() + " : " + entry.getValue()); } }
From source file:DIA_Umpire_Quant.DIA_Umpire_ExtLibSearch.java
/** * @param args the command line arguments *//* www .ja v a 2s . c o m*/ public static void main(String[] args) throws FileNotFoundException, IOException, Exception { System.out.println( "================================================================================================="); System.out.println("DIA-Umpire targeted re-extraction analysis using external library (version: " + UmpireInfo.GetInstance().Version + ")"); if (args.length != 1) { System.out.println( "command format error, the correct format should be: java -jar -Xmx10G DIA_Umpire_ExtLibSearch.jar diaumpire_module.params"); return; } try { ConsoleLogger.SetConsoleLogger(Level.INFO); ConsoleLogger.SetFileLogger(Level.DEBUG, FilenameUtils.getFullPath(args[0]) + "diaumpire_extlibsearch.log"); } catch (Exception e) { } Logger.getRootLogger().info("Version: " + UmpireInfo.GetInstance().Version); Logger.getRootLogger().info("Parameter file:" + args[0]); BufferedReader reader = new BufferedReader(new FileReader(args[0])); String line = ""; String WorkFolder = ""; int NoCPUs = 2; String ExternalLibPath = ""; String ExternalLibDecoyTag = "DECOY"; float ExtProbThreshold = 0.99f; float RTWindow_Ext = -1f; TandemParam tandemPara = new TandemParam(DBSearchParam.SearchInstrumentType.TOF5600); HashMap<String, File> AssignFiles = new HashMap<>(); //<editor-fold defaultstate="collapsed" desc="Reading parameter file"> while ((line = reader.readLine()) != null) { line = line.trim(); Logger.getRootLogger().info(line); if (!"".equals(line) && !line.startsWith("#")) { //System.out.println(line); if (line.equals("==File list begin")) { do { line = reader.readLine(); line = line.trim(); if (line.equals("==File list end")) { continue; } else if (!"".equals(line)) { File newfile = new File(line); if (newfile.exists()) { AssignFiles.put(newfile.getAbsolutePath(), newfile); } else { Logger.getRootLogger().info("File: " + newfile + " does not exist."); } } } while (!line.equals("==File list end")); } if 
(line.split("=").length < 2) { continue; } String type = line.split("=")[0].trim(); String value = line.split("=")[1].trim(); switch (type) { case "Path": { WorkFolder = value; break; } case "path": { WorkFolder = value; break; } case "Thread": { NoCPUs = Integer.parseInt(value); break; } case "Fasta": { tandemPara.FastaPath = value; break; } case "DecoyPrefix": { if (!"".equals(value)) { tandemPara.DecoyPrefix = value; } break; } case "ExternalLibPath": { ExternalLibPath = value; break; } case "ExtProbThreshold": { ExtProbThreshold = Float.parseFloat(value); break; } case "RTWindow_Ext": { RTWindow_Ext = Float.parseFloat(value); break; } case "ExternalLibDecoyTag": { ExternalLibDecoyTag = value; if (ExternalLibDecoyTag.endsWith("_")) { ExternalLibDecoyTag = ExternalLibDecoyTag.substring(0, ExternalLibDecoyTag.length() - 1); } break; } } } } //</editor-fold> //Initialize PTM manager using compomics library PTMManager.GetInstance(); //Check if the fasta file can be found if (!new File(tandemPara.FastaPath).exists()) { Logger.getRootLogger().info("Fasta file :" + tandemPara.FastaPath + " cannot be found, the process will be terminated, please check."); System.exit(1); } //Generate DIA file list ArrayList<DIAPack> FileList = new ArrayList<>(); File folder = new File(WorkFolder); if (!folder.exists()) { Logger.getRootLogger().info("The path : " + WorkFolder + " cannot be found."); System.exit(1); } for (final File fileEntry : folder.listFiles()) { if (fileEntry.isFile() && (fileEntry.getAbsolutePath().toLowerCase().endsWith(".mzxml") | fileEntry.getAbsolutePath().toLowerCase().endsWith(".mzml")) && !fileEntry.getAbsolutePath().toLowerCase().endsWith("q1.mzxml") && !fileEntry.getAbsolutePath().toLowerCase().endsWith("q2.mzxml") && !fileEntry.getAbsolutePath().toLowerCase().endsWith("q3.mzxml")) { AssignFiles.put(fileEntry.getAbsolutePath(), fileEntry); } if (fileEntry.isDirectory()) { for (final File fileEntry2 : fileEntry.listFiles()) { if (fileEntry2.isFile() && 
(fileEntry2.getAbsolutePath().toLowerCase().endsWith(".mzxml") | fileEntry2.getAbsolutePath().toLowerCase().endsWith(".mzml")) && !fileEntry2.getAbsolutePath().toLowerCase().endsWith("q1.mzxml") && !fileEntry2.getAbsolutePath().toLowerCase().endsWith("q2.mzxml") && !fileEntry2.getAbsolutePath().toLowerCase().endsWith("q3.mzxml")) { AssignFiles.put(fileEntry2.getAbsolutePath(), fileEntry2); } } } } Logger.getRootLogger().info("No. of files assigned :" + AssignFiles.size()); for (File fileEntry : AssignFiles.values()) { Logger.getRootLogger().info(fileEntry.getAbsolutePath()); } for (File fileEntry : AssignFiles.values()) { String mzXMLFile = fileEntry.getAbsolutePath(); if (mzXMLFile.toLowerCase().endsWith(".mzxml") | mzXMLFile.toLowerCase().endsWith(".mzml")) { DIAPack DiaFile = new DIAPack(mzXMLFile, NoCPUs); Logger.getRootLogger().info( "================================================================================================="); Logger.getRootLogger().info("Processing " + mzXMLFile); if (!DiaFile.LoadDIASetting()) { Logger.getRootLogger().info("Loading DIA setting failed, job is incomplete"); System.exit(1); } if (!DiaFile.LoadParams()) { Logger.getRootLogger().info("Loading parameters failed, job is incomplete"); System.exit(1); } Logger.getRootLogger().info("Loading identification results " + mzXMLFile + "...."); //If the serialization file for ID file existed if (DiaFile.ReadSerializedLCMSID()) { DiaFile.IDsummary.ReduceMemoryUsage(); DiaFile.IDsummary.FastaPath = tandemPara.FastaPath; FileList.add(DiaFile); } } } //<editor-fold defaultstate="collapsed" desc="Targeted re-extraction using external library"> //External library search Logger.getRootLogger().info("Targeted extraction using external library"); //Read exteranl library FragmentLibManager ExlibManager = FragmentLibManager.ReadFragmentLibSerialization(WorkFolder, FilenameUtils.getBaseName(ExternalLibPath)); if (ExlibManager == null) { ExlibManager = new 
FragmentLibManager(FilenameUtils.getBaseName(ExternalLibPath)); //Import traML file ExlibManager.ImportFragLibByTraML(ExternalLibPath, ExternalLibDecoyTag); //Check if there are decoy spectra ExlibManager.CheckDecoys(); //ExlibManager.ImportFragLibBySPTXT(ExternalLibPath); ExlibManager.WriteFragmentLibSerialization(WorkFolder); } Logger.getRootLogger() .info("No. of peptide ions in external lib:" + ExlibManager.PeptideFragmentLib.size()); for (DIAPack diafile : FileList) { if (diafile.IDsummary == null) { diafile.ReadSerializedLCMSID(); } //Generate RT mapping RTMappingExtLib RTmap = new RTMappingExtLib(diafile.IDsummary, ExlibManager, diafile.GetParameter()); RTmap.GenerateModel(); RTmap.GenerateMappedPepIon(); diafile.BuildStructure(); diafile.MS1FeatureMap.ReadPeakCluster(); diafile.GenerateMassCalibrationRTMap(); //Perform targeted re-extraction diafile.TargetedExtractionQuant(false, ExlibManager, ExtProbThreshold, RTWindow_Ext); diafile.MS1FeatureMap.ClearAllPeaks(); diafile.IDsummary.ReduceMemoryUsage(); //Remove target IDs below the defined probability threshold diafile.IDsummary.RemoveLowProbMappedIon(ExtProbThreshold); diafile.ExportID(); diafile.ClearStructure(); Logger.getRootLogger().info("Peptide ions: " + diafile.IDsummary.GetPepIonList().size() + " Mapped ions: " + diafile.IDsummary.GetMappedPepIonList().size()); } //</editor-fold> Logger.getRootLogger().info("Job done"); Logger.getRootLogger().info( "================================================================================================="); }
From source file:com.google.api.codegen.GeneratorMain.java
public static void main(String[] args) throws Exception { if (args.length == 0) { System.err.println("No artifact type given!"); printAvailableCommands();//w w w .jav a 2 s .c o m System.exit(1); return; } String command = args[0].toUpperCase(); ArtifactType artifactType; try { artifactType = ArtifactType.valueOf(command); } catch (Exception e) { System.err.println("Unrecognized artifact type: '" + command.toLowerCase() + "'"); printAvailableCommands(); System.exit(1); return; } switch (artifactType) { case GAPIC_CONFIG: gapicConfigGeneratorMain(args); break; case GAPIC_CODE: gapicGeneratorMain(artifactType, args); break; case GAPIC_PACKAGE: gapicGeneratorMain(artifactType, args); break; case LEGACY_GAPIC_AND_PACKAGE: gapicGeneratorMain(artifactType, args); break; case DISCOGAPIC_CONFIG: discoGapicConfigGeneratorMain(args); break; case LEGACY_DISCOGAPIC_AND_PACKAGE: discoGapicMain(args); break; case LEGACY_GRPC_PACKAGE: packageGeneratorMain(args); break; default: System.err.println("ArtifactType '" + artifactType + "' present in enum but not supported on command line - programmer error?"); System.exit(1); } }
From source file:DIA_Umpire_Quant.DIA_Umpire_IntLibSearch.java
/** * @param args the command line arguments *//*w ww .ja v a 2 s. c o m*/ public static void main(String[] args) throws FileNotFoundException, IOException, Exception { System.out.println( "================================================================================================="); System.out.println("DIA-Umpire targeted re-extraction analysis using internal library (version: " + UmpireInfo.GetInstance().Version + ")"); if (args.length != 1) { System.out.println( "command format error, the correct format should be : java -jar -Xmx10G DIA_Umpire_IntLibSearch.jar diaumpire_module.params"); return; } try { ConsoleLogger.SetConsoleLogger(Level.INFO); ConsoleLogger.SetFileLogger(Level.DEBUG, FilenameUtils.getFullPath(args[0]) + "diaumpire_intlibsearch.log"); } catch (Exception e) { } Logger.getRootLogger().info("Version: " + UmpireInfo.GetInstance().Version); Logger.getRootLogger().info("Parameter file:" + args[0]); BufferedReader reader = new BufferedReader(new FileReader(args[0])); String line = ""; String WorkFolder = ""; int NoCPUs = 2; String InternalLibID = ""; float ProbThreshold = 0.99f; float RTWindow_Int = -1f; float Freq = 0f; int TopNFrag = 6; TandemParam tandemPara = new TandemParam(DBSearchParam.SearchInstrumentType.TOF5600); HashMap<String, File> AssignFiles = new HashMap<>(); //<editor-fold defaultstate="collapsed" desc="Reading parameter file"> while ((line = reader.readLine()) != null) { line = line.trim(); Logger.getRootLogger().info(line); if (!"".equals(line) && !line.startsWith("#")) { //System.out.println(line); if (line.equals("==File list begin")) { do { line = reader.readLine(); line = line.trim(); if (line.equals("==File list end")) { continue; } else if (!"".equals(line)) { File newfile = new File(line); if (newfile.exists()) { AssignFiles.put(newfile.getAbsolutePath(), newfile); } else { Logger.getRootLogger().info("File: " + newfile + " does not exist."); } } } while (!line.equals("==File list end")); } if (line.split("=").length < 
2) { continue; } String type = line.split("=")[0].trim(); String value = line.split("=")[1].trim(); switch (type) { case "Path": { WorkFolder = value; break; } case "path": { WorkFolder = value; break; } case "Thread": { NoCPUs = Integer.parseInt(value); break; } case "InternalLibID": { InternalLibID = value; break; } case "RTWindow_Int": { RTWindow_Int = Float.parseFloat(value); break; } case "ProbThreshold": { ProbThreshold = Float.parseFloat(value); break; } case "TopNFrag": { TopNFrag = Integer.parseInt(value); break; } case "Freq": { Freq = Float.parseFloat(value); break; } case "Fasta": { tandemPara.FastaPath = value; break; } } } } //</editor-fold> //Initialize PTM manager using compomics library PTMManager.GetInstance(); //Check if the fasta file can be found if (!new File(tandemPara.FastaPath).exists()) { Logger.getRootLogger().info("Fasta file :" + tandemPara.FastaPath + " cannot be found, the process will be terminated, please check."); System.exit(1); } //Generate DIA file list ArrayList<DIAPack> FileList = new ArrayList<>(); try { File folder = new File(WorkFolder); if (!folder.exists()) { Logger.getRootLogger().info("The path : " + WorkFolder + " cannot be found."); System.exit(1); } for (final File fileEntry : folder.listFiles()) { if (fileEntry.isFile() && (fileEntry.getAbsolutePath().toLowerCase().endsWith(".mzxml") | fileEntry.getAbsolutePath().toLowerCase().endsWith(".mzml")) && !fileEntry.getAbsolutePath().toLowerCase().endsWith("q1.mzxml") && !fileEntry.getAbsolutePath().toLowerCase().endsWith("q2.mzxml") && !fileEntry.getAbsolutePath().toLowerCase().endsWith("q3.mzxml")) { AssignFiles.put(fileEntry.getAbsolutePath(), fileEntry); } if (fileEntry.isDirectory()) { for (final File fileEntry2 : fileEntry.listFiles()) { if (fileEntry2.isFile() && (fileEntry2.getAbsolutePath().toLowerCase().endsWith(".mzxml") | fileEntry2.getAbsolutePath().toLowerCase().endsWith(".mzml")) && !fileEntry2.getAbsolutePath().toLowerCase().endsWith("q1.mzxml") && 
!fileEntry2.getAbsolutePath().toLowerCase().endsWith("q2.mzxml") && !fileEntry2.getAbsolutePath().toLowerCase().endsWith("q3.mzxml")) { AssignFiles.put(fileEntry2.getAbsolutePath(), fileEntry2); } } } } Logger.getRootLogger().info("No. of files assigned :" + AssignFiles.size()); for (File fileEntry : AssignFiles.values()) { Logger.getRootLogger().info(fileEntry.getAbsolutePath()); } for (File fileEntry : AssignFiles.values()) { String mzXMLFile = fileEntry.getAbsolutePath(); if (mzXMLFile.toLowerCase().endsWith(".mzxml") | mzXMLFile.toLowerCase().endsWith(".mzml")) { DIAPack DiaFile = new DIAPack(mzXMLFile, NoCPUs); Logger.getRootLogger().info( "================================================================================================="); Logger.getRootLogger().info("Processing " + mzXMLFile); if (!DiaFile.LoadDIASetting()) { Logger.getRootLogger().info("Loading DIA setting failed, job is incomplete"); System.exit(1); } if (!DiaFile.LoadParams()) { Logger.getRootLogger().info("Loading parameters failed, job is incomplete"); System.exit(1); } Logger.getRootLogger().info("Loading identification results " + mzXMLFile + "...."); //If the serialization file for ID file existed if (DiaFile.ReadSerializedLCMSID()) { DiaFile.IDsummary.ReduceMemoryUsage(); DiaFile.IDsummary.FastaPath = tandemPara.FastaPath; FileList.add(DiaFile); } } } //<editor-fold defaultstate="collapsed" desc="Targete re-extraction using internal library"> Logger.getRootLogger().info( "================================================================================================="); if (FileList.size() > 1) { Logger.getRootLogger().info("Targeted re-extraction using internal library"); FragmentLibManager libManager = FragmentLibManager.ReadFragmentLibSerialization(WorkFolder, InternalLibID); if (libManager == null) { Logger.getRootLogger().info("Building internal spectral library"); libManager = new FragmentLibManager(InternalLibID); ArrayList<LCMSID> LCMSIDList = new ArrayList<>(); for (DIAPack 
dia : FileList) { LCMSIDList.add(dia.IDsummary); } libManager.ImportFragLibTopFrag(LCMSIDList, Freq, TopNFrag); libManager.WriteFragmentLibSerialization(WorkFolder); } libManager.ReduceMemoryUsage(); Logger.getRootLogger() .info("Building retention time prediction model and generate candidate peptide list"); for (int i = 0; i < FileList.size(); i++) { FileList.get(i).IDsummary.ClearMappedPep(); } for (int i = 0; i < FileList.size(); i++) { for (int j = i + 1; j < FileList.size(); j++) { RTAlignedPepIonMapping alignment = new RTAlignedPepIonMapping(WorkFolder, FileList.get(i).GetParameter(), FileList.get(i).IDsummary, FileList.get(j).IDsummary); alignment.GenerateModel(); alignment.GenerateMappedPepIon(); } FileList.get(i).ExportID(); FileList.get(i).IDsummary = null; } Logger.getRootLogger().info("Targeted matching........"); for (DIAPack diafile : FileList) { if (diafile.IDsummary == null) { diafile.ReadSerializedLCMSID(); } if (!diafile.IDsummary.GetMappedPepIonList().isEmpty()) { diafile.UseMappedIon = true; diafile.FilterMappedIonByProb = false; diafile.BuildStructure(); diafile.MS1FeatureMap.ReadPeakCluster(); diafile.MS1FeatureMap.ClearMonoisotopicPeakOfCluster(); diafile.GenerateMassCalibrationRTMap(); diafile.TargetedExtractionQuant(false, libManager, ProbThreshold, RTWindow_Int); diafile.MS1FeatureMap.ClearAllPeaks(); diafile.IDsummary.ReduceMemoryUsage(); diafile.IDsummary.RemoveLowProbMappedIon(ProbThreshold); diafile.ExportID(); Logger.getRootLogger().info("Peptide ions: " + diafile.IDsummary.GetPepIonList().size() + " Mapped ions: " + diafile.IDsummary.GetMappedPepIonList().size()); diafile.ClearStructure(); } diafile.IDsummary = null; System.gc(); } Logger.getRootLogger().info( "================================================================================================="); } //</editor-fold> Logger.getRootLogger().info("Job done"); Logger.getRootLogger().info( 
"================================================================================================="); } catch (Exception e) { Logger.getRootLogger().error(ExceptionUtils.getStackTrace(e)); throw e; } }
From source file:com.github.xmltopdf.JasperPdfGenerator.java
/**
 * Creates one output document per requested type from JRXML templates and XML data files.
 *
 * <p>Arguments ending in {@code .jrxml} are templates, arguments ending in {@code .xml} are
 * data files, and an argument starting with {@code DOC_TYPE} carries a comma-separated list of
 * document types (whitespace is ignored, case does not matter). Without a type list,
 * {@code PDF} is used. Each document is written next to the first template, with the
 * {@code .jrxml} suffix replaced by the lower-cased type.
 *
 * @param args
 *            the arguments
 * @throws IOException in case IO error
 */
public static void main(String[] args) throws IOException {
    if (args.length == 0) {
        LOG.info(null, USAGE);
        return;
    }

    List<String> templates = new ArrayList<String>();
    List<String> xmls = new ArrayList<String>();
    List<String> types = new ArrayList<String>();
    for (String arg : args) {
        if (arg.endsWith(".jrxml")) {
            templates.add(arg);
        } else if (arg.endsWith(".xml")) {
            xmls.add(arg);
        } else if (arg.startsWith(DOC_TYPE)) {
            // Locale.ROOT keeps the type names independent of the default locale.
            String typeList = arg.substring(DOC_TYPE.length()).replaceAll("\\s+", "")
                    .toUpperCase(java.util.Locale.ROOT);
            // Copy into a growable list: Arrays.asList is fixed-size, so the types.add("PDF")
            // below threw UnsupportedOperationException when the list held only commas.
            types = new ArrayList<String>(Arrays.asList(typeList.split(",")));
        }
    }

    if (templates.isEmpty()) {
        LOG.info(null, USAGE);
        return;
    }
    if (types.isEmpty()) {
        types.add("PDF");
    }

    for (String type : types) {
        ByteArrayOutputStream os = new ByteArrayOutputStream();
        DocType docType = DocType.valueOf(type);
        if (docType != null) {
            new JasperPdfGenerator().createDocument(templates, xmls, os, docType);
            String target = templates.get(0).replaceFirst("\\.jrxml$",
                    "." + type.toLowerCase(java.util.Locale.ROOT));
            FileOutputStream fileOut = new FileOutputStream(target);
            try {
                os.writeTo(fileOut);
            } finally {
                // the original never closed the file stream
                fileOut.close();
            }
        }
    }
}
From source file:com.verizon.Main.java
public static void main(String[] args) throws Exception { String warehouseLocation = "file:" + System.getProperty("user.dir") + "spark-warehouse"; SparkSession spark = SparkSession.builder().appName("Verizon").config("spark.master", "local[2]") .config("spark.sql.warehouse.dir", warehouseLocation).enableHiveSupport().getOrCreate(); Configuration configuration = new Configuration(); configuration.addResource(new Path(System.getProperty("HADOOP_INSTALL") + "/conf/core-site.xml")); configuration.addResource(new Path(System.getProperty("HADOOP_INSTALL") + "/conf/hdfs-site.xml")); configuration.set("fs.hdfs.impl", org.apache.hadoop.hdfs.DistributedFileSystem.class.getName()); configuration.set("fs.file.impl", org.apache.hadoop.fs.LocalFileSystem.class.getName()); FileSystem hdfs = FileSystem.get(new URI("hdfs://localhost:9000"), configuration); SQLContext context = new SQLContext(spark); String schemaString = " Device,Title,ReviewText,SubmissionTime,UserNickname"; //spark.read().textFile(schemaString) Dataset<Row> df = spark.read().csv("hdfs://localhost:9000/data.csv"); //df.show();//from w w w . 
java 2s.c om //#df.printSchema(); df = df.select("_c2"); Path file = new Path("hdfs://localhost:9000/tempFile.txt"); if (hdfs.exists(file)) { hdfs.delete(file, true); } df.write().csv("hdfs://localhost:9000/tempFile.txt"); JavaRDD<String> lines = spark.read().textFile("hdfs://localhost:9000/tempFile.txt").javaRDD(); JavaRDD<String> words = lines.flatMap(new FlatMapFunction<String, String>() { @Override public Iterator<String> call(String s) { return Arrays.asList(SPACE.split(s)).iterator(); } }); JavaPairRDD<String, Integer> ones = words.mapToPair(new PairFunction<String, String, Integer>() { @Override public Tuple2<String, Integer> call(String s) { s = s.replaceAll("[^a-zA-Z0-9]+", ""); s = s.toLowerCase().trim(); return new Tuple2<>(s, 1); } }); JavaPairRDD<String, Integer> counts = ones.reduceByKey(new Function2<Integer, Integer, Integer>() { @Override public Integer call(Integer i1, Integer i2) { return i1 + i2; } }); JavaPairRDD<Integer, String> frequencies = counts .mapToPair(new PairFunction<Tuple2<String, Integer>, Integer, String>() { @Override public Tuple2<Integer, String> call(Tuple2<String, Integer> s) { return new Tuple2<Integer, String>(s._2, s._1); } }); frequencies = frequencies.sortByKey(false); JavaPairRDD<String, Integer> result = frequencies .mapToPair(new PairFunction<Tuple2<Integer, String>, String, Integer>() { @Override public Tuple2<String, Integer> call(Tuple2<Integer, String> s) throws Exception { return new Tuple2<String, Integer>(s._2, s._1); } }); //JavaPairRDD<Integer,String> sortedByFreq = sort(frequencies, "descending"); file = new Path("hdfs://localhost:9000/allresult.csv"); if (hdfs.exists(file)) { hdfs.delete(file, true); } //FileUtils.deleteDirectory(new File("allresult.csv")); result.saveAsTextFile("hdfs://localhost:9000/allresult.csv"); List<Tuple2<String, Integer>> output = result.take(250); ExportToHive hiveExport = new ExportToHive(); String rows = ""; for (Tuple2<String, Integer> tuple : output) { String date = new 
Date().toString(); String keyword = tuple._1(); Integer count = tuple._2(); //System.out.println( keyword+ "," +count); rows += date + "," + "Samsung Galaxy s7," + keyword + "," + count + System.lineSeparator(); } //System.out.println(rows); /* file = new Path("hdfs://localhost:9000/result.csv"); if ( hdfs.exists( file )) { hdfs.delete( file, true ); } OutputStream os = hdfs.create(file); BufferedWriter br = new BufferedWriter( new OutputStreamWriter( os, "UTF-8" ) ); br.write(rows); br.close(); */ hdfs.close(); FileUtils.deleteQuietly(new File("result.csv")); FileUtils.writeStringToFile(new File("result.csv"), rows); hiveExport.writeToHive(spark); ExportDataToServer exportServer = new ExportDataToServer(); exportServer.sendDataToRESTService(rows); spark.stop(); }
From source file:edu.cmu.lti.oaqa.knn4qa.apps.ExtractDataAndQueryAsSparseVectors.java
public static void main(String[] args) { String optKeys[] = { CommonParams.MAX_NUM_QUERY_PARAM, MAX_NUM_DATA_PARAM, CommonParams.MEMINDEX_PARAM, IN_QUERIES_PARAM, OUT_QUERIES_PARAM, OUT_DATA_PARAM, TEXT_FIELD_PARAM, TEST_QTY_PARAM, }; String optDescs[] = { CommonParams.MAX_NUM_QUERY_DESC, MAX_NUM_DATA_DESC, CommonParams.MEMINDEX_DESC, IN_QUERIES_DESC, OUT_QUERIES_DESC, OUT_DATA_DESC, TEXT_FIELD_DESC, TEST_QTY_DESC }; boolean hasArg[] = { true, true, true, true, true, true, true, true }; ParamHelper prmHlp = null;/*from w ww . ja v a2 s . c o m*/ try { prmHlp = new ParamHelper(args, optKeys, optDescs, hasArg); CommandLine cmd = prmHlp.getCommandLine(); Options opt = prmHlp.getOptions(); int maxNumQuery = Integer.MAX_VALUE; String tmpn = cmd.getOptionValue(CommonParams.MAX_NUM_QUERY_PARAM); if (tmpn != null) { try { maxNumQuery = Integer.parseInt(tmpn); } catch (NumberFormatException e) { UsageSpecify(CommonParams.MAX_NUM_QUERY_PARAM, opt); } } int maxNumData = Integer.MAX_VALUE; tmpn = cmd.getOptionValue(MAX_NUM_DATA_PARAM); if (tmpn != null) { try { maxNumData = Integer.parseInt(tmpn); } catch (NumberFormatException e) { UsageSpecify(MAX_NUM_DATA_PARAM, opt); } } String memIndexPref = cmd.getOptionValue(CommonParams.MEMINDEX_PARAM); if (null == memIndexPref) { UsageSpecify(CommonParams.MEMINDEX_PARAM, opt); } String textField = cmd.getOptionValue(TEXT_FIELD_PARAM); if (null == textField) { UsageSpecify(TEXT_FIELD_PARAM, opt); } textField = textField.toLowerCase(); int fieldId = -1; for (int i = 0; i < FeatureExtractor.mFieldNames.length; ++i) if (FeatureExtractor.mFieldNames[i].compareToIgnoreCase(textField) == 0) { fieldId = i; break; } if (-1 == fieldId) { Usage("Wrong field index, should be one of the following: " + String.join(",", FeatureExtractor.mFieldNames), opt); } InMemForwardIndex indx = new InMemForwardIndex( FeatureExtractor.indexFileName(memIndexPref, FeatureExtractor.mFieldNames[fieldId])); BM25SimilarityLucene bm25simil = new 
BM25SimilarityLucene(FeatureExtractor.BM25_K1, FeatureExtractor.BM25_B, indx); String inQueryFile = cmd.getOptionValue(IN_QUERIES_PARAM); String outQueryFile = cmd.getOptionValue(OUT_QUERIES_PARAM); if ((inQueryFile == null) != (outQueryFile == null)) { Usage("You should either specify both " + IN_QUERIES_PARAM + " and " + OUT_QUERIES_PARAM + " or none of them", opt); } String outDataFile = cmd.getOptionValue(OUT_DATA_PARAM); tmpn = cmd.getOptionValue(TEST_QTY_PARAM); int testQty = 0; if (tmpn != null) { try { testQty = Integer.parseInt(tmpn); } catch (NumberFormatException e) { UsageSpecify(TEST_QTY_PARAM, opt); } } ArrayList<DocEntry> testDocEntries = new ArrayList<DocEntry>(); ArrayList<DocEntry> testQueryEntries = new ArrayList<DocEntry>(); ArrayList<TrulySparseVector> testDocVectors = new ArrayList<TrulySparseVector>(); ArrayList<TrulySparseVector> testQueryVectors = new ArrayList<TrulySparseVector>(); if (outDataFile != null) { BufferedWriter out = new BufferedWriter( new OutputStreamWriter(CompressUtils.createOutputStream(outDataFile))); ArrayList<DocEntryExt> docEntries = indx.getDocEntries(); for (int id = 0; id < Math.min(maxNumData, docEntries.size()); ++id) { DocEntry e = docEntries.get(id).mDocEntry; TrulySparseVector v = bm25simil.getDocSparseVector(e, false); if (id < testQty) { testDocEntries.add(e); testDocVectors.add(v); } outputVector(out, v); } out.close(); } Splitter splitOnSpace = Splitter.on(' ').trimResults().omitEmptyStrings(); if (outQueryFile != null) { BufferedReader inpText = new BufferedReader( new InputStreamReader(CompressUtils.createInputStream(inQueryFile))); BufferedWriter out = new BufferedWriter( new OutputStreamWriter(CompressUtils.createOutputStream(outQueryFile))); String queryText = XmlHelper.readNextXMLIndexEntry(inpText); for (int queryQty = 0; queryText != null && queryQty < maxNumQuery; queryText = XmlHelper .readNextXMLIndexEntry(inpText), queryQty++) { Map<String, String> queryFields = null; // 1. 
Parse a query try { queryFields = XmlHelper.parseXMLIndexEntry(queryText); } catch (Exception e) { System.err.println("Parsing error, offending QUERY:\n" + queryText); throw new Exception("Parsing error."); } String fieldText = queryFields.get(FeatureExtractor.mFieldsSOLR[fieldId]); if (fieldText == null) { fieldText = ""; } ArrayList<String> tmpa = new ArrayList<String>(); for (String s : splitOnSpace.split(fieldText)) tmpa.add(s); DocEntry e = indx.createDocEntry(tmpa.toArray(new String[tmpa.size()])); TrulySparseVector v = bm25simil.getDocSparseVector(e, true); if (queryQty < testQty) { testQueryEntries.add(e); testQueryVectors.add(v); } outputVector(out, v); } out.close(); } int testedQty = 0, diffQty = 0; // Now let's do some testing for (int iq = 0; iq < testQueryEntries.size(); ++iq) { DocEntry queryEntry = testQueryEntries.get(iq); TrulySparseVector queryVector = testQueryVectors.get(iq); for (int id = 0; id < testDocEntries.size(); ++id) { DocEntry docEntry = testDocEntries.get(id); TrulySparseVector docVector = testDocVectors.get(id); float val1 = bm25simil.compute(queryEntry, docEntry); float val2 = TrulySparseVector.scalarProduct(queryVector, docVector); ++testedQty; if (Math.abs(val1 - val2) > 1e5) { System.err.println( String.format("Potential mismatch BM25=%f <-> scalar product=%f", val1, val2)); ++diffQty; } } } if (testedQty > 0) System.out.println(String.format("Tested %d Mismatched %d", testedQty, diffQty)); } catch (ParseException e) { Usage("Cannot parse arguments: " + e, prmHlp != null ? prmHlp.getOptions() : null); e.printStackTrace(); } catch (Exception e) { e.printStackTrace(); System.err.println("Terminating due to an exception: " + e); System.exit(1); } }
From source file:edu.illinois.cs.cogcomp.utils.Utils.java
public static void main(String[] args) throws Exception { //romanization(); String[] arabic_names = { "Urdu", "Arabic", "Egyptian_Arabic", "Mazandarani", "Pashto", "Persian", "Western_Punjabi" }; String[] devanagari_names = { "Newar", "Hindi", "Marathi", "Nepali", "Sanskrit" }; String[] cyrillic_names = { "Chuvash", "Bashkir", "Bulgarian", "Chechen", "Kirghiz", "Macedonian", "Russian", "Ukrainian" }; //for(String name : arabic_names){ //System.out.println(name + " : " + WAVE("models/probs-"+name+"-Urdu.txt")); //getSize(name); //}/*w w w . j av a2 s . c o m*/ String lang = "Arabic"; String wikidata = "Data/wikidata." + lang; List<String> allnames = LineIO.read("/Users/stephen/Dropbox/papers/NAACL2016/data/all-names2.txt"); List<Example> training = readWikiData(wikidata); training = training.subList(0, 2000); SPModel m = new SPModel(training); m.Train(5); TopList<Double, String> res = m.Generate("stephen"); System.out.println(res); List<String> outlines = new ArrayList<>(); int i = 0; for (String nameAndLabel : allnames) { if (i % 100 == 0) { System.out.println(i); } i++; String[] s = nameAndLabel.split("\t"); String name = s[0]; String label = s[1]; String[] sname = name.split(" "); String line = ""; for (String tok : sname) { res = m.Generate(tok.toLowerCase()); if (res.size() > 0) { String topcand = res.getFirst().getSecond(); line += topcand + " "; } else { } } if (line.trim().length() > 0) { outlines.add(line.trim() + "\t" + label); } } LineIO.write("/Users/stephen/Dropbox/papers/NAACL2016/data/all-names-" + lang + "2.txt", outlines); // Transliterator t = Transliterator.getInstance("Any-am_FONIPA"); // // String result = t.transform("Stephen"); // System.out.println(result); // // Enumeration<String> tids = t.getAvailableIDs(); // // while(tids.hasMoreElements()){ // String e = tids.nextElement(); // System.out.println(e); // } }
From source file:at.gv.egiz.pdfas.cli.Main.java
public static void main(String[] args) { // create the command line parser CommandLineParser parser = new GnuParser(); ModeOfOperation mode = ModeOfOperation.INVALID; try {//from w w w . j a v a 2 s . com CommandLine cli = parser.parse(createOptions(), args); if (cli.hasOption(CLI_ARG_DEPLOY_SHORT)) { PdfAsFactory.deployDefaultConfiguration(new File(STANDARD_CONFIG_LOCATION)); System.out.println("Configuration was deployed to: " + STANDARD_CONFIG_LOCATION); } if (cli.hasOption(CLI_ARG_MODE_SHORT)) { String modevalue = cli.getOptionValue(CLI_ARG_MODE_SHORT); if (modevalue.toLowerCase().trim().equals("sign")) { mode = ModeOfOperation.SIGN; } else if (modevalue.toLowerCase().trim().equals("verify")) { mode = ModeOfOperation.VERIFY; } else { throw new ParseException("Invalid value for option " + CLI_ARG_MODE_SHORT + ": " + modevalue); } } if (cli.hasOption(CLI_ARG_HELP_SHORT)) { usage(); System.exit(0); } if (mode == ModeOfOperation.INVALID && !cli.hasOption(CLI_ARG_DEPLOY_SHORT)) { usage(); } else if (mode == ModeOfOperation.SIGN) { perform_sign(cli); } else if (mode == ModeOfOperation.VERIFY) { perform_verify(cli); } } catch (ParseException e) { System.err.println("Invalid arguments: " + e.getMessage()); usage(); System.exit(-1); } catch (PDFASError e) { System.err.println("PDF-AS Error: [" + e.getCode() + "]" + e.getMessage()); Iterator<Entry<String, String>> infoIt = e.getProcessInformations().entrySet().iterator(); while (infoIt.hasNext()) { Entry<String, String> infoEntry = infoIt.next(); logger.debug("Process Information: {} = {}", infoEntry.getKey(), infoEntry.getValue()); } e.printStackTrace(System.err); System.exit(-1); } catch (Throwable e) { System.err.println("Unhandled PDF-AS Error: " + e.getMessage()); e.printStackTrace(System.err); System.exit(-1); } }