List of usage examples for java.lang.String.equals
public boolean equals(Object anObject)
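String.equals compares character content, not object identity, and equals(null) always returns false. A recurring idiom in the examples below is writing the literal first ("x".equals(s)) so a null s cannot throw a NullPointerException. A minimal sketch of both points:

// Minimal sketch of the String.equals contract and the null-safe
// "constant-first" idiom used throughout the examples below.
public class EqualsBasics {
    public static void main(String[] args) {
        String a = "hello";
        String b = new String("hello");

        System.out.println(a == b);         // false: different objects
        System.out.println(a.equals(b));    // true: same character sequence
        System.out.println(a.equals(null)); // false: equals(null) is always false

        String maybeNull = null;
        // maybeNull.equals("x") would throw NullPointerException;
        // putting the literal first is null-safe.
        System.out.println("x".equals(maybeNull)); // false, no NPE
    }
}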
From source file:DIA_Umpire_Quant.DIA_Umpire_ProtQuant.java
/** * @param args the command line arguments */// w w w. j av a2 s . co m public static void main(String[] args) throws FileNotFoundException, IOException, Exception { System.out.println( "================================================================================================="); System.out.println( "DIA-Umpire protein quantitation module (version: " + UmpireInfo.GetInstance().Version + ")"); if (args.length != 1) { System.out.println( "command format error, the correct format should be: java -jar -Xmx10G DIA_Umpire_PortQuant.jar diaumpire_module.params"); return; } try { ConsoleLogger.SetConsoleLogger(Level.INFO); ConsoleLogger.SetFileLogger(Level.DEBUG, FilenameUtils.getFullPath(args[0]) + "diaumpire_orotquant.log"); } catch (Exception e) { } Logger.getRootLogger().info("Version: " + UmpireInfo.GetInstance().Version); Logger.getRootLogger().info("Parameter file:" + args[0]); BufferedReader reader = new BufferedReader(new FileReader(args[0])); String line = ""; String WorkFolder = ""; int NoCPUs = 2; String Combined_Prot = ""; boolean DefaultProtFiltering = true; float Freq = 0f; int TopNPep = 6; int TopNFrag = 6; String FilterWeight = "GW"; float MinWeight = 0.9f; TandemParam tandemPara = new TandemParam(DBSearchParam.SearchInstrumentType.TOF5600); HashMap<String, File> AssignFiles = new HashMap<>(); boolean ExportSaint = false; boolean SAINT_MS1 = false; boolean SAINT_MS2 = true; HashMap<String, String[]> BaitList = new HashMap<>(); HashMap<String, String> BaitName = new HashMap<>(); HashMap<String, String[]> ControlList = new HashMap<>(); HashMap<String, String> ControlName = new HashMap<>(); //<editor-fold defaultstate="collapsed" desc="Reading parameter file"> while ((line = reader.readLine()) != null) { line = line.trim(); Logger.getRootLogger().info(line); if (!"".equals(line) && !line.startsWith("#")) { //System.out.println(line); if (line.equals("==File list begin")) { do { line = reader.readLine(); line = line.trim(); if (line.equals("==File list end")) { continue; } else if (!"".equals(line)) { File newfile = new File(line); if (newfile.exists()) { AssignFiles.put(newfile.getAbsolutePath(), newfile); } else { Logger.getRootLogger().info("File: " + newfile + " does not exist."); } } } while (!line.equals("==File list end")); } if (line.split("=").length < 2) { continue; } String type = line.split("=")[0].trim(); String value = line.split("=")[1].trim(); switch (type) { case "Path": { WorkFolder = value; break; } case "path": { WorkFolder = value; break; } case "Thread": { NoCPUs = Integer.parseInt(value); break; } case "Fasta": { tandemPara.FastaPath = value; break; } case "Combined_Prot": { Combined_Prot = value; break; } case "DefaultProtFiltering": { DefaultProtFiltering = Boolean.parseBoolean(value); break; } case "DecoyPrefix": { if (!"".equals(value)) { tandemPara.DecoyPrefix = value; } break; } case "ProteinFDR": { tandemPara.ProtFDR = Float.parseFloat(value); break; } case "FilterWeight": { FilterWeight = value; break; } case "MinWeight": { MinWeight = Float.parseFloat(value); break; } case "TopNFrag": { TopNFrag = Integer.parseInt(value); break; } case "TopNPep": { TopNPep = Integer.parseInt(value); break; } case "Freq": { Freq = Float.parseFloat(value); break; } //<editor-fold defaultstate="collapsed" desc="SaintOutput"> case "ExportSaintInput": { ExportSaint = Boolean.parseBoolean(value); break; } case "QuantitationType": { switch (value) { case "MS1": { SAINT_MS1 = true; SAINT_MS2 = false; break; } case "MS2": { SAINT_MS1 = false; SAINT_MS2 = true; break; } 
case "BOTH": { SAINT_MS1 = true; SAINT_MS2 = true; break; } } break; } // case "BaitInputFile": { // SaintBaitFile = value; // break; // } // case "PreyInputFile": { // SaintPreyFile = value; // break; // } // case "InterationInputFile": { // SaintInteractionFile = value; // break; // } default: { if (type.startsWith("BaitName_")) { BaitName.put(type.substring(9), value); } if (type.startsWith("BaitFile_")) { BaitList.put(type.substring(9), value.split("\t")); } if (type.startsWith("ControlName_")) { ControlName.put(type.substring(12), value); } if (type.startsWith("ControlFile_")) { ControlList.put(type.substring(12), value.split("\t")); } break; } //</editor-fold> } } } //</editor-fold> //Initialize PTM manager using compomics library PTMManager.GetInstance(); //Check if the fasta file can be found if (!new File(tandemPara.FastaPath).exists()) { Logger.getRootLogger().info("Fasta file :" + tandemPara.FastaPath + " cannot be found, the process will be terminated, please check."); System.exit(1); } //Check if the prot.xml file can be found if (!new File(Combined_Prot).exists()) { Logger.getRootLogger().info("ProtXML file: " + Combined_Prot + " cannot be found, the export protein summary table will be empty."); } LCMSID protID = null; //Parse prot.xml and generate protein master list given an FDR if (Combined_Prot != null && !Combined_Prot.equals("")) { protID = LCMSID.ReadLCMSIDSerialization(Combined_Prot); if (!"".equals(Combined_Prot) && protID == null) { protID = new LCMSID(Combined_Prot, tandemPara.DecoyPrefix, tandemPara.FastaPath); ProtXMLParser protxmlparser = new ProtXMLParser(protID, Combined_Prot, 0f); //Use DIA-Umpire default protein FDR calculation if (DefaultProtFiltering) { protID.RemoveLowLocalPWProtein(0.8f); protID.RemoveLowMaxIniProbProtein(0.9f); protID.FilterByProteinDecoyFDRUsingMaxIniProb(tandemPara.DecoyPrefix, tandemPara.ProtFDR); } //Get protein FDR calculation without other filtering else { protID.FilterByProteinDecoyFDRUsingLocalPW(tandemPara.DecoyPrefix, tandemPara.ProtFDR); } protID.LoadSequence(); protID.WriteLCMSIDSerialization(Combined_Prot); } Logger.getRootLogger().info("Protein No.:" + protID.ProteinList.size()); } HashMap<String, HashMap<String, FragmentPeak>> IDSummaryFragments = new HashMap<>(); //Generate DIA file list ArrayList<DIAPack> FileList = new ArrayList<>(); try { File folder = new File(WorkFolder); if (!folder.exists()) { Logger.getRootLogger().info("The path : " + WorkFolder + " cannot be found."); System.exit(1); } for (final File fileEntry : folder.listFiles()) { if (fileEntry.isFile() && (fileEntry.getAbsolutePath().toLowerCase().endsWith(".mzxml") | fileEntry.getAbsolutePath().toLowerCase().endsWith(".mzml")) && !fileEntry.getAbsolutePath().toLowerCase().endsWith("q1.mzxml") && !fileEntry.getAbsolutePath().toLowerCase().endsWith("q2.mzxml") && !fileEntry.getAbsolutePath().toLowerCase().endsWith("q3.mzxml")) { AssignFiles.put(fileEntry.getAbsolutePath(), fileEntry); } if (fileEntry.isDirectory()) { for (final File fileEntry2 : fileEntry.listFiles()) { if (fileEntry2.isFile() && (fileEntry2.getAbsolutePath().toLowerCase().endsWith(".mzxml") | fileEntry2.getAbsolutePath().toLowerCase().endsWith(".mzml")) && !fileEntry2.getAbsolutePath().toLowerCase().endsWith("q1.mzxml") && !fileEntry2.getAbsolutePath().toLowerCase().endsWith("q2.mzxml") && !fileEntry2.getAbsolutePath().toLowerCase().endsWith("q3.mzxml")) { AssignFiles.put(fileEntry2.getAbsolutePath(), fileEntry2); } } } } Logger.getRootLogger().info("No. 
of files assigned :" + AssignFiles.size()); for (File fileEntry : AssignFiles.values()) { Logger.getRootLogger().info(fileEntry.getAbsolutePath()); } for (File fileEntry : AssignFiles.values()) { String mzXMLFile = fileEntry.getAbsolutePath(); if (mzXMLFile.toLowerCase().endsWith(".mzxml") | mzXMLFile.toLowerCase().endsWith(".mzml")) { DIAPack DiaFile = new DIAPack(mzXMLFile, NoCPUs); Logger.getRootLogger().info( "================================================================================================="); Logger.getRootLogger().info("Processing " + mzXMLFile); if (!DiaFile.LoadDIASetting()) { Logger.getRootLogger().info("Loading DIA setting failed, job is incomplete"); System.exit(1); } if (!DiaFile.LoadParams()) { Logger.getRootLogger().info("Loading parameters failed, job is incomplete"); System.exit(1); } Logger.getRootLogger().info("Loading identification results " + mzXMLFile + "...."); //If the serialization file for ID file existed if (DiaFile.ReadSerializedLCMSID()) { DiaFile.IDsummary.ReduceMemoryUsage(); DiaFile.IDsummary.ClearAssignPeakCluster(); FileList.add(DiaFile); HashMap<String, FragmentPeak> FragMap = new HashMap<>(); IDSummaryFragments.put(FilenameUtils.getBaseName(mzXMLFile), FragMap); } } } //<editor-fold defaultstate="collapsed" desc="Peptide and fragment selection"> Logger.getRootLogger().info("Peptide and fragment selection across the whole dataset"); ArrayList<LCMSID> SummaryList = new ArrayList<>(); for (DIAPack diafile : FileList) { if (protID != null) { //Generate protein list according to mapping of peptide ions for each DIA file to the master protein list diafile.IDsummary.GenerateProteinByRefIDByPepSeq(protID, true); diafile.IDsummary.ReMapProPep(); } if ("GW".equals(FilterWeight)) { diafile.IDsummary.SetFilterByGroupWeight(); } else if ("PepW".equals(FilterWeight)) { diafile.IDsummary.SetFilterByWeight(); } SummaryList.add(diafile.IDsummary); } FragmentSelection fragselection = new FragmentSelection(SummaryList); fragselection.freqPercent = Freq; fragselection.GeneratePepFragScoreMap(); fragselection.GenerateTopFragMap(TopNFrag); fragselection.GenerateProtPepScoreMap(MinWeight); fragselection.GenerateTopPepMap(TopNPep); //</editor-fold> //<editor-fold defaultstate="collapsed" desc="Writing general reports"> ExportTable export = new ExportTable(WorkFolder, SummaryList, IDSummaryFragments, protID, fragselection); export.Export(TopNPep, TopNFrag, Freq); //</editor-fold> //<editor-fold defaultstate="collapsed" desc="//<editor-fold defaultstate="collapsed" desc="Generate SAINT input files"> if (ExportSaint && protID != null) { HashMap<String, DIAPack> Filemap = new HashMap<>(); for (DIAPack DIAfile : FileList) { Filemap.put(DIAfile.GetBaseName(), DIAfile); } FileWriter baitfile = new FileWriter(WorkFolder + "SAINT_Bait_" + DateTimeTag.GetTag() + ".txt"); FileWriter preyfile = new FileWriter(WorkFolder + "SAINT_Prey_" + DateTimeTag.GetTag() + ".txt"); FileWriter interactionfileMS1 = null; FileWriter interactionfileMS2 = null; if (SAINT_MS1) { interactionfileMS1 = new FileWriter( WorkFolder + "SAINT_Interaction_MS1_" + DateTimeTag.GetTag() + ".txt"); } if (SAINT_MS2) { interactionfileMS2 = new FileWriter( WorkFolder + "SAINT_Interaction_MS2_" + DateTimeTag.GetTag() + ".txt"); } HashMap<String, String> PreyID = new HashMap<>(); for (String samplekey : ControlName.keySet()) { String name = ControlName.get(samplekey); for (String file : ControlList.get(samplekey)) { baitfile.write(FilenameUtils.getBaseName(file) + "\t" + name + "\t" + "C\n"); LCMSID IDsummary 
= Filemap.get(FilenameUtils.getBaseName(file)).IDsummary; if (SAINT_MS1) { SaintOutput(protID, IDsummary, fragselection, interactionfileMS1, file, name, PreyID, 1); } if (SAINT_MS2) { SaintOutput(protID, IDsummary, fragselection, interactionfileMS2, file, name, PreyID, 2); } } } for (String samplekey : BaitName.keySet()) { String name = BaitName.get(samplekey); for (String file : BaitList.get(samplekey)) { baitfile.write(FilenameUtils.getBaseName(file) + "\t" + name + "\t" + "T\n"); LCMSID IDsummary = Filemap.get(FilenameUtils.getBaseName(file)).IDsummary; if (SAINT_MS1) { SaintOutput(protID, IDsummary, fragselection, interactionfileMS1, file, name, PreyID, 1); } if (SAINT_MS2) { SaintOutput(protID, IDsummary, fragselection, interactionfileMS2, file, name, PreyID, 2); } } } baitfile.close(); if (SAINT_MS1) { interactionfileMS1.close(); } if (SAINT_MS2) { interactionfileMS2.close(); } for (String AccNo : PreyID.keySet()) { preyfile.write(AccNo + "\t" + PreyID.get(AccNo) + "\n"); } preyfile.close(); } //</editor-fold> Logger.getRootLogger().info("Job done"); Logger.getRootLogger().info( "================================================================================================="); } catch (Exception e) { Logger.getRootLogger().error(ExceptionUtils.getStackTrace(e)); throw e; } }
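The parameter-file loop above leans on two equals() idioms: the null-safe empty check !"".equals(line) and sentinel matching with line.equals("==File list end"), plus a switch-on-String, which the compiler lowers to equals()/hashCode() calls. A stripped-down, hypothetical sketch of the same loop (the keys and file names are placeholders, not DIA-Umpire's real parameter set):

import java.io.BufferedReader;
import java.io.IOException;
import java.io.StringReader;

public class ParamFileSketch {
    public static void main(String[] args) throws IOException {
        String params = "Thread=4\n==File list begin\na.mzXML\n==File list end\n#comment\n";
        BufferedReader reader = new BufferedReader(new StringReader(params));
        String line;
        while ((line = reader.readLine()) != null) {
            line = line.trim();
            if ("".equals(line) || line.startsWith("#")) {
                continue; // skip blanks and comments, null-safely
            }
            if (line.equals("==File list begin")) {
                // consume entries until the sentinel line
                while ((line = reader.readLine()) != null
                        && !line.trim().equals("==File list end")) {
                    System.out.println("file entry: " + line.trim());
                }
                continue;
            }
            String[] kv = line.split("=", 2);
            if (kv.length == 2) {
                switch (kv[0].trim()) { // switch-on-String uses equals() under the hood
                case "Thread":
                    System.out.println("threads = " + Integer.parseInt(kv[1].trim()));
                    break;
                default:
                    break;
                }
            }
        }
    }
}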
From source file:de.tudarmstadt.ukp.experiments.dip.wp1.documents.Step8GoldDataAggregator.java
public static void main(String[] args) throws Exception { String inputDir = args[0] + "/"; // output dir File outputDir = new File(args[1]); File turkersConfidence = new File(args[2]); if (outputDir.exists()) { outputDir.delete();/*w w w . j av a2 s . c om*/ } outputDir.mkdir(); List<String> annotatorsIDs = new ArrayList<>(); // for (File f : FileUtils.listFiles(new File(inputDir), new String[] { "xml" }, false)) { // QueryResultContainer queryResultContainer = QueryResultContainer // .fromXML(FileUtils.readFileToString(f, "utf-8")); // for (QueryResultContainer.SingleRankedResult rankedResults : queryResultContainer.rankedResults) { // for (QueryResultContainer.MTurkRelevanceVote relevanceVote : rankedResults.mTurkRelevanceVotes) { // if (!annotatorsIDs.contains(relevanceVote.turkID)) // annotatorsIDs.add(relevanceVote.turkID); // } // } // } HashMap<String, Integer> countVotesForATurker = new HashMap<>(); // creates temporary file with format for mace // Hashmap annotations: key is the id of a document and a sentence // Value is an array votes[] of turkers decisions: true or false (relevant or not) // the length of this array equals the number of annotators in List<String> annotatorsIDs. // If an annotator worked on the task his decision is written in the array otherwise the value is NULL // key: queryID + clueWebID + sentenceID // value: true and false annotations TreeMap<String, Annotations> annotations = new TreeMap<>(); for (File f : FileUtils.listFiles(new File(inputDir), new String[] { "xml" }, false)) { QueryResultContainer queryResultContainer = QueryResultContainer .fromXML(FileUtils.readFileToString(f, "utf-8")); System.out.println("Reading " + f.getName()); for (QueryResultContainer.SingleRankedResult rankedResults : queryResultContainer.rankedResults) { String documentID = rankedResults.clueWebID; for (QueryResultContainer.MTurkRelevanceVote relevanceVote : rankedResults.mTurkRelevanceVotes) { Integer turkerID; if (!annotatorsIDs.contains(relevanceVote.turkID)) { annotatorsIDs.add(relevanceVote.turkID); turkerID = annotatorsIDs.size() - 1; } else { turkerID = annotatorsIDs.indexOf(relevanceVote.turkID); } Integer count = countVotesForATurker.get(relevanceVote.turkID); if (count == null) { count = 0; } count++; countVotesForATurker.put(relevanceVote.turkID, count); String id; List<Integer> trueVotes; List<Integer> falseVotes; for (QueryResultContainer.SingleSentenceRelevanceVote singleSentenceRelevanceVote : relevanceVote.singleSentenceRelevanceVotes) if (!"".equals(singleSentenceRelevanceVote.sentenceID)) { id = f.getName() + "_" + documentID + "_" + singleSentenceRelevanceVote.sentenceID; Annotations turkerVotes = annotations.get(id); if (turkerVotes == null) { trueVotes = new ArrayList<>(); falseVotes = new ArrayList<>(); turkerVotes = new Annotations(trueVotes, falseVotes); } trueVotes = turkerVotes.trueAnnotations; falseVotes = turkerVotes.falseAnnotations; if ("true".equals(singleSentenceRelevanceVote.relevant)) { // votes[turkerID] = true; trueVotes.add(turkerID); } else if ("false".equals(singleSentenceRelevanceVote.relevant)) { // votes[turkerID] = false; falseVotes.add(turkerID); } else { throw new IllegalStateException("Annotation value of sentence " + singleSentenceRelevanceVote.sentenceID + " in " + rankedResults.clueWebID + " equals " + singleSentenceRelevanceVote.relevant); } try { int allVotesCount = trueVotes.size() + falseVotes.size(); if (allVotesCount > 5) { System.err.println(id + " doesn't have 5 annotators: true: " + trueVotes.size() + " false: " + 
falseVotes.size()); // nasty hack, we're gonna strip some data; true votes first /* we can't do that, it breaks something down the line int toRemove = allVotesCount - 5; if (trueVotes.size() >= toRemove) { trueVotes = trueVotes .subList(0, trueVotes.size() - toRemove); } else if ( falseVotes.size() >= toRemove) { falseVotes = falseVotes .subList(0, trueVotes.size() - toRemove); } */ System.err.println("Adjusted: " + id + " doesn't have 5 annotators: true: " + trueVotes.size() + " false: " + falseVotes.size()); } } catch (IllegalStateException e) { e.printStackTrace(); } turkerVotes.trueAnnotations = trueVotes; turkerVotes.falseAnnotations = falseVotes; annotations.put(id, turkerVotes); } else { throw new IllegalStateException( "Empty Sentence ID in " + f.getName() + " for turker " + turkerID); } } } } File tmp = printHashMap(annotations, annotatorsIDs.size()); String file = TEMP_DIR + "/" + tmp.getName(); MACE.main(new String[] { "--prefix", file }); //gets the keys of the documents and sentences ArrayList<String> lines = (ArrayList<String>) FileUtils.readLines(new File(file + ".prediction")); int i = 0; TreeMap<String, TreeMap<String, ArrayList<HashMap<String, String>>>> ids = new TreeMap<>(); ArrayList<HashMap<String, String>> sentences; if (lines.size() != annotations.size()) { throw new IllegalStateException( "The size of prediction file is " + lines.size() + "but expected " + annotations.size()); } for (Map.Entry entry : annotations.entrySet()) { //1001.xml_clueweb12-1905wb-13-07360_8783 String key = (String) entry.getKey(); String[] IDs = key.split("_"); if (IDs.length > 2) { String queryID = IDs[0]; String clueWebID = IDs[1]; String sentenceID = IDs[2]; TreeMap<String, ArrayList<HashMap<String, String>>> clueWebIDs = ids.get(queryID); if (clueWebIDs == null) { clueWebIDs = new TreeMap<>(); } sentences = clueWebIDs.get(clueWebID); if (sentences == null) { sentences = new ArrayList<>(); } HashMap<String, String> sentence = new HashMap<>(); sentence.put(sentenceID, lines.get(i)); sentences.add(sentence); clueWebIDs.put(clueWebID, sentences); ids.put(queryID, clueWebIDs); } else { throw new IllegalStateException("Wrong ID " + key); } i++; } for (Map.Entry entry : ids.entrySet()) { TreeMap<Integer, String> value = (TreeMap<Integer, String>) entry.getValue(); String queryID = (String) entry.getKey(); QueryResultContainer queryResultContainer = QueryResultContainer .fromXML(FileUtils.readFileToString(new File(inputDir, queryID), "utf-8")); for (QueryResultContainer.SingleRankedResult rankedResults : queryResultContainer.rankedResults) { for (Map.Entry val : value.entrySet()) { String clueWebID = (String) val.getKey(); if (clueWebID.equals(rankedResults.clueWebID)) { List<QueryResultContainer.SingleSentenceRelevanceVote> goldEstimatedLabels = new ArrayList<>(); List<QueryResultContainer.SingleSentenceRelevanceVote> turkersVotes = new ArrayList<>(); int size = 0; int hitSize = 0; String hitID = ""; for (QueryResultContainer.MTurkRelevanceVote vote : rankedResults.mTurkRelevanceVotes) { if (!hitID.equals(vote.hitID)) { hitID = vote.hitID; hitSize = vote.singleSentenceRelevanceVotes.size(); size = size + hitSize; turkersVotes.addAll(vote.singleSentenceRelevanceVotes); } else { if (vote.singleSentenceRelevanceVotes.size() != hitSize) { hitSize = vote.singleSentenceRelevanceVotes.size(); size = size + hitSize; turkersVotes.addAll(vote.singleSentenceRelevanceVotes); } } } ArrayList<HashMap<String, String>> sentenceList = (ArrayList<HashMap<String, String>>) val .getValue(); if 
(sentenceList.size() != turkersVotes.size()) { try { throw new IllegalStateException("Expected size of annotations is " + turkersVotes.size() + "but found " + sentenceList.size() + " for document " + rankedResults.clueWebID + " in " + queryID); } catch (IllegalStateException ex) { ex.printStackTrace(); } } for (QueryResultContainer.SingleSentenceRelevanceVote s : turkersVotes) { String valSentence = null; for (HashMap<String, String> anno : sentenceList) { if (anno.keySet().contains(s.sentenceID)) { valSentence = anno.get(s.sentenceID); } } QueryResultContainer.SingleSentenceRelevanceVote singleSentenceVote = new QueryResultContainer.SingleSentenceRelevanceVote(); singleSentenceVote.sentenceID = s.sentenceID; if (("false").equals(valSentence)) { singleSentenceVote.relevant = "false"; } else if (("true").equals(valSentence)) { singleSentenceVote.relevant = "true"; } else { throw new IllegalStateException("Annotation value of sentence " + singleSentenceVote.sentenceID + " equals " + val.getValue()); } goldEstimatedLabels.add(singleSentenceVote); } rankedResults.goldEstimatedLabels = goldEstimatedLabels; } } } File outputFile = new File(outputDir, queryID); FileUtils.writeStringToFile(outputFile, queryResultContainer.toXML(), "utf-8"); System.out.println("Finished " + outputFile); } ArrayList<String> annotators = (ArrayList<String>) FileUtils.readLines(new File(file + ".competence")); FileWriter fileWriter; StringBuilder sb = new StringBuilder(); for (int j = 0; j < annotatorsIDs.size(); j++) { String[] s = annotators.get(0).split("\t"); Float score = Float.parseFloat(s[j]); String turkerID = annotatorsIDs.get(j); System.out.println(turkerID + " " + score + " " + countVotesForATurker.get(turkerID)); sb.append(turkerID).append(" ").append(score).append(" ").append(countVotesForATurker.get(turkerID)) .append("\n"); } fileWriter = new FileWriter(turkersConfidence); fileWriter.append(sb.toString()); fileWriter.close(); }
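The vote-classification branches above put the expected literals first ("true".equals(v), "false".equals(v)), so a null or malformed annotation value falls through to the error branch instead of throwing a NullPointerException. A minimal sketch of that idiom:

public class VoteSketch {
    static boolean parseRelevant(String value) {
        if ("true".equals(value)) {
            return true;
        } else if ("false".equals(value)) {
            return false;
        }
        // null and unexpected values end up here, never as an NPE
        throw new IllegalStateException("Annotation value equals " + value);
    }

    public static void main(String[] args) {
        System.out.println(parseRelevant("true"));  // true
        System.out.println(parseRelevant("false")); // false
        try {
            parseRelevant(null);                    // rejected explicitly
        } catch (IllegalStateException e) {
            System.out.println(e.getMessage());
        }
    }
}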
From source file:edu.duke.igsp.gkde.Main.java
public static void main(String[] argv) throws Exception { Options opts = new Options(); opts.addOption("s", true, "wiggle track step (default=1)"); opts.addOption("l", true, "feature length (default=600)"); opts.addOption("f", true, "fragment size (default=estimated from data)"); // opts.addOption("b", true, "bandwidth (default=200)"); // opts.addOption("w", true, "window (default=3800"); opts.addOption("wg", true, "wg threshold set (defualt = calculated)"); opts.addOption("c", true, "genomic total read weight (defualt = calculated)"); opts.addOption("h", false, "print usage"); opts.addOption(OptionBuilder.withArgName("input dir").hasArg() .withDescription("input directory (default=current directory)").isRequired(false).create("d")); opts.addOption(OptionBuilder.withArgName("output dir").hasArg() .withDescription("output directory (default=current directory)").isRequired(false).create("o")); opts.addOption(OptionBuilder.withArgName("background dir").hasArg() .withDescription("background directory (default=none)").isRequired(false).create("b")); opts.addOption(OptionBuilder.withArgName("ploidy dir").hasArg() .withDescription("ploidy/input directory (default=none)").isRequired(false).create("p")); opts.addOption(OptionBuilder.withArgName("wig | bed | npf").hasArg() .withDescription("output format (default wig)").isRequired(false).create("of")); opts.addOption(OptionBuilder.withArgName("dnase | chip | faire | atac").hasArg() .withDescription("input data").isRequired(true).create("in")); opts.addOption(OptionBuilder.withArgName("weight clip").hasArg() .withDescription("weight clip value (default none)").isRequired(false).create("wc")); opts.addOption("t", true, "threshold (standard deviations) (default=4.0)"); // opts.addOption("r", true, "background ratio (default=2.0)"); opts.addOption("v", false, "verbose output"); CommandLineParser parser = new GnuParser(); int fragment_size = -1; int fragment_offset = 0; long featureLength = 600l; // float thresh = 2; float threshold = KDEChromosome.Settings.DEFAULT_THRESHOLD; int step = 1; boolean showHelp = false; boolean verbose = false; String inputDirectory = null; String backgroundDirectory = null; String ploidyDirectory = null; String[] files = null;/*w ww.j ava 2 s . 
co m*/ String[] bgfiles = {}; String[] ipfiles = {}; String outputFormat = "wig"; String inputDataType = "dnase"; File outputDirectory = new File(System.getProperty("user.dir")); long bandwidth = 0l; long window = 0l; double ncuts = 0.0d; float temp_threshold = 0f; int weight_clip = 0; System.out.println("F-Seq Version 1.85"); try { CommandLine cmd = parser.parse(opts, argv); showHelp = (cmd.hasOption("h")); verbose = (cmd.hasOption("v")); if (cmd.hasOption("s")) step = Integer.parseInt(cmd.getOptionValue("s")); if (cmd.hasOption("f")) fragment_size = Integer.parseInt(cmd.getOptionValue("f")); if (cmd.hasOption("d")) //input directory inputDirectory = cmd.getOptionValue("d"); if (cmd.hasOption("b")) //background directory backgroundDirectory = cmd.getOptionValue("b"); if (cmd.hasOption("p")) //ploidy|input directory ploidyDirectory = cmd.getOptionValue("p"); if (cmd.hasOption("l")) // bandwidth featureLength = Long.parseLong(cmd.getOptionValue("l")); if (cmd.hasOption("of")) { // output format outputFormat = cmd.getOptionValue("of"); if (!outputFormat.equals("wig") && !outputFormat.equals("bed") && !outputFormat.equals("npf")) { System.out.println("Parameter error: output format must be 'wig' or 'bed'."); showHelp = true; } } if (cmd.hasOption("in")) { // input data type inputDataType = cmd.getOptionValue("in"); if (!inputDataType.equals("dnase") && !inputDataType.equals("chip") && !inputDataType.equals("faire") && !inputDataType.equals("atac")) { System.out.println( "Parameter error: input data type must be 'dnase', 'chip', 'faire', or 'atac'."); showHelp = true; } } if (cmd.hasOption("wc")) { // weight clip weight_clip = Integer.parseInt(cmd.getOptionValue("wc")); } if (cmd.hasOption("t")) { // threshold (standard deviations) threshold = Float.parseFloat(cmd.getOptionValue("t")); } if (cmd.hasOption("o")) { // output directory String out = cmd.getOptionValue("o"); outputDirectory = new File(out); if (!outputDirectory.exists() && !outputDirectory.isDirectory()) { System.out.println("Output directory '" + out + "' is not a valid directory."); showHelp = true; } } if (cmd.hasOption("wg")) temp_threshold = Float.parseFloat(cmd.getOptionValue("wg")); if (cmd.hasOption("c")) ncuts = Double.parseDouble(cmd.getOptionValue("c")); // TESTING ONLY // if(cmd.hasOption("w")) // window // window = Long.parseLong(cmd.getOptionValue("w")); //if(cmd.hasOption("b")) // window // bandwidth = Long.parseLong(cmd.getOptionValue("b")); files = cmd.getArgs(); // input files //bgfiles = cmd.getArgs(); // background files } catch (Exception e) { System.out.println("Error parsing arguments: " + e.getMessage()); e.printStackTrace(); showHelp = true; } if (showHelp || (inputDirectory == null && files.length == 0)) { HelpFormatter formatter = new HelpFormatter(); formatter.printHelp("fseq [options]... 
[file(s)]...", opts); System.exit(1); } File[] pfiles = getFiles(inputDirectory, files); File[] background_files = getFiles(backgroundDirectory, bgfiles); File[] ploidy_files = getFiles(ploidyDirectory, ipfiles); KDEChromosome[] chrs = null; // assume all files are of the same type, if not we'll get parsing errors String path = pfiles[0].getPath(); String extension = path.substring(path.lastIndexOf('.')).toLowerCase(); System.out.println("Path: " + path + ", extension: " + extension); if (extension.equals(".bed")) { System.out.println("Parsing BED file."); chrs = BedReader.read(pfiles); } else if (extension.equals(".sam") || extension.equals(".bam")) { System.out.println("Parsing SAM/BAM file."); chrs = SamReader.read(pfiles, weight_clip); } //KDEChromosome[] input = BedReader.read(ifiles); //compute fragment offset if (fragment_size == -1) { fragment_size = wgShiftCalc(chrs); } fragment_offset = (int) (fragment_size / 2); if (ncuts == 0.0d) { for (int i = 0; i < chrs.length; ++i) { // computes the total read weight of all cuts on a chromosome ncuts += chrs[i].getTotalWeight(); } } KDEChromosome.Settings settings = null; if (bandwidth > 0 || window > 0) { settings = new KDEChromosome.Settings(bandwidth, window, threshold, fragment_offset, ncuts, inputDataType); } else { settings = new KDEChromosome.Settings(featureLength, threshold, fragment_offset, ncuts, inputDataType); } float wg_threshold = wgThreshold(settings, chrs); if (temp_threshold != 0f) { wg_threshold = temp_threshold; } //KDEChromosome.Settings bg_settings = null; //bg_settings = new KDEChromosome.Settings(featureLength*2, threshold, fragment_offset); //int background_size = 0; //int input_size = 0; //float bg_ratio = 0; //float sd = 0; if (verbose) { System.out.println("Settings: "); System.out.println("\twindow=" + (settings.window * 2)); System.out.println("\tbandwidth=" + (settings.bandwidth)); //System.out.println("\tfragment offset=" + (settings.offset)); System.out.println("\tthreshold = " + wg_threshold); System.out.println("\test. fragment size = " + fragment_size); System.out.println("\tsequence length = " + chrs[0].getSequenceLength()); } // if(backgroundDirectory != null) { // for(int i = 0; i < input.length; ++i) { // background_size += input[i].getLength(); // } // for(int i = 0; i < chrs.length; ++i) { // input_size += chrs[i].getLength(); // } // bg_ratio = (float)input_size/(float)background_size; // sd = computeSD(bg_settings, input); // //System.out.println("Sample Ratio: " + bg_ratio); // //System.out.println("Input Size: " + input_size); // //System.out.println("Background Size: " + background_size); // //System.out.println("Input standard deviation: " + (settings.threshold * (float)Math.sqrt((double)bg_ratio * (double)sd * (double)sd))); // //System.out.println("Data standard deviation: " + settings.threshold * computeSD(settings, chrs)); // } for (int i = 0; i < chrs.length; ++i) { if (chrs[i].getFirstPos() == chrs[i].getLastPos()) { System.out.println("Warning: " + chrs[i].getChromosome() + " has size zero. 
Skipping."); continue; } File ofile = Util.makeUniqueFileWithExtension(outputDirectory, chrs[i].getChromosome(), outputFormat); DensityWriter dw = null; if (outputFormat.equals("wig")) { dw = new WiggleDensityWriter(ofile, chrs[i].getChromosome(), chrs[i].getFirstPos(), step); } else { if (outputFormat.equals("npf")) { dw = new NpfDensityWriter(ofile, chrs[i].getChromosome(), chrs[i].getFirstPos(), step); } else { dw = new BedDensityWriter(ofile, chrs[i].getChromosome(), chrs[i].getFirstPos(), step); } } //Function takes all? or new function for each? // if(backgroundDirectory != null) { // boolean hit = false; // for(int j = 0; j < background_files.length; ++j) { // if(background_files[j].getName().equals(chrs[i].getChromosome() + ".bff")) { // System.out.println("Running background on Chromosome " + chrs[i].getChromosome()); // chrs[i].runBG(settings, dw, verbose, wg_threshold, background_files[j]); // hit = true; // } // } // if(!hit) // System.out.println("No background for Chromosome " + chrs[i].getChromosome()); // } else { // if(ploidyDirectory !=) // chrs[i].run(settings, dw, verbose, wg_threshold); // } chrs[i].run(settings, dw, verbose, wg_threshold, background_files, ploidy_files); dw.close(); } //kde.showGraph(); }
From source file:de.huberlin.cuneiform.main.Main.java
public static void main(String[] args) throws ParseException, IOException, NotDerivableException,
        InterruptedException, JSONException {

    GnuParser gnuParser;
    CommandLine cmdline;
    Options opt;
    String value;
    int platform;
    File outputDir;
    String[] fileList;
    StringBuffer buf;
    String line;
    String dagid;
    File logFile;

    opt = new Options();
    opt.addOption("p", "platform", true,
            "The platform to perform the Cuneiform script's interpretation. "
                    + "Possible platforms are: 'dot', 'local', and 'debug'. Default is 'local'.");
    opt.addOption("d", "directory", true,
            "The output directory, to put the interpretation intermediate and output result as well as the default location to store the log.");
    opt.addOption("c", "clean", false,
            "If set, the execution engine ignores all cached results and starts a clean workflow run.");
    opt.addOption("r", "runid", true,
            "If set, a custom id is set for this workflow run. By default a UUID string is used.");
    opt.addOption("f", "file", true,
            "Override the default location of the log file and use the specified filename instead. If the platform is 'dot', this option sets the name of the output dot-file.");
    opt.addOption("h", "help", false, "Print help text.");

    gnuParser = new GnuParser();
    cmdline = gnuParser.parse(opt, args);

    if (cmdline.hasOption("help")) {
        System.out.println("CUNEIFORM - A Functional Workflow Language\n" + LABEL_VERSION);
        new HelpFormatter().printHelp("java -jar cuneiform.jar [OPTION]*", opt);
        return;
    }

    if (cmdline.hasOption("platform")) {
        value = cmdline.getOptionValue("platform");
        if (value.equals("dot"))
            platform = PLATFORM_DOT;
        else if (value.equals("local"))
            platform = PLATFORM_LOCAL;
        else if (value.equals("debug"))
            platform = PLATFORM_DEBUG;
        else
            throw new RuntimeException("Specified platform '" + value + "' not recognized.");
    } else
        platform = PLATFORM_LOCAL;

    if (cmdline.hasOption('d')) {
        value = cmdline.getOptionValue('d');
    } else
        value = "build";

    outputDir = new File(value);
    if (outputDir.exists()) {
        if (!outputDir.isDirectory())
            throw new IOException(
                    "Output directory '" + outputDir.getAbsolutePath() + "' exists but is not a directory.");
        else if (cmdline.hasOption('c')) {
            FileUtils.deleteDirectory(outputDir);
            if (!outputDir.mkdirs())
                throw new IOException(
                        "Could not create output directory '" + outputDir.getAbsolutePath() + "'");
        }
    } else if (!outputDir.mkdirs())
        throw new IOException("Could not create output directory '" + outputDir.getAbsolutePath() + "'");

    if (cmdline.hasOption('r'))
        dagid = cmdline.getOptionValue('r');
    else
        dagid = UUID.randomUUID().toString();

    if (cmdline.hasOption('f'))
        logFile = new File(cmdline.getOptionValue('f'));
    else
        logFile = null;

    fileList = cmdline.getArgs();
    buf = new StringBuffer();
    if (fileList.length == 0) {
        try (BufferedReader reader = new BufferedReader(new InputStreamReader(System.in))) {
            while ((line = reader.readLine()) != null)
                buf.append(line).append('\n');
        }
        switch (platform) {
        case PLATFORM_DOT:
            createDot(buf.toString(), outputDir, logFile);
            break;
        case PLATFORM_LOCAL:
            runLocal(buf.toString(), outputDir, logFile, dagid);
            break;
        case PLATFORM_DEBUG:
            runDebug(buf.toString(), outputDir, logFile, dagid);
            break;
        default:
            throw new RuntimeException("Platform not recognized.");
        }
    } else
        switch (platform) {
        case PLATFORM_DOT:
            createDot(fileList, outputDir, logFile);
            break;
        case PLATFORM_LOCAL:
            runLocal(fileList, outputDir, logFile, dagid);
            break;
        case PLATFORM_DEBUG:
            runDebug(fileList, outputDir, logFile, dagid);
            break;
        default:
            throw new RuntimeException("Platform not recognized.");
        }
}
From source file:net.antidot.semantic.rdf.rdb2rdf.main.Db2triples.java
public static void main(String[] args) { // Get all options Options options = new Options(); Options r2rmlOptions = new Options(); Options dmOptions = new Options(); options.addOption(modeOpt);/*w ww . ja v a 2s .c o m*/ options.addOption(userNameOpt); r2rmlOptions.addOption(userNameOpt); dmOptions.addOption(userNameOpt); options.addOption(passwordOpt); r2rmlOptions.addOption(passwordOpt); dmOptions.addOption(passwordOpt); options.addOption(URLOpt); r2rmlOptions.addOption(URLOpt); dmOptions.addOption(URLOpt); options.addOption(driverOpt); r2rmlOptions.addOption(driverOpt); dmOptions.addOption(driverOpt); options.addOption(dbOpt); r2rmlOptions.addOption(dbOpt); dmOptions.addOption(dbOpt); options.addOption(baseURIOpt); r2rmlOptions.addOption(baseURIOpt); dmOptions.addOption(baseURIOpt); options.addOption(forceOpt); r2rmlOptions.addOption(forceOpt); dmOptions.addOption(forceOpt); options.addOption(nativeOpt); r2rmlOptions.addOption(nativeOpt); dmOptions.addOption(nativeOpt); options.addOption(nativeStoreNameOpt); r2rmlOptions.addOption(nativeStoreNameOpt); dmOptions.addOption(nativeStoreNameOpt); options.addOption(outputOpt); r2rmlOptions.addOption(outputOpt); dmOptions.addOption(outputOpt); options.addOption(transformSPARQLFile); dmOptions.addOption(transformSPARQLFile); options.addOption(transformOutputFile); dmOptions.addOption(transformOutputFile); options.addOption(rdfFormat); r2rmlOptions.addOption(rdfFormat); dmOptions.addOption(rdfFormat); options.addOption(versionOpt); dmOptions.addOption(versionOpt); options.addOption(r2rmlFileOpt); r2rmlOptions.addOption(r2rmlFileOpt); // Init parameters String mode = null; String userName = null; String password = null; String url = null; DriverType driver = null; String dbName = null; String baseURI = null; boolean useNativeStore = false; boolean forceExistingRep = false; String nativeOutput = null; String output = null; String sparql = null; String sparqlOutput = null; String format = null; String r2rmlFile = null; int int_version = 1; // RDF Format output RDFFormat rdfFormat = RDFFormat.TURTLE; // Turtle by default // Norm version Version version = Version.WD_20120529; // Option parsing // Create the parser CommandLineParser parser = new GnuParser(); try { // parse the command line arguments CommandLine line = parser.parse(options, args); // Database settings // Mode if (!line.hasOption("mode")) { // automatically generate the help statement log.error("Mode is required. Use -m option to set it."); HelpFormatter formatter = new HelpFormatter(); formatter.printHelp(projectName, options); System.exit(-1); } else { mode = line.getOptionValue("mode"); if (!mode.equals("r2rml") && !mode.equals("dm")) { log.error("Unkonw mode. Please select 'r2rml' or 'dm' mode."); HelpFormatter formatter = new HelpFormatter(); formatter.printHelp(projectName, options); System.exit(-1); } } // user name if (!line.hasOption("user")) { // automatically generate the help statement log.error("User name is required. Use -u option to set it."); HelpFormatter formatter = new HelpFormatter(); if (mode.equals("r2rml")) { formatter.printHelp(projectNameR2RMLMode, r2rmlOptions); } else { formatter.printHelp(projectNameDirectMappingMode, dmOptions); } System.exit(-1); } else { userName = line.getOptionValue("user"); } // password if (!line.hasOption("pass")) { // automatically generate the help statement log.error("Password is required. 
Use -p option to set it."); HelpFormatter formatter = new HelpFormatter(); if (mode.equals("r2rml")) { formatter.printHelp(projectNameR2RMLMode, r2rmlOptions); } else { formatter.printHelp(projectNameDirectMappingMode, dmOptions); } System.exit(-1); } else { password = line.getOptionValue("pass"); } // Database URL url = line.getOptionValue("url", "jdbc:mysql://localhost/"); // driver driver = new DriverType(line.getOptionValue("driver", defaultDriver.getDriverName())); // Database name if (!line.hasOption("database")) { // automatically generate the help statement log.error("Database name is required. Use -b option to set it."); HelpFormatter formatter = new HelpFormatter(); if (mode.equals("r2rml")) { formatter.printHelp(projectNameR2RMLMode, r2rmlOptions); } else { formatter.printHelp(projectNameDirectMappingMode, dmOptions); } System.exit(-1); } else { dbName = line.getOptionValue("database"); } // Base URI baseURI = line.getOptionValue("base_uri", "http://foo.example/DB/"); // Use of native store ? useNativeStore = line.hasOption("n"); // Name of native store if (useNativeStore && !line.hasOption("native_output")) { // automatically generate the help statement log.error("Native triplestore path is required. Use -n option to set it."); HelpFormatter formatter = new HelpFormatter(); if (mode.equals("r2rml")) { formatter.printHelp(projectNameR2RMLMode, r2rmlOptions); } else { formatter.printHelp(projectNameDirectMappingMode, dmOptions); } System.exit(-1); } else { nativeOutput = line.getOptionValue("native_output"); } // Force loading of repository forceExistingRep = line.hasOption("f"); // Output output = line.getOptionValue("output", "output.ttl"); // SPARQL transformation if (line.hasOption("sparql")) { if (!mode.equals("dm")) { log.warn("sparql option is required only for 'dm' mode : it will be ignored..."); } else { sparql = line.getOptionValue("sparql"); sparqlOutput = line.getOptionValue("sparql_output", "output_sparql.ttl"); } } // RDF Format if (line.hasOption("format")) { format = line.getOptionValue("format"); if (format.equals("TURTLE")) rdfFormat = RDFFormat.TURTLE; else if (format.equals("RDFXML")) rdfFormat = RDFFormat.RDFXML; else if (format.equals("NTRIPLES")) rdfFormat = RDFFormat.NTRIPLES; else if (!format.equals("N3")) { log.error("Unknown RDF format. Please use RDFXML, TURTLE, N3 or NTRIPLES."); HelpFormatter formatter = new HelpFormatter(); if (mode.equals("r2rml")) { formatter.printHelp(projectNameR2RMLMode, r2rmlOptions); } else { formatter.printHelp(projectNameDirectMappingMode, dmOptions); } System.exit(-1); } } // Norm version if (line.hasOption("version")) { if (!mode.equals("dm")) { log.warn("version option is required only for 'dm' mode : it will be ignored..."); } switch (int_version) { case 1: version = Version.WD_20120529; break; case 2: version = Version.WD_20110324; // Check DB compatibilities if (!(driver.equals(DriverType.MysqlDriver) || driver.equals(DriverType.PostgreSQL))) { log.error( "Db2triples in Direct Mapping mode does'nt support this driver for the Working Draft" + " of 23 March 2011 (only MySQL and PostGreSQL for this time). " + "You can set the version option to select Working Draft of 20 September 2011."); System.exit(-1); } break; default: break; } } // r2rml instance if (mode.equals("r2rml")) { if (!line.hasOption("r2rml_file")) { log.error("R2RML config file is required. 
Use -r option to set it."); // automatically generate the help statement HelpFormatter formatter = new HelpFormatter(); formatter.printHelp(projectNameR2RMLMode, r2rmlOptions); System.exit(-1); } else { r2rmlFile = line.getOptionValue("r2rml_file"); File r2rmlFileTest = new File(r2rmlFile); if (!r2rmlFileTest.exists()) { log.error("R2RML file does not exists."); System.exit(-1); } } } } catch (ParseException exp) { // oops, something went wrong log.error("Parsing failed. Reason : " + exp.getMessage()); HelpFormatter formatter = new HelpFormatter(); formatter.printHelp(projectName, options); System.exit(-1); } // Open test database Connection conn = null; try { // Connect database conn = SQLConnector.connect(userName, password, url + dbName, driver); // Generate RDF graph SesameDataSet g = null; // Check nature of storage (memory by default) if (useNativeStore) { File pathToNativeOutputDir = new File(nativeOutput); if (pathToNativeOutputDir.exists() && !forceExistingRep) { log.error("Directory " + pathToNativeOutputDir + " already exists. Use -f option to force loading of existing repository."); System.exit(-1); } // Extract database model according to convert mode if (mode.equals("r2rml")) { g = R2RMLProcessor.convertDatabase(conn, r2rmlFile, baseURI, nativeOutput, driver); } else { g = DirectMapper.generateDirectMapping(conn, version, driver, baseURI, null, nativeOutput); } } else { File outputFile = new File(output); if (outputFile.exists() && !forceExistingRep) { log.error("Output file " + outputFile.getAbsolutePath() + " already exists. Please remove it or modify ouput name option."); System.exit(-1); } // Extract database model if (mode.equals("r2rml")) { g = R2RMLProcessor.convertDatabase(conn, r2rmlFile, baseURI, driver); } else { g = DirectMapper.generateDirectMapping(conn, version, driver, baseURI, null, null); } // Dump graph log.info("Serialize RDF graph..."); g.dumpRDF(output, rdfFormat); log.info("RDF graph serialized into " + outputFile.getAbsolutePath()); } if (sparql != null && mode.equals("dm")) { log.info("Execute SPARQL transformation..."); Long start = System.currentTimeMillis(); String result = g.runSPARQLFromFile(sparql, rdfFormat); SesameDataSet gResult = new SesameDataSet(); gResult.addString(result, rdfFormat); gResult.dumpRDF(sparqlOutput, rdfFormat); Float stop = Float.valueOf(System.currentTimeMillis() - start) / 1000; log.info("Direct Mapping SPARQL query executed in " + stop + " seconds."); log.info("[DirectMapping:main] Number of triples after transformation : " + gResult.getSize()); } } catch (Exception e) { e.printStackTrace(); } finally { try { // Close db connection conn.close(); } catch (SQLException e) { e.printStackTrace(); } } }
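Note that mode.equals("r2rml") is re-evaluated at almost every branch above. A sketch of the same dispatch with the value validated once and the comparison hoisted into a boolean; equalsIgnoreCase would additionally accept "R2RML" (the names here are illustrative, not Db2triples' API):

public class ModeSketch {
    public static void main(String[] args) {
        String mode = "r2rml"; // would come from the parsed command line
        if (!mode.equals("r2rml") && !mode.equals("dm")) {
            System.err.println("Unknown mode. Please select 'r2rml' or 'dm' mode.");
            System.exit(-1);
        }
        // compare once, reuse everywhere
        boolean isR2rml = mode.equals("r2rml");
        System.out.println(isR2rml ? "printing r2rml help" : "printing dm help");
    }
}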
From source file:com.era7.bioinfo.annotation.AutomaticQualityControl.java
public static void main(String[] args) { if (args.length != 4) { System.out.println("This program expects four parameters: \n" + "1. Gene annotation XML filename \n" + "2. Reference protein set (.fasta)\n" + "3. Output TXT filename\n" + "4. Initial Blast XML results filename (the one used at the very beginning of the semiautomatic annotation process)\n"); } else {//from w w w. j ava2 s.c o m BufferedWriter outBuff = null; try { File inFile = new File(args[0]); File fastaFile = new File(args[1]); File outFile = new File(args[2]); File blastFile = new File(args[3]); //Primero cargo todos los datos del archivo xml del blast BufferedReader buffReader = new BufferedReader(new FileReader(blastFile)); StringBuilder stBuilder = new StringBuilder(); String line = null; while ((line = buffReader.readLine()) != null) { stBuilder.append(line); } buffReader.close(); System.out.println("Creating blastoutput..."); BlastOutput blastOutput = new BlastOutput(stBuilder.toString()); System.out.println("BlastOutput created! :)"); stBuilder.delete(0, stBuilder.length()); HashMap<String, String> blastProteinsMap = new HashMap<String, String>(); ArrayList<Iteration> iterations = blastOutput.getBlastOutputIterations(); for (Iteration iteration : iterations) { blastProteinsMap.put(iteration.getQueryDef().split("\\|")[1].trim(), iteration.toString()); } //freeing some memory blastOutput = null; //------------------------------------------------------------------------ //Initializing writer for output file outBuff = new BufferedWriter(new FileWriter(outFile)); //reading gene annotation xml file..... buffReader = new BufferedReader(new FileReader(inFile)); stBuilder = new StringBuilder(); line = null; while ((line = buffReader.readLine()) != null) { stBuilder.append(line); } buffReader.close(); XMLElement genesXML = new XMLElement(stBuilder.toString()); //freeing some memory I don't need anymore stBuilder.delete(0, stBuilder.length()); //reading file with the reference proteins set ArrayList<String> proteinsReferenceSet = new ArrayList<String>(); buffReader = new BufferedReader(new FileReader(fastaFile)); while ((line = buffReader.readLine()) != null) { if (line.charAt(0) == '>') { proteinsReferenceSet.add(line.split("\\|")[1]); } } buffReader.close(); Element pGenes = genesXML.asJDomElement().getChild(PredictedGenes.TAG_NAME); List<Element> contigs = pGenes.getChildren(ContigXML.TAG_NAME); System.out.println("There are " + contigs.size() + " contigs to be checked... "); outBuff.write("There are " + contigs.size() + " contigs to be checked... 
\n"); outBuff.write("Proteins reference set: \n"); for (String st : proteinsReferenceSet) { outBuff.write(st + ","); } outBuff.write("\n"); for (Element elem : contigs) { ContigXML contig = new ContigXML(elem); //escribo el id del contig en el que estoy outBuff.write("Checking contig: " + contig.getId() + "\n"); outBuff.flush(); List<XMLElement> geneList = contig.getChildrenWith(PredictedGene.TAG_NAME); System.out.println("geneList.size() = " + geneList.size()); int numeroDeGenesParaAnalizar = geneList.size() / FACTOR; if (numeroDeGenesParaAnalizar == 0) { numeroDeGenesParaAnalizar++; } ArrayList<Integer> indicesUtilizados = new ArrayList<Integer>(); outBuff.write("\nThe contig has " + geneList.size() + " predicted genes, let's analyze: " + numeroDeGenesParaAnalizar + "\n"); for (int j = 0; j < numeroDeGenesParaAnalizar; j++) { int geneIndex; boolean geneIsDismissed = false; do { geneIsDismissed = false; geneIndex = (int) Math.round(Math.floor(Math.random() * geneList.size())); PredictedGene tempGene = new PredictedGene(geneList.get(geneIndex).asJDomElement()); if (tempGene.getStatus().equals(PredictedGene.STATUS_DISMISSED)) { geneIsDismissed = true; } } while (indicesUtilizados.contains(new Integer(geneIndex)) && geneIsDismissed); indicesUtilizados.add(geneIndex); System.out.println("geneIndex = " + geneIndex); //Ahora hay que sacar el gen correspondiente al indice y hacer el control de calidad PredictedGene gene = new PredictedGene(geneList.get(geneIndex).asJDomElement()); outBuff.write("\nAnalyzing gene with id: " + gene.getId() + " , annotation uniprot id: " + gene.getAnnotationUniprotId() + "\n"); outBuff.write("eValue: " + gene.getEvalue() + "\n"); //--------------PETICION POST HTTP BLAST---------------------- PostMethod post = new PostMethod(BLAST_URL); post.addParameter("program", "blastx"); post.addParameter("sequence", gene.getSequence()); post.addParameter("database", "uniprotkb"); post.addParameter("email", "ppareja@era7.com"); post.addParameter("exp", "1e-10"); post.addParameter("stype", "dna"); // execute the POST HttpClient client = new HttpClient(); int status = client.executeMethod(post); System.out.println("status post = " + status); InputStream inStream = post.getResponseBodyAsStream(); String fileName = "jobid.txt"; FileOutputStream outStream = new FileOutputStream(new File(fileName)); byte[] buffer = new byte[1024]; int len; while ((len = inStream.read(buffer)) != -1) { outStream.write(buffer, 0, len); } outStream.close(); //Once the file is created I just have to read one line in order to extract the job id buffReader = new BufferedReader(new FileReader(new File(fileName))); String jobId = buffReader.readLine(); buffReader.close(); System.out.println("jobId = " + jobId); //--------------HTTP CHECK JOB STATUS REQUEST---------------------- GetMethod get = new GetMethod(CHECK_JOB_STATUS_URL + jobId); String jobStatus = ""; do { try { Thread.sleep(1000);//sleep for 1000 ms } catch (InterruptedException ie) { //If this thread was intrrupted by nother thread } status = client.executeMethod(get); //System.out.println("status get = " + status); inStream = get.getResponseBodyAsStream(); fileName = "jobStatus.txt"; outStream = new FileOutputStream(new File(fileName)); while ((len = inStream.read(buffer)) != -1) { outStream.write(buffer, 0, len); } outStream.close(); //Once the file is created I just have to read one line in order to extract the job id buffReader = new BufferedReader(new FileReader(new File(fileName))); jobStatus = buffReader.readLine(); 
//System.out.println("jobStatus = " + jobStatus); buffReader.close(); } while (!jobStatus.equals(FINISHED_JOB_STATUS)); //Once I'm here the blast should've already finished //--------------JOB RESULTS HTTP REQUEST---------------------- get = new GetMethod(JOB_RESULT_URL + jobId + "/out"); status = client.executeMethod(get); System.out.println("status get = " + status); inStream = get.getResponseBodyAsStream(); fileName = "jobResults.txt"; outStream = new FileOutputStream(new File(fileName)); while ((len = inStream.read(buffer)) != -1) { outStream.write(buffer, 0, len); } outStream.close(); //--------parsing the blast results file----- TreeSet<GeneEValuePair> featuresBlast = new TreeSet<GeneEValuePair>(); buffReader = new BufferedReader(new FileReader(new File(fileName))); while ((line = buffReader.readLine()) != null) { if (line.length() > 3) { String prefix = line.substring(0, 3); if (prefix.equals("TR:") || prefix.equals("SP:")) { String[] columns = line.split(" "); String id = columns[1]; //System.out.println("id = " + id); String e = ""; String[] arraySt = line.split("\\.\\.\\."); if (arraySt.length > 1) { arraySt = arraySt[1].trim().split(" "); int contador = 0; for (int k = 0; k < arraySt.length && contador <= 2; k++) { String string = arraySt[k]; if (!string.equals("")) { contador++; if (contador == 2) { e = string; } } } } else { //Number before e- String[] arr = arraySt[0].split("e-")[0].split(" "); String numeroAntesE = arr[arr.length - 1]; String numeroDespuesE = arraySt[0].split("e-")[1].split(" ")[0]; e = numeroAntesE + "e-" + numeroDespuesE; } double eValue = Double.parseDouble(e); //System.out.println("eValue = " + eValue); GeneEValuePair g = new GeneEValuePair(id, eValue); featuresBlast.add(g); } } } GeneEValuePair currentGeneEValuePair = new GeneEValuePair(gene.getAnnotationUniprotId(), gene.getEvalue()); System.out.println("currentGeneEValuePair.id = " + currentGeneEValuePair.id); System.out.println("currentGeneEValuePair.eValue = " + currentGeneEValuePair.eValue); boolean blastContainsGene = false; for (GeneEValuePair geneEValuePair : featuresBlast) { if (geneEValuePair.id.equals(currentGeneEValuePair.id)) { blastContainsGene = true; //le pongo la e que tiene en el wu-blast para poder comparar currentGeneEValuePair.eValue = geneEValuePair.eValue; break; } } if (blastContainsGene) { outBuff.write("The protein was found in the WU-BLAST result.. 
\n"); //Una vez que se que esta en el blast tengo que ver que sea la mejor GeneEValuePair first = featuresBlast.first(); outBuff.write("Protein with best eValue according to the WU-BLAST result: " + first.id + " , " + first.eValue + "\n"); if (first.id.equals(currentGeneEValuePair.id)) { outBuff.write("Proteins with best eValue match up \n"); } else { if (first.eValue == currentGeneEValuePair.eValue) { outBuff.write( "The one with best eValue is not the same protein but has the same eValue \n"); } else if (first.eValue > currentGeneEValuePair.eValue) { outBuff.write( "The one with best eValue is not the same protein but has a worse eValue :) \n"); } else { outBuff.write( "The best protein from BLAST has an eValue smaller than ours, checking if it's part of the reference set...\n"); //System.exit(-1); if (proteinsReferenceSet.contains(first.id)) { //The protein is in the reference set and that shouldn't happen outBuff.write( "The protein was found on the reference set, checking if it belongs to the same contig...\n"); String iterationSt = blastProteinsMap.get(gene.getAnnotationUniprotId()); if (iterationSt != null) { outBuff.write( "The protein was found in the BLAST used at the beginning of the annotation process.\n"); Iteration iteration = new Iteration(iterationSt); ArrayList<Hit> hits = iteration.getIterationHits(); boolean contigFound = false; Hit errorHit = null; for (Hit hit : hits) { if (hit.getHitDef().indexOf(contig.getId()) >= 0) { contigFound = true; errorHit = hit; break; } } if (contigFound) { outBuff.write( "ERROR: A hit from the same contig was find in the Blast file: \n" + errorHit.toString() + "\n"); } else { outBuff.write("There is no hit with the same contig! :)\n"); } } else { outBuff.write( "The protein is NOT in the BLAST used at the beginning of the annotation process.\n"); } } else { //The protein was not found on the reference set so everything's ok outBuff.write( "The protein was not found on the reference, everything's ok :)\n"); } } } } else { outBuff.write("The protein was NOT found on the WU-BLAST !! :( \n"); //System.exit(-1); } } } } catch (Exception ex) { ex.printStackTrace(); } finally { try { //closing outputfile outBuff.close(); } catch (IOException ex) { Logger.getLogger(AutomaticQualityControl.class.getName()).log(Level.SEVERE, null, ex); } } } }
From source file:com.zuora.api.UsageAdjInvoiceRegenerator.java
public static void main(String[] args) {
    String exportIdPRPC, exportIdII, exportFileIdII, exportFileIdPRPC, queryII, queryPRPC;
    boolean hasArgs = false;
    if (args != null && args.length >= 1) {
        UsageAdjInvoiceRegenerator.PROPERTY_FILE_NAME = args[0];
        hasArgs = true;
    }
    AppParamManager.initParameters(hasArgs);
    if (!AppParamManager.TASK_ID.equals("")) {
        try {
            zApiClient = new ApiClient(AppParamManager.API_URL, AppParamManager.USER_NAME,
                    AppParamManager.USER_PASSWORD, AppParamManager.USER_SESSION);
            zApiClient.login();
        } catch (Exception ex) {
            Logger.print(ex);
            Logger.print("RefreshSession - there was an exception in the API call.");
        }
        queryPRPC = AppParamManager.PRPCexportQuery;
        queryII = AppParamManager.IIexportQuery;
        exportIdPRPC = zApiClient.createExport(queryPRPC);
        exportIdII = zApiClient.createExport(queryII);
        // createExport returns null or "" on failure; errors are logged but processing continues.
        if (exportIdII == null || exportIdII.equals("")) {
            Logger.print("Error. Failed to create Invoice Item export.");
        }
        if (exportIdPRPC == null || exportIdPRPC.equals("")) {
            Logger.print("Error. Failed to create PRPC export.");
        }
        exportFileIdII = zApiClient.getExportFileId(exportIdII);
        if (exportFileIdII == null) {
            Logger.print("Error. Failed to get Invoice Item export file id.");
        }
        exportFileIdPRPC = zApiClient.getExportFileId(exportIdPRPC);
        if (exportFileIdPRPC == null) {
            Logger.print("Error. Failed to get PRPC export file id.");
        }

        // Fetch the export files from Zuora.
        Logger.print("Opening export file");
        String authorization;
        if (AppParamManager.USER_SESSION.isEmpty()) {
            String login = AppParamManager.USER_NAME + ":" + AppParamManager.USER_PASSWORD;
            authorization = "Basic " + Base64.encodeBase64String(login.getBytes());
        } else {
            authorization = "ZSession " + AppParamManager.USER_SESSION;
        }
        try {
            // Build the file endpoint from the API URL: keep everything through "apps/"
            // and append "api/file/".
            int index = AppParamManager.API_URL.indexOf("apps") + 5;
            String zendpoint = AppParamManager.API_URL.substring(0, index) + "api/file/";
            Logger.print(zendpoint);

            // Read the invoice items.
            String zendpointII = zendpoint + exportFileIdII + "/";
            URL url = new URL(zendpointII);
            URLConnection uc = url.openConnection();
            uc.setRequestProperty("Authorization", authorization);
            InputStream content = uc.getInputStream();
            BufferedReader in = new BufferedReader(new InputStreamReader(content));
            CSVReader csvReader = new CSVReader(in);
            List<String[]> batchOfRawDataList;
            while ((batchOfRawDataList = csvReader.parseEntity()) != null) {
                UsageAdjInvoiceRegenerator.readInvoices(batchOfRawDataList,
                        csvReader.getBatchStartLineNumber(), "InvoiceItem");
            }
            in.close();

            // Read the PRPC items.
            String zendpointPRPC = zendpoint + exportFileIdPRPC + "/";
            url = new URL(zendpointPRPC);
            uc = url.openConnection();
            uc.setRequestProperty("Authorization", authorization);
            content = uc.getInputStream();
            in = new BufferedReader(new InputStreamReader(content));
            csvReader = new CSVReader(in);
            while ((batchOfRawDataList = csvReader.parseEntity()) != null) {
                UsageAdjInvoiceRegenerator.readInvoices(batchOfRawDataList,
                        csvReader.getBatchStartLineNumber(), "PRPCItem");
            }
            in.close();

            Logger.print("start processing values");
            UsageAdjInvoiceRegenerator chargeAdjustment = new UsageAdjInvoiceRegenerator();
            int[] results;
            int totalErrors = 0;
            String emailmsg = "";
            Logger.print("----------------------------------------");

            // Note: each failing step below overwrites emailmsg rather than appending to it.
            Logger.print("start creating usages");
            results = zApiClient.createUsageItems(newUsageCollection);
            if (results[1] != 0) {
                emailmsg = (results[0] - results[1]) + "/" + results[0] + " usage creation ";
            }
            totalErrors += results[1];

            Logger.print("start cancelling invoices");
            results = zApiClient.alterInvoices(newUsageCollection, "cancel");
            if (results[1] != 0) {
                emailmsg = (results[0] - results[1]) + "/" + results[0] + " invoice cancellation ";
            }
            totalErrors += results[1];

            Logger.print("start deleting usages");
            results = zApiClient.deleteUsageItems(deleteList);
            if (results[1] != 0) {
                emailmsg = (results[0] - results[1]) + "/" + results[0] + " usage deletion ";
            }
            totalErrors += results[1];

            Logger.print("start regenerating invoices");
            results = zApiClient.alterInvoices(newUsageCollection, "generate");
            if (results[1] != 0) {
                emailmsg = (results[0] - results[1]) + "/" + results[0] + " invoice generation ";
            }
            totalErrors += results[1];

            Logger.print("start deleting old invoices");
            results = zApiClient.alterInvoices(newUsageCollection, "delete");
            if (results[1] != 0) {
                emailmsg = (results[0] - results[1]) + "/" + results[0] + " invoice deletion ";
            }
            totalErrors += results[1];

            // Attach the runtime log when any step reported errors.
            EmailAttachment attachment = new EmailAttachment();
            if (totalErrors > 0) {
                String logFileName = AppParamManager.OUTPUT_FOLDER_LOCATION + File.separator + "runtime_log_"
                        + AppParamManager.OUTPUT_FILE_POSTFIX + ".txt";
                attachment.setPath(logFileName);
                attachment.setDisposition(EmailAttachment.ATTACHMENT);
                attachment.setDescription("System Log");
                attachment.setName("System Log");
            }
            MultiPartEmail email = new MultiPartEmail();
            email.setSmtpPort(587);
            email.setAuthenticator(
                    new DefaultAuthenticator(AppParamManager.EMAIL_ADDRESS, AppParamManager.EMAIL_PASSWORD));
            email.setDebug(false);
            email.setHostName("smtp.gmail.com");
            email.setFrom("zuora@gmail.com");
            if (totalErrors > 0) {
                email.setSubject("Base Calc Processor Finished with Errors");
            } else {
                email.setSubject("Base Calc Processor Finished Successfully");
                emailmsg = (results[0] - results[1]) + "/" + results[0] + " invoice ";
            }
            email.setMsg("The base calc processing has finished " + emailmsg + "records successfully.");
            email.setTLS(true);
            email.addTo(AppParamManager.RECIPIENT_ADDRESS);
            if (totalErrors > 0) {
                email.attach(attachment);
            }
            email.send();
            System.out.println("Mail sent!");

            if (hasArgs) {
                Connection conn = AppParamManager.getConnection();
                Statement stmt = conn.createStatement();
                DateFormat dateFormat = new SimpleDateFormat("yyyy/MM/dd HH:mm:ss");
                java.util.Date date = new Date();
                stmt.executeUpdate("UPDATE TASK SET STATUS = 'completed', END_TIME = '" + dateFormat.format(date)
                        + "' WHERE ID = " + AppParamManager.TASK_ID);
                Utility.saveLogToDB(conn);
                conn.close();
            }
        } catch (Exception e) {
            Logger.print("Failure getting the export file: " + e.getMessage());
            try {
                Connection conn = AppParamManager.getConnection();
                Statement stmt = conn.createStatement();
                DateFormat dateFormat = new SimpleDateFormat("yyyy/MM/dd HH:mm:ss");
                java.util.Date date = new Date();
                stmt.executeUpdate("UPDATE TASK SET STATUS = 'failed', END_TIME = '" + dateFormat.format(date)
                        + "' WHERE ID = " + AppParamManager.TASK_ID);
                Utility.saveLogToDB(conn);
                conn.close();
            } catch (Exception e2) {
                e2.printStackTrace();
            }
        }
    } else {
        Logger.print("No tasks in wait status");
    }
}
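Worth isolating from the example above: the export ids are validated with a null check followed by `equals("")`. The order matters, because calling `equals` on a null reference throws a NullPointerException. A minimal, self-contained sketch of that guard (the class name and id values here are hypothetical):

public class ExportIdGuard {
    // The null check must come first; short-circuit || then keeps
    // id.equals("") from ever running on a null reference.
    static boolean isMissing(String id) {
        return id == null || id.equals(""); // id.isEmpty() is the more idiomatic empty test
    }

    public static void main(String[] args) {
        System.out.println(isMissing(null));   // true
        System.out.println(isMissing(""));     // true
        System.out.println(isMissing("E-42")); // false
    }
}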
From source file:ArrayUtils.java
/**
 * A testing method.
 *
 * @param args
 *            The command-line arguments. The first argument is used to
 *            determine which test to run.
 */
public static void main(String[] args) {
    String test;
    if (args.length == 0)
        test = "intSort";
    else
        test = args[0];
    if (test.equals("intSort")) {
        int[] random = new int[10000];
        int[] random2 = new int[random.length];
        for (int i = 0; i < random.length; i++)
            random[i] = (int) (Math.random() * Integer.MAX_VALUE);
        ArrayAdjuster.mergeSort(random2, random, 0, random.length);
        boolean sorted = true;
        for (int i = 0; i < random.length - 1; i++)
            // The original compared with '<', which would flag a correctly
            // ascending sort as failed; this assumes mergeSort sorts ascending.
            if (random[i] > random[i + 1]) {
                sorted = false;
                break;
            }
        System.out.println("Sort " + (sorted ? "succeeded" : "failed"));
    } else if (test.equals("adjust")) {
        final Integer[] start = new Integer[25];
        for (int i = 0; i < start.length; i++)
            start[i] = Integer.valueOf(i);
        Integer[] modifier = new Integer[start.length];
        for (int i = 0; i < modifier.length; i++) {
            if (i % 5 != 0)
                modifier[i] = Integer.valueOf(i);
            else
                modifier[i] = Integer.valueOf(i / 5 * start.length);
        }
        Integer[] result = adjust(start, modifier, new DifferenceListener<Integer, Integer>() {
            public boolean identity(Integer o1, Integer o2) {
                return o1.equals(o2);
            }

            public Integer added(Integer o, int mIdx, int retIdx) {
                return o;
            }

            public Integer removed(Integer o, int oIdx, int oIdxAdj, int retIdx) {
                return o;
            }

            public Integer set(Integer o1, int idx1, int oIdxAdj, Integer o2, int idx2, int retIdx) {
                if (o1.intValue() % 5 == 2)
                    return null;
                return o1;
            }
        });
        System.out.println("Original array=" + toString(start));
        System.out.println("Modifier array=" + toString(modifier));
        System.out.println("Adjusted array=" + toString(result));
    } else
        throw new IllegalArgumentException("Unrecognized test: " + test);
}
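Both branches above dispatch on the first command-line argument with `test.equals(...)`. A small sketch of the same dispatch (the printed messages are placeholders): writing the literal first, as in `"intSort".equals(test)`, makes each comparison null-safe, and since Java 7 the same chain can be written as a `switch` on the string, which applies `equals` semantics internally.

public class TestDispatch {
    public static void main(String[] args) {
        String test = args.length == 0 ? "intSort" : args[0];
        // Literal-first equals never dereferences test, so a null value
        // falls through to the error branch instead of throwing an NPE.
        if ("intSort".equals(test)) {
            System.out.println("running intSort");
        } else if ("adjust".equals(test)) {
            System.out.println("running adjust");
        } else {
            throw new IllegalArgumentException("Unrecognized test: " + test);
        }
    }
}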
From source file:net.java.sen.tools.MkSenDic.java
/**
 * Build the sen dictionary.
 *
 * @param args
 *            custom dictionary files. See dic/build.xml.
 */
public static void main(String args[]) {
    ResourceBundle rb = ResourceBundle.getBundle("dictionary");
    DictionaryMaker dm1 = new DictionaryMaker();
    DictionaryMaker dm2 = new DictionaryMaker();
    DictionaryMaker dm3 = new DictionaryMaker();

    // 1st, 2nd and 3rd field information of the connection file.
    Vector rule1 = new Vector();
    Vector rule2 = new Vector();
    Vector rule3 = new Vector();

    // 4th field of the connection file: the cost of a morpheme connection,
    // stored at [size3 * (Attr1 * size2 + Attr2) + Attr3].
    short score[] = new short[20131];

    long start = System.currentTimeMillis();

    //
    // Step 1. Loading the connection file.
    //
    log.info("(1/7): reading connection matrix ... ");
    try {
        log.info("connection file = " + rb.getString("text_connection_file"));
        log.info("charset = " + rb.getString("dic.charset"));
        CSVParser csvparser = new CSVParser(new FileInputStream(rb.getString("text_connection_file")),
                rb.getString("dic.charset"));
        String t[];
        int line = 0;
        while ((t = csvparser.nextTokens()) != null) {
            if (t.length < 4) {
                log.warn("invalid line in " + rb.getString("text_connection_file") + ":" + line);
                log.warn(rb.getString("text_connection_file") + " may be broken.");
                break;
            }
            dm1.add(t[0]);
            rule1.add(t[0]);
            dm2.add(t[1]);
            rule2.add(t[1]);
            dm3.add(t[2]);
            rule3.add(t[2]);
            if (line == score.length) {
                score = resize(score);
            }
            score[line++] = (short) Integer.parseInt(t[3]);
        }

        //
        // Step 2. Building the internal dictionaries.
        //
        log.info("(2/7): building type dictionary ... ");
        dm1.build();
        dm2.build();
        dm3.build();

        // To inspect a specific morpheme, uncomment and adapt the following lines:
        /*
         * System.out.print("22="); dm3.getById(22);
         * System.out.print("368="); dm3.getById(368);
         * System.out.println(dm3.getDicId("?????*,*,*,*,?"));
         * DictionaryMaker.debug = true;
         * System.out.println(dm3.getDicId("?????*,*,*,*,?"));
         * System.out.println(dm3.getDicIdNoCache("?????*,*,*,*,?"));
         */
    } catch (IOException e) {
        e.printStackTrace();
        System.exit(0);
    }

    int size1 = dm1.size();
    int size2 = dm2.size();
    int size3 = dm3.size();
    int ruleSize = rule1.size();
    short matrix[] = new short[size1 * size2 * size3];
    short default_cost = (short) Integer.parseInt(rb.getString("default_connection_cost"));

    //
    // Step 3. Writing the connection matrix.
    //
    log.info("(3/7): writing connection matrix (" + size1 + " x " + size2 + " x " + size3 + " = "
            + size1 * size2 * size3 + ") ...");
    for (int i = 0; i < size1 * size2 * size3; i++)
        matrix[i] = default_cost;
    for (int i = 0; i < ruleSize; i++) {
        Vector r1 = dm1.getRuleIdList((String) rule1.get(i));
        Vector r2 = dm2.getRuleIdList((String) rule2.get(i));
        Vector r3 = dm3.getRuleIdList((String) rule3.get(i));
        for (Iterator i1 = r1.iterator(); i1.hasNext();) {
            int ii1 = ((Integer) i1.next()).intValue();
            for (Iterator i2 = r2.iterator(); i2.hasNext();) {
                int ii2 = ((Integer) i2.next()).intValue();
                for (Iterator i3 = r3.iterator(); i3.hasNext();) {
                    int ii3 = ((Integer) i3.next()).intValue();
                    matrix[size3 * (size2 * ii1 + ii2) + ii3] = score[i];
                }
            }
        }
    }
    try {
        DataOutputStream out = new DataOutputStream(
                new BufferedOutputStream(new FileOutputStream(rb.getString("matrix_file"))));
        out.writeShort(size1);
        out.writeShort(size2);
        out.writeShort(size3);
        for (int i1 = 0; i1 < size1; i1++)
            for (int i2 = 0; i2 < size2; i2++)
                for (int i3 = 0; i3 < size3; i3++)
                    out.writeShort(matrix[size3 * (size2 * i1 + i2) + i3]);
        out.close();
    } catch (IOException e) {
        e.printStackTrace();
        System.exit(0);
    }
    matrix = null;
    score = null;

    int pos_start = Integer.parseInt(rb.getString("pos_start"));
    int pos_size = Integer.parseInt(rb.getString("pos_size"));
    int di = 0;
    int offset = 0;
    ArrayList dicList = new ArrayList();

    //
    // Step 4. Reading morpheme information.
    //
    log.info("(4/7): reading morpheme information ... ");
    String t = null;
    String[] csv = null;
    try {
        // Writer for the feature (part-of-speech) file.
        BufferedWriter bw = new BufferedWriter(new OutputStreamWriter(
                new FileOutputStream(rb.getString("pos_file")), rb.getString("sen.charset")));
        log.info("load dic: " + rb.getString("text_dic_file"));
        BufferedReader dicStream = null;
        int custom_dic = -1;
        if (args.length == 0) {
            dicStream = new BufferedReader(new InputStreamReader(
                    new FileInputStream(rb.getString("text_dic_file")), rb.getString("dic.charset")));
        } else {
            custom_dic = 0;
            dicStream = new BufferedReader(
                    new InputStreamReader(new FileInputStream(args[custom_dic]), rb.getString("dic.charset")));
        }
        int line = 0;
        CSVData key_b = new CSVData();
        CSVData pos_b = new CSVData();
        while (true) {
            t = dicStream.readLine();
            if (t == null) {
                dicStream.close();
                custom_dic++;
                if (args.length == custom_dic) {
                    break;
                } else {
                    // Read the next custom dictionary.
                    log.info("load dic: " + args[custom_dic]);
                    dicStream = new BufferedReader(new InputStreamReader(new FileInputStream(args[custom_dic]),
                            rb.getString("dic.charset")));
                }
                continue;
            }
            CSVParser parser = new CSVParser(t);
            csv = parser.nextTokens();
            if (csv.length < (pos_size + pos_start)) {
                throw new RuntimeException("format error: " + t);
            }
            key_b.clear();
            pos_b.clear();
            for (int i = pos_start; i < (pos_start + pos_size - 1); i++) {
                key_b.append(csv[i]);
                pos_b.append(csv[i]);
            }
            key_b.append(csv[pos_start + pos_size - 1]);
            pos_b.append(csv[pos_start + pos_size - 1]);
            for (int i = pos_start + pos_size; i < (csv.length - 1); i++) {
                pos_b.append(csv[i]);
            }
            pos_b.append(csv[csv.length - 1]);

            CToken token = new CToken();
            token.rcAttr2 = (short) dm1.getDicId(key_b.toString());
            token.rcAttr1 = (short) dm2.getDicId(key_b.toString());
            token.lcAttr = (short) dm3.getDicId(key_b.toString());
            token.posid = 0;
            token.posID = offset;
            token.length = (short) csv[0].length();
            token.cost = (short) Integer.parseInt(csv[1]);

            dicList.add(new PairObject(csv[0], token));
            byte b[] = pos_b.toString().getBytes(rb.getString("sen.charset"));
            offset += (b.length + 1);
            String pos_b_str = pos_b.toString();
            bw.write(pos_b_str, 0, pos_b_str.length());
            bw.write(0);
            if (++di % 50000 == 0)
                log.info("" + di + "... ");
        }
        bw.close();
        // ---- end of writing the feature file ----
    } catch (Exception e) {
        log.error("Error: " + t);
        e.printStackTrace();
        System.exit(1);
    }
    rule1 = null;
    rule2 = null;
    rule3 = null;

    //
    // Step 5. Sorting the lexicon.
    //
    log.info("(5/7): sorting lex... ");
    int value[] = new int[dicList.size()];
    char key[][] = new char[dicList.size()][];
    int spos = 0;
    int dsize = 0;
    int bsize = 0;
    String prev = "";
    Collections.sort(dicList);

    //
    // Step 6. Writing token information.
    //
    log.info("(6/7): writing token... ");
    try {
        // Writer for the token file.
        DataOutputStream out = new DataOutputStream(
                new BufferedOutputStream(new FileOutputStream(rb.getString("token_file"))));

        // Write the 'bos', 'eos' and 'unknown' tokens first.
        CToken token = new CToken();
        token.rcAttr2 = (short) dm1.getDicId(rb.getString("bos_pos"));
        token.rcAttr1 = (short) dm2.getDicId(rb.getString("bos_pos"));
        token.lcAttr = (short) dm3.getDicId(rb.getString("bos_pos"));
        token.write(out);
        token.rcAttr2 = (short) dm1.getDicId(rb.getString("eos_pos"));
        token.rcAttr1 = (short) dm2.getDicId(rb.getString("eos_pos"));
        token.lcAttr = (short) dm3.getDicId(rb.getString("eos_pos"));
        token.write(out);
        token.rcAttr2 = (short) dm1.getDicId(rb.getString("unknown_pos"));
        token.rcAttr1 = (short) dm2.getDicId(rb.getString("unknown_pos"));
        token.lcAttr = (short) dm3.getDicId(rb.getString("unknown_pos"));
        token.posID = -1;
        token.write(out);

        log.info("key size = " + key.length);
        for (int i = 0; i < key.length; i++) {
            String k = (String) ((PairObject) dicList.get(i)).key;
            // A new key ends the current run of equal keys; flush the finished run.
            if (!prev.equals(k) && i != 0) {
                key[dsize] = ((String) ((PairObject) dicList.get(spos)).key).toCharArray();
                value[dsize] = bsize + (spos << 8);
                dsize++;
                bsize = 1;
                spos = i;
            } else {
                bsize++;
            }
            prev = (String) ((PairObject) dicList.get(i)).key;
            ((CToken) (((PairObject) dicList.get(i)).value)).write(out);
        }
        out.flush();
        out.close();
    } catch (Exception e) {
        e.printStackTrace();
        System.exit(1);
    }
    // Flush the final run.
    key[dsize] = ((String) ((PairObject) dicList.get(spos)).key).toCharArray();
    value[dsize] = bsize + (spos << 8);
    dsize++;

    dm1 = null;
    dm2 = null;
    dm3 = null;
    dicList = null;

    //
    // Step 7. Building the double array.
    //
    log.info("(7/7): building Double-Array (size = " + dsize + ") ...");
    DoubleArrayTrie da = new DoubleArrayTrie();
    da.build(key, null, value, dsize);
    try {
        da.save(rb.getString("double_array_file"));
    } catch (Exception e) {
        e.printStackTrace();
    }
    log.info("total time = " + (System.currentTimeMillis() - start) / 1000 + "[s]");
}
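Step 6 above uses `prev.equals(k)` over a sorted list to find runs of identical keys and pack each run once. A stripped-down sketch of that grouping idiom, using only standard collections (the sample data is made up):

import java.util.Arrays;
import java.util.List;

public class GroupRuns {
    public static void main(String[] args) {
        List<String> sorted = Arrays.asList("a", "a", "b", "c", "c", "c");
        String prev = null;
        int runLength = 0;
        for (String k : sorted) {
            if (k.equals(prev)) { // equals(null) is specified to return false
                runLength++;
            } else {
                if (prev != null)
                    System.out.println(prev + " x" + runLength);
                prev = k;
                runLength = 1;
            }
        }
        if (prev != null)
            System.out.println(prev + " x" + runLength); // flush the final run
    }
}

This prints "a x2", "b x1", "c x3"; like the dictionary builder, it only works because the input is sorted, so equal keys are adjacent.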
From source file:com.moss.schematrax.SchemaUpdater.java
public static void main(String[] args) throws Exception {
    boolean configureLogging = Boolean.parseBoolean(System.getProperty("configureLogging", "true"));
    if (configureLogging) {
        BasicConfigurator.configure();
        Logger.getRootLogger().setLevel(Level.INFO);
    }
    Map<String, String> argsMap = new HashMap<String, String>();
    for (int x = 0; x < args.length; x++) {
        String arg = args[x];
        String[] pair = arg.split("=");
        if (pair.length != 2) {
            System.out.println("Error parsing command line arguments in value pair: " + arg);
            System.out.println(usage);
            System.exit(1);
        }
        argsMap.put(pair[0], pair[1]);
    }
    String mode = argsMap.get("mode");
    String databaseTypeName = argsMap.get("dbtype");
    String databaseName = argsMap.get("catalog");
    String logon = argsMap.get("logon");
    String password = argsMap.get("password");
    String host = argsMap.get("host");
    if (args.length < 5 || databaseTypeName == null || databaseName == null || logon == null
            || password == null || host == null || mode == null) {
        System.out.println(usage);
        // The original fell through here and would fail later with a NullPointerException.
        return;
    }
    String schematraxFileLocation = argsMap.get("schematraxFile");
    if (schematraxFileLocation == null)
        schematraxFileLocation = "";
    String schemaName = argsMap.get("schema");
    if (schemaName == null)
        schemaName = logon;
    DatabaseType dbType = DatabaseType.getDatabaseType(databaseTypeName);
    String schemaVersion = argsMap.get("version");
    JdbcConnectionConfig connectionConfig = new JdbcConnectionConfig();
    connectionConfig.setJdbcDriverClassName(dbType.getJdbcDriver().getClassName());
    connectionConfig.setJdbcUrl(dbType.getJdbcDriver().createJdbcUrl(host, null, databaseName, logon, password));
    connectionConfig.setLogon(logon);
    connectionConfig.setPassword(password);
    SchemaUpdater updater = new SchemaUpdater(new File(schematraxFileLocation).toURI().toURL());
    if (mode.equals("update")) {
        if (schemaVersion == null) {
            System.err.println("You must specify a schema version");
            System.err.println(usage);
            return; // don't attempt an update without a version
        }
        updater.updateSchema(dbType, connectionConfig, schemaName, schemaVersion);
    } else if (mode.equals("create")) {
        updater.createSchema(dbType, connectionConfig, schemaName, schemaVersion);
    } else {
        System.err.println("Invalid mode: " + mode);
        System.err.println(usage);
    }
}
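A last note on `mode.equals("update")`: `mode` comes straight from the parsed argument map, so without the early return added above it could be null and the call would throw. Two null-safe alternatives from the standard library (the surrounding class is just scaffolding for the sketch):

import java.util.Objects;

public class ModeCheck {
    public static void main(String[] args) {
        String mode = args.length > 0 ? args[0] : null;
        // Literal-first: evaluates to false rather than throwing when mode == null.
        System.out.println("update".equals(mode));
        // Objects.equals (Java 7+) tolerates null on either side.
        System.out.println(Objects.equals(mode, "update"));
    }
}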