Usage examples for `java.util.Scanner#hasNextLine`.
Method signature: `public boolean hasNextLine()`
From source file: io.realm.RealmTest.java
public void rarely_run_testUTF8() { testRealm.beginTransaction();/*from w w w. j a v a2 s .c o m*/ testRealm.clear(AllTypes.class); testRealm.commitTransaction(); String file = "assets/unicode_codepoints.csv"; Scanner scanner = new Scanner(getClass().getClassLoader().getResourceAsStream(file)); int i = 0; String currentUnicode = null; try { testRealm.beginTransaction(); while (scanner.hasNextLine()) { currentUnicode = scanner.nextLine(); char[] chars = Character.toChars(Integer.parseInt(currentUnicode, 16)); String codePoint = new String(chars); AllTypes o = testRealm.createObject(AllTypes.class); o.setColumnLong(i); o.setColumnString(codePoint); AllTypes realmType = testRealm.where(AllTypes.class).equalTo("columnLong", i).findFirst(); if (i > 1) { assertEquals("Codepoint: " + i + " / " + currentUnicode, codePoint, realmType.getColumnString()); // codepoint 0 is NULL, ignore for now. } i++; } testRealm.commitTransaction(); } catch (Exception e) { fail("Failure, Codepoint: " + i + " / " + currentUnicode + " " + e.getMessage()); } }
From source file: com.joliciel.jochre.Jochre.java
/**
 * Command-line entry point: parses the argument map, builds the corpus
 * selection criteria, then dispatches on the {@code command} argument
 * (segment, extract, updateImages, applyFeatures, train, evaluate*,
 * analyse, trainSplits, evaluateSplits, trainMerge, evaluateMerge,
 * logImage, testFeature, serializeLexicon, analyseFile).
 * <p>
 * With an empty argument map, prints usage and returns.
 *
 * @param argMap command-line arguments as name/value pairs; unknown names
 *               cause a RuntimeException
 * @throws Exception any failure from the dispatched command is logged and
 *                   rethrown
 */
public void execute(Map<String, String> argMap) throws Exception {
    if (argMap.size() == 0) {
        System.out.println("Usage (* indicates optional):");
        System.out.println(
                "Jochre command=load file=[filename] name=[userFriendlyName] lang=[isoLanguageCode] first=[firstPage]* last=[lastPage]* outputDir=[outputDirectory]* showSeg=[true/false]");
        System.out.println(
                "Jochre command=extract file=[filename] outputDir=[outputDirectory] first=[firstPage]* last=[lastPage]*");
        System.out.println("Jochre command=analyse");
        System.out.println(
                "Jochre command=train file=[filename] outputDir=[outputDirectory] iterations=[iterations] cutoff=[cutoff]");
        return;
    }

    // Optional log4j configuration; consumed before generic argument parsing.
    String logConfigPath = argMap.get("logConfigFile");
    if (logConfigPath != null) {
        argMap.remove("logConfigFile");
        Properties props = new Properties();
        props.load(new FileInputStream(logConfigPath));
        PropertyConfigurator.configure(props);
    }

    // Defaults for every recognised argument.
    File performanceConfigFile = null;
    String command = "";
    String filename = "";
    String userFriendlyName = "";
    String outputDirPath = null;
    int firstPage = -1;
    int lastPage = -1;
    int shapeId = -1;
    int docId = -1;
    int imageId = 0;
    int iterations = 100;
    int cutoff = 0;
    int userId = -1;
    int imageCount = 0;
    int multiplier = 0;
    int beamWidth = 5;
    boolean showSegmentation = false;
    boolean drawPixelSpread = false;
    boolean save = false;
    String letterModelPath = "";
    String splitModelPath = "";
    String mergeModelPath = "";
    ImageStatus[] imageSet = new ImageStatus[] { ImageStatus.TRAINING_HELD_OUT };
    String letterFeatureFilePath = "";
    String splitFeatureFilePath = "";
    String mergeFeatureFilePath = "";
    boolean reconstructLetters = false;
    double minProbForDecision = 0.5;
    double junkThreshold = 0.0;
    BoundaryDetectorType boundaryDetectorType = BoundaryDetectorType.LetterByLetter;
    int excludeImageId = 0;
    int crossValidationSize = -1;
    int includeIndex = -1;
    int excludeIndex = -1;
    Set<Integer> documentSet = null;
    boolean frequencyAdjusted = false;
    double smoothing = 0.3;
    double frequencyLogBase = 10.0;
    String suffix = "";
    String dataSourcePath = null;
    String docGroupPath = null;
    boolean includeBeam = false;
    // Default output: Jochre XML plus HTML; overridden by "outputFormat".
    List<OutputFormat> outputFormats = new ArrayList<Jochre.OutputFormat>();
    outputFormats.add(OutputFormat.Jochre);
    outputFormats.add(OutputFormat.HTML);

    // One long if/else chain mapping each argument name to its variable.
    for (Entry<String, String> argMapEntry : argMap.entrySet()) {
        String argName = argMapEntry.getKey();
        String argValue = argMapEntry.getValue();
        if (argName.equals("command"))
            command = argValue;
        else if (argName.equals("file"))
            filename = argValue;
        else if (argName.equals("name"))
            userFriendlyName = argValue;
        else if (argName.equals("lang"))
            locale = new Locale(argValue);
        else if (argName.equals("first"))
            firstPage = Integer.parseInt(argValue);
        else if (argName.equals("last"))
            lastPage = Integer.parseInt(argValue);
        else if (argName.equals("outputDir"))
            outputDirPath = argValue;
        else if (argName.equals("showSeg"))
            showSegmentation = (argValue.equals("true"));
        else if (argName.equals("drawPixelSpread"))
            drawPixelSpread = (argValue.equals("true"));
        else if (argName.equals("save"))
            save = (argValue.equals("true"));
        else if (argName.equals("shapeId"))
            shapeId = Integer.parseInt(argValue);
        else if (argName.equals("imageId"))
            imageId = Integer.parseInt(argValue);
        else if (argName.equals("docId"))
            docId = Integer.parseInt(argValue);
        else if (argName.equals("userId"))
            userId = Integer.parseInt(argValue);
        else if (argName.equals("iterations"))
            iterations = Integer.parseInt(argValue);
        else if (argName.equals("cutoff"))
            cutoff = Integer.parseInt(argValue);
        else if (argName.equals("imageCount"))
            imageCount = Integer.parseInt(argValue);
        else if (argName.equals("beamWidth"))
            beamWidth = Integer.parseInt(argValue);
        else if (argName.equals("multiplier"))
            multiplier = Integer.parseInt(argValue);
        else if (argName.equals("letterModel"))
            letterModelPath = argValue;
        else if (argName.equals("splitModel"))
            splitModelPath = argValue;
        else if (argName.equals("mergeModel"))
            mergeModelPath = argValue;
        else if (argName.equals("letterFeatures"))
            letterFeatureFilePath = argValue;
        else if (argName.equals("splitFeatures"))
            splitFeatureFilePath = argValue;
        else if (argName.equals("mergeFeatures"))
            mergeFeatureFilePath = argValue;
        else if (argName.equals("imageStatus")) {
            // Which training subsets to select images from.
            if (argValue.equals("heldOut"))
                imageSet = new ImageStatus[] { ImageStatus.TRAINING_HELD_OUT };
            else if (argValue.equals("test"))
                imageSet = new ImageStatus[] { ImageStatus.TRAINING_TEST };
            else if (argValue.equals("training"))
                imageSet = new ImageStatus[] { ImageStatus.TRAINING_VALIDATED };
            else if (argValue.equals("all"))
                imageSet = new ImageStatus[] { ImageStatus.TRAINING_VALIDATED, ImageStatus.TRAINING_HELD_OUT,
                        ImageStatus.TRAINING_TEST };
            else
                throw new RuntimeException("Unknown imageSet: " + argValue);
        } else if (argName.equals("reconstructLetters"))
            reconstructLetters = (argValue.equals("true"));
        else if (argName.equals("minProbForDecision"))
            minProbForDecision = Double.parseDouble(argValue);
        else if (argName.equals("junkThreshold"))
            junkThreshold = Double.parseDouble(argValue);
        else if (argName.equals("boundaryDetector"))
            boundaryDetectorType = BoundaryDetectorType.valueOf(argValue);
        else if (argName.equals("lexicon"))
            lexiconPath = argValue;
        else if (argName.equals("freqLogBase")) {
            // Setting the log base implies frequency adjustment.
            frequencyLogBase = Double.parseDouble(argValue);
            frequencyAdjusted = true;
        } else if (argName.equals("smoothing"))
            smoothing = Double.parseDouble(argValue);
        else if (argName.equals("excludeImageId"))
            excludeImageId = Integer.parseInt(argValue);
        else if (argName.equals("crossValidationSize"))
            crossValidationSize = Integer.parseInt(argValue);
        else if (argName.equals("includeIndex"))
            includeIndex = Integer.parseInt(argValue);
        else if (argName.equals("excludeIndex"))
            excludeIndex = Integer.parseInt(argValue);
        else if (argName.equals("docSet")) {
            // Comma-separated list of document ids.
            String[] docIdArray = argValue.split(",");
            documentSet = new HashSet<Integer>();
            for (String docIdString : docIdArray) {
                int oneId = Integer.parseInt(docIdString);
                documentSet.add(oneId);
            }
        } else if (argName.equals("docGroupFile")) {
            docGroupPath = argValue;
        } else if (argName.equals("frequencyAdjusted"))
            frequencyAdjusted = argValue.equalsIgnoreCase("true");
        else if (argName.equals("suffix"))
            suffix = argValue;
        else if (argName.equals("dataSource"))
            dataSourcePath = argValue;
        else if (argName.equals("encoding"))
            encoding = argValue;
        else if (argName.equals("performanceConfigFile"))
            performanceConfigFile = new File(argValue);
        else if (argName.equals("includeBeam"))
            includeBeam = argValue.equalsIgnoreCase("true");
        else if (argName.equals("outputFormat")) {
            // Comma-separated list; replaces the defaults entirely.
            outputFormats = new ArrayList<Jochre.OutputFormat>();
            String[] outputFormatStrings = argValue.split(",");
            for (String outputFormatString : outputFormatStrings) {
                outputFormats.add(OutputFormat.valueOf(outputFormatString));
            }
            if (outputFormats.size() == 0)
                throw new JochreException("At least one outputFormat required.");
        } else
            throw new RuntimeException("Unknown argument: " + argName);
    }

    PerformanceMonitor.start(performanceConfigFile);
    try {
        if (userFriendlyName.length() == 0)
            userFriendlyName = filename;
        if (locale == null) {
            throw new JochreException("Argument lang is required");
        }
        if (encoding == null)
            encoding = Charset.defaultCharset().name();

        JochreServiceLocator locator = JochreServiceLocator.getInstance();
        if (dataSourcePath != null)
            locator.setDataSourcePropertiesFile(dataSourcePath);
        this.initialise();
        this.setUserId(userId);

        // Corpus selection criteria shared by the training/evaluation commands.
        CorpusSelectionCriteria criteria = this.getGraphicsService().getCorpusSelectionCriteria();
        criteria.setImageId(imageId);
        criteria.setImageCount(imageCount);
        criteria.setImageStatusesToInclude(imageSet);
        criteria.setExcludeImageId(excludeImageId);
        criteria.setCrossValidationSize(crossValidationSize);
        criteria.setIncludeIndex(includeIndex);
        criteria.setExcludeIndex(excludeIndex);
        criteria.setDocumentId(docId);
        criteria.setDocumentIds(documentSet);

        if (docGroupPath != null) {
            // Doc-group file format: one "groupName=id1,id2,..." entry per line.
            // NOTE(review): this Scanner is never closed — resource leak; confirm and fix.
            File docGroupFile = new File(docGroupPath);
            Scanner scanner = new Scanner(
                    new BufferedReader(new InputStreamReader(new FileInputStream(docGroupFile), encoding)));
            while (scanner.hasNextLine()) {
                String line = scanner.nextLine();
                int equalsPos = line.indexOf('=');
                String groupName = line.substring(0, equalsPos);
                String[] ids = line.substring(equalsPos + 1).split(",");
                Set<Integer> idSet = new HashSet<Integer>();
                for (String idString : ids) {
                    idSet.add(Integer.parseInt(idString));
                }
                documentGroups.put(groupName, idSet);
            }
        }

        OutputService outputService = locator.getTextServiceLocator().getTextService();
        MostLikelyWordChooser wordChooser = null;
        LexiconService lexiconService = locator.getLexiconServiceLocator().getLexiconService();
        wordChooser = lexiconService.getMostLikelyWordChooser(this.getLexicon(), this.getWordSplitter());
        wordChooser.setAdditiveSmoothing(smoothing);
        wordChooser.setFrequencyLogBase(frequencyLogBase);
        wordChooser.setFrequencyAdjusted(frequencyAdjusted);
        JochreSession.setJunkConfidenceThreshold(junkThreshold);

        // Dispatch on the command name.
        if (command.equals("segment")) {
            this.doCommandSegment(filename, userFriendlyName, showSegmentation, drawPixelSpread, outputDirPath,
                    save, firstPage, lastPage);
        } else if (command.equals("extract")) {
            this.doCommandExtractImages(filename, outputDirPath, firstPage, lastPage);
        } else if (command.equals("updateImages")) {
            this.doCommandUpdateImages(filename, docId, firstPage, lastPage);
        } else if (command.equals("applyFeatures")) {
            this.doCommandApplyFeatures(imageId, shapeId, letterFeatureFilePath);
        } else if (command.equals("train")) {
            this.doCommandTrain(letterModelPath, letterFeatureFilePath, iterations, cutoff, criteria,
                    reconstructLetters);
        } else if (command.equals("evaluate") || command.equals("evaluateComplex")) {
            this.doCommandEvaluate(letterModelPath, criteria, outputDirPath, wordChooser, beamWidth,
                    reconstructLetters, save, suffix, includeBeam);
        } else if (command.equals("evaluateFull")) {
            this.doCommandEvaluateFull(letterModelPath, splitModelPath, mergeModelPath, criteria, save,
                    outputDirPath, wordChooser, beamWidth, boundaryDetectorType, minProbForDecision, suffix);
        } else if (command.equals("analyse")) {
            this.doCommandAnalyse(letterModelPath, docId, criteria, wordChooser);
        } else if (command.equals("trainSplits")) {
            this.doCommandTrainSplits(splitModelPath, splitFeatureFilePath, iterations, cutoff, criteria);
        } else if (command.equals("evaluateSplits")) {
            this.doCommandEvaluateSplits(splitModelPath, criteria, beamWidth, minProbForDecision);
        } else if (command.equals("trainMerge")) {
            this.doCommandTrainMerge(mergeModelPath, mergeFeatureFilePath, multiplier, iterations, cutoff,
                    criteria);
        } else if (command.equals("evaluateMerge")) {
            this.doCommandEvaluateMerge(mergeModelPath, criteria, minProbForDecision);
        } else if (command.equals("logImage")) {
            this.doCommandLogImage(shapeId);
        } else if (command.equals("testFeature")) {
            this.doCommandTestFeature(shapeId);
        } else if (command.equals("serializeLexicon")) {
            // Serialize one text lexicon (or a directory of them) to .obj files.
            File outputDir = new File(outputDirPath);
            outputDir.mkdirs();
            File inputFile = new File(filename);
            if (inputFile.isDirectory()) {
                File[] lexiconFiles = inputFile.listFiles();
                for (File oneLexFile : lexiconFiles) {
                    LOG.debug(oneLexFile.getName() + ": " + ", size: " + oneLexFile.length());
                    TextFileLexicon lexicon = new TextFileLexicon(oneLexFile, encoding);
                    // Base name = up to first '.', stripped of any path prefix.
                    String baseName = oneLexFile.getName().substring(0, oneLexFile.getName().indexOf("."));
                    if (baseName.lastIndexOf("/") > 0)
                        baseName = baseName.substring(baseName.lastIndexOf("/") + 1);
                    File lexiconFile = new File(outputDir, baseName + ".obj");
                    lexicon.serialize(lexiconFile);
                }
            } else {
                LOG.debug(filename + ": " + inputFile.exists() + ", size: " + inputFile.length());
                TextFileLexicon lexicon = new TextFileLexicon(inputFile, encoding);
                String baseName = filename.substring(0, filename.indexOf("."));
                if (baseName.lastIndexOf("/") > 0)
                    baseName = baseName.substring(baseName.lastIndexOf("/") + 1);
                File lexiconFile = new File(outputDir, baseName + ".obj");
                lexicon.serialize(lexiconFile);
            }
        } else if (command.equals("analyseFile")) {
            // Analyse a single file, writing one observer per requested output format.
            File pdfFile = new File(filename);
            File letterModelFile = new File(letterModelPath);
            File splitModelFile = null;
            File mergeModelFile = null;
            if (splitModelPath.length() > 0)
                splitModelFile = new File(splitModelPath);
            if (mergeModelPath.length() > 0)
                mergeModelFile = new File(mergeModelPath);
            File outputDir = new File(outputDirPath);
            outputDir.mkdirs();
            // Base name: strip extension, then any Unix or Windows path prefix.
            String baseName = filename;
            if (baseName.lastIndexOf('.') > 0)
                baseName = filename.substring(0, filename.lastIndexOf('.'));
            if (baseName.lastIndexOf('/') > 0)
                baseName = baseName.substring(baseName.lastIndexOf('/') + 1);
            if (baseName.lastIndexOf('\\') > 0)
                baseName = baseName.substring(baseName.lastIndexOf('\\') + 1);

            List<DocumentObserver> observers = new ArrayList<DocumentObserver>();
            for (OutputFormat outputFormat : outputFormats) {
                switch (outputFormat) {
                case AbbyyFineReader8: {
                    Writer analysisFileWriter = null;
                    String outputFileName = baseName + "_abbyy8.xml";
                    File analysisFile = new File(outputDir, outputFileName);
                    analysisFile.delete();
                    analysisFileWriter = new BufferedWriter(
                            new OutputStreamWriter(new FileOutputStream(analysisFile, true), "UTF8"));
                    DocumentObserver observer = outputService.getAbbyyFineReader8Exporter(analysisFileWriter);
                    observers.add(observer);
                    break;
                }
                case HTML: {
                    Writer htmlWriter = null;
                    String htmlFileName = baseName + ".html";
                    File htmlFile = new File(outputDir, htmlFileName);
                    htmlFile.delete();
                    htmlWriter = new BufferedWriter(
                            new OutputStreamWriter(new FileOutputStream(htmlFile, true), "UTF8"));
                    DocumentObserver textGetter = outputService.getTextGetter(htmlWriter, TextFormat.XHTML,
                            this.getLexicon());
                    observers.add(textGetter);
                    break;
                }
                case Jochre: {
                    Writer analysisFileWriter = null;
                    String outputFileName = baseName + ".xml";
                    File analysisFile = new File(outputDir, outputFileName);
                    analysisFile.delete();
                    analysisFileWriter = new BufferedWriter(
                            new OutputStreamWriter(new FileOutputStream(analysisFile, true), "UTF8"));
                    DocumentObserver observer = outputService.getJochreXMLExporter(analysisFileWriter);
                    observers.add(observer);
                    break;
                }
                case JochrePageByPage: {
                    DocumentObserver observer = outputService.getJochrePageByPageExporter(outputDir, baseName);
                    observers.add(observer);
                    break;
                }
                case UnknownWords: {
                    // Only meaningful when a lexicon is available.
                    if (this.getLexicon() != null) {
                        File unknownWordFile = new File(outputDir, "unknownWords.txt");
                        unknownWordFile.delete();
                        Writer unknownWordWriter = new BufferedWriter(
                                new OutputStreamWriter(new FileOutputStream(unknownWordFile, true), "UTF8"));
                        UnknownWordListWriter unknownWordListWriter = new UnknownWordListWriter(
                                unknownWordWriter);
                        observers.add(unknownWordListWriter);
                    }
                    break;
                }
                }
            }
            this.doCommandAnalyse(pdfFile, letterModelFile, splitModelFile, mergeModelFile, wordChooser,
                    observers, firstPage, lastPage);
        } else {
            throw new RuntimeException("Unknown command: " + command);
        }
    } catch (Exception e) {
        LogUtils.logError(LOG, e);
        throw e;
    } finally {
        PerformanceMonitor.end();
    }
    LOG.debug("#### finished #####");
}
From source file: com.maxl.java.aips2sqlite.RealExpertInfo.java
/**
 * Builds {@code m_package_info} (keyed by 8-digit Swissmedic number) from
 * four data sources, in order:
 * <ol>
 *   <li>the Swissmedic packages .xlsx (base per-package row),</li>
 *   <li>ATC code/class mappings into {@code m_atc_map} (CSV for "de",
 *       WHO .xls plus a multilingual text file for "fr"),</li>
 *   <li>the Refdata pharma XML (names, EAN-13, pharma code, "in Handel"
 *       status),</li>
 *   <li>the BAG preparations XML (prices, limitations, therapeutic codes,
 *       into the positional slots of each row; also fills
 *       {@code m_add_info_map}).</li>
 * </ol>
 * NOTE(review): the FileInputStreams/workbooks opened here are never
 * explicitly closed — confirm and consider try-with-resources.
 */
private void extractPackageInfo() {
    try {
        long startTime = System.currentTimeMillis();
        if (CmlOptions.SHOW_LOGS)
            System.out.print("- Processing packages xlsx... ");
        // Load Swissmedic packages file (XSSF = .xlsx format).
        FileInputStream packages_file = new FileInputStream(Constants.FILE_PACKAGES_XLSX);
        XSSFWorkbook packages_workbook = new XSSFWorkbook(packages_file);
        // Only the first sheet is used.
        XSSFSheet packages_sheet = packages_workbook.getSheetAt(0);

        // Iterate through all rows of the first sheet; the first 6 rows are
        // header rows and are skipped via the num_rows counter.
        Iterator<Row> rowIterator = packages_sheet.iterator();
        int num_rows = 0;
        while (rowIterator.hasNext()) {
            Row row = rowIterator.next();
            if (num_rows > 5) {
                String swissmedic_no5 = ""; // SwissmedicNo5 registration number (5 digits)
                String sequence_name = "";
                String package_id = "";
                String swissmedic_no8 = ""; // SwissmedicNo8 = SwissmedicNo5 + package id (8 digits)
                String heilmittel_code = "";
                String package_size = "";
                String package_unit = "";
                String swissmedic_cat = "";
                String application_area = "";
                String public_price = "";
                String exfactory_price = "";
                String therapeutic_index = "";
                String withdrawn_str = "";
                String speciality_str = "";
                String plimitation_str = "";
                String add_info_str = ""; // additional information, ';'-separated
                String ean_code_str = "";
                String pharma_code_str = "";
                // Column layout of the Swissmedic sheet:
                //  0: registration number, 1: dose-strength number, 2: preparation name,
                //  3: licence holder, 4: Heilmittelcode, 5: IT number, 6: ATC code,
                //  7: first licensing of preparation, 8: sequence licensing date,
                //  9: validity date, 10: package code, 11: package size, 12: unit,
                // 13: dispensing category (package), 14: (dose strength), 15: (preparation),
                // 16: active substance, 17: composition, 18: application area (preparation),
                // 19: application area (dose strength), 20: genetically engineered substances,
                // 21: insulin category, 22: narcotic-containing preparations
                // Cells 0 and 10 are numeric "Special" cells — read as numbers and
                // zero-pad (workaround dated 15.10.2013).
                if (row.getCell(0) != null)
                    swissmedic_no5 = String.format("%05d", (int) (row.getCell(0).getNumericCellValue()));
                if (row.getCell(2) != null)
                    sequence_name = ExcelOps.getCellValue(row.getCell(2));
                if (row.getCell(4) != null)
                    heilmittel_code = ExcelOps.getCellValue(row.getCell(4));
                if (row.getCell(11) != null)
                    package_size = ExcelOps.getCellValue(row.getCell(11));
                if (row.getCell(12) != null)
                    package_unit = ExcelOps.getCellValue(row.getCell(12));
                if (row.getCell(13) != null)
                    swissmedic_cat = ExcelOps.getCellValue(row.getCell(13));
                if (row.getCell(18) != null)
                    application_area = ExcelOps.getCellValue(row.getCell(18));
                if (row.getCell(10) != null) {
                    package_id = String.format("%03d", (int) (row.getCell(10).getNumericCellValue()));
                    swissmedic_no8 = swissmedic_no5 + package_id;
                    // Build the positional row; slot numbers are relied upon by the
                    // Refdata/BAG passes below (pi_row.set(n, ...)).
                    ArrayList<String> pack = new ArrayList<String>();
                    pack.add(swissmedic_no5); // 0
                    pack.add(sequence_name); // 1
                    pack.add(heilmittel_code); // 2
                    pack.add(package_size); // 3
                    pack.add(package_unit); // 4
                    pack.add(swissmedic_cat); // 5
                    if (!application_area.isEmpty())
                        pack.add(application_area + " (Swissmedic);"); // 6 = swissmedic + bag
                    else
                        pack.add("");
                    pack.add(public_price); // 7
                    pack.add(exfactory_price); // 8
                    pack.add(therapeutic_index); // 9
                    // By default meds are flagged "off the market"; cleared later
                    // if the package appears in the Refdata file.
                    if (CmlOptions.DB_LANGUAGE.equals("de"))
                        withdrawn_str = "a.H."; // ausser Handel
                    else if (CmlOptions.DB_LANGUAGE.equals("fr"))
                        withdrawn_str = "p.c.";
                    pack.add(withdrawn_str); // 10
                    pack.add(speciality_str); // 11
                    pack.add(plimitation_str); // 12
                    pack.add(add_info_str); // 13
                    // 22.03.2014: EAN-13 barcode initialization — check digit is missing!
                    ean_code_str = "7680" + swissmedic_no8;
                    pack.add(ean_code_str); // 14
                    pack.add(pharma_code_str); // 15
                    m_package_info.put(swissmedic_no8, pack);
                }
            }
            num_rows++;
        }
        long stopTime = System.currentTimeMillis();
        if (CmlOptions.SHOW_LOGS) {
            // NOTE(review): "+ 1" in the reported count looks off by one — confirm intent.
            System.out.println(
                    (m_package_info.size() + 1) + " packages in " + (stopTime - startTime) / 1000.0f + " sec");
        }

        startTime = System.currentTimeMillis();
        if (CmlOptions.SHOW_LOGS)
            System.out.print("- Processing atc classes xls... ");
        if (CmlOptions.DB_LANGUAGE.equals("de")) {
            // German: ATC code -> class mapping from the EPhA CSV.
            CSVReader reader = new CSVReader(
                    new InputStreamReader(new FileInputStream(Constants.FILE_EPHA_ATC_CODES_CSV), "UTF-8"));
            List<String[]> myEntries = reader.readAll();
            num_rows = myEntries.size();
            for (String[] s : myEntries) {
                if (s.length > 2) {
                    String atc_code = s[0];
                    String atc_class = s[1];
                    m_atc_map.put(atc_code, atc_class);
                }
            }
            reader.close();
        } else if (CmlOptions.DB_LANGUAGE.equals("fr")) {
            // French: WHO ATC classes .xls (HSSF = legacy .xls format).
            FileInputStream atc_classes_file = new FileInputStream(Constants.FILE_WHO_ATC_CLASSES_XLS);
            HSSFWorkbook atc_classes_workbook = new HSSFWorkbook(atc_classes_file);
            HSSFSheet atc_classes_sheet = atc_classes_workbook.getSheetAt(0); // --> 2014 file
            rowIterator = atc_classes_sheet.iterator();
            num_rows = 0;
            while (rowIterator.hasNext()) {
                Row row = rowIterator.next();
                if (num_rows > 0) {
                    String atc_code = "";
                    String atc_class = "";
                    if (row.getCell(1) != null) {
                        atc_code = row.getCell(1).getStringCellValue();
                        if (atc_code.length() > 0) {
                            // Level 5 and below: class name in column 2.
                            if (atc_code.length() < 6 && row.getCell(2) != null) {
                                atc_class = row.getCell(2).getStringCellValue();
                                m_atc_map.put(atc_code, atc_class);
                                // Level 7: class name in column 4.
                            } else if (atc_code.length() == 7 && row.getCell(4) != null) {
                                atc_class = row.getCell(4).getStringCellValue();
                                m_atc_map.put(atc_code, atc_class);
                            }
                        }
                    }
                }
                num_rows++;
            }
            // Multilingual ATC classes text file ("code: en;fr" per line):
            // overwrite the English class with the French one.
            String atc_classes_multi = FileOps.readFromFile(Constants.FILE_ATC_MULTI_LINGUAL_TXT);
            Scanner scanner = new Scanner(atc_classes_multi);
            while (scanner.hasNextLine()) {
                String line = scanner.nextLine();
                List<String> atc_class = Arrays.asList(line.split(": "));
                String atc_code = atc_class.get(0);
                String[] atc_classes_str = atc_class.get(1).split(";");
                String atc_class_french = atc_classes_str[1].trim();
                m_atc_map.put(atc_code, atc_class_french);
            }
            scanner.close();
        }
        stopTime = System.currentTimeMillis();
        if (CmlOptions.SHOW_LOGS)
            System.out.println(
                    (m_atc_map.size() + 1) + " classes in " + (stopTime - startTime) / 1000.0f + " sec");

        // Refdata pharma XML: authoritative names / EAN / pharma codes.
        File refdata_xml_file = new File(Constants.FILE_REFDATA_PHARMA_XML);
        FileInputStream refdata_fis = new FileInputStream(refdata_xml_file);
        startTime = System.currentTimeMillis();
        if (CmlOptions.SHOW_LOGS)
            System.out.println("- Unmarshalling Refdatabase for " + CmlOptions.DB_LANGUAGE + "... ");
        JAXBContext context = JAXBContext.newInstance(Refdata.class);
        Unmarshaller um = context.createUnmarshaller();
        Refdata refdataPharma = (Refdata) um.unmarshal(refdata_fis);
        List<Refdata.ITEM> pharma_list = refdataPharma.getItem();
        String smno8;
        for (Refdata.ITEM pharma : pharma_list) {
            String ean_code = pharma.getGtin();
            String pharma_code = pharma.getPhar();
            if (ean_code.length() == 13) {
                // SwissmedicNo8 is embedded in the EAN-13 at positions 4..11.
                smno8 = ean_code.substring(4, 12);
                ArrayList<String> pi_row = m_package_info.get(smno8);
                if (pi_row != null) {
                    // Replace sequence name with the language-specific name.
                    if (CmlOptions.DB_LANGUAGE.equals("de"))
                        pi_row.set(1, pharma.getNameDE());
                    else if (CmlOptions.DB_LANGUAGE.equals("fr"))
                        pi_row.set(1, pharma.getNameFR());
                    // Listed in Refdata => currently on the market: clear the
                    // default "a.H."/"p.c." flag.
                    pi_row.set(10, "");
                    // 22.03.2014: replace the synthesized EAN-13 with the real one.
                    pi_row.set(14, ean_code);
                    pi_row.set(15, pharma_code);
                } else {
                    if (CmlOptions.SHOW_ERRORS) {
                        if (pharma.getATYPE().equals("PHARMA"))
                            System.err.println(
                                    ">> Does not exist in BAG xls: " + smno8 + " (" + pharma.getNameDE() + ")");
                    }
                }
            } else if (ean_code.length() < 13) {
                if (CmlOptions.SHOW_ERRORS)
                    System.err.println(">> EAN code too short: " + ean_code + ": " + pharma.getNameDE());
            } else if (ean_code.length() > 13) {
                if (CmlOptions.SHOW_ERRORS)
                    System.err.println(">> EAN code too long: " + ean_code + ": " + pharma.getNameDE());
            }
        }
        stopTime = System.currentTimeMillis();
        if (CmlOptions.SHOW_LOGS)
            System.out.println(pharma_list.size() + " medis in " + (stopTime - startTime) / 1000.0f + " sec");

        // BAG preparations XML: prices, limitations, therapeutic classification.
        File bag_xml_file = new File(Constants.FILE_PREPARATIONS_XML);
        FileInputStream fis_bag = new FileInputStream(bag_xml_file);
        startTime = System.currentTimeMillis();
        if (CmlOptions.SHOW_LOGS)
            System.out.println("- Processing preparations xml... ");
        context = JAXBContext.newInstance(Preparations.class);
        um = context.createUnmarshaller();
        Preparations prepInfos = (Preparations) um.unmarshal(fis_bag);
        List<Preparations.Preparation> prep_list = prepInfos.getPreparations();
        int num_preparations = 0;
        for (Preparations.Preparation prep : prep_list) {
            String swissmedicno5_str = prep.getSwissmedicNo5();
            if (swissmedicno5_str != null) {
                String orggencode_str = ""; // "O", "G" or empty -> ""
                String flagSB20_str = ""; // "Y" -> 20% or "N" -> 10%
                if (prep.getOrgGenCode() != null)
                    orggencode_str = prep.getOrgGenCode();
                if (prep.getFlagSB20() != null) {
                    // Translate the self-dispensation flag into display text.
                    flagSB20_str = prep.getFlagSB20();
                    if (flagSB20_str.equals("Y")) {
                        if (CmlOptions.DB_LANGUAGE.equals("de"))
                            flagSB20_str = "SB 20%";
                        else if (CmlOptions.DB_LANGUAGE.equals("fr"))
                            flagSB20_str = "QP 20%";
                    } else if (flagSB20_str.equals("N")) {
                        if (CmlOptions.DB_LANGUAGE.equals("de"))
                            flagSB20_str = "SB 10%";
                        else if (CmlOptions.DB_LANGUAGE.equals("fr"))
                            flagSB20_str = "QP 10%";
                    } else
                        flagSB20_str = "";
                }
                m_add_info_map.put(swissmedicno5_str, orggencode_str + ";" + flagSB20_str);
            }
            List<Preparation.Packs> packs_list = prep.getPacks();
            for (Preparation.Packs packs : packs_list) {
                // Extract codes for therapeutic index / classification:
                // first IT code = therapeutic code, subsequent ones = application area.
                String bag_application = "";
                String therapeutic_code = "";
                List<Preparations.Preparation.ItCodes> itcode_list = prep.getItCodes();
                for (Preparations.Preparation.ItCodes itc : itcode_list) {
                    List<Preparations.Preparation.ItCodes.ItCode> code_list = itc.getItCode();
                    int index = 0;
                    for (Preparations.Preparation.ItCodes.ItCode code : code_list) {
                        if (index == 0) {
                            if (CmlOptions.DB_LANGUAGE.equals("de"))
                                therapeutic_code = code.getDescriptionDe();
                            else if (CmlOptions.DB_LANGUAGE.equals("fr"))
                                therapeutic_code = code.getDescriptionFr();
                        } else {
                            if (CmlOptions.DB_LANGUAGE.equals("de"))
                                bag_application = code.getDescriptionDe();
                            else if (CmlOptions.DB_LANGUAGE.equals("fr"))
                                bag_application = code.getDescriptionFr();
                        }
                        index++;
                    }
                }
                // Merge BAG data into the existing package rows.
                List<Preparation.Packs.Pack> pack_list = packs.getPack();
                for (Preparation.Packs.Pack pack : pack_list) {
                    // SwissmedicNo8 keys back into the Swissmedic-derived rows.
                    String swissMedicNo8 = pack.getSwissmedicNo8();
                    ArrayList<String> pi_row = null;
                    if (swissMedicNo8 != null)
                        pi_row = m_package_info.get(swissMedicNo8);
                    // Preparation also in the BAG xml file => we have a price.
                    if (pi_row != null) {
                        // Update Swissmedic category if necessary ("N"->"A", "Y"->"A+").
                        if (pack.getFlagNarcosis().equals("Y"))
                            pi_row.set(5, pi_row.get(5) + "+");
                        // Point limitations.
                        List<Preparations.Preparation.Packs.Pack.PointLimitations> point_limits = pack
                                .getPointLimitations();
                        for (Preparations.Preparation.Packs.Pack.PointLimitations limits : point_limits) {
                            List<Preparations.Preparation.Packs.Pack.PointLimitations.PointLimitation> plimits_list = limits
                                    .getPointLimitation();
                            if (plimits_list.size() > 0)
                                if (plimits_list.get(0) != null)
                                    pi_row.set(12, ", LIM" + plimits_list.get(0).getPoints() + "");
                        }
                        // Ex-factory and public prices, formatted as "CHF x.xx".
                        List<Preparations.Preparation.Packs.Pack.Prices> price_list = pack.getPrices();
                        for (Preparations.Preparation.Packs.Pack.Prices price : price_list) {
                            List<Preparations.Preparation.Packs.Pack.Prices.PublicPrice> public_price = price
                                    .getPublicPrice();
                            List<Preparations.Preparation.Packs.Pack.Prices.ExFactoryPrice> exfactory_price = price
                                    .getExFactoryPrice();
                            if (exfactory_price.size() > 0) {
                                try {
                                    float f = Float.valueOf(exfactory_price.get(0).getPrice());
                                    String ep = String.format("%.2f", f);
                                    pi_row.set(8, "CHF " + ep);
                                } catch (NumberFormatException e) {
                                    if (CmlOptions.SHOW_ERRORS)
                                        System.err.println("Number format exception (exfactory price): "
                                                + swissMedicNo8 + " (" + public_price.size() + ")");
                                }
                            }
                            if (public_price.size() > 0) {
                                try {
                                    float f = Float.valueOf(public_price.get(0).getPrice());
                                    String pp = String.format("%.2f", f);
                                    pi_row.set(7, "CHF " + pp);
                                    // A public price implies speciality list membership.
                                    if (CmlOptions.DB_LANGUAGE.equals("de"))
                                        pi_row.set(11, ", SL");
                                    else if (CmlOptions.DB_LANGUAGE.equals("fr"))
                                        pi_row.set(11, ", LS");
                                } catch (NullPointerException e) {
                                    if (CmlOptions.SHOW_ERRORS)
                                        System.err.println("Null pointer exception (public price): "
                                                + swissMedicNo8 + " (" + public_price.size() + ")");
                                } catch (NumberFormatException e) {
                                    if (CmlOptions.SHOW_ERRORS)
                                        System.err.println("Number format exception (public price): "
                                                + swissMedicNo8 + " (" + public_price.size() + ")");
                                }
                            }
                            // Append BAG application area and set therapeutic code.
                            if (!bag_application.isEmpty())
                                pi_row.set(6, pi_row.get(6) + bag_application + " (BAG)");
                            pi_row.set(9, therapeutic_code);
                        }
                    }
                }
            }
            num_preparations++;
        }
        stopTime = System.currentTimeMillis();
        if (CmlOptions.SHOW_LOGS)
            System.out.println(
                    num_preparations + " preparations in " + (stopTime - startTime) / 1000.0f + " sec");
    } catch (FileNotFoundException e) {
        e.printStackTrace();
    } catch (IOException e) {
        e.printStackTrace();
    } catch (JAXBException e) {
        e.printStackTrace();
    }
}
From source file: edu.harvard.iq.dvn.ingest.statdataio.impl.plugins.por.PORFileReader.java
/**
 * Validates that the stream is an SPSS portable (POR) file and rewrites it to a
 * temporary file with all line terminators stripped, so downstream decoding can
 * treat the content as one continuous character sequence.
 *
 * Steps:
 *   1. read the first POR_HEADER_SIZE bytes (the stream is mark/reset so the
 *      same bytes are re-read later by the scanner);
 *   2. detect the line-terminator convention (unix LF, windows CRLF, mac CR, or
 *      the 3-byte CR CR LF written by some SPSS-for-Windows releases) by probing
 *      the expected terminator columns of the first 6 eighty-character lines;
 *   3. locate and verify the "SPSSPORT" marker at the terminator-adjusted offset;
 *   4. copy the stream to a temp file, skipping the first 5 leader lines and
 *      dropping all newlines.
 *
 * @param stream buffered input positioned at the start of the file; must support
 *               mark/reset for the header probe to be non-destructive
 * @return temp file containing the POR payload without line terminators
 * @throws IOException if the header cannot be read
 * @throws IllegalArgumentException if the stream is null or not a POR file
 */
private File decodeHeader(BufferedInputStream stream) throws IOException {
    File tempPORfile = null;

    if (stream == null) {
        throw new IllegalArgumentException("file == null!");
    }
    byte[] headerByes = new byte[POR_HEADER_SIZE];

    if (stream.markSupported()) {
        stream.mark(1000);
    }
    int nbytes = stream.read(headerByes, 0, POR_HEADER_SIZE);

    //printHexDump(headerByes, "hex dump of the byte-array");

    if (nbytes == 0) {
        throw new IOException("decodeHeader: reading failure");
    } else if (nbytes < 491) {
        // Size test: by definition a POR file has at least a 491-byte header;
        // anything smaller cannot be a POR file.
        dbgLog.fine("this file is NOT spss-por type");
        throw new IllegalArgumentException("file is not spss-por type");
    }
    // rewind the current reading position back to the beginning
    if (stream.markSupported()) {
        stream.reset();
    }

    // Line terminators are usually one or two characters, but a POR file saved
    // by a genuine SPSS for Windows may carry a three-character terminator
    // (the original file's one-character terminator was kept when opened, and
    // the default two-character terminator was appended on save):
    //
    //   windows [0D0A]   => [13 10]    = CR/LF
    //   unix    [0A]     => [10]
    //   mac     [0D]     => [13]
    //   3char   [0D0D0A] => [13 13 10]   spss for windows rel 15
    //
    // Terminator bytes are expected at these 0-based column positions:
    //   unix    [0A]     : [80], [161], [242], [323], [404], [485]
    //   windows [0D0A]   : [81], [163], [245], [327], [409], [491]
    //           [0D0D0A] : [82], [165], [248], [331], [414], [495]

    // convert b into a ByteBuffer
    ByteBuffer buff = ByteBuffer.wrap(headerByes);
    byte[] nlch = new byte[36];
    int pos1;
    int pos2;
    int pos3;
    int ucase = 0;   // unix (lone LF) hits
    int wcase = 0;   // windows (CRLF) hits
    int mcase = 0;   // mac (lone CR) hits
    int three = 0;   // 3-char (CR CR LF) hits
    int nolines = 6; // number of header lines probed
    int nocols = 80; // fixed POR line width
    for (int i = 0; i < nolines; ++i) {
        int baseBias = nocols * (i + 1);
        // 1-char terminator candidate position
        pos1 = baseBias + i;
        buff.position(pos1);
        dbgLog.finer("\tposition(1)=" + buff.position());
        int j = 6 * i;
        nlch[j] = buff.get();
        if (nlch[j] == 10) {
            ucase++;
        } else if (nlch[j] == 13) {
            mcase++;
        }
        // 2-char terminator candidate position
        pos2 = baseBias + 2 * i;
        buff.position(pos2);
        dbgLog.finer("\tposition(2)=" + buff.position());
        nlch[j + 1] = buff.get();
        nlch[j + 2] = buff.get();
        // 3-char terminator candidate position
        pos3 = baseBias + 3 * i;
        buff.position(pos3);
        dbgLog.finer("\tposition(3)=" + buff.position());
        nlch[j + 3] = buff.get();
        nlch[j + 4] = buff.get();
        nlch[j + 5] = buff.get();
        dbgLog.finer(i + "-th iteration position =" + nlch[j] + "\t" + nlch[j + 1] + "\t" + nlch[j + 2]);
        dbgLog.finer(i + "-th iteration position =" + nlch[j + 3] + "\t" + nlch[j + 4] + "\t" + nlch[j + 5]);
        if ((nlch[j + 3] == 13) && (nlch[j + 4] == 13) && (nlch[j + 5] == 10)) {
            three++;
        } else if ((nlch[j + 1] == 13) && (nlch[j + 2] == 10)) {
            wcase++;
        }
        buff.rewind();
    }

    // A convention only wins if it matched on all 6 probed lines.
    boolean windowsNewLine = true;
    if (three == nolines) {
        windowsNewLine = false; // lineTerminator = "0D0D0A"
    } else if ((ucase == nolines) && (wcase < nolines)) {
        windowsNewLine = false; // lineTerminator = "0A"
    } else if ((ucase < nolines) && (wcase == nolines)) {
        windowsNewLine = true; // lineTerminator = "0D0A"
    } else if ((mcase == nolines) && (wcase < nolines)) {
        windowsNewLine = false; // lineTerminator = "0D"
    }

    buff.rewind();
    // The "SPSSPORT" marker shifts right by one extra byte per extra terminator
    // character on each of the 5 preceding lines: +5 for CRLF, +10 for CR CR LF.
    int PORmarkPosition = POR_MARK_POSITION_DEFAULT;
    if (windowsNewLine) {
        PORmarkPosition = PORmarkPosition + 5;
    } else if (three == nolines) {
        PORmarkPosition = PORmarkPosition + 10;
    }

    byte[] pormark = new byte[8];
    buff.position(PORmarkPosition);
    buff.get(pormark, 0, 8);
    // NOTE(review): platform default charset; the marker is plain ASCII so this
    // presumably decodes identically everywhere — confirm.
    String pormarks = new String(pormark);

    dbgLog.fine(
            "pormark[hex: 53 50 53 53 50 4F 52 54 == SPSSPORT] =>" + new String(Hex.encodeHex(pormark)) + "<-");

    if (pormarks.equals(POR_MARK)) {
        dbgLog.fine("POR ID toke test: Passed");
        init();
        smd.getFileInformation().put("mimeType", MIME_TYPE);
        smd.getFileInformation().put("fileFormat", MIME_TYPE);
    } else {
        dbgLog.fine("this file is NOT spss-por type");
        throw new IllegalArgumentException("decodeHeader: POR ID token was not found");
    }

    // save the POR file without new line characters
    FileOutputStream fileOutPOR = null;
    Writer fileWriter = null;

    // Scanner class can handle the three-character line-terminator
    Scanner porScanner = null;
    try {
        tempPORfile = File.createTempFile("tempPORfile.", ".por");
        fileOutPOR = new FileOutputStream(tempPORfile);
        fileWriter = new BufferedWriter(new OutputStreamWriter(fileOutPOR, "utf8"));
        porScanner = new Scanner(stream);

        // Because 64-bit and 32-bit machines decode POR's first 40-byte
        // sequence differently, the first 5 leader lines are skipped from
        // the new-line-stripped file
        int lineCounter = 0;
        while (porScanner.hasNextLine()) {
            lineCounter++;
            if (lineCounter <= 5) {
                String line = porScanner.nextLine().toString();
                dbgLog.fine("line=" + lineCounter + ":" + line.length() + ":" + line);
            } else {
                fileWriter.write(porScanner.nextLine().toString());
            }
        }
    } finally {
        try {
            if (fileWriter != null) {
                fileWriter.close();
            }
        } catch (IOException ex) {
            ex.printStackTrace();
        }
        if (porScanner != null) {
            porScanner.close();
        }
    }
    return tempPORfile;
}
From source file:com.joliciel.talismane.other.Extensions.java
/** * To be called just before running the Talismane command, to * prepare anything specifically required for extensions to function correctly. * @param config/*from w ww . jav a 2 s. c om*/ * @param talismane */ public void prepareCommand(TalismaneConfig config, Talismane talismane) { try { if (command == null) return; if (command.equals(ExtendedCommand.toStandoff)) { StandoffWriter standoffWriter = new StandoffWriter(); talismane.setParseConfigurationProcessor(standoffWriter); } else if (command.equals(ExtendedCommand.toStandoffSentences)) { InputStream inputStream = StandoffWriter.class.getResourceAsStream("standoffSentences.ftl"); Reader templateReader = new BufferedReader(new InputStreamReader(inputStream)); FreemarkerTemplateWriter templateWriter = new FreemarkerTemplateWriter(templateReader); talismane.setParseConfigurationProcessor(templateWriter); } else if (command.equals(ExtendedCommand.fromStandoff)) { Scanner scanner = new Scanner(config.getReader()); StandoffReader standoffReader = new StandoffReader(scanner); config.setParserCorpusReader(standoffReader); } else if (command.equals(ExtendedCommand.corpusStatistics)) { CorpusStatistics stats = new CorpusStatistics(); if (referenceStatsPath != null) { File referenceStatsFile = new File(referenceStatsPath); CorpusStatistics referenceStats = CorpusStatistics.loadFromFile(referenceStatsFile); stats.setReferenceWords(referenceStats.getWords()); stats.setReferenceLowercaseWords(referenceStats.getLowerCaseWords()); } File csvFile = new File(config.getOutDir(), config.getBaseName() + "_stats.csv"); csvFile.delete(); csvFile.createNewFile(); Writer csvFileWriter = new BufferedWriter( new OutputStreamWriter(new FileOutputStream(csvFile, false), "UTF8")); stats.setWriter(csvFileWriter); File serializationFile = new File(config.getOutDir(), config.getBaseName() + "_stats.zip"); serializationFile.delete(); stats.setSerializationFile(serializationFile); ParserRegexBasedCorpusReader corpusReader = 
(ParserRegexBasedCorpusReader) config .getParserCorpusReader(); corpusReader.setPredictTransitions(false); talismane.setParseConfigurationProcessor(stats); } else if (command.equals(ExtendedCommand.modifyCorpus)) { if (corpusRulesPath == null) throw new TalismaneException("corpusRules is required for modifyCorpus command"); List<String> corpusRules = new ArrayList<String>(); File corpusRulesFile = new File(corpusRulesPath); Scanner scanner = new Scanner( new BufferedReader(new InputStreamReader(new FileInputStream(corpusRulesFile), "UTF-8"))); while (scanner.hasNextLine()) { corpusRules.add(scanner.nextLine()); } CorpusModifier corpusModifier = new CorpusModifier(config.getParseConfigurationProcessor(), corpusRules); talismane.setParseConfigurationProcessor(corpusModifier); } else if (command.equals(ExtendedCommand.projectify)) { CorpusProjectifier projectifier = new CorpusProjectifier(config.getParseConfigurationProcessor()); talismane.setParseConfigurationProcessor(projectifier); } else { throw new RuntimeException("Unknown command: " + command); } } catch (IOException e) { LogUtils.logError(LOG, e); throw new RuntimeException(e); } }
From source file:componentes.DocumentoCxP.java
public void seleccionarArchivoXML(FileUploadEvent event) { try {/*from ww w .ja va 2s .c om*/ File tempFile = File.createTempFile(event.getFile().getFileName(), "tmp"); tempFile.deleteOnExit(); FileOutputStream out = new FileOutputStream(tempFile); IOUtils.copy(event.getFile().getInputstream(), out); out.close(); StringBuilder fileContents = new StringBuilder((int) tempFile.length()); Scanner scanner = new Scanner(tempFile); String lineSeparator = System.getProperty("line.separator"); try { while (scanner.hasNextLine()) { fileContents.append(scanner.nextLine()).append(lineSeparator); } } finally { scanner.close(); } //Validaciones String codDoc = utilitario.getValorEtiqueta(fileContents.toString(), "codDoc"); if (codDoc == null || codDoc.equals(TipoComprobanteEnum.FACTURA.getCodigo()) == false) { utilitario.agregarMensajeError("Error archivo XML", "Tipo de comprobante no vlido"); return; } String ide_geper = ser_proveedor .getIdeProveedor(utilitario.getValorEtiqueta(fileContents.toString(), "ruc")); if (ide_geper == null) { utilitario.agregarMensajeError("Error", "El proveedor " + utilitario.getValorEtiqueta(fileContents.toString(), "razonSocial") + " no existe en la base de datos"); return; } String autorizacio_cpcfa = utilitario.getValorEtiqueta(fileContents.toString(), "numeroAutorizacion"); if (ser_cuentas_cxp.isExisteDocumentoElectronico(autorizacio_cpcfa)) { utilitario.agregarMensajeError("Error", "La factura electronica seleccionada ya existe"); return; } com_tipo_documento.setValue(parametros.get("p_con_tipo_documento_factura")); cargarProveedores(); String numero_cpcfa = utilitario.getValorEtiqueta(fileContents.toString(), "estab") + "-" + utilitario.getValorEtiqueta(fileContents.toString(), "ptoEmi") + "-" + utilitario.getValorEtiqueta(fileContents.toString(), "secuencial"); tab_cab_documento.setValor("ide_geper", ide_geper); tab_cab_documento.setValor("autorizacio_cpcfa", autorizacio_cpcfa); tab_cab_documento.setValor("numero_cpcfa", numero_cpcfa); 
tab_cab_documento.setValor("fecha_emisi_cpcfa", utilitario.getFormatoFecha(utilitario.toDate( utilitario.getFormatoFecha( utilitario.getValorEtiqueta(fileContents.toString(), "fechaEmision")), "dd/MM/yyyy"))); tab_cab_documento.setValor("ide_cndfp", ser_cuentas_cxp .getFormaPago(utilitario.getValorEtiqueta(fileContents.toString(), "formaPago"))); //Detalles String cadenaDetalles = utilitario.getValorEtiqueta(fileContents.toString(), "detalles"); String strDetalles[] = cadenaDetalles.split("</detalle>"); tab_det_documento.limpiar(); for (String strDetalleActual : strDetalles) { tab_det_documento.insertar(); tab_det_documento.setValor("cantidad_cpdfa", utilitario.getValorEtiqueta(strDetalleActual, "cantidad")); tab_det_documento.setValor("observacion_cpdfa", utilitario.getValorEtiqueta(strDetalleActual, "descripcion")); tab_det_documento.setValor("precio_cpdfa", utilitario.getValorEtiqueta(strDetalleActual, "precioUnitario")); tab_det_documento.setValor("valor_cpdfa", utilitario.getValorEtiqueta(strDetalleActual, "precioTotalSinImpuesto")); String codigoPorcentaje = utilitario.getValorEtiqueta(strDetalleActual, "codigoPorcentaje"); if (codigoPorcentaje.equals(TipoImpuestoIvaEnum.IVA_VENTA_0.getCodigo())) { //NO IVA tab_det_documento.setValor("iva_inarti_cpdfa", "-1"); } else if (codigoPorcentaje.equals(TipoImpuestoIvaEnum.IVA_NO_OBJETO.getCodigo())) { tab_det_documento.setValor("iva_inarti_cpdfa", "0"); } else { tab_det_documento.setValor("iva_inarti_cpdfa", "1"); } } calcularTotalDocumento(); dia_cxp_xml.cerrar(); utilitario.addUpdate( "tab_documenoCxP:0:tab_cab_documento,tab_documenoCxP:0:tab_det_documento,tab_documenoCxP:0:gri_pto"); } catch (Exception ex) { utilitario.crearError("Error al Leer Factura XML", "en el mtodo seleccionarArchivoXML()", ex); } }
From source file:edu.harvard.iq.dataverse.ingest.tabulardata.impl.plugins.por.PORFileReader.java
/**
 * Validates that the stream is an SPSS portable (POR) file and rewrites it to a
 * temporary file with all line terminators stripped, so downstream decoding can
 * treat the content as one continuous character sequence.
 *
 * Steps:
 *   1. read the first POR_HEADER_SIZE bytes (the stream is mark/reset so the
 *      same bytes are re-read later by the scanner);
 *   2. detect the line-terminator convention (unix LF, windows CRLF, mac CR, or
 *      the 3-byte CR CR LF written by some SPSS-for-Windows releases) by probing
 *      the expected terminator columns of the first 6 eighty-character lines;
 *   3. locate and verify the "SPSSPORT" marker at the terminator-adjusted offset,
 *      then record the original file format and a placeholder UNF on dataTable;
 *   4. copy the stream to a temp file, skipping the first 5 leader lines and
 *      dropping all newlines.
 *
 * @param stream buffered input positioned at the start of the file; must support
 *               mark/reset for the header probe to be non-destructive
 * @return temp file containing the POR payload without line terminators
 * @throws IOException if the header cannot be read
 * @throws IllegalArgumentException if the stream is null or not a POR file
 */
private File decodeHeader(BufferedInputStream stream) throws IOException {
    dbgLog.fine("decodeHeader(): start");
    File tempPORfile = null;

    if (stream == null) {
        throw new IllegalArgumentException("file == null!");
    }
    byte[] headerByes = new byte[POR_HEADER_SIZE];

    if (stream.markSupported()) {
        stream.mark(1000);
    }
    int nbytes = stream.read(headerByes, 0, POR_HEADER_SIZE);

    //printHexDump(headerByes, "hex dump of the byte-array");

    if (nbytes == 0) {
        throw new IOException("decodeHeader: reading failure");
    } else if (nbytes < 491) {
        // Size test: by definition a POR file has at least a 491-byte header;
        // anything smaller cannot be a POR file.
        dbgLog.fine("this file is NOT spss-por type");
        throw new IllegalArgumentException("file is not spss-por type");
    }
    // rewind the current reading position back to the beginning
    if (stream.markSupported()) {
        stream.reset();
    }

    // Line terminators are usually one or two characters, but a POR file saved
    // by a genuine SPSS for Windows may carry a three-character terminator
    // (the original file's one-character terminator was kept when opened, and
    // the default two-character terminator was appended on save):
    //
    //   windows [0D0A]   => [13 10]    = CR/LF
    //   unix    [0A]     => [10]
    //   mac     [0D]     => [13]
    //   3char   [0D0D0A] => [13 13 10]   spss for windows rel 15
    //
    // Terminator bytes are expected at these 0-based column positions:
    //   unix    [0A]     : [80], [161], [242], [323], [404], [485]
    //   windows [0D0A]   : [81], [163], [245], [327], [409], [491]
    //           [0D0D0A] : [82], [165], [248], [331], [414], [495]

    // convert b into a ByteBuffer
    ByteBuffer buff = ByteBuffer.wrap(headerByes);
    byte[] nlch = new byte[36];
    int pos1;
    int pos2;
    int pos3;
    int ucase = 0;   // unix (lone LF) hits
    int wcase = 0;   // windows (CRLF) hits
    int mcase = 0;   // mac (lone CR) hits
    int three = 0;   // 3-char (CR CR LF) hits
    int nolines = 6; // number of header lines probed
    int nocols = 80; // fixed POR line width
    for (int i = 0; i < nolines; ++i) {
        int baseBias = nocols * (i + 1);
        // 1-char terminator candidate position
        pos1 = baseBias + i;
        buff.position(pos1);
        dbgLog.finer("\tposition(1)=" + buff.position());
        int j = 6 * i;
        nlch[j] = buff.get();
        if (nlch[j] == 10) {
            ucase++;
        } else if (nlch[j] == 13) {
            mcase++;
        }
        // 2-char terminator candidate position
        pos2 = baseBias + 2 * i;
        buff.position(pos2);
        dbgLog.finer("\tposition(2)=" + buff.position());
        nlch[j + 1] = buff.get();
        nlch[j + 2] = buff.get();
        // 3-char terminator candidate position
        pos3 = baseBias + 3 * i;
        buff.position(pos3);
        dbgLog.finer("\tposition(3)=" + buff.position());
        nlch[j + 3] = buff.get();
        nlch[j + 4] = buff.get();
        nlch[j + 5] = buff.get();
        dbgLog.finer(i + "-th iteration position =" + nlch[j] + "\t" + nlch[j + 1] + "\t" + nlch[j + 2]);
        dbgLog.finer(i + "-th iteration position =" + nlch[j + 3] + "\t" + nlch[j + 4] + "\t" + nlch[j + 5]);
        if ((nlch[j + 3] == 13) && (nlch[j + 4] == 13) && (nlch[j + 5] == 10)) {
            three++;
        } else if ((nlch[j + 1] == 13) && (nlch[j + 2] == 10)) {
            wcase++;
        }
        buff.rewind();
    }

    // A convention only wins if it matched on all 6 probed lines.
    boolean windowsNewLine = true;
    if (three == nolines) {
        windowsNewLine = false; // lineTerminator = "0D0D0A"
    } else if ((ucase == nolines) && (wcase < nolines)) {
        windowsNewLine = false; // lineTerminator = "0A"
    } else if ((ucase < nolines) && (wcase == nolines)) {
        windowsNewLine = true; // lineTerminator = "0D0A"
    } else if ((mcase == nolines) && (wcase < nolines)) {
        windowsNewLine = false; // lineTerminator = "0D"
    }

    buff.rewind();
    // The "SPSSPORT" marker shifts right by one extra byte per extra terminator
    // character on each of the 5 preceding lines: +5 for CRLF, +10 for CR CR LF.
    int PORmarkPosition = POR_MARK_POSITION_DEFAULT;
    if (windowsNewLine) {
        PORmarkPosition = PORmarkPosition + 5;
    } else if (three == nolines) {
        PORmarkPosition = PORmarkPosition + 10;
    }

    byte[] pormark = new byte[8];
    buff.position(PORmarkPosition);
    buff.get(pormark, 0, 8);
    // NOTE(review): platform default charset; the marker is plain ASCII so this
    // presumably decodes identically everywhere — confirm.
    String pormarks = new String(pormark);

    dbgLog.fine(
            "pormark[hex: 53 50 53 53 50 4F 52 54 == SPSSPORT] =>" + new String(Hex.encodeHex(pormark)) + "<-");

    if (pormarks.equals(POR_MARK)) {
        dbgLog.fine("POR ID toke test: Passed");
        init();

        dataTable.setOriginalFileFormat(MIME_TYPE);
        // Placeholder UNF; presumably recomputed later in the ingest pipeline — confirm
        dataTable.setUnf("UNF:6:NOTCALCULATED");
    } else {
        dbgLog.fine("this file is NOT spss-por type");
        throw new IllegalArgumentException("decodeHeader: POR ID token was not found");
    }

    // save the POR file without new line characters
    FileOutputStream fileOutPOR = null;
    Writer fileWriter = null;

    // Scanner class can handle the three-character line-terminator
    Scanner porScanner = null;
    try {
        tempPORfile = File.createTempFile("tempPORfile.", ".por");
        fileOutPOR = new FileOutputStream(tempPORfile);
        fileWriter = new BufferedWriter(new OutputStreamWriter(fileOutPOR, "utf8"));
        porScanner = new Scanner(stream);

        // Because 64-bit and 32-bit machines decode POR's first 40-byte
        // sequence differently, the first 5 leader lines are skipped from
        // the new-line-stripped file
        int lineCounter = 0;
        while (porScanner.hasNextLine()) {
            lineCounter++;
            if (lineCounter <= 5) {
                String line = porScanner.nextLine();
                dbgLog.fine("line=" + lineCounter + ":" + line.length() + ":" + line);
            } else {
                fileWriter.write(porScanner.nextLine());
            }
        }
    } finally {
        try {
            if (fileWriter != null) {
                fileWriter.close();
            }
        } catch (IOException ex) {
            ex.printStackTrace();
        }
        if (porScanner != null) {
            porScanner.close();
        }
    }
    return tempPORfile;
}
From source file:org.fhaes.neofhchart.svg.FireChartSVG.java
/** * The constructor builds the DOM of the SVG. * /*from www . ja v a2 s. c o m*/ * @param f */ public FireChartSVG(AbstractFireHistoryReader f) { // Initialize the builder objects compositePlotEB = new CompositePlotElementBuilder(this); legendEB = new LegendElementBuilder(this); percentScarredPlotEB = new PercentScarredPlotElementBuilder(this); sampleRecorderPlotEB = new SampleRecorderPlotElementBuilder(this); seriesEB = new SeriesElementBuilder(this); timeAxisEB = new TimeAxisElementBuilder(this); // Assign number for message passing from ECMAscript chartNum = chartCounter; chartCounter++; if (chart_map == null) { chart_map = new HashMap<Integer, FireChartSVG>(); } chart_map.put(chartNum, this); reader = f; ArrayList<FHSeriesSVG> seriesToAdd = FireChartUtil.seriesListToSeriesSVGList(f.getSeriesList()); if (!seriesSVGList.isEmpty()) { seriesSVGList.clear(); } for (int i = 0; i < seriesToAdd.size(); i++) { try { FHSeries currentSeries = seriesToAdd.get(i); // Add the default category entry if the current series has no defined entries (this is necessary for category groupings) if (currentSeries.getCategoryEntries().isEmpty()) { currentSeries.getCategoryEntries() .add(new FHCategoryEntry(currentSeries.getTitle(), "default", "default")); } seriesSVGList.add(new FHSeriesSVG(seriesToAdd.get(i), i)); } catch (Exception e) { e.printStackTrace(); } } Element svgRoot = doc.getDocumentElement(); // Set up the scripts for Java / ECMAScript interop Element script = doc.createElementNS(svgNS, "script"); script.setAttributeNS(null, "type", "text/ecmascript"); try { // File script_file = new File("./script.js"); ClassLoader cl = org.fhaes.neofhchart.svg.FireChartSVG.class.getClassLoader(); Scanner scanner = new Scanner(cl.getResourceAsStream("script.js")); String script_string = ""; while (scanner.hasNextLine()) { script_string += scanner.nextLine(); } script_string += ("var chart_num = " + chartNum + ";"); Text script_text = doc.createTextNode(script_string); 
script.appendChild(script_text); scanner.close(); } catch (Exception e) { e.printStackTrace(); } svgRoot.appendChild(script); // The padding_grouper is used to add in some padding around the chart as a whole Element padding_grouper = doc.createElementNS(svgNS, "g"); padding_grouper.setAttributeNS(null, "id", "padding_g"); padding_grouper.setAttributeNS(null, "transform", "translate (" + chartXOffset + ",20)"); svgRoot.appendChild(padding_grouper); // Build grouper to hold annotation elements Element annote_g = doc.createElementNS(svgNS, "g"); annote_g.setAttributeNS(null, "id", "annote_g"); padding_grouper.appendChild(annote_g); // Build chart title Element chart_title_g = doc.createElementNS(svgNS, "g"); chart_title_g.setAttributeNS(null, "id", "chart_title_g"); padding_grouper.appendChild(chart_title_g); // Build the time axis Element time_axis_g = doc.createElementNS(svgNS, "g"); time_axis_g.setAttributeNS(null, "id", "time_axis_g"); padding_grouper.appendChild(time_axis_g); // Build index plot Element index_plot_g = doc.createElementNS(svgNS, "g"); index_plot_g.setAttributeNS(null, "id", "index_plot_g"); padding_grouper.appendChild(index_plot_g); // Build chronology plot Element chrono_plot_g = doc.createElementNS(svgNS, "g"); chrono_plot_g.setAttributeNS(null, "id", "chrono_plot_g"); padding_grouper.appendChild(chrono_plot_g); // Build composite plot Element comp_plot_g = doc.createElementNS(svgNS, "g"); comp_plot_g.setAttributeNS(null, "id", "comp_plot_g"); padding_grouper.appendChild(comp_plot_g); // Build legend Element legend_g = doc.createElementNS(svgNS, "g"); legend_g.setAttributeNS(null, "id", "legend_g"); padding_grouper.appendChild(legend_g); // Finish up the initialization buildElements(); positionSeriesLines(); positionChartGroupersAndDrawTimeAxis(); sortSeriesAccordingToPreference(); }
From source file:com.maxl.java.amikodesk.AMiKoDesk.java
/**
 * Rebuilds the list of titles shown in the main results list.
 *
 * In comparison mode the Rose article search results are rendered with stock
 * and ex-factory price; otherwise the medication search results are rendered.
 * When the result set is small enough ({@code < BigCellNumber}) and search
 * mode is active, each pack-info line is coloured red/green/gray according to
 * its trailing "O]" / "G]" marker; outside search mode only plain titles are
 * shown. The assembled HTML snippets are pushed to {@code m_list_titles}.
 */
static void sTitle() {
    List<String> m = new ArrayList<String>();
    if (!m_curr_uistate.isComparisonMode()) {
        med_id.clear();
        // Lines ending in "O]" are shown red, lines ending in "G]" green
        Pattern p_red = Pattern.compile(".*O]");
        Pattern p_green = Pattern.compile(".*G]");
        if (med_search.size() < BigCellNumber && m_curr_uistate.isSearchMode()) {
            for (int i = 0; i < med_search.size(); ++i) {
                Medication ms = med_search.get(i);
                // FIX: build the coloured pack-info HTML with a StringBuilder
                // instead of repeated String concatenation in the loop
                StringBuilder pack_info_str = new StringBuilder();
                Scanner pack_str_scanner = new Scanner(ms.getPackInfo());
                while (pack_str_scanner.hasNextLine()) {
                    String pack_str_line = pack_str_scanner.nextLine();
                    Matcher m_red = p_red.matcher(pack_str_line);
                    Matcher m_green = p_green.matcher(pack_str_line);
                    if (m_red.find())
                        pack_info_str.append("<font color=red>").append(pack_str_line).append("</font><br>");
                    else if (m_green.find())
                        pack_info_str.append("<font color=green>").append(pack_str_line).append("</font><br>");
                    else
                        pack_info_str.append("<font color=gray>").append(pack_str_line).append("</font><br>");
                }
                pack_str_scanner.close();
                m.add("<html><b>" + ms.getTitle() + "</b><br><font size=-1>" + pack_info_str + "</font></html>");
                med_id.add(ms.getId());
            }
        } else if (!m_curr_uistate.isSearchMode()) {
            // Outside search mode: plain titles only, no pack info
            for (int i = 0; i < med_search.size(); ++i) {
                Medication ms = med_search.get(i);
                m.add("<html><body style='width: 1024px;'><b>" + ms.getTitle() + "</b></html>");
                med_id.add(ms.getId());
            }
        }
    } else {
        // Comparison mode: Rose article results with stock level and price
        list_of_articles.clear();
        for (int i = 0; i < rose_search.size(); ++i) {
            Article as = rose_search.get(i);
            list_of_articles.add(as);
            m.add("<html><body style='width: 1024px;'><b>" + as.getPackTitle() + "</b><br>"
                    + "<font color=gray size=-1>Lager: " + as.getItemsOnStock() + " (CHF "
                    + as.getCleanExfactoryPrice() + ")" + "</font></html>");
        }
    }
    m_list_titles.update(m);
}