List of usage examples for java.io BufferedInputStream available
public synchronized int available() throws IOException
From source file:org.apache.carbondata.sdk.file.ImageTest.java
@Test public void testWriteWithByteArrayDataType() throws IOException, InvalidLoadOptionException, InterruptedException { String imagePath = "./src/test/resources/image/carbondatalogo.jpg"; int num = 1;/* w ww . j a v a2s .c om*/ int rows = 10; String path = "./target/binary"; try { FileUtils.deleteDirectory(new File(path)); } catch (IOException e) { e.printStackTrace(); } Field[] fields = new Field[5]; fields[0] = new Field("name", DataTypes.STRING); fields[1] = new Field("age", DataTypes.INT); fields[2] = new Field("image1", DataTypes.BINARY); fields[3] = new Field("image2", DataTypes.BINARY); fields[4] = new Field("image3", DataTypes.BINARY); byte[] originBinary = null; // read and write image data for (int j = 0; j < num; j++) { CarbonWriter writer = CarbonWriter.builder().outputPath(path).withCsvInput(new Schema(fields)) .writtenBy("SDKS3Example").withPageSizeInMb(1).build(); for (int i = 0; i < rows; i++) { // read image and encode to Hex BufferedInputStream bis = new BufferedInputStream(new FileInputStream(imagePath)); originBinary = new byte[bis.available()]; while ((bis.read(originBinary)) != -1) { } // write data writer.write(new Object[] { "robot" + (i % 10), i, originBinary, originBinary, originBinary }); bis.close(); } writer.close(); } CarbonReader reader = CarbonReader.builder(path, "_temp").build(); System.out.println("\nData:"); int i = 0; while (i < 20 && reader.hasNext()) { Object[] row = (Object[]) reader.readNextRow(); byte[] outputBinary = (byte[]) row[1]; byte[] outputBinary2 = (byte[]) row[2]; byte[] outputBinary3 = (byte[]) row[3]; System.out.println(row[0] + " " + row[1] + " image1 size:" + outputBinary.length + " image2 size:" + outputBinary2.length + " image3 size:" + outputBinary3.length); for (int k = 0; k < 3; k++) { byte[] originBinaryTemp = (byte[]) row[1 + k]; // validate output binary data and origin binary data assert (originBinaryTemp.length == outputBinary.length); for (int j = 0; j < originBinaryTemp.length; j++) { assert (originBinaryTemp[j] == outputBinary[j]); } // save image, user can compare the save image and original image String destString = "./target/binary/image" + k + "_" + i + ".jpg"; BufferedOutputStream bos = new BufferedOutputStream(new FileOutputStream(destString)); bos.write(originBinaryTemp); bos.close(); } i++; } System.out.println("\nFinished"); reader.close(); }
From source file:org.apache.carbondata.sdk.file.ImageTest.java
public void testWriteTwoImageColumn() throws Exception { String imagePath = "./src/test/resources/image/vocForSegmentationClass"; String path = "./target/vocForSegmentationClass"; int num = 1;/*from w ww. j a v a2 s . c o m*/ Field[] fields = new Field[4]; fields[0] = new Field("name", DataTypes.STRING); fields[1] = new Field("age", DataTypes.INT); fields[2] = new Field("rawImage", DataTypes.BINARY); fields[3] = new Field("segmentationClass", DataTypes.BINARY); byte[] originBinary = null; byte[] originBinary2 = null; Object[] files = listFiles(imagePath, ".jpg").toArray(); // read and write image data for (int j = 0; j < num; j++) { CarbonWriter writer = CarbonWriter.builder().outputPath(path).withCsvInput(new Schema(fields)) .writtenBy("SDKS3Example").withPageSizeInMb(1).build(); for (int i = 0; i < files.length; i++) { // read image and encode to Hex String filePath = (String) files[i]; BufferedInputStream bis = new BufferedInputStream(new FileInputStream(filePath)); originBinary = new byte[bis.available()]; while ((bis.read(originBinary)) != -1) { } BufferedInputStream bis2 = new BufferedInputStream( new FileInputStream(filePath.replace(".jpg", ".png"))); originBinary2 = new byte[bis2.available()]; while ((bis2.read(originBinary2)) != -1) { } // write data writer.write(new Object[] { "robot" + (i % 10), i, originBinary, originBinary2 }); bis.close(); bis2.close(); } writer.close(); } CarbonReader reader = CarbonReader.builder(path, "_temp").build(); System.out.println("\nData:"); int i = 0; while (i < 20 && reader.hasNext()) { Object[] row = (Object[]) reader.readNextRow(); byte[] outputBinary = (byte[]) row[1]; byte[] outputBinary2 = (byte[]) row[2]; System.out.println(row[0] + " " + row[3] + " image1 size:" + outputBinary.length + " image2 size:" + outputBinary2.length); for (int k = 0; k < 2; k++) { byte[] originBinaryTemp = (byte[]) row[1 + k]; // save image, user can compare the save image and original image String destString = null; if (k == 0) { destString = "./target/vocForSegmentationClass/image" + k + "_" + i + ".jpg"; } else { destString = "./target/vocForSegmentationClass/image" + k + "_" + i + ".png"; } BufferedOutputStream bos = new BufferedOutputStream(new FileOutputStream(destString)); bos.write(originBinaryTemp); bos.close(); } i++; } System.out.println("\nFinished"); reader.close(); }
From source file:org.apache.carbondata.sdk.file.ImageTest.java
@Test public void testBinaryWithManyImages() throws IOException, InvalidLoadOptionException, InterruptedException { int num = 1;/*from w w w . j a v a 2 s . c o m*/ String path = "./target/flowers"; Field[] fields = new Field[5]; fields[0] = new Field("binaryId", DataTypes.INT); fields[1] = new Field("binaryName", DataTypes.STRING); fields[2] = new Field("binary", "Binary"); fields[3] = new Field("labelName", DataTypes.STRING); fields[4] = new Field("labelContent", DataTypes.STRING); String imageFolder = "./src/test/resources/image/flowers"; byte[] originBinary = null; // read and write image data for (int j = 0; j < num; j++) { CarbonWriter writer = CarbonWriter.builder().outputPath(path).withCsvInput(new Schema(fields)) .writtenBy("SDKS3Example").withPageSizeInMb(1).build(); File file = new File(imageFolder); File[] files = file.listFiles(new FilenameFilter() { @Override public boolean accept(File dir, String name) { if (name == null) { return false; } return name.endsWith(".jpg"); } }); if (null != files) { for (int i = 0; i < files.length; i++) { // read image and encode to Hex BufferedInputStream bis = new BufferedInputStream(new FileInputStream(files[i])); char[] hexValue = null; originBinary = new byte[bis.available()]; while ((bis.read(originBinary)) != -1) { hexValue = Hex.encodeHex(originBinary); } String txtFileName = files[i].getCanonicalPath().split(".jpg")[0] + ".txt"; BufferedInputStream txtBis = new BufferedInputStream(new FileInputStream(txtFileName)); String txtValue = null; byte[] txtBinary = null; txtBinary = new byte[txtBis.available()]; while ((txtBis.read(txtBinary)) != -1) { txtValue = new String(txtBinary, "UTF-8"); } // write data System.out.println(files[i].getCanonicalPath()); writer.write(new String[] { String.valueOf(i), files[i].getCanonicalPath(), String.valueOf(hexValue), txtFileName, txtValue }); bis.close(); } } writer.close(); } CarbonReader reader = CarbonReader.builder(path).build(); System.out.println("\nData:"); int i = 0; while (i < 20 && reader.hasNext()) { Object[] row = (Object[]) reader.readNextRow(); byte[] outputBinary = (byte[]) row[1]; System.out.println(row[0] + " " + row[2] + " image size:" + outputBinary.length); // save image, user can compare the save image and original image String destString = "./target/flowers/image" + i + ".jpg"; BufferedOutputStream bos = new BufferedOutputStream(new FileOutputStream(destString)); bos.write(outputBinary); bos.close(); i++; } System.out.println("\nFinished"); reader.close(); }
From source file:org.apache.carbondata.sdk.file.ImageTest.java
@Test public void testBinaryWithOrWithoutFilter() throws IOException, InvalidLoadOptionException, InterruptedException, DecoderException { String imagePath = "./src/test/resources/image/carbondatalogo.jpg"; int num = 1;//from w w w .j a v a 2 s . c o m int rows = 1; String path = "./target/binary"; try { FileUtils.deleteDirectory(new File(path)); } catch (IOException e) { e.printStackTrace(); } Field[] fields = new Field[3]; fields[0] = new Field("name", DataTypes.STRING); fields[1] = new Field("age", DataTypes.INT); fields[2] = new Field("image", DataTypes.BINARY); byte[] originBinary = null; // read and write image data for (int j = 0; j < num; j++) { CarbonWriter writer = CarbonWriter.builder().outputPath(path).withCsvInput(new Schema(fields)) .writtenBy("SDKS3Example").withPageSizeInMb(1).build(); for (int i = 0; i < rows; i++) { // read image and encode to Hex BufferedInputStream bis = new BufferedInputStream(new FileInputStream(imagePath)); char[] hexValue = null; originBinary = new byte[bis.available()]; while ((bis.read(originBinary)) != -1) { hexValue = Hex.encodeHex(originBinary); } // write data writer.write(new String[] { "robot" + (i % 10), String.valueOf(i), String.valueOf(hexValue) }); bis.close(); } writer.close(); } // Read data with filter EqualToExpression equalToExpression = new EqualToExpression(new ColumnExpression("name", DataTypes.STRING), new LiteralExpression("robot0", DataTypes.STRING)); CarbonReader reader = CarbonReader.builder(path, "_temp").filter(equalToExpression).build(); System.out.println("\nData:"); int i = 0; while (i < 20 && reader.hasNext()) { Object[] row = (Object[]) reader.readNextRow(); byte[] outputBinary = Hex.decodeHex(new String((byte[]) row[1]).toCharArray()); System.out.println(row[0] + " " + row[2] + " image size:" + outputBinary.length); // validate output binary data and origin binary data assert (originBinary.length == outputBinary.length); for (int j = 0; j < originBinary.length; j++) { assert (originBinary[j] == outputBinary[j]); } String value = new String(outputBinary); Assert.assertTrue(value.startsWith("PNG")); // save image, user can compare the save image and original image String destString = "./target/binary/image" + i + ".jpg"; BufferedOutputStream bos = new BufferedOutputStream(new FileOutputStream(destString)); bos.write(outputBinary); bos.close(); i++; } System.out.println("\nFinished"); reader.close(); CarbonReader reader2 = CarbonReader.builder(path, "_temp").build(); System.out.println("\nData:"); i = 0; while (i < 20 && reader2.hasNext()) { Object[] row = (Object[]) reader2.readNextRow(); byte[] outputBinary = Hex.decodeHex(new String((byte[]) row[1]).toCharArray()); System.out.println(row[0] + " " + row[2] + " image size:" + outputBinary.length); // validate output binary data and origin binary data assert (originBinary.length == outputBinary.length); for (int j = 0; j < originBinary.length; j++) { assert (originBinary[j] == outputBinary[j]); } // save image, user can compare the save image and original image String destString = "./target/binary/image" + i + ".jpg"; BufferedOutputStream bos = new BufferedOutputStream(new FileOutputStream(destString)); bos.write(outputBinary); bos.close(); i++; } reader2.close(); try { FileUtils.deleteDirectory(new File(path)); } catch (IOException e) { e.printStackTrace(); } System.out.println("\nFinished"); }
From source file:org.wso2.carbon.connector.integration.test.amazonsqs.AmazonsqsConnectorIntegrationTest.java
private String getFileContent(String path) throws IOException { String fileContent;//from www .j av a 2s . c om BufferedInputStream bfist = new BufferedInputStream(new FileInputStream(path)); byte[] buf = new byte[bfist.available()]; bfist.read(buf); fileContent = new String(buf); if (bfist != null) { bfist.close(); } return fileContent; }
From source file:eionet.meta.exports.ods.Ods.java
/** * Zips file./*from w w w .jav a 2 s. c om*/ * * @param fileToZip * file to zip (full path) * @param fileName * file name * @throws java.lang.Exception * if operation fails. */ private void zip(String fileToZip, String fileName) throws Exception { // get source file File src = new File(workingFolderPath + ODS_FILE_NAME); ZipFile zipFile = new ZipFile(src); // create temp file for output File tempDst = new File(workingFolderPath + ODS_FILE_NAME + ".zip"); ZipOutputStream zos = new ZipOutputStream(new FileOutputStream(tempDst)); // iterate on each entry in zip file Enumeration<? extends ZipEntry> entries = zipFile.entries(); while (entries.hasMoreElements()) { ZipEntry zipEntry = entries.nextElement(); BufferedInputStream bis; if (StringUtils.equals(fileName, zipEntry.getName())) { bis = new BufferedInputStream(new FileInputStream(new File(fileToZip))); } else { bis = new BufferedInputStream(zipFile.getInputStream(zipEntry)); } ZipEntry ze = new ZipEntry(zipEntry.getName()); zos.putNextEntry(ze); while (bis.available() > 0) { zos.write(bis.read()); } zos.closeEntry(); bis.close(); } zos.finish(); zos.close(); zipFile.close(); // rename file src.delete(); tempDst.renameTo(src); }
From source file:com.civprod.writerstoolbox.OpenNLP.training.SentenceDetectorTrainer.java
private void cmdTrainSentenceDetectorActionPerformed(java.awt.event.ActionEvent evt) {//GEN-FIRST:event_cmdTrainSentenceDetectorActionPerformed final SentenceDetectorTrainer tempThis = this; new Thread(() -> { textTestResults.setText(""); Charset charset = Charset.forName("UTF-8"); //read other models SentenceDetector stdDetector = null; try {/*from w w w.j a va2 s. co m*/ stdDetector = OpenNLPUtils.createSentenceDetector(); } catch (IOException ex) { } List<FileSplit> FileSplits = FileSplit.generateFileSplitsLOO(mFileCollectionListModel); File trainingFile = new File("en-sent.train"); File testFile = new File("en-sent.test"); SummaryStatistics curFStats = new SummaryStatistics(); SummaryStatistics curRecallStats = new SummaryStatistics(); SummaryStatistics curPrecisionStats = new SummaryStatistics(); SummaryStatistics stdFStats = new SummaryStatistics(); SummaryStatistics stdRecallStats = new SummaryStatistics(); SummaryStatistics stdPrecisionStats = new SummaryStatistics(); java.io.BufferedOutputStream trainingFileWriter = null; for (FileSplit curFileSplit : FileSplits) { try { //create training file trainingFileWriter = new java.io.BufferedOutputStream( new java.io.FileOutputStream(trainingFile)); for (File curTrainingFile : curFileSplit.getTrainingFiles()) { java.io.BufferedInputStream curTrainingFileReader = null; try { curTrainingFileReader = new java.io.BufferedInputStream( new java.io.FileInputStream(curTrainingFile)); while (curTrainingFileReader.available() > 0) { trainingFileWriter.write(curTrainingFileReader.read()); } } catch (IOException ex) { Logger.getLogger(SentenceDetectorTrainer.class.getName()).log(Level.SEVERE, null, ex); } finally { if (curTrainingFileReader != null) { curTrainingFileReader.close(); } } } trainingFileWriter.write('\n'); } catch (IOException ex) { Logger.getLogger(SentenceDetectorTrainer.class.getName()).log(Level.SEVERE, null, ex); } finally { if (trainingFileWriter != null) { try { trainingFileWriter.close(); } catch (IOException ex) { Logger.getLogger(SentenceDetectorTrainer.class.getName()).log(Level.SEVERE, null, ex); } } } //create test file java.io.BufferedOutputStream testFileWriter = null; try { //create training file testFileWriter = new java.io.BufferedOutputStream(new java.io.FileOutputStream(testFile)); for (File curTrainingFile : curFileSplit.getTestFiles()) { String testingFileName = curTrainingFile.getCanonicalPath(); textTestResults .setText(textTestResults.getText() + "testing with " + testingFileName + "\n"); java.io.BufferedInputStream curTrainingFileReader = null; try { curTrainingFileReader = new java.io.BufferedInputStream( new java.io.FileInputStream(curTrainingFile)); while (curTrainingFileReader.available() > 0) { int read = curTrainingFileReader.read(); testFileWriter.write(read); } } catch (IOException ex) { Logger.getLogger(SentenceDetectorTrainer.class.getName()).log(Level.SEVERE, null, ex); } finally { if (curTrainingFileReader != null) { curTrainingFileReader.close(); } } } testFileWriter.write('\n'); } catch (IOException ex) { Logger.getLogger(SentenceDetectorTrainer.class.getName()).log(Level.SEVERE, null, ex); } finally { if (testFileWriter != null) { try { testFileWriter.close(); } catch (IOException ex) { Logger.getLogger(SentenceDetectorTrainer.class.getName()).log(Level.SEVERE, null, ex); } } } //create SentenceDetectorFactory part of the training context SentenceDetectorFactory mySentenceDetectorFactory = new SentenceDetectorFactory("EN", cbUseTokenEnd.isSelected(), mAbbreviationDictionary, txtEosChars.getText().toCharArray()); ObjectStream<String> trainingLineStream = null; SentenceModel train = null; try { trainingLineStream = new PlainTextByLineStream(new FileInputStream(trainingFile), charset); ObjectStream<SentenceSample> sampleStream = null; try { sampleStream = new SentenceSampleStream(trainingLineStream); train = SentenceDetectorME.train("EN", sampleStream, mySentenceDetectorFactory, TrainingParameters.defaultParams()); } catch (IOException ex) { Logger.getLogger(SentenceDetectorTrainer.class.getName()).log(Level.SEVERE, null, ex); } finally { if (sampleStream != null) { try { sampleStream.close(); } catch (IOException ex) { Logger.getLogger(SentenceDetectorTrainer.class.getName()).log(Level.SEVERE, null, ex); } } } } catch (FileNotFoundException ex) { Logger.getLogger(SentenceDetectorTrainer.class.getName()).log(Level.SEVERE, null, ex); } finally { if (trainingLineStream != null) { try { trainingLineStream.close(); } catch (IOException ex) { Logger.getLogger(SentenceDetectorTrainer.class.getName()).log(Level.SEVERE, null, ex); } } } trainingLineStream = null; if (train != null) { ObjectStream<String> testingLineStream = null; try { testingLineStream = new PlainTextByLineStream(new FileInputStream(testFile), charset); ObjectStream<SentenceSample> sampleStream = null; try { sampleStream = new SentenceSampleStream(testingLineStream); SentenceDetectorME testDetector = new SentenceDetectorME(train); SentenceDetectorEvaluator evaluator = new SentenceDetectorEvaluator(testDetector); evaluator.evaluate(sampleStream); FMeasure testFMeasure = evaluator.getFMeasure(); curFStats.addValue(testFMeasure.getFMeasure()); curRecallStats.addValue(testFMeasure.getRecallScore()); curPrecisionStats.addValue(testFMeasure.getPrecisionScore()); textTestResults.setText(textTestResults.getText() + testFMeasure.getFMeasure() + " " + testFMeasure.getPrecisionScore() + " " + testFMeasure.getRecallScore() + "\n"); if (stdDetector != null) { testingLineStream = new PlainTextByLineStream(new FileInputStream(testFile), charset); sampleStream = new SentenceSampleStream(testingLineStream); SentenceDetectorEvaluator stdEvaluator = new SentenceDetectorEvaluator(stdDetector); stdEvaluator.evaluate(sampleStream); FMeasure stdFMeasure = stdEvaluator.getFMeasure(); stdFStats.addValue(stdFMeasure.getFMeasure()); stdRecallStats.addValue(stdFMeasure.getRecallScore()); stdPrecisionStats.addValue(stdFMeasure.getPrecisionScore()); textTestResults.setText(textTestResults.getText() + " " + stdFMeasure.getFMeasure() + " " + stdFMeasure.getPrecisionScore() + " " + stdFMeasure.getRecallScore() + "\n"); } textTestResults.setText(textTestResults.getText() + "\n"); } catch (IOException ex) { Logger.getLogger(SentenceDetectorTrainer.class.getName()).log(Level.SEVERE, null, ex); } finally { if (sampleStream != null) { try { sampleStream.close(); } catch (IOException ex) { Logger.getLogger(SentenceDetectorTrainer.class.getName()).log(Level.SEVERE, null, ex); } } } } catch (FileNotFoundException ex) { Logger.getLogger(SentenceDetectorTrainer.class.getName()).log(Level.SEVERE, null, ex); } finally { if (testingLineStream != null) { try { testingLineStream.close(); } catch (IOException ex) { Logger.getLogger(SentenceDetectorTrainer.class.getName()).log(Level.SEVERE, null, ex); } } } } } textTestResults.setText(textTestResults.getText() + "\n"); textTestResults.setText(textTestResults.getText() + "test model\n"); textTestResults.setText(textTestResults.getText() + "f score mean " + curFStats.getMean() + " stdDev " + curFStats.getStandardDeviation() + "\n"); textTestResults.setText(textTestResults.getText() + "recall mean " + curRecallStats.getMean() + " stdDev " + curRecallStats.getStandardDeviation() + "\n"); textTestResults.setText(textTestResults.getText() + "precision score mean " + curPrecisionStats.getMean() + " stdDev " + curPrecisionStats.getStandardDeviation() + "\n"); textTestResults.setText(textTestResults.getText() + "std model\n"); textTestResults.setText(textTestResults.getText() + "f score mean " + stdFStats.getMean() + " stdDev " + stdFStats.getStandardDeviation() + "\n"); textTestResults.setText(textTestResults.getText() + "recall mean " + stdRecallStats.getMean() + " stdDev " + stdRecallStats.getStandardDeviation() + "\n"); textTestResults.setText(textTestResults.getText() + "precision score mean " + stdPrecisionStats.getMean() + " stdDev " + stdPrecisionStats.getStandardDeviation() + "\n"); //create combinded training file trainingFileWriter = null; try { trainingFileWriter = new java.io.BufferedOutputStream(new java.io.FileOutputStream(trainingFile)); for (File curTrainingFile : mFileCollectionListModel) { java.io.BufferedInputStream curTrainingFileReader = null; try { curTrainingFileReader = new java.io.BufferedInputStream( new java.io.FileInputStream(curTrainingFile)); while (curTrainingFileReader.available() > 0) { trainingFileWriter.write(curTrainingFileReader.read()); } } catch (IOException ex) { Logger.getLogger(SentenceDetectorTrainer.class.getName()).log(Level.SEVERE, null, ex); } finally { if (curTrainingFileReader != null) { curTrainingFileReader.close(); } } } trainingFileWriter.write('\n'); } catch (IOException ex) { Logger.getLogger(SentenceDetectorTrainer.class.getName()).log(Level.SEVERE, null, ex); } finally { if (trainingFileWriter != null) { try { trainingFileWriter.close(); } catch (IOException ex) { Logger.getLogger(SentenceDetectorTrainer.class.getName()).log(Level.SEVERE, null, ex); } } } //create SentenceDetectorFactory part of the training context SentenceDetectorFactory mySentenceDetectorFactory = new SentenceDetectorFactory("EN", cbUseTokenEnd.isSelected(), mAbbreviationDictionary, txtEosChars.getText().toCharArray()); //create and train model ObjectStream<String> lineStream = null; this.createdObject = null; try { lineStream = new PlainTextByLineStream(new FileInputStream(trainingFile), charset); ObjectStream<SentenceSample> sampleStream = null; try { sampleStream = new SentenceSampleStream(lineStream); this.createdObject = SentenceDetectorME.train("EN", sampleStream, mySentenceDetectorFactory, TrainingParameters.defaultParams()); } catch (IOException ex) { Logger.getLogger(SentenceDetectorTrainer.class.getName()).log(Level.SEVERE, null, ex); } finally { if (sampleStream != null) { try { sampleStream.close(); } catch (IOException ex) { Logger.getLogger(SentenceDetectorTrainer.class.getName()).log(Level.SEVERE, null, ex); } } } } catch (FileNotFoundException ex) { Logger.getLogger(SentenceDetectorTrainer.class.getName()).log(Level.SEVERE, null, ex); } finally { if (lineStream != null) { try { lineStream.close(); } catch (IOException ex) { Logger.getLogger(SentenceDetectorTrainer.class.getName()).log(Level.SEVERE, null, ex); } } } if (createdObject != null) { OutputStream modelOut = null; File modelFile = new File("en-fiction-sent.bin"); try { modelOut = new BufferedOutputStream(new FileOutputStream(modelFile)); createdObject.serialize(modelOut); } catch (IOException ex) { Logger.getLogger(SentenceDetectorTrainer.class.getName()).log(Level.SEVERE, null, ex); } finally { if (modelOut != null) { try { modelOut.close(); } catch (IOException ex) { Logger.getLogger(SentenceDetectorTrainer.class.getName()).log(Level.SEVERE, null, ex); } } } } textTestResults.setText(textTestResults.getText() + "done"); }).start(); }
From source file:com.civprod.writerstoolbox.OpenNLP.training.TokenizerTrainer.java
private void cmdTrainActionPerformed(java.awt.event.ActionEvent evt) {//GEN-FIRST:event_cmdTrainActionPerformed final TokenizerTrainer tempThis = this; new Thread(() -> { textTestResults.setText(""); Charset charset = Charset.forName("UTF-8"); //create TokenizerFactory part of the training context String alphaNumericRegex = txtAlphaNumericPattern.getText(); alphaNumericRegex = alphaNumericRegex.trim(); if (alphaNumericRegex.isEmpty()) { alphaNumericRegex = "^[A-Za-z0-9]+$"; }/*from w w w.j a v a 2 s .c o m*/ Pattern alphaNumericPattern = Pattern.compile(alphaNumericRegex); TokenizerFactory myTokenizerFactory = new TokenizerFactory("EN", mAbbreviationDictionary, this.cbUseAlphaNumericOptimization.isSelected(), alphaNumericPattern); Tokenizer stdTokenizer = null; try { stdTokenizer = OpenNLPUtils.createTokenizer(); } catch (IOException ex) { Logger.getLogger(TokenizerTrainer.class.getName()).log(Level.SEVERE, null, ex); } List<FileSplit> FileSplits = FileSplit.generateFileSplitsLOO(mFileCollectionListModel); File trainingFile = new File("en-token.train"); File testFile = new File("en-token.test"); SummaryStatistics curFStats = new SummaryStatistics(); SummaryStatistics curRecallStats = new SummaryStatistics(); SummaryStatistics curPrecisionStats = new SummaryStatistics(); SummaryStatistics stdFStats = new SummaryStatistics(); SummaryStatistics stdRecallStats = new SummaryStatistics(); SummaryStatistics stdPrecisionStats = new SummaryStatistics(); java.io.BufferedOutputStream trainingFileWriter = null; for (FileSplit curFileSplit : FileSplits) { try { //create training file trainingFileWriter = new java.io.BufferedOutputStream( new java.io.FileOutputStream(trainingFile)); for (File curTrainingFile : curFileSplit.getTrainingFiles()) { java.io.BufferedInputStream curTrainingFileReader = null; try { curTrainingFileReader = new java.io.BufferedInputStream( new java.io.FileInputStream(curTrainingFile)); while (curTrainingFileReader.available() > 0) { trainingFileWriter.write(curTrainingFileReader.read()); } } catch (IOException ex) { Logger.getLogger(SentenceDetectorTrainer.class.getName()).log(Level.SEVERE, null, ex); } finally { if (curTrainingFileReader != null) { curTrainingFileReader.close(); } } } trainingFileWriter.write('\n'); } catch (IOException ex) { Logger.getLogger(SentenceDetectorTrainer.class.getName()).log(Level.SEVERE, null, ex); } finally { if (trainingFileWriter != null) { try { trainingFileWriter.close(); } catch (IOException ex) { Logger.getLogger(SentenceDetectorTrainer.class.getName()).log(Level.SEVERE, null, ex); } } } //create test file java.io.BufferedOutputStream testFileWriter = null; try { //create training file testFileWriter = new java.io.BufferedOutputStream(new java.io.FileOutputStream(testFile)); for (File curTrainingFile : curFileSplit.getTestFiles()) { String testingFileName = curTrainingFile.getCanonicalPath(); textTestResults .setText(textTestResults.getText() + "testing with " + testingFileName + "\n"); java.io.BufferedInputStream curTrainingFileReader = null; try { curTrainingFileReader = new java.io.BufferedInputStream( new java.io.FileInputStream(curTrainingFile)); while (curTrainingFileReader.available() > 0) { int read = curTrainingFileReader.read(); testFileWriter.write(read); } } catch (IOException ex) { Logger.getLogger(SentenceDetectorTrainer.class.getName()).log(Level.SEVERE, null, ex); } finally { if (curTrainingFileReader != null) { curTrainingFileReader.close(); } } } testFileWriter.write('\n'); } catch (IOException ex) { Logger.getLogger(SentenceDetectorTrainer.class.getName()).log(Level.SEVERE, null, ex); } finally { if (testFileWriter != null) { try { testFileWriter.close(); } catch (IOException ex) { Logger.getLogger(SentenceDetectorTrainer.class.getName()).log(Level.SEVERE, null, ex); } } } //create and train model ObjectStream<String> trainingLineStream = null; TokenizerModel train = null; try { trainingLineStream = new PlainTextByLineStream(new FileInputStream(trainingFile), charset); ObjectStream<TokenSample> sampleStream = null; try { sampleStream = new TokenSampleStream(trainingLineStream); train = TokenizerME.train(sampleStream, myTokenizerFactory, TrainingParameters.defaultParams()); } catch (IOException ex) { Logger.getLogger(SentenceDetectorTrainer.class.getName()).log(Level.SEVERE, null, ex); } finally { if (sampleStream != null) { try { sampleStream.close(); } catch (IOException ex) { Logger.getLogger(SentenceDetectorTrainer.class.getName()).log(Level.SEVERE, null, ex); } } } } catch (FileNotFoundException ex) { Logger.getLogger(SentenceDetectorTrainer.class.getName()).log(Level.SEVERE, null, ex); } finally { if (trainingLineStream != null) { try { trainingLineStream.close(); } catch (IOException ex) { Logger.getLogger(SentenceDetectorTrainer.class.getName()).log(Level.SEVERE, null, ex); } } } if (train != null) { ObjectStream<String> testingLineStream = null; try { testingLineStream = new PlainTextByLineStream(new FileInputStream(testFile), charset); ObjectStream<TokenSample> sampleStream = null; try { sampleStream = new TokenSampleStream(testingLineStream); TokenizerME testDetector = new TokenizerME(train); TokenizerEvaluator evaluator = new TokenizerEvaluator(testDetector); evaluator.evaluate(sampleStream); FMeasure testFMeasure = evaluator.getFMeasure(); curFStats.addValue(testFMeasure.getFMeasure()); curRecallStats.addValue(testFMeasure.getRecallScore()); curPrecisionStats.addValue(testFMeasure.getPrecisionScore()); textTestResults.setText(textTestResults.getText() + testFMeasure.getFMeasure() + " " + testFMeasure.getPrecisionScore() + " " + testFMeasure.getRecallScore() + "\n"); if (stdTokenizer != null) { testingLineStream = new PlainTextByLineStream(new FileInputStream(testFile), charset); sampleStream = new TokenSampleStream(testingLineStream); TokenizerEvaluator stdEvaluator = new TokenizerEvaluator(stdTokenizer); stdEvaluator.evaluate(sampleStream); FMeasure stdFMeasure = stdEvaluator.getFMeasure(); stdFStats.addValue(stdFMeasure.getFMeasure()); stdRecallStats.addValue(stdFMeasure.getRecallScore()); stdPrecisionStats.addValue(stdFMeasure.getPrecisionScore()); textTestResults.setText(textTestResults.getText() + " " + stdFMeasure.getFMeasure() + " " + stdFMeasure.getPrecisionScore() + " " + stdFMeasure.getRecallScore() + "\n"); } textTestResults.setText(textTestResults.getText() + "\n"); } catch (IOException ex) { Logger.getLogger(SentenceDetectorTrainer.class.getName()).log(Level.SEVERE, null, ex); } finally { if (sampleStream != null) { try { sampleStream.close(); } catch (IOException ex) { Logger.getLogger(SentenceDetectorTrainer.class.getName()).log(Level.SEVERE, null, ex); } } } } catch (FileNotFoundException ex) { Logger.getLogger(SentenceDetectorTrainer.class.getName()).log(Level.SEVERE, null, ex); } finally { if (testingLineStream != null) { try { testingLineStream.close(); } catch (IOException ex) { Logger.getLogger(SentenceDetectorTrainer.class.getName()).log(Level.SEVERE, null, ex); } } } } } textTestResults.setText(textTestResults.getText() + "\n"); textTestResults.setText(textTestResults.getText() + "test model\n"); textTestResults.setText(textTestResults.getText() + "f score mean " + curFStats.getMean() + " stdDev " + curFStats.getStandardDeviation() + "\n"); textTestResults.setText(textTestResults.getText() + "recall mean " + curRecallStats.getMean() + " stdDev " + curRecallStats.getStandardDeviation() + "\n"); textTestResults.setText(textTestResults.getText() + "precision score mean " + curPrecisionStats.getMean() + " stdDev " + curPrecisionStats.getStandardDeviation() + "\n"); textTestResults.setText(textTestResults.getText() + "std model\n"); textTestResults.setText(textTestResults.getText() + "f score mean " + stdFStats.getMean() + " stdDev " + stdFStats.getStandardDeviation() + "\n"); textTestResults.setText(textTestResults.getText() + "recall mean " + stdRecallStats.getMean() + " stdDev " + stdRecallStats.getStandardDeviation() + "\n"); textTestResults.setText(textTestResults.getText() + "precision score mean " + stdPrecisionStats.getMean() + " stdDev " + stdPrecisionStats.getStandardDeviation() + "\n"); //create combinded training file trainingFileWriter = null; try { trainingFileWriter = new java.io.BufferedOutputStream(new java.io.FileOutputStream(trainingFile)); for (File curTrainingFile : mFileCollectionListModel) { java.io.BufferedInputStream curTrainingFileReader = null; try { curTrainingFileReader = new java.io.BufferedInputStream( new java.io.FileInputStream(curTrainingFile)); while (curTrainingFileReader.available() > 0) { trainingFileWriter.write(curTrainingFileReader.read()); } } catch (IOException ex) { Logger.getLogger(SentenceDetectorTrainer.class.getName()).log(Level.SEVERE, null, ex); } finally { if (curTrainingFileReader != null) { curTrainingFileReader.close(); } } } trainingFileWriter.write('\n'); } catch (IOException ex) { Logger.getLogger(SentenceDetectorTrainer.class.getName()).log(Level.SEVERE, null, ex); } finally { if (trainingFileWriter != null) { try { trainingFileWriter.close(); } catch (IOException ex) { Logger.getLogger(SentenceDetectorTrainer.class.getName()).log(Level.SEVERE, null, ex); } } } //create and train model ObjectStream<String> lineStream = null; this.createdObject = null; try { lineStream = new PlainTextByLineStream(new FileInputStream(trainingFile), charset); ObjectStream<TokenSample> sampleStream = null; try { sampleStream = new TokenSampleStream(lineStream); this.createdObject = TokenizerME.train(sampleStream, myTokenizerFactory, TrainingParameters.defaultParams()); } catch (IOException ex) { Logger.getLogger(SentenceDetectorTrainer.class.getName()).log(Level.SEVERE, null, ex); } finally { if (sampleStream != null) { try { sampleStream.close(); } catch (IOException ex) { Logger.getLogger(SentenceDetectorTrainer.class.getName()).log(Level.SEVERE, null, ex); } } } } catch (FileNotFoundException ex) { Logger.getLogger(SentenceDetectorTrainer.class.getName()).log(Level.SEVERE, null, ex); } finally { if (lineStream != null) { try { lineStream.close(); } catch (IOException ex) { Logger.getLogger(SentenceDetectorTrainer.class.getName()).log(Level.SEVERE, null, ex); } } } if (createdObject != null) { OutputStream modelOut = null; File modelFile = new File("en-fiction-token.bin"); try { modelOut = new BufferedOutputStream(new FileOutputStream(modelFile)); createdObject.serialize(modelOut); } catch (IOException ex) { Logger.getLogger(SentenceDetectorTrainer.class.getName()).log(Level.SEVERE, null, ex); } finally { if (modelOut != null) { try { modelOut.close(); } catch (IOException ex) { Logger.getLogger(SentenceDetectorTrainer.class.getName()).log(Level.SEVERE, null, ex); } } } } textTestResults.setText(textTestResults.getText() + "done"); }).start(); }
From source file:com.civprod.writerstoolbox.OpenNLP.training.ThoughtAndSpeechTrainer.java
private void cmdTrainActionPerformed(java.awt.event.ActionEvent evt) {//GEN-FIRST:event_cmdTrainActionPerformed final ThoughtAndSpeechTrainer tempThis = this; new Thread(() -> { textTestResults.setText(""); Charset charset = Charset.forName("UTF-8"); //create TokenizerFactory part of the training context ThoughtAndSpeechParserFactory myTokenizerFactory = new ThoughtAndSpeechParserFactory("EN", this.saidWordsDictionary, this.thoughtWordsDictionary); /*ThoughtAndSpeechParser stdTokenizer = null; try {/* w w w .j a v a 2 s . c om*/ stdTokenizer = OpenNLPUtils.createTokenizer(); } catch (IOException ex) { Logger.getLogger(TokenizerTrainer.class.getName()).log(Level.SEVERE, null, ex); }*/ List<FileSplit> FileSplits = FileSplit.generateFileSplitsLOO(mFileCollectionListModel); File trainingFile = new File("en-ThoughtAndSpeech.train"); File testFile = new File("en-ThoughtAndSpeech.test"); SummaryStatistics curFStats = new SummaryStatistics(); SummaryStatistics curRecallStats = new SummaryStatistics(); SummaryStatistics curPrecisionStats = new SummaryStatistics(); SummaryStatistics stdFStats = new SummaryStatistics(); SummaryStatistics stdRecallStats = new SummaryStatistics(); SummaryStatistics stdPrecisionStats = new SummaryStatistics(); java.io.BufferedOutputStream trainingFileWriter = null; for (FileSplit curFileSplit : FileSplits) { try { //create training file trainingFileWriter = new java.io.BufferedOutputStream( new java.io.FileOutputStream(trainingFile)); for (File curTrainingFile : curFileSplit.getTrainingFiles()) { java.io.BufferedInputStream curTrainingFileReader = null; try { curTrainingFileReader = new java.io.BufferedInputStream( new java.io.FileInputStream(curTrainingFile)); while (curTrainingFileReader.available() > 0) { trainingFileWriter.write(curTrainingFileReader.read()); } } catch (IOException ex) { Logger.getLogger(SentenceDetectorTrainer.class.getName()).log(Level.SEVERE, null, ex); } finally { if (curTrainingFileReader != null) { curTrainingFileReader.close(); } } } trainingFileWriter.write('\n'); } catch (IOException ex) { Logger.getLogger(SentenceDetectorTrainer.class.getName()).log(Level.SEVERE, null, ex); } finally { if (trainingFileWriter != null) { try { trainingFileWriter.close(); } catch (IOException ex) { Logger.getLogger(SentenceDetectorTrainer.class.getName()).log(Level.SEVERE, null, ex); } } } //create test file java.io.BufferedOutputStream testFileWriter = null; try { //create training file testFileWriter = new java.io.BufferedOutputStream(new java.io.FileOutputStream(testFile)); for (File curTrainingFile : curFileSplit.getTestFiles()) { String testingFileName = curTrainingFile.getCanonicalPath(); textTestResults .setText(textTestResults.getText() + "testing with " + testingFileName + "\n"); java.io.BufferedInputStream curTrainingFileReader = null; try { curTrainingFileReader = new java.io.BufferedInputStream( new java.io.FileInputStream(curTrainingFile)); while (curTrainingFileReader.available() > 0) { int read = curTrainingFileReader.read(); testFileWriter.write(read); } } catch (IOException ex) { Logger.getLogger(SentenceDetectorTrainer.class.getName()).log(Level.SEVERE, null, ex); } finally { if (curTrainingFileReader != null) { curTrainingFileReader.close(); } } } testFileWriter.write('\n'); } catch (IOException ex) { Logger.getLogger(SentenceDetectorTrainer.class.getName()).log(Level.SEVERE, null, ex); } finally { if (testFileWriter != null) { try { testFileWriter.close(); } catch (IOException ex) { Logger.getLogger(SentenceDetectorTrainer.class.getName()).log(Level.SEVERE, null, ex); } } } //create and train model ObjectStream<String> trainingLineStream = null; ThoughtAndSpeechModel train = null; try { trainingLineStream = new PlainTextByLineStream(new MarkableFileInputStreamFactory(trainingFile), charset); ObjectStream<ThoughtAndSpeechSample> sampleStream = null; try { sampleStream = new ThoughtAndSpeechSampleStream(trainingLineStream); train = ThoughtAndSpeechParserME.train("en", sampleStream, myTokenizerFactory, TrainingParameters.defaultParams()); } catch (IOException ex) { Logger.getLogger(SentenceDetectorTrainer.class.getName()).log(Level.SEVERE, null, ex); } finally { if (sampleStream != null) { try { sampleStream.close(); } catch (IOException ex) { Logger.getLogger(SentenceDetectorTrainer.class.getName()).log(Level.SEVERE, null, ex); } } } } catch (IOException ex) { Logger.getLogger(SentenceDetectorTrainer.class.getName()).log(Level.SEVERE, null, ex); } finally { if (trainingLineStream != null) { try { trainingLineStream.close(); } catch (IOException ex) { Logger.getLogger(SentenceDetectorTrainer.class.getName()).log(Level.SEVERE, null, ex); } } } if (train != null) { ObjectStream<String> testingLineStream = null; try { testingLineStream = new PlainTextByLineStream(new MarkableFileInputStreamFactory(testFile), charset); ObjectStream<ThoughtAndSpeechSample> sampleStream = null; try { sampleStream = new ThoughtAndSpeechSampleStream(testingLineStream); ThoughtAndSpeechParserME testDetector = new ThoughtAndSpeechParserME(train); ThoughtAndSpeechEvaluator evaluator = new ThoughtAndSpeechEvaluator(testDetector); evaluator.evaluate(sampleStream); FMeasure testFMeasure = evaluator.getFMeasure(); curFStats.addValue(testFMeasure.getFMeasure()); curRecallStats.addValue(testFMeasure.getRecallScore()); curPrecisionStats.addValue(testFMeasure.getPrecisionScore()); textTestResults.setText(textTestResults.getText() + testFMeasure.getFMeasure() + " " + testFMeasure.getPrecisionScore() + " " + testFMeasure.getRecallScore() + "\n"); /*if (stdTokenizer != null) { testingLineStream = new PlainTextByLineStream(new FileInputStream(testFile), charset); sampleStream = new TokenSampleStream(testingLineStream); TokenizerEvaluator stdEvaluator = new TokenizerEvaluator(stdTokenizer); stdEvaluator.evaluate(sampleStream); FMeasure stdFMeasure = stdEvaluator.getFMeasure(); stdFStats.addValue(stdFMeasure.getFMeasure()); stdRecallStats.addValue(stdFMeasure.getRecallScore()); stdPrecisionStats.addValue(stdFMeasure.getPrecisionScore()); textTestResults.setText(textTestResults.getText() + " " + stdFMeasure.getFMeasure() + " " + stdFMeasure.getPrecisionScore() + " " + stdFMeasure.getRecallScore() + "\n"); }*/ textTestResults.setText(textTestResults.getText() + "\n"); } catch (IOException ex) { Logger.getLogger(SentenceDetectorTrainer.class.getName()).log(Level.SEVERE, null, ex); } finally { if (sampleStream != null) { try { sampleStream.close(); } catch (IOException ex) { Logger.getLogger(SentenceDetectorTrainer.class.getName()).log(Level.SEVERE, null, ex); } } } } catch (IOException ex) { Logger.getLogger(SentenceDetectorTrainer.class.getName()).log(Level.SEVERE, null, ex); } finally { if (testingLineStream != null) { try { testingLineStream.close(); } catch (IOException ex) { Logger.getLogger(SentenceDetectorTrainer.class.getName()).log(Level.SEVERE, null, ex); } } } } } textTestResults.setText(textTestResults.getText() + "\n"); textTestResults.setText(textTestResults.getText() + "test model\n"); textTestResults.setText(textTestResults.getText() + "f score mean " + curFStats.getMean() + " stdDev " + curFStats.getStandardDeviation() + "\n"); textTestResults.setText(textTestResults.getText() + "recall mean " + curRecallStats.getMean() + " stdDev " + curRecallStats.getStandardDeviation() + "\n"); textTestResults.setText(textTestResults.getText() + "precision score mean " + curPrecisionStats.getMean() + " stdDev " + curPrecisionStats.getStandardDeviation() + "\n"); textTestResults.setText(textTestResults.getText() + "std model\n"); textTestResults.setText(textTestResults.getText() + "f score mean " + stdFStats.getMean() + " stdDev " + stdFStats.getStandardDeviation() + "\n"); textTestResults.setText(textTestResults.getText() + "recall mean " + stdRecallStats.getMean() + " stdDev " + stdRecallStats.getStandardDeviation() + "\n"); textTestResults.setText(textTestResults.getText() + "precision score mean " + stdPrecisionStats.getMean() + " stdDev " + stdPrecisionStats.getStandardDeviation() + "\n"); //create combinded training file trainingFileWriter = null; try { trainingFileWriter = new java.io.BufferedOutputStream(new java.io.FileOutputStream(trainingFile)); for (File curTrainingFile : mFileCollectionListModel) { java.io.BufferedInputStream curTrainingFileReader = null; try { curTrainingFileReader = new java.io.BufferedInputStream( new java.io.FileInputStream(curTrainingFile)); while (curTrainingFileReader.available() > 0) { trainingFileWriter.write(curTrainingFileReader.read()); } } catch (IOException ex) { Logger.getLogger(SentenceDetectorTrainer.class.getName()).log(Level.SEVERE, null, ex); } finally { if (curTrainingFileReader != null) { curTrainingFileReader.close(); } } } trainingFileWriter.write('\n'); } catch (IOException ex) { Logger.getLogger(SentenceDetectorTrainer.class.getName()).log(Level.SEVERE, null, ex); } finally { if (trainingFileWriter != null) { try { trainingFileWriter.close(); } catch (IOException ex) { Logger.getLogger(SentenceDetectorTrainer.class.getName()).log(Level.SEVERE, null, ex); } } } //create and train model ObjectStream<String> lineStream = null; this.createdObject = null; try { lineStream = new PlainTextByLineStream(new MarkableFileInputStreamFactory(trainingFile), charset); ObjectStream<ThoughtAndSpeechSample> sampleStream = null; try { sampleStream = new ThoughtAndSpeechSampleStream(lineStream); this.createdObject = ThoughtAndSpeechParserME.train("en", sampleStream, myTokenizerFactory, TrainingParameters.defaultParams()); } catch (IOException ex) { Logger.getLogger(SentenceDetectorTrainer.class.getName()).log(Level.SEVERE, null, ex); } finally { if (sampleStream != null) { try { sampleStream.close(); } catch (IOException ex) { Logger.getLogger(SentenceDetectorTrainer.class.getName()).log(Level.SEVERE, null, ex); } } } } catch (IOException ex) { Logger.getLogger(SentenceDetectorTrainer.class.getName()).log(Level.SEVERE, null, ex); } finally { if (lineStream != null) { try { lineStream.close(); } catch (IOException ex) { Logger.getLogger(SentenceDetectorTrainer.class.getName()).log(Level.SEVERE, null, ex); } } } if (createdObject != null) { OutputStream modelOut = null; File modelFile = new File("en-ThoughtAndSpeech-token.bin"); try { modelOut = new BufferedOutputStream(new FileOutputStream(modelFile)); createdObject.serialize(modelOut); } catch (IOException ex) { Logger.getLogger(SentenceDetectorTrainer.class.getName()).log(Level.SEVERE, null, ex); } finally { if (modelOut != null) { try { modelOut.close(); } catch (IOException ex) { Logger.getLogger(SentenceDetectorTrainer.class.getName()).log(Level.SEVERE, null, ex); } } } } textTestResults.setText(textTestResults.getText() + "done"); }).start(); }
From source file:org.frc836.database.DBSyncService.java
private boolean loadTimestamp() { try {//from w w w. j a v a 2 s. c om BufferedInputStream bis = new BufferedInputStream(openFileInput(FILENAME)); byte[] buffer = new byte[bis.available()]; bis.read(buffer, 0, buffer.length); lastSync = new Timestamp(Long.valueOf(new String(buffer))); return true; } catch (Exception e) { if (lastSync == null) lastSync = new Timestamp(0); return false; } }