Java tutorial
/** * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ // Author: Avijit Gupta (mailforavijit@gmail.com) package contrail.correct; import static org.junit.Assert.assertEquals; import static org.junit.Assert.fail; import java.io.File; import java.io.IOException; import java.util.ArrayList; import java.util.HashMap; import java.util.Iterator; import java.util.StringTokenizer; import org.apache.avro.Schema; import org.apache.avro.file.DataFileReader; import org.apache.avro.file.DataFileWriter; import org.apache.avro.io.DatumReader; import org.apache.avro.io.DatumWriter; import org.apache.avro.specific.SpecificDatumReader; import org.apache.avro.specific.SpecificDatumWriter; import org.apache.commons.io.FileUtils; public class TestFlashExtension { //5 records - added a \ at 27:212, 22:239 & 27:226 because of presence of " private String joinedFastq = "@SRR022868.8762453/1 TAATTTAACTTTGTCTGATAATTGTACGCTTAAATCAACGTCTTTCGGTAAAATATCAGATTGTGCTGGAATTAATAACACATCATCAACCGTTAATGATT II0IIII8IIIIIIIII=I,IIIIIIIIIII,1+II4)HIBIIII?IIE6*(1I1;I,E&I+I9I5/IA&&3=+#I)%+(5*2&&/0%+#\"/$(%$*0%(+ " + "@SRR022868.8762453/2 AGGCTGTTTTTAATGTGGGAAAGTAAATTTGCAAAAGAATCATTAACGTTTGATGATGTGTTATTAATTCCAGCACAATCTGATATTTTACCGAAAGACGT CIIIIIIIIIII>IIIIII&,5II+(*IIIID:*&5I4.II*II.)CI5AI;5IC+0I%1(6&26&-,.)+#2.*3)%-.0-$'%&'&)%)&,$##&#+'+ " + "@SRR022868.8762473/1 TTTTTATTTAAATTAATCATATAATTGCGAGGAGAATATTATGGATTTCGTTAATAATGATACAAGACAAATTGCTAAAAACTTATTAGGTGTCAAAGTGA IIIIIIII=@=FIIIIIIIIIIIIIIIII3II%I>/I,II=III2IIIIII:<1F/7I<I5B<4-I*9;&-A)I?@,$*)')/020+(;/#@+,,#%6%?' " + "@SRR022868.8762473/2 TTCCACGATGTAGCCTGTATACGTTTGAGTGGTATCCTGATAAATCACTTTGACACCTAATAAGTTTTTAGCAATTTGTCTTGTATCATTATTAACGAAAT IIIIIIIIIIIIIIIIGIIII4I5GII.I)II1&9F/II&:*I&I62(744I723.3=>/90&@5?1,<&1-,'.,/,&-2*+.&&+%/)(--$#(,%##* " + "@SRR022868.8762487/1 AGTAATATTGTTGCGTCTGGTATTGTATTAAGTGTAGTTGTTATTTTTGTCAAAGATCAATCAGATTTATCATTGTATGTATTTACTATTGCTATTGTGAC 8II05IIIIIIIIIIIIIIIFEIII82II/+III?+I?IIFI(IIIII22I6+*I+=0*%F/&7&C:?,?-*9@=3&6@1&<.1%*@%/32&,#./-('#% " + "@SRR022868.8762487/2 ACGAACGAAACAATTGCCAGACGTGTATCCAATTAACCGAAACAAAGCTAATGTATCGTTTTAAATAGATAAACAAAGGTAATTGGTTTAATACCGTCACA 59IABIII6EIIAIIIII;I;EI9I:;IID-&II+9C9I%*(9+-&..;&()5'&6-:'0,/*&*((*%+%#%&&#$*&$&\")%&*%&&#%(''\")$*#&# " + "@SRR022868.8762490/1 TTTATATGAAGAAATGAACCATTTATTGTATTGTAGTTCAGCTGGTCATGAGCCTGGATATATTTATCGCGCTGAAAAAGAAGACTTTGAAGAAATTTCAG III4IHIII=IIIIIIIIIIHIII>IEIIIIIIH9IDII1IIIIB:A.II$I-:CHF(8*.+834(1/@1<4+.''%#'/%%2'#+)*.#%2#%&(')(&) " + "@SRR022868.8762490/2 ATTAAATCATCAAGGTATATTGGAATGTCTTGTTGTTGATATCGTGTTTGTGAACTGATTCCTAACACTCTACCTCTAACTGAAATTTCTTCACATTCTTC ,IIDI:IIIII8III+/I4IIIF,/I$IFHI97=IC6>)8&4.I6?47-1&2%(*2;&+,)(,&&*'&1-)#+%%+)$%))'$$$'%&)%'(%$%+%&$(& " + "@SRR022868.8762492/1 CCAGGATCAAACTCTCCATAAAAATTATGATGTTTGATTAGCTCATAAGTACTAAATAATGTTTGTAACTTATAGTTACGTTTTTTGGAATTAACGTTGAC II;IIBIII8>IIIIII7IDIA0IIIDII6IIIIII,II9IIII4I+3I80II+'.I*1II3<>8;)+4@<+I*F.A,0F.0443*&)&#$<#(-*%+,#' " + "@SRR022868.8762492/2 TGTCGGTAAGAAAAATGAACATTGAAAACTGAATGACAATATGTCAACGTTAATTCCAAAAAACGTAACTATAAGTTACAAACATTATTTAGTACTTATTT IIIIII0IHI?III49B&:852ID'4A;)56.%/B(7(%/)E.*@(+/+#0&)*/(,&&%+(,('$)((+%%$&$%+$(#&$$#$)$&*&&#$#'%&$&!# "; // Path of flash binary private String flashBinaryPath = ""; //remember to add spaces after the records - they are delimitors private String expectedOutput = "@SRR022868.8762453 TAATTTAACTTTGTCTGATAATTGTACGCTTAAATCAACGTCTTTCGGTAAAATATCAGATTGTGCTGGAATTAATAACACATCATCAAACGTTAATGATTCTTTTGCAAATTTACTTTCCCACATTAAAAACAGCCT II0IIII8IIIIIIIII=I,IIIIIIIIIII,1+II4+HIBIIII?IIE6*(1I1;I0E-I+I9I5/IA.,3=62I6(1(I02CI5;IA#IC).II*II.4I5&*:DIIII*(+II5,&IIIIII>IIIIIIIIIIIC " + "@SRR022868.8762473 TTTTTATTTAAATTAATCATATAATTGCGAGGAGAATATTATGGATTTCGTTAATAATGATACAAGACAAATTGCTAAAAACTTATTAGGTGTCAAAGTGATTTATCAGGATACCACTCAAACGTATACAGGCTACATCGTGGAA IIIIIIII=@=FIIIIIIIIIIIIIIIII3II%I>/I,II=III2IIIIII:<1F/7I<I5B<42I*9;,.A,I?@<,1?5@/090>=;/3@7I44762?I&I*:&II/F9&1II)I.IIG5I4IIIIGIIIIIIIIIIIIIIII " + "@SRR022868.8762490 TTTATATGAAGAAATGAACCATTTATTGTATTGTAGTTCAGCTGGTCATGAGCCTGGATATATTTATCGCGCTGAAAAAGAAGAATTTGAAGAAATTTCAGTTAGAGGTAGAGTGTTAGGAATCAGTTCACAAACACGATATCAACAACAAGACATTCCAATATACCTTGATGATTTAAT III4IHIII=IIIIIIIIIIHIII>IEIIIIIIH9IDII1IIIIB:A.II$I-:CHF(8*.+834(1/@1<4+.''%#'/(%2'#+#*.'%2&%'(')())%$)+%%+#)-1&'*&&,(),+&;2*(%2&1-74?6I.4&8)>6CI=79IHFI$I/,FIII4I/+III8IIIII:IDII, " + "@SRR022868.8762492 CCAGGATCAAACTCTCCATAAAAATTATGATGTTTGATTAGCTCATAAGTACTAAATAATGTTTGTAACTTATAGTTACGTTTTTTGGAATTAACGTTGACATATTGTCATTCAGTTTTCAATGTTCATTTTTCTTACCGACA II;IIBIII8>IIIIII7IDIA0IIIDII6IIIIII,II9II#!4I+3I80II+*.I*1II3<>8;++4@<+I+F.A,0F.0443*,)/*)<0(-/++@*.E)/%(7(B/%.65);A4'DI258:&B94III?IHI0IIIIII "; public void testMap() throws IOException { File temp = getTempDirectory(); File outputFile = new File(temp, "output"); File flashInput = new File(temp, "flashInput.avro"); runFlashTest(temp, flashInput, outputFile); File outputAvroFile = new File(outputFile, "part-00000.avro"); Schema schema = (new fastqrecord()).getSchema(); DatumReader<fastqrecord> datum_reader = new SpecificDatumReader<fastqrecord>(schema); DataFileReader<fastqrecord> reader = new DataFileReader<fastqrecord>(outputAvroFile, datum_reader); int numberOfFastqReads = 0; ArrayList<fastqrecord> output = new ArrayList<fastqrecord>(); while (reader.hasNext()) { fastqrecord record = reader.next(); output.add(record); numberOfFastqReads++; } HashMap<String, String> expectedHashMap = getExpectedHashMap(); assertEquals(expectedHashMap.size(), numberOfFastqReads); assertMapOutput(output, expectedHashMap); if (temp.exists()) { FileUtils.deleteDirectory(temp); } } public void testRun() throws IOException { File temp = getTempDirectory(); File outputFile = new File(temp, "output"); File outFile = new File(temp, "flashInput.avro"); runFlashTest(temp, outFile, outputFile); if (temp.exists()) { FileUtils.deleteDirectory(temp); } } private File getTempDirectory() { File temp = null; try { temp = File.createTempFile("temp", Long.toString(System.nanoTime())); } catch (IOException exception) { fail("Could not create temporary file. Exception:" + exception.getMessage()); } if (!(temp.delete())) { throw new RuntimeException("Could not delete temp file: " + temp.getAbsolutePath()); } if (!(temp.mkdir())) { throw new RuntimeException("Could not create temp directory: " + temp.getAbsolutePath()); } return temp; } private void runFlashTest(File tempDirectory, File flashInput, File outputPath) { writeDataToFile(flashInput); runApp(flashInput, outputPath); } private void runApp(File input, File output) { InvokeFlash flashInvoker = new InvokeFlash(); InvokeFlash.blockSize = 3; String[] args = { "--inputpath=" + input.toURI().toString(), "--outputpath=" + output.toURI().toString(), "--flash_binary=" + flashBinaryPath }; try { flashInvoker.run(args); } catch (Exception exception) { fail("Exception occured:" + exception.getMessage()); } } private void writeDataToFile(File outFile) { // Write the data to the file. Schema schema = (new MatePair()).getSchema(); DatumWriter<MatePair> datum_writer = new SpecificDatumWriter<MatePair>(schema); DataFileWriter<MatePair> writer = new DataFileWriter<MatePair>(datum_writer); StringTokenizer st = new StringTokenizer(joinedFastq, " "); try { writer.create(schema, outFile); while (st.hasMoreTokens()) { MatePair mateRecord = getMateRecord(st); writer.append(mateRecord); } writer.close(); } catch (IOException exception) { fail("There was a problem writing to an avro file. Exception:" + exception.getMessage()); } } private void assertMapOutput(ArrayList<fastqrecord> actualOutput, HashMap<String, String> expectedHashMap) { Iterator<fastqrecord> iterator = actualOutput.iterator(); while (iterator.hasNext()) { fastqrecord flashedRecord = iterator.next(); String id = flashedRecord.getId().toString(); String dna = flashedRecord.getRead().toString(); String qvalue = flashedRecord.getQvalue().toString(); String receivedValue = dna + " " + qvalue; assertEquals(expectedHashMap.get(id), receivedValue); } } /** * Reads next 6 records separated by spaces and creates a mateRecord from them * @param testStringToken * @return */ private MatePair getMateRecord(StringTokenizer testStringToken) { MatePair mateRecord = new MatePair(); mateRecord.setLeft(new fastqrecord()); mateRecord.setRight(new fastqrecord()); mateRecord.getLeft().setId(testStringToken.nextToken()); mateRecord.getLeft().setRead(testStringToken.nextToken()); mateRecord.getLeft().setQvalue(testStringToken.nextToken()); mateRecord.getRight().setId(testStringToken.nextToken()); mateRecord.getRight().setRead(testStringToken.nextToken()); mateRecord.getRight().setQvalue(testStringToken.nextToken()); return mateRecord; } private HashMap<String, String> getExpectedHashMap() { HashMap<String, String> expectedHashMap = new HashMap<String, String>(); StringTokenizer tokenizer = new StringTokenizer(expectedOutput, " "); while (tokenizer.hasMoreTokens()) { String seqId = tokenizer.nextToken(); String dna = tokenizer.nextToken(); String qvalue = tokenizer.nextToken(); String expectedString = dna + " " + qvalue; expectedHashMap.put(seqId, expectedString); } return expectedHashMap; } /** * @param args * @throws IOException */ public static void main(String[] args) throws IOException { if (args.length != 1 || !args[0].contains("--flash_binary=")) { throw new IOException( "Specify --flash_binary parameter only\nArgument Example: --flash_binary=/path/to/flash/binary"); } TestFlashExtension tester = new TestFlashExtension(); tester.flashBinaryPath = args[0].substring(args[0].indexOf('=') + 1); if (tester.flashBinaryPath.trim().length() == 0) { throw new IOException( "Specify --flash_binary parameter only\nArgument Example: --flash_binary=/path/to/flash/binary"); } tester.testRun(); System.out.println("Execution Test PASSED"); tester.testMap(); System.out.println("Correctness Test PASSED"); } }