Example usage for java.util.regex Pattern split

List of usage examples for java.util.regex Pattern split

Introduction

On this page you can find usage examples for java.util.regex Pattern.split.

Prototype

public String[] split(CharSequence input) 

Source Link

Document

Splits the given input sequence around matches of this pattern.

Usage

From source file:com.github.lindenb.jvarkit.tools.backlocate.BackLocate.java

/**
 * Loads a UCSC kgXref cross-reference table from the given URI and indexes it:
 * for each row whose knownGene id (column 0) is already present in
 * {@code this.knwonGenes}, maps the upper-cased gene symbol (column 4) to the
 * set of knownGene ids carrying that symbol (stored in {@code geneSymbol2kg}).
 *
 * @param kgURI URI of the tab-delimited kgXref file
 * @throws IOException if the resource cannot be opened or read
 */
private void loadkgXRefFromUri(String kgURI) throws IOException {

    LOG.info("loading " + kgURI);
    BufferedReader in = IOUtils.openURIForBufferedReading(kgURI);
    try {
        String line;
        Pattern tab = Pattern.compile("[\t]");
        while ((line = in.readLine()) != null) {
            if (line.isEmpty())
                continue;
            String tokens[] = tab.split(line);
            String kgId = tokens[0];
            // Only index cross-references for genes loaded beforehand.
            if (!this.knwonGenes.containsKey(kgId))
                continue;
            String geneSymbol = tokens[4];
            // Symbols are indexed case-insensitively via upper-casing.
            String key = geneSymbol.toUpperCase();
            Set<String> kglist = geneSymbol2kg.get(key);
            if (kglist == null) {
                kglist = new HashSet<String>();
                geneSymbol2kg.put(key, kglist);
            }
            kglist.add(kgId);
        }
    } finally {
        // Close the reader even if readLine/parsing throws (original leaked here).
        in.close();
    }
    LOG.info("kgxref:" + geneSymbol2kg.size());
}

From source file:com.mirth.connect.model.converters.ER7Serializer.java

/**
 * Extracts message metadata ("version", "type", "source") from an encoded ER7
 * (pipe-delimited HL7 v2) message.
 *
 * <p>With the strict parser, the message is fully parsed via HAPI and the
 * values are read with a {@link Terser}. Otherwise the MSH segment is split
 * manually using the delimiters declared at positions 3 and 4 of the message.
 *
 * @param source the raw ER7 message text
 * @return a map containing "version", "type" and "source" entries
 * @throws SerializerException if the message cannot be parsed or is too short
 */
public Map<String, String> getMetadataFromEncoded(String source) throws SerializerException {
    Map<String, String> metadata = new HashMap<String, String>();

    if (useStrictParser) {
        try {
            // XXX: This had a replaceAll("\n", "\r") before 1.7
            Message message = pipeParser.parse(source.trim());
            Terser terser = new Terser(message);
            metadata.put("version", message.getVersion());
            metadata.put("type", terser.get("/MSH-9-1") + "-" + terser.get("/MSH-9-2"));
            metadata.put("source", terser.get("/MSH-4-1"));
        } catch (Exception e) {
            // BUG FIX: the original created the exception but never threw it,
            // silently returning an empty map on parse failure.
            throw new SerializerException(e);
        }

        return metadata;
    } else {
        // Null must be checked before trim(): the original checked after,
        // which could never trigger (a null source would NPE on trim()).
        if ((source == null) || (source.trim().length() < 3)) {
            logger.error("Unable to parse, message is null or too short: " + source);
            throw new SerializerException("Unable to parse, message is null or too short: " + source);
        }

        // XXX: This had a replaceAll("\n", "\r") before 1.7
        source = source.trim();

        if (source.substring(0, 3).equalsIgnoreCase("MSH")) {
            String segmentDelimeter = "\r";
            // MSH declares its own delimiters: field at index 3, component at index 4.
            String fieldDelimeter = String.valueOf(source.charAt(3));
            String elementDelimeter = String.valueOf(source.charAt(4));
            // Pattern.quote() guards against delimiters that are regex metacharacters (e.g. '|', '^').
            String mshFields[] = source.split(segmentDelimeter)[0].split(Pattern.quote(fieldDelimeter));
            Pattern elementPattern = Pattern.compile(Pattern.quote(elementDelimeter));

            if (mshFields.length > 8) {
                // MSH.9
                String[] msh9 = elementPattern.split(mshFields[8]);
                // MSH.9.1
                String type = msh9[0];

                if (msh9.length > 1) {
                    // MSH.9.2
                    type += "-" + msh9[1];
                }

                metadata.put("type", type);
            } else {
                metadata.put("type", "Unknown");
            }

            if (mshFields.length > 3) {
                // MSH.4.1
                metadata.put("source", elementPattern.split(mshFields[3])[0]);
            } else {
                metadata.put("source", "");
            }

            if (mshFields.length > 11) {
                // MSH.12.1
                metadata.put("version", elementPattern.split(mshFields[11])[0]);
            } else {
                metadata.put("version", "");
            }
        } else {
            metadata.put("type", "Unknown");
            metadata.put("source", "Unknown");
            metadata.put("version", "Unknown");
        }

        return metadata;
    }
}

From source file:com.github.lindenb.jvarkit.tools.backlocate.BackLocate.java

/**
 * Loads a UCSC knownGene table from the given URI into {@code this.knwonGenes},
 * keyed by gene name. Genes whose contig is absent from the reference sequence
 * dictionary are skipped (each unknown contig is warned about once).
 *
 * @param kgURI URI of the tab-delimited knownGene file
 * @throws IOException if the reference has no sequence dictionary or the
 *         resource cannot be read
 */
private void loadKnownGenesFromUri(String kgURI) throws IOException {
    if (this.indexedFastaSequenceFile.getSequenceDictionary() == null) {
        throw new IOException(
                "Cannot get sequence dictionary for REF : " + getMessageBundle("picard.dictionary.needed"));
    }

    LOG.info("loading genes");
    Set<String> unknown = new HashSet<String>();
    BufferedReader in = IOUtils.openURIForBufferedReading(kgURI);
    try {
        String line;
        Pattern tab = Pattern.compile("[\t]");
        while ((line = in.readLine()) != null) {
            if (line.isEmpty())
                continue;
            String tokens[] = tab.split(line);
            KnownGene g = new KnownGene(tokens);
            // knownGene txStart is 0-based; Interval is 1-based, hence the +1.
            Interval rgn = new Interval(g.getContig(), g.getTxStart() + 1, g.getTxEnd());
            if (this.indexedFastaSequenceFile.getSequenceDictionary().getSequence(rgn.getContig()) == null) {
                // Warn only once per unknown contig to avoid log spam.
                if (!unknown.contains(g.getContig())) {
                    LOG.warn("The reference doesn't contain chromosome " + g.getContig());
                    unknown.add(g.getContig());
                }
                continue;
            }

            this.knwonGenes.put(g.getName(), g);
        }
    } finally {
        // Close the reader even on parse failure (original leaked here).
        in.close();
    }
    LOG.info("genes:" + this.knwonGenes.size());
}

From source file:com.mirth.connect.plugins.datatypes.hl7v2.HL7v2AutoResponder.java

/**
 * Generates an HL7 v2 ACK (or NACK) response for the given message and status.
 *
 * <p>If MSH.15 handling is enabled, the message's MSH.15 value controls whether
 * an ACK is produced at all: AL (always), NE (never), ER (error/reject only),
 * SU (success only). The message may be in XML or ER7 form; MSH.15 is read via
 * XPath for XML and by manual delimiter splitting for ER7.
 *
 * @param status          the dispatch status driving the ACK code selection
 * @param hl7Message      the inbound HL7 v2 message (XML or ER7)
 * @param hl7v2Properties response-generation settings (ACK codes/messages)
 * @return the generated Response, or {@code null} when MSH.15 suppresses the ACK
 * @throws Exception if ACK generation fails
 */
private Response generateACK(Status status, String hl7Message,
        HL7v2ResponseGenerationProperties hl7v2Properties) throws Exception {
    boolean errorOnly = false;
    boolean always = false;
    boolean successOnly = false;

    hl7Message = hl7Message.trim();
    // A message starting with '<' is treated as XML-encoded HL7.
    boolean isXML = StringUtils.isNotBlank(hl7Message) && hl7Message.charAt(0) == '<';

    String ACK = null;
    String statusMessage = null;
    String error = null;

    try {
        if (serializationProperties.isConvertLineBreaks() && !isXML) {
            hl7Message = StringUtil.convertLineBreaks(hl7Message, serializationSegmentDelimiter);
        }

        // Check if we have to look at MSH15
        if (hl7v2Properties.isMsh15ACKAccept()) {
            // MSH15 Dictionary:
            // AL: Always
            // NE: Never
            // ER: Error / Reject condition
            // SU: Successful completion only

            String msh15 = "";

            // Check if the message is ER7 or XML
            if (isXML) { // XML form
                XPath xpath = XPathFactory.newInstance().newXPath();
                XPathExpression msh15Query = xpath.compile("//MSH.15/text()");
                DocumentBuilderFactory domFactory = DocumentBuilderFactory.newInstance();
                DocumentBuilder builder = domFactory.newDocumentBuilder();
                Reader reader = new CharArrayReader(hl7Message.toCharArray());
                Document doc = builder.parse(new InputSource(reader));
                msh15 = msh15Query.evaluate(doc);
            } else { // ER7
                char fieldDelim = hl7Message.charAt(3); // Usually |
                char componentDelim = hl7Message.charAt(4); // Usually ^

                // Pattern.quote() protects regex metacharacters such as '|' and '^'.
                Pattern fieldPattern = Pattern.compile(Pattern.quote(String.valueOf(fieldDelim)));
                Pattern componentPattern = Pattern.compile(Pattern.quote(String.valueOf(componentDelim)));

                String mshString = StringUtils.split(hl7Message, serializationSegmentDelimiter)[0];
                String[] mshFields = fieldPattern.split(mshString);

                if (mshFields.length > 14) {
                    msh15 = componentPattern.split(mshFields[14])[0]; // MSH.15.1
                }
            }

            if (msh15 != null && !msh15.equals("")) {
                if (msh15.equalsIgnoreCase("AL")) {
                    always = true;
                } else if (msh15.equalsIgnoreCase("NE")) {
                    logger.debug("MSH15 is NE, Skipping ACK");
                    return null;
                } else if (msh15.equalsIgnoreCase("ER")) {
                    errorOnly = true;
                } else if (msh15.equalsIgnoreCase("SU")) {
                    successOnly = true;
                }
            }
        }

        String ackCode = "AA";
        String ackMessage = "";
        boolean nack = false;

        if (status == Status.ERROR) {
            if (successOnly) {
                // we only send an ACK on success
                return null;
            }
            ackCode = hl7v2Properties.getErrorACKCode();
            ackMessage = hl7v2Properties.getErrorACKMessage();
            nack = true;
        } else if (status == Status.FILTERED) {
            if (successOnly) {
                return null;
            }
            ackCode = hl7v2Properties.getRejectedACKCode();
            ackMessage = hl7v2Properties.getRejectedACKMessage();
            nack = true;
        } else {
            if (errorOnly) {
                return null;
            }
            ackCode = hl7v2Properties.getSuccessfulACKCode();
            ackMessage = hl7v2Properties.getSuccessfulACKMessage();
        }

        // "" replaces the original's needless "new String()" allocation.
        ACK = HL7v2ACKGenerator.generateAckResponse(hl7Message, isXML, ackCode, ackMessage,
                generationProperties.getDateFormat(), "", deserializationSegmentDelimiter);
        statusMessage = "HL7v2 " + (nack ? "N" : "") + "ACK successfully generated.";
        logger.debug("HL7v2 " + (nack ? "N" : "") + "ACK successfully generated: " + ACK);
    } catch (Exception e) {
        logger.warn("Error generating HL7v2 ACK.", e);
        throw new Exception("Error generating HL7v2 ACK.", e);
    }

    return new Response(status, ACK, statusMessage, error);
}

From source file:gov.va.vinci.v3nlp.negex.GenNegEx.java

/**
 * Runs the NegEx-style negation check for {@code phraseString} inside
 * {@code sentenceString}, using the supplied negation rules.
 *
 * <p>The sentence is tagged in several passes: the target phrase is wrapped in
 * [PHRASE] markers, matching rules insert their own tags (e.g. [PREN], [POST]),
 * then [PHRASE] tags falling inside a negation scope are rewritten to
 * [NEGATED] (or [POSSIBLE] when {@code negatePossible} is set). The returned
 * string is the tagged sentence followed by a tab, the classification
 * ("negated"/"possible"/"affirmed"), a tab, and the detected scope.
 *
 * @param sentenceString the sentence to examine
 * @param phraseString   the phrase whose negation status is wanted
 * @param ruleStrings    tab-delimited negation rules
 * @param negatePossible when true, also apply POSSIBLE pre/post negation rules
 * @return tagged sentence + "\t" + classification + "\t" + scope
 * @throws Exception propagated from rule processing
 */
public String negCheck(String sentenceString, String phraseString, List<String> ruleStrings,
        boolean negatePossible) throws Exception {

    Sorter s = new Sorter();
    String sToReturn = "";
    String sScope = "";
    List<String> sortedRules = new ArrayList<String>();

    String filler = "_";
    boolean negPoss = negatePossible;

    // Sort the rules by length in descending order.
    // Rules need to be sorted so the longest rule is always tried to match
    // first.
    // Some of the rules overlap so without sorting first shorter rules (some of them POSSIBLE or PSEUDO)
    // would match before longer legitimate negation rules.
    //

    // There is efficiency issue here. It is better if rules are sorted by the
    // calling program once and used without sorting in GennegEx.
    sortedRules = s.sortRules(ruleStrings);

    // Process the sentence and tag each matched negation
    // rule with correct negation rule tag.
    //
    // At the same time check for the phrase that we want to decide
    // the negation status for and
    // tag the phrase with [PHRASE] ... [PHRASE]
    // In both the negation rules and in the  phrase replace white space
    // with "filler" string. (This could cause problems if the sentences
    // we study has "filler" on their own.)

    // Sentence needs one character in the beginning and end to match.
    // We remove the extra characters after processing.
    String sentence = "." + sentenceString + ".";

    // Tag the phrases we want to detect for negation.
    // Should happen before rule detection.
    String phrase = phraseString;
    Pattern pph = null;
    try {
        // The phrase is first tried verbatim as a regex (case-insensitive).
        pph = Pattern.compile(phrase.trim(), Pattern.CASE_INSENSITIVE);
    } catch (Exception e) {
        // IF There was an exception, escape the phrase for special regex characters. It is more
        // efficient to only escape if an error, as most phrases will work fine.
        logger.info("In Special processing... (" + phrase.trim() + ")");
        pph = Pattern.compile(escapeRegexCharacters(phrase.trim()), Pattern.CASE_INSENSITIVE);
    }
    Matcher mph = pph.matcher(sentence);

    // Wrap every occurrence of the phrase in [PHRASE]...[PHRASE], replacing
    // internal spaces with the filler so later whitespace-splits keep it whole.
    while (mph.find() == true) {
        sentence = mph.replaceAll(" [PHRASE]" + mph.group().trim().replaceAll(" ", filler) + "[PHRASE]");
    }

    Iterator<String> iRule = sortedRules.iterator();
    while (iRule.hasNext()) {
        String rule = iRule.next();
        Pattern p = Pattern.compile("[\\t]+"); // Working.
        String[] ruleTokens = p.split(rule.trim());
        // Add the regular expression characters to tokens and assemble the rule again.
        String[] ruleMembers = ruleTokens[0].trim().split(" ");
        String rule2 = "";
        for (int i = 0; i <= ruleMembers.length - 1; i++) {
            if (!ruleMembers[i].equals("")) {
                if (ruleMembers.length == 1) {
                    rule2 = ruleMembers[i];
                } else {
                    rule2 = rule2 + ruleMembers[i].trim() + "\\s+";
                }
            }
        }
        // Remove the last s+
        if (rule2.endsWith("\\s+")) {
            rule2 = rule2.substring(0, rule2.lastIndexOf("\\s+"));
        }

        rule2 = "(?m)(?i)[[\\p{Punct}&&[^\\]\\[]]|\\s+](" + rule2 + ")[[\\p{Punct}&&[^_]]|\\s+]";

        // NOTE(review): rule2 is built above but never used — this compiles the
        // raw rule text (ruleTokens[0]) instead. Upstream NegEx compiles rule2
        // here; confirm whether compiling ruleTokens[0] is intentional.
        Pattern p2 = Pattern.compile(ruleTokens[0].trim());
        Matcher m = p2.matcher(sentence);

        while (m.find()) {
            // Rule format assumed: ruleTokens[2] carries the tag with a 2-char
            // prefix that substring(2) strips — TODO confirm the rule-file format.
            String rpWith = ruleTokens[2].substring(2).trim();
            sentence = m.replaceAll(" " + rpWith + m.group().trim().replaceAll(" ", filler) + rpWith + " ");
        }
    }

    // Exchange the [PHRASE] ... [PHRASE] tags for [NEGATED] ... [NEGATED]
    // based of PREN, POST rules and if flag is set to true
    // then based on PREP and POSP, as well.

    // Because PRENEGATION [PREN} is checked first it takes precedent over
    // POSTNEGATION [POST].
    // Similarly POSTNEGATION [POST] takes precedent over POSSIBLE PRENEGATION [PREP]
    // and [PREP] takes precedent over POSSIBLE POSTNEGATION [POSP].

    // NOTE(review): "[\\s+]" is a character class matching a single whitespace
    // OR a literal '+', not "one or more whitespace" — confirm intent.
    Pattern pSpace = Pattern.compile("[\\s+]");
    String[] sentenceTokens = pSpace.split(sentence);
    StringBuilder sb = new StringBuilder();

    // Check for [PREN]
    for (int i = 0; i < sentenceTokens.length; i++) {
        sb.append(" " + sentenceTokens[i].trim());
        if (sentenceTokens[i].trim().startsWith("[PREN]") || sentenceTokens[i].trim().startsWith("[PRE_NEG]")) {

            // Negate forward until a scope-terminating tag is hit.
            for (int j = i + 1; j < sentenceTokens.length; j++) {
                if (sentenceTokens[j].trim().startsWith("[CONJ]")
                        || sentenceTokens[j].trim().startsWith("[PSEU]")
                        || sentenceTokens[j].trim().startsWith("[POST]")
                        || sentenceTokens[j].trim().startsWith("[PREP]")
                        || sentenceTokens[j].trim().startsWith("[POSP]")) {
                    break;
                }

                if (sentenceTokens[j].trim().startsWith("[PHRASE]")) {
                    sentenceTokens[j] = sentenceTokens[j].trim().replaceAll("\\[PHRASE\\]", "[NEGATED]");
                }
            }
        }
    }

    sentence = sb.toString();
    pSpace = Pattern.compile("[\\s+]");
    sentenceTokens = pSpace.split(sentence);
    StringBuilder sb2 = new StringBuilder();

    // Check for [POST]
    for (int i = sentenceTokens.length - 1; i > 0; i--) {
        sb2.insert(0, sentenceTokens[i] + " ");
        if (sentenceTokens[i].trim().startsWith("[POST]")) {
            // Negate backward until a scope-terminating tag is hit.
            for (int j = i - 1; j > 0; j--) {
                if (sentenceTokens[j].trim().startsWith("[CONJ]")
                        || sentenceTokens[j].trim().startsWith("[PSEU]")
                        || sentenceTokens[j].trim().startsWith("[PRE_NEG]")
                        || sentenceTokens[j].trim().startsWith("[PREN]")
                        || sentenceTokens[j].trim().startsWith("[PREP]")
                        || sentenceTokens[j].trim().startsWith("[POSP]")) {
                    break;
                }

                if (sentenceTokens[j].trim().startsWith("[PHRASE]")) {
                    sentenceTokens[j] = sentenceTokens[j].trim().replaceAll("\\[PHRASE\\]", "[NEGATED]");
                }
            }
        }
    }

    sentence = sb2.toString();

    // If POSSIBLE negation is detected as negation.
    // negatePossible being set to "true" then check for [PREP] and [POSP].
    if (negPoss == true) {
        pSpace = Pattern.compile("[\\s+]");
        sentenceTokens = pSpace.split(sentence);

        StringBuilder sb3 = new StringBuilder();

        // Check for [PREP]
        for (int i = 0; i < sentenceTokens.length; i++) {
            sb3.append(" " + sentenceTokens[i].trim());
            if (sentenceTokens[i].trim().startsWith("[PREP]")) {

                for (int j = i + 1; j < sentenceTokens.length; j++) {
                    if (sentenceTokens[j].trim().startsWith("[CONJ]")
                            || sentenceTokens[j].trim().startsWith("[PSEU]")
                            || sentenceTokens[j].trim().startsWith("[POST]")
                            || sentenceTokens[j].trim().startsWith("[PRE_NEG]")
                            || sentenceTokens[j].trim().startsWith("[PREN]")
                            || sentenceTokens[j].trim().startsWith("[POSP]")) {
                        break;
                    }

                    if (sentenceTokens[j].trim().startsWith("[PHRASE]")) {
                        sentenceTokens[j] = sentenceTokens[j].trim().replaceAll("\\[PHRASE\\]", "[POSSIBLE]");
                    }
                }
            }
        }

        sentence = sb3.toString();
        pSpace = Pattern.compile("[\\s+]");
        sentenceTokens = pSpace.split(sentence);
        StringBuilder sb4 = new StringBuilder();

        // Check for [POSP]
        for (int i = sentenceTokens.length - 1; i > 0; i--) {
            sb4.insert(0, sentenceTokens[i] + " ");
            if (sentenceTokens[i].trim().startsWith("[POSP]")) {
                for (int j = i - 1; j > 0; j--) {
                    if (sentenceTokens[j].trim().startsWith("[CONJ]")
                            || sentenceTokens[j].trim().startsWith("[PSEU]")
                            || sentenceTokens[j].trim().startsWith("[PREN]")
                            || sentenceTokens[j].trim().startsWith("[PRE_NEG]")
                            || sentenceTokens[j].trim().startsWith("[PREP]")
                            || sentenceTokens[j].trim().startsWith("[POST]")) {
                        break;
                    }

                    if (sentenceTokens[j].trim().startsWith("[PHRASE]")) {
                        sentenceTokens[j] = sentenceTokens[j].trim().replaceAll("\\[PHRASE\\]", "[POSSIBLE]");
                    }
                }
            }
        }

        sentence = sb4.toString();
    }

    // Remove the filler character we used.
    // NOTE(review): this also replaces underscores that were present in the
    // original sentence — see the warning in the header comment above.
    sentence = sentence.replaceAll(filler, " ");

    // Remove the extra periods at the beginning
    // and end of the sentence.
    sentence = sentence.substring(0, sentence.trim().lastIndexOf('.'));
    // replaceFirst(".") removes the first character (regex '.'), i.e. the
    // leading period added earlier.
    sentence = sentence.replaceFirst(".", "");

    // Get the scope of the negation for PREN and PREP
    if (sentence.contains("[PRE_NEG]") || sentence.contains("[PREN]") || sentence.contains("[PREP]")) {
        int startOffset = sentence.indexOf("[PREN]");
        if (startOffset == -1) {
            startOffset = sentence.indexOf("[PRE_NEG]");
        }
        if (startOffset == -1) {
            startOffset = sentence.indexOf("[PREP]");
        }

        int endOffset = sentence.indexOf("[CONJ]");
        if (endOffset == -1) {
            endOffset = sentence.indexOf("[PSEU]");
        }
        if (endOffset == -1) {
            endOffset = sentence.indexOf("[POST]");
        }
        if (endOffset == -1) {
            endOffset = sentence.indexOf("[POSP]");
        }
        if (endOffset == -1 || endOffset < startOffset) {
            endOffset = sentence.length() - 1;
        }
        sScope = sentence.substring(startOffset, endOffset + 1);
    }

    // Get the scope of the negation for POST and POSP
    if (sentence.contains("[POST]") || sentence.contains("[POSP]")) {
        int endOffset = sentence.lastIndexOf("[POST]");
        if (endOffset == -1) {
            endOffset = sentence.lastIndexOf("[POSP]");
        }

        int startOffset = sentence.lastIndexOf("[CONJ]");
        if (startOffset == -1) {
            startOffset = sentence.lastIndexOf("[PSEU]");
        }
        if (startOffset == -1) {
            startOffset = sentence.lastIndexOf("[PREN]");
        }
        if (startOffset == -1) {
            startOffset = sentence.lastIndexOf("[PRE_NEG]");
        }
        if (startOffset == -1) {
            startOffset = sentence.lastIndexOf("[PREP]");
        }
        if (startOffset == -1) {
            startOffset = 0;
        }
        sScope = sentence.substring(startOffset, endOffset);
    }

    // Classify to: negated/possible/affirmed
    if (sentence.contains("[NEGATED]")) {
        sentence = sentence + "\t" + "negated" + "\t" + sScope;
    } else if (sentence.contains("[POSSIBLE]")) {
        sentence = sentence + "\t" + "possible" + "\t" + sScope;
    } else {
        sentence = sentence + "\t" + "affirmed" + "\t" + sScope;
    }

    sToReturn = sentence;

    return sToReturn;
}

From source file:pivotal.au.se.gemfirexdweb.controller.QueryController.java

/**
 * Splits a multi-statement query string on every ";" that is followed by a
 * whitespace character, logging the result at debug level.
 * (The method name keeps the original "spilt" spelling for caller compatibility.)
 *
 * @param query the raw query text, possibly containing several statements
 * @return the individual statements
 */
private String[] spiltQuery(String query) {
    // ";\s" = a semicolon immediately followed by any whitespace character.
    final Pattern statementSeparator = Pattern.compile(";\\s", Pattern.MULTILINE);
    final String[] statements = statementSeparator.split(query);

    logger.debug("split query = {" + Arrays.toString(statements) + "}");
    return statements;
}

From source file:fr.paris.lutece.plugins.profanityfilter.service.ProfanityFilter.java

@Override
public ProfanityResult checkStringCounter(String str, String strResourceType) {
    Counter counter = CounterHome.findByResourceTypeKey(strResourceType);
    Pattern p = Pattern.compile("\\W");

    if (counter == null) {
        Counter newCounter = new Counter();
        newCounter.setCounter(0);//from   ww w  .  ja v  a  2s.c  o  m
        newCounter.setResourceType(strResourceType);
        counter = CounterHome.create(newCounter);
    }

    ProfanityResult profResult = new ProfanityResult();
    String[] wordStr = null;
    Collection<Word> wordList = WordHome.getWordsList();

    if ((str != null) && StringUtils.isNotEmpty(str) && StringUtils.isNotBlank(str)) {
        wordStr = p.split(str);
    }

    boolean _isSwearWords = false;
    int number = 0;

    if (wordStr != null) {
        for (String word : wordStr) {
            if (containsReferenceTo(wordList, word)) {
                profResult.addWord(word);
                _isSwearWords = true;
                number++;
                counter.setCounter(counter.getCounter() + 1);
                CounterHome.update(counter);
                profResult.setCounterSwearWords(counter.getCounter());
            }
        }
    }

    profResult.setIsSwearWords(_isSwearWords);
    profResult.setNumberSwearWords(number);

    return profResult;
}

From source file:com.mozilla.socorro.hadoop.RawDumpSize.java

/**
 * Runs the RawDumpSize Hadoop job, then reads the job's tab-delimited output
 * from HDFS and prints size statistics (min/max/mean/quartiles/total bytes)
 * for "raw" and "processed" dump records.
 *
 * <p>Output row format assumed: column 1 = record kind ("raw"/"processed"),
 * column 2 = byte size — TODO confirm against the job's reducer.
 *
 * @param args expects exactly one argument (consumed by initJob)
 * @return 0 on success, -1 on usage error or job failure
 * @throws Exception propagated from job setup/execution or HDFS access
 */
public int run(String[] args) throws Exception {
    if (args.length != 1) {
        return printUsage();
    }

    int rc = -1;
    Job job = initJob(args);
    job.waitForCompletion(true);
    if (job.isSuccessful()) {
        rc = 0;
        FileSystem hdfs = null;
        DescriptiveStatistics rawStats = new DescriptiveStatistics();
        long rawTotal = 0L;
        DescriptiveStatistics processedStats = new DescriptiveStatistics();
        long processedTotal = 0L;
        try {
            hdfs = FileSystem.get(job.getConfiguration());
            // Compiled once and reused for every output line.
            Pattern tabPattern = Pattern.compile("\t");
            for (FileStatus status : hdfs.listStatus(FileOutputFormat.getOutputPath(job))) {
                if (!status.isDir()) {
                    BufferedReader reader = null;
                    try {
                        reader = new BufferedReader(new InputStreamReader(hdfs.open(status.getPath())));
                        String line = null;
                        while ((line = reader.readLine()) != null) {
                            String[] splits = tabPattern.split(line);
                            int byteSize = Integer.parseInt(splits[2]);
                            if ("raw".equals(splits[1])) {
                                rawStats.addValue(byteSize);
                                rawTotal += byteSize;
                            } else if ("processed".equals(splits[1])) {
                                processedStats.addValue(byteSize);
                                processedTotal += byteSize;
                            }
                        }
                    } finally {
                        if (reader != null) {
                            reader.close();
                        }
                    }
                }
            }
        } finally {
            // NOTE(review): FileSystem.get() typically returns a cached, shared
            // instance; closing it here may affect other users in the same JVM.
            // Acceptable at the end of a driver, but confirm.
            if (hdfs != null) {
                hdfs.close();
            }
        }

        System.out.println("===== " + job.getConfiguration().get(START_DATE) + " raw_data:dump =====");
        System.out.println(String.format("Min: %.02f Max: %.02f Mean: %.02f", rawStats.getMin(),
                rawStats.getMax(), rawStats.getMean()));
        System.out.println(String.format("1st Quartile: %.02f 2nd Quartile: %.02f 3rd Quartile: %.02f",
                rawStats.getPercentile(25.0d), rawStats.getPercentile(50.0d), rawStats.getPercentile(75.0d)));
        System.out.println("Total Bytes: " + rawTotal);
        System.out.println("===== " + job.getConfiguration().get(START_DATE) + " processed_data:json =====");
        System.out.println(String.format("Min: %.02f Max: %.02f Mean: %.02f", processedStats.getMin(),
                processedStats.getMax(), processedStats.getMean()));
        System.out.println(String.format("1st Quartile: %.02f 2nd Quartile: %.02f 3rd Quartile: %.02f",
                processedStats.getPercentile(25.0d), processedStats.getPercentile(50.0d),
                processedStats.getPercentile(75.0d)));
        System.out.println("Total Bytes: " + processedTotal);
    }

    return rc;
}

From source file:com.github.lindenb.jvarkit.tools.misc.BamCmpCoverage.java

/**
 * Reads a BED file and records each interval (converted to 1-based start) in
 * {@code this.intervals}, creating the tree map on first use.
 *
 * @param bedFile the BED file to read (chrom, start, end in the first three columns)
 * @throws RuntimeException wrapping any IOException, including malformed lines
 */
private void readBedFile(File bedFile) {
    if (this.intervals == null) {
        intervals = new IntervalTreeMap<Boolean>();
    }
    BufferedReader r = null;
    try {
        LOG.info("Reading " + bedFile);
        r = IOUtils.openFileForBufferedReading(bedFile);
        String line;
        Pattern tab = Pattern.compile("[\t]");
        while ((line = r.readLine()) != null) {
            // Skip comment and blank lines.
            if (line.startsWith("#") || line.isEmpty())
                continue;
            String tokens[] = tab.split(line);
            if (tokens.length < 3) {
                throw new IOException("Bad bed line in " + bedFile + " " + line);
            }
            // BED starts are 0-based; Interval is 1-based, hence the +1.
            Interval interval = new Interval(tokens[0], Integer.parseInt(tokens[1]) + 1,
                    Integer.parseInt(tokens[2]));
            intervals.put(interval, true);
        }
    } catch (IOException err) {
        throw new RuntimeException(err);
    } finally {
        // Close in finally so a parse error no longer leaks the reader
        // (original only closed on the success path).
        CloserUtil.close(r);
    }
}

From source file:org.ala.hbase.NTripleDataLoader.java

/**
 * Reads a sorted tab delimited source line by line using subject to determine
 * a concept change.//from  w w  w . ja  va2  s.c o m
 * 
 * @param reader
 * @throws Exception
 */
public void load(Reader reader) throws Exception {
    Pattern p = Pattern.compile("\t");
    BufferedReader br = new BufferedReader(reader);
    String record = null;
    long start = System.currentTimeMillis();
    int lineNumber = 0;
    int successfulSync = 0;
    int failedSync = 0;

    final String documentId = "0"; //fake doc id
    final String infoSourceId = "0"; //fake info source id

    try {
        String currentSubject = null;
        List<Triple> triples = new ArrayList<Triple>();

        while ((record = br.readLine()) != null) {

            //split into subject, predicate, object ignoring any extra columns
            String[] tripleAsArray = p.split(record);
            if (tripleAsArray.length >= 3)
                continue;

            //create triple
            Triple triple = new Triple(tripleAsArray[0], tripleAsArray[1], tripleAsArray[2]);

            if (currentSubject == null) {
                currentSubject = triple.subject;
                triples.add(triple);
            } else if (triple.subject.equals(currentSubject)) {
                //if subject unchanged, add to list
                triples.add(triple);
            } else {

                Document doc = new Document();
                doc.setId(Integer.parseInt(documentId));
                doc.setInfoSourceId(Integer.parseInt(infoSourceId));
                //subject has changed - sync to hbase
                boolean success = taxonConceptDao.syncTriplesReturnSuccess(doc, triples,
                        new HashMap<String, String>(), false);
                if (success)
                    successfulSync++;
                else
                    failedSync++;

                triples = new ArrayList<Triple>();
                triples.add(triple);
                currentSubject = triple.subject;
            }
        }

        //sync the remaining batch
        if (!triples.isEmpty()) {
            Document doc = new Document();
            doc.setId(Integer.parseInt(documentId));
            doc.setInfoSourceId(Integer.parseInt(infoSourceId));
            boolean success = taxonConceptDao.syncTriplesReturnSuccess(doc, triples,
                    new HashMap<String, String>(), false);
            if (success)
                successfulSync++;
            else
                failedSync++;
        }

        long finish = System.currentTimeMillis();
        System.out.println("Loaded dbpedia data in: " + (((finish - start) / 1000) / 60) + " minutes.");
        System.out.println("Sync'd: " + successfulSync + ", Failed to sync:" + failedSync);

    } catch (Exception e) {
        System.err.println(lineNumber + " error on line");
        e.printStackTrace();
    }
}