Example usage for javax.xml.stream XMLInputFactory newInstance

Introduction

This page collects example usages of javax.xml.stream.XMLInputFactory.newInstance().

Prototype

public static XMLInputFactory newInstance() throws FactoryConfigurationError

Source Link

Document

Creates a new instance of the factory in exactly the same manner as the newFactory() method.

Usage

From source file:act.installer.pubchem.PubchemParser.java

/**
 * Initializes a PubchemParser.  Must be called before the PubchemParser can be used.
 * @throws XPathExpressionException/*from   w  ww .  ja v  a  2s  .  c o  m*/
 * @throws ParserConfigurationException
 */
public void init() throws ParserConfigurationException, JaxenException {
    // Would rather do this in its own block, but have to handle the XPath exception. :(
    for (PC_XPATHS x : PC_XPATHS.values()) {
        xpaths.put(x, x.compile());
    }

    DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
    documentBuilder = factory.newDocumentBuilder();

    xmlInputFactory = XMLInputFactory.newInstance();

    /* Configure the XMLInputFactory to return event streams that coalesce consecutive character events.  Without this
     * we can end up with malformed names and InChIs, as XPath will only fetch the first text node if there are several
     * text children under one parent. */
    xmlInputFactory.setProperty(XMLInputFactory.IS_COALESCING, ENABLE_XML_STREAM_TEXT_COALESCING);
    if ((Boolean) xmlInputFactory.getProperty(XMLInputFactory.IS_COALESCING)) {
        LOGGER.info("Successfully configured XML stream to coalesce character elements.");
    } else {
        LOGGER.error("Unable to configure XML stream to coalesce character elements.");
    }
}

From source file:de.dfki.km.leech.parser.wikipedia.WikipediaDumpParser.java

/**
 * Scans a Wikipedia XML dump for redirect pages and collects them, keyed by redirect target.
 *
 * <p>For every {@code <title>} element the title text is remembered. When a following {@code <text>} element
 * starts with redirect markup, the link target between the first {@code [[} and {@code ]]} is extracted and the
 * remembered title is added to the result under that target.
 *
 * <p>The event reader created here is released before returning; the supplied input stream is not closed by this
 * method.
 *
 * @param sWikipediaDump the Wikipedia dump XML content
 * @return a multi-value map from redirect target title to the titles of the pages redirecting to it
 * @throws FileNotFoundException declared for interface compatibility; nothing visible in this method raises it
 * @throws XMLStreamException if the dump cannot be read as XML
 */
public MultiValueHashMap<String, String> getPageTitle2Redirects(InputStream sWikipediaDump)
        throws FileNotFoundException, XMLStreamException {
    // Redirect markup examples found in dumps:
    // <text xml:space="preserve">#REDIRECT [[Autopoiesis]]</text>
    // <text xml:space="preserve">#REDIRECT:[[Hans Leo Haler]]</text>
    // <text xml:space="preserve">#redirect [[Weier Hai]]</text>
    // #weiterleitung
    // <page>
    // <title>Autopoiesis</title>

    final Logger log = Logger.getLogger(WikipediaDumpParser.class.getName());
    log.info("will collect redirects from wikipedia dump...");

    MultiValueHashMap<String, String> hsPageTitle2Redirects = new MultiValueBalancedTreeMap<String, String>();

    String strCurrentTitle = "";
    XMLInputFactory xmlInputFactory = XMLInputFactory.newInstance();

    XMLEventReader xmlEventReader = xmlInputFactory.createXMLEventReader(sWikipediaDump, "Utf-8");
    try {
        int iTitlesRead = 0;
        while (xmlEventReader.hasNext()) {
            XMLEvent xmlEvent = xmlEventReader.nextEvent();

            if (!xmlEvent.isStartElement()) {
                continue;
            }
            String strElementName = xmlEvent.asStartElement().getName().getLocalPart();

            // When we see a title, remember it in case the page turns out to be a redirect.
            if (strElementName.equals("title")) {
                strCurrentTitle = readNextCharEventsText(xmlEventReader);

                iTitlesRead++;
                if (iTitlesRead % 200000 == 0) {
                    log.info("read doc #" + StringUtils.beautifyNumber(iTitlesRead));
                }
                continue;
            }

            if (!strElementName.equals("text")) {
                continue;
            }

            // We are at a text tag now; check whether a redirect follows. The next event is either a
            // characters event or an end event - empty texts apparently exist as well.
            XMLEvent nextEvent = xmlEventReader.peek();
            if (!nextEvent.isCharacters()) {
                continue;
            }

            String strCharEventData = readNextCharEventsText(xmlEventReader);
            if (strCharEventData == null) {
                continue;
            }
            strCharEventData = strCharEventData.trim();

            // regionMatches(ignoreCase = true, ...) is a case-insensitive prefix test that simply yields false
            // when the text is shorter than the prefix. The variants without a leading '#' only count when the
            // text is a single line.
            boolean bSingleLine = !strCharEventData.contains("\n");
            boolean bRedirect = strCharEventData.regionMatches(true, 0, "#redirect", 0, 9)
                    || (bSingleLine && strCharEventData.regionMatches(true, 0, "redirect", 0, 8))
                    || strCharEventData.regionMatches(true, 0, "#weiterleitung", 0, 14)
                    || (bSingleLine && strCharEventData.regionMatches(true, 0, "weiterleitung", 0, 13));
            if (!bRedirect) {
                continue;
            }

            // We have a redirect - record it in our data structure.
            int iStart = strCharEventData.indexOf("[[");
            int iEnd = strCharEventData.indexOf("]]");
            // Both markers must exist and the closing one must come after the opening one. Successful indexOf
            // results are always within the string, so no further bounds check is needed before substring.
            if (iStart < 0 || iEnd < 0 || iEnd <= iStart) {
                continue;
            }

            String strRedirectTarget = strCharEventData.substring(iStart + 2, iEnd).trim();
            hsPageTitle2Redirects.add(strRedirectTarget, strCurrentTitle);
        }
    } finally {
        // Release the parser resources held by the reader (this does not close the caller's input stream).
        xmlEventReader.close();
    }

    log.info("Redirects found: " + StringUtils.beautifyNumber(hsPageTitle2Redirects.valueSize()));

    return hsPageTitle2Redirects;
}

From source file:edu.harvard.iq.safe.lockss.impl.LOCKSSDaemonStatusTableXmlStreamParser.java

/**
 *
 * @param stream/*from   w  ww  . j a v  a 2 s.  c  o m*/
 * @param encoding
 */
@Override
public void read(InputStream stream, String encoding) {
    // logger.setLevel(Level.FINE);
    // 1. create Input factory
    XMLInputFactory xmlif = XMLInputFactory.newInstance();
    xmlif.setProperty("javax.xml.stream.isCoalescing", java.lang.Boolean.TRUE);
    xmlif.setProperty("javax.xml.stream.isNamespaceAware", java.lang.Boolean.TRUE);

    long startTime = System.currentTimeMillis();

    int noAUs = 0;
    String aus = null;
    String currentTableId = null;
    String currentTableTitle = null;
    String currentTableKey = null;
    boolean hasErrorsColumn = false;
    String siAuId = null;
    XMLStreamReader xmlr = null;

    try {

        // create reader
        xmlr = xmlif.createXMLStreamReader(new BufferedInputStream(stream), encoding);

        String curElement = "";

        boolean isLastTagnameTable = false;
        String targetTagName = "row";
        String cellTagName = "columnname";
        boolean withinSummaryinfo = false;
        boolean withinColumndescriptor = false;
        boolean withinRow = false;
        boolean withinCell = false;
        boolean withinReference = false;
        boolean isCrawlStatusActive = false;
        boolean isCrawlStatusColumn = false;
        int valueTagCounter = 0;
        String currentColumnName = null;
        String currentCellValue = null;
        String currentCellKey = null;
        SummaryInfo si = null;

        List<String> rowData = null;
        Map<String, String> rowDataH = null;

        w1: while (xmlr.hasNext()) {
            int eventType = xmlr.next();
            switch (eventType) {
            case XMLStreamConstants.START_ELEMENT:
                curElement = xmlr.getLocalName(); // note: getName() ->
                // QName
                logger.log(Level.FINE, "--------- start tag = <{0}> ---------", curElement);
                // check the table name first
                if (curElement.equals("table")) {
                    isLastTagnameTable = true;
                } else if (curElement.equals("error")) {
                    isTargetPageValid = false;
                    break w1;
                }

                if (isLastTagnameTable) {
                    if (curElement.equals("name")) {
                        currentTableId = xmlr.getElementText();
                        logger.log(Level.FINE, "########### table Id = [{0}] ###########", currentTableId);
                        tableId = currentTableId;
                        if (belongsInclusionTableList.contains(currentTableId)) {
                            logger.log(Level.FINE, "!!!!! Table ({0}) belongs to the target list !!!!!",
                                    currentTableId);

                        } else {
                            logger.log(Level.FINE,
                                    "XXXXXXXXXXX Table ({0}) does not belong to the target list XXXXXXXXXXX",
                                    currentTableId);
                            break w1;
                        }
                    } else if (curElement.equals("key")) {
                        currentTableKey = xmlr.getElementText();
                        logger.log(Level.FINE, "---------- table key = ({0}) ----------", currentTableKey);
                        tableKey = currentTableKey;
                    } else if (curElement.equals("title")) {
                        currentTableTitle = xmlr.getElementText();
                        logger.log(Level.FINE, "+++++++++ table Title = ({0}) +++++++++", currentTableTitle);
                        if (currentTableId.equals("PeerRepair")) {
                            if (currentTableTitle.startsWith("Repair candidates for AU: ")) {
                                currentTableTitle = currentTableTitle.replaceFirst("Repair candidates for AU: ",
                                        "");
                                logger.log(Level.FINE, "save this modified table-Title as auName={0}",
                                        currentTableTitle);
                                this.tableTitle = currentTableTitle;
                            } else {
                                logger.log(Level.WARNING,
                                        "The table-Title does not start with the expected token={0}",
                                        currentTableTitle);
                            }
                        }
                        isLastTagnameTable = false;
                    }
                }

                if (curElement.equals("columndescriptor")) {
                    withinColumndescriptor = true;
                } else if (curElement.equals("row")) {
                    withinRow = true;
                    rowCounter++;
                    logger.log(Level.FINE, "================== {0}-th row starts here ==================",
                            rowCounter);
                    // set-up the table storage
                    //if (rowCounter == 1) {
                    // 1st row
                    rowData = new ArrayList<String>();
                    rowDataH = new LinkedHashMap<String, String>();
                    //}
                } else if (curElement.equals("cell")) {
                    logger.log(Level.FINE, "entering a cell");
                    withinCell = true;
                } else if (curElement.equals("reference")) {
                    withinReference = true;
                    logger.log(Level.FINE, "within reference on");
                } else if (curElement.equals("summaryinfo")) {
                    withinSummaryinfo = true;
                    si = new SummaryInfo();
                } else if (curElement.equals("value")) {
                    logger.log(Level.FINE, "entering a value");
                    valueTagCounter++;
                }
                //---- columndescriptor tag ---------------------------------------------------
                if (withinColumndescriptor) {
                    if (curElement.equals("name")) {

                        String nameText = xmlr.getElementText();
                        logger.log(Level.FINE, "\tcolumndescriptor: name = {0}", nameText);
                        columndescriptorList.add(nameText);
                    } else if (curElement.equals("title")) {
                        String titleText = xmlr.getElementText();
                        logger.log(Level.FINE, "\tcolumndescriptor: title = {0}", titleText);
                    } else if (curElement.equals("type")) {
                        String typeText = xmlr.getElementText();
                        logger.log(Level.FINE, "\tcolumndescriptor: type = {0}", typeText);
                        getTypeList().add(typeText);
                    }
                }
                //---- cell tag ----------------------------------------------------------------
                if (withinCell) {
                    logger.log(Level.FINE, "parsing withinCell");
                    if (curElement.equals("columnname")) {

                        String columnname = xmlr.getElementText();
                        logger.log(Level.FINE, "\t\tcolumnname = {0}", columnname);
                        currentColumnName = columnname;
                        if (columnname.equals("crawl_status")) {
                            isCrawlStatusColumn = true;
                        } else {
                            isCrawlStatusColumn = false;
                        }

                        if (columnname.equals("Errors")) {
                            hasErrorsColumn = true;
                        }

                    } else {
                        // value tag block: either value-tag WO a child element
                        // or with a child element
                        /*
                         * <value><reference>...<value>xxxx</value>
                         * <value>xxxx</value>
                         */
                        if ((curElement.equals("value")) && (!withinReference)) {
                            logger.log(Level.FINE, "entering el:value/WO-REF block");
                            if (!hasReferenceTag.contains(currentColumnName)) {
                                logger.log(Level.FINE, "No child reference tag is expected for this value tag");
                                logger.log(Level.FINEST, "xmlr.getEventType():pre-parsing={0}",
                                        xmlr.getEventType());
                                String cellValue = xmlr.getElementText();
                                // note: the above parsing action moves the
                                // cursor to the end-tag, i.e., </value>
                                // therefore, the end-element-switch-block below
                                // cannot catch this </value> tag

                                logger.log(Level.FINE, "\t\t\t[No ref: value] {0} = {1}",
                                        new Object[] { currentColumnName, cellValue });

                                currentCellValue = cellValue;
                                logger.log(Level.FINEST, "xmlr.getEventType():post-parsing={0}",
                                        xmlr.getEventType());
                                // store this value
                                // rowData
                                logger.log(Level.FINE, "current column name={0}", currentColumnName);
                                logger.log(Level.FINE, "valueTagCounter={0}", valueTagCounter);
                                if (currentColumnName.endsWith("Damaged")) {
                                    if (valueTagCounter <= 1) {
                                        // 2nd value tag is footnot for this column
                                        // ignore this value
                                        rowData.add(cellValue);
                                        rowDataH.put(currentColumnName, currentCellValue);
                                    }
                                } else {
                                    rowData.add(cellValue);
                                    rowDataH.put(currentColumnName, currentCellValue);
                                }
                            } else {
                                // previously this block was unthinkable, but
                                // it was found that there are columns that
                                // temporarily have a <reference> tag in
                                // crawl_status_table; these columns are
                                // included in hasReferenceTag by default;
                                // thus, for such unstable columns,
                                // when they hava a <reference tag,
                                // data are caputred in another within-
                                // reference block; however, when these
                                // columns no longer have <reference> tag,
                                // text data would be left uncaptured unless
                                // some follow-up processing takes place here
                                logger.log(Level.FINE, "May have to capture data: column={0}",
                                        currentColumnName);
                                if (mayHaveReferenceTag.contains(currentColumnName) && !isCrawlStatusActive) {
                                    // because the crawling is not active,
                                    // it is safely assume that the maybe columns have no reference tag

                                    // 2011-10-24 the above assumption was found wrong
                                    // a crawling cell does not say active but
                                    // subsequent columns have a reference
                                    logger.log(Level.FINE,
                                            "a text or a reference tag : try to parse it as a text");
                                    String cellValue = null;
                                    try {
                                        cellValue = xmlr.getElementText();
                                    } catch (javax.xml.stream.XMLStreamException ex) {
                                        continue;
                                    } finally {
                                    }
                                    logger.log(Level.FINE, "\t\t\t[value WO-ref(crawling_NOT_active case)={0}]",
                                            currentColumnName + " = " + cellValue);
                                    currentCellValue = cellValue;
                                    // store this value
                                    // rowData
                                    logger.log(Level.FINE, "\t\t\tcurrent columnName={0}", currentColumnName);
                                    rowData.add(cellValue);
                                    rowDataH.put(currentColumnName, currentCellValue);

                                } else {
                                    logger.log(Level.FINE, "WO-Ref: no processing items now:{0}", curElement);
                                }
                            }
                        } else if (withinReference) {
                            // reference tag exists
                            logger.log(Level.FINE, "WR:curElement={0}", curElement);

                            if (curElement.equals("key")) {
                                String cellKey = xmlr.getElementText();
                                logger.log(Level.FINE, "\t\tcurrentCellKey is set to={0}", cellKey);
                                currentCellKey = cellKey;
                            } else if (curElement.equals("value")) {
                                String cellValue = xmlr.getElementText();

                                logger.log(Level.FINE, "\t\twr: {0} = {1}",
                                        new Object[] { currentColumnName, cellValue });

                                // exception cases follow:
                                if (currentColumnName.equals("AuName")) {
                                    logger.log(Level.FINE, "\t\tAuName is replaced with the key[=AuId]= {0}",
                                            currentCellKey);
                                    // rowData                                  // This block is for ArchivalUnitStatusTable
                                    // add the key as a new datum (auId)
                                    // ahead of its value
                                    rowData.add(currentCellKey);
                                    rowDataH.put("AuId", currentCellKey);
                                    currentCellValue = cellValue;
                                } else if (currentColumnName.equals("auId")) {
                                    // This block is for V3PollerTable
                                    logger.log(Level.FINE, "\t\tnew value for auId(V3PollerTable)={0}",
                                            currentCellKey);
                                    // deprecated after 2012-02-02: use key as data
                                    // currentCellValue = currentCellKey;
                                    // add auName as a new column ahead of auId

                                    rowData.add(cellValue);
                                    rowDataH.put("auName", cellValue);
                                    logger.log(Level.FINE, "\t\tauName(V3PollerTable)={0}", cellValue);

                                    currentCellValue = currentCellKey;
                                } else if (currentColumnName.equals("pollId")) {
                                    // this block is for V3PollerTable
                                    logger.log(Level.FINE, "\t\tFull string (key) is used={0}", currentCellKey);
                                    // The key has the complete string whereas
                                    // the value is its truncated copy
                                    currentCellValue = currentCellKey;

                                } else if (currentColumnName.equals("au")) {
                                    logger.log(Level.FINE,
                                            "\t\tauId is used instead for au(crawl_status_table)={0}",
                                            currentCellKey);

                                    // 2012-02-02: add auName ahead of au
                                    rowData.add(cellValue);
                                    rowDataH.put("auName", cellValue);
                                    logger.log(Level.FINE, "\t\tauName={0}", cellValue);

                                    // rowData                                  // This block is for crawl_status_table
                                    // save the key(auId) instead of value
                                    currentCellValue = currentCellKey;

                                } else if (currentColumnName.equals("Peers")) {

                                    logger.log(Level.FINE, "\t\tURL (key) is used={0}", currentCellKey);
                                    currentCellValue = DaemonStatusDataUtil.escapeHtml(currentCellKey);
                                    logger.log(Level.FINE, "\t\tAfter encoding ={0}", currentCellValue);

                                } else {
                                    if (isCrawlStatusColumn) {
                                        // if the craw status column is
                                        // "active", some later columns
                                        // may have a reference tag
                                        // so turn on the switch
                                        if (cellValue.equals("Active") || (cellValue.equals("Pending"))) {
                                            isCrawlStatusActive = true;
                                        } else {
                                            isCrawlStatusActive = false;
                                        }
                                    }
                                    // the default processing
                                    currentCellValue = cellValue;
                                }
                                // store currentCellValue
                                logger.log(Level.FINE, "currentCellValue={0}", currentCellValue);
                                // rowData
                                rowData.add(currentCellValue);
                                rowDataH.put(currentColumnName, currentCellValue);
                            } // Within ref tag: key and valu processing
                        } // value with text or value with ref tag
                    } // columnname or value
                } // within cell
                // ---- summaryinfo tag --------------------------------------------------------
                if (withinSummaryinfo) {
                    logger.log(Level.FINE,
                            "============================ Within SummaryInfo ============================ ");
                    if (curElement.equals("title")) {
                        String text = xmlr.getElementText();
                        si.setTitle(text);

                        logger.log(Level.FINE, "\tsi:titile={0}", si.getTitle());
                    } else if (curElement.equals("type")) {
                        String text = xmlr.getElementText();
                        si.setType(Integer.parseInt(text));
                        logger.log(Level.FINE, "\tsi:type={0}", si.getType());
                    } else if (curElement.equals("key")) {
                        if (withinReference && si.getTitle().equals("Volume")) {
                            String text = xmlr.getElementText();
                            logger.log(Level.FINE, "\tsi:key contents(Volume case)={0}", text);
                            siAuId = text;
                            //                                    si.setValue(text);
                            logger.log(Level.FINE, "\tsi:value(Volume case)={0}", siAuId);
                        }
                    } else if (curElement.equals("value")) {
                        if (withinReference) {
                            if (hasRefTitileTagsSI.contains(si.getTitle())) {
                                if (si.getTitle().equals("Volume")) {
                                    // 2012-02-02 use the au name
                                    String text = xmlr.getElementText();
                                    si.setValue(text);
                                    logger.log(Level.FINE, "\tsi:value(Volume case)={0}", si.getValue());
                                } else {
                                    String text = xmlr.getElementText();
                                    si.setValue(text);
                                    logger.log(Level.FINE, "\tsi:value={0}", si.getValue());
                                }
                            }
                        } else {
                            // note: 2012-02-07
                            // daemon 1.59.2 uses the new layout for AU page
                            // this layout includes a summaryinfo tag
                            // that now contains a reference tag
                            String text = null;

                            try {
                                text = xmlr.getElementText();
                                if (!hasRefTitileTagsSI.contains(si.getTitle())) {
                                    si.setValue(text);
                                    logger.log(Level.FINE, "\tsi:value={0}", si.getValue());
                                }
                            } catch (javax.xml.stream.XMLStreamException ex) {
                                logger.log(Level.WARNING, "encounter a reference tag rather than text");
                                continue;
                            } finally {
                            }
                        }
                    }

                    /*
                     * aus = xmlr.getElementText();
                     * out.println("found token=[" + aus + "]"); if
                     * (currentTableId.equals("ArchivalUnitStatusTable")) {
                     * m = pau.matcher(aus); if (m.find()) {
                     * out.println("How many AUs=" + m.group(1)); noAUs =
                     * Integer.parseInt(m.group(1)); } else {
                     * out.println("not found within[" + aus + "]"); } }
                     */
                }

                break;
            case XMLStreamConstants.CHARACTERS:
                break;

            case XMLStreamConstants.ATTRIBUTE:
                break;

            case XMLStreamConstants.END_ELEMENT:
                if (xmlr.getLocalName().equals("columndescriptor")) {
                    withinColumndescriptor = false;
                    logger.log(Level.FINE, "leaving columndescriptor");
                } else if (xmlr.getLocalName().equals("row")) {
                    if (withinRow) {
                        logger.log(Level.FINE, "========= end of the target row element");
                        withinRow = false;
                    }
                    if (!isCrawlStatusActive) {
                        tabularData.add(rowData);
                        tableData.add(rowDataH);

                    } else {
                        rowIgnored++;
                        rowCounter--;
                    }
                    rowData = null;
                    rowDataH = null;
                    isCrawlStatusActive = false;
                } else if (xmlr.getLocalName().equals("cell")) {
                    // rowDataH.add(cellDatum);
                    cellCounter++;
                    withinCell = false;
                    currentColumnName = null;
                    currentCellValue = null;
                    currentCellKey = null;
                    isCrawlStatusColumn = false;
                    valueTagCounter = 0;
                    logger.log(Level.FINE, "leaving cell");
                } else if (xmlr.getLocalName().equals("columnname")) {
                    logger.log(Level.FINE, "leaving columnname");
                } else if (xmlr.getLocalName().equals("reference")) {
                    withinReference = false;
                } else if (xmlr.getLocalName().equals("summaryinfo")) {
                    logger.log(Level.FINE, "si={0}", si.toString());
                    summaryInfoList.add(si);
                    si = null;
                    withinSummaryinfo = false;
                } else if (xmlr.getLocalName().equals("value")) {
                    logger.log(Level.FINE, "leaving value");
                } else {
                    logger.log(Level.FINE, "--------- end tag = <{0}> ---------", curElement);
                }

                break;
            case XMLStreamConstants.END_DOCUMENT:
                logger.log(Level.FINE, "Total of {0} row occurrences", rowCounter);
            } // end: switch
        } // end:while
    } catch (XMLStreamException ex) {
        logger.log(Level.WARNING, "XMLStreamException occurs", ex);
        this.isTargetPageValid = false;

    } catch (RuntimeException re) {
        logger.log(Level.WARNING, "some RuntimeException occurs", re);
        this.isTargetPageValid = false;
    } catch (Exception e) {
        logger.log(Level.WARNING, "some Exception occurs", e);
        this.isTargetPageValid = false;
    } finally {
        // 5. close reader/IO
        if (xmlr != null) {
            try {
                xmlr.close();
            } catch (XMLStreamException ex) {
                logger.log(Level.WARNING, "XMLStreamException occurs during close()", ex);
            }
        }
        if (!this.isTargetPageValid) {
            logger.log(Level.WARNING,
                    "This parsing session may not be complete due to some exception reported earlier");
        }
    } // end of try

    if (currentTableId.equals("V3PollerDetailTable")) {
        summaryInfoList.add(new SummaryInfo("auId", 4, siAuId));
        summaryInfoMap = new LinkedHashMap<String, String>();
        for (SummaryInfo si : summaryInfoList) {
            summaryInfoMap.put(si.getTitle(), si.getValue());
        }
    }

    // parsing summary
    logger.log(Level.FINE, "###################### parsing summary ######################");
    logger.log(Level.FINE, "currentTableId={0}", currentTableId);
    logger.log(Level.FINE, "currentTableTitle={0}", currentTableTitle);
    logger.log(Level.FINE, "currentTableKey={0}", currentTableKey);

    logger.log(Level.FINE, "columndescriptorList={0}", columndescriptorList);
    logger.log(Level.FINE, "# of columndescriptors={0}", columndescriptorList.size());
    logger.log(Level.FINE, "typeList={0}", typeList);
    logger.log(Level.FINE, "# of rows counted={0}", rowCounter);
    logger.log(Level.FINE, "# of rows excluded[active ones are excluded]={0}", rowIgnored);
    logger.log(Level.FINE, "summaryInfoList:size={0}", summaryInfoList.size());
    logger.log(Level.FINE, "summaryInfoList={0}", summaryInfoList);
    logger.log(Level.FINE, "table: cell counts = {0}", cellCounter);
    logger.log(Level.FINE, "tableData[map]=\n{0}", tableData);
    logger.log(Level.FINE, "tabularData[list]=\n{0}", tabularData);

    /*
     * if (currentTableId.equals("ArchivalUnitStatusTable")) { if
     * (rowCounter == noAUs) { out.println("au counting is OK=" +
     * rowCounter); } else { err.println("au counting disagreement"); throw
     * new RuntimeException("parsing error is suspected"); } }
     */
    logger.log(Level.FINE, " completed in {0} ms\n\n", (System.currentTimeMillis() - startTime));

    if (!columndescriptorList.isEmpty()) {
        int noCols = columndescriptorList.size();
        if (currentTableId.equals("V3PollerTable") && !hasErrorsColumn) {
            noCols--;
        }
        int noCellsExpd = rowCounter * noCols;
        if (noCols > 0) {
            // this table has a table
            logger.log(Level.FINE, "checking parsing results: table dimmensions");
            if (noCellsExpd == cellCounter) {
                logger.log(Level.FINE, "table dimensions and cell-count are consistent");
            } else {
                int diff = noCellsExpd - cellCounter;
                logger.log(Level.FINE, "The table has {0} incomplete cells", diff);
                hasIncompleteRows = true;
                setIncompleteRowList();
                logger.log(Level.FINE, "incomplete rows: {0}", incompleteRows);
            }
        }
    }
}

From source file:at.gv.egiz.slbinding.SLUnmarshaller.java

/**
 * @param source a StreamSource wrapping a Reader (!) for the marshalled Object
 * @return the unmarshalled Object
 * @throws XMLStreamException
 * @throws JAXBException
 */
public Object unmarshal(StreamSource source) throws XMLStreamException, JAXBException {
    Reader inputReader = source.getReader();

    /* Validate XML against XXE, XEE, and SSRF
     * 
     * This pre-validation step is required because com.sun.xml.stream.sjsxp-1.0.2 XML stream parser library does not 
     * support all XML parser features to prevent these types of attacks  
     */
    if (inputReader instanceof InputStreamReader) {
        try {
            //create copy of input stream
            InputStreamReader isReader = (InputStreamReader) inputReader;
            String encoding = isReader.getEncoding();
            byte[] backup = IOUtils.toByteArray(isReader, encoding);

            //validate input stream
            DOMUtils.validateXMLAgainstXXEAndSSRFAttacks(new ByteArrayInputStream(backup));

            //create new inputStreamReader for reak processing
            inputReader = new InputStreamReader(new ByteArrayInputStream(backup), encoding);

        } catch (XMLStreamException e) {
            log.error("XML data validation FAILED with msg: " + e.getMessage(), e);
            throw new XMLStreamException("XML data validation FAILED with msg: " + e.getMessage(), e);

        } catch (IOException e) {
            log.error("XML data validation FAILED with msg: " + e.getMessage(), e);
            throw new XMLStreamException("XML data validation FAILED with msg: " + e.getMessage(), e);

        }

    } else {
        log.error("!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!");
        log.error(
                "Reader is not of type InputStreamReader -> can not make a copy of the InputStream --> extended XML validation is not possible!!! ");
        log.error("!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!");

    }

    /* !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
     * parse XML with original functionality
     * 
     * This code implements the the original mocca XML processing by using 
     *  com.sun.xml.stream.sjsxp-1.0.2 XML stream parser library. Currently, this library is required to get full 
     *  security-layer specific XML processing. However, this lib does not fully support XXE, XEE and SSRF
     *  prevention mechanisms (e.g.: XMLInputFactory.SUPPORT_DTD flag is not used)    
     * 
     * !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
     */
    XMLInputFactory inputFactory = XMLInputFactory.newInstance();

    //disallow DTD and external entities
    inputFactory.setProperty(XMLInputFactory.SUPPORT_DTD, false);
    inputFactory.setProperty("javax.xml.stream.isSupportingExternalEntities", false);

    XMLEventReader eventReader = inputFactory.createXMLEventReader(inputReader);
    RedirectEventFilter redirectEventFilter = new RedirectEventFilter();
    XMLEventReader filteredReader = inputFactory.createFilteredReader(eventReader, redirectEventFilter);

    Unmarshaller unmarshaller = jaxbContext.createUnmarshaller();
    ReportingValidationEventHandler validationEventHandler = new ReportingValidationEventHandler();
    unmarshaller.setEventHandler(validationEventHandler);

    unmarshaller.setListener(new RedirectUnmarshallerListener(redirectEventFilter));
    unmarshaller.setSchema(slSchema);

    Object object;
    try {
        log.trace("Before unmarshal().");
        object = unmarshaller.unmarshal(filteredReader);
        log.trace("After unmarshal().");
    } catch (UnmarshalException e) {
        if (log.isDebugEnabled()) {
            log.debug("Failed to unmarshal security layer message.", e);
        } else {
            log.info("Failed to unmarshal security layer message."
                    + (e.getMessage() != null ? " " + e.getMessage() : ""));
        }

        if (validationEventHandler.getErrorEvent() != null) {
            ValidationEvent errorEvent = validationEventHandler.getErrorEvent();
            if (e.getLinkedException() == null) {
                e.setLinkedException(errorEvent.getLinkedException());
            }
        }
        throw e;
    }

    return object;

}

From source file:edu.unc.lib.dl.services.TripleStoreManagerMulgaraImpl.java

/**
 * Sends an ITQL command via {@code sendTQL} and extracts the text of the {@code message} element(s) from
 * the XML response.
 *
 * @param query
 *           an ITQL command
 * @return the concatenated character content of all {@code message} elements in the response, or
 *         {@code null} if there was no response or the response could not be parsed as XML
 */
public String storeCommand(String query) {
    String response = this.sendTQL(query);
    if (response == null) {
        return null;
    }

    String result = null;
    StringReader sr = new StringReader(response);
    XMLEventReader r = null;
    try {
        XMLInputFactory factory = XMLInputFactory.newInstance();
        // Coalescing delivers the text of an element as one Characters event instead of several fragments.
        factory.setProperty(XMLInputFactory.IS_COALESCING, Boolean.TRUE);

        boolean inMessage = false;
        StringBuilder message = new StringBuilder();
        r = factory.createXMLEventReader(sr);
        while (r.hasNext()) {
            XMLEvent e = r.nextEvent();
            if (e.isStartElement()) {
                StartElement s = e.asStartElement();
                if ("message".equals(s.getName().getLocalPart())) {
                    inMessage = true;
                }
            } else if (e.isEndElement()) {
                EndElement end = e.asEndElement();
                if ("message".equals(end.getName().getLocalPart())) {
                    inMessage = false;
                }
            } else if (inMessage && e.isCharacters()) {
                message.append(e.asCharacters().getData());
            }
        }
        // Only assigned after a complete parse, so a malformed response yields null rather than partial text.
        result = message.toString();
    } catch (XMLStreamException e) {
        // Report through the class logger (with stack trace) instead of printing to stderr.
        log.error("Failed to parse the XML response to an ITQL command", e);
    } finally {
        if (r != null) {
            try {
                r.close();
            } catch (Exception closeFailure) {
                // Best effort: a failure to close must not hide the parse result.
                log.error("Failed to close the XML event reader", closeFailure);
            }
        }
        sr.close();
    }
    return result;
}

From source file:ch.njol.skript.Updater.java

/**
 * Gets the changelogs and release dates of the newest versions by reading the RSS feed at {@code RSSURL}.
 * <p>
 * For every feed item whose title equals the name of an entry in {@code infos}, the entry's changelog and
 * date are filled in from the item's {@code description} and {@code pubDate} elements. On failure the update
 * state is set to {@code CHECK_ERROR} and an error is sent to the sender.
 *
 * @param sender receives the error message if the feed cannot be read or parsed
 */
final static void getChangelogs(final CommandSender sender) {
    InputStream in = null;
    InputStreamReader r = null;
    try {
        final URLConnection conn = new URL(RSSURL).openConnection();
        conn.setRequestProperty("User-Agent", "Skript/v" + Skript.getVersion() + " (by Njol)"); // Bukkit returns a 403 (forbidden) if no user agent is set
        in = conn.getInputStream();

        // The character set is announced in the "charset" parameter of the Content-Type header.
        // getContentEncoding() returns the Content-Encoding header (e.g. "gzip"), which is not a charset name,
        // so it must not be passed to InputStreamReader. Fall back to UTF-8 if no charset is announced.
        String charset = "UTF-8";
        final String contentType = conn.getContentType();
        if (contentType != null) {
            for (final String part : contentType.split(";")) {
                final String param = part.trim();
                if (param.regionMatches(true, 0, "charset=", 0, 8)) {
                    final String value = param.substring(8).replace("\"", "").trim();
                    if (value.length() > 0)
                        charset = value;
                }
            }
        }
        r = new InputStreamReader(in, charset);
        final XMLEventReader reader = XMLInputFactory.newInstance().createXMLEventReader(r);

        // NOTE(review): infos is cleared here and nothing in this method adds to it, so the title lookup
        // below can only match entries added elsewhere while the feed is being read - confirm this is intended.
        infos.clear();
        VersionInfo current = null;

        outer: while (reader.hasNext()) {
            XMLEvent e = reader.nextEvent();
            if (e.isStartElement()) {
                final String element = e.asStartElement().getName().getLocalPart();
                if (element.equalsIgnoreCase("title")) {
                    // select the version this item's description/pubDate belong to (null if unknown)
                    final String name = reader.nextEvent().asCharacters().getData().trim();
                    for (final VersionInfo i : infos) {
                        if (name.equals(i.name)) {
                            current = i;
                            continue outer;
                        }
                    }
                    current = null;
                } else if (element.equalsIgnoreCase("description")) {
                    if (current == null)
                        continue;
                    // the text may arrive as several consecutive character events
                    final StringBuilder cl = new StringBuilder();
                    while ((e = reader.nextEvent()).isCharacters())
                        cl.append(e.asCharacters().getData());
                    // strip the HTML markup and turn every single line break into the start of a new "- " list entry
                    current.changelog = "- " + StringEscapeUtils.unescapeHtml("" + cl).replace("<br>", "")
                            .replace("<p>", "").replace("</p>", "").replaceAll("\n(?!\n)", "\n- ");
                } else if (element.equalsIgnoreCase("pubDate")) {
                    if (current == null)
                        continue;
                    synchronized (RFC2822) { // to make FindBugs shut up
                        current.date = new Date(
                                RFC2822.parse(reader.nextEvent().asCharacters().getData()).getTime());
                    }
                }
            }
        }
    } catch (final IOException e) {
        // connection or read problem (including an unsupported charset name)
        stateLock.writeLock().lock();
        try {
            state = UpdateState.CHECK_ERROR;
            error.set(ExceptionUtils.toString(e));
            Skript.error(sender, m_check_error.toString());
        } finally {
            stateLock.writeLock().unlock();
        }
    } catch (final Exception e) {
        // anything else, e.g. malformed XML or an unexpected feed structure
        Skript.error(sender, m_internal_error.toString());
        Skript.exception(e, "Unexpected error while checking for a new version of Skript");
        stateLock.writeLock().lock();
        try {
            state = UpdateState.CHECK_ERROR;
            error.set(e.getClass().getSimpleName() + ": " + e.getLocalizedMessage());
        } finally {
            stateLock.writeLock().unlock();
        }
    } finally {
        if (in != null) {
            try {
                in.close();
            } catch (final IOException e) {
                // ignored on purpose: nothing useful can be done if closing fails
            }
        }
        if (r != null) {
            try {
                r.close();
            } catch (final IOException e) {
                // ignored on purpose: nothing useful can be done if closing fails
            }
        }
    }
}

From source file:com.pocketsoap.salesforce.soap.ChatterClient.java

/**
 * Posts a SOAP request and parses the response body with the given parser.
 * <p>
 * The response must consist of a SOAP {@code Envelope} whose first child is the {@code Body}. If the first
 * element inside the body is a {@code Fault}, the exception produced by {@code handleSoapFault} is thrown.
 * The XML reader is closed and the HTTP connection released on every exit path.
 *
 * @param serverUrl the URL the request is posted to
 * @param req the request entity to send
 * @param respParser parses the content of the SOAP body; invoked with the reader positioned on the first
 *        element inside {@code Body}
 * @return the value produced by {@code respParser}
 * @throws XMLStreamException if the response is not well-formed or does not have the expected structure
 * @throws IOException if the request fails or returns an HTTP status other than 200 or 500
 */
private <T> T makeSoapRequest(String serverUrl, RequestEntity req, ResponseParser<T> respParser)
        throws XMLStreamException, IOException {
    PostMethod post = new PostMethod(serverUrl);
    post.addRequestHeader("SOAPAction", "\"\"");
    post.setRequestEntity(req);

    try {
        HttpClient http = new HttpClient();
        int sc = http.executeMethod(post);
        // 500 is accepted as well as 200; presumably because SOAP faults are delivered with that status.
        if (sc != 200 && sc != 500) {
            throw new IOException("request to " + serverUrl + " returned unexpected HTTP status code of " + sc
                    + ", check configuration.");
        }

        XMLInputFactory f = XMLInputFactory.newInstance();
        f.setProperty(XMLInputFactory.IS_COALESCING, Boolean.TRUE);

        XMLStreamReader rdr = f.createXMLStreamReader(post.getResponseBodyAsStream());
        try {
            rdr.require(XMLStreamReader.START_DOCUMENT, null, null);
            rdr.nextTag();
            rdr.require(XMLStreamReader.START_ELEMENT, SOAP_NS, "Envelope");
            rdr.nextTag();
            // TODO, should handle a Header appearing in the response.
            rdr.require(XMLStreamReader.START_ELEMENT, SOAP_NS, "Body");
            rdr.nextTag();
            if (rdr.getLocalName().equals("Fault")) {
                throw handleSoapFault(rdr);
            }

            T response = respParser.parse(rdr);
            // consume the remainder of the document before cleaning up
            while (rdr.hasNext()) {
                rdr.next();
            }
            return response;
        } finally {
            rdr.close();
        }
    } finally {
        // Always hand the connection back, also for unexpected status codes, malformed responses and faults.
        post.releaseConnection();
    }
}

From source file:com.stevpet.sonar.plugins.dotnet.mscover.parser.XmlParserSubject.java

/**
 * Opens the given XML file and returns a cursor advanced onto its root element.
 *
 * @param file the XML file to read
 * @return the root element cursor after one {@code advance()} call
 * @throws SonarException if the cursor cannot be created or advanced; the underlying
 *         {@link XMLStreamException} is attached as the cause
 * @throws FactoryConfigurationError if no {@link XMLInputFactory} implementation can be instantiated
 */
public SMInputCursor getCursor(File file) {
    try {
        SMInputFactory cursorFactory = new SMInputFactory(XMLInputFactory.newInstance());
        SMHierarchicCursor rootCursor = cursorFactory.rootElementCursor(file);
        return rootCursor.advance();
    } catch (XMLStreamException e) {
        String msg = "Could not create cursor " + e.getMessage();
        LOG.error(msg);
        throw new SonarException(msg, e);
    }
}

From source file:org.callimachusproject.rdfa.test.RDFaGenerationTest.java

/**
 * Prepares the test fixture: the XPath and XML stream factories plus three in-memory repositories
 * (source, expected, actual) with one open connection each, then delegates to {@code initialize()}.
 *
 * @throws Exception if a factory property is rejected, a repository cannot be initialized or
 *         {@code initialize()} fails
 */
public void setUp() throws Exception {
    // XPath
    xPathFactory = XPathFactory.newInstance();

    // StAX: no DTD processing, no validation, external DTDs ignored, CDATA sections reported as events
    xmlInputFactory = XMLInputFactory.newInstance();
    xmlInputFactory.setProperty(XMLInputFactory.SUPPORT_DTD, Boolean.FALSE);
    xmlInputFactory.setProperty(XMLInputFactory.IS_VALIDATING, Boolean.FALSE);
    xmlInputFactory.setProperty("http://java.sun.com/xml/stream/properties/ignore-external-dtd", Boolean.TRUE);
    xmlInputFactory.setProperty("http://java.sun.com/xml/stream/properties/report-cdata-event", Boolean.TRUE);

    // in-memory store holding the source data
    sourceRepository = new SailRepository(new MemoryStore());
    sourceRepository.initialize();
    source = sourceRepository.getConnection();

    // in-memory store holding the expected result
    expectedRepository = new SailRepository(new MemoryStore());
    expectedRepository.initialize();
    expected = expectedRepository.getConnection();

    // in-memory store holding the actual result
    actualRepository = new SailRepository(new MemoryStore());
    actualRepository.initialize();
    actual = actualRepository.getConnection();

    initialize();
}

From source file:de.codesourcery.eve.skills.util.XMLMapper.java

/**
 * Reads all {@code <row>} elements from an XML stream and converts each one into a bean of the given class.
 * <p>
 * Every attribute of a row is mapped to a bean field, either through the configured property name mappings
 * or, if none are configured, by treating the attribute name as the field name. Attributes without a mapped
 * field are skipped. An attribute value equal to {@code NIL} sets the field to {@code null}; any other value
 * is converted with the field's converter. The input stream is always closed before this method returns.
 *
 * @param clasz the bean class; must provide a public no-argument constructor
 * @param converters supplies the converter used for each field
 * @param instream the XML input; closed by this method
 * @return the beans created from the rows, in document order
 * @throws XMLStreamException if the XML is not well-formed or {@code <row>} elements are nested
 * @throws IOException if closing the input stream fails
 * @throws NoSuchMethodException if the bean class has no public no-argument constructor
 */
public <T> Collection<T> read(Class<T> clasz, IFieldConverters converters, InputStream instream)
        throws XMLStreamException, IOException, IllegalArgumentException, InstantiationException,
        IllegalAccessException, InvocationTargetException, SecurityException, NoSuchMethodException {

    final Collection<T> result = new ArrayList<T>();

    XMLStreamReader parser = null;
    try {

        final BeanDescription desc = createBeanDescription(clasz);

        /*
         * Create inverse mapping attribute name -> field.
         */
        final Map<String, Field> inverseMapping = new HashMap<String, Field>();

        if (!this.propertyNameMappings.isEmpty()) {

            // key = property name  / value = attribute name
            for (Map.Entry<String, String> propToAttribute : this.propertyNameMappings.entrySet()) {
                inverseMapping.put(propToAttribute.getValue(), desc.getFieldByName(propToAttribute.getKey()));
            }

        } else { // create default mappings
            for (Field f : desc.getFields()) {
                inverseMapping.put(f.getName(), f);
            }
        }

        final Constructor<T> constructor = clasz.getConstructor(new Class<?>[0]);

        parser = XMLInputFactory.newInstance().createXMLStreamReader(instream);

        boolean inRow = false;

        for (int event = parser.next(); event != XMLStreamConstants.END_DOCUMENT; event = parser.next()) {
            switch (event) {
            case XMLStreamConstants.START_ELEMENT:
                if ("row".equals(parser.getLocalName())) { // parse row
                    if (inRow) {
                        throw new XMLStreamException("Found nested <row> tag ?", parser.getLocation());
                    }
                    inRow = true;

                    final T bean = constructor.newInstance(new Object[0]);

                    // Iterate over the attributes actually present on this element: a row may carry fewer
                    // or more attributes than the bean has fields.
                    final int attrCount = parser.getAttributeCount();
                    for (int i = 0; i < attrCount; i++) {
                        final Field field = inverseMapping.get(parser.getAttributeLocalName(i));
                        if (field == null) {
                            continue; // no bean field is mapped to this attribute
                        }

                        final String attrValue = parser.getAttributeValue(i);
                        if (!NIL.equals(attrValue)) {
                            final Object fieldValue = converters.getConverter(field)
                                    .toObject(fromAttributeValue(attrValue), field.getType());
                            field.set(bean, fieldValue);
                        } else {
                            field.set(bean, null);
                        }
                    }
                    result.add(bean);
                }
                break;

            case XMLStreamConstants.END_ELEMENT:
                if ("row".equals(parser.getLocalName())) {
                    if (!inRow) {
                        throw new XMLStreamException("Found </row> tag without start tag at ",
                                parser.getLocation());
                    }
                    inRow = false;
                }
                break;

            }
        }
    } finally {
        // Closing the stream reader does not close the underlying stream, so both are closed here.
        try {
            if (parser != null) {
                parser.close();
            }
        } finally {
            instream.close();
        }
    }

    return result;
}