Example usage for java.util.concurrent ArrayBlockingQueue take

List of usage examples for java.util.concurrent ArrayBlockingQueue take

Introduction

On this page you can find example usages of the java.util.concurrent.ArrayBlockingQueue.take() method.

Prototype

public E take() throws InterruptedException 

Source Link

Usage

From source file:org.openiot.gsn.vsensor.RVirtualSensor.java

/**
 * Buffers an incoming stream element and, once the sliding window of its input stream is
 * full, ships the window to an Rserve instance, evaluates the configured R script and
 * publishes the last value of each resulting R vector as a new stream element.
 *
 * <p>After a full window has been processed (successfully or not), up to {@code stepSize}
 * of the oldest elements are dropped from the window.
 *
 * @param inputStreamName name of the input stream the element arrived on; selects the window
 * @param streamElement   the newly arrived element; its field names and types drive the
 *                        variables exchanged with R
 */
public void dataAvailable(String inputStreamName, StreamElement streamElement) {
    ArrayBlockingQueue<StreamElement> circularBuffer = circularBuffers.get(inputStreamName);

    // Get the circular buffer that matches the input stream. Create a new one
    // if none exists
    if (circularBuffer == null) {
        circularBuffer = new ArrayBlockingQueue<StreamElement>(windowSize);
        circularBuffers.put(inputStreamName, circularBuffer);
    }

    try {
        circularBuffer.put(streamElement);
    } catch (InterruptedException e) {
        // Restore the interrupt flag so that the calling thread can still observe it.
        Thread.currentThread().interrupt();
        logger.warn(e.getMessage(), e);
        return;
    }

    logger.debug(
            "Window for " + inputStreamName + " contains: " + circularBuffer.size() + " of " + windowSize);

    // Nothing to compute until the window is full.
    if (circularBuffer.size() != windowSize) {
        return;
    }

    logger.info("Window for " + inputStreamName + " contains: " + circularBuffer.size() + " of "
            + windowSize);

    // Track the connection locally so that the cleanup below only ever closes the
    // connection opened by THIS invocation (never a null or stale field value).
    RConnection connection = null;
    try {
        // Connect to Rserve and assign global variables
        connection = new RConnection(params.get(SERVER), Integer.parseInt(params.get(PORT)));
        rc = connection;

        logger.info("Connected to R server " + params.get(SERVER) + ":" + params.get(PORT));

        String[] fieldname = streamElement.getFieldNames();

        logger.info("Sending " + fieldname.length + " data attributes to R server.");

        // Snapshot the window once; it is the same for every field.
        Object[] elts = circularBuffer.toArray();

        // Assign R vector variables prior the script
        for (int n = 0; n < fieldname.length; n++) {
            // Build the window for field n
            double[] values = new double[windowSize];
            for (int i = 0; i < elts.length; i++) {
                StreamElement elt = (StreamElement) elts[i];
                values[i] = ((Number) elt.getData()[n]).doubleValue();
            }

            // assign vectors as R variables
            connection.assign("gsn_" + fieldname[n].toLowerCase(), values);
        }

        logger.info("Done.");

        // read the R script
        // open the script file every time we do some processing (this can be
        // improved).
        File file = new File(params.get(SCRIPT).toString());
        script = FileUtils.readFileToString(file, "UTF-8");

        logger.info("Sending R script.");

        // evaluate the R script
        connection.voidEval(script);
        logger.info("Done.");

        logger.info("Performing computation in R server (please wait).");

        // collect vector values after computation
        DataField[] outStructure = getVirtualSensorConfiguration().getOutputStructure();

        String[] plotFieldName = new String[outStructure.length];
        Byte[] plotFieldType = new Byte[outStructure.length];

        for (int w = 0; w < outStructure.length; w++) {
            plotFieldName[w] = outStructure[w].getName();
            plotFieldType[w] = outStructure[w].getDataTypeID();
        }

        Byte[] fieldType = streamElement.getFieldTypes();

        int len1 = outStructure.length;
        int len2 = fieldname.length;

        // Leave room for the extra attributes if the output structure defines more of them
        Serializable[] outputData = new Serializable[Math.max(len1, len2)];

        for (int n = 0; n < fieldname.length; n++) {
            // evaluate/get attribute data from R server
            xp = connection.parseAndEval(fieldname[n].toLowerCase());

            // Only the last value of each R vector is published.
            if (fieldType[n] == DataTypes.DOUBLE) {
                double[] b1 = xp.asDoubles();
                outputData[n] = b1[b1.length - 1];
            }

            if (fieldType[n] == DataTypes.INTEGER) {
                int[] b1 = xp.asIntegers();
                outputData[n] = b1[b1.length - 1];
            }

            if (fieldType[n] == DataTypes.BIGINT) {
                // NOTE(review): values are read as 32-bit integers before widening to long;
                // confirm that BIGINT outputs never exceed the int range.
                int[] b1 = xp.asIntegers();
                outputData[n] = (long) b1[b1.length - 1];
            }
        }

        StreamElement se = null;

        // check if we have defined more attributes in the output structure
        if (len1 > len2) {
            // Constant-first comparison: safe even when stype has not been set.
            if ("plot".equals(stype)) {
                xp = connection.parseAndEval("gsn_plot");
                outputData[len2] = xp.asBytes();

                se = new StreamElement(plotFieldName, plotFieldType, outputData);
            }
        } else {
            se = new StreamElement(fieldname, fieldType, outputData);
        }

        logger.info("Computation finished.");

        if (se == null) {
            // Previously this case fell through to dataProduced(null) and a NullPointerException.
            logger.warn("No stream element produced: the output structure defines more fields than the"
                    + " input stream but the sensor type is not \"plot\".");
        } else {
            dataProduced(se);
            logger.debug("Stream published: " + se.toString().toLowerCase());
        }
    } catch (Exception e) {
        // Best effort: a failed computation must not stop the stream; log with the stack trace.
        logger.warn(e.getMessage(), e);
    } finally {
        // Close connection to R server (only if it was actually opened)
        if (connection != null) {
            connection.close();
            logger.info("Connection to R server closed.");
        }
    }

    // Remove step size elements from the beginning of the buffer. poll() never blocks, so a
    // step size larger than the number of buffered elements cannot hang this thread.
    for (int i = 0; i < stepSize; i++) {
        if (circularBuffer.poll() == null) {
            break;
        }
    }
}

From source file:gsn.vsensor.RVirtualSensor.java

/**
 * Buffers an incoming stream element and, once the sliding window of its input stream is
 * full, ships the window to an Rserve instance, evaluates the configured R script and
 * publishes the last value of each resulting R vector as a new stream element.
 *
 * <p>After a full window has been processed (successfully or not), up to {@code stepSize}
 * of the oldest elements are dropped from the window.
 *
 * @param inputStreamName name of the input stream the element arrived on; selects the window
 * @param streamElement   the newly arrived element; its field names and types drive the
 *                        variables exchanged with R
 */
public void dataAvailable(String inputStreamName, StreamElement streamElement) {
    ArrayBlockingQueue<StreamElement> circularBuffer = circularBuffers.get(inputStreamName);

    // Get the circular buffer that matches the input stream. Create a new one
    // if none exists
    if (circularBuffer == null) {
        circularBuffer = new ArrayBlockingQueue<StreamElement>(windowSize);
        circularBuffers.put(inputStreamName, circularBuffer);
    }

    try {
        circularBuffer.put(streamElement);
    } catch (InterruptedException e) {
        // Restore the interrupt flag so that the calling thread can still observe it.
        Thread.currentThread().interrupt();
        logger.warn(e.getMessage(), e);
        return;
    }

    logger.debug(
            "Window for " + inputStreamName + " contains: " + circularBuffer.size() + " of " + windowSize);

    // Nothing to compute until the window is full.
    if (circularBuffer.size() != windowSize) {
        return;
    }

    logger.info("Window for " + inputStreamName + " contains: " + circularBuffer.size() + " of "
            + windowSize);

    // Track the connection locally so that the cleanup below only ever closes the
    // connection opened by THIS invocation (never a null or stale field value).
    RConnection connection = null;
    try {
        // Connect to Rserve and assign global variables
        connection = new RConnection(params.get(SERVER), Integer.parseInt(params.get(PORT)));
        rc = connection;

        logger.info("Connected to R server " + params.get(SERVER) + ":" + params.get(PORT));

        String[] fieldname = streamElement.getFieldNames();

        logger.info("Sending " + fieldname.length + " data attributes to R server.");

        // Snapshot the window once; it is the same for every field.
        Object[] elts = circularBuffer.toArray();

        // Assign R vector variables prior the script
        for (int n = 0; n < fieldname.length; n++) {
            // Build the window for field n
            double[] values = new double[windowSize];
            for (int i = 0; i < elts.length; i++) {
                StreamElement elt = (StreamElement) elts[i];
                values[i] = ((Number) elt.getData()[n]).doubleValue();
            }

            // assign vectors as R variables
            connection.assign("gsn_" + fieldname[n].toLowerCase(), values);
        }

        logger.info("Done.");

        // read the R script
        // open the script file every time we do some processing (this can be
        // improved).
        File file = new File(params.get(SCRIPT).toString());
        script = FileUtils.readFileToString(file, "UTF-8");

        logger.info("Sending R script.");

        // evaluate the R script
        connection.voidEval(script);
        logger.info("Done.");

        logger.info("Performing computation in R server (please wait).");

        // collect vector values after computation
        DataField[] outStructure = getVirtualSensorConfiguration().getOutputStructure();

        String[] plotFieldName = new String[outStructure.length];
        Byte[] plotFieldType = new Byte[outStructure.length];

        for (int w = 0; w < outStructure.length; w++) {
            plotFieldName[w] = outStructure[w].getName();
            plotFieldType[w] = outStructure[w].getDataTypeID();
        }

        Byte[] fieldType = streamElement.getFieldTypes();

        int len1 = outStructure.length;
        int len2 = fieldname.length;

        // Leave room for the extra attributes if the output structure defines more of them
        Serializable[] outputData = new Serializable[Math.max(len1, len2)];

        for (int n = 0; n < fieldname.length; n++) {
            // evaluate/get attribute data from R server
            xp = connection.parseAndEval(fieldname[n].toLowerCase());

            // Only the last value of each R vector is published.
            if (fieldType[n] == DataTypes.DOUBLE) {
                double[] b1 = xp.asDoubles();
                outputData[n] = b1[b1.length - 1];
            }

            if (fieldType[n] == DataTypes.INTEGER) {
                int[] b1 = xp.asIntegers();
                outputData[n] = b1[b1.length - 1];
            }

            if (fieldType[n] == DataTypes.BIGINT) {
                // NOTE(review): values are read as 32-bit integers before widening to long;
                // confirm that BIGINT outputs never exceed the int range.
                int[] b1 = xp.asIntegers();
                outputData[n] = (long) b1[b1.length - 1];
            }
        }

        StreamElement se = null;

        // check if we have defined more attributes in the output structure
        if (len1 > len2) {
            // Constant-first comparison: safe even when stype has not been set.
            if ("plot".equals(stype)) {
                xp = connection.parseAndEval("gsn_plot");
                outputData[len2] = xp.asBytes();

                se = new StreamElement(plotFieldName, plotFieldType, outputData);
            }
        } else {
            se = new StreamElement(fieldname, fieldType, outputData);
        }

        logger.info("Computation finished.");

        if (se == null) {
            // Previously this case fell through to dataProduced(null) and a NullPointerException.
            logger.warn("No stream element produced: the output structure defines more fields than the"
                    + " input stream but the sensor type is not \"plot\".");
        } else {
            dataProduced(se);
            logger.debug("Stream published: " + se.toString().toLowerCase());
        }
    } catch (Exception e) {
        // Best effort: a failed computation must not stop the stream; log with the stack trace.
        logger.warn(e.getMessage(), e);
    } finally {
        // Close connection to R server (only if it was actually opened)
        if (connection != null) {
            connection.close();
            logger.info("Connection to R server closed.");
        }
    }

    // Remove step size elements from the beginning of the buffer. poll() never blocks, so a
    // step size larger than the number of buffered elements cannot hang this thread.
    for (int i = 0; i < stepSize; i++) {
        if (circularBuffer.poll() == null) {
            break;
        }
    }
}

From source file:efen.parsewiki.WikipediaDocumentSequence.java

/**
 * Returns an iterator over the pages of the Wikipedia XML dump.
 *
 * <p>A background thread SAX-parses the dump and turns every non-redirect page into a
 * document, which is handed to the returned iterator through a bounded queue. A fixed pool
 * of 16 document-factory copies is recycled between the parser and the consumer: the parser
 * takes a free factory for each page, and the iterator gives the factory of the previously
 * returned document back when the next document is requested.
 *
 * <p>If the parsing thread fails, the failure is recorded and reported to the consumer as an
 * {@link IOException} from {@code nextDocument()} instead of leaving the consumer blocked.
 *
 * @return an iterator whose {@code nextDocument()} returns {@code null} at the end of the dump
 * @throws IOException declared by the overridden method
 */
@Override
public DocumentIterator iterator() throws IOException {
    final SAXParserFactory saxParserFactory = SAXParserFactory.newInstance();
    saxParserFactory.setNamespaceAware(true);
    final MutableString nameSpaceAccumulator = new MutableString();
    final ObjectOpenHashSet<MutableString> nameSpacesAccumulator = new ObjectOpenHashSet<MutableString>();
    final ArrayBlockingQueue<DocumentFactory> freeFactories = new ArrayBlockingQueue<DocumentFactory>(16);
    // Fill the pool of reusable factories up to the queue capacity.
    for (int i = freeFactories.remainingCapacity(); i-- != 0;)
        freeFactories.add(this.factory.copy());
    final ArrayBlockingQueue<DocumentAndFactory> readyDocumentsAndFactories = new ArrayBlockingQueue<DocumentAndFactory>(
            freeFactories.size());
    // Failure of the parsing thread, if any. It is written before END is enqueued and read
    // after END is dequeued, so the queue hand-off makes the write visible to the consumer.
    final Throwable[] parsingFailure = new Throwable[1];

    final SAXParser parser;
    try {
        parser = saxParserFactory.newSAXParser();
    } catch (Exception e) {
        throw new RuntimeException(e.getMessage(), e);
    }
    final DefaultHandler handler = new DefaultHandler() {
        // NOTE(review): 'Z' is matched as a literal and no time zone is set on the format, so
        // timestamps are interpreted in the JVM default zone — confirm whether UTC was intended.
        private final SimpleDateFormat dateFormat = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss'Z'");
        private boolean inText;
        private boolean inTitle;
        private boolean inId;
        private boolean inTimestamp;
        private boolean inNamespaceDef;
        private boolean redirect;
        private MutableString text = new MutableString();
        private MutableString title = new MutableString();
        private MutableString id = new MutableString();
        private MutableString timestamp = new MutableString();
        private final Reference2ObjectMap<Enum<?>, Object> metadata = new Reference2ObjectOpenHashMap<Enum<?>, Object>();
        {
            metadata.put(PropertyBasedDocumentFactory.MetadataKeys.ENCODING, "UTF-8");
            metadata.put(MetadataKeys.REDIRECT, redirectAnchors);
        }

        @Override
        public void startElement(String uri, String localName, String qName, Attributes attributes)
                throws SAXException {
            if ("page".equals(localName)) {
                // A new page starts: reset all per-page state.
                redirect = inText = inTitle = inId = inTimestamp = false;
                text.length(0);
                title.length(0);
                id.length(0);
                timestamp.length(0);
            } else if ("text".equals(localName))
                inText = true;
            else if ("title".equals(localName) && title.length() == 0)
                inTitle = true; // We catch only the first id/title elements.
            else if ("id".equals(localName) && id.length() == 0)
                inId = true;
            else if ("timestamp".equals(localName) && timestamp.length() == 0)
                inTimestamp = true;
            else if ("redirect".equals(localName)) {
                redirect = true;
                if (attributes.getValue("title") != null)
                    // Accumulate the title of the page as virtual text of the redirect page.
                    synchronized (redirectAnchors) {
                        final String link = Encoder.encodeTitleToUrl(attributes.getValue("title"), true);
                        redirectAnchors.add(
                                new AnchorExtractor.Anchor(new MutableString(baseURL.length() + link.length())
                                        .append(baseURL).append(link), title.copy()));
                    }
            } else if ("namespace".equals(localName)) {
                // Found a new namespace
                inNamespaceDef = true;
                nameSpaceAccumulator.length(0);
            }
        }

        @Override
        public void endElement(String uri, String localName, String qName) throws SAXException {
            if ("namespace".equals(localName)) { // Collecting a namespace
                if (nameSpaceAccumulator.length() != 0)
                    nameSpacesAccumulator.add(nameSpaceAccumulator.copy().toLowerCase());
                return;
            }

            if ("namespaces".equals(localName)) { // All namespaces collected
                nameSpaces = ImmutableSet.copyOf(nameSpacesAccumulator);
                return;
            }

            if (!redirect) {
                if ("title".equals(localName)) {
                    // Set basic metadata for the page
                    metadata.put(PropertyBasedDocumentFactory.MetadataKeys.TITLE, title.copy());
                    String link = Encoder.encodeTitleToUrl(title.toString(), true);
                    metadata.put(PropertyBasedDocumentFactory.MetadataKeys.URI,
                            new MutableString(baseURL.length() + link.length()).append(baseURL).append(link));
                    inTitle = false;
                } else if ("id".equals(localName)) {
                    metadata.put(MetadataKeys.ID, Long.valueOf(id.toString()));
                    inId = false;
                } else if ("timestamp".equals(localName)) {
                    try {
                        metadata.put(MetadataKeys.LASTEDIT, dateFormat.parse(timestamp.toString()));
                    } catch (ParseException e) {
                        throw new RuntimeException(e.getMessage(), e);
                    }
                    inTimestamp = false;
                } else if ("text".equals(localName)) {
                    inText = false;
                    if (!keepNamespaced) {
                        // Namespaces are case-insensitive and language-dependent
                        final int pos = title.indexOf(':');
                        if (pos != -1 && isATrueNamespace(title.substring(0, pos)))
                            return;
                    }
                    try {
                        final MutableString html = new MutableString();
                        DocumentFactory freeFactory;
                        try {
                            // Blocks until the consumer has handed a factory back.
                            freeFactory = freeFactories.take();
                        } catch (InterruptedException e) {
                            Thread.currentThread().interrupt(); // keep the interrupt visible
                            throw new RuntimeException(e.getMessage(), e);
                        }
                        if (parseText) {
                            if (DISAMBIGUATION.search(text) != -1) { // It's a disambiguation page.
                                /* Roi's hack: duplicate links using the page title, so the generic name will end up as anchor text. */
                                final MutableString newLinks = new MutableString();
                                for (int start = 0, end; (start = BRACKETS_OPEN.search(text,
                                        start)) != -1; start = end) {
                                    end = start;
                                    final int endOfLink = text.indexOfAnyOf(END_OF_DISAMBIGUATION_LINK, start);
                                    // Note that we don't escape title because we are working at the Wikipedia raw text level.
                                    if (endOfLink != -1) {
                                        newLinks.append(text.array(), start, endOfLink - start).append('|')
                                                .append(title).append("]]\n");
                                        end = endOfLink;
                                    }
                                    end++;
                                }

                                text.append(newLinks);
                            }
                            // We separate categories by OXOXO, so we don't get overflowing phrases.
                            final MutableString category = new MutableString();
                            for (int start = 0, end; (start = CATEGORY_START.search(text,
                                    start)) != -1; start = end) {
                                end = BRACKETS_CLOSED.search(text, start += CATEGORY_START.length());
                                if (end != -1)
                                    category.append(text.subSequence(start, end)).append(" OXOXO ");
                                else
                                    break;
                            }
                            metadata.put(MetadataKeys.CATEGORY, category);

                            // Heuristics to get the first paragraph: skip leading whitespace and
                            // leading {...} / [...] spans, then take the text up to the next newline.
                            metadata.put(MetadataKeys.FIRSTPAR, new MutableString());
                            String plainText = new WikiModel(imageBaseURL, linkBaseURL)
                                    .render(new PlainTextConverter(true), text.toString());
                            for (int start = 0; start < plainText.length(); start++) {
                                if (Character.isWhitespace(plainText.charAt(start)))
                                    continue;
                                if (plainText.charAt(start) == '{') {
                                    start = BRACES_CLOSED.search(plainText, start);
                                    if (start == -1)
                                        break;
                                    start++;
                                } else if (plainText.charAt(start) == '[') {
                                    start = BRACKETS_CLOSED.search(plainText, start);
                                    if (start == -1)
                                        break;
                                    start++;
                                } else {
                                    final int end = plainText.indexOf('\n', start);
                                    if (end != -1)
                                        metadata.put(MetadataKeys.FIRSTPAR,
                                                new MutableString(plainText.substring(start, end)));
                                    break;
                                }
                            }

                            try {
                                WikiModel wikiModel = new WikiModel(imageBaseURL, linkBaseURL);
                                wikiModel.render(new HTMLConverter(), text.toString(), html, false, false);
                                final Map<String, String> categories = wikiModel.getCategories();
                                // Put back category links in the page (they have been parsed by bliki and to not appear anymore in the HTML rendering)
                                for (Entry<String, String> entry : categories.entrySet()) {
                                    final String key = entry.getKey();
                                    final String value = entry.getValue().trim();
                                    if (value.length() != 0) // There are empty such things
                                        html.append("\n<a href=\"").append(baseURL).append("Category:")
                                                .append(Encoder.encodeTitleToUrl(key, true)).append("\">")
                                                .append(HtmlEscapers.htmlEscaper().escape(key))
                                                .append("</a>\n");
                                }
                            } catch (Exception e) {
                                // A page that cannot be rendered is still emitted with whatever HTML was produced.
                                LOGGER.error("Unexpected exception while parsing " + title, e);
                            }
                        }
                        // The metadata map is copied because the handler reuses it for the next page.
                        readyDocumentsAndFactories.put(new DocumentAndFactory(
                                freeFactory.getDocument(IOUtils.toInputStream(html, Charsets.UTF_8),
                                        new Reference2ObjectOpenHashMap<Enum<?>, Object>(metadata)),
                                freeFactory));
                    } catch (InterruptedException e) {
                        Thread.currentThread().interrupt(); // keep the interrupt visible
                        throw new RuntimeException(e.getMessage(), e);
                    } catch (IOException e) {
                        throw new RuntimeException(e.getMessage(), e);
                    }
                }
            }
        }

        @Override
        public void characters(char[] ch, int start, int length) throws SAXException {
            if (inText && parseText)
                text.append(ch, start, length);
            if (inTitle)
                title.append(ch, start, length);
            if (inId)
                id.append(ch, start, length);
            if (inTimestamp)
                timestamp.append(ch, start, length);
            if (inNamespaceDef) {
                nameSpaceAccumulator.append(ch, start, length);
                inNamespaceDef = false; // Dirty, but it works
            }
        }

        @Override
        public void ignorableWhitespace(char[] ch, int start, int length) throws SAXException {
            if (inText && parseText)
                text.append(ch, start, length);
            if (inTitle)
                title.append(ch, start, length);
        }
    };

    final Thread parsingThread = new Thread() {
        @Override
        public void run() {
            try {
                InputStream in = new FileInputStream(wikipediaXmlDump);
                try {
                    if (bzipped)
                        in = new BZip2CompressorInputStream(in);
                    parser.parse(
                            new InputSource(new InputStreamReader(new FastBufferedInputStream(in), Charsets.UTF_8)),
                            handler);
                } finally {
                    // Release the file handle whether or not parsing succeeded.
                    in.close();
                }
            } catch (Exception e) {
                // Record the failure for the consumer; it is rethrown from nextDocument().
                parsingFailure[0] = e;
                LOGGER.error("Unexpected exception while parsing " + wikipediaXmlDump, e);
            } finally {
                // Always signal the end of the stream: without END the consumer would block
                // forever in take() after a parsing failure.
                try {
                    readyDocumentsAndFactories.put(END);
                } catch (InterruptedException e) {
                    Thread.currentThread().interrupt();
                }
            }
        }
    };

    parsingThread.start();

    return new AbstractDocumentIterator() {
        private DocumentFactory lastFactory;
        /** Set once END has been seen, so that further calls do not block on an empty queue. */
        private boolean finished;

        @Override
        public Document nextDocument() throws IOException {
            if (finished)
                return null;
            try {
                final DocumentAndFactory documentAndFactory = readyDocumentsAndFactories.take();
                // The previously returned document is no longer in use: recycle its factory.
                if (lastFactory != null) {
                    freeFactories.put(lastFactory);
                    lastFactory = null;
                }
                if (documentAndFactory == END) {
                    finished = true;
                    if (parsingFailure[0] != null)
                        throw new IOException("Parsing of " + wikipediaXmlDump + " failed", parsingFailure[0]);
                    return null;
                }
                lastFactory = documentAndFactory.factory;
                return documentAndFactory.document;
            } catch (InterruptedException e) {
                Thread.currentThread().interrupt(); // keep the interrupt visible
                throw new RuntimeException(e.getMessage(), e);
            }
        }
    };
}

From source file:it.unimi.di.big.mg4j.document.WikipediaDocumentSequence.java

/**
 * Returns an iterator over the documents extracted from the Wikipedia XML dump.
 *
 * <p>The dump is parsed by a dedicated thread that is started by this method. For each
 * non-redirect page the SAX handler takes a document factory from a pool of 16 copies of
 * {@code this.factory}, builds a document and enqueues it on a bounded queue; the returned
 * iterator dequeues documents and gives each factory back to the pool on the following call
 * to {@code nextDocument()}.
 *
 * <p>When parsing ends, normally or not, the {@code END} marker is enqueued. If parsing
 * failed, {@code nextDocument()} first returns the documents produced before the failure
 * and then throws an {@link IOException} whose cause is the original failure, instead of
 * blocking forever.
 *
 * @return an iterator whose {@code nextDocument()} returns {@code null} once the dump is exhausted.
 * @throws IOException declared by the overridden method; not thrown directly by this implementation.
 */
@Override
public DocumentIterator iterator() throws IOException {
    final SAXParserFactory saxParserFactory = SAXParserFactory.newInstance();
    saxParserFactory.setNamespaceAware(true);
    final MutableString nameSpaceAccumulator = new MutableString();
    final ObjectOpenHashSet<MutableString> nameSpacesAccumulator = new ObjectOpenHashSet<MutableString>();
    // Pool of reusable factories: it bounds the number of documents in flight.
    final ArrayBlockingQueue<DocumentFactory> freeFactories = new ArrayBlockingQueue<DocumentFactory>(16);
    for (int i = freeFactories.remainingCapacity(); i-- != 0;)
        freeFactories.add(this.factory.copy());
    final ArrayBlockingQueue<DocumentAndFactory> readyDocumentsAndFactories = new ArrayBlockingQueue<DocumentAndFactory>(
            freeFactories.size());
    // Failure raised by the parsing thread, if any. It is written before END is enqueued and
    // read only after END has been dequeued, so the queue hand-off makes the write visible.
    final Throwable[] parsingFailure = new Throwable[1];

    final SAXParser parser;
    try {
        parser = saxParserFactory.newSAXParser();
    } catch (Exception e) {
        throw new RuntimeException(e.getMessage(), e);
    }
    final DefaultHandler handler = new DefaultHandler() {
        // NOTE(review): the trailing 'Z' is quoted, hence matched literally, and no time zone is set on
        // the formatter, so timestamps are parsed in the default time zone. Confirm whether UTC was intended.
        private final SimpleDateFormat dateFormat = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss'Z'");
        private boolean inText;
        private boolean inTitle;
        private boolean inId;
        private boolean inTimestamp;
        private boolean inNamespaceDef;
        private boolean redirect;
        private MutableString text = new MutableString();
        private MutableString title = new MutableString();
        private MutableString id = new MutableString();
        private MutableString timestamp = new MutableString();
        // Reused across pages; a copy is handed to each document.
        private final Reference2ObjectMap<Enum<?>, Object> metadata = new Reference2ObjectOpenHashMap<Enum<?>, Object>();
        {
            metadata.put(PropertyBasedDocumentFactory.MetadataKeys.ENCODING, "UTF-8");
            metadata.put(MetadataKeys.REDIRECT, redirectAnchors);
        }

        @Override
        public void startElement(String uri, String localName, String qName, Attributes attributes)
                throws SAXException {
            if ("page".equals(localName)) {
                // A new page: reset all per-page state.
                redirect = inText = inTitle = inId = inTimestamp = false;
                text.length(0);
                title.length(0);
                id.length(0);
                timestamp.length(0);
            } else if ("text".equals(localName))
                inText = true;
            else if ("title".equals(localName) && title.length() == 0)
                inTitle = true; // We catch only the first id/title elements.
            else if ("id".equals(localName) && id.length() == 0)
                inId = true;
            else if ("timestamp".equals(localName) && timestamp.length() == 0)
                inTimestamp = true;
            else if ("redirect".equals(localName)) {
                redirect = true;
                if (attributes.getValue("title") != null)
                    // Accumulate the title of the page as virtual text of the redirect page.
                    synchronized (redirectAnchors) {
                        final String link = Encoder.encodeTitleToUrl(attributes.getValue("title"), true);
                        redirectAnchors.add(
                                new AnchorExtractor.Anchor(new MutableString(baseURL.length() + link.length())
                                        .append(baseURL).append(link), title.copy()));
                    }
            } else if ("namespace".equals(localName)) {
                // Found a new namespace
                inNamespaceDef = true;
                nameSpaceAccumulator.length(0);
            }
        }

        @Override
        public void endElement(String uri, String localName, String qName) throws SAXException {
            if ("namespace".equals(localName)) { // Collecting a namespace
                // The name is complete only now: the parser may have delivered it in several chunks.
                inNamespaceDef = false;
                if (nameSpaceAccumulator.length() != 0)
                    nameSpacesAccumulator.add(nameSpaceAccumulator.copy().toLowerCase());
                return;
            }

            if ("namespaces".equals(localName)) { // All namespaces collected
                nameSpaces = ImmutableSet.copyOf(nameSpacesAccumulator);
                return;
            }

            if (!redirect) {
                if ("title".equals(localName)) {
                    // Set basic metadata for the page
                    metadata.put(PropertyBasedDocumentFactory.MetadataKeys.TITLE, title.copy());
                    String link = Encoder.encodeTitleToUrl(title.toString(), true);
                    metadata.put(PropertyBasedDocumentFactory.MetadataKeys.URI,
                            new MutableString(baseURL.length() + link.length()).append(baseURL).append(link));
                    inTitle = false;
                } else if ("id".equals(localName)) {
                    metadata.put(MetadataKeys.ID, Long.valueOf(id.toString()));
                    inId = false;
                } else if ("timestamp".equals(localName)) {
                    try {
                        metadata.put(MetadataKeys.LASTEDIT, dateFormat.parse(timestamp.toString()));
                    } catch (ParseException e) {
                        throw new RuntimeException(e.getMessage(), e);
                    }
                    inTimestamp = false;
                } else if ("text".equals(localName)) {
                    inText = false;
                    if (!keepNamespaced) {
                        // Namespaces are case-insensitive and language-dependent
                        final int pos = title.indexOf(':');
                        if (pos != -1 && nameSpaces.contains(title.substring(0, pos).toLowerCase()))
                            return;
                    }
                    try {
                        final MutableString html = new MutableString();
                        DocumentFactory freeFactory;
                        try {
                            // Blocks until the consumer has given a factory back to the pool.
                            freeFactory = freeFactories.take();
                        } catch (InterruptedException e) {
                            Thread.currentThread().interrupt(); // Keep the interruption visible to callers.
                            throw new RuntimeException(e.getMessage(), e);
                        }
                        if (parseText) {
                            if (DISAMBIGUATION.search(text) != -1) { // It's a disambiguation page.
                                /* Roi's hack: duplicate links using the page title, so the generic name will end up as anchor text. */
                                final MutableString newLinks = new MutableString();
                                for (int start = 0, end; (start = BRACKETS_OPEN.search(text,
                                        start)) != -1; start = end) {
                                    end = start;
                                    final int endOfLink = text.indexOfAnyOf(END_OF_DISAMBIGUATION_LINK, start);
                                    // Note that we don't escape title because we are working at the Wikipedia raw text level.
                                    if (endOfLink != -1) {
                                        newLinks.append(text.array(), start, endOfLink - start).append('|')
                                                .append(title).append("]]\n");
                                        end = endOfLink;
                                    }
                                    end++;
                                }

                                text.append(newLinks);
                            }
                            // We separate categories by OXOXO, so we don't get overflowing phrases.
                            final MutableString category = new MutableString();
                            for (int start = 0, end; (start = CATEGORY_START.search(text,
                                    start)) != -1; start = end) {
                                end = BRACKETS_CLOSED.search(text, start += CATEGORY_START.length());
                                if (end != -1)
                                    category.append(text.subSequence(start, end)).append(" OXOXO ");
                                else
                                    break;
                            }
                            metadata.put(MetadataKeys.CATEGORY, category);

                            // Heuristics to get the first paragraph: skip leading whitespace and
                            // brace/bracket-delimited spans, then take the text up to the first newline.
                            metadata.put(MetadataKeys.FIRSTPAR, new MutableString());
                            String plainText = wikiModel.render(new PlainTextConverter(true), text.toString());
                            for (int start = 0; start < plainText.length(); start++) {
                                if (Character.isWhitespace(plainText.charAt(start)))
                                    continue;
                                if (plainText.charAt(start) == '{') {
                                    start = BRACES_CLOSED.search(plainText, start);
                                    if (start == -1)
                                        break;
                                    start++;
                                } else if (plainText.charAt(start) == '[') {
                                    start = BRACKETS_CLOSED.search(plainText, start);
                                    if (start == -1)
                                        break;
                                    start++;
                                } else {
                                    final int end = plainText.indexOf('\n', start);
                                    if (end != -1)
                                        metadata.put(MetadataKeys.FIRSTPAR,
                                                new MutableString(plainText.substring(start, end)));
                                    break;
                                }
                            }

                            try {
                                wikiModel.render(new HTMLConverter(), text.toString(), html, false, true);
                                final Map<String, String> categories = wikiModel.getCategories();
                                // Put back category links in the page (they have been parsed by bliki and do not appear anymore in the HTML rendering)
                                for (Entry<String, String> entry : categories.entrySet()) {
                                    final String key = entry.getKey();
                                    final String value = entry.getValue().trim();
                                    if (value.length() != 0) // There are empty such things
                                        html.append("\n<a href=\"").append(baseURL).append("Category:")
                                                .append(Encoder.encodeTitleToUrl(key, true)).append("\">")
                                                .append(HtmlEscapers.htmlEscaper().escape(key))
                                                .append("</a>\n");
                                }
                            } catch (Exception e) {
                                // A rendering failure is logged; the page is still emitted with whatever was rendered.
                                LOGGER.error("Unexpected exception while parsing " + title, e);
                            }
                        }
                        readyDocumentsAndFactories.put(new DocumentAndFactory(
                                freeFactory.getDocument(IOUtils.toInputStream(html, Charsets.UTF_8),
                                        new Reference2ObjectOpenHashMap<Enum<?>, Object>(metadata)),
                                freeFactory));
                    } catch (InterruptedException e) {
                        Thread.currentThread().interrupt(); // Keep the interruption visible to callers.
                        throw new RuntimeException(e.getMessage(), e);
                    } catch (IOException e) {
                        throw new RuntimeException(e.getMessage(), e);
                    }
                }
            }
        }

        @Override
        public void characters(char[] ch, int start, int length) throws SAXException {
            if (inText && parseText)
                text.append(ch, start, length);
            if (inTitle)
                title.append(ch, start, length);
            if (inId)
                id.append(ch, start, length);
            if (inTimestamp)
                timestamp.append(ch, start, length);
            // Accumulate every chunk: the flag is cleared when the namespace element ends.
            if (inNamespaceDef)
                nameSpaceAccumulator.append(ch, start, length);
        }

        @Override
        public void ignorableWhitespace(char[] ch, int start, int length) throws SAXException {
            if (inText && parseText)
                text.append(ch, start, length);
            if (inTitle)
                title.append(ch, start, length);
        }
    };

    final Thread parsingThread = new Thread() {
        @Override
        public void run() {
            InputStream in = null;
            try {
                in = new FileInputStream(wikipediaXmlDump);
                if (bzipped)
                    in = new BZip2CompressorInputStream(in);
                parser.parse(
                        new InputSource(new InputStreamReader(new FastBufferedInputStream(in), Charsets.UTF_8)),
                        handler);
            } catch (Exception e) {
                parsingFailure[0] = e; // Reported to the consumer once it dequeues END.
                throw new RuntimeException(e.getMessage(), e);
            } catch (Error e) {
                parsingFailure[0] = e; // Otherwise the consumer would see a silently truncated stream.
                throw e;
            } finally {
                if (in != null)
                    try {
                        in.close();
                    } catch (IOException e) {
                        LOGGER.error("Unable to close " + wikipediaXmlDump, e);
                    }
                // Always signal the end of the stream, so that the consumer never blocks forever.
                try {
                    readyDocumentsAndFactories.put(END);
                } catch (InterruptedException e) {
                    Thread.currentThread().interrupt();
                    // Best effort: try a non-blocking hand-off before giving up.
                    if (!readyDocumentsAndFactories.offer(END))
                        LOGGER.error("Interrupted while signalling the end of " + wikipediaXmlDump, e);
                }
            }
        }
    };

    parsingThread.start();

    return new AbstractDocumentIterator() {
        /** The factory of the last returned document, to be given back to the pool on the next call. */
        private DocumentFactory lastFactory;
        /** Whether END has already been dequeued. */
        private boolean exhausted;

        @Override
        public Document nextDocument() throws IOException {
            if (!exhausted) {
                final DocumentAndFactory documentAndFactory;
                try {
                    documentAndFactory = readyDocumentsAndFactories.take();
                    if (lastFactory != null) {
                        // The previous document is no longer in use: recycle its factory.
                        freeFactories.put(lastFactory);
                        lastFactory = null;
                    }
                } catch (InterruptedException e) {
                    Thread.currentThread().interrupt(); // Keep the interruption visible to callers.
                    throw new RuntimeException(e.getMessage(), e);
                }
                if (documentAndFactory != END) {
                    lastFactory = documentAndFactory.factory;
                    return documentAndFactory.document;
                }
                exhausted = true;
            }
            // No more documents: report a parsing failure, if any, rather than a plain end of stream.
            if (parsingFailure[0] != null)
                throw new IOException("Error while parsing " + wikipediaXmlDump, parsingFailure[0]);
            return null;
        }
    };
}