Example usage for org.dom4j Node getName

List of usage examples for org.dom4j Node getName

Introduction

On this page you can find example usages of the org.dom4j Node getName method.

Prototype

String getName();

Source Link

Document

getName returns the name of this node.

Usage

From source file:de.tudarmstadt.ukp.dkpro.wsd.senseval.reader.Senseval2AWReader.java

License:Apache License

/**
 * Fills the given CAS with the next element supplied by {@code textIterator}.
 *
 * <p>The child nodes of the element are visited in document order. The text of
 * every node (with newlines replaced by spaces) is appended to the document
 * text; satellite and head elements additionally produce
 * {@code LexicalItemConstituent} / {@code WSDItem} annotations positioned at the
 * offset the node's text has inside the accumulated document text.
 *
 * @param jCas the CAS that receives the document text, annotations and metadata
 * @throws IOException if the document metadata URI is syntactically invalid
 * @throws CollectionException if a named node is neither a satellite, a head,
 *             nor an entity node
 */
@SuppressWarnings("unchecked")
@Override
public void getNext(JCas jCas) throws IOException, CollectionException {
    // Number of characters appended to documentText so far
    int offset = 0;
    // A StringBuilder avoids re-copying the whole text for every node
    StringBuilder documentText = new StringBuilder();
    Element text = textIterator.next();

    Map<String, WSDItem> wsdItems = new HashMap<String, WSDItem>();
    Map<String, LexicalItemConstituent> lics = new HashMap<String, LexicalItemConstituent>();
    Map<String, String> sats = new HashMap<String, String>();

    // Loop over all nodes to get the document text in order
    for (Iterator<Node> nodeIterator = text.nodeIterator(); nodeIterator.hasNext();) {

        Node node = nodeIterator.next();
        String nodeText = node.getText().replace('\n', ' ');
        String nodeName = node.getName();

        // Unnamed nodes only contribute their text; named nodes are classified first
        if (nodeName != null) {
            if (nodeName.equals(SATELLITE_ELEMENT_NAME)) {
                // A satellite becomes a LexicalItemConstituent
                String id = ((Element) node).attributeValue(ID_ATTRIBUTE_NAME);
                LexicalItemConstituent lic = newLexicalItemConstituent(jCas, id, LIC_TYPE_SATELLITE, offset,
                        nodeText.length());
                lics.put(id, lic);
            } else if (nodeName.equals(HEAD_ELEMENT_NAME)) {
                // A head becomes a LexicalItemConstituent and a WSDItem
                Element head = (Element) node;
                String id = head.attributeValue(ID_ATTRIBUTE_NAME);
                String satellites = head.attributeValue(SATELLITES_ATTRIBUTE_NAME);

                lics.put(id, newLexicalItemConstituent(jCas, id, LIC_TYPE_HEAD, offset, nodeText.length()));
                WSDItem wsdItem = newWsdItem(jCas, id, LIC_TYPE_HEAD, offset, nodeText.length(), null, nodeText);
                wsdItems.put(id, wsdItem);

                if (satellites != null) {
                    sats.put(id, satellites);
                }
            } else if (!"Entity".equals(node.getNodeTypeName())) {
                // Any other named node that is not an entity is unexpected
                throw new CollectionException("unknown_element", new Object[] { node.getName() });
            }
        }

        offset += nodeText.length();
        documentText.append(nodeText);
    }

    populateLexicalItemConstituents(jCas, wsdItems, lics, sats);

    jCas.setDocumentText(documentText.toString());

    try {
        setDocumentMetadata(jCas, text.attributeValue(ID_ATTRIBUTE_NAME));
    } catch (URISyntaxException e) {
        throw new IOException(e);
    }

    textCount++;
}

From source file:de.tudarmstadt.ukp.dkpro.wsd.senseval.reader.Senseval2LSReader.java

License:Apache License

/**
 * Fills the given CAS with the next instance of the current lexical element.
 *
 * <p>When the current {@code instanceIterator} is exhausted, the next element is
 * taken from {@code lexeltIterator}, its POS and lemma are derived from its item
 * attribute, and {@code textCount} is incremented. The child nodes of the
 * instance's context element are then visited in document order: the text of
 * every node (with newlines replaced by spaces) is appended to the document
 * text, and satellite and head elements additionally produce annotations
 * positioned at the node's offset inside the accumulated text. The head
 * annotation is keyed by the id of the enclosing instance.
 *
 * @param jCas the CAS that receives the document text, annotations and metadata
 * @throws IOException if the document metadata URI is syntactically invalid
 * @throws CollectionException if a named node is neither a satellite, a head,
 *             nor an entity node
 */
@SuppressWarnings("unchecked")
@Override
public void getNext(JCas jCas) throws IOException, CollectionException {
    // If there are no more instances in the current lexelt, advance to the next lexelt
    if (!instanceIterator.hasNext()) {
        lexelt = lexeltIterator.next();
        lexeltPOS = getLexeltPOS(lexelt.attributeValue(ITEM_ATTRIBUTE_NAME));
        lexeltLemma = getLexeltLemma(lexelt.attributeValue(ITEM_ATTRIBUTE_NAME));
        textCount++;
        instanceIterator = lexelt.elementIterator(INSTANCE_ELEMENT_NAME);
    }

    Element instance = instanceIterator.next();
    Element context = instance.element(CONTEXT_ELEMENT_NAME);
    // Number of characters appended to documentText so far
    int offset = 0;
    // A StringBuilder avoids re-copying the whole text for every node
    StringBuilder documentText = new StringBuilder();
    Map<String, WSDItem> wsdItems = new HashMap<String, WSDItem>();
    Map<String, LexicalItemConstituent> lics = new HashMap<String, LexicalItemConstituent>();
    Map<String, String> sats = new HashMap<String, String>();

    // Loop over all nodes to get the document text in order
    for (Iterator<Node> nodeIterator = context.nodeIterator(); nodeIterator.hasNext();) {

        Node node = nodeIterator.next();
        String nodeText = node.getText().replace('\n', ' ');
        String nodeName = node.getName();

        // Unnamed nodes only contribute their text; named nodes are classified first
        if (nodeName != null) {
            if (nodeName.equals(SATELLITE_ELEMENT_NAME)) {
                // A satellite becomes a LexicalItemConstituent
                String id = ((Element) node).attributeValue(ID_ATTRIBUTE_NAME);
                lics.put(id, newLexicalItemConstituent(jCas, id, LIC_TYPE_SATELLITE, offset, nodeText.length()));
            } else if (nodeName.equals(HEAD_ELEMENT_NAME)) {
                // A head becomes a LexicalItemConstituent and a WSDItem,
                // both identified by the id of the enclosing instance
                String id = instance.attributeValue(ID_ATTRIBUTE_NAME);
                String satellites = ((Element) node).attributeValue(SATELLITES_ATTRIBUTE_NAME);

                lics.put(id, newLexicalItemConstituent(jCas, id, LIC_TYPE_HEAD, offset, nodeText.length()));
                wsdItems.put(id,
                        newWsdItem(jCas, id, LIC_TYPE_HEAD, offset, nodeText.length(), lexeltPOS, lexeltLemma));

                if (satellites != null) {
                    sats.put(id, satellites);
                }
            } else if (!"Entity".equals(node.getNodeTypeName())) {
                // Any other named node that is not an entity is unexpected
                throw new CollectionException("unknown_element", new Object[] { node.getName() });
            }
        }

        offset += nodeText.length();
        documentText.append(nodeText);
    }

    populateLexicalItemConstituents(jCas, wsdItems, lics, sats);

    jCas.setDocumentText(documentText.toString());

    try {
        setDocumentMetadata(jCas, instance.attributeValue(ID_ATTRIBUTE_NAME));
    } catch (URISyntaxException e) {
        throw new IOException(e);
    }

}

From source file:edu.scripps.fl.dom4j.util.FormControlVisitorSupport.java

License:Apache License

/**
 * Visits the given element and records a form parameter for it when it is an
 * {@code input}, {@code textarea} or {@code select} element (names are compared
 * case-insensitively).
 *
 * <ul>
 * <li>{@code input}: the {@code value} attribute is recorded, except that a
 * checkbox whose {@code checked} attribute evaluates to the empty string is
 * recorded with an empty value.</li>
 * <li>{@code textarea}: the element text is recorded.</li>
 * <li>{@code select}: the {@code value} of the first {@code option} child that
 * carries a {@code selected} attribute is recorded; nothing is recorded when no
 * option is selected.</li>
 * </ul>
 *
 * @param node the element being visited
 */
public void visit(Element node) {
    String tagName = node.getName();

    if ("input".equalsIgnoreCase(tagName)) {
        String value = node.valueOf("@value");
        boolean isCheckbox = "checkbox".equalsIgnoreCase(node.valueOf("@type"));
        if (isCheckbox && "".equals(node.valueOf("@checked"))) {
            value = "";
        }
        addParameter(node.valueOf("@name"), value);
        return;
    }

    if ("textarea".equalsIgnoreCase(tagName)) {
        addParameter(node.valueOf("@name"), node.getText());
        return;
    }

    if (!"select".equalsIgnoreCase(tagName)) {
        return;
    }

    List<Node> options = (List<Node>) node.selectNodes("option");
    for (Node option : options) {
        List<Node> optionAttributes = (List<Node>) ((Element) option).attributes();
        for (Node attribute : optionAttributes) {
            if (!"selected".equalsIgnoreCase(attribute.getName())) {
                continue;
            }
            // Only the first selected option is reported
            addParameter(node.valueOf("@name"), option.valueOf("@value"));
            return;
        }
    }
}

From source file:edu.scripps.fl.pubchem.app.summary.NodesStage.java

License:Apache License

/**
 * Emits every {@code DocumentSummary} child node found under
 * {@code /eSummaryResult/DocumentSummarySet} (eUtils version 2.0 layout) of the
 * given document.
 *
 * @param obj the object to process; it is cast to a dom4j {@code Document}
 * @throws StageException wrapping any exception raised while selecting or
 *             emitting the nodes
 */
@Override
public void innerProcess(Object obj) throws StageException {
    Document summaryDocument = (Document) obj;
    try {
        Element summarySet = (Element) summaryDocument.selectSingleNode("/eSummaryResult/DocumentSummarySet");
        Iterator<Node> children = summarySet.nodeIterator();
        while (children.hasNext()) {
            Node child = children.next();
            if (!"DocumentSummary".equals(child.getName())) {
                continue;
            }
            emit(child);
        }
    } catch (Exception ex) {
        throw new StageException(this, ex);
    }
}

From source file:edu.scripps.fl.pubchem.PubChemFactory.java

License:Apache License

/**
 * Copies the values of an Entrez document-summary node into the given assay bean.
 *
 * <p>The {@code uid} attribute becomes the assay's AID. Each child element that
 * has no child elements of its own is mapped through {@code propMap} to a bean
 * property and set via {@code BeanUtils}; values for {@code Date}-typed
 * properties are converted with {@code parseDate} first. Element names without
 * a mapping are logged once and remembered in {@code unprocessedProperties}.
 * Finally the description is taken from the {@code AssayDescription} element
 * unless the assay already holds a description containing a newline.
 *
 * @param assay the bean to populate
 * @param docSumNode the document-summary node to read from
 * @throws Exception if the summary contains an {@code error} element, or if
 *             parsing or property access fails
 */
public void populateAssayFromSummaryDocument(PCAssay assay, Node docSumNode) throws Exception {
    Node errorNode = docSumNode.selectSingleNode("error");
    if (errorNode != null)
        throw new Exception("Entrez error: " + errorNode.getText());

    List<Node> list = docSumNode.selectNodes("*");
    String uid = docSumNode.valueOf("@uid");
    assay.setAID(Integer.parseInt(uid));
    for (Node node : list) {
        // Elements that contain child elements are not mapped to bean properties
        if (!node.selectNodes("*").isEmpty())
            continue;

        String name = node.getName();
        Object value = node.getText();
        String property = propMap.getProperty(name);
        if (null != property) {
            Class<?> clazz = PropertyUtils.getPropertyType(assay, property);
            if (clazz.isAssignableFrom(Date.class))
                value = parseDate(value);
            BeanUtils.setProperty(assay, property, value);
        } else if (!unprocessedProperties.containsKey(name)) {
            // Warn only once per unmapped element name
            unprocessedProperties.put(name, "");
            log.warn(String.format("Cannot determine PCAssay bean property '%s'", name));
        }
    }
    String desc = assay.getDescription();
    // eutils summary description doesn't contain new lines
    // so don't update it if it already has a value (when we populate via xml first).
    // An empty description contains no newline, so it is covered by the second test.
    if (desc == null || !desc.contains("\n")) {
        Node descNode = docSumNode.selectSingleNode("AssayDescription");
        // The summary may lack an AssayDescription element; keep the current value then
        if (descNode != null)
            assay.setDescription(descNode.getText());
    }
}

From source file:edu.scripps.fl.pubchem.PubChemXMLParserFactory.java

License:Apache License

/**
 * Builds a {@code PCAssay} from a PubChem assay XML node.
 *
 * <p>The {@code PC-AssayDescription} element is either the given node itself or
 * its first descendant with that name. From it the method reads the AID,
 * version, revision, source tracking data, optional hold-until date, name,
 * description, protocol, comment, activity outcome method, grant number and
 * project category, and then delegates cross references, targets, panels,
 * columns and comments to the corresponding {@code handle*} methods.
 *
 * @param topNode the node that is, or contains, a {@code PC-AssayDescription}
 * @return the populated assay
 * @throws Exception if no {@code PC-AssayDescription} node can be found
 * @throws RuntimeException wrapping any failure that occurs after the AID has
 *             been read, with the AID in the message
 */
protected PCAssay populateAssayFromXMLNode(Node topNode) throws Exception {
    // Constant-first comparison: a node's name can be null, which must not cause an NPE here
    Node assayDescNode;
    if ("PC-AssayDescription".equals(topNode.getName()))
        assayDescNode = topNode;
    else
        assayDescNode = topNode.selectSingleNode(".//PC-AssayDescription");
    if (assayDescNode == null)
        throw new Exception(
                String.format("Cannot find PC-AssayDescription node in provided node %s", topNode.getPath()));

    Node node = assayDescNode.selectSingleNode("PC-AssayDescription_aid/PC-ID/PC-ID_id");
    Integer aid = Integer.valueOf(node.getText());

    try {
        PCAssay assay = new PCAssay();
        // Only a positive AID is stored on the assay
        if (aid > 0)
            assay.setAID(aid);

        node = assayDescNode.selectSingleNode("PC-AssayDescription_aid/PC-ID/PC-ID_version");
        Integer version = Integer.valueOf(node.getText());
        assay.setVersion(version);

        node = assayDescNode.selectSingleNode("PC-AssayDescription_revision");
        Integer revision = Integer.valueOf(node.getText());
        assay.setRevision(revision);

        Node trackingNode = assayDescNode
                .selectSingleNode("PC-AssayDescription_aid-source/PC-Source/PC-Source_db/PC-DBTracking");

        node = trackingNode.selectSingleNode("PC-DBTracking_name");
        assay.setSourceName(node.getText());

        node = trackingNode.selectSingleNode("PC-DBTracking_source-id/Object-id/Object-id_str");
        assay.setExtRegId(node.getText());

        // hold until date
        node = trackingNode.selectSingleNode("PC-DBTracking_date");
        if (node != null) {
            String year = node.selectSingleNode("Date/Date_std/Date-std/Date-std_year").getText();
            String month = node.selectSingleNode("Date/Date_std/Date-std/Date-std_month").getText();
            String day = node.selectSingleNode("Date/Date_std/Date-std/Date-std_day").getText();
            if (DEBUGGING)
                log.info("year: " + year + " month: " + month + " day: " + day);
            // NOTE(review): only year/month/day are set, so the time of day stays at
            // the moment of parsing - confirm whether callers expect midnight instead.
            Calendar calendar = Calendar.getInstance();
            // Calendar months are zero-based, the XML months are one-based
            calendar.set(Integer.parseInt(year), Integer.parseInt(month) - 1, Integer.parseInt(day));
            assay.setHoldUntilDate(calendar.getTime());
            if (DEBUGGING)
                log.info(calendar.getTime().toString());
        }

        node = assayDescNode.selectSingleNode("PC-AssayDescription_name");
        assay.setName(node.getText());

        List<Node> nodes = assayDescNode
                .selectNodes("PC-AssayDescription_description/PC-AssayDescription_description_E");
        assay.setDescription(join(nodes, separator));

        nodes = assayDescNode.selectNodes("PC-AssayDescription_protocol/PC-AssayDescription_protocol_E");
        assay.setProtocol(join(nodes, separator));

        nodes = assayDescNode.selectNodes("PC-AssayDescription_comment/PC-AssayDescription_comment_E");
        assay.setComment(join(nodes, separator));

        // The remaining scalar fields are optional in the XML
        node = assayDescNode.selectSingleNode("PC-AssayDescription_activity-outcome-method");
        if (node != null)
            assay.setActivityOutcomeMethod(node.valueOf("@value"));

        node = assayDescNode
                .selectSingleNode("PC-AssayDescription_grant-number/PC-AssayDescription_grant-number_E");
        if (node != null)
            assay.setGrantNumber(node.getText());

        node = assayDescNode.selectSingleNode("PC-AssayDescription_project-category");
        if (node != null)
            assay.setProjectCategory(node.valueOf("@value"));

        // Drop any existing cross references before the handlers below add new ones
        assay.getAssayXRefs().removeAll(assay.getAssayXRefs());

        nodes = assayDescNode.selectNodes("PC-AssayDescription_xref/PC-AnnotatedXRef");
        handleXRefs(assay, null, nodes);

        nodes = assayDescNode.selectNodes("PC-AssayDescription_target/PC-AssayTargetInfo");
        handleTargetXRefs(assay, null, nodes);

        handlePanels(assay, assayDescNode);

        handleColumns(assay, assayDescNode);

        handleComments(assay, assayDescNode);

        return assay;
    } catch (Exception ex) {
        throw new RuntimeException("Problem with AID " + aid, ex);
    }
}

From source file:edu.ucsd.library.xdre.imports.RDFDAMS4ImportHandler.java

/**
 * Procedure to populate the RDF metadata and ingest the files.
 *
 * <p>First lists the existing source files named in {@code filesPaths} into
 * {@code filesMap}. Then, for every file in {@code rdfFiles} (until interrupted):
 * <ol>
 * <li>parses the file and visits every {@code rdf:about} attribute; identifiers
 * that end with {@code /COL} or are not http ARK URLs are either replaced by an
 * id from {@code getNewId()} (cached in {@code idsMap}) or handed to
 * {@code updateDocument} together with a lookup field and value;</li>
 * <li>serializes the rewritten document, logs it, loads it into a new
 * {@code RDFStore} and calls {@code initHandler()};</li>
 * <li>sends the merged graph of every entry in {@code items} to
 * {@code damsClient.updateObject} and records successes and failures;</li>
 * <li>uploads the files when {@code importOption} is {@code metadataAndFiles}.</li>
 * </ol>
 *
 * @return the value of the {@code exeResult} field; this method itself does not
 *         assign it
 * @throws Exception declared by the signature; failures while processing a
 *             single file are caught, logged and counted in {@code failedCount}
 */
public boolean execute() throws Exception {
    if (filesPaths != null) {
        File file = null;
        // List the source files
        for (int i = 0; i < filesPaths.length; i++) {
            file = new File(filesPaths[i]);
            if (file.exists()) {
                listFile(filesMap, file);
            }
        }
    }

    String message = "";
    Document doc = null;
    DamsURI damsURI = null;

    String oid = null;
    int fLen = rdfFiles.length;
    String currFile = null;
    SAXReader saxReader = new SAXReader();
    for (int i = 0; i < fLen && !interrupted; i++) {
        currFile = rdfFiles[i].getName();
        setStatus(
                "Processing external import for file " + currFile + " (" + (i + 1) + " of " + fLen + ") ... ");
        try {
            doc = saxReader.read(rdfFiles[i]);
            // Every rdf:about attribute identifies one record in the source file
            List<Node> nodes = doc.selectNodes("//@rdf:about");
            for (int j = 0; j < nodes.size(); j++) {
                Node nUri = nodes.get(j);
                String iUri = nUri.getStringValue();
                Node parentNode = nUri.getParent();
                String nName = parentNode.getName();
                // Only identifiers ending with /COL, or that are not http ARK URLs, are rewritten
                if (iUri.endsWith("/COL") || !(iUri.startsWith("http") && iUri.indexOf("/ark:/") > 0)) {
                    // Assign ARK
                    if (nName.endsWith("Object") || nName.endsWith("Component") || nName.endsWith("File")
                            || (((Element) parentNode).isRootElement()
                                    || (parentNode.getParent().isRootElement()
                                            && parentNode.getParent().getName().equals("RDF")))) {
                        String objId = iUri;

                        if (nName.endsWith("Component") || nName.endsWith("File")) {
                            damsURI = DamsURI.toParts(iUri, null);
                            objId = damsURI.getObject();
                        }
                        // The cache key combines the source object id with the file it came from
                        String srcObjKey = objId + "::" + rdfFiles[i].getAbsolutePath();
                        oid = idsMap.get(srcObjKey);

                        // Assign new ARK
                        if (oid == null) {
                            oid = getNewId();
                            idsMap.put(srcObjKey, oid);
                        }

                        if (nName.endsWith("Object")) {
                            objId = oid;
                            objRecords.put(objId, currFile);
                        } else if (nName.endsWith("Component") || nName.endsWith("File")) {
                            damsURI.setObject(oid);
                            // XXX
                            // Assign cid and fid for Component and File if required
                            objId = damsURI.toString();
                        } else
                            objId = oid;
                        nUri.setText(objId);
                        updateReference(doc, iUri, objId);
                    } else {
                        // Other records: pick the field/element whose text is passed to updateDocument
                        String field = null;
                        Node tNode = null;
                        String xPath = null;
                        Map<String, String> props = new TreeMap<String, String>();
                        String elemXPath = parentNode.getPath();
                        if (nName.endsWith("Collection") || nName.endsWith("CollectionPart")) {
                            // Retrieve the Collection record
                            field = "title_tesim";
                            xPath = "dams:title/mads:Title/mads:authoritativeLabel";
                            tNode = parentNode.selectSingleNode(xPath);
                            if (tNode == null) {
                                // Loop through to locate the mads:authoritativeLabel if not selected by xPath.
                                Node n = parentNode.selectSingleNode("dams:title");
                                for (Iterator<Element> it = ((Element) n).elementIterator(); it.hasNext();) {
                                    Element elem = it.next();
                                    if (elem.getNamespacePrefix().equals("mads")
                                            && elem.getName().equals("Title"))
                                        tNode = elem.selectSingleNode("mads:authoritativeLabel");
                                }
                            }
                        } /* else if (nName.endsWith("Language") || nName.endsWith("Authority") || nName.endsWith("Subject") || nName.endsWith("Name") || nName.endsWith("Topic") || nName.endsWith("GenreForm") || nName.endsWith("Temporal") || nName.endsWith("Geographic")){
                            // Subject, Authority records use mads:authoritativeLabel
                            field = "name_tesim";
                            xPath = "mads:authoritativeLabel";
                            tNode = parentNode.selectSingleNode(xPath);
                          } */else if (nName.endsWith(COPYRIGHT)) {
                            // Copyright records use dams:copyrightStatus, plus other properties in the next step.
                            field = "status_tesim";
                            xPath = "dams:copyrightStatus";
                            tNode = parentNode.selectSingleNode(xPath);
                            props = copyrightProperties(parentNode);
                        } else if (nName.endsWith(LICENSE)) {
                            // License records use dams:licenseNote, plus other properties in the next step.
                            field = "note_tesim";
                            xPath = "dams:licenseNote";
                            tNode = parentNode.selectSingleNode(xPath);
                            props = licenseProperties(parentNode);
                        } else if (nName.endsWith(OTHERRIGHTS)) {
                            // OtherRights records use dams:otherRightsBasis, plus other properties in the next step.
                            // NOTE(review): the properties come from licenseProperties() here - confirm this is intended.
                            field = "otherRightsBasis_tesim";
                            xPath = "dams:otherRightsBasis";
                            tNode = parentNode.selectSingleNode(xPath);
                            props = licenseProperties(parentNode);
                        } else if (nName.endsWith(RELATEDRESOURCE)) {
                            // RelatedResource records use dams:description, plus other properties in the next step.
                            field = "description_tesim";
                            xPath = "dams:description";
                            tNode = parentNode.selectSingleNode(xPath);
                            props = relatedResourceProperties(parentNode);
                        } else if (nName.endsWith(SOURCECAPTURE)) {
                            // SourceCapture records use dams:sourceType, plus other properties in the next step.
                            field = "sourceType_tesim";
                            xPath = "dams:sourceType";
                            tNode = parentNode.selectSingleNode(xPath);
                            props = sourceCaptureProperties(parentNode);
                        } else if (elemXPath.indexOf("mads", elemXPath.lastIndexOf('/') + 1) >= 0) {
                            // The last path segment of the element contains "mads"
                            // MADSScheme and Language
                            if (nName.endsWith(MADSSCHEME) || nName.equals(LANGUAGE)) {
                                field = "code_tesim";
                                xPath = "mads:code";
                                tNode = parentNode.selectSingleNode(xPath);
                                if (tNode == null) {
                                    field = "name_tesim";
                                    xPath = "rdfs:label";
                                    tNode = parentNode.selectSingleNode(xPath);
                                    if (tNode == null) {
                                        // Loop through to locate the rdfs:label if not selected by xPath.
                                        for (Iterator<Element> it = ((Element) parentNode).elementIterator(); it
                                                .hasNext();) {
                                            Element elem = it.next();
                                            if (elem.getNamespacePrefix().equals("rdfs")
                                                    && elem.getName().equals("label"))
                                                tNode = elem;
                                        }
                                    }
                                }
                            } else {
                                // Subject, Authority records use mads:authoritativeLabel
                                field = "name_tesim";
                                xPath = "mads:authoritativeLabel";
                                tNode = parentNode.selectSingleNode(xPath);
                                if (tNode == null) {
                                    // Try to use the mads:code for mapping when mads:authoritativeLabel is not available
                                    field = "code_tesim";
                                    xPath = "mads:code";
                                    tNode = parentNode.selectSingleNode(xPath);
                                }
                                // Mapping for mads:isMemberOfMADSScheme
                                String madsScheme = null;
                                Node madsSchemeNode = parentNode.selectSingleNode("mads:isMemberOfMADSScheme");
                                if (madsSchemeNode != null) {
                                    Node msValueNode = madsSchemeNode.selectSingleNode("@rdf:resource");
                                    if (msValueNode != null) {
                                        // NOTE(review): the value is read from madsSchemeNode, not from the
                                        // rdf:resource attribute (msValueNode) just selected - confirm.
                                        madsScheme = madsSchemeNode.getStringValue();
                                        props.put("scheme_tesim", madsScheme);
                                    } else if ((madsSchemeNode = madsSchemeNode
                                            .selectSingleNode("mads:MADSScheme")) != null
                                            && madsSchemeNode.hasContent()) {
                                        // Inline MADSScheme: prefer its code, fall back to its label
                                        if ((msValueNode = madsSchemeNode
                                                .selectSingleNode("mads:code")) != null) {
                                            madsScheme = msValueNode.getText();
                                            props.put("scheme_code_tesim", madsScheme);
                                        } else if ((msValueNode = madsSchemeNode
                                                .selectSingleNode("rdfs:label")) != null) {
                                            madsScheme = msValueNode.getText();
                                            props.put("scheme_name_tesim", madsScheme);
                                        }
                                    } else {
                                        props.put("scheme_tesim", "");
                                    }
                                } else {
                                    props.put("scheme_tesim", null);
                                }
                            }

                        } else {
                            // XXX Other Rights records like Statute, License, Other Rights etc. 
                            // NOTE(review): field is set to "value_tesim" and then unconditionally
                            // overwritten with "code_tesim" before the null check - confirm intent.
                            field = "value_tesim";
                            xPath = "rdf:value";
                            tNode = parentNode.selectSingleNode(xPath);
                            field = "code_tesim";
                            if (tNode == null) {
                                xPath = "dams:code";
                                tNode = parentNode.selectSingleNode(xPath);
                            }
                        }
                        if (tNode == null) {
                            throw new Exception("Element " + xPath + " is missing from the " + nName
                                    + " record " + iUri + " in file " + currFile + ".");
                        }
                        updateDocument(doc, parentNode, field, tNode.getText(), props);
                    }
                } else if (nName.endsWith("Object")) {
                    // Objects that already carry an ARK URL are only registered
                    objRecords.put(iUri, currFile);
                }
            }

            String dams4Rdf = doc.asXML();
            logData("dams4_" + rdfFiles[i].getName(), dams4Rdf);

            // Ingest the records
            String subjectId = null;
            DamsURI objURI = null;
            List<DamsURI> objURIs = null;
            RDFStore graph = null;

            rdfStore = new RDFStore();
            Model rdf = rdfStore.loadRDFXML(dams4Rdf);
            // presumably initHandler() fills items/objects from rdfStore - TODO confirm
            initHandler();

            Model iRdf = null;
            int jLen = items.size();

            for (int j = 0; j < jLen && !interrupted; j++) {
                graph = new RDFStore();
                recordsCount++;
                // Add subject
                subjectId = items.get(j);
                try {
                    setStatus("Processing metadata for record " + subjectId + " (" + (j + 1) + " of " + jLen
                            + ") in file " + currFile + " ... ");
                    boolean succeeded = false;
                    objURIs = objects.get(subjectId);

                    // Merge the statements of every URI registered for this subject into one graph
                    for (int k = 0; k < objURIs.size(); k++) {
                        objURI = objURIs.get(k);
                        iRdf = rdfStore.querySubject(objURI.toString());
                        graph.merge(iRdf);
                    }

                    // Update object
                    //log.info(j + " ingesting record " + subjectId + ":\n" + graph.export(RDFStore.RDFXML_ABBREV_FORMAT) + "\n\n");

                    succeeded = damsClient.updateObject(subjectId, graph.export(RDFStore.RDFXML_ABBREV_FORMAT),
                            Constants.IMPORT_MODE_ADD);

                    if (!succeeded) {
                        // A file is counted as failed only once, however many of its records fail
                        if (metadataFailed.indexOf(currFile) < 0)
                            failedCount++;
                        metadataFailed.append(subjectId + " (" + currFile + "), \n");
                        message = "Metadata import for record " + subjectId + " failed (" + (j + 1) + " of "
                                + jLen + ") in file " + currFile + ".";
                        setStatus(message);
                        logError(message + "\n Error RDF: \n" + graph.export(RDFStore.RDFXML_ABBREV_FORMAT));
                    } else {
                        recordsIngested.add(subjectId);
                        message = "Metadata import for record " + subjectId + " succeeded (" + (j + 1) + " of "
                                + jLen + ") in file " + currFile + ". ";
                        setStatus(message);
                        logMessage(message);
                        log.info(message);

                        // Update SOLR for records ingested.
                        updateSOLR(subjectId);
                    }

                } catch (Exception e) {
                    e.printStackTrace();
                    if (metadataFailed.indexOf(currFile) < 0)
                        failedCount++;
                    metadataFailed.append(subjectId + " (" + currFile + "), \n");
                    message = "Metadata import failed: " + e.getMessage();
                    setStatus(message + " (" + (j + 1) + " of " + jLen + ") in file " + currFile + ".");
                    logError(message);
                }

                // NOTE(review): the thread's interrupt status is not restored after the
                // InterruptedException is caught; only the interrupted field is set.
                try {
                    Thread.sleep(10);
                } catch (InterruptedException e) {
                    e.printStackTrace();
                    interrupted = true;
                    failedCount++;
                    metadataFailed.append(subjectId + " (" + currFile + ") \n");
                    message = "Metadata import interrupted for subject " + subjectId + ". \n Error: "
                            + e.getMessage() + "\n";
                    setStatus("Canceled");
                    clearSession();
                    logError(message);
                }
            }

            // Ingest the source file
            if (importOption.equalsIgnoreCase("metadataAndFiles")) {
                uploadFiles(rdf, currFile);
            }
        } catch (Exception e) {
            e.printStackTrace();
            failedCount++;
            message = "Import failed for " + currFile + ": " + e.getMessage();
            setStatus(message + " (" + (i + 1) + " of " + fLen + ").");
            logError(message);
        } finally {
            // Update SOLR for files uploaded
            // NOTE(review): objWithFiles is not cleared here, so earlier entries appear to be
            // re-indexed after every RDF file - confirm whether that is intended.
            int iLen = objWithFiles.size();
            for (int j = 0; j < iLen && !interrupted; j++) {
                updateSOLR(objWithFiles.get(j));
            }
        }

        setProgressPercentage(((i + 1) * 100) / fLen);

        // NOTE(review): as above, the interrupt status is not restored after being caught.
        try {
            Thread.sleep(10);
        } catch (InterruptedException e) {
            e.printStackTrace();
            interrupted = true;
            failedCount++;
            message = "Import interrupted for oject in " + currFile + ". \n Error: " + e.getMessage() + "\n";
            setStatus("Canceled");
            clearSession();
            logError(message);
        }
    }
    return exeResult;
}

From source file:edu.ucsd.library.xdre.imports.RDFDAMS4ImportHandler.java

/**
 * Update record for resource linking/*from  w  w  w . ja  va2s .c o  m*/
 * @param url
 * @param node
 */
public void toResourceLinking(String url, Node record) {
    Element pNode = record.getParent();
    if (pNode.getName().endsWith("List")
            && !record.getName().toLowerCase().endsWith(pNode.getName().toLowerCase())) {
        //List elements
        record.setName("rdf:Description");
        ((Element) record).clearContent();
        ((Element) record).selectSingleNode("@rdf:about").setText(toDamsUrl(url));
    } else {
        pNode.addAttribute("rdf:resource", toDamsUrl(url));
        record.detach();
    }
}

From source file:edu.ucsd.library.xdre.imports.RDFDAMS4ImportTsHandler.java

/**
 * Procedure to populate the RDF metadata and ingest the files.
 *
 * <p>Steps performed, per the code below:
 * <ol>
 * <li>Every existing path in {@code filesPaths} is passed to {@code listFile} to fill
 *     {@code filesMap}.</li>
 * <li>Each file in {@code rdfFiles} is parsed and every {@code rdf:about} attribute is
 *     visited. Records whose URI ends with "/COL" or is not an http ARK URL are either
 *     assigned an ARK (objects, components, files and top-level records) or handed to
 *     {@code updateDocument} together with a lookup field, its value and extra properties.
 *     Records that already carry an ARK are registered in {@code objRecords} and, when
 *     {@code replace} is set, queued for replacement.</li>
 * <li>The rewritten document is logged, loaded into {@code rdfStore}, and each sorted item
 *     is sent to {@code damsClient.updateObject}; files are uploaded when
 *     {@code importOption} is "metadataAndFiles"; a SOLR update is requested for every
 *     record and the outcome is logged.</li>
 * </ol>
 *
 * @return the value of {@code exeResult}, which is set to {@code false} when a file fails,
 *         a SOLR update throws, a record's last process step failed, or the run is interrupted
 * @throws Exception declared by the overridden method; failures while processing a file or
 *         record are caught and logged inside the loops
 */
@Override
public boolean execute() throws Exception {
    if (filesPaths != null) {
        File file = null;
        // List the source files
        for (int i = 0; i < filesPaths.length; i++) {
            file = new File(filesPaths[i]);
            if (file.exists()) {
                listFile(filesMap, file);
            }
        }
    }

    String message = "";
    Document doc = null;
    DamsURI damsURI = null;

    List<String> recordsToReplace = null;
    String oid = null;
    int fLen = rdfFiles.length;
    String currFile = null;
    SAXReader saxReader = new SAXReader();
    for (int i = 0; i < fLen && !interrupted; i++) {
        if (i == 0)
            logMessage("Object Import status:\n[Object title]   -   [URI]   -   [Status]   -   [Timestamp]");

        recordsToReplace = new ArrayList<>();
        currFile = rdfFiles[i].getName();

        preprocessedTimestamp = damsDateFormat.format(rdfFiles[i].lastModified());

        setStatus(
                "Processing external import for file " + currFile + " (" + (i + 1) + " of " + fLen + ") ... ");
        try {
            doc = saxReader.read(rdfFiles[i]);
            List<Node> nodes = doc.selectNodes("//@rdf:about");
            for (int j = 0; j < nodes.size(); j++) {
                Node nUri = nodes.get(j);
                String iUri = nUri.getStringValue();
                Node parentNode = nUri.getParent();
                String nName = parentNode.getName();
                if (iUri.endsWith("/COL") || !(iUri.startsWith("http") && iUri.indexOf("/ark:/") > 0)) {
                    // Assign ARK
                    if (nName.endsWith("Object") || nName.endsWith("Component") || nName.endsWith("File")
                            || (((Element) parentNode).isRootElement()
                                    || (parentNode.getParent().isRootElement()
                                            && parentNode.getParent().getName().equals("RDF")))) {
                        String objId = iUri;

                        if (nName.endsWith("Component") || nName.endsWith("File")) {
                            damsURI = DamsURI.toParts(iUri, null);
                            objId = damsURI.getObject();
                        }
                        // The same source id in different files maps to different ARKs.
                        String srcObjKey = objId + "::" + rdfFiles[i].getAbsolutePath();
                        oid = idsMap.get(srcObjKey);

                        // Assign new ARK
                        if (oid == null) {
                            oid = getNewId();
                            idsMap.put(srcObjKey, oid);
                        }

                        if (nName.endsWith("Object")) {
                            objId = oid;
                            objRecords.put(objId, currFile);
                        } else if (nName.endsWith("Component") || nName.endsWith("File")) {
                            damsURI.setObject(oid);
                            // XXX
                            // Assign cid and fid for Component and FIle if required
                            objId = damsURI.toString();
                        } else
                            objId = oid;
                        nUri.setText(objId);
                        updateReference(doc, iUri, objId);
                    } else {
                        String field = null;
                        Node tNode = null;
                        String xPath = null;
                        Map<String, String> props = new TreeMap<String, String>();
                        String elemXPath = parentNode.getPath();
                        if (nName.endsWith("Collection") || nName.endsWith("CollectionPart")) {
                            // Retrieve the Collection record
                            field = "dams:title/mads:authoritativeLabel";
                            xPath = "dams:title/mads:Title/mads:authoritativeLabel";
                            tNode = parentNode.selectSingleNode(xPath);
                            if (tNode == null) {
                                // Loop through to locate the rdfs:label if not selected by xPath.
                                Node n = parentNode.selectSingleNode("dams:title");
                                // Without a dams:title element tNode stays null, so the
                                // descriptive "Element ... is missing" exception below is
                                // raised instead of a NullPointerException here.
                                if (n instanceof Element) {
                                    for (Iterator<Element> it = ((Element) n).elementIterator(); it.hasNext();) {
                                        Element elem = it.next();
                                        if (elem.getNamespacePrefix().equals("mads")
                                                && elem.getName().equals("Title"))
                                            tNode = elem.selectSingleNode("mads:authoritativeLabel");
                                    }
                                }
                            }
                        } /* else if (nName.endsWith("Language") || nName.endsWith("Authority") || nName.endsWith("Subject") || nName.endsWith("Name") || nName.endsWith("Topic") || nName.endsWith("GenreForm") || nName.endsWith("Temporal") || nName.endsWith("Geographic")){
                            // Subject, Authority records use mads:authoritativeLabel
                            field = "name_tesim";
                            xPath = "mads:authoritativeLabel";
                            tNode = parentNode.selectSingleNode(xPath);
                          } */else if (nName.endsWith(COPYRIGHT)) {
                            // Copyright records use dams:copyrightStatus, plus other properties in the next step.
                            field = "dams:copyrightStatus";
                            xPath = "dams:copyrightStatus";
                            tNode = parentNode.selectSingleNode(xPath);
                            props = copyrightProperties(parentNode);
                        } else if (nName.endsWith(LICENSE)) {
                            // License records use dams:LicenseNote, plus other properties in the next step.
                            field = "dams:licenseNote";
                            xPath = "dams:licenseNote";
                            tNode = parentNode.selectSingleNode(xPath);
                            props = licenseProperties(parentNode);
                        } else if (nName.endsWith(OTHERRIGHTS)) {
                            // OtherRights records use dams:otherRightsBasis, plus other properties in the next step.
                            field = "dams:otherRightsBasis";
                            xPath = "dams:otherRightsBasis";
                            tNode = parentNode.selectSingleNode(xPath);
                            props = otherRightsProperties(parentNode);
                        } else if (nName.endsWith(RELATEDRESOURCE)) {
                            // RelatedResource records use dams:description, plus other properties in the next step.
                            field = "dams:description";
                            xPath = "dams:description";
                            tNode = parentNode.selectSingleNode(xPath);
                            props = relatedResourceProperties(parentNode);
                        } else if (nName.endsWith(SOURCECAPTURE)) {
                            // SourceCapture records use dams:sourceType, plus other properties in the next step.
                            field = "dams:sourceType";
                            xPath = "dams:sourceType";
                            tNode = parentNode.selectSingleNode(xPath);
                            props = sourceCaptureProperties(parentNode);
                        } else if (nName.endsWith(NOTE)) {
                            // Note records use rdf:value, dams:type, dams:displayLabel.
                            field = "rdf:value";
                            xPath = "rdf:value";
                            tNode = parentNode.selectSingleNode(xPath);
                            props = noteProperties(parentNode);
                        } else if (nName.endsWith(PERMISSION) || nName.equals(RESTRICTION)) {
                            field = "dams:type";
                            xPath = "dams:type";
                            tNode = parentNode.selectSingleNode(xPath);
                            props = dateProperties(parentNode);
                        } else if (elemXPath.indexOf("mads", elemXPath.lastIndexOf('/') + 1) >= 0) {
                            // MADSScheme and Language
                            if (nName.endsWith(MADSSCHEME)) {
                                field = "mads:code";
                                xPath = "mads:code";
                                tNode = parentNode.selectSingleNode(xPath);
                                if (tNode == null) {
                                    field = "rdfs:label";
                                    xPath = "rdfs:label";
                                    tNode = parentNode.selectSingleNode("*[name()='" + xPath + "']");
                                }
                            } else if (nName.endsWith(LANGUAGE)) {
                                field = "mads:code";
                                xPath = "mads:code";
                                tNode = parentNode.selectSingleNode(xPath);
                                if (tNode == null) {
                                    field = "mads:authoritativeLabel";
                                    xPath = "mads:authoritativeLabel";
                                    tNode = parentNode.selectSingleNode(xPath);
                                }
                            } else {
                                // Subject, Authority records use mads:authoritativeLabel
                                field = "mads:authoritativeLabel";
                                xPath = "mads:authoritativeLabel";
                                tNode = parentNode.selectSingleNode(xPath);
                                if (tNode == null) {
                                    // Try to use the mads:code for mapping when mads:authoritativeLabel is not available
                                    field = "mads:code";
                                    xPath = "mads:code";
                                    tNode = parentNode.selectSingleNode(xPath);
                                } else {
                                    Node diplayLabelNode = parentNode
                                            .selectSingleNode("*[name()='dams:displayLabel']");
                                    props.put("dams:displayLabel", encodeLiteralValue(diplayLabelNode));
                                }
                                // Mapping for mads:isMemberOfMADSScheme
                                String madsScheme = null;
                                Node madsSchemeNode = parentNode.selectSingleNode("mads:isMemberOfMADSScheme");
                                if (madsSchemeNode != null) {
                                    Node msValueNode = madsSchemeNode.selectSingleNode("@rdf:resource");
                                    if (msValueNode != null) {
                                        // Use the rdf:resource attribute value; the element's own
                                        // text value is not the referenced scheme URI.
                                        madsScheme = msValueNode.getStringValue();
                                        props.put("mads:isMemberOfMADSScheme", "<" + madsScheme + ">");
                                    } else if ((madsSchemeNode = madsSchemeNode
                                            .selectSingleNode("mads:MADSScheme")) != null
                                            && madsSchemeNode.hasContent()) {
                                        if ((msValueNode = madsSchemeNode
                                                .selectSingleNode("mads:code")) != null) {
                                            madsScheme = encodeLiteralValue(msValueNode);
                                            props.put("mads:isMemberOfMADSScheme/mads:code", madsScheme);
                                        } else if ((msValueNode = madsSchemeNode
                                                .selectSingleNode("rdfs:label")) != null) {
                                            madsScheme = encodeLiteralValue(msValueNode);
                                            props.put("mads:isMemberOfMADSScheme/rdfs:label", madsScheme);
                                        }
                                    } else {
                                        props.put("mads:isMemberOfMADSScheme/rdfs:label", "\"\"");
                                    }
                                } else {
                                    props.put("mads:isMemberOfMADSScheme/rdfs:label", null);
                                }
                            }

                        } else {
                            // XXX Other Rights records like Statute, License, Other Rights etc. 
                            // NOTE(review): field always ends up as "dams:code" here, even when
                            // the value is taken from rdf:value -- confirm this is intended.
                            field = "rdf:value";
                            xPath = "rdf:value";
                            tNode = parentNode.selectSingleNode(xPath);
                            field = "dams:code";
                            if (tNode == null) {
                                xPath = "dams:code";
                                tNode = parentNode.selectSingleNode(xPath);
                            }
                        }
                        // dams:licenseNote is the only lookup value allowed to be absent.
                        if (tNode == null && !field.equals("dams:licenseNote")) {
                            throw new Exception("Element " + xPath + " is missing from the " + nName
                                    + " record " + iUri + " in file " + currFile + ".");
                        }

                        updateDocument(doc, parentNode, field, encodeLiteralValue(tNode), props);
                    }
                } else {
                    if (nName.endsWith("Object"))
                        objRecords.put(iUri, currFile);
                    if (replace && !(nName.endsWith("Component") || nName.endsWith("File")))
                        recordsToReplace.add(iUri);
                }
            }

            String dams4Rdf = doc.asXML();
            logData("dams4_" + rdfFiles[i].getName(), dams4Rdf);

            // Ingest the records
            String subjectId = null;
            DamsURI objURI = null;
            List<DamsURI> objURIs = null;
            RDFStore graph = null;

            rdfStore = new RDFStore();
            rdfStore.loadRDFXML(dams4Rdf);
            initHandler();

            Model iRdf = null;

            items = sortRecords(items);
            int jLen = items.size();
            //System.out.println(currFile + " records found: " + jLen);
            for (int j = 0; j < jLen && !interrupted; j++) {

                // Reset the per-record step tracking: one status flag and one message
                // buffer for each entry of processNames.
                processIndex = 0;
                status = new boolean[processNames.length];
                messages = new StringBuilder[processNames.length];
                for (int k = 0; k < messages.length; k++) {
                    messages[k] = new StringBuilder();
                }

                Model objModel = null;
                graph = new RDFStore();
                recordsCount++;
                // Add subject
                subjectId = items.get(j);
                try {
                    setStatus("Processing metadata for record " + subjectId + " (" + (j + 1) + " of " + jLen
                            + ") in file " + currFile + " ... ");
                    boolean succeeded = false;
                    objURIs = objects.get(subjectId);

                    for (int k = 0; k < objURIs.size(); k++) {
                        objURI = objURIs.get(k);
                        iRdf = rdfStore.querySubject(objURI.toString());
                        objModel = graph.merge(iRdf);
                    }

                    // Update object
                    //log.info(j + " ingesting record " + subjectId + ":\n" + graph.export(RDFStore.RDFXML_ABBREV_FORMAT) + "\n\n");
                    String importMode = Constants.IMPORT_MODE_ADD;
                    if (replace && recordsToReplace.indexOf(subjectId) >= 0)
                        importMode = Constants.IMPORT_MODE_ALL;
                    succeeded = damsClient.updateObject(subjectId, graph.export(RDFStore.RDFXML_ABBREV_FORMAT),
                            importMode);

                    // Logging for Object RDF/XML validation
                    status[processIndex] = succeeded;
                    messages[processIndex].append(damsDateFormat.format(new Date()));

                    if (!succeeded) {
                        if (metadataFailed.indexOf(currFile) < 0)
                            failedCount++;
                        metadataFailed.append(subjectId + " (" + currFile + "), \n");
                        message = "Metadata import for record " + subjectId + " failed (" + (j + 1) + " of "
                                + jLen + ") in file " + currFile + ".";
                        setStatus(message);
                        log.error(message + "\n Error RDF: \n" + graph.export(RDFStore.RDFXML_ABBREV_FORMAT));
                    } else {
                        recordsIngested.add(subjectId);
                        message = "Metadata import for record " + subjectId + " succeeded (" + (j + 1) + " of "
                                + jLen + ") in file " + currFile + ". ";
                        setStatus(message);
                        log.info(message);

                        processIndex++;
                        status[processIndex] = succeeded;
                        messages[processIndex].append(damsDateFormat.format(new Date()));
                        // Ingest the source file only if metadata ingested successfully
                        if (status[processIndex] && importOption.equalsIgnoreCase("metadataAndFiles")) {
                            uploadFiles(objModel, currFile, subjectId);
                        }
                    }

                } catch (Exception e) {
                    e.printStackTrace();

                    if (metadataFailed.indexOf(currFile) < 0)
                        failedCount++;
                    metadataFailed.append(subjectId + " (" + currFile + "), \n");
                    message = "Metadata import failed: " + e.getMessage();
                    setStatus(message + " (" + (j + 1) + " of " + jLen + ") in file " + currFile + ".");
                    log.error(message);

                    // getMessage() may be null (e.g. for a NullPointerException); guard the
                    // lookup so the handler itself cannot fail.
                    String error = e.getMessage();
                    if (error != null && error.indexOf("Invalid RDF input") >= 0) {
                        messages[processIndex].append(error);
                    } else {
                        status[processIndex] = true;
                        messages[processIndex].append(damsDateFormat.format(new Date()));
                        processIndex++;
                        messages[processIndex].append(error);
                    }
                } finally {
                    // The last entry of processNames is reserved for the SOLR request.
                    int solrRequestIndex = processNames.length - 1;
                    try {
                        // Update SOLR for the record.
                        status[solrRequestIndex] = updateSOLR(subjectId);
                        messages[solrRequestIndex].append(damsDateFormat.format(new Date()));
                        log.info("SOLR update requested for " + subjectId + ": " + damsClient.getRequestURL()
                                + " " + status[solrRequestIndex]);

                    } catch (Exception e) {
                        e.printStackTrace();
                        exeResult = false;
                        log.error("SOLR Index failed " + subjectId + ": " + e.getMessage());

                        messages[solrRequestIndex].append(e.getMessage());
                    }

                    if (exeResult)
                        exeResult = status[processIndex];

                    String resultMessage = "http://" + Constants.CLUSTER_HOST_NAME + ".ucsd.edu/dc/object/"
                            + subjectId.substring(subjectId.lastIndexOf("/") + 1) + " - "
                            + (status[processIndex] && status[solrRequestIndex] ? "successful" : "failed")
                            + " - " + damsDateFormat.format(new Date());
                    if (objRecords.containsKey(subjectId)) {
                        String title = getTitle(objModel, subjectId);
                        if (StringUtils.isBlank(title))
                            title = "[Unknown Title]";

                        logMessage("\n" + title + " - " + resultMessage);
                        if (!status[processIndex] || !status[solrRequestIndex]) {
                            // Logging for pre-procesing - succeeded. 
                            logMessage("* Pre-processing - successful - " + preprocessedTimestamp);
                            for (int k = 0; k <= processIndex; k++) {
                                // Log every successful step plus the first failed one. Step 0
                                // follows pre-processing, which succeeded, so it is always
                                // logged; this also avoids reading status[-1].
                                if (k == 0 || status[k] || status[k - 1]) {
                                    logMessage("* " + processNames[k] + " - "
                                            + (status[k] ? "successful" : "failed") + " - "
                                            + messages[k].toString());
                                }
                            }

                            // SOLR index request logging
                            if (!status[solrRequestIndex])
                                logMessage("* " + processNames[solrRequestIndex] + " - "
                                        + (status[solrRequestIndex] ? "successful" : "failed") + " - "
                                        + messages[solrRequestIndex].toString());
                        }

                    } else {

                        ingestMessages.append("\n" + resultMessage);
                        if (!status[processIndex]) {
                            for (int k = 0; k + 1 < processIndex; k++) {
                                // Same rule as above; k == 0 has no previous step to inspect.
                                if (k == 0 || status[k] || status[k - 1]) {
                                    logMessage("* " + processNames[k] + " - "
                                            + (status[k] ? "successful" : "failed") + " - "
                                            + messages[k].toString());
                                }
                            }
                        }
                    }
                }

                try {
                    Thread.sleep(10);
                } catch (InterruptedException e) {
                    e.printStackTrace();
                    interrupted = true;
                    exeResult = false;
                    failedCount++;
                    metadataFailed.append(subjectId + " (" + currFile + ") \n");
                    message = "Metadata import interrupted for subject " + subjectId + ". \n Error: "
                            + e.getMessage() + "\n";
                    setStatus("Canceled");
                    clearSession();
                    log.error(message);

                    logMessage("Client Cancled - " + damsDateFormat.format(new Date()));
                }
            }

        } catch (Exception e) {
            e.printStackTrace();
            exeResult = false;
            failedCount++;
            message = "Import failed for " + currFile + ": " + e.getMessage();
            setStatus(message + " (" + (i + 1) + " of " + fLen + ").");
            log.error(message);
        }

        setProgressPercentage(((i + 1) * 100) / fLen);

        try {
            Thread.sleep(10);
        } catch (InterruptedException e) {
            e.printStackTrace();
            exeResult = false;
            interrupted = true;
            failedCount++;
            message = "Import interrupted for oject in " + currFile + ". \n Error: " + e.getMessage() + "\n";
            setStatus("Canceled");
            clearSession();
            log.error(message);

            // The message buffers only exist once a record has been processed.
            if (messages != null && processIndex < messages.length) {
                messages[processIndex].append("Client canceled - " + damsDateFormat.format(new Date()));
            }
        }
    }
    return exeResult;
}

From source file:edu.umd.cs.marmoset.utilities.ParseWebXml.java

License:Apache License

/**
 * Parses the given XML file and registers its servlet and filter mappings.
 *
 * <p>For every {@code servlet-mapping} child of the root element the servlet name and URL
 * pattern are read and passed to {@code tryToMapServlet}; for every {@code filter-mapping}
 * child the filter name and URL pattern are passed to {@code tryToCreateFilter}. Each
 * helper is invoked as soon as either value is read and scanning of the current mapping
 * stops once it returns {@code true} (presumably when both values are available --
 * confirm against the helpers).
 *
 * @param webXmlFileName path of the XML file to parse
 * @return a new {@code ParseWebXml} populated from the file
 * @throws FileNotFoundException if the file cannot be opened for reading
 * @throws DocumentException if the file cannot be parsed as XML
 */
public static ParseWebXml parse(String webXmlFileName) throws FileNotFoundException, DocumentException {
    File file = new File(webXmlFileName);

    Document document;
    FileInputStream fis = new FileInputStream(file);
    try {
        SAXReader reader = new SAXReader();
        document = reader.read(fis);
    } finally {
        // Always release the file handle, whether or not parsing succeeded.
        try {
            fis.close();
        } catch (java.io.IOException ignored) {
            // Best-effort close: the stream was only read, so a failure here cannot
            // affect the parsed document and must not mask a parse error.
        }
    }

    ParseWebXml webXml = new ParseWebXml();

    Element root = document.getRootElement();

    for (Iterator<?> ii = root.elementIterator("servlet-mapping"); ii.hasNext();) {
        Element elt = (Element) ii.next();

        String urlPattern = null;
        String servletName = null;
        for (int jj = 0; jj < elt.nodeCount(); jj++) {
            Node node = elt.node(jj);
            String nodeName = node.getName();
            // Nodes without a name are skipped.
            if (nodeName == null)
                continue;
            if (nodeName.equals(SERVLET_NAME)) {
                servletName = node.getText().trim();
                if (webXml.tryToMapServlet(servletName, urlPattern))
                    break;
            } else if (nodeName.equals(SERVLET_URL_PATTERN)) {
                urlPattern = node.getText().trim();
                if (webXml.tryToMapServlet(servletName, urlPattern))
                    break;
            }
        }
    }

    for (Iterator<?> ii = root.elementIterator("filter-mapping"); ii.hasNext();) {
        Element elt = (Element) ii.next();

        String filterName = null;
        String urlPattern = null;
        for (int jj = 0; jj < elt.nodeCount(); jj++) {
            Node node = elt.node(jj);
            String nodeName = node.getName();
            // Nodes without a name are skipped.
            if (nodeName == null)
                continue;
            if (nodeName.equals(FILTER_NAME)) {
                filterName = node.getText().trim();
                if (webXml.tryToCreateFilter(filterName, urlPattern))
                    break;
            } else if (nodeName.equals(FILTER_URL_PATTERN)) {
                urlPattern = node.getText().trim();
                if (webXml.tryToCreateFilter(filterName, urlPattern))
                    break;
            }
        }
    }

    return webXml;
}