List of usage examples for org.dom4j Node getName
String getName();

getName returns the name of this node: for an element it is the element name, for an attribute the attribute name. Text nodes have no name, so getName() returns null for them; several of the examples below rely on that null check to distinguish markup from character data.
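Before the full examples, here is a minimal, self-contained sketch of the pattern most of them share: iterate over an Element's child nodes and branch on getName(), treating a null name as plain text. The sample XML and the element names (head, sat) are illustrative only, not part of any library API.

import java.util.Iterator;

import org.dom4j.Document;
import org.dom4j.DocumentHelper;
import org.dom4j.Element;
import org.dom4j.Node;

public class NodeGetNameExample {
    public static void main(String[] args) throws Exception {
        // Illustrative document; any well-formed XML behaves the same way.
        Document doc = DocumentHelper.parseText(
                "<sentence>The <head id=\"h1\">bank</head> of the <sat id=\"s1\">river</sat>.</sentence>");
        Element root = doc.getRootElement();

        for (Iterator<?> it = root.nodeIterator(); it.hasNext();) {
            Node node = (Node) it.next();
            String name = node.getName();
            if (name == null) {
                // Text nodes have no name; only their character content matters here.
                System.out.println("text: \"" + node.getText() + "\"");
            } else if ("head".equals(name)) {
                Element head = (Element) node;
                System.out.println("head element, id=" + head.attributeValue("id"));
            } else {
                System.out.println("element: " + name);
            }
        }
    }
}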
From source file:de.tudarmstadt.ukp.dkpro.wsd.senseval.reader.Senseval2AWReader.java
License:Apache License
@SuppressWarnings("unchecked") @Override/*from ww w. ja v a 2 s .c o m*/ public void getNext(JCas jCas) throws IOException, CollectionException { int offset = 0; String s = ""; Element text = textIterator.next(); Map<String, WSDItem> wsdItems = new HashMap<String, WSDItem>(); Map<String, LexicalItemConstituent> lics = new HashMap<String, LexicalItemConstituent>(); Map<String, String> sats = new HashMap<String, String>(); // Loop over all nodes to get the document text in order for (Iterator<Node> nodeIterator = text.nodeIterator(); nodeIterator.hasNext();) { Node node = nodeIterator.next(); String nodeText = node.getText().replace('\n', ' '); String nodeName = node.getName(); if (nodeName == null) { offset += nodeText.length(); s += nodeText; continue; } // If the node is a satellite, create a LexicalItemConstituent if (nodeName.equals(SATELLITE_ELEMENT_NAME)) { String id = ((Element) node).attributeValue(ID_ATTRIBUTE_NAME); LexicalItemConstituent lic = newLexicalItemConstituent(jCas, id, LIC_TYPE_SATELLITE, offset, nodeText.length()); lics.put(id, lic); } // If the node is a head, create a LexicalItemConstituent and a WSDItem else if (nodeName.equals(HEAD_ELEMENT_NAME)) { Element head = (Element) node; String id = head.attributeValue(ID_ATTRIBUTE_NAME); String satellites = head.attributeValue(SATELLITES_ATTRIBUTE_NAME); lics.put(id, newLexicalItemConstituent(jCas, id, LIC_TYPE_HEAD, offset, nodeText.length())); WSDItem wsdItem = newWsdItem(jCas, id, LIC_TYPE_HEAD, offset, nodeText.length(), null, nodeText); wsdItems.put(id, wsdItem); if (satellites != null) { sats.put(id, satellites); } } // If the node is any other element, something is wrong else if (node.getNodeTypeName().equals("Entity") == false) { throw new CollectionException("unknown_element", new Object[] { node.getName() }); } offset += nodeText.length(); s += nodeText; } populateLexicalItemConstituents(jCas, wsdItems, lics, sats); jCas.setDocumentText(s); try { setDocumentMetadata(jCas, text.attributeValue(ID_ATTRIBUTE_NAME)); } catch (URISyntaxException e) { throw new IOException(e); } textCount++; }
From source file:de.tudarmstadt.ukp.dkpro.wsd.senseval.reader.Senseval2LSReader.java
License:Apache License
@SuppressWarnings("unchecked") @Override//from w ww . j a v a2 s . c o m public void getNext(JCas jCas) throws IOException, CollectionException { // If there are no more <instance>s in this <lexelt>, get the next // <lexelt> if (instanceIterator.hasNext() == false) { lexelt = lexeltIterator.next(); lexeltPOS = getLexeltPOS(lexelt.attributeValue(ITEM_ATTRIBUTE_NAME)); lexeltLemma = getLexeltLemma(lexelt.attributeValue(ITEM_ATTRIBUTE_NAME)); textCount++; instanceIterator = lexelt.elementIterator(INSTANCE_ELEMENT_NAME); } Element instance = instanceIterator.next(); Element context = instance.element(CONTEXT_ELEMENT_NAME); int offset = 0; String s = ""; Map<String, WSDItem> wsdItems = new HashMap<String, WSDItem>(); Map<String, LexicalItemConstituent> lics = new HashMap<String, LexicalItemConstituent>(); Map<String, String> sats = new HashMap<String, String>(); // Loop over all nodes to get the document text in order for (Iterator<Node> nodeIterator = context.nodeIterator(); nodeIterator.hasNext();) { Node node = nodeIterator.next(); String nodeText = node.getText().replace('\n', ' '); String nodeName = node.getName(); if (nodeName == null) { offset += nodeText.length(); s += nodeText; continue; } // If the node is a satellite, create a LexicalItemConstituent if (nodeName.equals(SATELLITE_ELEMENT_NAME)) { String id = ((Element) node).attributeValue(ID_ATTRIBUTE_NAME); lics.put(id, newLexicalItemConstituent(jCas, id, LIC_TYPE_SATELLITE, offset, nodeText.length())); } // If the node is a head, create a LexicalItemConstituent and a // WSDItem else if (nodeName.equals(HEAD_ELEMENT_NAME)) { String id = instance.attributeValue(ID_ATTRIBUTE_NAME); String satellites = ((Element) node).attributeValue(SATELLITES_ATTRIBUTE_NAME); lics.put(id, newLexicalItemConstituent(jCas, id, LIC_TYPE_HEAD, offset, nodeText.length())); wsdItems.put(id, newWsdItem(jCas, id, LIC_TYPE_HEAD, offset, nodeText.length(), lexeltPOS, lexeltLemma)); if (satellites != null) { sats.put(id, satellites); } } // If the node is any other element, something is wrong else if (node.getNodeTypeName().equals("Entity") == false) { throw new CollectionException("unknown_element", new Object[] { node.getName() }); } offset += nodeText.length(); s += nodeText; } populateLexicalItemConstituents(jCas, wsdItems, lics, sats); jCas.setDocumentText(s); try { setDocumentMetadata(jCas, instance.attributeValue(ID_ATTRIBUTE_NAME)); } catch (URISyntaxException e) { throw new IOException(e); } }
From source file:edu.scripps.fl.dom4j.util.FormControlVisitorSupport.java
License:Apache License
public void visit(Element node) {
    // Visits the given Element
    if ("input".equalsIgnoreCase(node.getName())) {
        String value = node.valueOf("@value");
        if ("checkbox".equalsIgnoreCase(node.valueOf("@type")))
            if ("".equals(node.valueOf("@checked")))
                value = "";
        addParameter(node.valueOf("@name"), value);
    } else if ("textarea".equalsIgnoreCase(node.getName()))
        addParameter(node.valueOf("@name"), node.getText());
    else if ("select".equalsIgnoreCase(node.getName())) {
        for (Node n : (List<Node>) node.selectNodes("option")) {
            for (Node attr : (List<Node>) ((Element) n).attributes()) {
                if ("selected".equalsIgnoreCase(attr.getName())) {
                    addParameter(node.valueOf("@name"), n.valueOf("@value"));
                    return;
                }
            }
        }
    }
}
From source file:edu.scripps.fl.pubchem.app.summary.NodesStage.java
License:Apache License
@Override
public void innerProcess(Object obj) throws StageException {
    Document document = (Document) obj;
    try {
        Element setElem = (Element) document.selectSingleNode("/eSummaryResult/DocumentSummarySet");
        Iterator<Node> iter = setElem.nodeIterator();
        for (; iter.hasNext();) {
            Node node = iter.next();
            if ("DocumentSummary".equals(node.getName())) // eUtils version 2.0
                emit(node);
        }
    } catch (Exception ex) {
        throw new StageException(this, ex);
    }
}
From source file:edu.scripps.fl.pubchem.PubChemFactory.java
License:Apache License
public void populateAssayFromSummaryDocument(PCAssay assay, Node docSumNode) throws Exception {
    Node errorNode = docSumNode.selectSingleNode("error");
    if (errorNode != null)
        throw new Exception("Entrez error: " + errorNode.getText());
    List<Node> list = docSumNode.selectNodes("*");
    String uid = docSumNode.valueOf("@uid");
    assay.setAID(Integer.parseInt(uid));
    for (Node node : list) {
        String name = node.getName();
        Object value = node.getText();
        if (node.selectNodes("*").size() > 0) {
            // Skip nodes that have child elements; only leaf values are mapped to bean properties.
        } else {
            String property = propMap.getProperty(name);
            if (null != property) {
                Class clazz = PropertyUtils.getPropertyType(assay, property);
                if (clazz.isAssignableFrom(Date.class))
                    value = parseDate(value);
                BeanUtils.setProperty(assay, property, value);
            } else {
                if (!unprocessedProperties.containsKey(name)) {
                    unprocessedProperties.put(name, "");
                    log.warn(String.format("Cannot determine PCAssay bean property '%s'", name));
                }
            }
        }
    }
    String desc = assay.getDescription();
    // eutils summary description doesn't contain new lines,
    // so don't update it if it already has a value (when we populate via xml first).
    if (desc == null || "".equals(desc) || !desc.contains("\n")) {
        Node node = docSumNode.selectSingleNode("AssayDescription");
        assay.setDescription(node.getText());
    }
    return;
}
From source file:edu.scripps.fl.pubchem.PubChemXMLParserFactory.java
License:Apache License
protected PCAssay populateAssayFromXMLNode(Node topNode) throws Exception {
    // String assayDescPath = "PC-AssaySubmit_assay/PC-AssaySubmit_assay_descr/PC-AssayDescription";
    Node assayDescNode = null;
    if (topNode.getName().equals("PC-AssayDescription"))
        assayDescNode = topNode;
    else {
        assayDescNode = topNode.selectSingleNode(".//PC-AssayDescription");
    }
    if (assayDescNode == null)
        throw new Exception(
                String.format("Cannot find PC-AssayDescription node in provided node %s", topNode.getPath()));
    Node node = assayDescNode.selectSingleNode("PC-AssayDescription_aid/PC-ID/PC-ID_id");
    Integer aid = new Integer(node.getText());
    try {
        PCAssay assay = new PCAssay();
        if (aid > 0)
            assay.setAID(aid);
        node = assayDescNode.selectSingleNode("PC-AssayDescription_aid/PC-ID/PC-ID_version");
        Integer version = new Integer(node.getText());
        assay.setVersion(version);
        node = assayDescNode.selectSingleNode("PC-AssayDescription_revision");
        Integer revision = new Integer(node.getText());
        assay.setRevision(revision);
        Node trackingNode = assayDescNode
                .selectSingleNode("PC-AssayDescription_aid-source/PC-Source/PC-Source_db/PC-DBTracking");
        node = trackingNode.selectSingleNode("PC-DBTracking_name");
        assay.setSourceName(node.getText());
        node = trackingNode.selectSingleNode("PC-DBTracking_source-id/Object-id/Object-id_str");
        assay.setExtRegId(node.getText());
        // hold until date
        node = trackingNode.selectSingleNode("PC-DBTracking_date");
        if (node != null) {
            String year = node.selectSingleNode("Date/Date_std/Date-std/Date-std_year").getText();
            String month = node.selectSingleNode("Date/Date_std/Date-std/Date-std_month").getText();
            String day = node.selectSingleNode("Date/Date_std/Date-std/Date-std_day").getText();
            if (DEBUGGING)
                log.info("year: " + year + " month: " + month + " day: " + day);
            Calendar calendar = Calendar.getInstance();
            calendar.set(Integer.parseInt(year), Integer.parseInt(month) - 1, Integer.parseInt(day));
            assay.setHoldUntilDate(calendar.getTime());
            if (DEBUGGING)
                log.info(calendar.getTime().toString());
        }
        node = assayDescNode.selectSingleNode("PC-AssayDescription_name");
        assay.setName(node.getText());
        List<Node> nodes = assayDescNode
                .selectNodes("PC-AssayDescription_description/PC-AssayDescription_description_E");
        assay.setDescription(join(nodes, separator));
        nodes = assayDescNode.selectNodes("PC-AssayDescription_protocol/PC-AssayDescription_protocol_E");
        assay.setProtocol(join(nodes, separator));
        nodes = assayDescNode.selectNodes("PC-AssayDescription_comment/PC-AssayDescription_comment_E");
        assay.setComment(join(nodes, separator));
        node = assayDescNode.selectSingleNode("PC-AssayDescription_activity-outcome-method");
        if (node != null)
            assay.setActivityOutcomeMethod(node.valueOf("@value"));
        node = assayDescNode
                .selectSingleNode("PC-AssayDescription_grant-number/PC-AssayDescription_grant-number_E");
        if (node != null)
            assay.setGrantNumber(node.getText());
        node = assayDescNode.selectSingleNode("PC-AssayDescription_project-category");
        if (node != null)
            assay.setProjectCategory(node.valueOf("@value"));
        assay.getAssayXRefs().removeAll(assay.getAssayXRefs());
        nodes = assayDescNode.selectNodes("PC-AssayDescription_xref/PC-AnnotatedXRef");
        handleXRefs(assay, null, nodes);
        nodes = assayDescNode.selectNodes("PC-AssayDescription_target/PC-AssayTargetInfo");
        handleTargetXRefs(assay, null, nodes);
        handlePanels(assay, assayDescNode);
        handleColumns(assay, assayDescNode);
        handleComments(assay, assayDescNode);
        return assay;
    } catch (Exception ex) {
        throw new RuntimeException("Problem with AID " + aid, ex);
    }
}
From source file:edu.ucsd.library.xdre.imports.RDFDAMS4ImportHandler.java
/** * Procedure to populate the RDF metadata and ingest the files *//*from ww w . j a v a2 s . c o m*/ public boolean execute() throws Exception { if (filesPaths != null) { File file = null; // List the source files for (int i = 0; i < filesPaths.length; i++) { file = new File(filesPaths[i]); if (file.exists()) { listFile(filesMap, file); } } } String message = ""; Document doc = null; DamsURI damsURI = null; String oid = null; int fLen = rdfFiles.length; String currFile = null; SAXReader saxReader = new SAXReader(); for (int i = 0; i < fLen && !interrupted; i++) { currFile = rdfFiles[i].getName(); setStatus( "Processing external import for file " + currFile + " (" + (i + 1) + " of " + fLen + ") ... "); try { doc = saxReader.read(rdfFiles[i]); List<Node> nodes = doc.selectNodes("//@rdf:about"); for (int j = 0; j < nodes.size(); j++) { Node nUri = nodes.get(j); String iUri = nUri.getStringValue(); Node parentNode = nUri.getParent(); String nName = parentNode.getName(); if (iUri.endsWith("/COL") || !(iUri.startsWith("http") && iUri.indexOf("/ark:/") > 0)) { // Assign ARK if (nName.endsWith("Object") || nName.endsWith("Component") || nName.endsWith("File") || (((Element) parentNode).isRootElement() || (parentNode.getParent().isRootElement() && parentNode.getParent().getName().equals("RDF")))) { String objId = iUri; if (nName.endsWith("Component") || nName.endsWith("File")) { damsURI = DamsURI.toParts(iUri, null); objId = damsURI.getObject(); } String srcObjKey = objId + "::" + rdfFiles[i].getAbsolutePath(); oid = idsMap.get(srcObjKey); // Assign new ARK if (oid == null) { oid = getNewId(); idsMap.put(srcObjKey, oid); } if (nName.endsWith("Object")) { objId = oid; objRecords.put(objId, currFile); } else if (nName.endsWith("Component") || nName.endsWith("File")) { damsURI.setObject(oid); // XXX // Assign cid and fid for Component and FIle if required objId = damsURI.toString(); } else objId = oid; nUri.setText(objId); updateReference(doc, iUri, objId); } else { String field = null; Node tNode = null; String xPath = null; Map<String, String> props = new TreeMap<String, String>(); String elemXPath = parentNode.getPath(); if (nName.endsWith("Collection") || nName.endsWith("CollectionPart")) { // Retrieve the Collection record field = "title_tesim"; xPath = "dams:title/mads:Title/mads:authoritativeLabel"; tNode = parentNode.selectSingleNode(xPath); if (tNode == null) { // Loop through to locate the rdfs:label if not selected by xPath. Node n = parentNode.selectSingleNode("dams:title"); for (Iterator<Element> it = ((Element) n).elementIterator(); it.hasNext();) { Element elem = it.next(); if (elem.getNamespacePrefix().equals("mads") && elem.getName().equals("Title")) tNode = elem.selectSingleNode("mads:authoritativeLabel"); } } } /* else if (nName.endsWith("Language") || nName.endsWith("Authority") || nName.endsWith("Subject") || nName.endsWith("Name") || nName.endsWith("Topic") || nName.endsWith("GenreForm") || nName.endsWith("Temporal") || nName.endsWith("Geographic")){ // Subject, Authority records use mads:authoritativeLabel field = "name_tesim"; xPath = "mads:authoritativeLabel"; tNode = parentNode.selectSingleNode(xPath); } */else if (nName.endsWith(COPYRIGHT)) { // Copyright records use dams:copyrightStatus, plus other properties in the next step. 
field = "status_tesim"; xPath = "dams:copyrightStatus"; tNode = parentNode.selectSingleNode(xPath); props = copyrightProperties(parentNode); } else if (nName.endsWith(LICENSE)) { // License records use dams:LicenseNote, plus other properties in the next step. field = "note_tesim"; xPath = "dams:licenseNote"; tNode = parentNode.selectSingleNode(xPath); props = licenseProperties(parentNode); } else if (nName.endsWith(OTHERRIGHTS)) { // Copyright records use dams:copyrightStatus, plus other properties in the next step. field = "otherRightsBasis_tesim"; xPath = "dams:otherRightsBasis"; tNode = parentNode.selectSingleNode(xPath); props = licenseProperties(parentNode); } else if (nName.endsWith(RELATEDRESOURCE)) { // RelatedResource records use dams:description, plus other properties in the next step. field = "description_tesim"; xPath = "dams:description"; tNode = parentNode.selectSingleNode(xPath); props = relatedResourceProperties(parentNode); } else if (nName.endsWith(SOURCECAPTURE)) { // SourceCapture records use dams:sourceType, plus other properties in the next step. field = "sourceType_tesim"; xPath = "dams:sourceType"; tNode = parentNode.selectSingleNode(xPath); props = sourceCaptureProperties(parentNode); } else if (elemXPath.indexOf("mads", elemXPath.lastIndexOf('/') + 1) >= 0) { // MADSScheme and Language if (nName.endsWith(MADSSCHEME) || nName.equals(LANGUAGE)) { field = "code_tesim"; xPath = "mads:code"; tNode = parentNode.selectSingleNode(xPath); if (tNode == null) { field = "name_tesim"; xPath = "rdfs:label"; tNode = parentNode.selectSingleNode(xPath); if (tNode == null) { // Loop through to locate the rdfs:label if not selected by xPath. for (Iterator<Element> it = ((Element) parentNode).elementIterator(); it .hasNext();) { Element elem = it.next(); if (elem.getNamespacePrefix().equals("rdfs") && elem.getName().equals("label")) tNode = elem; } } } } else { // Subject, Authority records use mads:authoritativeLabel field = "name_tesim"; xPath = "mads:authoritativeLabel"; tNode = parentNode.selectSingleNode(xPath); if (tNode == null) { // Try to use the mads:code for mapping when mads:authoritativeLabel is not available field = "code_tesim"; xPath = "mads:code"; tNode = parentNode.selectSingleNode(xPath); } // Mapping for mads:isMemberOfMADSScheme String madsScheme = null; Node madsSchemeNode = parentNode.selectSingleNode("mads:isMemberOfMADSScheme"); if (madsSchemeNode != null) { Node msValueNode = madsSchemeNode.selectSingleNode("@rdf:resource"); if (msValueNode != null) { madsScheme = madsSchemeNode.getStringValue(); props.put("scheme_tesim", madsScheme); } else if ((madsSchemeNode = madsSchemeNode .selectSingleNode("mads:MADSScheme")) != null && madsSchemeNode.hasContent()) { if ((msValueNode = madsSchemeNode .selectSingleNode("mads:code")) != null) { madsScheme = msValueNode.getText(); props.put("scheme_code_tesim", madsScheme); } else if ((msValueNode = madsSchemeNode .selectSingleNode("rdfs:label")) != null) { madsScheme = msValueNode.getText(); props.put("scheme_name_tesim", madsScheme); } } else { props.put("scheme_tesim", ""); } } else { props.put("scheme_tesim", null); } } } else { // XXX Other Rights records like Statute, License, Other Rights etc. 
field = "value_tesim"; xPath = "rdf:value"; tNode = parentNode.selectSingleNode(xPath); field = "code_tesim"; if (tNode == null) { xPath = "dams:code"; tNode = parentNode.selectSingleNode(xPath); } } if (tNode == null) { throw new Exception("Element " + xPath + " is missing from the " + nName + " record " + iUri + " in file " + currFile + "."); } updateDocument(doc, parentNode, field, tNode.getText(), props); } } else if (nName.endsWith("Object")) { objRecords.put(iUri, currFile); } } String dams4Rdf = doc.asXML(); logData("dams4_" + rdfFiles[i].getName(), dams4Rdf); // Ingest the records String subjectId = null; DamsURI objURI = null; List<DamsURI> objURIs = null; RDFStore graph = null; rdfStore = new RDFStore(); Model rdf = rdfStore.loadRDFXML(dams4Rdf); initHandler(); Model iRdf = null; int jLen = items.size(); for (int j = 0; j < jLen && !interrupted; j++) { graph = new RDFStore(); recordsCount++; // Add subject subjectId = items.get(j); try { setStatus("Processing metadata for record " + subjectId + " (" + (j + 1) + " of " + jLen + ") in file " + currFile + " ... "); boolean succeeded = false; objURIs = objects.get(subjectId); for (int k = 0; k < objURIs.size(); k++) { objURI = objURIs.get(k); iRdf = rdfStore.querySubject(objURI.toString()); graph.merge(iRdf); } // Update object //log.info(j + " ingesting record " + subjectId + ":\n" + graph.export(RDFStore.RDFXML_ABBREV_FORMAT) + "\n\n"); succeeded = damsClient.updateObject(subjectId, graph.export(RDFStore.RDFXML_ABBREV_FORMAT), Constants.IMPORT_MODE_ADD); if (!succeeded) { if (metadataFailed.indexOf(currFile) < 0) failedCount++; metadataFailed.append(subjectId + " (" + currFile + "), \n"); message = "Metadata import for record " + subjectId + " failed (" + (j + 1) + " of " + jLen + ") in file " + currFile + "."; setStatus(message); logError(message + "\n Error RDF: \n" + graph.export(RDFStore.RDFXML_ABBREV_FORMAT)); } else { recordsIngested.add(subjectId); message = "Metadata import for record " + subjectId + " succeeded (" + (j + 1) + " of " + jLen + ") in file " + currFile + ". "; setStatus(message); logMessage(message); log.info(message); // Update SOLR fre records ingested. updateSOLR(subjectId); } } catch (Exception e) { e.printStackTrace(); if (metadataFailed.indexOf(currFile) < 0) failedCount++; metadataFailed.append(subjectId + " (" + currFile + "), \n"); message = "Metadata import failed: " + e.getMessage(); setStatus(message + " (" + (j + 1) + " of " + jLen + ") in file " + currFile + "."); logError(message); } try { Thread.sleep(10); } catch (InterruptedException e) { e.printStackTrace(); interrupted = true; failedCount++; metadataFailed.append(subjectId + " (" + currFile + ") \n"); message = "Metadata import interrupted for subject " + subjectId + ". 
\n Error: " + e.getMessage() + "\n"; setStatus("Canceled"); clearSession(); logError(message); } } // Ingest the source file if (importOption.equalsIgnoreCase("metadataAndFiles")) { uploadFiles(rdf, currFile); } } catch (Exception e) { e.printStackTrace(); failedCount++; message = "Import failed for " + currFile + ": " + e.getMessage(); setStatus(message + " (" + (i + 1) + " of " + fLen + ")."); logError(message); } finally { // Update SOLR for files uploaded int iLen = objWithFiles.size(); for (int j = 0; j < iLen && !interrupted; j++) { updateSOLR(objWithFiles.get(j)); } } setProgressPercentage(((i + 1) * 100) / fLen); try { Thread.sleep(10); } catch (InterruptedException e) { e.printStackTrace(); interrupted = true; failedCount++; message = "Import interrupted for oject in " + currFile + ". \n Error: " + e.getMessage() + "\n"; setStatus("Canceled"); clearSession(); logError(message); } } return exeResult; }
From source file:edu.ucsd.library.xdre.imports.RDFDAMS4ImportHandler.java
/**
 * Update record for resource linking
 * @param url
 * @param record
 */
public void toResourceLinking(String url, Node record) {
    Element pNode = record.getParent();
    if (pNode.getName().endsWith("List")
            && !record.getName().toLowerCase().endsWith(pNode.getName().toLowerCase())) {
        // List elements
        record.setName("rdf:Description");
        ((Element) record).clearContent();
        ((Element) record).selectSingleNode("@rdf:about").setText(toDamsUrl(url));
    } else {
        pNode.addAttribute("rdf:resource", toDamsUrl(url));
        record.detach();
    }
}
From source file:edu.ucsd.library.xdre.imports.RDFDAMS4ImportTsHandler.java
/** * Procedure to populate the RDF metadata and ingest the files *///from w w w . j a v a 2 s. c om @Override public boolean execute() throws Exception { if (filesPaths != null) { File file = null; // List the source files for (int i = 0; i < filesPaths.length; i++) { file = new File(filesPaths[i]); if (file.exists()) { listFile(filesMap, file); } } } String message = ""; Document doc = null; DamsURI damsURI = null; List<String> recordsToReplace = null; String oid = null; int fLen = rdfFiles.length; String currFile = null; SAXReader saxReader = new SAXReader(); for (int i = 0; i < fLen && !interrupted; i++) { if (i == 0) logMessage("Object Import status:\n[Object title] - [URI] - [Status] - [Timestamp]"); recordsToReplace = new ArrayList<>(); currFile = rdfFiles[i].getName(); preprocessedTimestamp = damsDateFormat.format(rdfFiles[i].lastModified()); setStatus( "Processing external import for file " + currFile + " (" + (i + 1) + " of " + fLen + ") ... "); try { doc = saxReader.read(rdfFiles[i]); List<Node> nodes = doc.selectNodes("//@rdf:about"); for (int j = 0; j < nodes.size(); j++) { Node nUri = nodes.get(j); String iUri = nUri.getStringValue(); Node parentNode = nUri.getParent(); String nName = parentNode.getName(); if (iUri.endsWith("/COL") || !(iUri.startsWith("http") && iUri.indexOf("/ark:/") > 0)) { // Assign ARK if (nName.endsWith("Object") || nName.endsWith("Component") || nName.endsWith("File") || (((Element) parentNode).isRootElement() || (parentNode.getParent().isRootElement() && parentNode.getParent().getName().equals("RDF")))) { String objId = iUri; if (nName.endsWith("Component") || nName.endsWith("File")) { damsURI = DamsURI.toParts(iUri, null); objId = damsURI.getObject(); } String srcObjKey = objId + "::" + rdfFiles[i].getAbsolutePath(); oid = idsMap.get(srcObjKey); // Assign new ARK if (oid == null) { oid = getNewId(); idsMap.put(srcObjKey, oid); } if (nName.endsWith("Object")) { objId = oid; objRecords.put(objId, currFile); } else if (nName.endsWith("Component") || nName.endsWith("File")) { damsURI.setObject(oid); // XXX // Assign cid and fid for Component and FIle if required objId = damsURI.toString(); } else objId = oid; nUri.setText(objId); updateReference(doc, iUri, objId); } else { String field = null; Node tNode = null; String xPath = null; Map<String, String> props = new TreeMap<String, String>(); String elemXPath = parentNode.getPath(); if (nName.endsWith("Collection") || nName.endsWith("CollectionPart")) { // Retrieve the Collection record field = "dams:title/mads:authoritativeLabel"; xPath = "dams:title/mads:Title/mads:authoritativeLabel"; tNode = parentNode.selectSingleNode(xPath); if (tNode == null) { // Loop through to locate the rdfs:label if not selected by xPath. 
Node n = parentNode.selectSingleNode("dams:title"); for (Iterator<Element> it = ((Element) n).elementIterator(); it.hasNext();) { Element elem = it.next(); if (elem.getNamespacePrefix().equals("mads") && elem.getName().equals("Title")) tNode = elem.selectSingleNode("mads:authoritativeLabel"); } } } /* else if (nName.endsWith("Language") || nName.endsWith("Authority") || nName.endsWith("Subject") || nName.endsWith("Name") || nName.endsWith("Topic") || nName.endsWith("GenreForm") || nName.endsWith("Temporal") || nName.endsWith("Geographic")){ // Subject, Authority records use mads:authoritativeLabel field = "name_tesim"; xPath = "mads:authoritativeLabel"; tNode = parentNode.selectSingleNode(xPath); } */else if (nName.endsWith(COPYRIGHT)) { // Copyright records use dams:copyrightStatus, plus other properties in the next step. field = "dams:copyrightStatus"; xPath = "dams:copyrightStatus"; tNode = parentNode.selectSingleNode(xPath); props = copyrightProperties(parentNode); } else if (nName.endsWith(LICENSE)) { // License records use dams:LicenseNote, plus other properties in the next step. field = "dams:licenseNote"; xPath = "dams:licenseNote"; tNode = parentNode.selectSingleNode(xPath); props = licenseProperties(parentNode); } else if (nName.endsWith(OTHERRIGHTS)) { // Copyright records use dams:copyrightStatus, plus other properties in the next step. field = "dams:otherRightsBasis"; xPath = "dams:otherRightsBasis"; tNode = parentNode.selectSingleNode(xPath); props = otherRightsProperties(parentNode); } else if (nName.endsWith(RELATEDRESOURCE)) { // RelatedResource records use dams:description, plus other properties in the next step. field = "dams:description"; xPath = "dams:description"; tNode = parentNode.selectSingleNode(xPath); props = relatedResourceProperties(parentNode); } else if (nName.endsWith(SOURCECAPTURE)) { // SourceCapture records use dams:sourceType, plus other properties in the next step. field = "dams:sourceType"; xPath = "dams:sourceType"; tNode = parentNode.selectSingleNode(xPath); props = sourceCaptureProperties(parentNode); } else if (nName.endsWith(NOTE)) { // Note records use rdf:value, dams:type, dams:displayLabel. 
field = "rdf:value"; xPath = "rdf:value"; tNode = parentNode.selectSingleNode(xPath); props = noteProperties(parentNode); } else if (nName.endsWith(PERMISSION) || nName.equals(RESTRICTION)) { field = "dams:type"; xPath = "dams:type"; tNode = parentNode.selectSingleNode(xPath); props = dateProperties(parentNode); } else if (elemXPath.indexOf("mads", elemXPath.lastIndexOf('/') + 1) >= 0) { // MADSScheme and Language if (nName.endsWith(MADSSCHEME)) { field = "mads:code"; xPath = "mads:code"; tNode = parentNode.selectSingleNode(xPath); if (tNode == null) { field = "rdfs:label"; xPath = "rdfs:label"; tNode = parentNode.selectSingleNode("*[name()='" + xPath + "']"); } } else if (nName.endsWith(LANGUAGE)) { field = "mads:code"; xPath = "mads:code"; tNode = parentNode.selectSingleNode(xPath); if (tNode == null) { field = "mads:authoritativeLabel"; xPath = "mads:authoritativeLabel"; tNode = parentNode.selectSingleNode(xPath); } } else { // Subject, Authority records use mads:authoritativeLabel field = "mads:authoritativeLabel"; xPath = "mads:authoritativeLabel"; tNode = parentNode.selectSingleNode(xPath); if (tNode == null) { // Try to use the mads:code for mapping when mads:authoritativeLabel is not available field = "mads:code"; xPath = "mads:code"; tNode = parentNode.selectSingleNode(xPath); } else { Node diplayLabelNode = parentNode .selectSingleNode("*[name()='dams:displayLabel']"); props.put("dams:displayLabel", encodeLiteralValue(diplayLabelNode)); } // Mapping for mads:isMemberOfMADSScheme String madsScheme = null; Node madsSchemeNode = parentNode.selectSingleNode("mads:isMemberOfMADSScheme"); if (madsSchemeNode != null) { Node msValueNode = madsSchemeNode.selectSingleNode("@rdf:resource"); if (msValueNode != null) { madsScheme = madsSchemeNode.getStringValue(); props.put("mads:isMemberOfMADSScheme", "<" + madsScheme + ">"); } else if ((madsSchemeNode = madsSchemeNode .selectSingleNode("mads:MADSScheme")) != null && madsSchemeNode.hasContent()) { if ((msValueNode = madsSchemeNode .selectSingleNode("mads:code")) != null) { madsScheme = encodeLiteralValue(msValueNode); props.put("mads:isMemberOfMADSScheme/mads:code", madsScheme); } else if ((msValueNode = madsSchemeNode .selectSingleNode("rdfs:label")) != null) { madsScheme = encodeLiteralValue(msValueNode); props.put("mads:isMemberOfMADSScheme/rdfs:label", madsScheme); } } else { props.put("mads:isMemberOfMADSScheme/rdfs:label", "\"\""); } } else { props.put("mads:isMemberOfMADSScheme/rdfs:label", null); } } } else { // XXX Other Rights records like Statute, License, Other Rights etc. 
field = "rdf:value"; xPath = "rdf:value"; tNode = parentNode.selectSingleNode(xPath); field = "dams:code"; if (tNode == null) { xPath = "dams:code"; tNode = parentNode.selectSingleNode(xPath); } } if (tNode == null && !field.equals("dams:licenseNote")) { throw new Exception("Element " + xPath + " is missing from the " + nName + " record " + iUri + " in file " + currFile + "."); } updateDocument(doc, parentNode, field, encodeLiteralValue(tNode), props); } } else { if (nName.endsWith("Object")) objRecords.put(iUri, currFile); if (replace && !(nName.endsWith("Component") || nName.endsWith("File"))) recordsToReplace.add(iUri); } } String dams4Rdf = doc.asXML(); logData("dams4_" + rdfFiles[i].getName(), dams4Rdf); // Ingest the records String subjectId = null; DamsURI objURI = null; List<DamsURI> objURIs = null; RDFStore graph = null; rdfStore = new RDFStore(); rdfStore.loadRDFXML(dams4Rdf); initHandler(); Model iRdf = null; items = sortRecords(items); int jLen = items.size(); //System.out.println(currFile + " records found: " + jLen); for (int j = 0; j < jLen && !interrupted; j++) { processIndex = 0; status = new boolean[processNames.length]; messages = new StringBuilder[processNames.length]; for (int k = 0; k < messages.length; k++) { messages[k] = new StringBuilder(); } Model objModel = null; graph = new RDFStore(); recordsCount++; // Add subject subjectId = items.get(j); try { setStatus("Processing metadata for record " + subjectId + " (" + (j + 1) + " of " + jLen + ") in file " + currFile + " ... "); boolean succeeded = false; objURIs = objects.get(subjectId); for (int k = 0; k < objURIs.size(); k++) { objURI = objURIs.get(k); iRdf = rdfStore.querySubject(objURI.toString()); objModel = graph.merge(iRdf); } // Update object //log.info(j + " ingesting record " + subjectId + ":\n" + graph.export(RDFStore.RDFXML_ABBREV_FORMAT) + "\n\n"); String importMode = Constants.IMPORT_MODE_ADD; if (replace && recordsToReplace.indexOf(subjectId) >= 0) importMode = Constants.IMPORT_MODE_ALL; succeeded = damsClient.updateObject(subjectId, graph.export(RDFStore.RDFXML_ABBREV_FORMAT), importMode); // Logging for Object RDF/XML validation status[processIndex] = succeeded; messages[processIndex].append(damsDateFormat.format(new Date())); if (!succeeded) { if (metadataFailed.indexOf(currFile) < 0) failedCount++; metadataFailed.append(subjectId + " (" + currFile + "), \n"); message = "Metadata import for record " + subjectId + " failed (" + (j + 1) + " of " + jLen + ") in file " + currFile + "."; setStatus(message); log.error(message + "\n Error RDF: \n" + graph.export(RDFStore.RDFXML_ABBREV_FORMAT)); } else { recordsIngested.add(subjectId); message = "Metadata import for record " + subjectId + " succeeded (" + (j + 1) + " of " + jLen + ") in file " + currFile + ". 
"; setStatus(message); log.info(message); processIndex++; status[processIndex] = succeeded; messages[processIndex].append(damsDateFormat.format(new Date())); // Ingest the source file only if metadata ingested successfully if (status[processIndex] && importOption.equalsIgnoreCase("metadataAndFiles")) { uploadFiles(objModel, currFile, subjectId); } } } catch (Exception e) { e.printStackTrace(); if (metadataFailed.indexOf(currFile) < 0) failedCount++; metadataFailed.append(subjectId + " (" + currFile + "), \n"); message = "Metadata import failed: " + e.getMessage(); setStatus(message + " (" + (j + 1) + " of " + jLen + ") in file " + currFile + "."); log.error(message); String error = e.getMessage(); if (error.indexOf("Invalid RDF input") >= 0) { messages[processIndex].append(error); } else { status[processIndex] = true; messages[processIndex].append(damsDateFormat.format(new Date())); processIndex++; messages[processIndex].append(error); } } finally { int solrRequestIndex = processNames.length - 1; try { // Update SOLR for the record. status[solrRequestIndex] = updateSOLR(subjectId); messages[solrRequestIndex].append(damsDateFormat.format(new Date())); log.info("SOLR update requested for " + subjectId + ": " + damsClient.getRequestURL() + " " + status[solrRequestIndex]); } catch (Exception e) { e.printStackTrace(); exeResult = false; log.error("SOLR Index failed " + subjectId + ": " + e.getMessage()); messages[processNames.length - 1].append(e.getMessage()); } if (exeResult) exeResult = status[processIndex]; String resultMessage = "http://" + Constants.CLUSTER_HOST_NAME + ".ucsd.edu/dc/object/" + subjectId.substring(subjectId.lastIndexOf("/") + 1) + " - " + (status[processIndex] && status[solrRequestIndex] ? "successful" : "failed") + " - " + damsDateFormat.format(new Date()); if (objRecords.containsKey(subjectId)) { String title = getTitle(objModel, subjectId); if (StringUtils.isBlank(title)) title = "[Unknown Title]"; logMessage("\n" + title + " - " + resultMessage); if (!status[processIndex] || !status[solrRequestIndex]) { // Logging for pre-procesing - succeeded. logMessage("* Pre-processing - successful - " + preprocessedTimestamp); for (int k = 0; k <= processIndex; k++) { if (status[k] || !status[k] && status[k - 1]) { logMessage("* " + processNames[k] + " - " + (status[k] ? "successful" : "failed") + " - " + messages[k].toString()); } } // SOLR index request logging if (!status[solrRequestIndex]) logMessage("* " + processNames[solrRequestIndex] + " - " + (status[solrRequestIndex] ? "successful" : "failed") + " - " + messages[solrRequestIndex].toString()); } } else { ingestMessages.append("\n" + resultMessage); if (!status[processIndex]) { for (int k = 0; k + 1 < processIndex; k++) { if (status[k] || !status[k] && status[k - 1]) { logMessage("* " + processNames[k] + " - " + (status[k] ? "successful" : "failed") + " - " + messages[k].toString()); } } } } } try { Thread.sleep(10); } catch (InterruptedException e) { e.printStackTrace(); interrupted = true; exeResult = false; failedCount++; metadataFailed.append(subjectId + " (" + currFile + ") \n"); message = "Metadata import interrupted for subject " + subjectId + ". 
\n Error: " + e.getMessage() + "\n"; setStatus("Canceled"); clearSession(); log.error(message); logMessage("Client Cancled - " + damsDateFormat.format(new Date())); } } } catch (Exception e) { e.printStackTrace(); exeResult = false; failedCount++; message = "Import failed for " + currFile + ": " + e.getMessage(); setStatus(message + " (" + (i + 1) + " of " + fLen + ")."); log.error(message); } setProgressPercentage(((i + 1) * 100) / fLen); try { Thread.sleep(10); } catch (InterruptedException e) { e.printStackTrace(); exeResult = false; interrupted = true; failedCount++; message = "Import interrupted for oject in " + currFile + ". \n Error: " + e.getMessage() + "\n"; setStatus("Canceled"); clearSession(); log.error(message); messages[processIndex].append("Client canceled - " + damsDateFormat.format(new Date())); } } return exeResult; }
From source file:edu.umd.cs.marmoset.utilities.ParseWebXml.java
License:Apache License
public static ParseWebXml parse(String webXmlFileName) throws FileNotFoundException, DocumentException {
    File file = new File(webXmlFileName);
    FileInputStream fis = new FileInputStream(file);
    SAXReader reader = new SAXReader();
    Document document = reader.read(fis);
    ParseWebXml webXml = new ParseWebXml();
    Element root = document.getRootElement();
    for (Iterator<?> ii = root.elementIterator("servlet-mapping"); ii.hasNext();) {
        Element elt = (Element) ii.next();
        //System.out.print("name: " + elt.getName());
        String urlPattern = null;
        String servletName = null;
        for (int jj = 0; jj < elt.nodeCount(); jj++) {
            Node node = elt.node(jj);
            if (node.getName() == null)
                continue;
            if (node.getName().equals(SERVLET_NAME)) {
                servletName = node.getText().trim();
                if (webXml.tryToMapServlet(servletName, urlPattern))
                    break;
            } else if (node.getName().equals(SERVLET_URL_PATTERN)) {
                urlPattern = node.getText().trim();
                if (webXml.tryToMapServlet(servletName, urlPattern))
                    break;
            }
        }
        //System.out.println(" is mapped thusly: " + servletName + " => " + urlPattern);
    }
    for (Iterator<?> ii = root.elementIterator("filter-mapping"); ii.hasNext();) {
        Element elt = (Element) ii.next();
        //System.out.print("name: " + elt.getName());
        String filterName = null;
        String urlPattern = null;
        for (int jj = 0; jj < elt.nodeCount(); jj++) {
            Node node = elt.node(jj);
            if (node.getName() == null)
                continue;
            if (node.getName().equals(FILTER_NAME)) {
                filterName = node.getText().trim();
                if (webXml.tryToCreateFilter(filterName, urlPattern))
                    break;
            } else if (node.getName().equals(FILTER_URL_PATTERN)) {
                urlPattern = node.getText().trim();
                if (webXml.tryToCreateFilter(filterName, urlPattern))
                    break;
            }
        }
        //System.out.println(" is mapped thusly: " + filterName + " => " + urlPattern);
    }
    return webXml;
}