List of usage examples for org.dom4j Node setText
void setText(String text);
Sets the text data of this node. If the node is read-only, this method throws an
UnsupportedOperationException.
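Before the project examples below, here is a minimal, self-contained sketch of the basic pattern: parse a document, select a node with XPath, and replace its text with setText. The XML content and element names are made up for illustration, and selectSingleNode assumes an XPath engine (jaxen) is available on the classpath.

import org.dom4j.Document;
import org.dom4j.DocumentHelper;
import org.dom4j.Node;

public class SetTextExample {
    public static void main(String[] args) throws Exception {
        // Parse a small in-memory document (content is arbitrary).
        Document doc = DocumentHelper.parseText("<config><timeout>30</timeout></config>");
        // Select the node whose text should change; selectSingleNode returns null if nothing matches.
        Node timeout = doc.selectSingleNode("/config/timeout");
        if (timeout != null) {
            // Replace the node's text content.
            timeout.setText("60");
        }
        // Print the updated document; the timeout element now contains "60".
        System.out.println(doc.asXML());
    }
}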
From source file:dk.netarkivet.harvester.datamodel.H1HeritrixTemplate.java
License:Open Source License
/**
 * Auxiliary method to modify the orderXMLdoc Document with respect to setting the maximum number of bytes to
 * retrieve per domain. This method updates the 'group-max-all-kb' element of the 'QuotaEnforcer' node, which in
 * turn is a subelement of the 'pre-fetch-processors' node, with the value of the argument forceMaxBytesPerDomain.
 *
 * @param forceMaxBytesPerDomain The maximum number of bytes to retrieve per domain, or -1 for no limit. Note that
 *        the number is divided by 1024 before being inserted into the orderXml, as Heritrix expects KB.
 * @throws PermissionDenied If unable to replace the QuotaEnforcer node of the orderXMLdoc Document
 * @throws IOFailure If the group-max-all-kb element cannot be found. TODO This group-max-all-kb check should also
 *         be performed in TemplateDAO.create and TemplateDAO.update.
 */
@Override
public void setMaxBytesPerDomain(Long forceMaxBytesPerDomain) {
    // get and set the group-max-all-kb Node of the orderXMLdoc:
    String xpath = GROUP_MAX_ALL_KB_XPATH;
    Node groupMaxSuccessKbNode = template.selectSingleNode(xpath);
    if (groupMaxSuccessKbNode != null) {
        if (forceMaxBytesPerDomain == 0) {
            groupMaxSuccessKbNode.setText("0");
        } else if (forceMaxBytesPerDomain != Constants.HERITRIX_MAXBYTES_INFINITY) {
            // Divide by 1024 since Heritrix uses KB rather than bytes,
            // and add 1 to avoid too low a limit due to rounding.
            groupMaxSuccessKbNode.setText(
                    Long.toString((forceMaxBytesPerDomain / Constants.BYTES_PER_HERITRIX_BYTELIMIT_UNIT) + 1));
        } else {
            groupMaxSuccessKbNode.setText(String.valueOf(Constants.HERITRIX_MAXBYTES_INFINITY));
        }
    } else {
        throw new IOFailure("Unable to locate QuotaEnforcer object in order.xml: " + template.asXML());
    }
}
From source file:dk.netarkivet.harvester.datamodel.H1HeritrixTemplate.java
License:Open Source License
@Override
public void setMaxJobRunningTime(Long maxJobRunningTimeSecondsL) {
    // get and set the "max-time-sec" node of the orderXMLdoc
    String xpath = MAXTIMESEC_PATH_XPATH;
    Node groupMaxTimeSecNode = template.selectSingleNode(xpath);
    if (groupMaxTimeSecNode != null) {
        String currentMaxTimeSec = groupMaxTimeSecNode.getText();
        groupMaxTimeSecNode.setText(Long.toString(maxJobRunningTimeSecondsL));
        log.trace("Value of groupMaxTimeSecNode changed from " + currentMaxTimeSec + " to "
                + maxJobRunningTimeSecondsL);
    } else {
        throw new IOFailure("Unable to locate xpath '" + xpath + "' in the order.xml: " + template.asXML());
    }
}
From source file:dk.netarkivet.harvester.datamodel.H1HeritrixTemplate.java
License:Open Source License
@Override
public void setRecoverlogNode(File recoverlogGzFile) {
    final String RECOVERLOG_PATH_XPATH = "/crawl-order/controller/string[@name='recover-path']";
    Node orderXmlNode = template.selectSingleNode(RECOVERLOG_PATH_XPATH);
    if (orderXmlNode != null) {
        orderXmlNode.setText(recoverlogGzFile.getAbsolutePath());
        log.debug("The Heritrix recover path now refers to '{}'.", recoverlogGzFile.getAbsolutePath());
    } else {
        throw new IOFailure("Unable to locate the '" + RECOVERLOG_PATH_XPATH + "' element in order.xml: "
                + template.asXML());
    }
}
From source file:dk.netarkivet.harvester.harvesting.HarvestController.java
License:Open Source License
/**
 * Insert the correct recoverpath in the order.xml for the given harvestjob.
 *
 * @param job A harvestjob
 * @param files Heritrix files related to this harvestjob.
 */
private void insertHeritrixRecoverPathInOrderXML(Job job, HeritrixFiles files) {
    Document order = job.getOrderXMLdoc();
    final String RECOVERLOG_PATH_XPATH = "/crawl-order/controller/string[@name='recover-path']";
    Node orderXmlNode = order.selectSingleNode(RECOVERLOG_PATH_XPATH);
    if (orderXmlNode != null) {
        orderXmlNode.setText(files.getRecoverBackupGzFile().getAbsolutePath());
        log.debug("The Heritrix recover path now refers to '"
                + files.getRecoverBackupGzFile().getAbsolutePath() + "'.");
        job.setOrderXMLDoc(order);
    } else {
        throw new IOFailure(
                "Unable to locate the '" + RECOVERLOG_PATH_XPATH + "' element in order.xml: " + order.asXML());
    }
}
From source file:edu.ku.brc.specify.toycode.L18NStringResApp.java
License:Open Source License
/**
 * @param fileArg the strings resource file to process
 * @param doDiffs if true, only entries missing from the base resource set are translated
 */
public void process(final File fileArg, final boolean doDiffs) {
    try {
        String dirName = RES_PATH + "values-" + destLocale.getLanguage();
        String path = dirName + File.separator + fileArg.getName();
        File file = fileArg;
        if (doDiffs) {
            file = new File(path);
        }

        Document doc = readFileToDOM4J(new FileInputStream(file));
        Node root = doc.getRootElement();

        for (Object nodeObj : root.selectNodes("/resources/string")) {
            Node node = (Node) nodeObj;
            String name = XMLHelper.getAttr((Element) node, "name", null);
            if (doDiffs) {
                if (baseHash.get(name) != null) {
                    continue;
                }
            }
            String text = node.getText();
            String transText = translate(text);
            if (transText != null) {
                node.setText(transText);
            }
            System.out.println(name + "[" + text + "][" + transText + "]");
        }

        File dir = new File(dirName);
        if (!dir.exists()) {
            dir.mkdir();
        }

        FileOutputStream fos = new FileOutputStream(path);
        OutputFormat format = OutputFormat.createPrettyPrint();
        XMLWriter writer = new XMLWriter(fos, format);
        writer.write(doc);
        writer.flush();
    } catch (Exception e) {
        e.printStackTrace();
    }
}
From source file:edu.scripps.fl.pubchem.web.pug.PowerUserGateway.java
License:Apache License
protected Document createStatusDocument(String requestId) throws Exception {
    InputStream is = getClass().getResourceAsStream("/edu/scripps/fl/pubchem/pug/PugStatus.xml");
    Document doc = getDocument(is);
    Node node = doc.selectSingleNode("//PCT-Request_reqid");
    node.setText("" + requestId);
    return doc;
}
From source file:edu.scripps.fl.pubchem.web.pug.PUGRequest.java
License:Apache License
private static void setResponseIds2(Document document, Type type, Collection<Object> ids) throws Exception {
    Node node = document.selectSingleNode("//PCT-ID-List_db");
    node.setText(type.getDatabase());
    node = document.selectSingleNode(".//PCT-ID-List_uids");
    for (Node child : (List<Node>) node.selectNodes("*"))
        child.detach();
    for (Object id : ids) {
        Element aidElem = DocumentHelper.createElement("PCT-ID-List_uids_E");
        aidElem.setText(id.toString());
        ((Element) node).add(aidElem);
    }
}
From source file:edu.scripps.fl.pubchem.web.pug.PUGRequest.java
License:Apache License
public static void setResponseIds(Document document, Type type, Collection<Object> ids) throws Exception {
    String idTypePath = "//PCT-QueryAssayData_";
    idTypePath += Type.AID.equals(type) ? "aids" : "scids";
    Node localRoot = document.selectSingleNode(idTypePath);
    Node node = localRoot.selectSingleNode(".//PCT-ID-List_db");
    node.setText(type.getDatabase());
    node = localRoot.selectSingleNode(".//PCT-ID-List_uids");
    // if (node == null)
    //     throw new Exception("Cannot find PCT-ID-List_uids node");
    for (Node child : (List<Node>) node.selectNodes("*"))
        child.detach();
    for (Object id : ids) {
        Element aidElem = DocumentHelper.createElement("PCT-ID-List_uids_E");
        aidElem.setText(id.toString());
        ((Element) node).add(aidElem);
    }
}
From source file:edu.scripps.fl.pubchem.web.pug.PUGRequest.java
License:Apache License
public static void setOutputType(Document document, Output output) throws Exception {
    Node node = document.selectSingleNode("//PCT-QueryAssayData_output");
    if (node == null)
        throw new Exception("Document does not contain a PCT-QueryAssayData_output node");
    if (Output.XML.equals(output)) {
        node.setText("1");
        ((Element) node).attribute("value").setText("assay-xml");
    } else if (Output.ASN1.equals(output)) {
        node.setText("2");
        ((Element) node).attribute("value").setText("assay-text-asn");
    } else if (Output.CSV.equals(output)) {
        node.setText("4");
        ((Element) node).attribute("value").setText("csv");
    }
}
From source file:edu.ucsd.library.xdre.imports.RDFDAMS4ImportHandler.java
/**
 * Procedure to populate the RDF metadata and ingest the files.
 */
public boolean execute() throws Exception {
    if (filesPaths != null) {
        File file = null;
        // List the source files
        for (int i = 0; i < filesPaths.length; i++) {
            file = new File(filesPaths[i]);
            if (file.exists()) {
                listFile(filesMap, file);
            }
        }
    }

    String message = "";
    Document doc = null;
    DamsURI damsURI = null;
    String oid = null;
    int fLen = rdfFiles.length;
    String currFile = null;
    SAXReader saxReader = new SAXReader();
    for (int i = 0; i < fLen && !interrupted; i++) {
        currFile = rdfFiles[i].getName();
        setStatus("Processing external import for file " + currFile + " (" + (i + 1) + " of " + fLen + ") ... ");
        try {
            doc = saxReader.read(rdfFiles[i]);
            List<Node> nodes = doc.selectNodes("//@rdf:about");
            for (int j = 0; j < nodes.size(); j++) {
                Node nUri = nodes.get(j);
                String iUri = nUri.getStringValue();
                Node parentNode = nUri.getParent();
                String nName = parentNode.getName();
                if (iUri.endsWith("/COL") || !(iUri.startsWith("http") && iUri.indexOf("/ark:/") > 0)) {
                    // Assign ARK
                    if (nName.endsWith("Object") || nName.endsWith("Component") || nName.endsWith("File")
                            || (((Element) parentNode).isRootElement()
                                    || (parentNode.getParent().isRootElement()
                                            && parentNode.getParent().getName().equals("RDF")))) {
                        String objId = iUri;
                        if (nName.endsWith("Component") || nName.endsWith("File")) {
                            damsURI = DamsURI.toParts(iUri, null);
                            objId = damsURI.getObject();
                        }
                        String srcObjKey = objId + "::" + rdfFiles[i].getAbsolutePath();
                        oid = idsMap.get(srcObjKey);
                        // Assign new ARK
                        if (oid == null) {
                            oid = getNewId();
                            idsMap.put(srcObjKey, oid);
                        }
                        if (nName.endsWith("Object")) {
                            objId = oid;
                            objRecords.put(objId, currFile);
                        } else if (nName.endsWith("Component") || nName.endsWith("File")) {
                            damsURI.setObject(oid);
                            // XXX
                            // Assign cid and fid for Component and File if required
                            objId = damsURI.toString();
                        } else
                            objId = oid;
                        nUri.setText(objId);
                        updateReference(doc, iUri, objId);
                    } else {
                        String field = null;
                        Node tNode = null;
                        String xPath = null;
                        Map<String, String> props = new TreeMap<String, String>();
                        String elemXPath = parentNode.getPath();
                        if (nName.endsWith("Collection") || nName.endsWith("CollectionPart")) {
                            // Retrieve the Collection record
                            field = "title_tesim";
                            xPath = "dams:title/mads:Title/mads:authoritativeLabel";
                            tNode = parentNode.selectSingleNode(xPath);
                            if (tNode == null) {
                                // Loop through to locate the rdfs:label if not selected by xPath.
                                Node n = parentNode.selectSingleNode("dams:title");
                                for (Iterator<Element> it = ((Element) n).elementIterator(); it.hasNext();) {
                                    Element elem = it.next();
                                    if (elem.getNamespacePrefix().equals("mads")
                                            && elem.getName().equals("Title"))
                                        tNode = elem.selectSingleNode("mads:authoritativeLabel");
                                }
                            }
                        } /* else if (nName.endsWith("Language") || nName.endsWith("Authority")
                                || nName.endsWith("Subject") || nName.endsWith("Name") || nName.endsWith("Topic")
                                || nName.endsWith("GenreForm") || nName.endsWith("Temporal")
                                || nName.endsWith("Geographic")) {
                            // Subject, Authority records use mads:authoritativeLabel
                            field = "name_tesim";
                            xPath = "mads:authoritativeLabel";
                            tNode = parentNode.selectSingleNode(xPath);
                        } */ else if (nName.endsWith(COPYRIGHT)) {
                            // Copyright records use dams:copyrightStatus, plus other properties in the next step.
                            field = "status_tesim";
                            xPath = "dams:copyrightStatus";
                            tNode = parentNode.selectSingleNode(xPath);
                            props = copyrightProperties(parentNode);
                        } else if (nName.endsWith(LICENSE)) {
                            // License records use dams:licenseNote, plus other properties in the next step.
                            field = "note_tesim";
                            xPath = "dams:licenseNote";
                            tNode = parentNode.selectSingleNode(xPath);
                            props = licenseProperties(parentNode);
                        } else if (nName.endsWith(OTHERRIGHTS)) {
                            // OtherRights records use dams:otherRightsBasis, plus other properties in the next step.
                            field = "otherRightsBasis_tesim";
                            xPath = "dams:otherRightsBasis";
                            tNode = parentNode.selectSingleNode(xPath);
                            props = licenseProperties(parentNode);
                        } else if (nName.endsWith(RELATEDRESOURCE)) {
                            // RelatedResource records use dams:description, plus other properties in the next step.
                            field = "description_tesim";
                            xPath = "dams:description";
                            tNode = parentNode.selectSingleNode(xPath);
                            props = relatedResourceProperties(parentNode);
                        } else if (nName.endsWith(SOURCECAPTURE)) {
                            // SourceCapture records use dams:sourceType, plus other properties in the next step.
                            field = "sourceType_tesim";
                            xPath = "dams:sourceType";
                            tNode = parentNode.selectSingleNode(xPath);
                            props = sourceCaptureProperties(parentNode);
                        } else if (elemXPath.indexOf("mads", elemXPath.lastIndexOf('/') + 1) >= 0) {
                            // MADSScheme and Language
                            if (nName.endsWith(MADSSCHEME) || nName.equals(LANGUAGE)) {
                                field = "code_tesim";
                                xPath = "mads:code";
                                tNode = parentNode.selectSingleNode(xPath);
                                if (tNode == null) {
                                    field = "name_tesim";
                                    xPath = "rdfs:label";
                                    tNode = parentNode.selectSingleNode(xPath);
                                    if (tNode == null) {
                                        // Loop through to locate the rdfs:label if not selected by xPath.
                                        for (Iterator<Element> it = ((Element) parentNode).elementIterator(); it
                                                .hasNext();) {
                                            Element elem = it.next();
                                            if (elem.getNamespacePrefix().equals("rdfs")
                                                    && elem.getName().equals("label"))
                                                tNode = elem;
                                        }
                                    }
                                }
                            } else {
                                // Subject, Authority records use mads:authoritativeLabel
                                field = "name_tesim";
                                xPath = "mads:authoritativeLabel";
                                tNode = parentNode.selectSingleNode(xPath);
                                if (tNode == null) {
                                    // Try to use the mads:code for mapping when mads:authoritativeLabel is not available
                                    field = "code_tesim";
                                    xPath = "mads:code";
                                    tNode = parentNode.selectSingleNode(xPath);
                                }
                                // Mapping for mads:isMemberOfMADSScheme
                                String madsScheme = null;
                                Node madsSchemeNode = parentNode.selectSingleNode("mads:isMemberOfMADSScheme");
                                if (madsSchemeNode != null) {
                                    Node msValueNode = madsSchemeNode.selectSingleNode("@rdf:resource");
                                    if (msValueNode != null) {
                                        madsScheme = madsSchemeNode.getStringValue();
                                        props.put("scheme_tesim", madsScheme);
                                    } else if ((madsSchemeNode = madsSchemeNode
                                            .selectSingleNode("mads:MADSScheme")) != null
                                            && madsSchemeNode.hasContent()) {
                                        if ((msValueNode = madsSchemeNode.selectSingleNode("mads:code")) != null) {
                                            madsScheme = msValueNode.getText();
                                            props.put("scheme_code_tesim", madsScheme);
                                        } else if ((msValueNode = madsSchemeNode
                                                .selectSingleNode("rdfs:label")) != null) {
                                            madsScheme = msValueNode.getText();
                                            props.put("scheme_name_tesim", madsScheme);
                                        }
                                    } else {
                                        props.put("scheme_tesim", "");
                                    }
                                } else {
                                    props.put("scheme_tesim", null);
                                }
                            }
                        } else {
                            // XXX Other Rights records like Statute, License, Other Rights etc.
                            field = "value_tesim";
                            xPath = "rdf:value";
                            tNode = parentNode.selectSingleNode(xPath);
                            field = "code_tesim";
                            if (tNode == null) {
                                xPath = "dams:code";
                                tNode = parentNode.selectSingleNode(xPath);
                            }
                        }
                        if (tNode == null) {
                            throw new Exception("Element " + xPath + " is missing from the " + nName + " record "
                                    + iUri + " in file " + currFile + ".");
                        }
                        updateDocument(doc, parentNode, field, tNode.getText(), props);
                    }
                } else if (nName.endsWith("Object")) {
                    objRecords.put(iUri, currFile);
                }
            }

            String dams4Rdf = doc.asXML();
            logData("dams4_" + rdfFiles[i].getName(), dams4Rdf);

            // Ingest the records
            String subjectId = null;
            DamsURI objURI = null;
            List<DamsURI> objURIs = null;
            RDFStore graph = null;
            rdfStore = new RDFStore();
            Model rdf = rdfStore.loadRDFXML(dams4Rdf);
            initHandler();

            Model iRdf = null;
            int jLen = items.size();
            for (int j = 0; j < jLen && !interrupted; j++) {
                graph = new RDFStore();
                recordsCount++;
                // Add subject
                subjectId = items.get(j);
                try {
                    setStatus("Processing metadata for record " + subjectId + " (" + (j + 1) + " of " + jLen
                            + ") in file " + currFile + " ... ");
                    boolean succeeded = false;
                    objURIs = objects.get(subjectId);
                    for (int k = 0; k < objURIs.size(); k++) {
                        objURI = objURIs.get(k);
                        iRdf = rdfStore.querySubject(objURI.toString());
                        graph.merge(iRdf);
                    }
                    // Update object
                    //log.info(j + " ingesting record " + subjectId + ":\n" + graph.export(RDFStore.RDFXML_ABBREV_FORMAT) + "\n\n");
                    succeeded = damsClient.updateObject(subjectId, graph.export(RDFStore.RDFXML_ABBREV_FORMAT),
                            Constants.IMPORT_MODE_ADD);
                    if (!succeeded) {
                        if (metadataFailed.indexOf(currFile) < 0)
                            failedCount++;
                        metadataFailed.append(subjectId + " (" + currFile + "), \n");
                        message = "Metadata import for record " + subjectId + " failed (" + (j + 1) + " of "
                                + jLen + ") in file " + currFile + ".";
                        setStatus(message);
                        logError(message + "\n Error RDF: \n" + graph.export(RDFStore.RDFXML_ABBREV_FORMAT));
                    } else {
                        recordsIngested.add(subjectId);
                        message = "Metadata import for record " + subjectId + " succeeded (" + (j + 1) + " of "
                                + jLen + ") in file " + currFile + ". ";
                        setStatus(message);
                        logMessage(message);
                        log.info(message);
                        // Update SOLR for records ingested.
                        updateSOLR(subjectId);
                    }
                } catch (Exception e) {
                    e.printStackTrace();
                    if (metadataFailed.indexOf(currFile) < 0)
                        failedCount++;
                    metadataFailed.append(subjectId + " (" + currFile + "), \n");
                    message = "Metadata import failed: " + e.getMessage();
                    setStatus(message + " (" + (j + 1) + " of " + jLen + ") in file " + currFile + ".");
                    logError(message);
                }

                try {
                    Thread.sleep(10);
                } catch (InterruptedException e) {
                    e.printStackTrace();
                    interrupted = true;
                    failedCount++;
                    metadataFailed.append(subjectId + " (" + currFile + ") \n");
                    message = "Metadata import interrupted for subject " + subjectId + ". \n Error: "
                            + e.getMessage() + "\n";
                    setStatus("Canceled");
                    clearSession();
                    logError(message);
                }
            }

            // Ingest the source file
            if (importOption.equalsIgnoreCase("metadataAndFiles")) {
                uploadFiles(rdf, currFile);
            }
        } catch (Exception e) {
            e.printStackTrace();
            failedCount++;
            message = "Import failed for " + currFile + ": " + e.getMessage();
            setStatus(message + " (" + (i + 1) + " of " + fLen + ").");
            logError(message);
        } finally {
            // Update SOLR for files uploaded
            int iLen = objWithFiles.size();
            for (int j = 0; j < iLen && !interrupted; j++) {
                updateSOLR(objWithFiles.get(j));
            }
        }
        setProgressPercentage(((i + 1) * 100) / fLen);

        try {
            Thread.sleep(10);
        } catch (InterruptedException e) {
            e.printStackTrace();
            interrupted = true;
            failedCount++;
            message = "Import interrupted for object in " + currFile + ". \n Error: " + e.getMessage() + "\n";
            setStatus("Canceled");
            clearSession();
            logError(message);
        }
    }
    return exeResult;
}