Java tutorial
/******************************************************************************* * Copyright 2013 Johns Hopkins University * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. ******************************************************************************/ package edu.jhu.pha.vospace.process; import java.io.IOException; import java.io.InputStream; import java.net.URISyntaxException; import java.text.SimpleDateFormat; import java.util.ArrayList; import java.util.Arrays; import java.util.Calendar; import java.util.HashMap; import java.util.List; import java.util.Map; import java.util.TimeZone; import org.apache.commons.configuration.Configuration; import org.apache.commons.lang.StringUtils; import org.apache.log4j.Logger; import org.apache.tika.config.TikaConfig; import org.apache.tika.detect.CompositeDetector; import org.apache.tika.detect.DefaultDetector; import org.apache.tika.detect.Detector; import org.apache.tika.exception.TikaException; import org.apache.tika.io.TikaInputStream; import org.apache.tika.metadata.HttpHeaders; import org.apache.tika.metadata.Metadata; import org.apache.tika.metadata.TikaCoreProperties; import org.apache.tika.mime.MediaType; import org.apache.tika.mime.MediaTypeRegistry; import org.apache.tika.mime.MimeTypes; import org.apache.tika.parser.AbstractParser; import org.apache.tika.parser.CompositeParser; import org.apache.tika.parser.ParseContext; import org.apache.tika.parser.pkg.SimulationDetector; import org.codehaus.jackson.JsonNode; import org.codehaus.jackson.map.ObjectMapper; import org.codehaus.jackson.type.TypeReference; import org.xml.sax.SAXException; import com.rabbitmq.client.QueueingConsumer; import edu.jhu.pha.vospace.QueueConnector; import edu.jhu.pha.vospace.SettingsServlet; import edu.jhu.pha.vospace.node.ContainerNode; import edu.jhu.pha.vospace.node.DataNode; import edu.jhu.pha.vospace.node.Node; import edu.jhu.pha.vospace.node.NodeFactory; import edu.jhu.pha.vospace.node.NodePath; import edu.jhu.pha.vospace.node.NodeType; import edu.jhu.pha.vospace.node.VospaceId; import edu.jhu.pha.vospace.oauth.UserHelper; public class NodeProcessor implements Runnable { private static final Logger logger = Logger.getLogger(NodeProcessor.class); private final static String EXTERNAL_LINK_PROPERTY = "ivo://ivoa.net/vospace/core#external_link"; private final static String PROCESSING_PROPERTY = "ivo://ivoa.net/vospace/core#processing"; private final static String ERROR_MESSAGE_PROPERTY = "ivo://ivoa.net/vospace/core#error_message"; private static final MediaTypeRegistry MIME_REGISTRY = new MimeTypes().getMediaTypeRegistry(); static Configuration conf = SettingsServlet.getConfig(); private static final SimpleDateFormat dateFormat = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss"); static { dateFormat.setTimeZone(TimeZone.getTimeZone("UTC")); } @Override public void run() { QueueConnector.goAMQP("nodesProcessor", new QueueConnector.AMQPWorker<Boolean>() { @Override public Boolean go(com.rabbitmq.client.Connection conn, com.rabbitmq.client.Channel channel) throws IOException { channel.exchangeDeclare(conf.getString("process.exchange.nodeprocess"), "fanout", true); channel.exchangeDeclare(conf.getString("vospace.exchange.nodechanged"), "fanout", false); channel.queueDeclare(conf.getString("process.queue.nodeprocess"), true, false, false, null); channel.queueBind(conf.getString("process.queue.nodeprocess"), conf.getString("process.exchange.nodeprocess"), ""); QueueingConsumer consumer = new QueueingConsumer(channel); channel.basicConsume(conf.getString("process.queue.nodeprocess"), false, consumer); while (!Thread.currentThread().isInterrupted()) { Node node = null; try { QueueingConsumer.Delivery delivery = consumer.nextDelivery(); Map<String, Object> nodeData = (new ObjectMapper()).readValue(delivery.getBody(), 0, delivery.getBody().length, new TypeReference<HashMap<String, Object>>() { }); channel.basicAck(delivery.getEnvelope().getDeliveryTag(), false); node = NodeFactory.getNode(new VospaceId((String) nodeData.get("uri")), (String) nodeData.get("owner")); logger.debug("Node changed: " + nodeData.get("uri") + " " + nodeData.get("owner") + " " + node.getType()); switch (node.getType()) { case DATA_NODE: case STRUCTURED_DATA_NODE: case UNSTRUCTURED_DATA_NODE: { TikaInputStream inp = null; try { Metadata detectTikaMeta = new Metadata(); detectTikaMeta.set(Metadata.RESOURCE_NAME_KEY, node.getUri().getNodePath().getNodeName()); inp = TikaInputStream.get(node.exportData()); //MediaType type = new DefaultDetector().detect(inp, nodeTikaMeta); List<Detector> list = new ArrayList<Detector>(); list.add(new SimulationDetector()); list.add(new DefaultDetector()); Detector detector = new CompositeDetector(list); MediaType type = detector.detect(inp, detectTikaMeta); node.getNodeInfo().setContentType(type.toString()); node.getMetastore().storeInfo(node.getUri(), node.getNodeInfo()); JsonNode credentials = UserHelper.getProcessorCredentials(node.getOwner()); boolean makeStructured = false; List<String> externalLinks = new ArrayList<String>(); for (ProcessorConfig processorConf : ProcessingFactory.getInstance() .getProcessorConfigsForNode(node, credentials)) { Metadata nodeTikaMeta = new Metadata(); nodeTikaMeta.set(TikaCoreProperties.SOURCE, node.getUri().toString()); nodeTikaMeta.set("owner", (String) nodeData.get("owner")); nodeTikaMeta.set(TikaCoreProperties.TITLE, node.getUri().getNodePath().getNodeName()); nodeTikaMeta.add(TikaCoreProperties.METADATA_DATE, dateFormat .format(Calendar.getInstance(TimeZone.getTimeZone("UTC")).getTime())); nodeTikaMeta.set(Metadata.CONTENT_LOCATION, ((DataNode) node).getHttpDownloadLink().toASCIIString()); nodeTikaMeta.set(Metadata.CONTENT_TYPE, type.toString()); AbstractParser parser; TikaConfig config = TikaConfig.getDefaultConfig(); if (processorConf.getTikaConfig() != null) { config = new TikaConfig( getClass().getResourceAsStream(processorConf.getTikaConfig())); } parser = new CompositeParser(config.getMediaTypeRegistry(), config.getParser()); Processor processor = Processor.fromProcessorConfig(processorConf); InputStream str = null; try { str = TikaInputStream.get(node.exportData()); parser.parse(str, processor.getContentHandler(), nodeTikaMeta, new ParseContext()); } finally { try { str.close(); } catch (Exception ex) { } } // now do out-of-tika processing of results try { processor.processNodeMeta(nodeTikaMeta, credentials.get(processorConf.getId())); logger.debug("Processing of " + node.getUri().toString() + " is finished."); } catch (Exception e) { logger.error("Error processing the node. " + e.getMessage()); e.printStackTrace(); processError(node, e); } String[] links = nodeTikaMeta.getValues("EXTERNAL_LINKS"); if (null != links && links.length > 0) { externalLinks.addAll(Arrays.asList(links)); } MediaType curType = MediaType.parse(nodeTikaMeta.get(Metadata.CONTENT_TYPE)); if (MIME_REGISTRY.isSpecializationOf(curType, type)) { type = curType; logger.debug("Media type reassigned to " + type.toString() + " by " + processorConf.getId()); } String nodeTypeStr = nodeTikaMeta.get("NodeType"); if (null != nodeTypeStr && NodeType.valueOf(nodeTypeStr).equals(NodeType.STRUCTURED_DATA_NODE)) makeStructured = true; } node.makeNodeStructured(makeStructured); Map<String, String> properties = new HashMap<String, String>(); properties.put(PROCESSING_PROPERTY, "done"); if (externalLinks.size() > 0) { properties.put(EXTERNAL_LINK_PROPERTY, StringUtils.join(externalLinks, ' ')); } node.getNodeInfo().setContentType(type.toString()); node.getMetastore().updateUserProperties(node.getUri(), properties); node.getMetastore().storeInfo(node.getUri(), node.getNodeInfo()); logger.debug("Updated node " + node.getUri().toString() + " to " + node.getNodeInfo().getContentType() + " and " + node.getNodeInfo().getSize()); // update node's container size metadata try { ContainerNode contNode = (ContainerNode) NodeFactory.getNode( new VospaceId( new NodePath(node.getUri().getNodePath().getContainerName())), node.getOwner()); node.getStorage().updateNodeInfo(contNode.getUri().getNodePath(), contNode.getNodeInfo()); node.getMetastore().storeInfo(contNode.getUri(), contNode.getNodeInfo()); } catch (URISyntaxException e) { logger.error("Updating root node size failed: " + e.getMessage()); } try { nodeData.put("container", node.getUri().getNodePath().getParentPath().getNodeStoragePath()); byte[] jobSer = (new ObjectMapper()).writeValueAsBytes(nodeData); channel.basicPublish(conf.getString("vospace.exchange.nodechanged"), "", null, jobSer); } catch (IOException e) { logger.error(e); } } catch (TikaException ex) { logger.error("Error parsing the node " + node.getUri().toString() + ": " + ex.getMessage()); processError(node, ex); ex.printStackTrace(); } catch (SAXException ex) { logger.error("Error SAX parsing the node " + node.getUri().toString() + ": " + ex.getMessage()); processError(node, ex); } catch (IOException ex) { logger.error("Error reading the node " + node.getUri().toString() + ": " + ex.getMessage()); processError(node, ex); } finally { try { inp.close(); } catch (Exception ex2) { } ; } break; } case CONTAINER_NODE: { // DbPoolServlet.goSql("Processing nodes", // "select * from nodes where owner = ?)", // new SqlWorker<Boolean>() { // @Override // public Boolean go(java.sql.Connection conn, java.sql.PreparedStatement stmt) throws SQLException { // stmt.setString(1, node.getOwner()); // /*ResultSet resSet = stmt.executeQuery(); // while(resSet.next()) { // String uriStr = resSet.getString(1); // String username = resSet.getString(2); // // try { // VospaceId uri = new VospaceId(uriStr); // // Node newNode = NodeFactory.getInstance().getNode(uri, username); // newNode.remove(); // } catch(Exception ex) { // ex.printStackTrace(); // } // }*/ // return true; // } // } // // ); break; } default: { break; } } } catch (InterruptedException ex) { logger.error("Sleeping interrupted. " + ex.getMessage()); processError(node, ex); } catch (IOException ex) { ex.printStackTrace(); logger.error("Error reading the changed node JSON: " + ex.getMessage()); processError(node, ex); } catch (URISyntaxException ex) { logger.error("Error parsing VospaceId from changed node JSON: " + ex.getMessage()); processError(node, ex); } } return true; } }); } private void processError(Node node, Exception ex) { if (null != node) { try { Map<String, String> properties = new HashMap<String, String>(); properties.put(PROCESSING_PROPERTY, "error"); properties.put(ERROR_MESSAGE_PROPERTY, ex.getMessage()); node.getMetastore().updateUserProperties(node.getUri(), properties); } catch (Exception ex2) { logger.error("Error setting error node props: " + ex2.getMessage()); } } } }