// Java tutorial
/******************************************************************************* * Copyright 2012, The Infinit.e Open Source Project. * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU Affero General Public License, version 3, * as published by the Free Software Foundation. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Affero General Public License for more details. * * You should have received a copy of the GNU Affero General Public License * along with this program. If not, see <http://www.gnu.org/licenses/>. ******************************************************************************/ package com.ikanow.infinit.e.harvest.extraction.document; import java.text.SimpleDateFormat; import java.util.Collection; import java.util.Date; import java.util.HashMap; import com.google.common.collect.Multimap; import com.google.common.collect.TreeMultimap; import com.ikanow.infinit.e.data_model.InfiniteEnums.HarvestEnum; import com.ikanow.infinit.e.data_model.store.DbManager; import com.ikanow.infinit.e.data_model.store.MongoDbManager; import com.ikanow.infinit.e.data_model.store.config.source.SourceHarvestStatusPojo; import com.ikanow.infinit.e.data_model.store.config.source.SourcePojo; import com.mongodb.BasicDBObject; import com.mongodb.DBCollection; public class HarvestStatus_Integrated implements HarvestStatus { @Override public void resetForNewSource() { if (null != _messages) { _messages.clear(); } _numMessages = 0; _currMessage = null; }//TESTED /** * updateHarvestStatus * Currently I am using the key to find the old source to update * should switch sourcepojo to use correct id field and search on that. 
* * @param sourceToUpdate * @param harvestDate * @param harvestStatus * @param harvestMessage * @param bTempDisable * @param bPermDisable */ public void update(SourcePojo sourceToUpdate, Date harvestDate, HarvestEnum harvestStatus, String harvestMessage, boolean bTempDisable, boolean bPermDisable) { // Handle successful harvests where the max docs were reached, so don't want to respect the searchCycle if ((harvestStatus == HarvestEnum.success) && (sourceToUpdate.reachedMaxDocs())) { harvestStatus = HarvestEnum.success_iteration; } if ((null != harvestMessage) && !harvestMessage.isEmpty()) { this.logMessage(harvestMessage, false); if (HarvestEnum.error == harvestStatus) { _numMessages++; } } //TOTEST if (null == sourceToUpdate.getHarvestStatus()) { sourceToUpdate.setHarvestStatus(new SourceHarvestStatusPojo()); } BasicDBObject update = new BasicDBObject(); // (annoyingly need to do this in raw format because otherwise overwrite any existing fields eg synced,doccount) if ((null == sourceToUpdate.getDistributionFactor()) || (HarvestEnum.in_progress != harvestStatus)) { update.put(SourceHarvestStatusPojo.sourceQuery_harvest_status_, harvestStatus.toString()); } update.put(SourceHarvestStatusPojo.sourceQuery_harvested_, harvestDate); update.put(SourceHarvestStatusPojo.sourceQuery_realHarvested_, harvestDate); sourceToUpdate.getHarvestStatus().setHarvest_status(harvestStatus); sourceToUpdate.getHarvestStatus().setHarvested(harvestDate); sourceToUpdate.getHarvestStatus().setRealHarvested(harvestDate); // Optional fields: // Display message if (null == _currMessage) { // (then also no harvest message else would have logged already) sourceToUpdate.getHarvestStatus().setHarvest_message(""); } //TESTED else { // New messages to display String date = new SimpleDateFormat("'['yyyy-MM-dd'T'HH:mm:ss']' ").format(new Date()); if ((null != _messages) && !_messages.isEmpty()) { _currMessage.append('\n'); _currMessage.append(getLogMessages(true)); // (clears _messages) } 
sourceToUpdate.getHarvestStatus().setHarvest_message(date + _currMessage.toString()); } //TESTED //(end display message) if (sourceToUpdate.getHarvestStatus().getHarvest_message().length() > 0) { // (only overwrite the previous message if there's actually something to say...) update.put(SourceHarvestStatusPojo.sourceQuery_harvest_message_, sourceToUpdate.getHarvestStatus().getHarvest_message()); if ((null != sourceToUpdate.getDistributionTokens()) && !sourceToUpdate.getDistributionTokens().isEmpty()) { for (Integer token : sourceToUpdate.getDistributionTokens()) { update.put(SourceHarvestStatusPojo.sourceQuery_distributedStatus_ + "." + token.toString(), sourceToUpdate.getHarvestStatus().getHarvest_message()); } } //TESTED } if (bTempDisable) { sourceToUpdate.setHarvestBadSource(true); update.put(SourcePojo.harvestBadSource_, true); } if (bPermDisable) { if ((null == sourceToUpdate.getSearchCycle_secs()) || (0 == sourceToUpdate.getSearchCycle_secs())) { sourceToUpdate.setSearchCycle_secs(-1); } else if (sourceToUpdate.getSearchCycle_secs() > 0) { //(else it's already negative, ie run manually) sourceToUpdate.setSearchCycle_secs(-sourceToUpdate.getSearchCycle_secs()); } update.put(SourcePojo.searchCycle_secs_, sourceToUpdate.getSearchCycle_secs()); } DBCollection sourceDb = DbManager.getIngest().getSource(); BasicDBObject query = new BasicDBObject(SourcePojo._id_, sourceToUpdate.getId()); sourceDb.update(query, new BasicDBObject(MongoDbManager.set_, update)); } /** * logMessage * Logs temporary messages * should switch sourcepojo to use correct id field and search on that. 
* * @param message The message to log * @param bAggregate If true, duplicate error messages are aggregated */ public void logMessage(String message, boolean bAggregate) { if (null == message) return; if (null == _currMessage) { _currMessage = new StringBuffer(); } if (!bAggregate) { if (_currMessage.length() > 0) { _currMessage.append('\n'); } _currMessage.append(message); } else { // Aggregate messages _numMessages++; if (null == _messages) { _messages = new HashMap<String, Integer>(); } if ((_messages.size() > 0) && (_messages.size() < 20)) { // (only process the first 20 messages to keep the size down) Integer count = (Integer) _messages.get(message); if (count != null && count > 0) { _messages.put(message, count + 1); } else { _messages.put(message, 1); } } else { _messages.put(message, 1); } } }//TESTED /** * moreToLog * @return true if custom enrichment has generated more errors */ public boolean moreToLog() { return (null != _currMessage); }//TOTEST private StringBuffer _currMessage = null; // Current message (output at the end of the source processing) private HashMap<String, Integer> _messages = null; // (list of messages to aggregate) private int _numMessages = 0; public int getNumMessages() { return _numMessages; } /** * getLogMessages * Returns a list of up to 5 errors (eg encountered when parsing JavaScript) for * a source, sorted by frequency in ascending order * @return */ private StringBuffer getLogMessages(boolean bReset) { if ((null != _messages) && (_messages.size() > 0)) { StringBuffer messagesString = new StringBuffer(); // Create multimap to store errors in, reverse the order of key (error message) and // value (count) to sort on error count Multimap<Integer, String> mm = TreeMultimap.create(); for (java.util.Map.Entry<String, Integer> entry : _messages.entrySet()) { StringBuffer msg = new StringBuffer(entry.getKey()).append(" (Occurences: ") .append(entry.getValue()).append(')'); mm.put(-entry.getValue(), msg.toString()); } // Write the error 
messages to a Collection<String> Collection<String> messages = mm.values(); // Append up to the top five messages to our StringBuffer and return int messageCount = 1; for (String s : messages) { if (messageCount > 1) { messagesString.append('\n'); } messagesString.append(s); messageCount++; if (messageCount > 5) break; } if (bReset) { _messages.clear(); } return messagesString; } else { return null; } }//TESTED public String getMostCommonMessage() { int max = -1; String maxMsg = null; if (null != _messages) { for (java.util.Map.Entry<String, Integer> entry : _messages.entrySet()) { if (entry.getValue() > max) { max = entry.getValue(); maxMsg = entry.getKey(); } } } if (null != maxMsg) { return new StringBuffer(" errmsg='").append(max).append(": ").append(maxMsg).append("'").toString(); } else return ""; }//TESTED }