// Java tutorial
/******************************************************************************* * Copyright 2012, The Infinit.e Open Source Project * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. ******************************************************************************/ package com.ikanow.infinit.e.utility; import java.io.IOException; import java.util.ArrayList; import java.util.HashMap; import java.util.LinkedList; import java.util.List; import java.util.Map; import org.bson.BSONObject; import org.bson.types.ObjectId; import org.elasticsearch.common.settings.ImmutableSettings; import org.elasticsearch.common.settings.ImmutableSettings.Builder; import com.google.gson.Gson; import com.ikanow.infinit.e.data_model.index.ElasticSearchManager; import com.ikanow.infinit.e.data_model.index.IndexManager; import com.ikanow.infinit.e.data_model.index.feature.event.AssociationFeaturePojoIndexMap; import com.ikanow.infinit.e.data_model.store.DbManager; import com.ikanow.infinit.e.data_model.store.MongoDbManager; import com.ikanow.infinit.e.data_model.store.config.source.SourcePojo; import com.ikanow.infinit.e.data_model.store.document.AssociationPojo; import com.ikanow.infinit.e.data_model.store.feature.association.AssociationFeaturePojo; import com.ikanow.infinit.e.processing.generic.GenericProcessingController; import com.ikanow.infinit.e.processing.generic.aggregation.AssociationAggregationUtils; import com.mongodb.BasicDBObject; import com.mongodb.DBCollection; import com.mongodb.DBCursor; import 
com.mongodb.MongoException; public class MongoAssociationFeatureTxfer { //___________________________________________________________________________________________________ // MAIN /** * @param args: 0,1 is the location of the MongoDB host/port, 2/3 is the location of the ES index host/port * @throws MongoException * @throws NumberFormatException * @throws IOException */ public static void main(String sConfigPath, String sQuery, boolean bDelete, boolean bRebuildIndex, int nSkip, int nLimit, String chunksDescription) throws NumberFormatException, MongoException, IOException { MongoAssociationFeatureTxfer txferManager = new MongoAssociationFeatureTxfer(); // Command line processing com.ikanow.infinit.e.data_model.Globals .setIdentity(com.ikanow.infinit.e.data_model.Globals.Identity.IDENTITY_SERVICE); if (null != sConfigPath) { com.ikanow.infinit.e.data_model.Globals.overrideConfigLocation(sConfigPath); } if (bRebuildIndex) { new GenericProcessingController().InitializeIndex(false, false, true); } BasicDBObject query = null; if (null == sQuery) { query = new BasicDBObject(); } else { query = (BasicDBObject) com.mongodb.util.JSON.parse(sQuery); } if (bDelete) { txferManager.doDelete(query, nLimit); } else { if (null == chunksDescription) { txferManager.doTransfer(query, nSkip, nLimit, null); } else { txferManager.doChunkedTransfer(query, nSkip, nLimit, chunksDescription); } } } //___________________________________________________________________________________________________ // Wrapper for doing transfer in chunks: private void doChunkedTransfer(BasicDBObject query, int nSkip, int nLimit, String chunksDescription) throws IOException { List<BasicDBObject> chunkList = MongoIndexerUtils.getChunks("feature.association", chunksDescription); System.out.println("CHUNKS: Found " + chunkList.size() + " chunks"); //DEBUG //System.out.println("Chunklist= " + chunkList); for (BasicDBObject chunk : chunkList) { BasicDBObject cleanQuery = new BasicDBObject(); 
cleanQuery.putAll((BSONObject) query); String id = null; try { id = (String) chunk.remove("$id"); System.out.println("CHUNK: " + id); doTransfer(cleanQuery, 0, 0, chunk); } catch (Exception e) { System.out.println("FAILED CHUNK: " + id + " ... " + e.getMessage()); } } }//TESTED //___________________________________________________________________________________________________ // PROCESSING LOOP (new interface) Map<String, SourcePojo> _sourceCache = new HashMap<String, SourcePojo>(); private void doTransfer(BasicDBObject query, int nSkip, int nLimit, BasicDBObject chunk) { ElasticSearchManager elasticManager = null; // Initialize the DB: DBCollection eventFeatureDB = DbManager.getFeature().getAssociation(); // Initialize the ES (create the index if it doesn't already): // 1. Set-up the entity feature index ElasticSearchManager.setDefaultClusterName("infinite-aws"); // (delete the index) //elasticManager = ElasticSearchManager.getIndex("association_index"); //elasticManager.deleteMe(); // Create the index if necessary String sMapping = new Gson().toJson(new AssociationFeaturePojoIndexMap.Mapping(), AssociationFeaturePojoIndexMap.Mapping.class); Builder localSettings = ImmutableSettings.settingsBuilder(); localSettings.put("number_of_shards", 1).put("number_of_replicas", 0); localSettings.put("index.analysis.analyzer.suggestAnalyzer.tokenizer", "standard"); localSettings.putArray("index.analysis.analyzer.suggestAnalyzer.filter", "standard", "lowercase"); elasticManager = ElasticSearchManager.createIndex("association_index", null, false, null, sMapping, localSettings); // Get the index (necessary if already created) if (null == elasticManager) { elasticManager = ElasticSearchManager.getIndex("association_index"); } // Now query the DB: DBCursor dbc = null; dbc = eventFeatureDB.find(query); if (null != chunk) { if (chunk.containsField(DbManager.min_)) { dbc = dbc.addSpecial(DbManager.min_, chunk.get(DbManager.min_)); } if (chunk.containsField(DbManager.max_)) { dbc = 
dbc.addSpecial(DbManager.max_, chunk.get(DbManager.max_)); } } dbc = dbc.skip(nSkip).limit(nLimit).batchSize(1000); if (null == chunk) { int nCount = dbc.count() - nSkip; if (nCount < 0) nCount = 0; System.out.println( "Found " + nCount + " records to sync, process first " + (0 == nLimit ? nCount : nLimit)); if (0 == nCount) { // Nothing to do... return; } } List<AssociationFeaturePojo> events = new LinkedList<AssociationFeaturePojo>(); int nSynced = 0; // Loop over array and invoke the cleansing function for each one while (dbc.hasNext()) { BasicDBObject dbo = (BasicDBObject) dbc.next(); AssociationFeaturePojo evt = AssociationFeaturePojo.fromDb(dbo, AssociationFeaturePojo.class); // If this table has just been rebuilt from the document then the indexes are all wrong ... // recalculate and save if ('#' == evt.getIndex().charAt(0)) { AssociationPojo singleEvt = new AssociationPojo(); singleEvt.setEntity1_index(evt.getEntity1_index()); singleEvt.setEntity2_index(evt.getEntity2_index()); singleEvt.setVerb_category(evt.getVerb_category()); singleEvt.setGeo_index(evt.getGeo_index()); evt.setIndex(AssociationAggregationUtils.getEventFeatureIndex(singleEvt)); eventFeatureDB .update(new BasicDBObject("_id", dbo.get("_id")), new BasicDBObject(MongoDbManager.set_, new BasicDBObject(AssociationFeaturePojo.index_, evt.getIndex())), false, true); // (has to be a multi-update even though it's unique because it's sharded on index) } // Handle groups (system group is: "4c927585d591d31d7b37097a") if (null == evt.getCommunityId()) { evt.setCommunityId(new ObjectId("4c927585d591d31d7b37097a")); } // Bulk add prep events.add(evt); nSynced++; if (events.size() > 1000) { elasticManager.bulkAddDocuments(IndexManager.mapListToIndex(events, AssociationFeaturePojo.listType(), new AssociationFeaturePojoIndexMap()), "_id", null, true); events.clear(); } } // End loop over entities //write whatevers left elasticManager.bulkAddDocuments(IndexManager.mapListToIndex(events, 
AssociationFeaturePojo.listType(), new AssociationFeaturePojoIndexMap()), "_id", null, true); if (null != chunk) { System.out.println("Found " + nSynced + " records to sync in chunk"); } } //___________________________________________________________________________________________________ // DELETE DOCUMENTS FROM A QUERY private void doDelete(BasicDBObject query, int nLimit) { try { // Initialize the DB: DBCollection eventFeatureDB = DbManager.getFeature().getAssociation(); DBCursor cur = eventFeatureDB.find(query).limit(nLimit); // (this internally works in batches of 1000; just get _id) System.out.println("Found " + cur.count() + " records to delete"); if (nLimit > 0) { System.out.println("(limited to " + nLimit + " records)"); } ArrayList<AssociationFeaturePojo> events = new ArrayList<AssociationFeaturePojo>(); LinkedList<String> eventIds = new LinkedList<String>(); while (cur.hasNext()) { AssociationFeaturePojo event = AssociationFeaturePojo.fromDb(cur.next(), AssociationFeaturePojo.class); events.add(event); eventIds.add( new StringBuffer(event.getIndex()).append(":").append(event.getCommunityId()).toString()); eventFeatureDB.remove(new BasicDBObject("index", event.getIndex())); } ElasticSearchManager elasticManager = ElasticSearchManager.getIndex("association_index"); elasticManager.bulkDeleteDocuments(eventIds); } catch (NumberFormatException e) { e.printStackTrace(); } catch (MongoException e) { e.printStackTrace(); } } }