fr.gouv.vitam.mdbes.MainIngestMDBESFromFile.java Source code


Introduction

Here is the source code for fr.gouv.vitam.mdbes.MainIngestMDBESFromFile.java

Source

/**
 * This file is part of POC MongoDB ElasticSearch Project.
 *
 * Copyright 2009, Frederic Bregier, and individual contributors by the @author tags. See the
 * COPYRIGHT.txt in the distribution for a full listing of individual contributors.
 *
 * All POC MongoDB ElasticSearch Project is free software: you can redistribute it and/or modify it under the terms of
 * the GNU General Public License as published by the Free Software Foundation, either version 3
 * of the License, or (at your option) any later version.
 *
 * POC MongoDB ElasticSearch is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even
 * the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
 * Public License for more details.
 *
 * You should have received a copy of the GNU General Public License along with POC MongoDB ElasticSearch . If not, see
 * <http://www.gnu.org/licenses/>.
 */
package fr.gouv.vitam.mdbes;

import java.io.BufferedReader;
import java.io.DataInputStream;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStreamReader;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.ScheduledExecutorService;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicLong;

import org.apache.log4j.PropertyConfigurator;
import org.bson.BSONObject;

import com.mongodb.BulkWriteOperation;
import com.mongodb.BulkWriteResult;
import com.mongodb.DBObject;
import com.mongodb.MongoClient;
import com.mongodb.MongoClientOptions;
import com.mongodb.ReadPreference;
import com.mongodb.WriteResult;
import com.mongodb.util.JSON;

import fr.gouv.vitam.query.GlobalDatas;
import fr.gouv.vitam.utils.exception.InvalidUuidOperationException;
import fr.gouv.vitam.utils.logging.LogbackLoggerFactory;
import fr.gouv.vitam.utils.logging.VitamLogLevel;
import fr.gouv.vitam.utils.logging.VitamLogger;
import fr.gouv.vitam.utils.logging.VitamLoggerFactory;

/**
 * Load ingest files into MongoDB and index them into ElasticSearch
 *
 * @author "Frederic Bregier"
 *
 */
@SuppressWarnings("javadoc")
public class MainIngestMDBESFromFile implements Runnable {
    private static VitamLogger LOGGER = null;

    private static AtomicLong loadt = new AtomicLong(0);

    private static MongoClient mongoClient = null;

    public static String[] ingest;
    public static String database = "VitamLinks";
    public static String host = "localhost";
    public static String esbase = "vitam";
    public static String unicast = "mdb002, mdb003, mdb004";
    public static String model = "courriel";

    /**
     * @param args
     */
    public static void main(final String[] args) throws Exception {
        if (args.length < 7) {
            System.err.println("need: logfile host database escluster unicast model files");
            return;
        }
        final String networkHost = "192.168.56.102";
        GlobalDatas.localNetworkAddress = networkHost;
        final String log4j = args[0];
        PropertyConfigurator.configure(log4j);
        VitamLoggerFactory.setDefaultFactory(new LogbackLoggerFactory(VitamLogLevel.WARN));
        LOGGER = VitamLoggerFactory.getInstance(MainIngestMDBESFromFile.class);
        // connect to the local database server
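        // positional overrides: args[1]=host, args[2]=database, args[3]=ES cluster name,
        // args[4]=ES unicast hosts, args[5]=model, args[6..]=ingest files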
        if (args.length > 1) {
            host = args[1];
        }
        if (args.length > 2) {
            database = args[2];
        }
        if (args.length > 3) {
            esbase = args[3];
        }
        if (args.length > 4) {
            unicast = args[4];
        }
        if (args.length > 5) {
            model = args[5];
        }
        if (args.length > 6) {
            ingest = new String[args.length - 6];
            for (int i = 0; i < ingest.length; i++) {
                ingest[i] = args[6 + i];
            }
        }
        LOGGER.warn("Start with " + ingest + ":" + host + ":" + database + ":" + esbase + ":" + unicast);

        MongoDbAccess dbvitam = null;
        try {
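            // shared MongoClient: a pool of 10 connections per host, reads preferring the primary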
            final MongoClientOptions options = new MongoClientOptions.Builder().connectionsPerHost(10).build();
            mongoClient = new MongoClient(host, options);
            mongoClient.setReadPreference(ReadPreference.primaryPreferred());
            dbvitam = new MongoDbAccess(mongoClient, database, esbase, unicast, true);
            dbvitam.ensureIndex();
            LOGGER.warn(dbvitam.toString());

            MainIngestMDBESFromFile.loadt = new AtomicLong(0);
            MainIngestFile.cptMaip.set(0);

            runOnce(dbvitam);

        } catch (final Exception e) {
            System.err.println("ERROR: " + e.getMessage());
            e.printStackTrace();

        } finally {
            // release resources
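            // asynchronous shutdown: close the database access after 1 ms, then force the JVM to exit after 5 s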
            final ScheduledExecutorService scheduler = Executors.newScheduledThreadPool(2);
            final ToClean toclean = new ToClean(dbvitam);
            scheduler.schedule(toclean, 1, TimeUnit.MILLISECONDS);
            final ToShutdown toShutdown = new ToShutdown();
            scheduler.schedule(toShutdown, 5000, TimeUnit.MILLISECONDS);
            scheduler.awaitTermination(7000, TimeUnit.MILLISECONDS);
            System.exit(0);
        }

    }

    private static final class ToClean implements Runnable {
        MongoDbAccess dbvitam;

        public ToClean(final MongoDbAccess dbvitam) {
            this.dbvitam = dbvitam;
        }

        @Override
        public void run() {
            dbvitam.close();
            mongoClient.close();
        }

    }

    private static final class ToShutdown implements Runnable {

        @Override
        public void run() {
            System.exit(0);
        }

    }

    private static final void runOnce(final MongoDbAccess dbvitam)
            throws InterruptedException, InstantiationException, IllegalAccessException, IOException {
        System.out.println("Load starting... ");
        int nbThread = ingest.length;

        final long date11 = System.currentTimeMillis();
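        // a single input file is loaded inline on this thread: unordered bulk inserts flushed every
        // GlobalDatas.LIMIT_MDB_NEW_INDEX documents, with ElasticSearch entries buffered in esIndex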
        if (ingest.length == 1) {
            final FileInputStream fstream = new FileInputStream(ingest[0]);
            final DataInputStream in = new DataInputStream(fstream);
            final BufferedReader br = new BufferedReader(new InputStreamReader(in));
            String strLine;
            int nb = 0;
            final HashMap<String, String> esIndex = new HashMap<>();
            BulkWriteOperation bulk = dbvitam.daips.collection.initializeUnorderedBulkOperation();
            while ((strLine = br.readLine()) != null) {
                final DBObject bson = (DBObject) JSON.parse(strLine);
                bulk.insert(bson);
                ElasticSearchAccess.addEsIndex(dbvitam, model, esIndex, bson);
                nb++;
                if (nb % GlobalDatas.LIMIT_MDB_NEW_INDEX == 0) {
                    BulkWriteResult result = bulk.execute();
                    int check = result.getInsertedCount();
                    if (check != nb) {
                        System.out.print("x");
                    } else {
                        System.out.print(".");
                    }
                    bulk = dbvitam.daips.collection.initializeUnorderedBulkOperation();
                    MainIngestFile.cptMaip.addAndGet(check);
                    nb = 0;
                }
            }
            if (!esIndex.isEmpty()) {
                System.out.println("Last bulk ES");
                dbvitam.addEsEntryIndex(true, esIndex, model);
                esIndex.clear();
            }
            if (nb != 0) {
                bulk.execute();
                MainIngestFile.cptMaip.addAndGet(nb);
                nb = 0;
            }
            // close the input streams
            br.close();
            in.close();
            fstream.close();
        } else {
            // threads
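            // one worker thread per input file for the MongoDB bulk inserts, plus one extra
            // runnable (file == null) that builds the ElasticSearch index from the same files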
            ExecutorService executorService = Executors.newFixedThreadPool(ingest.length + 1);
            for (int i = 0; i < ingest.length; i++) {
                MainIngestMDBESFromFile ingestrun = new MainIngestMDBESFromFile();
                ingestrun.file = ingest[i];
                executorService.execute(ingestrun);
            }
            // ES
            MainIngestMDBESFromFile ingestrun = new MainIngestMDBESFromFile();
            ingestrun.file = null;
            ingestrun.files = ingest;
            ingestrun.original = dbvitam;
            executorService.execute(ingestrun);

            executorService.shutdown();
            while (!executorService.awaitTermination(10000, TimeUnit.MILLISECONDS)) {
                ;
            }
            System.out.println("Load ended");
            final long nbBigM = dbvitam.getDaipSize();
            final long nbBigD = dbvitam.getPaipSize();
            System.out.println("\n Big Test (" + nbThread + " nb MAIP: " + MainIngestFile.cptMaip.get()
                    + ") with MAIP: " + nbBigM + " DATA: " + nbBigD + " => Load:"
                    + (loadt.get()) / ((float) MainIngestFile.cptMaip.get() * nbThread));

            System.out.println("\nThread;nbLoad;nbTotal;Load");
            System.out.println(nbThread + ";" + MainIngestFile.cptMaip.get() + ";" + nbBigM + ";"
                    + (loadt.get()) / ((float) MainIngestFile.cptMaip.get() * nbThread));
        }
        final long date12 = System.currentTimeMillis();
        MainIngestMDBESFromFile.loadt.set(date12 - date11);

        System.out.println("Load ended");
        /*
         * System.out.println("All elements\n================================================================");
         * DbVitam.printStructure(dbvitam);
         */
        final long nbBigM = dbvitam.getDaipSize();
        final long nbBigD = dbvitam.getPaipSize();
        System.out.println("\n Big Test (" + nbThread + " Threads chacune " + MainIngestFile.nb + " nb MAIP: "
                + MainIngestFile.cptMaip.get() + ") with MAIP: " + nbBigM + " DATA: " + nbBigD + " => Load:"
                + (MainIngestMDBESFromFile.loadt.get()) / ((float) MainIngestFile.cptMaip.get()));

        System.out.println("\nThread;nbLoad;nbTotal;Load");
        System.out.println(nbThread + ";" + MainIngestFile.cptMaip.get() + ";" + nbBigM + ";"
                + (MainIngestMDBESFromFile.loadt.get()) / ((float) MainIngestFile.cptMaip.get()));
    }

    private String file;
    private String[] files;
    private MongoDbAccess original;

    @Override
    public void run() {
        if (file == null) {
            // ES
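            // ElasticSearch-only pass over every file: no MongoDB writes here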
            //Thread.sleep(1000);
            try {
                for (int i = 0; i < files.length - 1; i++) {
                    System.out.println("ESFile: " + files[i]);
                    final HashMap<String, String> esIndex = new HashMap<>();
                    final FileInputStream fstream = new FileInputStream(files[i]);
                    final DataInputStream in = new DataInputStream(fstream);
                    final BufferedReader br = new BufferedReader(new InputStreamReader(in));
                    String strLine;
                    // Read File Line By Line
                    while ((strLine = br.readLine()) != null) {
                        final BSONObject bson = (BSONObject) JSON.parse(strLine);
                        ElasticSearchAccess.addEsIndex(original, model, esIndex, bson);
                    }
                    // Close the input stream
                    br.close();
                    in.close();
                    fstream.close();
                    if (!esIndex.isEmpty()) {
                        System.out.println("Last bulk ES");
                        original.addEsEntryIndex(true, esIndex, model);
                        esIndex.clear();
                    }
                }
                // the last file may contain DAips that are already inserted but need to be updated
                int i = files.length - 1;
                System.out.println("ESFile: " + files[i]);
                final FileInputStream fstream = new FileInputStream(files[i]);
                final DataInputStream in = new DataInputStream(fstream);
                final BufferedReader br = new BufferedReader(new InputStreamReader(in));
                String strLine;
                // Read File Line By Line
                while ((strLine = br.readLine()) != null) {
                    final BSONObject bson = (BSONObject) JSON.parse(strLine);
                    ElasticSearchAccess.addEsIndex(original, model, bson);
                }
                // Close the input stream
                br.close();
                in.close();
                fstream.close();
            } catch (IOException e) {
                // TODO Auto-generated catch block
                e.printStackTrace();
            }
            return;
        }
        MongoDbAccess dbvitam = null;
        FileInputStream fstream = null;
        DataInputStream in = null;
        final BufferedReader br;
        try {
            System.out.println("MDFile: " + file);
            fstream = new FileInputStream(file);
            in = new DataInputStream(fstream);
            br = new BufferedReader(new InputStreamReader(in));
            dbvitam = new MongoDbAccess(mongoClient, database, esbase, unicast, false);
            // now ingest metaaip/metafield/data
            final long date11 = System.currentTimeMillis();
            String strLine;
            int nb = 0;

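            // disabled TokuMX path kept for reference: plain insert() batches instead of the bulk API used below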
            if (false) {
                // Tokumx
                List<DBObject> inserts = new ArrayList<DBObject>(GlobalDatas.LIMIT_MDB_NEW_INDEX);
                while ((strLine = br.readLine()) != null) {
                    final DBObject bson = (DBObject) JSON.parse(strLine);
                    inserts.add(bson);
                    nb++;
                    if (nb % GlobalDatas.LIMIT_MDB_NEW_INDEX == 0) {
                        WriteResult result = dbvitam.daips.collection.insert(inserts);
                        if (result.getN() != nb) {
                            LOGGER.error("Wrong bulk op: " + result);
                        }
                        MainIngestFile.cptMaip.addAndGet(nb);
                        inserts.clear();
                        nb = 0;
                        System.out.print(".");
                    }
                }
                if (nb != 0) {
                    WriteResult result = dbvitam.daips.collection.insert(inserts);
                    if (result.getN() != nb) {
                        LOGGER.error("Wrong bulk op: " + result);
                    }
                    MainIngestFile.cptMaip.addAndGet(nb);
                    inserts.clear();
                    nb = 0;
                }
            } else {
                BulkWriteOperation bulk = dbvitam.daips.collection.initializeUnorderedBulkOperation();
                while ((strLine = br.readLine()) != null) {
                    final DBObject bson = (DBObject) JSON.parse(strLine);
                    bulk.insert(bson);
                    nb++;
                    if (nb % GlobalDatas.LIMIT_MDB_NEW_INDEX == 0) {
                        BulkWriteResult result = bulk.execute();
                        bulk = dbvitam.daips.collection.initializeUnorderedBulkOperation();
                        if (result.getInsertedCount() != nb) {
                            LOGGER.error("Wrong bulk op: " + result);
                        }
                        MainIngestFile.cptMaip.addAndGet(nb);
                        nb = 0;
                        System.out.print(".");
                    }
                }
                if (nb != 0) {
                    BulkWriteResult result = bulk.execute();
                    if (result.getInsertedCount() != nb) {
                        LOGGER.error("Wrong bulk op: " + result);
                    }
                    MainIngestFile.cptMaip.addAndGet(nb);
                    nb = 0;
                }
            }
            final long date12 = System.currentTimeMillis();
            loadt.addAndGet(date12 - date11);
            return;
        } catch (final InvalidUuidOperationException e) {
            // TODO Auto-generated catch block
            e.printStackTrace();
        } catch (final FileNotFoundException e) {
            // TODO Auto-generated catch block
            e.printStackTrace();
        } catch (IOException e) {
            // TODO Auto-generated catch block
            e.printStackTrace();
        } finally {
            // release resources
            try {
                in.close();
            } catch (IOException e) {
                // TODO Auto-generated catch block
                e.printStackTrace();
            }
            try {
                fstream.close();
            } catch (IOException e) {
                // TODO Auto-generated catch block
                e.printStackTrace();
            }
            if (dbvitam != null) {
                dbvitam.close();
            }
        }
    }
}
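
A minimal launch sketch, assuming the class and its dependencies are on the classpath and the ingest files were produced beforehand (the jar name, file paths, and log4j path below are illustrative, not from the source):

java -cp vitam-mdbes.jar fr.gouv.vitam.mdbes.MainIngestMDBESFromFile log4j.properties localhost VitamLinks vitam "mdb002,mdb003,mdb004" courriel /data/ingest-part0.json /data/ingest-part1.json

The positional arguments map to: log4j configuration file, MongoDB host, database name, ElasticSearch cluster name, ElasticSearch unicast hosts, model name, then one or more ingest files. With a single file the load runs inline; with several files one thread per file is started, plus a dedicated ElasticSearch indexing thread.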