fr.gouv.vitam.mdbes.MainIngestFile.java Source code

Introduction

Here is the source code for fr.gouv.vitam.mdbes.MainIngestFile.java, a benchmark driver that ingests a file into ElasticSearch and an output file, and can then bulk-load the result into MongoDB via mongoimport.

Source

/**
 * This file is part of POC MongoDB ElasticSearch Project.
 *
 * Copyright 2009, Frederic Bregier, and individual contributors by the @author tags. See the
 * COPYRIGHT.txt in the distribution for a full listing of individual contributors.
 *
 * All POC MongoDB ElasticSearch Project is free software: you can redistribute it and/or modify it under the terms of
 * the GNU General Public License as published by the Free Software Foundation, either version 3
 * of the License, or (at your option) any later version.
 *
 * POC MongoDB ElasticSearch is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even
 * the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
 * Public License for more details.
 *
 * You should have received a copy of the GNU General Public License along with POC MongoDB ElasticSearch. If not, see
 * <http://www.gnu.org/licenses/>.
 */
package fr.gouv.vitam.mdbes;

import java.io.BufferedOutputStream;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.ScheduledExecutorService;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicLong;

import org.apache.log4j.PropertyConfigurator;
import org.bson.BSONObject;

import com.mongodb.MongoClient;
import com.mongodb.MongoClientOptions;
import com.mongodb.ReadPreference;
import com.mongodb.util.JSON;

import fr.gouv.vitam.query.GlobalDatas;
import fr.gouv.vitam.query.exception.InvalidExecOperationException;
import fr.gouv.vitam.utils.FileUtil;
import fr.gouv.vitam.utils.exception.InvalidParseOperationException;
import fr.gouv.vitam.utils.exception.InvalidUuidOperationException;
import fr.gouv.vitam.utils.logging.LogbackLoggerFactory;
import fr.gouv.vitam.utils.logging.VitamLogLevel;
import fr.gouv.vitam.utils.logging.VitamLogger;
import fr.gouv.vitam.utils.logging.VitamLoggerFactory;

/**
 * Ingests a file into ElasticSearch and into an output file, then optionally into MongoDB
 *
 * @author "Frederic Bregier"
 *
 */
@SuppressWarnings("javadoc")
public class MainIngestFile implements Runnable {
    private static VitamLogger LOGGER = null;

    protected static AtomicLong loadt = new AtomicLong(0);
    protected static AtomicLong mongoLoad = new AtomicLong(0);

    protected static MongoClient mongoClient = null;
    protected static int MAXTHREAD = 1;

    protected static boolean simulate = false;
    protected static String ingest;
    protected static String model;
    protected static int startFrom = 0;
    protected static String host = "localhost";
    protected static String database = "VitamLinks";
    protected static String esbase = "vitam";
    protected static String unicast = "mdb002, mdb003, mdb004";
    protected static String commandMongo = null;
    protected static String fileout = null;
    protected static List<File> files = new ArrayList<File>();

    protected ParserIngest parserIngest;
    protected int start, stop;

    protected static int nb = 400;
    protected static int firstLevel = 10; // was 100
    protected static int lastLevel = 1000; // was 1000
    protected static float nbr = 100; // could be raised to 1000 to increase the number of requests
    protected static long waitBetweenQuery = 200; // could be used to simulate Little's law, e.g. 100 ms
    protected static int minleveltofile = 0;
    protected static AtomicLong cptMaip = new AtomicLong();
    protected static int nbThread = 1;

    /**
     * Will save to a File and to ElasticSearch, then to MongoDB
     *
     * @param args
     *            logfile noeraze/index host database escluster unicast start nbload file fileout limitdepth nbThread mongoimport
     *            stopindex/xx
     *
     *            <ol>
     *            <li>logfile = Log4J configuration log file</li>
     *            <li>noeraze/index = index will (re)create index, else nothing is done</li>
     *            <li>host = MongoDB host</li>
     *            <li>database = MongoDB database name as VitamLinks</li>
     *            <li>escluster = ElasticSearch cluster name</li>
     *            <li>unicast = ElasticSearch unicast servers list (as in "mdb001, mdb002, mdb003")</li>
     *            <li>start = start index in the bench (for instance, in a 1-1000 bench, start from 100)</li>
     *            <li>nbload = number of iteration from start</li>
     *            <li>file = ingest file</li>
     *            <li>fileout = output saved</li>
     *            <li>limitdepth = level from which the output is saved to the file instead of to MongoDB</li>
     *            <li>nbThread = number of threads (default 1)</li>
     *            <li>mongoimport = optional command for import</li>
     *            <li>stopindex/xx = whether to stop indexing during the MongoDB import ("stopindex" to stop)</li>
     *            </ol>
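     *            <p>
     *            Example invocation (all values below are illustrative only):
     *            </p>
     *
     *            <pre>
     *            java fr.gouv.vitam.mdbes.MainIngestFile log4j.properties index localhost VitamLinks vitam \
     *                "mdb002, mdb003, mdb004" 0 400 ingest.model out.json 2 4 /usr/bin/mongoimport stopindex
     *            </pre>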
     */
    public static void main(final String[] args) throws Exception {
        if (args.length < 6) {
            System.err.println(
                    "need: logfile noeraze/index host database escluster unicast start nbload file fileout limitdepth mongoimport stopindex/xx nbThread");
            // System.err.println("before was need: logfile nbload files eraze/noeraze start host escluster unicast fileout limitdepth mongoimport 0/1 (1=stop index)");
            return;
        }
        final String log4j = args[0];
        PropertyConfigurator.configure(log4j);
        VitamLoggerFactory.setDefaultFactory(new LogbackLoggerFactory(VitamLogLevel.WARN));
        LOGGER = VitamLoggerFactory.getInstance(MainIngestFile.class);
        boolean reindex = false;
        if (args.length > 1) {
            reindex = args[1].equals("index");
        }
        if (simulate) {
            reindex = false;
        }
        final String networkHost = "192.168.56.102"; // note: the local network address is hardcoded
        GlobalDatas.localNetworkAddress = networkHost;
        // connect to the local database server
        if (args.length > 2) {
            host = args[2];
        }
        if (args.length > 3) {
            database = args[3];
        }
        if (args.length > 4) {
            esbase = args[4];
        }
        if (args.length > 5) {
            unicast = args[5];
        }
        int realnb = -1;
        if (args.length > 6) {
            startFrom = Integer.parseInt(args[6]);
        }
        if (args.length > 7) {
            realnb = Integer.parseInt(args[7]);
        }
        if (args.length > 8) {
            ingest = FileUtil.readFile(args[8]);
        }
        if (args.length > 9) {
            fileout = args[9];
        }
        if (args.length > 10) {
            final int stoplevel = Integer.parseInt(args[10]);
            minleveltofile = stoplevel;
        }
        if (args.length > 11) {
            nbThread = Integer.parseInt(args[11]);
        }
        if (args.length > 12) {
            commandMongo = args[12];
        }
        boolean stopindex = false;
        if (args.length > 13) {
            stopindex = args[13].equalsIgnoreCase("stopindex");
        }
        LOGGER.debug("Start with " + reindex + ":" + host + ":" + database + ":" + esbase + ":" + unicast);
        if (args.length > 6) {
            LOGGER.debug("and " + startFrom + ":" + realnb + ":" + ingest + ":" + fileout + ":" + minleveltofile
                    + ":" + nbThread + ":" + commandMongo + ":" + stopindex);
        }
        MongoDbAccess dbvitam = null;
        try {
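            // one MongoDB connection per worker thread, plus one for the main thread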
            MAXTHREAD += nbThread;
            final MongoClientOptions options = new MongoClientOptions.Builder().connectionsPerHost(MAXTHREAD)
                    .build();
            mongoClient = new MongoClient(host, options);
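            // prefer the primary for reads, falling back to a secondary if it is unavailable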
            mongoClient.setReadPreference(ReadPreference.primaryPreferred());
            dbvitam = new MongoDbAccess(mongoClient, database, esbase, unicast, reindex);
            // get a list of the collections in this database and print them out
            LOGGER.debug(dbvitam.toString());
            if (realnb < 0) {
                return;
            }
            // parse the ingest model
            final ParserIngest parser = new ParserIngest(true);
            parser.parse(ingest);
            model = parser.getModel();
            if (reindex) {
                LOGGER.debug("ensureIndex");
                dbvitam.ensureIndex();
                if (model != null) {
                    LOGGER.debug("updateEsIndex");
                    dbvitam.updateEsIndex(model);
                }
                LOGGER.debug("end Index");
            }
            LOGGER.warn(dbvitam.toString());

            final int stepnb = realnb;
            nb = stepnb;
            loadt = new AtomicLong(0);
            cptMaip.set(0);
            runOnce(dbvitam);
            // Continue with MongoDB Loading if setup
            if (commandMongo != null) {
                System.out.println("Launch MongoImport");
                runOnceMongo(dbvitam, stopindex);
            }
        } catch (Exception e) {
            LOGGER.error(e);
        } finally {
            // release resources
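            // staged shutdown: close resources almost immediately, then force-exit after 5 s as a safety net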
            final ScheduledExecutorService scheduler = Executors.newScheduledThreadPool(2);
            final ToClean toclean = new ToClean(dbvitam);
            scheduler.schedule(toclean, 1, TimeUnit.MILLISECONDS);
            final ToShutdown toShutdown = new ToShutdown();
            scheduler.schedule(toShutdown, 5000, TimeUnit.MILLISECONDS);
            scheduler.awaitTermination(7000, TimeUnit.MILLISECONDS);
            System.exit(0);
        }

    }

    protected static final class ToClean implements Runnable {
        MongoDbAccess dbvitam;

        protected ToClean(final MongoDbAccess dbvitam) {
            this.dbvitam = dbvitam;
        }

        @Override
        public void run() {
            dbvitam.close();
            mongoClient.close();
        }

    }

    protected static final class ToShutdown implements Runnable {

        @Override
        public void run() {
            System.exit(0);
        }

    }

    private static final void runOnce(final MongoDbAccess dbvitam)
            throws InterruptedException, InstantiationException, IllegalAccessException, IOException {
        MainIngestFile ingests = null;
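        // split the total load evenly across the worker threads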
        nb = nb / nbThread;
        int step = startFrom;
        final int interval = nb;
        LOGGER.warn("Load starting... " + nbThread + ":" + nb + ":" + interval);
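        // note that run() is invoked directly here, so the workers execute sequentially on this thread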
        for (int i = 0; i < nbThread; i++) {
            ingests = new MainIngestFile();
            ingests.start = step;
            ingests.stop = step + interval - 1;
            step += interval;
            ingests.run();
        }
        System.out.println("Load ended");
        final FileOutputStream outputStream = new FileOutputStream(fileout);
        final Map<String, String> esIndex = new HashMap<String, String>();
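        // replay every saved DAip: dump it to the flat file and stage it for ES bulk indexing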
        for (DAip daip : ParserIngest.savedDaips.values()) {
            daip.load(dbvitam);
            daip.toFile(outputStream);
            final BSONObject bson = (BSONObject) JSON.parse(daip.toStringDirect());
            ElasticSearchAccess.addEsIndex(dbvitam, model, esIndex, bson);
        }
        outputStream.close();
        if (!esIndex.isEmpty()) {
            MainIngestFile.cptMaip.addAndGet(esIndex.size());
            System.out.println("Last bulk ES");
            dbvitam.addEsEntryIndex(true, esIndex, model);
            esIndex.clear();
        }

        /*
         * System.out.println("All elements\n================================================================");
         * DbVitam.printStructure(dbvitam);
         */
        final long nbBigM = dbvitam.getDaipSize();
        final long nbBigD = dbvitam.getPaipSize();
        System.out.println("\n Big Test (" + nbThread + " Threads chacune " + nb + " itrations de load, nb MAIP: "
                + cptMaip.get() + ") with MAIP: " + nbBigM + " DATA: " + nbBigD + " => Load:"
                + (loadt.get()) / ((float) cptMaip.get()));

        System.out.println("\nThread;nbLoad;nbTotal;Load");
        System.out.println(
                nbThread + ";" + cptMaip.get() + ";" + nbBigM + ";" + (loadt.get()) / ((float) cptMaip.get()));
    }

    @Override
    public void run() {
        MongoDbAccess dbvitam = null;
        try {
            dbvitam = new MongoDbAccess(mongoClient, database, esbase, unicast, false);
            // now ingest metaaip/metafield/data
            final long date11 = System.currentTimeMillis();
            parserIngest = new ParserIngest(simulate);

            final String filename = fileout + "-" + start + "-" + stop + ".json";
            final File file = new File(filename);
            final FileOutputStream outputStream = new FileOutputStream(file);
            parserIngest.bufferedOutputStream = new BufferedOutputStream(outputStream);
            System.out.println("Start to File: " + filename);
            parserIngest.parse(ingest);
            parserIngest.executeToFile(dbvitam, start, stop, true);
            final long date12 = System.currentTimeMillis();
            loadt.addAndGet(date12 - date11);
            cptMaip.addAndGet(parserIngest.getTotalCount());
            files.add(file);
            return;
        } catch (final InvalidExecOperationException e) {
            LOGGER.error(e);
        } catch (final InvalidParseOperationException e) {
            LOGGER.error(e);
        } catch (final InvalidUuidOperationException e) {
            LOGGER.error(e);
        } catch (final FileNotFoundException e) {
            LOGGER.error(e);
        } finally {
            // release resources
            if (parserIngest != null && parserIngest.bufferedOutputStream != null) {
                if (GlobalDatas.PRINT_REQUEST) {
                    System.out.println("Flushing file");
                }
                try {
                    parserIngest.bufferedOutputStream.flush();
                } catch (final IOException e) {
                    // ignore flush failures during cleanup
                }
                if (GlobalDatas.PRINT_REQUEST) {
                    System.out.println("Closing file");
                }
                try {
                    parserIngest.bufferedOutputStream.close();
                } catch (final IOException e) {
                    // ignore close failures during cleanup
                }
                if (GlobalDatas.PRINT_REQUEST) {
                    System.out.println("File closed");
                }
            }
            if (dbvitam != null) {
                dbvitam.close();
            }
        }
    }

    private static final void runOnceMongo(final MongoDbAccess dbvitam, final boolean stopindex)
            throws InterruptedException, InstantiationException, IllegalAccessException, IOException {
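        // dropping the indexes before a bulk import and rebuilding them afterwards can speed up the load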
        if (stopindex) {
            dbvitam.removeIndexBeforeImport();
        }
        final MongoRunImport[] ingests = new MongoRunImport[nbThread];
        ExecutorService executorService = null;
        executorService = Executors.newFixedThreadPool(nbThread);
        System.out.print("Load starting... " + nbThread + ":" + nb);
        for (int i = 0; i < nbThread; i++) { // one import per generated file, indexed from 0
            ingests[i] = new MongoRunImport();
            ingests[i].file = files.get(i);
            executorService.execute(ingests[i]);
        }
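        // give the mongoimport processes a moment to start before shutting the pool down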
        Thread.sleep(1000);
        executorService.shutdown();
        while (!executorService.awaitTermination(10000, TimeUnit.MILLISECONDS)) {
            // wait for all import tasks to finish
        }
        System.out.println("Load ended");
        final long start = System.currentTimeMillis();
        if (stopindex) {
            dbvitam.resetIndexAfterImport();
        }
        final long stop = System.currentTimeMillis();
        System.out.println("End of MongoImport MaipES:MaipMD:MaipReindex:MaipTotal:MAIP/s");
        System.out.println("End of MongoImport " + (loadt.get()) / ((float) cptMaip.get()) + ":"
                + (mongoLoad.get()) / ((float) cptMaip.get()) + ":" + (stop - start) / ((float) cptMaip.get()) + ":"
                + (loadt.get() + mongoLoad.get() + stop - start) / ((float) cptMaip.get()) + ":"
                + (cptMaip.get() / ((double) (loadt.get() + mongoLoad.get() + stop - start))));
    }

    private static class MongoRunImport implements Runnable {
        private File file;

        @Override
        public void run() {
            final long date11 = System.currentTimeMillis();
            System.out.println("Launch MongoImport");
            final Runtime runtime = Runtime.getRuntime();
            final String cmd = commandMongo + " -h " + host + " -d " + database + " -c DAip --upsert --file "
                    + file.getAbsolutePath();
            System.out.println("after removing index, start cmd: " + cmd);
            final Process process;
            try {
                process = runtime.exec(cmd);
            } catch (final IOException e1) {
                LOGGER.error(e1);
                return;
            }
            // consume the external process's standard output in a separate thread
            new Thread() {
                @Override
                public void run() {
                    try {
                        final BufferedReader reader = new BufferedReader(
                                new InputStreamReader(process.getInputStream()));
                        String line = "";
                        try {
                            while ((line = reader.readLine()) != null) {
                                System.out.println(line);
                                // process the application's output stream here if needed
                            }
                        } finally {
                            reader.close();
                        }
                    } catch (final IOException ioe) {
                        LOGGER.error(ioe);
                    }
                }
            }.start();

            // consume the external process's error output in a separate thread
            new Thread() {
                @Override
                public void run() {
                    try {
                        final BufferedReader reader = new BufferedReader(
                                new InputStreamReader(process.getErrorStream()));
                        String line = "";
                        try {
                            while ((line = reader.readLine()) != null) {
                                System.err.println(line);
                                // process the application's error stream here if needed
                            }
                        } finally {
                            reader.close();
                        }
                    } catch (final IOException ioe) {
                        LOGGER.error(ioe);
                    }
                }
            }.start();
            try {
                process.waitFor();
            } catch (final InterruptedException e) {
                LOGGER.error(e);
            }
            final long date12 = System.currentTimeMillis();
            mongoLoad.addAndGet(date12 - date11);
        }
    }
}