Java tutorial
/** * Copyright (c) 2014, Sindice Limited. All Rights Reserved. * * This file is part of the SIREn project. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package com.sindicetech.siren.demo.loader; import com.sindicetech.siren.solr.client.solrj.request.SirenUpdateRequest; import org.apache.commons.cli.*; import org.apache.solr.client.solrj.SolrServer; import org.apache.solr.client.solrj.SolrServerException; import org.apache.solr.client.solrj.impl.HttpSolrServer; import org.codehaus.jackson.JsonParseException; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import java.io.*; import java.util.ArrayList; import java.util.List; /** * Intended to load JSON files to a Solr/SIREn instance */ @SuppressWarnings("static-access") public class Loader { private static final String COMMIT_SUCCESFULLY_EXECUTED_MSG = "commit has been succesfully executed"; private static final int MAX_BATCH_SIZE = 131072; private static final String DESCRIPTION = "\nCommand line utility for loading json files into Solr/SIREn"; private static final String USAGE_BASE = "load "; private static final char HELP_OPT = 'h'; private static final char EXT_OPT = 'e'; private static final char NO_EXT_CHECK_OPT = 'c'; private static final char BATCH_OPT = 'b'; private static final char FILENAME_AS_ID_OPT = 'g'; private static final Logger logger = LoggerFactory.getLogger(Loader.class); private static final Character INPUT_FILE_OPT = 'f'; private static final Character URL_OPT = 'u'; private static final Character COMMIT_EACH_OPT = 'o'; private static final String URL_OPT_LONG = "solr-url"; private static final String INPUT_FILE_OPT_LONG = "file"; private static final String BATCH_OPT_LONG = "batch-size"; private static final String NO_EXT_CHECK_OPT_LONG = "no-extension-check"; private static final String EXT_OPT_LONG = "files-extension"; private static final String COMMIT_EACH_LONG = "commit-after-each-file"; private static final int DEFAULT_BATCH_SIZE = 1024; private static final String DEFAULT_SOLR_URL = "http://localhost:8983/solr/"; private static final String DEFAULT_JSON_EXTENSION = "json"; private final SolrServer server; private final int batchSize; private final String jsonFilesExtension; private final boolean checkJsonExtension; private final boolean commitAfterEachFile; private final boolean filenameAsId; public Loader(HttpSolrServer solrServer, int batchSize, String jsonFilesExtension, boolean checkJsonExtension, boolean commitAfterEachDocument, boolean generateId) { this.server = solrServer; this.batchSize = batchSize; this.jsonFilesExtension = jsonFilesExtension; this.checkJsonExtension = checkJsonExtension; this.commitAfterEachFile = commitAfterEachDocument; this.filenameAsId = generateId; } /** checks parameters, instantiate loader and starts load */ public static void main(String[] args) { CommandLineParser cmdLineParser = new BasicParser(); CommandLine cmd = null; Options options = buildOptions(); try { cmd = cmdLineParser.parse(options, args); } catch (ParseException e) { showHelpExit(options); } if (cmd.hasOption(HELP_OPT)) { showExtendHelpExit(options); } List<File> filesToProcess = Loader.checkInputFilesAndFolders(cmd.getOptionValues(INPUT_FILE_OPT)); if (filesToProcess.size() == 0) { logger.error("no file to process"); System.exit(-1); } Loader loader = new Loader(new HttpSolrServer(cmd.getOptionValue(URL_OPT, DEFAULT_SOLR_URL)), Loader.retrieveAndChekBatchSize(cmd.getOptionValue(BATCH_OPT)), cmd.getOptionValue(EXT_OPT, DEFAULT_JSON_EXTENSION), !cmd.hasOption(NO_EXT_CHECK_OPT), cmd.hasOption(COMMIT_EACH_OPT), cmd.hasOption(FILENAME_AS_ID_OPT)); loader.loadFiles(filesToProcess); } private static int retrieveAndChekBatchSize(String opt) { if (opt != null) { try { int batchSize = Integer.parseInt(opt); if (batchSize <= MAX_BATCH_SIZE && batchSize > 0) { return batchSize; } else { logger.error("batch size should be positive integer <= {}, {} ignored", MAX_BATCH_SIZE, opt); } } catch (NumberFormatException e) { logger.error("batch size should be positive integer <= {}, {} ignored", MAX_BATCH_SIZE, opt); } } return DEFAULT_BATCH_SIZE; } private static List<File> checkInputFilesAndFolders(String[] optionValues) { List<File> filesToProcess = new ArrayList<File>(); for (String param : optionValues) { File fileOrDirectory = new File(param); if ((fileOrDirectory.isFile() || fileOrDirectory.isDirectory()) && fileOrDirectory.exists() && fileOrDirectory.canRead()) { filesToProcess.add(fileOrDirectory); } else { logger.error("not existing or not readable file is skipped: {}", param); } } return filesToProcess; } /** * loads files to Solr * * @param filesToProcess * - list of json files or/and folder with json files to process */ public void loadFiles(List<File> filesToProcess) { long start = System.currentTimeMillis(); int total = 0; FilenameFilter jsonFilesNamesFilter = new JsonFilesFilter(); for (File fileOrDirectory : filesToProcess) { if (fileOrDirectory.isFile()) { total += loadFile(fileOrDirectory); } else { String[] dirList = fileOrDirectory.list(jsonFilesNamesFilter); for (String fileName : dirList) { File file = new File(fileOrDirectory, fileName); // ignores nested directory if (file.isFile() && file.canRead()) { total += loadFile(file); } } } } // final commit try { server.commit(); logger.info(COMMIT_SUCCESFULLY_EXECUTED_MSG); } catch (SolrServerException e) { logger.error("error executing commit in Solr, giving up", e); throw new IllegalStateException("error executing commit in Solr, giving up", e); } catch (IOException e) { logger.error("error executing commit in Solr, giving up", e); throw new IllegalStateException("error executing commit in Solr, giving up", e); } logger.info("Total: {} object loaded in {} second", total, (System.currentTimeMillis() - start) / 1000); } private int loadFile(File file) { InputStream in = null; int counter = 0; try { in = new FileInputStream(file); JsonFileParser parser = new JsonFileParser(in); if (this.filenameAsId) { counter = loadOneDocFromFile(parser, file.getName()); } else { counter = loadMultiFromFile(parser); } if (commitAfterEachFile) { server.commit(); logger.info(COMMIT_SUCCESFULLY_EXECUTED_MSG); } logger.debug("{} objects loaded from file {}", counter, file.getName()); } catch (JsonParseException e) { logger.error("error in file {} skipping", file.getName(), e); } catch (IOException e) { logger.error("error reading file {} skipping", file.getName(), e); } catch (SolrServerException e) { logger.error("error sending documents to Solr, giving up", e); throw new IllegalStateException("error sending documents to Solr, giving up", e); } finally { if (in != null) { try { in.close(); } catch (IOException e) { logger.error("error closing file {}", file.getName()); } } } return counter; } private int loadMultiFromFile(JsonFileParser parser) throws IOException, SolrServerException { int counter = 0; while (parser.hasNext()) { counter++; SirenUpdateRequest req = new SirenUpdateRequest(parser.next().toString()); req.process(server); } return counter; } private int loadOneDocFromFile(JsonFileParser parser, String filename) throws IOException, SolrServerException { if (parser.hasNext()) { SirenUpdateRequest req = new SirenUpdateRequest(parser.next().toString()); req.process(server); if (parser.hasNext()) { throw new IllegalArgumentException("file " + filename + " contains more than one object"); } return 1; } else { return 0; } } private static void showExtendHelpExit(Options options) { System.out.println("Loader" + DESCRIPTION); HelpFormatter hf = new HelpFormatter(); hf.setWidth(100); hf.printHelp(USAGE_BASE + "<opts>", options); System.out.println("\nNote: only file or directory names option is mandatory\n" + "Examples:\n" + " " + USAGE_BASE + " -" + INPUT_FILE_OPT + " file.json export-folder\n" + " " + USAGE_BASE + " -" + BATCH_OPT + " " + 128 + " -" + INPUT_FILE_OPT + " file.json\n" + " " + USAGE_BASE + " -" + NO_EXT_CHECK_OPT + " export-directory\n"); System.exit(0); } private static void showHelpExit(Options options) { System.out.println("Loader" + DESCRIPTION + "\nsimple usage: " + USAGE_BASE + " -" + INPUT_FILE_OPT + " <file|directory> [<file|directory>] ..." + "\nPlease use -h option for extended help.\n"); System.exit(-1); } private static Options buildOptions() { Options options = new Options(); // input file name and help are exclusive OptionGroup fileHelpGroup = new OptionGroup(); fileHelpGroup.setRequired(true); fileHelpGroup .addOption(OptionBuilder.hasArgs(1).hasOptionalArgs(20).withArgName("file or/and folder name[s]") .withDescription("JSON file[s] or/and director(y|ies) with JSON files (max 20)") .withLongOpt(INPUT_FILE_OPT_LONG).isRequired().create(INPUT_FILE_OPT)); fileHelpGroup.addOption(OptionBuilder.withDescription("prints help and exits").create(HELP_OPT)); options.addOptionGroup(fileHelpGroup); options.addOption(OptionBuilder.hasArgs(1).withArgName("Solr URL") .withDescription("Solr URL (default=" + DEFAULT_SOLR_URL + ")").withLongOpt(URL_OPT_LONG) .isRequired(false).create(URL_OPT)); // options.addOption(OptionBuilder // .hasArgs(1) // .withArgName("batch size") // .withDescription( // "number of documents sent to Solr in one request, max " + MAX_BATCH_SIZE + " (default=" // + DEFAULT_BATCH_SIZE + ")").withLongOpt(BATCH_OPT_LONG).isRequired(false) // .create(BATCH_OPT)); options.addOption(OptionBuilder .withDescription("load all files in directories, not only files with JSON file extension") .withLongOpt(NO_EXT_CHECK_OPT_LONG).isRequired(false).create(NO_EXT_CHECK_OPT)); options.addOption( OptionBuilder.withDescription("JSON file extension (default=" + DEFAULT_JSON_EXTENSION + ")") .withLongOpt(EXT_OPT_LONG).isRequired(false).create(EXT_OPT)); options.addOption(OptionBuilder.withDescription("commit after each file, (default=false)") .withLongOpt(COMMIT_EACH_LONG).isRequired(false).create(COMMIT_EACH_OPT)); return options; } private class JsonFilesFilter implements FilenameFilter { @Override public boolean accept(File dir, String name) { if (name.startsWith(".")) { return false; } if (checkJsonExtension && !name.endsWith(jsonFilesExtension)) { return false; } return true; } } }