List of usage examples for org.apache.hadoop.conf Configuration setBoolean
public void setBoolean(String name, boolean value)
Sets the value of the name property to a boolean.
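Before the per-project examples, a minimal, self-contained sketch of the call itself (the property name "example.feature.enabled" is made up purely for illustration):

import org.apache.hadoop.conf.Configuration;

public class SetBooleanExample {
    public static void main(String[] args) {
        Configuration conf = new Configuration();

        // Store a boolean under a property name (hypothetical key).
        conf.setBoolean("example.feature.enabled", true);

        // Read it back; the second argument is the default returned
        // when the property is not set.
        boolean enabled = conf.getBoolean("example.feature.enabled", false);
        System.out.println("example.feature.enabled = " + enabled);
    }
}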
From source file: com.google.cloud.hadoop.fs.gcs.ListHelperGoogleHadoopFileSystem.java
License: Open Source License
/**
 * Factory method for constructing and initializing an instance of
 * ListHelperGoogleHadoopFileSystem which is ready to list/get FileStatus entries corresponding
 * to {@code fileInfos}.
 */
public static GoogleHadoopFileSystem createInstance(GoogleCloudStorageFileSystem gcsfs,
        Collection<FileInfo> fileInfos) throws IOException {
    Preconditions.checkState(!fileInfos.isEmpty(),
            "Cannot construct ListHelperGoogleHadoopFileSystem with empty fileInfos list!");
    List<GoogleCloudStorageItemInfo> infos = new ArrayList<>();
    URI rootUri = null;
    Set<URI> providedPaths = new HashSet<>();
    for (FileInfo info : fileInfos) {
        infos.add(info.getItemInfo());
        providedPaths.add(info.getPath());
        if (rootUri == null) {
            // Set the root URI to the first path in the collection.
            rootUri = info.getPath();
        }
    }

    // The flow for populating this doesn't bother to populate metadata entries for parent
    // directories but we know the parent directories are expected to exist, so we'll just
    // populate the missing entries explicitly here. Necessary for getFileStatus(parentOfInfo)
    // to work when using an instance of this class.
    for (FileInfo info : fileInfos) {
        URI parentPath = gcsfs.getParentPath(info.getPath());
        while (parentPath != null && !parentPath.equals(GoogleCloudStorageFileSystem.GCS_ROOT)) {
            if (!providedPaths.contains(parentPath)) {
                LOG.debug("Adding fake entry for missing parent path '{}'", parentPath);
                GoogleCloudStorageItemInfo fakeInfo = new GoogleCloudStorageItemInfo(
                        gcsfs.getPathCodec().validatePathAndGetId(parentPath, true), 0, 0, null, null);
                infos.add(fakeInfo);
                providedPaths.add(parentPath);
            }
            parentPath = gcsfs.getParentPath(parentPath);
        }
    }

    // Add in placeholder bucket info, since the bucket info won't be relevant for our listObject
    // operations.
    String tempBucket = rootUri.getAuthority();
    infos.add(new GoogleCloudStorageItemInfo(new StorageResourceId(tempBucket), 0, 0, "", ""));

    MetadataReadOnlyGoogleCloudStorage tempGcs = new MetadataReadOnlyGoogleCloudStorage(infos);
    GoogleCloudStorageFileSystem tempGcsFs = new GoogleCloudStorageFileSystem(tempGcs, gcsfs.getOptions());
    GoogleHadoopFileSystem tempGhfs = new ListHelperGoogleHadoopFileSystem(tempGcsFs);

    Configuration tempConfig = new Configuration();
    tempConfig.set(GoogleHadoopFileSystemBase.GCS_SYSTEM_BUCKET_KEY, tempBucket);
    tempConfig.setBoolean(GoogleHadoopFileSystemBase.GCS_CREATE_SYSTEM_BUCKET_KEY, false);
    tempConfig.set(GoogleHadoopFileSystemBase.GCS_WORKING_DIRECTORY_KEY, "/");
    // Set initSuperclass == false to avoid screwing up FileSystem statistics.
    tempGhfs.initialize(rootUri, tempConfig, false);
    return tempGhfs;
}
From source file: com.google.mr4c.hadoop.yarn.YarnTestBinding.java
License: Open Source License
private void startMrCluster() throws IOException {
    Configuration conf = new JobConf();
    FileSystem.setDefaultUri(conf, HadoopTestUtils.getTestDFS().getUri());
    conf.setBoolean(YarnConfiguration.YARN_MINICLUSTER_FIXED_PORTS, true);
    conf.setBoolean(JHAdminConfig.MR_HISTORY_MINICLUSTER_FIXED_PORTS, true);
    String addr = MiniYARNCluster.getHostname() + ":0";
    conf.set(YarnConfiguration.RM_ADDRESS, addr);
    conf.set(JHAdminConfig.MR_HISTORY_ADDRESS, addr);
    m_mrCluster = MiniMRClientClusterFactory.create(HadoopTestUtils.class, "MR4CTests",
            1, // num node managers
            conf);

    // make sure startup is finished
    for (int i = 0; i < 60; i++) {
        String newAddr = m_mrCluster.getConfig().get(YarnConfiguration.RM_ADDRESS);
        if (newAddr.equals(addr)) {
            s_log.warn("MiniYARNCluster startup not complete");
            try {
                Thread.sleep(1000);
            } catch (InterruptedException ie) {
                throw new IOException(ie);
            }
        } else {
            s_log.info("MiniYARNCluster now available at {}", newAddr);
            return;
        }
    }
    throw new IOException("MiniYARNCluster taking too long to startup");
}
From source file: com.gsinnovations.howdah.AbstractJob.java
License: Apache License
protected Job prepareJob(Path inputPath, Path outputPath, Class<? extends InputFormat> inputFormat,
        Class<? extends Mapper> mapper, Class<? extends Writable> mapperKey,
        Class<? extends Writable> mapperValue, Class<? extends Reducer> reducer,
        Class<? extends Writable> reducerKey, Class<? extends Writable> reducerValue,
        Class<? extends OutputFormat> outputFormat) throws IOException {

    Job job = new Job(new Configuration(getConf()));
    Configuration jobConf = job.getConfiguration();

    if (reducer.equals(Reducer.class)) {
        if (mapper.equals(Mapper.class)) {
            throw new IllegalStateException("Can't figure out the user class jar file from mapper/reducer");
        }
        job.setJarByClass(mapper);
    } else {
        job.setJarByClass(reducer);
    }

    job.setInputFormatClass(inputFormat);
    jobConf.set("mapred.input.dir", inputPath.toString());

    job.setMapperClass(mapper);
    job.setMapOutputKeyClass(mapperKey);
    job.setMapOutputValueClass(mapperValue);

    jobConf.setBoolean("mapred.compress.map.output", true);

    job.setReducerClass(reducer);
    job.setOutputKeyClass(reducerKey);
    job.setOutputValueClass(reducerValue);

    job.setJobName(getCustomJobName(job, mapper, reducer));

    job.setOutputFormatClass(outputFormat);
    jobConf.set("mapred.output.dir", outputPath.toString());

    return job;
}
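A note on the property name used above: in Hadoop 2.x and later, mapred.compress.map.output is deprecated in favor of mapreduce.map.output.compress (the old key still resolves through Hadoop's deprecation mapping). A minimal sketch of the equivalent setting with the newer key, shown on a standalone Configuration for illustration:

import org.apache.hadoop.conf.Configuration;

public class MapOutputCompression {
    public static void main(String[] args) {
        Configuration jobConf = new Configuration();
        // Newer property name; Hadoop maps the deprecated
        // "mapred.compress.map.output" onto this key.
        jobConf.setBoolean("mapreduce.map.output.compress", true);
        System.out.println(jobConf.getBoolean("mapreduce.map.output.compress", false));
    }
}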
From source file: com.hadoop.mapreduce.TestLzoLazyLoading.java
License: Open Source License
private void runWordCount(Configuration cf, boolean compressIn, boolean compressOut)
        throws IOException, InterruptedException, ClassNotFoundException {
    Configuration thisConf = new Configuration(cf);
    if (compressIn) {
        thisConf.setBoolean("mapred.compression.lzo.test.codec-checked-after-map", true);
    }
    if (compressOut) {
        thisConf.setBoolean("mapred.compression.lzo.test.codec-checked-after-reduce", true);
    }
    Path pathIn = new Path(TEST_ROOT_DIR + "/in");
    Path pathOut = new Path(TEST_ROOT_DIR + "/out");
    localFs.delete(pathIn, true);
    localFs.delete(pathOut, true);
    writeFile(makeFileName("in/part1", compressIn), "this is a test\nof word count test\ntest\n");
    writeFile(makeFileName("in/part2", compressIn), "more test");
    Job job = new Job(thisConf, "word count");
    job.setMapperClass(MyMapper.class);
    job.setCombinerClass(MyCombiner.class);
    job.setReducerClass(MyReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    if (compressOut) {
        FileOutputFormat.setCompressOutput(job, true);
        FileOutputFormat.setOutputCompressorClass(job, LzoCodec.class);
    }
    FileInputFormat.addInputPath(job, pathIn);
    FileOutputFormat.setOutputPath(job, pathOut);
    job.submit();
    assertEquals("IsLzoChecked (client)?", compressIn, LzoCodec.isNativeLzoChecked());
    assertTrue(job.waitForCompletion(false));
    String result = readFile(makeFileName("out/part-r-00000", compressOut));
    System.out.println(result);
    assertEquals("a\t1\ncount\t1\nis\t1\nmore\t1\nof\t1\ntest\t4\nthis\t1\nword\t1\n", result);
}
From source file: com.hortonworks.minicluster.MiniHadoopCluster.java
License: Apache License
@Override
public void serviceInit(Configuration conf) throws Exception {
    conf.setBoolean(YarnConfiguration.IS_MINI_YARN_CLUSTER, true);
    conf.setStrings(YarnConfiguration.NM_AUX_SERVICES,
            new String[] { ShuffleHandler.MAPREDUCE_SHUFFLE_SERVICEID });
    conf.setClass(
            String.format(YarnConfiguration.NM_AUX_SERVICE_FMT, ShuffleHandler.MAPREDUCE_SHUFFLE_SERVICEID),
            ShuffleHandler.class, Service.class);
    conf.setInt(ShuffleHandler.SHUFFLE_PORT_CONFIG_KEY, 0);
    this.addService(new ResourceManagerWrapper());
    for (int index = 0; index < this.nodeManagers.length; index++) {
        this.nodeManagers[index] = new ShortCircuitedNodeManager();
        this.addService(new NodeManagerWrapper(index));
    }
    super.serviceInit(conf instanceof YarnConfiguration ? conf : new YarnConfiguration(conf));
}
From source file: com.huayu.metis.flume.sink.hdfs.BucketWriter.java
License: Apache License
/**
 * open() is called by append()
 *
 * @throws java.io.IOException
 * @throws InterruptedException
 */
private void open() throws IOException, InterruptedException {
    if ((filePath == null) || (writer == null)) {
        throw new IOException("Invalid file settings");
    }

    final Configuration config = new Configuration();
    // disable FileSystem JVM shutdown hook
    config.setBoolean("fs.automatic.close", false);

    // Hadoop is not thread safe when doing certain RPC operations,
    // including getFileSystem(), when running under Kerberos.
    // open() must be called by one thread at a time in the JVM.
    // NOTE: tried synchronizing on the underlying Kerberos principal previously
    // which caused deadlocks. See FLUME-1231.
    synchronized (staticLock) {
        checkAndThrowInterruptedException();
        try {
            long counter = fileExtensionCounter.incrementAndGet();
            String fullFileName = fileName + "." + counter + "." + fileSuffix;
            bucketPath = filePath + "/" + fullFileName + inUseSuffix;
            targetPath = filePath + "/" + fullFileName;
            LOG.debug("[hy]Creating BuckPath:" + bucketPath + ", TargetPath:" + targetPath);
            callWithTimeout(new CallRunner<Void>() {
                @Override
                public Void call() throws Exception {
                    // Need to get reference to FS using above config before underlying
                    // writer does in order to avoid shutdown hook & IllegalStateExceptions
                    fileSystem = new Path(bucketPath).getFileSystem(config);
                    writer.open(bucketPath);
                    return null;
                }
            });
        } catch (Exception ex) {
            sinkCounter.incrementConnectionFailedCount();
            if (ex instanceof IOException) {
                throw (IOException) ex;
            } else {
                throw Throwables.propagate(ex);
            }
        }
    }
    sinkCounter.incrementConnectionCreatedCount();
    resetCounters();

    // if time-based rolling is enabled, schedule the roll
    if (rollInterval > 0) {
        Callable<Void> action = new Callable<Void>() {
            public Void call() throws Exception {
                LOG.debug("Rolling file ({}): Roll scheduled after {} sec elapsed.", bucketPath, rollInterval);
                try {
                    // Roll the file and remove reference from sfWriters map.
                    close(true);
                } catch (Throwable t) {
                    LOG.error("Unexpected error", t);
                }
                return null;
            }
        };
        timedRollFuture = timedRollerPool.schedule(action, rollInterval, TimeUnit.SECONDS);
    }
    isOpen = true;
}
From source file: com.idvp.platform.hdfs.BucketWriter.java
License: Apache License
/**
 * open() is called by append()
 *
 * @throws IOException
 * @throws InterruptedException
 */
private void open() throws IOException, InterruptedException {
    if ((filePath == null) || (writer == null)) {
        throw new IOException("Invalid file settings");
    }

    final Configuration config = new Configuration();
    // disable FileSystem JVM shutdown hook
    config.setBoolean("fs.automatic.close", false);

    // Hadoop is not thread safe when doing certain RPC operations,
    // including getFileSystem(), when running under Kerberos.
    // open() must be called by one thread at a time in the JVM.
    // NOTE: tried synchronizing on the underlying Kerberos principal previously
    // which caused deadlocks. See FLUME-1231.
    synchronized (staticLock) {
        checkAndThrowInterruptedException();
        try {
            long counter = fileExtensionCounter.incrementAndGet();
            String fullFileName = fileName + "." + counter;
            if (fileSuffix != null && fileSuffix.length() > 0) {
                fullFileName += fileSuffix;
            }
            bucketPath = filePath + "/" + inUsePrefix + fullFileName + inUseSuffix;
            targetPath = filePath + "/" + fullFileName;
            LOG.info("Creating " + bucketPath);
            callWithTimeout((CallRunner<Void>) () -> {
                // Need to get reference to FS using above config before underlying
                // writer does in order to avoid shutdown hook & IllegalStateExceptions
                if (!mockFsInjected) {
                    fileSystem = new Path(bucketPath).getFileSystem(config);
                }
                writer.open(bucketPath);
                return null;
            });
        } catch (Exception ex) {
            if (ex instanceof IOException) {
                throw (IOException) ex;
            } else {
                throw Throwables.propagate(ex);
            }
        }
    }
    isClosedMethod = getRefIsClosed();
    resetCounters();

    // if time-based rolling is enabled, schedule the roll
    if (rollInterval > 0) {
        Callable<Void> action = new Callable<Void>() {
            public Void call() throws Exception {
                LOG.debug("Rolling file ({}): Roll scheduled after {} sec elapsed.", bucketPath, rollInterval);
                try {
                    // Roll the file and remove reference from sfWriters map.
                    close(true);
                } catch (Throwable t) {
                    LOG.error("Unexpected error", t);
                }
                return null;
            }
        };
        timedRollFuture = timedRollerPool.schedule(action, rollInterval, TimeUnit.SECONDS);
    }
    isOpen = true;
}
From source file: com.ikanow.aleph2.analytics.hadoop.assets.VerySimpleLocalExample.java
License: Apache License
@SuppressWarnings({ "deprecation", "unchecked", "rawtypes" })
@Test
public void test_localHadoopLaunch()
        throws IOException, IllegalStateException, ClassNotFoundException, InterruptedException {

    // 0) Setup the temp dir
    final String temp_dir = System.getProperty("java.io.tmpdir") + File.separator;
    //final Path tmp_path = FileContext.getLocalFSFileContext().makeQualified(new Path(temp_dir));
    final Path tmp_path2 = FileContext.getLocalFSFileContext()
            .makeQualified(new Path(temp_dir + "/tmp_output"));
    try {
        FileContext.getLocalFSFileContext().delete(tmp_path2, true);
    } catch (Exception e) {
    } // (just doesn't exist yet)

    // 1) Setup config with local mode
    final Configuration config = new Configuration();
    config.setBoolean("mapred.used.genericoptionsparser", true); // (just stops an annoying warning from appearing)
    config.set("fs.file.impl", "org.apache.hadoop.fs.LocalFileSystem");
    config.set("mapred.job.tracker", "local");
    config.set("fs.defaultFS", "local");
    config.unset("mapreduce.framework.name");
    // If running locally, turn "snappy" off - tomcat isn't pointing its native library path in the right place
    config.set("mapred.map.output.compression.codec", "org.apache.hadoop.io.compress.DefaultCodec");

    // 2) Build job and do more setup using the Job API
    //TODO: not sure why this is deprecated, it doesn't seem to be in v1? We do need to move to JobConf at some point, but I ran into some
    // issues when trying to do everything I needed to for V1, so seems expedient to start here and migrate away
    final Job hj = new Job(config); // (NOTE: from here, changes to config are ignored)

    // Input format:
    //TODO: fails because of guava issue, looks like we'll need to move to 2.7 and check it works with 2.5.x server?
    //TextInputFormat.addInputPath(hj, tmp_path);
    //hj.setInputFormatClass((Class<? extends InputFormat>) Class.forName("org.apache.hadoop.mapreduce.lib.input.TextInputFormat"));
    hj.setInputFormatClass(TestInputFormat.class);

    // Output format:
    hj.setOutputFormatClass((Class<? extends OutputFormat>) Class
            .forName("org.apache.hadoop.mapreduce.lib.output.TextOutputFormat"));
    TextOutputFormat.setOutputPath(hj, tmp_path2);

    // Mapper etc (combiner/reducer are similar)
    hj.setMapperClass(TestMapper.class);
    hj.setOutputKeyClass(Text.class);
    hj.setOutputValueClass(Text.class);
    hj.setNumReduceTasks(0); // (disable reducer for now)

    hj.setJar("test");

    try {
        hj.submit();
    } catch (UnsatisfiedLinkError e) {
        throw new RuntimeException(
                "This is a windows/hadoop compatibility problem - adding the hadoop-commons in the misc_test_assets subdirectory to the top of the classpath should resolve it (and does in V1), though I haven't yet made that work with Aleph2",
                e);
    }
    //hj.getJobID().toString();
    while (!hj.isComplete()) {
        Thread.sleep(1000);
    }
    assertTrue("Finished successfully", hj.isSuccessful());
}
From source file: com.ikanow.infinit.e.core.mapreduce.HadoopJobRunner.java
License: Open Source License
@SuppressWarnings({ "unchecked", "rawtypes" })
private String runHadoopJob(CustomMapReduceJobPojo job, String tempJarLocation)
        throws IOException, SAXException, ParserConfigurationException {
    StringWriter xml = new StringWriter();
    createConfigXML(xml, job.jobtitle, job.inputCollection,
            getQueryOrProcessing(job.query, QuerySpec.INPUTFIELDS), job.isCustomTable, job.getOutputDatabase(),
            job._id.toString(), job.outputCollectionTemp, job.mapper, job.reducer, job.combiner,
            getQueryOrProcessing(job.query, QuerySpec.QUERY), job.communityIds, job.outputKey, job.outputValue,
            job.arguments);

    ClassLoader savedClassLoader = Thread.currentThread().getContextClassLoader();
    URLClassLoader child = new URLClassLoader(new URL[] { new File(tempJarLocation).toURI().toURL() },
            savedClassLoader);
    Thread.currentThread().setContextClassLoader(child);

    // Now load the XML into a configuration object:
    Configuration config = new Configuration();

    try {
        DocumentBuilderFactory dbFactory = DocumentBuilderFactory.newInstance();
        DocumentBuilder dBuilder = dbFactory.newDocumentBuilder();
        Document doc = dBuilder.parse(new ByteArrayInputStream(xml.toString().getBytes()));
        NodeList nList = doc.getElementsByTagName("property");

        for (int temp = 0; temp < nList.getLength(); temp++) {
            Node nNode = nList.item(temp);
            if (nNode.getNodeType() == Node.ELEMENT_NODE) {
                Element eElement = (Element) nNode;
                String name = getTagValue("name", eElement);
                String value = getTagValue("value", eElement);
                if ((null != name) && (null != value)) {
                    config.set(name, value);
                }
            }
        }
    } catch (Exception e) {
        throw new IOException(e.getMessage());
    }

    // Now run the JAR file
    try {
        config.setBoolean("mapred.used.genericoptionsparser", true); // (just stops an annoying warning from appearing)

        if (bLocalMode) {
            config.set("mapred.job.tracker", "local");
            config.set("fs.default.name", "local");
        } else {
            String trackerUrl = HadoopUtils.getXMLProperty(
                    prop_custom.getHadoopConfigPath() + "/hadoop/mapred-site.xml", "mapred.job.tracker");
            String fsUrl = HadoopUtils.getXMLProperty(
                    prop_custom.getHadoopConfigPath() + "/hadoop/core-site.xml", "fs.default.name");
            config.set("mapred.job.tracker", trackerUrl);
            config.set("fs.default.name", fsUrl);
        }

        Job hj = new Job(config);

        Class<?> classToLoad = Class.forName(job.mapper, true, child);
        hj.setJarByClass(classToLoad);
        hj.setInputFormatClass((Class<? extends InputFormat>) Class
                .forName("com.ikanow.infinit.e.data_model.custom.InfiniteMongoInputFormat", true, child));
        if ((null != job.exportToHdfs) && job.exportToHdfs) {
            hj.setOutputFormatClass((Class<? extends OutputFormat>) Class
                    .forName("org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat", true, child));
            Path outPath = this.ensureOutputDirectory(job);
            SequenceFileOutputFormat.setOutputPath(hj, outPath);
        } else { // normal case, stays in MongoDB
            hj.setOutputFormatClass((Class<? extends OutputFormat>) Class
                    .forName("com.mongodb.hadoop.MongoOutputFormat", true, child));
        }
        hj.setMapperClass((Class<? extends Mapper>) Class.forName(job.mapper, true, child));
        if ((null != job.reducer) && !job.reducer.equalsIgnoreCase("null")
                && !job.reducer.equalsIgnoreCase("none")) {
            hj.setReducerClass((Class<? extends Reducer>) Class.forName(job.reducer, true, child));
        } else {
            hj.setNumReduceTasks(0);
        }
        if ((null != job.combiner) && !job.combiner.equalsIgnoreCase("null")
                && !job.combiner.equalsIgnoreCase("none")) {
            hj.setCombinerClass((Class<? extends Reducer>) Class.forName(job.combiner, true, child));
        }
        hj.setOutputKeyClass(Class.forName(job.outputKey, true, child));
        hj.setOutputValueClass(Class.forName(job.outputValue, true, child));

        hj.setJobName(job.jobtitle);

        if (bLocalMode) {
            hj.waitForCompletion(false);
            return "local_done";
        } else {
            hj.submit();
            String jobId = hj.getJobID().toString();
            return jobId;
        }
    } catch (Exception e) {
        e.printStackTrace();
        Thread.currentThread().setContextClassLoader(savedClassLoader);
        return "Error: " + HarvestExceptionUtils.createExceptionMessage(e);
    } finally {
        Thread.currentThread().setContextClassLoader(savedClassLoader);
    }
}
From source file: com.ikanow.infinit.e.processing.custom.launcher.CustomHadoopTaskLauncher.java
License: Open Source License
@SuppressWarnings({ "unchecked", "rawtypes" })
public String runHadoopJob(CustomMapReduceJobPojo job, String tempJarLocation)
        throws IOException, SAXException, ParserConfigurationException {
    StringWriter xml = new StringWriter();
    String outputCollection = job.outputCollectionTemp; // (non-append mode)
    if ((null != job.appendResults) && job.appendResults)
        outputCollection = job.outputCollection; // (append mode, write directly in....)
    else if (null != job.incrementalMode)
        job.incrementalMode = false; // (not allowed to be in incremental mode and not update mode)

    createConfigXML(xml, job.jobtitle, job.inputCollection,
            InfiniteHadoopUtils.getQueryOrProcessing(job.query, InfiniteHadoopUtils.QuerySpec.INPUTFIELDS),
            job.isCustomTable, job.getOutputDatabase(), job._id.toString(), outputCollection, job.mapper,
            job.reducer, job.combiner,
            InfiniteHadoopUtils.getQueryOrProcessing(job.query, InfiniteHadoopUtils.QuerySpec.QUERY),
            job.communityIds, job.outputKey, job.outputValue, job.arguments, job.incrementalMode,
            job.submitterID, job.selfMerge, job.outputCollection, job.appendResults);

    ClassLoader savedClassLoader = Thread.currentThread().getContextClassLoader();
    URLClassLoader child = new URLClassLoader(new URL[] { new File(tempJarLocation).toURI().toURL() },
            savedClassLoader);
    Thread.currentThread().setContextClassLoader(child);

    // Check version: for now, any infinit.e.data_model with an VersionTest class is acceptable
    boolean dataModelLoaded = true;
    try {
        URLClassLoader versionTest = new URLClassLoader(new URL[] { new File(tempJarLocation).toURI().toURL() },
                null);
        try {
            Class.forName("com.ikanow.infinit.e.data_model.custom.InfiniteMongoInputFormat", true, versionTest);
        } catch (ClassNotFoundException e2) {
            //(this is fine, will use the cached version)
            dataModelLoaded = false;
        }
        if (dataModelLoaded)
            Class.forName("com.ikanow.infinit.e.data_model.custom.InfiniteMongoVersionTest", true, versionTest);
    } catch (ClassNotFoundException e1) {
        throw new RuntimeException(
                "This JAR is compiled with too old a version of the data-model, please recompile with Jan 2014 (rc2) onwards");
    }

    // Now load the XML into a configuration object:
    Configuration config = new Configuration();
    // Add the client configuration overrides:
    if (!bLocalMode) {
        String hadoopConfigPath = props_custom.getHadoopConfigPath() + "/hadoop/";
        config.addResource(new Path(hadoopConfigPath + "core-site.xml"));
        config.addResource(new Path(hadoopConfigPath + "mapred-site.xml"));
        config.addResource(new Path(hadoopConfigPath + "hadoop-site.xml"));
    } //TESTED

    try {
        DocumentBuilderFactory dbFactory = DocumentBuilderFactory.newInstance();
        DocumentBuilder dBuilder = dbFactory.newDocumentBuilder();
        Document doc = dBuilder.parse(new ByteArrayInputStream(xml.toString().getBytes()));
        NodeList nList = doc.getElementsByTagName("property");

        for (int temp = 0; temp < nList.getLength(); temp++) {
            Node nNode = nList.item(temp);
            if (nNode.getNodeType() == Node.ELEMENT_NODE) {
                Element eElement = (Element) nNode;
                String name = getTagValue("name", eElement);
                String value = getTagValue("value", eElement);
                if ((null != name) && (null != value)) {
                    config.set(name, value);
                }
            }
        }
    } catch (Exception e) {
        throw new IOException(e.getMessage());
    }

    // Some other config defaults:
    // (not sure if these are actually applied, or derived from the defaults - for some reason they don't appear in CDH's client config)
    config.set("mapred.map.tasks.speculative.execution", "false");
    config.set("mapred.reduce.tasks.speculative.execution", "false");
    // (default security is ignored here, have it set via HADOOP_TASKTRACKER_CONF in cloudera)

    // Now run the JAR file
    try {
        BasicDBObject advancedConfigurationDbo = null;
        try {
            advancedConfigurationDbo = (null != job.query)
                    ? ((BasicDBObject) com.mongodb.util.JSON.parse(job.query))
                    : (new BasicDBObject());
        } catch (Exception e) {
            advancedConfigurationDbo = new BasicDBObject();
        }
        boolean esMode = advancedConfigurationDbo.containsField("qt") && !job.isCustomTable;
        if (esMode && !job.inputCollection.equals("doc_metadata.metadata")) {
            throw new RuntimeException(
                    "Infinit.e Queries are only supported on doc_metadata - use MongoDB queries instead.");
        }

        config.setBoolean("mapred.used.genericoptionsparser", true); // (just stops an annoying warning from appearing)

        if (bLocalMode) { // local job tracker and FS mode
            config.set("mapred.job.tracker", "local");
            config.set("fs.default.name", "local");
        } else {
            if (bTestMode) { // run job tracker locally but FS mode remotely
                config.set("mapred.job.tracker", "local");
            } else { // normal job tracker
                String trackerUrl = HadoopUtils.getXMLProperty(
                        props_custom.getHadoopConfigPath() + "/hadoop/mapred-site.xml", "mapred.job.tracker");
                config.set("mapred.job.tracker", trackerUrl);
            }
            String fsUrl = HadoopUtils.getXMLProperty(
                    props_custom.getHadoopConfigPath() + "/hadoop/core-site.xml", "fs.default.name");
            config.set("fs.default.name", fsUrl);
        }
        if (!dataModelLoaded && !(bTestMode || bLocalMode)) {
            // If running distributed and no data model loaded then add ourselves
            Path jarToCache = InfiniteHadoopUtils.cacheLocalFile("/opt/infinite-home/lib/",
                    "infinit.e.data_model.jar", config);
            DistributedCache.addFileToClassPath(jarToCache, config);
            jarToCache = InfiniteHadoopUtils.cacheLocalFile("/opt/infinite-home/lib/",
                    "infinit.e.processing.custom.library.jar", config);
            DistributedCache.addFileToClassPath(jarToCache, config);
        } //TESTED

        // Debug scripts (only if they exist), and only in non local/test mode
        if (!bLocalMode && !bTestMode) {
            try {
                Path scriptToCache = InfiniteHadoopUtils.cacheLocalFile("/opt/infinite-home/scripts/",
                        "custom_map_error_handler.sh", config);
                config.set("mapred.map.task.debug.script", "custom_map_error_handler.sh " + job.jobtitle);
                config.set("mapreduce.map.debug.script", "custom_map_error_handler.sh " + job.jobtitle);
                DistributedCache.createSymlink(config);
                DistributedCache.addCacheFile(scriptToCache.toUri(), config);
            } catch (Exception e) {
            } // just carry on

            try {
                Path scriptToCache = InfiniteHadoopUtils.cacheLocalFile("/opt/infinite-home/scripts/",
                        "custom_reduce_error_handler.sh", config);
                config.set("mapred.reduce.task.debug.script", "custom_reduce_error_handler.sh " + job.jobtitle);
                config.set("mapreduce.reduce.debug.script", "custom_reduce_error_handler.sh " + job.jobtitle);
                DistributedCache.createSymlink(config);
                DistributedCache.addCacheFile(scriptToCache.toUri(), config);
            } catch (Exception e) {
            } // just carry on
        } //TODO (???): TOTEST

        // (need to do these 2 things here before the job is created, at which point the config class has been copied across)
        //1)
        Class<?> mapperClazz = Class.forName(job.mapper, true, child);
        if (ICustomInfiniteInternalEngine.class.isAssignableFrom(mapperClazz)) {
            // Special case: internal custom engine, so gets an additional integration hook
            ICustomInfiniteInternalEngine preActivities = (ICustomInfiniteInternalEngine) mapperClazz
                    .newInstance();
            preActivities.preTaskActivities(job._id, job.communityIds, config, !(bTestMode || bLocalMode));
        } //TESTED

        //2)
        if (job.inputCollection.equalsIgnoreCase("file.binary_shares")) {
            // Need to download the GridFSZip file
            try {
                Path jarToCache = InfiniteHadoopUtils.cacheLocalFile("/opt/infinite-home/lib/unbundled/",
                        "GridFSZipFile.jar", config);
                DistributedCache.addFileToClassPath(jarToCache, config);
            } catch (Throwable t) {
            } // (this is fine, will already be on the classpath .. otherwise lots of other stuff will be failing all over the place!)
        }

        if (job.inputCollection.equals("records")) {
            InfiniteElasticsearchHadoopUtils.handleElasticsearchInput(job, config, advancedConfigurationDbo);
            //(won't run under 0.19 so running with "records" should cause all sorts of exceptions)
        } //TESTED (by hand)

        if (bTestMode || bLocalMode) {
            // If running locally, turn "snappy" off - tomcat isn't pointing its native library path in the right place
            config.set("mapred.map.output.compression.codec", "org.apache.hadoop.io.compress.DefaultCodec");
        }

        // Manually specified caches
        List<URL> localJarCaches = InfiniteHadoopUtils.handleCacheList(advancedConfigurationDbo.get("$caches"),
                job, config, props_custom);

        Job hj = new Job(config); // (NOTE: from here, changes to config are ignored)
        try {
            if (null != localJarCaches) {
                if (bLocalMode || bTestMode) {
                    Method method = URLClassLoader.class.getDeclaredMethod("addURL", new Class[] { URL.class });
                    method.setAccessible(true);
                    method.invoke(child, localJarCaches.toArray());
                } //TOTEST (tested logically)
            }
            Class<?> classToLoad = Class.forName(job.mapper, true, child);
            hj.setJarByClass(classToLoad);

            if (job.inputCollection.equalsIgnoreCase("filesystem")) {
                String inputPath = null;
                try {
                    inputPath = MongoDbUtil.getProperty(advancedConfigurationDbo, "file.url");
                    if (!inputPath.endsWith("/")) {
                        inputPath = inputPath + "/";
                    }
                } catch (Exception e) {
                }
                if (null == inputPath) {
                    throw new RuntimeException("Must specify 'file.url' if reading from filesystem.");
                }
                inputPath = InfiniteHadoopUtils.authenticateInputDirectory(job, inputPath);

                InfiniteFileInputFormat.addInputPath(hj, new Path(inputPath + "*/*")); // (that extra bit makes it recursive)
                InfiniteFileInputFormat.setMaxInputSplitSize(hj, 33554432); // (32MB)
                InfiniteFileInputFormat.setInfiniteInputPathFilter(hj, config);
                hj.setInputFormatClass((Class<? extends InputFormat>) Class.forName(
                        "com.ikanow.infinit.e.data_model.custom.InfiniteFileInputFormat", true, child));
            } else if (job.inputCollection.equalsIgnoreCase("file.binary_shares")) {
                String[] oidStrs = null;
                try {
                    String inputPath = MongoDbUtil.getProperty(advancedConfigurationDbo, "file.url");
                    Pattern oidExtractor = Pattern.compile("inf://share/([^/]+)");
                    Matcher m = oidExtractor.matcher(inputPath);
                    if (m.find()) {
                        oidStrs = m.group(1).split("\\s*,\\s*");
                    } else {
                        throw new RuntimeException(
                                "file.url must be in format inf://share/<oid-list>/<string>: " + inputPath);
                    }
                    InfiniteHadoopUtils.authenticateShareList(job, oidStrs);
                } catch (Exception e) {
                    throw new RuntimeException(
                            "Authentication error: " + e.getMessage() + ": " + advancedConfigurationDbo, e);
                }
                hj.getConfiguration().setStrings("mapred.input.dir", oidStrs);
                hj.setInputFormatClass((Class<? extends InputFormat>) Class.forName(
                        "com.ikanow.infinit.e.data_model.custom.InfiniteShareInputFormat", true, child));
            } else if (job.inputCollection.equals("records")) {
                hj.setInputFormatClass((Class<? extends InputFormat>) Class
                        .forName("com.ikanow.infinit.e.data_model.custom.InfiniteEsInputFormat", true, child));
            } else {
                if (esMode) {
                    hj.setInputFormatClass((Class<? extends InputFormat>) Class.forName(
                            "com.ikanow.infinit.e.processing.custom.utils.InfiniteElasticsearchMongoInputFormat",
                            true, child));
                } else {
                    hj.setInputFormatClass((Class<? extends InputFormat>) Class.forName(
                            "com.ikanow.infinit.e.data_model.custom.InfiniteMongoInputFormat", true, child));
                }
            }
            if ((null != job.exportToHdfs) && job.exportToHdfs) {
                //TODO (INF-2469): Also, if the output key is BSON then also run as text (but output as JSON?)
                Path outPath = InfiniteHadoopUtils.ensureOutputDirectory(job, props_custom);

                if ((null != job.outputKey) && (null != job.outputValue)
                        && job.outputKey.equalsIgnoreCase("org.apache.hadoop.io.text")
                        && job.outputValue.equalsIgnoreCase("org.apache.hadoop.io.text")) {
                    // (slight hack before I sort out the horrendous job class - if key/val both text and exporting to HDFS then output as Text)
                    hj.setOutputFormatClass((Class<? extends OutputFormat>) Class
                            .forName("org.apache.hadoop.mapreduce.lib.output.TextOutputFormat", true, child));
                    TextOutputFormat.setOutputPath(hj, outPath);
                } //TESTED
                else {
                    hj.setOutputFormatClass((Class<? extends OutputFormat>) Class.forName(
                            "org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat", true, child));
                    SequenceFileOutputFormat.setOutputPath(hj, outPath);
                } //TESTED
            } else { // normal case, stays in MongoDB
                hj.setOutputFormatClass((Class<? extends OutputFormat>) Class.forName(
                        "com.ikanow.infinit.e.data_model.custom.InfiniteMongoOutputFormat", true, child));
            }
            hj.setMapperClass((Class<? extends Mapper>) mapperClazz);
            String mapperOutputKeyOverride = advancedConfigurationDbo.getString("$mapper_key_class", null);
            if (null != mapperOutputKeyOverride) {
                hj.setMapOutputKeyClass(Class.forName(mapperOutputKeyOverride));
            } //TESTED

            String mapperOutputValueOverride = advancedConfigurationDbo.getString("$mapper_value_class", null);
            if (null != mapperOutputValueOverride) {
                hj.setMapOutputValueClass(Class.forName(mapperOutputValueOverride));
            } //TESTED

            if ((null != job.reducer) && !job.reducer.startsWith("#") && !job.reducer.equalsIgnoreCase("null")
                    && !job.reducer.equalsIgnoreCase("none")) {
                hj.setReducerClass((Class<? extends Reducer>) Class.forName(job.reducer, true, child));
                // Variable reducers:
                if (null != job.query) {
                    try {
                        hj.setNumReduceTasks(advancedConfigurationDbo.getInt("$reducers", 1));
                    } catch (Exception e) {
                        try {
                            // (just check it's not a string that is a valid int)
                            hj.setNumReduceTasks(
                                    Integer.parseInt(advancedConfigurationDbo.getString("$reducers", "1")));
                        } catch (Exception e2) {
                        }
                    }
                } //TESTED
            } else {
                hj.setNumReduceTasks(0);
            }
            if ((null != job.combiner) && !job.combiner.startsWith("#")
                    && !job.combiner.equalsIgnoreCase("null") && !job.combiner.equalsIgnoreCase("none")) {
                hj.setCombinerClass((Class<? extends Reducer>) Class.forName(job.combiner, true, child));
            }
            hj.setOutputKeyClass(Class.forName(job.outputKey, true, child));
            hj.setOutputValueClass(Class.forName(job.outputValue, true, child));

            hj.setJobName(job.jobtitle);
            currJobName = job.jobtitle;
        } catch (Error e) { // (messing about with class loaders = lots of chances for errors!)
            throw new RuntimeException(e.getMessage(), e);
        }

        if (bTestMode || bLocalMode) {
            hj.submit();
            currThreadId = null;
            Logger.getRootLogger().addAppender(this);
            currLocalJobId = hj.getJobID().toString();
            currLocalJobErrs.setLength(0);
            while (!hj.isComplete()) {
                Thread.sleep(1000);
            }
            Logger.getRootLogger().removeAppender(this);
            if (hj.isSuccessful()) {
                if (this.currLocalJobErrs.length() > 0) {
                    return "local_done: " + this.currLocalJobErrs.toString();
                } else {
                    return "local_done";
                }
            } else {
                return "Error: " + this.currLocalJobErrs.toString();
            }
        } else {
            hj.submit();
            String jobId = hj.getJobID().toString();
            return jobId;
        }
    } catch (Exception e) {
        e.printStackTrace();
        Thread.currentThread().setContextClassLoader(savedClassLoader);
        return "Error: " + InfiniteHadoopUtils.createExceptionMessage(e);
    } finally {
        Thread.currentThread().setContextClassLoader(savedClassLoader);
    }
}