List of usage examples for org.apache.hadoop.fs FileSystem getDefaultUri
public static URI getDefaultUri(Configuration conf)
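Before the full examples below, a minimal, self-contained sketch of the typical call pattern (the class name DefaultUriExample and the main method here are illustrative, not taken from the examples that follow): getDefaultUri reads the configured default file system (fs.defaultFS) from a Configuration and returns it as a URI, which can then be passed to FileSystem.get.

import java.net.URI;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;

public class DefaultUriExample {
    public static void main(String[] args) throws Exception {
        // Loads core-default.xml / core-site.xml from the classpath
        Configuration conf = new Configuration();

        // Returns the URI configured as fs.defaultFS (e.g. hdfs://namenode:8020),
        // falling back to the built-in default (file:///) if nothing is set
        URI defaultUri = FileSystem.getDefaultUri(conf);
        System.out.println("Default file system URI: " + defaultUri);

        // The URI can then be used to obtain a FileSystem instance explicitly
        FileSystem fs = FileSystem.get(defaultUri, conf);
        System.out.println("FileSystem implementation: " + fs.getClass().getName());
    }
}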
From source file: org.apache.sentry.tests.e2e.hive.fs.TestFSBase.java
License: Apache License
@BeforeClass
public static void setupTestStaticConfiguration() throws Exception {
    useSentryService = true;
    AbstractTestWithStaticConfiguration.setupTestStaticConfiguration();
    AbstractTestWithStaticConfiguration.setupAdmin();
    authenticationType = System.getProperty(HiveConf.ConfVars.HIVE_SERVER2_AUTHENTICATION.varname);
    LOGGER.info("authenticationType = " + authenticationType);
    Assume.assumeThat(hiveServer2Type, equalTo(HiveServerFactory.HiveServer2Type.UnmanagedHiveServer2));
    if (DFSType.ClusterDFS.equals(DFS_TYPE) || DFSType.S3DFS.equals(DFS_TYPE)) {
        LOGGER.info("DFSType = " + DFS_TYPE);
    } else {
        LOGGER.warn("Incorrect DFSType " + DFS_TYPE);
        Assume.assumeTrue(false);
    }
    String storageUriStr = System.getProperty(TestFSContants.SENTRY_E2E_TEST_STORAGE_URI);
    if (!Strings.isNullOrEmpty(storageUriStr)) {
        storageUriStr = storageUriStr.toLowerCase();
        if (storageUriStr.startsWith("hdfs") || storageUriStr.startsWith("file")) {
            storageDFSType = DFSType.ClusterDFS;
        } else if (storageUriStr.startsWith("s3a")) {
            storageDFSType = DFSType.S3DFS;
        }
    }
    storageFileSystem = fileSystem;
    if (storageDFSType.equals(DFSType.ClusterDFS)) {
        // hdfs cluster
        defaultStorageUri = FileSystem.getDefaultUri(fileSystem.getConf());
    } else {
        // a non-hdfs file system must specify defaultStorageUri
        if (!Strings.isNullOrEmpty(storageUriStr)) {
            defaultStorageUri = URI.create(storageUriStr);
        } else {
            LOGGER.warn("Skipping test: Unknown sentry.e2etest.storage.uri, " + "for example, s3a://bucketname");
            Assume.assumeTrue(false);
        }
        LOGGER.info("defaultStorageUri = " + defaultStorageUri.toString());
        if (storageDFSType.equals(DFSType.S3DFS)) {
            // currently defaultFS = s3a doesn't work for NN;
            // needs to explicitly specify s3a's defaultUri
            String accessKey = System.getProperty(TestFSContants.S3A_ACCESS_KEY,
                    hiveServer.getProperty(TestFSContants.S3A_ACCESS_KEY));
            String secretKey = System.getProperty(TestFSContants.S3A_SECRET_KEY,
                    hiveServer.getProperty(TestFSContants.S3A_SECRET_KEY));
            LOGGER.info("accessKey = " + accessKey);
            LOGGER.info("secretKey = " + secretKey);
            Assume.assumeTrue(Strings.isNullOrEmpty(accessKey) == false);
            Assume.assumeTrue(Strings.isNullOrEmpty(secretKey) == false);
            Configuration conf = new Configuration();
            conf.set(TestFSContants.S3A_ACCESS_KEY, accessKey);
            conf.set(TestFSContants.S3A_SECRET_KEY, secretKey);
            storageFileSystem = new S3AFileSystem();
            Assume.assumeNotNull(storageFileSystem);
            LOGGER.info("Configuring S3DFS defaultStorageUri = " + defaultStorageUri.toString());
            storageFileSystem.initialize(defaultStorageUri, conf);
        }
        /* else if (DFS_TYPE.equals(DFSType.MSAZUREDFS)) { } */
    }
    // Get warehouse dir from the hive-site.xml conf file
    StrWarehouseDirFromConfFile = hiveServer.getOrgWarehouseDir();
}
From source file: org.apache.sentry.tests.e2e.hive.hiveserver.HiveServerFactory.java
License: Apache License
public static HiveServer create(HiveServer2Type type, Map<String, String> properties, File baseDir,
        File confDir, File logDir, String policyFile, FileSystem fileSystem) throws Exception {
    if (type.equals(HiveServer2Type.UnmanagedHiveServer2)) {
        LOGGER.info("Creating UnmanagedHiveServer");
        return new UnmanagedHiveServer();
    }
    if (!properties.containsKey(WAREHOUSE_DIR)) {
        LOGGER.info("fileSystem " + fileSystem.getClass().getSimpleName());
        if (fileSystem instanceof DistributedFileSystem) {
            @SuppressWarnings("static-access")
            String dfsUri = fileSystem.getDefaultUri(fileSystem.getConf()).toString();
            LOGGER.info("dfsUri " + dfsUri);
            properties.put(WAREHOUSE_DIR, dfsUri + "/data");
            fileSystem.mkdirs(new Path("/data/"), new FsPermission((short) 0777));
        } else {
            properties.put(WAREHOUSE_DIR, new File(baseDir, "warehouse").getPath());
            fileSystem.mkdirs(new Path("/", "warehouse"), new FsPermission((short) 0777));
        }
    }
    Boolean policyOnHDFS = Boolean.valueOf(System.getProperty("sentry.e2etest.policyonhdfs", "false"));
    if (policyOnHDFS) {
        // Initialize "hive.exec.scratchdir"; according to the description of
        // "hive.exec.scratchdir", the permission should be (733):
        // <description>HDFS root scratch dir for Hive jobs which gets created with write
        // all (733) permission. For each connecting user, an HDFS scratch dir:
        // ${hive.exec.scratchdir}/<username> is created,
        // with ${hive.scratch.dir.permission}.</description>
        fileSystem.mkdirs(new Path("/tmp/hive/"));
        fileSystem.setPermission(new Path("/tmp/hive/"), new FsPermission((short) 0733));
    } else {
        LOGGER.info("Setting a readable path to hive.exec.scratchdir");
        properties.put("hive.exec.scratchdir", new File(baseDir, "scratchdir").getPath());
    }
    if (!properties.containsKey(METASTORE_CONNECTION_URL)) {
        properties.put(METASTORE_CONNECTION_URL,
                String.format("jdbc:derby:;databaseName=%s;create=true;createDatabaseIfNotExist=true",
                        new File(baseDir, "metastore").getPath()));
        properties.put("datanucleus.schema.autoCreateTables", "true");
    }
    if (!properties.containsKey(ACCESS_TESTING_MODE)) {
        properties.put(ACCESS_TESTING_MODE, "true");
    }
    if (!properties.containsKey(AUTHZ_PROVIDER_RESOURCE)) {
        LOGGER.info("Policy File location: " + policyFile);
        properties.put(AUTHZ_PROVIDER_RESOURCE, policyFile);
    }
    if (!properties.containsKey(AUTHZ_PROVIDER)) {
        properties.put(AUTHZ_PROVIDER, LocalGroupResourceAuthorizationProvider.class.getName());
    }
    if (!properties.containsKey(AUTHZ_SERVER_NAME)) {
        properties.put(AUTHZ_SERVER_NAME, DEFAULT_AUTHZ_SERVER_NAME);
    }
    if (!properties.containsKey(HS2_PORT)) {
        properties.put(HS2_PORT, String.valueOf(findPort()));
    }
    if (!properties.containsKey(SUPPORT_CONCURRENCY)) {
        properties.put(SUPPORT_CONCURRENCY, "false");
    }
    if (!properties.containsKey(HADOOPBIN)) {
        properties.put(HADOOPBIN, "./target/test-classes/hadoop");
    }
    // Modify the test resource to have executable permission
    java.nio.file.Path hadoopPath = FileSystems.getDefault().getPath("target/test-classes", "hadoop");
    if (hadoopPath != null) {
        hadoopPath.toFile().setExecutable(true);
    }
    if (HiveServer2Type.InternalMetastore.equals(type)) {
        // The configuration sentry.metastore.service.users is for the user who
        // has all access to get the metadata.
        properties.put(METASTORE_BYPASS, "accessAllMetaUser");
        if (!properties.containsKey(METASTORE_URI)) {
            properties.put(METASTORE_URI, "thrift://localhost:" + String.valueOf(findPort()));
        }
        if (!properties.containsKey(METASTORE_HOOK)) {
            properties.put(METASTORE_HOOK, "org.apache.sentry.binding.metastore.MetastoreAuthzBinding");
        }
        properties.put(ConfVars.METASTORESERVERMINTHREADS.varname, "5");
    }
    properties.put(ConfVars.HIVE_AUTHORIZATION_ENABLED.varname, "true");
    properties.put(ConfVars.HIVE_AUTHORIZATION_MANAGER.varname, SentryHiveAuthorizerFactory.class.getName());
    // CBO has a bug on Hive 2.0.0 with VIEWS because ReadIdentity objects are sent without
    // parent information for partitioned columns
    properties.put(ConfVars.HIVE_CBO_ENABLED.varname, "false");
    // Hive 2.x sets the following configuration to TRUE by default, and it causes test issues on
    // Sentry because we're trying to change columns with different column types
    properties.put(ConfVars.METASTORE_DISALLOW_INCOMPATIBLE_COL_TYPE_CHANGES.varname, "false");
    // This configuration avoids starting the HS2 WebUI, which was causing test failures when
    // HS2 is configured for concurrency
    properties.put(ConfVars.HIVE_IN_TEST.varname, "true");
    // This configuration is used by SentryHiveAuthorizerFactory to change the client type
    // to HIVESERVER2 if we're using the authorization V2 in test mode.
    properties.put(ConfVars.HIVE_TEST_AUTHORIZATION_SQLSTD_HS2_MODE.varname, "true");
    // Sets the hadoop temporary directory specified by java.io.tmpdir (already set to the
    // maven build directory to avoid writing to /tmp directly)
    String hadoopTempDir = System.getProperty("java.io.tmpdir") + File.separator + "hadoop-tmp";
    properties.put("hadoop.tmp.dir", hadoopTempDir);
    // This configuration avoids the HMS failing if the metastore schema has no version
    // information. For some reason, HMS does not set a version initially on our tests.
    properties.put(ConfVars.METASTORE_SCHEMA_VERIFICATION.varname, "false");
    // Disable join cartesian checks to allow Sentry tests to pass
    properties.put(ConfVars.HIVE_STRICT_CHECKS_CARTESIAN.varname, "false");
    // Disable capability checks (these checks do not work when Hive is in testing mode)
    properties.put(ConfVars.METASTORE_CAPABILITY_CHECK.varname, "false");
    if (!properties.containsKey(METASTORE_BYPASS)) {
        properties.put(METASTORE_BYPASS, "hive,impala," + System.getProperty("user.name", ""));
    } else {
        String tempByPass = properties.get(METASTORE_BYPASS);
        tempByPass = "hive,impala," + System.getProperty("user.name", "") + "," + tempByPass;
        properties.put(METASTORE_BYPASS, tempByPass);
    }
    if (!properties.containsKey(HiveAuthzConf.AuthzConfVars.AUTHZ_SERVER_NAME.getVar())) {
        properties.put(HiveAuthzConf.AuthzConfVars.AUTHZ_SERVER_NAME.getVar(), "server1");
    }
    properties.put(METASTORE_SETUGI, "true");
    properties.put(METASTORE_CLIENT_TIMEOUT, "100");
    properties.put(ConfVars.HIVE_WAREHOUSE_SUBDIR_INHERIT_PERMS.varname, "true");
    properties.put(ConfVars.HIVESTATSAUTOGATHER.varname, "false");
    properties.put(ConfVars.HIVE_STATS_COLLECT_SCANCOLS.varname, "true");
    String hadoopBinPath = properties.get(HADOOPBIN);
    Assert.assertNotNull(hadoopBinPath, "Hadoop Bin");
    File hadoopBin = new File(hadoopBinPath);
    if (!hadoopBin.isFile()) {
        Assert.fail("Path to hadoop bin " + hadoopBin.getPath() + " is invalid. "
                + "Perhaps you missed the download-hadoop profile.");
    }
    /*
     * This hack, setting the hiveSiteURL field, removes a previous hack involving
     * setting of system properties for each property. Although both are hacks,
     * I prefer this hack because once the system properties are set they can
     * affect later tests unless those tests clear them. This hack allows for
     * a clean switch to a new set of defaults when a new HiveConf object is created.
     */
    Reflection.staticField("hiveSiteURL").ofType(URL.class).in(HiveConf.class).set(null);
    HiveConf hiveConf = new HiveConf();
    HiveAuthzConf authzConf = new HiveAuthzConf(Resources.getResource("sentry-site.xml"));
    for (Map.Entry<String, String> entry : properties.entrySet()) {
        LOGGER.info(entry.getKey() + " => " + entry.getValue());
        hiveConf.set(entry.getKey(), entry.getValue());
        authzConf.set(entry.getKey(), entry.getValue());
    }
    File hiveSite = new File(confDir, "hive-site.xml");
    File accessSite = new File(confDir, HiveAuthzConf.AUTHZ_SITE_FILE);
    OutputStream out = new FileOutputStream(accessSite);
    authzConf.writeXml(out);
    out.close();
    // points hive-site.xml at access-site.xml
    hiveConf.set(HiveAuthzConf.HIVE_SENTRY_CONF_URL, "file:///" + accessSite.getPath());
    if (!properties.containsKey(HiveConf.ConfVars.HIVE_SERVER2_SESSION_HOOK.varname)) {
        hiveConf.set(HiveConf.ConfVars.HIVE_SERVER2_SESSION_HOOK.varname,
                "org.apache.sentry.binding.hive.HiveAuthzBindingSessionHook");
    }
    hiveConf.set(HIVESERVER2_IMPERSONATION, "false");
    out = new FileOutputStream(hiveSite);
    hiveConf.writeXml(out);
    out.close();
    Reflection.staticField("hiveSiteURL").ofType(URL.class).in(HiveConf.class).set(hiveSite.toURI().toURL());
    switch (type) {
    case EmbeddedHiveServer2:
        LOGGER.info("Creating EmbeddedHiveServer");
        return new EmbeddedHiveServer();
    case InternalHiveServer2:
        LOGGER.info("Creating InternalHiveServer");
        return new InternalHiveServer(hiveConf);
    case InternalMetastore:
        LOGGER.info("Creating InternalMetastoreServer");
        return new InternalMetastoreServer(hiveConf);
    case ExternalHiveServer2:
        LOGGER.info("Creating ExternalHiveServer");
        return new ExternalHiveServer(hiveConf, confDir, logDir);
    default:
        throw new UnsupportedOperationException(type.name());
    }
}
From source file: org.apache.sentry.tests.e2e.hiveserver.HiveServerFactory.java
License: Apache License
private static HiveServer create(HiveServer2Type type, Map<String, String> properties, File baseDir,
        File confDir, File logDir, File policyFile, FileSystem fileSystem) throws Exception {
    if (!properties.containsKey(WAREHOUSE_DIR)) {
        LOGGER.error("fileSystem " + fileSystem.getClass().getSimpleName());
        if (fileSystem instanceof DistributedFileSystem) {
            @SuppressWarnings("static-access")
            String dfsUri = fileSystem.getDefaultUri(fileSystem.getConf()).toString();
            LOGGER.error("dfsUri " + dfsUri);
            properties.put(WAREHOUSE_DIR, dfsUri + "/data");
        } else {
            properties.put(WAREHOUSE_DIR, new File(baseDir, "warehouse").getPath());
        }
    }
    if (!properties.containsKey(METASTORE_CONNECTION_URL)) {
        properties.put(METASTORE_CONNECTION_URL, String.format("jdbc:derby:;databaseName=%s;create=true",
                new File(baseDir, "metastore").getPath()));
    }
    if (policyFile.exists()) {
        LOGGER.info("Policy file " + policyFile + " exists");
    } else {
        LOGGER.info("Creating policy file " + policyFile);
        FileOutputStream to = new FileOutputStream(policyFile);
        Resources.copy(Resources.getResource(AUTHZ_PROVIDER_FILENAME), to);
        to.close();
    }
    if (!properties.containsKey(ACCESS_TESTING_MODE)) {
        properties.put(ACCESS_TESTING_MODE, "true");
    }
    if (!properties.containsKey(AUTHZ_PROVIDER_RESOURCE)) {
        properties.put(AUTHZ_PROVIDER_RESOURCE, policyFile.getPath());
    }
    if (!properties.containsKey(AUTHZ_PROVIDER)) {
        properties.put(AUTHZ_PROVIDER, LocalGroupResourceAuthorizationProvider.class.getName());
    }
    if (!properties.containsKey(AUTHZ_SERVER_NAME)) {
        properties.put(AUTHZ_SERVER_NAME, DEFAULT_AUTHZ_SERVER_NAME);
    }
    if (!properties.containsKey(HS2_PORT)) {
        properties.put(HS2_PORT, String.valueOf(findPort()));
    }
    if (!properties.containsKey(SUPPORT_CONCURRENCY)) {
        properties.put(SUPPORT_CONCURRENCY, "false");
    }
    if (!properties.containsKey(HADOOPBIN)) {
        properties.put(HADOOPBIN, "./target/hadoop/bin/hadoop");
    }
    String hadoopBinPath = properties.get(HADOOPBIN);
    Assert.assertNotNull(hadoopBinPath, "Hadoop Bin");
    File hadoopBin = new File(hadoopBinPath);
    if (!hadoopBin.isFile()) {
        Assert.fail("Path to hadoop bin " + hadoopBin.getPath() + " is invalid. "
                + "Perhaps you missed the download-hadoop profile.");
    }
    /*
     * This hack, setting the hiveSiteURL field, removes a previous hack involving
     * setting of system properties for each property. Although both are hacks,
     * I prefer this hack because once the system properties are set they can
     * affect later tests unless those tests clear them. This hack allows for
     * a clean switch to a new set of defaults when a new HiveConf object is created.
     */
    Reflection.staticField("hiveSiteURL").ofType(URL.class).in(HiveConf.class).set(null);
    HiveConf hiveConf = new HiveConf();
    HiveAuthzConf authzConf = new HiveAuthzConf(Resources.getResource("sentry-site.xml"));
    for (Map.Entry<String, String> entry : properties.entrySet()) {
        LOGGER.info(entry.getKey() + " => " + entry.getValue());
        hiveConf.set(entry.getKey(), entry.getValue());
        authzConf.set(entry.getKey(), entry.getValue());
    }
    File hiveSite = new File(confDir, "hive-site.xml");
    File accessSite = new File(confDir, HiveAuthzConf.AUTHZ_SITE_FILE);
    OutputStream out = new FileOutputStream(accessSite);
    authzConf.writeXml(out);
    out.close();
    // points hive-site.xml at access-site.xml
    hiveConf.set(HiveAuthzConf.HIVE_ACCESS_CONF_URL, accessSite.toURI().toURL().toExternalForm());
    if (!properties.containsKey(HiveConf.ConfVars.HIVE_SERVER2_SESSION_HOOK.varname)) {
        hiveConf.set(HiveConf.ConfVars.HIVE_SERVER2_SESSION_HOOK.varname,
                "org.apache.sentry.binding.hive.HiveAuthzBindingSessionHook");
    }
    out = new FileOutputStream(hiveSite);
    hiveConf.writeXml(out);
    out.close();
    Reflection.staticField("hiveSiteURL").ofType(URL.class).in(HiveConf.class).set(hiveSite.toURI().toURL());
    switch (type) {
    case EmbeddedHiveServer2:
        LOGGER.info("Creating EmbeddedHiveServer");
        return new EmbeddedHiveServer();
    case InternalHiveServer2:
        LOGGER.info("Creating InternalHiveServer");
        return new InternalHiveServer(hiveConf);
    case ExternalHiveServer2:
        LOGGER.info("Creating ExternalHiveServer");
        return new ExternalHiveServer(hiveConf, confDir, logDir);
    case UnmanagedHiveServer2:
        LOGGER.info("Creating UnmanagedHiveServer");
        return new UnmanagedHiveServer(hiveConf);
    default:
        throw new UnsupportedOperationException(type.name());
    }
}
From source file: org.apache.storm.hdfs.security.AutoHDFS.java
License: Apache License
@SuppressWarnings("unchecked") private byte[] getHadoopCredentials(Map<String, Object> conf, final Configuration configuration) { try {/* w ww.j a v a 2 s.co m*/ if (UserGroupInformation.isSecurityEnabled()) { login(configuration); final String topologySubmitterUser = (String) conf.get(Config.TOPOLOGY_SUBMITTER_PRINCIPAL); final URI nameNodeURI = conf.containsKey(TOPOLOGY_HDFS_URI) ? new URI(conf.get(TOPOLOGY_HDFS_URI).toString()) : FileSystem.getDefaultUri(configuration); UserGroupInformation ugi = UserGroupInformation.getCurrentUser(); final UserGroupInformation proxyUser = UserGroupInformation.createProxyUser(topologySubmitterUser, ugi); Credentials creds = (Credentials) proxyUser.doAs(new PrivilegedAction<Object>() { @Override public Object run() { try { FileSystem fileSystem = FileSystem.get(nameNodeURI, configuration); Credentials credential = proxyUser.getCredentials(); if (configuration.get(STORM_USER_NAME_KEY) == null) { configuration.set(STORM_USER_NAME_KEY, hdfsPrincipal); } fileSystem.addDelegationTokens(configuration.get(STORM_USER_NAME_KEY), credential); LOG.info("Delegation tokens acquired for user {}", topologySubmitterUser); return credential; } catch (IOException e) { throw new RuntimeException(e); } } }); ByteArrayOutputStream bao = new ByteArrayOutputStream(); ObjectOutputStream out = new ObjectOutputStream(bao); creds.write(out); out.flush(); out.close(); return bao.toByteArray(); } else { throw new RuntimeException("Security is not enabled for HDFS"); } } catch (Exception ex) { throw new RuntimeException("Failed to get delegation tokens.", ex); } }
From source file: org.apache.storm.hdfs.security.AutoHDFSNimbus.java
License: Apache License
@SuppressWarnings("unchecked") private byte[] getHadoopCredentials(Map<String, Object> conf, final Configuration configuration, final String topologySubmitterUser) { try {/*from w w w .ja va 2 s .c o m*/ if (UserGroupInformation.isSecurityEnabled()) { login(configuration); final URI nameNodeURI = conf.containsKey(TOPOLOGY_HDFS_URI) ? new URI(conf.get(TOPOLOGY_HDFS_URI).toString()) : FileSystem.getDefaultUri(configuration); UserGroupInformation ugi = UserGroupInformation.getCurrentUser(); final UserGroupInformation proxyUser = UserGroupInformation.createProxyUser(topologySubmitterUser, ugi); Credentials creds = (Credentials) proxyUser.doAs(new PrivilegedAction<Object>() { @Override public Object run() { try { FileSystem fileSystem = FileSystem.get(nameNodeURI, configuration); Credentials credential = proxyUser.getCredentials(); if (configuration.get(STORM_USER_NAME_KEY) == null) { configuration.set(STORM_USER_NAME_KEY, hdfsPrincipal); } fileSystem.addDelegationTokens(configuration.get(STORM_USER_NAME_KEY), credential); LOG.info("Delegation tokens acquired for user {}", topologySubmitterUser); return credential; } catch (IOException e) { throw new RuntimeException(e); } } }); ByteArrayOutputStream bao = new ByteArrayOutputStream(); ObjectOutputStream out = new ObjectOutputStream(bao); creds.write(out); out.flush(); out.close(); return bao.toByteArray(); } else { throw new RuntimeException("Security is not enabled for HDFS"); } } catch (Exception ex) { throw new RuntimeException("Failed to get delegation tokens.", ex); } }
From source file: org.apache.sysml.api.DMLScript.java
License: Apache License
private static void checkSecuritySetup(DMLConfig config) throws IOException, DMLRuntimeException {
    // analyze local configuration
    String userName = System.getProperty("user.name");
    HashSet<String> groupNames = new HashSet<>();
    try {
        // check existence, for backwards compatibility to < hadoop 0.21
        if (UserGroupInformation.class.getMethod("getCurrentUser") != null) {
            String[] groups = UserGroupInformation.getCurrentUser().getGroupNames();
            Collections.addAll(groupNames, groups);
        }
    } catch (Exception ex) {
    }

    // analyze hadoop configuration
    JobConf job = ConfigurationManager.getCachedJobConf();
    boolean localMode = InfrastructureAnalyzer.isLocalMode(job);
    String taskController = job.get(MRConfigurationNames.MR_TASKTRACKER_TASKCONTROLLER,
            "org.apache.hadoop.mapred.DefaultTaskController");
    String ttGroupName = job.get(MRConfigurationNames.MR_TASKTRACKER_GROUP, "null");
    String perm = job.get(MRConfigurationNames.DFS_PERMISSIONS_ENABLED, "null");
    // note: job.get("dfs.permissions.supergroup", null);
    URI fsURI = FileSystem.getDefaultUri(job);

    // determine security states
    boolean flagDiffUser = !(taskController.equals("org.apache.hadoop.mapred.LinuxTaskController") // runs map/reduce tasks as the current user
            || localMode // runs in the same JVM anyway
            || groupNames.contains(ttGroupName)); // user in task tracker group
    boolean flagLocalFS = fsURI == null || fsURI.getScheme().equals("file");
    boolean flagSecurity = perm.equals("yes");

    LOG.debug("SystemML security check: " + "local.user.name = " + userName + ", "
            + "local.user.groups = " + ProgramConverter.serializeStringCollection(groupNames) + ", "
            + MRConfigurationNames.MR_JOBTRACKER_ADDRESS + " = " + job.get(MRConfigurationNames.MR_JOBTRACKER_ADDRESS) + ", "
            + MRConfigurationNames.MR_TASKTRACKER_TASKCONTROLLER + " = " + taskController + ","
            + MRConfigurationNames.MR_TASKTRACKER_GROUP + " = " + ttGroupName + ", "
            + MRConfigurationNames.FS_DEFAULTFS + " = " + ((fsURI != null) ? fsURI.getScheme() : "null") + ", "
            + MRConfigurationNames.DFS_PERMISSIONS_ENABLED + " = " + perm);

    // print warning if permission issues possible
    if (flagDiffUser && (flagLocalFS || flagSecurity)) {
        LOG.warn("Cannot run map/reduce tasks as user '" + userName + "'. Using tasktracker group '"
                + ttGroupName + "'.");
    }
}
From source file: org.apache.tephra.persist.HDFSTransactionLogTest.java
License: Apache License
private void testCaskTransactionLogSync(int totalCount, int batchSize, byte versionNumber, boolean isComplete)
        throws Exception {
    List<co.cask.tephra.persist.TransactionEdit> edits = TransactionEditUtil.createRandomCaskEdits(totalCount);
    long timestamp = System.currentTimeMillis();
    Configuration configuration = getConfiguration();
    FileSystem fs = FileSystem.newInstance(FileSystem.getDefaultUri(configuration), configuration);
    SequenceFile.Writer writer = getSequenceFileWriter(configuration, fs, timestamp, versionNumber);
    AtomicLong logSequence = new AtomicLong();
    HDFSTransactionLog transactionLog = getHDFSTransactionLog(configuration, fs, timestamp);
    AbstractTransactionLog.CaskEntry entry;
    for (int i = 0; i < totalCount - batchSize; i += batchSize) {
        if (versionNumber > 1) {
            writeNumWrites(writer, batchSize);
        }
        for (int j = 0; j < batchSize; j++) {
            entry = new AbstractTransactionLog.CaskEntry(new LongWritable(logSequence.getAndIncrement()), edits.get(j));
            writer.append(entry.getKey(), entry.getEdit());
        }
        writer.syncFs();
    }
    if (versionNumber > 1) {
        writeNumWrites(writer, batchSize);
    }
    for (int i = totalCount - batchSize; i < totalCount - 1; i++) {
        entry = new AbstractTransactionLog.CaskEntry(new LongWritable(logSequence.getAndIncrement()), edits.get(i));
        writer.append(entry.getKey(), entry.getEdit());
    }
    entry = new AbstractTransactionLog.CaskEntry(new LongWritable(logSequence.getAndIncrement()), edits.get(totalCount - 1));
    if (isComplete) {
        writer.append(entry.getKey(), entry.getEdit());
    } else {
        byte[] bytes = Longs.toByteArray(entry.getKey().get());
        writer.appendRaw(bytes, 0, bytes.length, new SequenceFile.ValueBytes() {
            @Override
            public void writeUncompressedBytes(DataOutputStream outStream) throws IOException {
                byte[] test = new byte[] { 0x2 };
                outStream.write(test, 0, 1);
            }

            @Override
            public void writeCompressedBytes(DataOutputStream outStream) throws IllegalArgumentException, IOException {
                // no-op
            }

            @Override
            public int getSize() {
                // mimic size longer than the actual byte array size written, so we would reach EOF
                return 12;
            }
        });
    }
    writer.syncFs();
    Closeables.closeQuietly(writer);

    // now let's try to read this log
    TransactionLogReader reader = transactionLog.getReader();
    int syncedEdits = 0;
    while (reader.next() != null) {
        // testing reading the transaction edits
        syncedEdits++;
    }
    if (isComplete) {
        Assert.assertEquals(totalCount, syncedEdits);
    } else {
        Assert.assertEquals(totalCount - batchSize, syncedEdits);
    }
}
From source file: org.apache.tephra.persist.HDFSTransactionLogTest.java
License: Apache License
private void testTransactionLogSync(int totalCount, int batchSize, byte versionNumber, boolean isComplete)
        throws Exception {
    List<TransactionEdit> edits = TransactionEditUtil.createRandomEdits(totalCount);
    long timestamp = System.currentTimeMillis();
    Configuration configuration = getConfiguration();
    configuration.set(TxConstants.TransactionLog.CFG_SLOW_APPEND_THRESHOLD, "0");
    FileSystem fs = FileSystem.newInstance(FileSystem.getDefaultUri(configuration), configuration);
    SequenceFile.Writer writer = getSequenceFileWriter(configuration, fs, timestamp, versionNumber);
    AtomicLong logSequence = new AtomicLong();
    HDFSTransactionLog transactionLog = getHDFSTransactionLog(configuration, fs, timestamp);
    AbstractTransactionLog.Entry entry;
    for (int i = 0; i < totalCount - batchSize; i += batchSize) {
        writeNumWrites(writer, batchSize);
        for (int j = 0; j < batchSize; j++) {
            entry = new AbstractTransactionLog.Entry(new LongWritable(logSequence.getAndIncrement()), edits.get(j));
            writer.append(entry.getKey(), entry.getEdit());
        }
        writer.syncFs();
    }
    writeNumWrites(writer, batchSize);
    for (int i = totalCount - batchSize; i < totalCount - 1; i++) {
        entry = new AbstractTransactionLog.Entry(new LongWritable(logSequence.getAndIncrement()), edits.get(i));
        writer.append(entry.getKey(), entry.getEdit());
    }
    entry = new AbstractTransactionLog.Entry(new LongWritable(logSequence.getAndIncrement()), edits.get(totalCount - 1));
    if (isComplete) {
        writer.append(entry.getKey(), entry.getEdit());
    } else {
        byte[] bytes = Longs.toByteArray(entry.getKey().get());
        writer.appendRaw(bytes, 0, bytes.length, new SequenceFile.ValueBytes() {
            @Override
            public void writeUncompressedBytes(DataOutputStream outStream) throws IOException {
                byte[] test = new byte[] { 0x2 };
                outStream.write(test, 0, 1);
            }

            @Override
            public void writeCompressedBytes(DataOutputStream outStream) throws IllegalArgumentException, IOException {
                // no-op
            }

            @Override
            public int getSize() {
                // mimic size longer than the actual byte array size written, so we would reach EOF
                return 12;
            }
        });
    }
    writer.syncFs();
    Closeables.closeQuietly(writer);

    // now let's try to read this log
    TransactionLogReader reader = transactionLog.getReader();
    int syncedEdits = 0;
    while (reader.next() != null) {
        // testing reading the transaction edits
        syncedEdits++;
    }
    if (isComplete) {
        Assert.assertEquals(totalCount, syncedEdits);
    } else {
        Assert.assertEquals(totalCount - batchSize, syncedEdits);
    }
}
From source file: org.commoncrawl.service.pagerank.slave.PageRankUtils.java
License: Open Source License
private static FileSystem buildDistributionOutputStreamVector(boolean useSequenceFile, String fileNamePrefix,
        File localOutputPath, String remoteOutputPath, int myNodeIndex, int nodeCount,
        Vector<PRValueOutputStream> outputStreamVector) {
    Configuration conf = new Configuration(CrawlEnvironment.getHadoopConfig());
    conf.setInt("dfs.socket.timeout", 240000);
    conf.setInt("io.file.buffer.size", 4096 * 20);
    DistributedFileSystem hdfs = new DistributedFileSystem();
    try {
        hdfs.initialize(FileSystem.getDefaultUri(conf), conf);
        for (int i = 0; i < nodeCount; ++i) {
            // create output filename
            String fileName = fileNamePrefix + "-" + NUMBER_FORMAT.format(i);
            // create stream (local or remote stream, depending on i)
            // remote path
            Path remotePath = new Path(remoteOutputPath, fileName);
            // remove file
            CrawlEnvironment.getDefaultFileSystem().delete(remotePath, false);
            if (useSequenceFile) {
                // recreate it ...
                outputStreamVector.add(new PRSequenceFileOutputStream(conf,
                        CrawlEnvironment.getDefaultFileSystem(), remotePath));
            } else {
                // recreate it ...
                outputStreamVector.add(new PROldValueOutputStream(CrawlEnvironment.getDefaultFileSystem(), remotePath));
            }
        }
    } catch (IOException e) {
        LOG.error(CCStringUtils.stringifyException(e));
        for (PRValueOutputStream streamInfo : outputStreamVector) {
            try {
                if (streamInfo != null) {
                    streamInfo.close(true);
                }
            } catch (IOException e2) {
                LOG.error(CCStringUtils.stringifyException(e2));
            }
            outputStreamVector.clear();
        }
    }
    return hdfs;
}
From source file: org.dutir.lucene.io.HadoopUtility.java
License: Mozilla Public License
protected static Path findCacheFileByFragment(JobConf jc, String name) throws IOException {
    URI[] ps = DistributedCache.getCacheFiles(jc);
    URI defaultFS = FileSystem.getDefaultUri(jc);
    if (ps == null)
        return null;
    for (URI _p : ps) {
        final URI p = defaultFS.resolve(_p);
        if (p.getFragment().equals(name)) {
            logger.debug("Found matching path in DistributedCache in search for " + name + " : "
                    + new Path(p.getScheme(), p.getAuthority(), p.getPath()).toString());
            return new Path(p.getScheme(), p.getAuthority(), p.getPath());
        }
    }
    return null;
}