List of usage examples for java.util.Random.nextBoolean()
public boolean nextBoolean()
From source file:org.apache.hadoop.hbase.HBaseTestingUtility.java
/**
 * Creates a table and fills it with pseudo-random puts and deletes.
 *
 * <p>The random generator is seeded from {@code tableName.hashCode()}, so the same table name
 * produces the same sequence of operations. Row keys are random non-negative ints formatted as
 * eight lower-case hex digits; cell timestamps are raw {@code Random.nextInt()} values and may
 * therefore be negative.
 *
 * @param tableName name of the table to create
 * @param families column family names
 * @param maxVersions passed through to {@code createTable}
 * @param numColsPerRow number of column operations generated per row
 * @param numFlushes number of write-then-flush rounds
 * @param numRegions number of regions; also the divisor used to derive the split keys
 * @param numRowsPerFlush number of rows written in each round
 * @return the populated table
 */
public HTable createRandomTable(String tableName, final Collection<String> families,
    final int maxVersions, final int numColsPerRow, final int numFlushes, final int numRegions,
    final int numRowsPerFlush) throws IOException, InterruptedException {
  LOG.info("\n\nCreating random table " + tableName + " with " + numRegions + " regions, "
      + numFlushes + " storefiles per region, " + numRowsPerFlush
      + " rows per flush, maxVersions=" + maxVersions + "\n");

  final Random rand = new Random(tableName.hashCode() * 17L + 12938197137L);

  // Encode the family names once up front.
  final int familyCount = families.size();
  final byte[][] familyBytes = new byte[familyCount][];
  int slot = 0;
  for (String family : families) {
    familyBytes[slot] = Bytes.toBytes(family);
    slot++;
  }

  // Key space is [0, Integer.MAX_VALUE); the split range leaves one region's worth of keys
  // before the first split point and after the last one.
  final int firstKey = 0;
  final int lastKey = Integer.MAX_VALUE;
  final int regionSpan = (lastKey - firstKey) / numRegions;
  final int firstSplitKey = firstKey + regionSpan;
  final int lastSplitKey = lastKey - regionSpan;
  final String keyFormat = "%08x";

  final byte[] firstSplit = Bytes.toBytes(String.format(keyFormat, firstSplitKey));
  final byte[] lastSplit = Bytes.toBytes(String.format(keyFormat, lastSplitKey));
  final HTable table =
      createTable(tableName, familyBytes, maxVersions, firstSplit, lastSplit, numRegions);

  if (hbaseCluster != null) {
    getMiniHBaseCluster().flushcache(TableName.META_TABLE_NAME);
  }

  for (int flush = 0; flush < numFlushes; ++flush) {
    for (int rowNum = 0; rowNum < numRowsPerFlush; ++rowNum) {
      final int rowKey = firstKey + rand.nextInt(lastKey - firstKey);
      final byte[] row = Bytes.toBytes(String.format(keyFormat, rowKey));
      final Put put = new Put(row);
      final Delete del = new Delete(row);

      for (int col = 0; col < numColsPerRow; ++col) {
        final byte[] family = familyBytes[rand.nextInt(familyCount)];
        final long ts = rand.nextInt();
        final byte[] qualifier = Bytes.toBytes("col" + col);

        if (!rand.nextBoolean()) {
          // Delete path: mostly single-version deletes, otherwise all versions up to ts.
          if (rand.nextDouble() < 0.8) {
            del.deleteColumn(family, qualifier, ts);
          } else {
            del.deleteColumns(family, qualifier, ts);
          }
          continue;
        }

        final String text = "value_for_row_" + rowNum + "_cf_" + Bytes.toStringBinary(family)
            + "_col_" + col + "_ts_" + ts + "_random_" + rand.nextLong();
        put.add(family, qualifier, ts, Bytes.toBytes(text));
      }

      if (!put.isEmpty()) {
        table.put(put);
      }
      if (!del.isEmpty()) {
        table.delete(del);
      }
    }

    LOG.info("Initiating flush #" + flush + " for table " + tableName);
    table.flushCommits();
    if (hbaseCluster != null) {
      getMiniHBaseCluster().flushcache(table.getName());
    }
  }

  return table;
}
From source file:TestDataLoader.java
/** * Load randomly generated submissions./*from w w w . ja v a 2 s .c o m*/ */ public static void loadSubmissions(long seed, int howMany) throws IOException { // Cache a list of all embargo types. List<EmbargoType> embargos = settingRepo.findAllEmbargoTypes(); // Cache the person who will generate action logs for all items. Person reviewer = personRepo.findPersonByEmail("jdimaggio@gmail.com"); // Establish a constant random seed so each run through this code produces the same results. Random random = new Random(seed); long start = System.currentTimeMillis(); for (int i = 0; i < howMany; i++) { context.turnOffAuthorization(); Person student = personRepo.findPersonByNetId("student" + i); if (student == null) { String[] studentName = generateRandomName(random, ACTOR_NAMES); String studentEmail = generateRandomEmail(random, studentName); studentEmail = studentEmail.replaceFirst("@", (i + 1) + "@"); student = personRepo .createPerson("student" + i, studentEmail, studentName[0], studentName[1], RoleType.STUDENT) .save(); if (i > 0) { student.setMiddleName(studentName[2]); student.setPassword("password"); student.setCurrentPhoneNumber("555-555-5555"); student.setCurrentPostalAddress("2335 Barron Basen Dr\nSome Town, Texas 77845"); student.setPermanentPhoneNumber("555-999-9999"); student.setPermanentPostalAddress("2335 Dry Gulch Dr\nAnother Town, IL 78834"); student.setPermanentEmailAddress("permanent@gmail.com"); student.setInstitutionalIdentifier("" + random.nextInt(9) + random.nextInt(9) + random.nextInt(9) + random.nextInt(9) + random.nextInt(9) + random.nextInt(9) + random.nextInt(9) + random.nextInt(9) + random.nextInt(9)); } student.save(); } String[] studentName = new String[3]; studentName[0] = student.getFirstName(); studentName[1] = student.getLastName(); studentName[2] = student.getMiddleName(); context.restoreAuthorization(); context.login(student); Submission sub = subRepo.createSubmission(student).save(); if (i > 0) { 
sub.setStudentFirstName(studentName[0]); sub.setStudentLastName(studentName[1]); sub.setStudentMiddleName(studentName[2]); if (random.nextInt(100) > 30) sub.setStudentBirthYear(random.nextInt(20) + 1980); if (random.nextInt(100) > 5) sub.setEmbargoType(embargos.get(random.nextInt(embargos.size() - 1))); int members = random.nextInt(5); String[] firstMemberName = null; for (int m = 0; m < members; m++) { String[] memberName = generateRandomName(random, FAMOUS_NAMES); if (firstMemberName == null) firstMemberName = memberName; CommitteeMember member = sub.addCommitteeMember(memberName[0], memberName[1], memberName[2]) .save(); if (random.nextInt(100) > 75) { DegreeLevelArray role = ROLETYPES_DEFINITIONS[random .nextInt(ROLETYPES_DEFINITIONS.length - 1)]; member.addRole(role.name); if (random.nextInt(100) > 75) { role = ROLETYPES_DEFINITIONS[random.nextInt(ROLETYPES_DEFINITIONS.length - 1)]; try { // We could pick the same role twice. member.addRole(role.name); } catch (IllegalArgumentException iae) { /* ignore */} ; } } member.save(); } if (random.nextInt(100) > 5 && firstMemberName != null) sub.setCommitteeContactEmail(generateRandomEmail(random, firstMemberName)); if (random.nextInt(100) > 5) { DegreeLevelArray degree = DEGREES_DEFINITIONS[random.nextInt(DEGREES_DEFINITIONS.length - 1)]; sub.setDegree(degree.name); sub.setDegreeLevel(degree.degreeLevel); } if (random.nextInt(100) > 5) sub.setDocumentTitle(generateRandomTitle(random)); if (random.nextInt(100) > 5) sub.setDocumentAbstract(generateRandomAbstract(random)); if (random.nextInt(100) > 5) sub.setDocumentKeywords(generateRandomKeywords(random)); if (random.nextInt(100) > 5) { List<ProquestSubject> subjects = proquestRepo.findAllSubjects(); sub.addDocumentSubject(subjects.get(random.nextInt(subjects.size() - 1)).getDescription()); if (random.nextInt(100) > 5) { sub.addDocumentSubject(subjects.get(random.nextInt(subjects.size() - 1)).getDescription()); if (random.nextInt(100) > 5) { sub.addDocumentSubject( 
subjects.get(random.nextInt(subjects.size() - 1)).getDescription()); } } } if (random.nextInt(100) > 5) { sub.setDocumentLanguage(LANGUAGES_DEFINITIONS[random.nextInt(LANGUAGES_DEFINITIONS.length)]); } if (random.nextInt(100) > 5) sub.setDepartment(DEPARTMENTS_DEFINITIONS[random.nextInt(DEPARTMENTS_DEFINITIONS.length - 1)]); if (random.nextInt(100) > 5) sub.setProgram(PROGRAMS_DEFINITIONS[random.nextInt(PROGRAMS_DEFINITIONS.length - 1)]); if (random.nextInt(100) > 5) sub.setCollege(COLLEGES_DEFINITIONS[random.nextInt(COLLEGES_DEFINITIONS.length - 1)]); if (random.nextInt(100) > 5) sub.setMajor(MAJORS_DEFINITIONS[random.nextInt(MAJORS_DEFINITIONS.length - 1)]); if (random.nextInt(100) > 5) sub.setDocumentType(DOCTYPES_DEFINITIONS[random.nextInt(DOCTYPES_DEFINITIONS.length - 1)].name); if (random.nextInt(100) > 5) { sub.setGraduationYear(random.nextInt(10) + 2002); sub.setGraduationMonth( GRAD_MONTHS_DEFINITIONS[random.nextInt(GRAD_MONTHS_DEFINITIONS.length - 1)]); } if (random.nextInt(100) > 50) sub.setDefenseDate(generateRandomDate(random, 2, 2010)); if (random.nextInt(100) > 5) sub.setUMIRelease(random.nextBoolean()); if (random.nextInt(100) > 5) sub.setSubmissionDate(generateRandomDate(random, 2, 2010)); if (random.nextInt(100) > 70) sub.setApprovalDate(generateRandomDate(random, 2, 2010)); if (random.nextInt(100) > 50) sub.setCommitteeEmbargoApprovalDate(generateRandomDate(random, 2, 2010)); if (random.nextInt(100) > 50) sub.setCommitteeApprovalDate(generateRandomDate(random, 2, 2010)); if (random.nextInt(100) > 5) sub.setCommitteeEmailHash(generateCommitteEmailHash()); if (random.nextInt(100) > 5) sub.addAttachment(new File("test/SamplePrimaryDocument.pdf"), AttachmentType.PRIMARY); if (random.nextInt(100) > 20) { Date agreementDate = generateRandomDate(random, 2, 2010); String stampedLicense = stampLicense(settingRepo.getConfigValue(SUBMIT_LICENSE_TEXT), agreementDate); sub.addAttachment(stampedLicense.getBytes(), "LICENSE.txt", AttachmentType.LICENSE); 
sub.setLicenseAgreementDate(agreementDate); } if (random.nextInt(100) > 35) { Date agreementDate = generateRandomDate(random, 2, 2010); String stampedLicense = stampLicense(settingRepo.getConfigValue(PROQUEST_LICENSE_TEXT), agreementDate); sub.addAttachment(stampedLicense.getBytes(), "PROQUEST_LICENSE.txt", AttachmentType.LICENSE); } if (random.nextInt(100) > 75) sub.addAttachment(new File("test/SampleSupplementalDocument.doc"), AttachmentType.SUPPLEMENTAL); if (random.nextInt(100) > 75) sub.addAttachment(new File("test/SampleSupplementalDocument.xls"), AttachmentType.SUPPLEMENTAL); if (random.nextInt(100) > 50) sub.addAttachment(new File("test/SampleFeedbackDocument.png"), AttachmentType.FEEDBACK); context.turnOffAuthorization(); if (random.nextInt(100) > 50) sub.setReviewerNotes(generateRandomTitle(random)); context.restoreAuthorization(); } sub.save(); context.logout(); if (i > 0) { // Generate modifications to the context.login(reviewer); int actionLogs = random.nextInt(30) + 10; for (int l = 0; l < actionLogs; l++) { if (random.nextInt(100) > 30) { // Create randomly generated action. sub.logAction("Randomly generated action"); } else { State state = sub.getState(); List<State> transitions = state.getTransitions(sub); if (transitions.size() == 0) transitions = stateManager.getAllStates(); if (transitions.size() == 1) { sub.setState(transitions.get(0)); } else { sub.setState(transitions.get(random.nextInt(transitions.size() - 1))); } } sub.save(); } context.logout(); } if (i > 0 && i % 100 == 0) { // Do a database commit every 100 transactions. 
JPA.em().getTransaction().commit(); JPA.em().clear(); JPA.em().getTransaction().begin(); // Reload persistant objects embargos = settingRepo.findAllEmbargoTypes(); reviewer = personRepo.findPersonByEmail("jdimaggio@gmail.com"); Logger.debug("Random submission generator: " + i + " submissions at " + ((System.currentTimeMillis() - start) / i) + " milleseconds per submission (in progress)"); } } Logger.debug("Random submission generator: " + howMany + " submissions at " + ((System.currentTimeMillis() - start) / howMany) + " milleseconds per submission (finished)"); JPA.em().getTransaction().commit(); JPA.em().clear(); JPA.em().getTransaction().begin(); }
From source file:com.gtwm.pb.model.manageData.DataManagement.java
public void anonymiseData(TableInfo table, HttpServletRequest request, SessionDataInfo sessionData, Map<BaseField, FieldContentType> fieldContentTypes, List<FileItem> multipartItems) throws SQLException, CodingErrorException, CantDoThatException, InputRecordException, ObjectNotFoundException, DisallowedException, MissingParametersException { if (!request.getServerName().contains("backup")) { throw new CantDoThatException("For safety, anonymisation can only run on a test/backup server"); }/* w w w .j ava2 s . com*/ Random randomGenerator = new Random(); String[] alphabet = { "A", "B", "C", "D", "E", "F", "G", "H", "I", "J", "K", "L", "M", "N", "O", "P", "Q", "R", "S", "T", "U", "V", "W", "X", "Y", "Z" }; // Get data we're going to anonymise List<DataRowInfo> dataRows = this.getReportDataRows(null, table.getDefaultReport(), new HashMap<BaseField, String>(), false, new HashMap<BaseField, Boolean>(0), -1, QuickFilterType.AND, false); // Build up list of names List<String> forenames = new LinkedList<String>(); List<String> surnames = new LinkedList<String>(); List<String> emailAddresses = new LinkedList<String>(); List<String> companyNameParts = new LinkedList<String>(); List<String> phoneNumbers = new LinkedList<String>(); List<String> niNumbers = new LinkedList<String>(); List<String> capitalisedWords = new LinkedList<String>(); List<String> emailParts = new LinkedList<String>(); Set<String> emailSuffixes = new LinkedHashSet<String>(); emailSuffixes.add(".co.uk"); emailSuffixes.add("com"); emailSuffixes.add(".org.uk"); emailSuffixes.add(".org"); emailSuffixes.add(".net"); emailSuffixes.add(".ac.uk"); Pattern numeralPattern = Pattern.compile("[123456789]"); // no zero Pattern capitalWordsPattern = Pattern.compile("[A-Z][a-z0-9]+"); int randomMultiplier = randomGenerator.nextInt(10) + 5; for (DataRowInfo dataRow : dataRows) { for (BaseField field : fieldContentTypes.keySet()) { FieldContentType contentType = fieldContentTypes.get(field); String keyValue = 
dataRow.getDataRowFields().get(field).getKeyValue(); if (contentType.equals(FieldContentType.COMPANY_NAME)) { for (String part : keyValue.split("\\s")) { String cleanedPart = part.replace("(", "").replace(")", ""); companyNameParts.add(cleanedPart); capitalisedWords.add(cleanedPart); } } else if (contentType.equals(FieldContentType.FULL_NAME)) { String fullName = keyValue; String surname = fullName.replaceAll("^.*\\s", ""); String forename = fullName.substring(0, fullName.length() - surname.length()); forenames.add(forename); surnames.add(surname); capitalisedWords.add(forename); capitalisedWords.add(surname); } else if (contentType.equals(FieldContentType.PHONE_NUMBER)) { if (keyValue.length() > 0) { StringBuffer phoneNumber = new StringBuffer("01632 "); for (int i = 0; i < 6; i++) { phoneNumber.append(randomGenerator.nextInt(10)); } phoneNumbers.add(phoneNumber.toString()); } } else if (contentType.equals(FieldContentType.NI_NUMBER)) { if (keyValue.length() > 0) { StringBuffer niNumber = new StringBuffer(); niNumber.append(alphabet[randomGenerator.nextInt(26)]); niNumber.append(alphabet[randomGenerator.nextInt(26)]); niNumber.append(randomGenerator.nextInt(10)); niNumber.append(randomGenerator.nextInt(10)); niNumber.append(randomGenerator.nextInt(10)); niNumber.append(randomGenerator.nextInt(10)); niNumber.append(randomGenerator.nextInt(10)); niNumber.append(randomGenerator.nextInt(10)); niNumber.append(alphabet[randomGenerator.nextInt(26)]); niNumbers.add(niNumber.toString()); } else { niNumbers.add(""); } } else if (contentType.equals(FieldContentType.EMAIL_ADDRESS)) { if (keyValue.contains("@")) { String emailSansSuffix = keyValue.trim().toLowerCase(); for (String emailSuffix : emailSuffixes) { emailSansSuffix = emailSansSuffix.replaceAll(Pattern.quote(emailSuffix) + "$", ""); } String[] emailComponents = emailSansSuffix.replace("@", ".").split("\\."); for (String emailComponent : emailComponents) { emailParts.add(emailComponent); } } } else if 
(contentType.equals(FieldContentType.NOTES)) { // extract capitalised words Matcher matcher = capitalWordsPattern.matcher(keyValue); while (matcher.find()) { capitalisedWords.add(matcher.group()); } } } } // Anonymise Set<String> generatedCompanyNames = new HashSet<String>(1000); for (DataRowInfo dataRow : dataRows) { int rowId = dataRow.getRowId(); LinkedHashMap<BaseField, BaseValue> dataToSave = new LinkedHashMap<BaseField, BaseValue>(); for (BaseField field : fieldContentTypes.keySet()) { FieldContentType contentType = fieldContentTypes.get(field); if (contentType.equals(FieldContentType.COMPANY_NAME)) { int partIndex = randomGenerator.nextInt(companyNameParts.size()); String companyName = ""; int pass = 0; while (companyName.equals("") && (pass < 100)) { pass++; String companyNamePart = companyNameParts.get(partIndex); if (!companyNamePart.trim().toLowerCase().equals("ltd")) { companyName += companyNamePart + " "; } if (randomGenerator.nextBoolean()) { partIndex = randomGenerator.nextInt(companyNameParts.size()); companyNamePart = companyNameParts.get(partIndex); companyName += companyNamePart + " "; if (!companyNamePart.trim().toLowerCase().equals("ltd")) { if (randomGenerator.nextBoolean()) { partIndex = randomGenerator.nextInt(companyNameParts.size()); companyNamePart = companyNameParts.get(partIndex); companyName += companyNamePart + " "; } } } companyName = companyName.trim(); // Company name may be unsuitable for a whole host // of reasons if (companyName.toLowerCase().equals("ltd") || companyName.toLowerCase().endsWith(" the") || companyName.toLowerCase().endsWith(" for") || companyName.toLowerCase().endsWith(" &") || companyName.toLowerCase().endsWith(" of") || companyName.matches("^\\W.*") || generatedCompanyNames.contains(companyName)) { companyName = ""; } } generatedCompanyNames.add(companyName); TextValue companyNameValue = new TextValueDefn(companyName); dataToSave.put(field, companyNameValue); } else if 
(contentType.equals(FieldContentType.FULL_NAME)) { int forenameIndex = randomGenerator.nextInt(forenames.size()); String randomForename = forenames.get(forenameIndex); int surnameIndex = randomGenerator.nextInt(surnames.size()); String randomSurname = surnames.get(surnameIndex); TextValue fullNameValue = new TextValueDefn(randomForename + randomSurname); dataToSave.put(field, fullNameValue); } else if (contentType.equals(FieldContentType.PHONE_NUMBER)) { String currentKey = dataRow.getDataRowFields().get(field).getKeyValue(); if (!currentKey.equals("")) { int phoneIndex = randomGenerator.nextInt(phoneNumbers.size()); TextValue phoneNumber = new TextValueDefn(phoneNumbers.get(phoneIndex)); dataToSave.put(field, phoneNumber); } } else if (contentType.equals(FieldContentType.NI_NUMBER)) { int niIndex = randomGenerator.nextInt(niNumbers.size()); TextValue niNumber = new TextValueDefn(niNumbers.get(niIndex)); dataToSave.put(field, niNumber); } else if (contentType.equals(FieldContentType.EMAIL_ADDRESS)) { int dataRowIndex = randomGenerator.nextInt(dataRows.size()); String currentKey = dataRow.getDataRowFields().get(field).getKeyValue(); if (currentKey != null) { String emailAddress = ""; if (currentKey.contains("@")) { int partIndex = randomGenerator.nextInt(emailParts.size()); emailAddress = emailParts.get(partIndex); if (randomGenerator.nextBoolean()) { emailAddress += "." 
+ emailParts.get(randomGenerator.nextInt(emailParts.size())); } emailAddress += "@"; emailAddress += emailParts.get(randomGenerator.nextInt(emailParts.size())); if (randomGenerator.nextBoolean()) { emailAddress += ".com"; } else if (randomGenerator.nextBoolean()) { emailAddress += ".co.uk"; } else if (randomGenerator.nextBoolean()) { emailAddress += ".org.uk"; } else if (randomGenerator.nextBoolean()) { emailAddress += ".ac.uk"; } else if (randomGenerator.nextBoolean()) { emailAddress += ".net"; } else { emailAddress += ".org"; } } TextValue emailValue = new TextValueDefn(emailAddress); dataToSave.put(field, emailValue); } } else if (contentType.equals(FieldContentType.CODE)) { int dataRowIndex = randomGenerator.nextInt(dataRows.size()); String currentKey = dataRow.getDataRowFields().get(field).getKeyValue(); if (currentKey != null) { int length = currentKey.length(); StringBuilder code = new StringBuilder(""); for (int i = 0; i < length; i++) { if (i < 3 || (i % 10 == 0)) { code.append(alphabet[randomGenerator.nextInt(26)]); } else { code.append(randomGenerator.nextInt(10)); } } TextValue codeValue = new TextValueDefn(code.toString()); dataToSave.put(field, codeValue); } } else if (contentType.equals(FieldContentType.NOTES)) { // Replace all capitalised words with another capitalised // word int dataRowIndex = randomGenerator.nextInt(dataRows.size()); String currentKey = dataRow.getDataRowFields().get(field).getKeyValue(); String newKey = anonymiseNote(capitalisedWords, currentKey); TextValue textValue = new TextValueDefn(newKey); dataToSave.put(field, textValue); } else if (contentType.equals(FieldContentType.OTHER)) { int dataRowIndex = randomGenerator.nextInt(dataRows.size()); DataRowInfo randomDataRow = dataRows.get(dataRowIndex); String randomKey = randomDataRow.getValue(field).getKeyValue(); if (field instanceof TextField) { // Anonymise numbers within the text Matcher matcher = numeralPattern.matcher(randomKey); char[] keyChars = randomKey.toCharArray(); 
while (matcher.matches()) { int position = matcher.start(); keyChars[position] = alphabet[randomGenerator.nextInt(26)].charAt(0); } TextValue textValue = new TextValueDefn(String.valueOf(keyChars)); dataToSave.put(field, textValue); } else if (field instanceof IntegerField) { String valueString = randomDataRow.getValue(field).getKeyValue(); if (valueString != null) { if (!valueString.equals("")) { valueString = valueString.replace(",", ""); int integer = Integer.valueOf(valueString); integer = integer * randomMultiplier; IntegerValue intValue = new IntegerValueDefn(integer); dataToSave.put(field, intValue); } } } else if (field instanceof DecimalField) { String valueString = randomDataRow.getValue(field).getKeyValue(); if (valueString != null) { if (!valueString.equals("")) { valueString = valueString.replace(",", ""); double decimal = Double.valueOf(valueString); decimal = decimal * randomMultiplier; DecimalValue decimalValue = new DecimalValueDefn(decimal); dataToSave.put(field, decimalValue); } } } else if (field instanceof RelationField) { IntegerValue intValue = new IntegerValueDefn(Integer.valueOf(randomKey)); dataToSave.put(field, intValue); } } else { throw new CodingErrorException("Unhandled anonymisation content type " + contentType); } } this.saveRecord(request, table, dataToSave, false, rowId, sessionData, multipartItems); } this.anonymizeComments(table, request, capitalisedWords); }