// Java tutorial
package edu.stanford.nlp.sequences; import edu.stanford.nlp.ling.CoreLabel; import edu.stanford.nlp.optimization.StochasticCalculateMethods; import edu.stanford.nlp.process.WordShapeClassifier; import edu.stanford.nlp.util.ReflectionLoading; import edu.stanford.nlp.util.logging.Redwood; import java.io.Serializable; import java.lang.reflect.Field; import java.util.*; import java.util.function.Function; /** * Flags for sequence classifiers. Documentation for general flags and * flags for NER can be found in the Javadoc of * {@link edu.stanford.nlp.ie.NERFeatureFactory}. Documentation for the flags * for Chinese word segmentation can be found in the Javadoc of * {@link edu.stanford.nlp.wordseg.ChineseSegmenterFeatureFactory}. * * <i>IMPORTANT NOTE IF CHANGING THIS FILE:</i> <b>MAKE SURE</b> TO * ONLY ADD NEW VARIABLES AT THE END OF THE LIST OF VARIABLES (and not * to change existing variables)! Otherwise you usually break all * currently serialized classifiers!!! Search for "ADD VARIABLES ABOVE * HERE" below. * * Some general flags are described here * <table border="1"> * <caption>Flags for sequence classifiers</caption> * <tr> * <td><b>Property Name</b></td> * <td><b>Type</b></td> * <td><b>Default Value</b></td> * <td><b>Description</b></td> * </tr> * <tr> * <td>useQN</td> * <td>boolean</td> * <td>true</td> * <td>Use Quasi-Newton (L-BFGS) optimization to find minimum. 
NOTE: Need to set this to * false if using other minimizers such as SGD.</td> * </tr> * <tr> * <td>QNsize</td> * <td>int</td> * <td>25</td> * <td>Number of previous iterations of Quasi-Newton to store (this increases * memory use, but speeds convergence by letting the Quasi-Newton optimization * more effectively approximate the second derivative).</td> * </tr> * <tr> * <td>QNsize2</td> * <td>int</td> * <td>25</td> * <td>Number of previous iterations of Quasi-Newton to store (used when pruning * features, after the first iteration - the first iteration is with QNSize).</td> * </tr> * <tr> * <td>useInPlaceSGD</td> * <td>boolean</td> * <td>false</td> * <td>Use SGD (tweaking weights in place) to find minimum (more efficient than * the old SGD, faster to converge than Quasi-Newton if there are very large of * samples). Implemented for CRFClassifier. NOTE: Remember to set useQN to false * </td> * </tr> * <tr> * <td>tuneSampleSize</td> * <td>int</td> * <td>-1</td> * <td>If this number is greater than 0, specifies the number of samples to use * for tuning (default is 1000).</td> * </tr> * <tr> * <td>SGDPasses</td> * <td>int</td> * <td>-1</td> * <td>If this number is greater than 0, specifies the number of SGD passes over * entire training set) to do before giving up (default is 50). Can be smaller * if sample size is very large.</td> * </tr> * <tr> * <td>useSGD</td> * <td>boolean</td> * <td>false</td> * <td>Use SGD to find minimum (can be slow). NOTE: Remember to set useQN to * false</td> * </tr> * <tr> * <td>useSGDtoQN</td> * <td>boolean</td> * <td>false</td> * <td>Use SGD (SGD version selected by useInPlaceSGD or useSGD) for a certain * number of passes (SGDPasses) and then switches to QN. Gives the quick initial * convergence of SGD, with the desired convergence criterion of QN (there is * some ramp up time for QN). 
NOTE: Remember to set useQN to false</td> * </tr> * <tr> * <td>evaluateIters</td> * <td>int</td> * <td>0</td> * <td>If this number is greater than 0, evaluates on the test set every so * often while minimizing. Implemented for CRFClassifier.</td> * </tr> * <tr> * <td>evalCmd</td> * <td>String</td> * <td></td> * <td>If specified (and evaluateIters is set), runs the specified cmdline * command during evaluation (instead of default CONLL-like NER evaluation)</td> * </tr> * <tr> * <td>evaluateTrain</td> * <td>boolean</td> * <td>false</td> * <td>If specified (and evaluateIters is set), also evaluate on training set * (can be expensive)</td> * </tr> * <tr> * <td>tokenizerOptions</td><td>String</td> * <td>(null)</td> * <td>Extra options to supply to the tokenizer when creating it.</td> * </tr> * <tr> * <td>tokenizerFactory</td><td>String</td> * <td>(null)</td> * <td>A different tokenizer factory to use if the ReaderAndWriter in question uses tokenizers.</td> * </tr> * </table> * * @author Jenny Finkel */ public class SeqClassifierFlags implements Serializable { /** A logger for this class */ private static final Redwood.RedwoodChannels log = Redwood.channels(SeqClassifierFlags.class); private static final long serialVersionUID = -7076671761070232567L; public static final String DEFAULT_BACKGROUND_SYMBOL = "O"; private String stringRep = ""; public boolean useNGrams = false; public boolean conjoinShapeNGrams = false; public boolean lowercaseNGrams = false; public boolean dehyphenateNGrams = false; public boolean usePrev = false; public boolean useNext = false; public boolean useTags = false; public boolean useWordPairs = false; public boolean useGazettes = false; public boolean useSequences = true; public boolean usePrevSequences = false; public boolean useNextSequences = false; public boolean useLongSequences = false; public boolean useBoundarySequences = false; public boolean useTaggySequences = false; public boolean useExtraTaggySequences = false; public boolean 
dontExtendTaggy = false; public boolean useTaggySequencesShapeInteraction = false; public boolean strictlyZeroethOrder = false; public boolean strictlyFirstOrder = false; public boolean strictlySecondOrder = false; public boolean strictlyThirdOrder = false; public String entitySubclassification = "IO"; public boolean retainEntitySubclassification = false; public boolean useGazettePhrases = false; public boolean makeConsistent = false; public boolean useViterbi = true; public int[] binnedLengths = null; public boolean verboseMode = false; public boolean useSum = false; public double tolerance = 1e-4; // Turned on if non-null. Becomes part of the filename features are printed to. // The meaning of this option varies between classifiers (see exportFeatures for another option): // - CMMClassifier: print the features of each datum // - CRFClassifier: just dump the list of feature names for the whole dataset public String printFeatures = null; public boolean useSymTags = false; /** * useSymWordPairs Has a small negative effect. 
*/ public boolean useSymWordPairs = false; public String printClassifier = "WeightHistogram"; public int printClassifierParam = 100; public boolean intern = false; public boolean intern2 = false; public boolean selfTest = false; public boolean sloppyGazette = false; public boolean cleanGazette = false; public boolean noMidNGrams = false; public int maxNGramLeng = -1; public boolean useReverse = false; public boolean greekifyNGrams = false; public boolean useParenMatching = false; public boolean useLemmas = false; public boolean usePrevNextLemmas = false; public boolean normalizeTerms = false; public boolean normalizeTimex = false; public boolean useNB = false; public boolean useQN = true; public boolean useFloat = false; public int QNsize = 25; public int QNsize2 = 25; public int maxIterations = -1; public int wordShape = WordShapeClassifier.NOWORDSHAPE; /** Set useShapeStrings to be true to say that the model should use word shape features and they are provided in * the tokens, but should not be calculated via a word shape function. This flag must be false if the word shape * features will be calculated; word shape features are also added if there is a defined word shape function. 
*/ public boolean useShapeStrings = false; public boolean useTypeSeqs = false; public boolean useTypeSeqs2 = false; public boolean useTypeSeqs3 = false; public boolean useDisjunctive = false; public int disjunctionWidth = 4; public boolean useDisjunctiveShapeInteraction = false; public boolean useDisjShape = false; public boolean useWord = true; // ON by default public boolean useClassFeature = false; public boolean useShapeConjunctions = false; public boolean useWordTag = false; public boolean useNPHead = false; public boolean useNPGovernor = false; public boolean useHeadGov = false; public boolean useLastRealWord = false; public boolean useNextRealWord = false; public boolean useOccurrencePatterns = false; public boolean useTypeySequences = false; public boolean justify = false; public boolean normalize = false; public String priorType = "QUADRATIC"; public double sigma = 1.0; public double epsilon = 0.01; public int beamSize = 30; public int maxLeft = 2; public int maxRight = 0; public boolean usePosition = false; public boolean useBeginSent = false; public boolean useGazFeatures = false; public boolean useMoreGazFeatures = false; public boolean useAbbr = false; public boolean useMinimalAbbr = false; public boolean useAbbr1 = false; public boolean useMinimalAbbr1 = false; public boolean useMoreAbbr = false; public boolean deleteBlankLines = false; public boolean useGENIA = false; public boolean useTOK = false; public boolean useABSTR = false; public boolean useABSTRFreqDict = false; public boolean useABSTRFreq = false; public boolean useFREQ = false; public boolean useABGENE = false; public boolean useWEB = false; public boolean useWEBFreqDict = false; public boolean useIsURL = false; public boolean useURLSequences = false; public boolean useIsDateRange = false; public boolean useEntityTypes = false; public boolean useEntityTypeSequences = false; public boolean useEntityRule = false; public boolean useOrdinal = false; public boolean useACR = false; public 
boolean useANTE = false; public boolean useMoreTags = false; public boolean useChunks = false; public boolean useChunkySequences = false; public boolean usePrevVB = false; public boolean useNextVB = false; public boolean useVB = false; public boolean subCWGaz = false; // TODO OBSOLETE: delete when breaking serialization sometime. public String documentReader = "ColumnDocumentReader"; // public String trainMap = "word=0,tag=1,answer=2"; // public String testMap = "word=0,tag=1,answer=2"; public String map = "word=0,tag=1,answer=2"; public boolean useWideDisjunctive = false; public int wideDisjunctionWidth = 10; // chinese word-segmenter features public boolean useRadical = false; public boolean useBigramInTwoClique = false; public String morphFeatureFile = null; public boolean useReverseAffix = false; public int charHalfWindow = 3; public boolean useWord1 = false; public boolean useWord2 = false; public boolean useWord3 = false; public boolean useWord4 = false; public boolean useRad1 = false; public boolean useRad2 = false; public boolean useWordn = false; public boolean useCTBPre1 = false; public boolean useCTBSuf1 = false; public boolean useASBCPre1 = false; public boolean useASBCSuf1 = false; public boolean usePKPre1 = false; public boolean usePKSuf1 = false; public boolean useHKPre1 = false; public boolean useHKSuf1 = false; public boolean useCTBChar2 = false; public boolean useASBCChar2 = false; public boolean useHKChar2 = false; public boolean usePKChar2 = false; public boolean useRule2 = false; public boolean useDict2 = false; public boolean useOutDict2 = false; public String outDict2 = "/u/htseng/scr/chunking/segmentation/out.lexicon"; public boolean useDictleng = false; public boolean useDictCTB2 = false; public boolean useDictASBC2 = false; public boolean useDictPK2 = false; public boolean useDictHK2 = false; public boolean useBig5 = false; public boolean useNegDict2 = false; public boolean useNegDict3 = false; public boolean useNegDict4 = false; public 
boolean useNegCTBDict2 = false; public boolean useNegCTBDict3 = false; public boolean useNegCTBDict4 = false; public boolean useNegASBCDict2 = false; public boolean useNegASBCDict3 = false; public boolean useNegASBCDict4 = false; public boolean useNegHKDict2 = false; public boolean useNegHKDict3 = false; public boolean useNegHKDict4 = false; public boolean useNegPKDict2 = false; public boolean useNegPKDict3 = false; public boolean useNegPKDict4 = false; public boolean usePre = false; public boolean useSuf = false; public boolean useRule = false; public boolean useHk = false; public boolean useMsr = false; public boolean useMSRChar2 = false; public boolean usePk = false; public boolean useAs = false; public boolean useFilter = false; // TODO this flag is used for nothing; // delete when breaking serialization public boolean largeChSegFile = false; // TODO this flag is used for nothing; // delete when breaking serialization public boolean useRad2b = false; /** * Keep the whitespace between English words in testFile when printing out * answers. Doesn't really change the content of the CoreLabels. (For Chinese * segmentation.) */ public boolean keepEnglishWhitespaces = false; /** * Keep all the whitespace words in testFile when printing out answers. * Doesn't really change the content of the CoreLabels. (For Chinese * segmentation.) 
*/ public boolean keepAllWhitespaces = false; public boolean sighanPostProcessing = false; /** * use POS information (an "open" feature for Chinese segmentation) */ public boolean useChPos = false; // CTBSegDocumentReader normalization table // A value of null means that a default algorithmic normalization // is done in which ASCII characters get mapped to their fullwidth // equivalents in the Unihan range public String normalizationTable; // = null; public String dictionary; // = null; public String serializedDictionary; // = null; public String dictionary2; // = null; public String normTableEncoding = "GB18030"; /** * for Sighan bakeoff 2005, the path to the dictionary of bigrams appeared in * corpus */ public String sighanCorporaDict = "/u/nlp/data/chinese-segmenter/"; // end Sighan 20005 chinese word-segmenter features/properties public boolean useWordShapeGaz = false; public String wordShapeGaz = null; // TODO: This should be removed in favor of suppressing splitting when // maxDocSize <= 0, when next breaking serialization // this now controls nothing public boolean splitDocuments = true; public boolean printXML; // This is disused and can be removed when breaking serialization public boolean useSeenFeaturesOnly = false; public String lastNameList = "/u/nlp/data/dist.all.last"; public String maleNameList = "/u/nlp/data/dist.male.first"; public String femaleNameList = "/u/nlp/data/dist.female.first"; // don't want these serialized public transient String trainFile = null; /** NER adaptation (Gaussian prior) parameters. 
*/ public transient String adaptFile = null; public transient String devFile = null; public transient String testFile = null; public transient String textFile = null; public transient String textFiles = null; public transient boolean readStdin = false; public transient String outputFile = null; public transient String loadClassifier = null; public transient String loadTextClassifier = null; public transient String loadJarClassifier = null; public transient String loadAuxClassifier = null; public transient String serializeTo = null; public transient String serializeToText = null; public transient int interimOutputFreq = 0; public transient String initialWeights = null; public transient List<String> gazettes = new ArrayList<>(); public transient String selfTrainFile = null; public String inputEncoding = "UTF-8"; // used for CTBSegDocumentReader as well public boolean bioSubmitOutput = false; public int numRuns = 1; public String answerFile = null; public String altAnswerFile = null; public String dropGaz; public String printGazFeatures = null; public int numStartLayers = 1; public boolean dump = false; // whether to merge B- and I- tags in an input file and to tag with IO tags // (lacking a prefix). 
E.g., "I-PERS" goes to "PERS" public boolean mergeTags; public boolean splitOnHead; // threshold public int featureCountThreshold = 0; public double featureWeightThreshold = 0.0; // feature factory public String featureFactory = "edu.stanford.nlp.ie.NERFeatureFactory"; public Object[] featureFactoryArgs = new Object[0]; public String backgroundSymbol = DEFAULT_BACKGROUND_SYMBOL; // use public boolean useObservedSequencesOnly = false; public int maxDocSize = 0; public boolean printProbs = false; public boolean printFirstOrderProbs = false; public boolean saveFeatureIndexToDisk = false; public boolean removeBackgroundSingletonFeatures = false; public boolean doGibbs = false; public int numSamples = 100; public boolean useNERPrior = false; // todo [cdm 2014]: Disused, to be deleted, use priorModelFactory public boolean useAcqPrior = false; // todo [cdm 2014]: Disused, to be deleted, use priorModelFactory public boolean useUniformPrior = false; // todo [cdm 2014]: Disused, to be deleted, use priorModelFactory public boolean useMUCFeatures = false; public double annealingRate = 0.0; public String annealingType = null; public String loadProcessedData = null; public boolean initViterbi = true; public boolean useUnknown = false; public boolean checkNameList = false; public boolean useSemPrior = false; // todo [cdm 2014]: Disused, to be deleted, use priorModelFactory public boolean useFirstWord = false; public boolean useNumberFeature = false; public int ocrFold = 0; public transient boolean ocrTrain = false; // CDM 2017: Disused. Can delete.... 
public String classifierType = "MaxEnt"; public String svmModelFile = null; public String inferenceType = "Viterbi"; public boolean useLemmaAsWord = false; public String type = "cmm"; public String readerAndWriter = "edu.stanford.nlp.sequences.ColumnDocumentReaderAndWriter"; public List<String> comboProps = new ArrayList<>(); public boolean usePrediction = false; public boolean useAltGazFeatures = false; public String gazFilesFile = null; public boolean usePrediction2 = false; public String baseTrainDir = "."; public String baseTestDir = "."; /** A regex pattern for files, which will be evaluated within a particular directory. * If non-null, used over trainFileList and trainFile. */ public String trainFiles = null; public String trainFileList = null; public String testFiles = null; public String trainDirs = null; // cdm 2009: this is currently unsupported, // but one user wanted something like this.... public String testDirs = null; public boolean useOnlySeenWeights = false; public String predProp = null; public CoreLabel pad = new CoreLabel(); public boolean useObservedFeaturesOnly = false; public String distSimLexicon = null; public boolean useDistSim = false; public int removeTopN = 0; public int numTimesRemoveTopN = 1; public double randomizedRatio = 1.0; public double removeTopNPercent = 0.0; public int purgeFeatures = -1; public boolean booleanFeatures = false; // This flag is only used for the sequences Type 2 CRF, not for ie.crf.CRFClassifier public boolean iobWrapper = false; public boolean iobTags = false; /** Binary segmentation feature for character-based Chinese NER. 
*/ public boolean useSegmentation = false; public boolean memoryThrift = false; public boolean timitDatum = false; public String serializeDatasetsDir = null; public String loadDatasetsDir = null; public String pushDir = null; public boolean purgeDatasets = false; public boolean keepOBInMemory = true; public boolean fakeDataset = false; public boolean restrictTransitionsTimit = false; public int numDatasetsPerFile = 1; public boolean useTitle = false; // these are for the old stuff public boolean lowerNewgeneThreshold = false; public boolean useEitherSideWord = false; public boolean useEitherSideDisjunctive = false; public boolean twoStage = false; public String crfType = "MaxEnt"; public int featureThreshold = 1; public String featThreshFile = null; public double featureDiffThresh = 0.0; public int numTimesPruneFeatures = 0; public double newgeneThreshold = 0.0; public boolean doAdaptation = false; public boolean useInternal = true; public boolean useExternal = true; public double selfTrainConfidenceThreshold = 0.9; public int selfTrainIterations = 1; public int selfTrainWindowSize = 1; // Unigram public boolean useHuber = false; public boolean useQuartic = false; public double adaptSigma = 1.0; public int numFolds = 1; public int startFold = 1; public int endFold = 1; public boolean cacheNGrams = false; public String outputFormat; public boolean useSMD = false; public boolean useSGDtoQN = false; public boolean useStochasticQN = false; public boolean useScaledSGD = false; public int scaledSGDMethod = 0; public int SGDPasses = -1; public int QNPasses = -1; public boolean tuneSGD = false; public StochasticCalculateMethods stochasticMethod = StochasticCalculateMethods.NoneSpecified; public double initialGain = 0.1; public int stochasticBatchSize = 15; public boolean useSGD = false; public double gainSGD = 0.1; public boolean useHybrid = false; public int hybridCutoffIteration = 0; public boolean outputIterationsToFile = false; public boolean testObjFunction = false; 
public boolean testVariance = false; public int SGD2QNhessSamples = 50; public boolean testHessSamples = false; public int CRForder = 1; // TODO remove this when breaking serialization; this is unused; really maxLeft/maxRight control order public int CRFwindow = 2; // TODO remove this when breaking serialization; this is unused; really maxLeft/maxRight control clique size public boolean estimateInitial = false; public transient String biasedTrainFile = null; public transient String confusionMatrix = null; public String outputEncoding = null; public boolean useKBest = false; public String searchGraphPrefix = null; public double searchGraphPrune = Double.POSITIVE_INFINITY; public int kBest = 1; // more chinese segmenter features for GALE 2007 public boolean useFeaturesC4gram; public boolean useFeaturesC5gram; public boolean useFeaturesC6gram; public boolean useFeaturesCpC4gram; public boolean useFeaturesCpC5gram; public boolean useFeaturesCpC6gram; public boolean useUnicodeType; public boolean useUnicodeType4gram; public boolean useUnicodeType5gram; public boolean use4Clique; public boolean useUnicodeBlock; public boolean useShapeStrings1; public boolean useShapeStrings3; public boolean useShapeStrings4; public boolean useShapeStrings5; public boolean useGoodForNamesCpC; public boolean useDictionaryConjunctions; public boolean expandMidDot; // Only print the features for the first this many tokens encountered public int printFeaturesUpto = Integer.MAX_VALUE; public boolean useDictionaryConjunctions3; public boolean useWordUTypeConjunctions2; public boolean useWordUTypeConjunctions3; public boolean useWordShapeConjunctions2; public boolean useWordShapeConjunctions3; public boolean useMidDotShape; public boolean augmentedDateChars; public boolean suppressMidDotPostprocessing; public boolean printNR; // a flag for WordAndTagDocumentReaderAndWriter public String classBias = null; public boolean printLabelValue; // Old printErrorStuff public boolean useRobustQN = false; 
public boolean combo = false; public boolean useGenericFeatures = false; public boolean verboseForTrueCasing = false; public String trainHierarchical = null; public String domain = null; public boolean baseline = false; public String transferSigmas = null; public boolean doFE = false; public boolean restrictLabels = true; // whether to print a line saying each ObjectBank entry (usually a filename) public boolean announceObjectBankEntries = false; // This is for use with the OWLQNMinimizer L1 regularization. To use it, set useQN=false, // and this to a positive number. A smaller number means more features are retained. // Depending on the problem, a good value might be // between 0.75 (POS tagger) down to 0.01 (Chinese word segmentation) public double l1reg = 0.0; // truecaser flags: public String mixedCaseMapFile = ""; public String auxTrueCaseModels = ""; // more flags inspired by Zhang and Johnson 2003 public boolean use2W = false; public boolean useLC = false; public boolean useYetMoreCpCShapes = false; // added for the NFL domain public boolean useIfInteger = false; // Filename to which the features generated by a CRF classify will be exported (if non-null) public String exportFeatures = null; public boolean useInPlaceSGD = false; public boolean useTopics = false; // Number of iterations before evaluating weights (0 = don't evaluate) public int evaluateIters = 0; // Command to use for evaluation public String evalCmd = ""; // Evaluate on training set or not public boolean evaluateTrain = false; public int tuneSampleSize = -1; public boolean usePhraseFeatures = false; public boolean usePhraseWords = false; public boolean usePhraseWordTags = false; public boolean usePhraseWordSpecialTags = false; public boolean useCommonWordsFeature = false; public boolean useProtoFeatures = false; public boolean useWordnetFeatures = false; public String tokenFactory = "edu.stanford.nlp.process.CoreLabelTokenFactory"; public Object[] tokenFactoryArgs = new Object[0]; public 
String tokensAnnotationClassName = "edu.stanford.nlp.ling.CoreAnnotations$TokensAnnotation"; public transient String tokenizerOptions = null; public transient String tokenizerFactory = null; public boolean useCorefFeatures = false; public String wikiFeatureDbFile = null; // for combining 2 CRFs - one trained from noisy data and another trained from // non-noisy public boolean useNoisyNonNoisyFeature = false; // year annotation of the document public boolean useYear = false; public boolean useSentenceNumber = false; // to know source of the label. Currently, used to know which pattern is used // to label the token public boolean useLabelSource = false; /** * Whether to (not) lowercase tokens before looking them up in distsim * lexicon. By default lowercasing was done, but now it doesn't have to be * true :-). */ public boolean casedDistSim = false; /** * The format of the distsim file. Known values are: alexClark = TSV file. * word TAB clusterNumber [optional other content] terryKoo = TSV file. * clusterBitString TAB word TAB frequency */ public String distSimFileFormat = "alexClark"; /** * If this number is greater than 0, the distSim class is assume to be a bit * string and is truncated at this many characters. Normal distSim features * will then use this amount of resolution. Extra, special distsim features * may work at a coarser level of resolution. Since the lexicon only stores * this length of bit string, there is then no way to have finer-grained * clusters. */ public int distSimMaxBits = 8; /** * If this is set to true, all digit characters get mapped to '9' in a distsim * lexicon and for lookup. This is a simple word shaping that can shrink * distsim lexicons and improve their performance. */ public boolean numberEquivalenceDistSim = false; /** * What class to assign to words not found in the dist sim lexicon. You might * want to make it a known class, if one is the "default class. 
*/ public String unknownWordDistSimClass = "null"; /** * Use prefixes and suffixes from the previous and current word in edge clique. */ public boolean useNeighborNGrams = false; /** * This function maps words in the training or test data to new * words. They are used at the feature extractor level, ie in the * FeatureFactory. For now, only the NERFeatureFactory uses this. */ public Function<String, String> wordFunction = null; public static final String DEFAULT_PLAIN_TEXT_READER = "edu.stanford.nlp.sequences.PlainTextDocumentReaderAndWriter"; public String plainTextDocumentReaderAndWriter = DEFAULT_PLAIN_TEXT_READER; /** * Use a bag of all words as a feature. Perhaps this will find some * words that indicate certain types of entities are present. */ public boolean useBagOfWords = false; /** * When scoring, count the background symbol stats too. Useful for * things where the background symbol is particularly meaningful, * such as truecase. */ public boolean evaluateBackground = false; /** * Number of experts to be used in Logarithmic Opinion Pool (product of experts) training * default value is 1 */ public int numLopExpert = 1; public transient String initialLopScales = null; public transient String initialLopWeights = null; public boolean includeFullCRFInLOP = false; public boolean backpropLopTraining = false; public boolean randomLopWeights = false; public boolean randomLopFeatureSplit = false; public boolean nonLinearCRF = false; public boolean secondOrderNonLinear = false; public int numHiddenUnits = -1; public boolean useOutputLayer = true; public boolean useHiddenLayer = true; public boolean gradientDebug = false; public boolean checkGradient = false; public boolean useSigmoid = false; public boolean skipOutputRegularization = false; public boolean sparseOutputLayer = false; public boolean tieOutputLayer = false; public boolean blockInitialize = false; public boolean softmaxOutputLayer = false; /** * Bisequence CRF parameters */ public String 
loadBisequenceClassifierEn = null; public String loadBisequenceClassifierCh = null; public String bisequenceClassifierPropEn = null; public String bisequenceClassifierPropCh = null; public String bisequenceTestFileEn = null; public String bisequenceTestFileCh = null; public String bisequenceTestOutputEn = null; public String bisequenceTestOutputCh = null; public String bisequenceTestAlignmentFile = null; public String bisequenceAlignmentTestOutput = null; public int bisequencePriorType = 1; public String bisequenceAlignmentPriorPenaltyCh = null; public String bisequenceAlignmentPriorPenaltyEn = null; public double alignmentPruneThreshold = 0.0; public double alignmentDecodeThreshold = 0.5; public boolean factorInAlignmentProb = false; public boolean useChromaticSampling = false; public boolean useSequentialScanSampling = false; public int maxAllowedChromaticSize = 8; /** * Whether or not to keep blank sentences when processing. Useful * for systems such as the segmenter if you want to line up each * line exactly, including blank lines. 
*/ public boolean keepEmptySentences = false; public boolean useBilingualNERPrior = false; public int samplingSpeedUpThreshold = -1; public String entityMatrixCh = null; public String entityMatrixEn = null; public int multiThreadGibbs = 0; public boolean matchNERIncentive = false; public boolean useEmbedding = false; public boolean prependEmbedding = false; public String embeddingWords = null; public String embeddingVectors = null; public boolean transitionEdgeOnly = false; // L1-prior used in QNMinimizer's OWLQN public double priorLambda = 0; public boolean addCapitalFeatures = false; public int arbitraryInputLayerSize = -1; public boolean noEdgeFeature = false; public boolean terminateOnEvalImprovement = false; public int terminateOnEvalImprovementNumOfEpoch = 1; public boolean useMemoryEvaluator = true; public boolean suppressTestDebug = false; public boolean useOWLQN = false; public boolean printWeights = false; public int totalDataSlice = 10; public int numOfSlices = 0; public boolean regularizeSoftmaxTieParam = false; public double softmaxTieLambda = 0; public int totalFeatureSlice = 10; public int numOfFeatureSlices = 0; public boolean addBiasToEmbedding = false; public boolean hardcodeSoftmaxOutputWeights = false; public boolean useNERPriorBIO = false; // todo [cdm 2014]: Disused, to be deleted, use priorModelFactory public String entityMatrix = null; public int multiThreadClassifier = 0; public boolean useDualDecomp = false; public boolean biAlignmentPriorIsPMI = true; public boolean dampDDStepSizeWithAlignmentProb = false; public boolean dualDecompAlignment = false; public double dualDecompInitialStepSizeAlignment = 0.1; public boolean dualDecompNotBIO = false; public String berkeleyAlignerLoadPath = null; public boolean useBerkeleyAlignerForViterbi = false; public boolean useBerkeleyCompetitivePosterior = false; public boolean useDenero = true; public double alignDDAlpha = 1; public boolean factorInBiEdgePotential = false; public boolean 
noNeighborConstraints = false;

  // --- Bilingual alignment / joint NER-tagging options ---
  public boolean includeC2EViterbi = true;
  public boolean initWithPosterior = true;
  public int nerSkipFirstK = 0;
  public int nerSlowerTimes = 1;
  public boolean powerAlignProb = false;
  public boolean powerAlignProbAsAddition = false;
  public boolean initWithNERPosterior = false;
  public boolean applyNERPenalty = true;
  public boolean printFactorTable = false;

  // --- AdaGrad-FOBOS optimizer and group-regularization options ---
  public boolean useAdaGradFOBOS = false;
  public double initRate = 0.1;
  public boolean groupByFeatureTemplate = false;
  public boolean groupByOutputClass = false;
  public double priorAlpha = 0;

  public String splitWordRegex = null;
  public boolean groupByInput = false;
  public boolean groupByHiddenUnit = false;

  // --- Word segmentation (beam search / dual decomposition / perceptron) options ---
  public String unigramLM = null;
  public String bigramLM = null;
  public int wordSegBeamSize = 1000;
  public String vocabFile = null;
  public String normalizedFile = null;
  public boolean averagePerceptron = true;
  public String loadCRFSegmenterPath = null;
  public String loadPCTSegmenterPath = null;
  public String crfSegmenterProp = null;
  public String pctSegmenterProp = null;
  public String intermediateSegmenterOut = null;
  public String intermediateSegmenterModel = null;
  public int dualDecompMaxItr = 0;
  public double dualDecompInitialStepSize = 0.1;
  public boolean dualDecompDebug = false;
  public boolean useCWSWordFeatures = false;
  public boolean useCWSWordFeaturesAll = false;
  public boolean useCWSWordFeaturesBigram = false;
  public boolean pctSegmenterLenAdjust = false;
  public boolean useTrainLexicon = false;
  public boolean useCWSFeatures = true;
  public boolean appendLC = false;
  public boolean perceptronDebug = false;
  public boolean pctSegmenterScaleByCRF = false;
  public double pctSegmenterScale = 0.0;
  public boolean separateASCIIandRange = true;

  // --- Dropout-style feature noising and training-loop options ---
  public double dropoutRate = 0.0;
  public double dropoutScale = 1.0;
  // keenon: changed from = 1, nowadays it makes sense to default to parallelism
  public int multiThreadGrad = Runtime.getRuntime().availableProcessors();
  public int maxQNItr = 0;
  public boolean dropoutApprox = false;
  public String unsupDropoutFile = null;
  public double unsupDropoutScale = 1.0;
  public int startEvaluateIters = 0;
  public int multiThreadPerceptron = 1;
  public boolean lazyUpdate = false;
  public int featureCountThresh = 0;
  public transient String serializeWeightsTo = null;
  public boolean geDebug = false;
  public boolean doFeatureDiscovery = false;
  public transient String loadWeightsFrom = null;
  public transient String loadClassIndexFrom = null;
  public transient String serializeClassIndexTo = null;

  // --- Cross-lingual (EN/CH) joint and semi-supervised training options ---
  public boolean learnCHBasedOnEN = true;
  public boolean learnENBasedOnCH = false;
  public String loadWeightsFromEN = null;
  public String loadWeightsFromCH = null;
  public String serializeToEN = null;
  public String serializeToCH = null;
  public String testFileEN = null;
  public String testFileCH = null;
  public String unsupFileEN = null;
  public String unsupFileCH = null;
  public String unsupAlignFile = null;
  public String supFileEN = null;
  public String supFileCH = null;
  public transient String serializeFeatureIndexTo = null;
  public String loadFeatureIndexFromEN = null;
  public String loadFeatureIndexFromCH = null;
  public double lambdaEN = 1.0;
  public double lambdaCH = 1.0;
  public boolean alternateTraining = false;
  public boolean weightByEntropy = false;
  public boolean useKL = false;
  public boolean useHardGE = false;
  public boolean useCRFforUnsup = false;
  public boolean useGEforSup = false;
  public boolean useKnownLCWords = true; // disused, can be deleted when breaking serialization

  // allow for multiple feature factories.
  public String[] featureFactories = null;
  public List<Object[]> featureFactoriesArgs = null;

  public boolean useNoisyLabel = false;
  public String errorMatrix = null;
  public boolean printTrainLabels = false;

  // Inference label dictionary cutoff
  public int labelDictionaryCutoff = -1;

  public boolean useAdaDelta = false;
  public boolean useAdaDiff = false;
  public double adaGradEps = 1e-3;
  public double adaDeltaRho = 0.95;
  public boolean useRandomSeed = false;
  public boolean terminateOnAvgImprovement = false;

  public boolean strictGoodCoNLL = false;
  public boolean removeStrictGoodCoNLLDuplicates = false;

  /** A class name for a factory that vends a prior NER model that
   *  implements both SequenceModel and SequenceListener, and which
   *  is used in the Gibbs sampling sequence model inference.
   */
  public String priorModelFactory;

  /** Put in undirected (left/right) bag of words features for local
   *  neighborhood. Seems much worse than regular useDisjunctive.
   */
  public boolean useUndirectedDisjunctive;

  public boolean splitSlashHyphenWords; // unused with new enum below. Remove when breaking serialization.

  /** How many words it is okay to add to knownLCWords after initial training.
   *  If this number is negative, then add any number of further words during classifying/testing.
   *  If this number is non-negative (greater than or equal to 0), then add at most this many words
   *  to the knownLCWords. By default, this is now set to 0, so there is no transductive learning on the
   *  test set, since too many people complained about results changing over runs. However, traditionally
   *  we used a non-zero value, and this usually helps performance a bit (until 2014 it was -1, then it
   *  was set to 10_000, so that memory would not grow without bound if a SequenceClassifier is run for
   *  a long time).
   */
  public int maxAdditionalKnownLCWords = 0; // was 10_000;

  /** How tokens containing a slash or hyphen are treated when extracting features. */
  public enum SlashHyphenEnum { NONE, WFRAG, WORD, BOTH };

  public SlashHyphenEnum slashHyphenTreatment = SlashHyphenEnum.NONE;

  public boolean useTitle2 = false;

  public boolean showNCCInfo;
  public boolean showCCInfo;
  public String crfToExamine;
  public boolean useSUTime;
  public boolean applyNumericClassifiers;
  public String combinationMode;
  public String nerModel;

  /**
   * Use prefixes and suffixes from the previous and next word in node clique.
   */
  public boolean useMoreNeighborNGrams = false;

  // "ADD VARIABLES ABOVE HERE"

  // Transient state: not part of the serialized classifier.
  public transient List<String> phraseGazettes = null;
  public transient Properties props = null;

  /**
   * Create a new SeqClassifierFlags object initialized with default values.
   */
  public SeqClassifierFlags() { }

  /**
   * Create a new SeqClassifierFlags object and initialize it using values in
   * the Properties object. The properties are printed to stderr as it works.
   *
   * @param props The properties object used for initialization
   */
  public SeqClassifierFlags(Properties props) {
    setProperties(props, true);
  }

  /**
   * Create a new SeqClassifierFlags object and initialize it using values in
   * the Properties object. The properties are printed to stderr as it works.
   *
   * @param props The properties object used for initialization
   * @param printProps Whether to print the properties on construction
   */
  public SeqClassifierFlags(Properties props, boolean printProps) {
    setProperties(props, printProps);
  }

  /**
   * Initialize this object using values in Properties object. The properties
   * are printed to stderr as it works.
   *
   * @param props The properties object used for initialization
   */
  public final void setProperties(Properties props) {
    setProperties(props, true);
  }

  /**
   * Initialize using values in Properties file.
   *
   * @param props The properties object used for initialization
   * @param printProps Whether to print the properties to stderr as it works.
*/ public void setProperties(Properties props, boolean printProps) { this.props = props; StringBuilder sb = new StringBuilder(stringRep); for (String key : props.stringPropertyNames()) { String val = props.getProperty(key); if (!(key.isEmpty() && val.isEmpty())) { if (printProps) { log.info(key + '=' + val); } sb.append(key).append('=').append(val).append('\n'); } if (key.equalsIgnoreCase("macro")) { if (Boolean.parseBoolean(val)) { useObservedSequencesOnly = true; readerAndWriter = "edu.stanford.nlp.sequences.CoNLLDocumentReaderAndWriter"; // useClassFeature = true; // submit useLongSequences = true; useTaggySequences = true; useNGrams = true; usePrev = true; useNext = true; useTags = true; useWordPairs = true; useSequences = true; usePrevSequences = true; // noMidNGrams noMidNGrams = true; // reverse useReverse = true; // typeseqs3 useTypeSeqs = true; useTypeSeqs2 = true; useTypeySequences = true; // wordtypes2 && known wordShape = WordShapeClassifier.WORDSHAPEDAN2USELC; // occurrence useOccurrencePatterns = true; // realword useLastRealWord = true; useNextRealWord = true; // smooth sigma = 3.0; // normalize normalize = true; normalizeTimex = true; } } else if (key.equalsIgnoreCase("goodCoNLL")) { // This was developed for CMMClassifier after the original 2003 CoNLL work. // It is for an MEMM. You shouldn't use it with CRFClassifier. if (Boolean.parseBoolean(val)) { // featureFactory = "edu.stanford.nlp.ie.NERFeatureFactory"; readerAndWriter = "edu.stanford.nlp.sequences.CoNLLDocumentReaderAndWriter"; useObservedSequencesOnly = true; // useClassFeature = true; useLongSequences = true; useTaggySequences = true; useNGrams = true; usePrev = true; useNext = true; useTags = true; useWordPairs = true; useSequences = true; usePrevSequences = true; // noMidNGrams noMidNGrams = true; // should this be set?? maxNGramLeng = 6; No (to get best score). 
// reverse useReverse = false; // typeseqs3 useTypeSeqs = true; useTypeSeqs2 = true; useTypeySequences = true; // wordtypes2 && known wordShape = WordShapeClassifier.WORDSHAPEDAN2USELC; // occurrence useOccurrencePatterns = true; // realword useLastRealWord = true; useNextRealWord = true; // smooth // This was originally 20, but in Aug 2006 increased to 50, because that helped // for English, but actually even smaller than 20 helps for languages like // Spanish, so dropped in 2014 to 5.0. sigma = 5.0; // normalize normalize = true; normalizeTimex = true; // this was sort of wrong for German since it lowercases months, but didn't do too much harm maxLeft = 2; useDisjunctive = true; disjunctionWidth = 4; // clearly optimal for CoNLL useBoundarySequences = true; useLemmas = true; // no-op except for German usePrevNextLemmas = true; // no-op except for German strictGoodCoNLL = true; // don't add some CpC features added later removeStrictGoodCoNLLDuplicates = true; // added in 2014; the duplicated features don't help inputEncoding = "iso-8859-1"; // needed for CoNLL German and Spanish files // optimization useQN = true; QNsize = 15; } } else if (key.equalsIgnoreCase("conllNoTags")) { if (Boolean.parseBoolean(val)) { readerAndWriter = "edu.stanford.nlp.sequences.ColumnDocumentReaderAndWriter"; // trainMap=testMap="word=0,answer=1"; map = "word=0,answer=1"; useObservedSequencesOnly = true; // useClassFeature = true; useLongSequences = true; // useTaggySequences = true; useNGrams = true; usePrev = true; useNext = true; // useTags = true; useWordPairs = true; useSequences = true; usePrevSequences = true; // noMidNGrams noMidNGrams = true; // reverse useReverse = false; // typeseqs3 useTypeSeqs = true; useTypeSeqs2 = true; useTypeySequences = true; // wordtypes2 && known wordShape = WordShapeClassifier.WORDSHAPEDAN2USELC; // occurrence // useOccurrencePatterns = true; // realword useLastRealWord = true; useNextRealWord = true; // smooth sigma = 20.0; adaptSigma = 20.0; // 
normalize normalize = true; normalizeTimex = true; maxLeft = 2; useDisjunctive = true; disjunctionWidth = 4; useBoundarySequences = true; // useLemmas = true; // no-op except for German // usePrevNextLemmas = true; // no-op except for German inputEncoding = "iso-8859-1"; // opt useQN = true; QNsize = 15; } } else if (key.equalsIgnoreCase("notags")) { if (Boolean.parseBoolean(val)) { // turn off all features that use POS tags // this is slightly crude: it also turns off a few things that // don't use tags in e.g., useTaggySequences useTags = false; useSymTags = false; useTaggySequences = false; useOccurrencePatterns = false; } } else if (key.equalsIgnoreCase("submit")) { if (Boolean.parseBoolean(val)) { useLongSequences = true; useTaggySequences = true; useNGrams = true; usePrev = true; useNext = true; useTags = true; useWordPairs = true; wordShape = WordShapeClassifier.WORDSHAPEDAN1; useSequences = true; usePrevSequences = true; } } else if (key.equalsIgnoreCase("binnedLengths")) { if (val != null) { String[] binnedLengthStrs = val.split("[, ]+"); binnedLengths = new int[binnedLengthStrs.length]; for (int i = 0; i < binnedLengths.length; i++) { binnedLengths[i] = Integer.parseInt(binnedLengthStrs[i]); } } } else if (key.equalsIgnoreCase("makeConsistent")) { makeConsistent = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("dump")) { dump = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useNGrams")) { useNGrams = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useNeighborNGrams")) { useNeighborNGrams = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useMoreNeighborNGrams")) { useMoreNeighborNGrams = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("wordFunction")) { wordFunction = ReflectionLoading.loadByReflection(val); } else if (key.equalsIgnoreCase("conjoinShapeNGrams")) { conjoinShapeNGrams = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("lowercaseNGrams")) { lowercaseNGrams = 
Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useIsURL")) { useIsURL = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useURLSequences")) { useURLSequences = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useEntityTypes")) { useEntityTypes = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useEntityRule")) { useEntityRule = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useOrdinal")) { useOrdinal = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useEntityTypeSequences")) { useEntityTypeSequences = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useIsDateRange")) { useIsDateRange = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("dehyphenateNGrams")) { dehyphenateNGrams = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("lowerNewgeneThreshold")) { lowerNewgeneThreshold = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("usePrev")) { usePrev = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useNext")) { useNext = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useTags")) { useTags = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useWordPairs")) { useWordPairs = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useGazettes")) { useGazettes = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("wordShape")) { wordShape = WordShapeClassifier.lookupShaper(val); if (wordShape == WordShapeClassifier.NOWORDSHAPE) { log.warn("There is no word shaper called '" + val + "'; no word shape features will be used."); } } else if (key.equalsIgnoreCase("useShapeStrings")) { useShapeStrings = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useGoodForNamesCpC")) { useGoodForNamesCpC = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useDictionaryConjunctions")) { useDictionaryConjunctions = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useDictionaryConjunctions3")) { 
useDictionaryConjunctions3 = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("expandMidDot")) { expandMidDot = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useSequences")) { useSequences = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("usePrevSequences")) { usePrevSequences = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useNextSequences")) { useNextSequences = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useLongSequences")) { useLongSequences = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useBoundarySequences")) { useBoundarySequences = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useTaggySequences")) { useTaggySequences = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useExtraTaggySequences")) { useExtraTaggySequences = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useTaggySequencesShapeInteraction")) { useTaggySequencesShapeInteraction = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("strictlyZeroethOrder")) { strictlyZeroethOrder = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("strictlyFirstOrder")) { strictlyFirstOrder = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("strictlySecondOrder")) { strictlySecondOrder = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("strictlyThirdOrder")) { strictlyThirdOrder = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("dontExtendTaggy")) { dontExtendTaggy = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("entitySubclassification")) { entitySubclassification = val; } else if (key.equalsIgnoreCase("useGazettePhrases")) { useGazettePhrases = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("phraseGazettes")) { StringTokenizer st = new StringTokenizer(val, " ,;\t"); if (phraseGazettes == null) { phraseGazettes = new ArrayList<>(); } while (st.hasMoreTokens()) { phraseGazettes.add(st.nextToken()); } } else if 
(key.equalsIgnoreCase("useSum")) { useSum = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("verbose")) { verboseMode = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("verboseMode")) { verboseMode = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("tolerance")) { tolerance = Double.parseDouble(val); } else if (key.equalsIgnoreCase("maxIterations")) { maxIterations = Integer.parseInt(val); } else if (key.equalsIgnoreCase("exportFeatures")) { exportFeatures = val; } else if (key.equalsIgnoreCase("printFeatures")) { printFeatures = val; } else if (key.equalsIgnoreCase("printFeaturesUpto")) { printFeaturesUpto = Integer.parseInt(val); } else if (key.equalsIgnoreCase("lastNameList")) { lastNameList = val; } else if (key.equalsIgnoreCase("maleNameList")) { maleNameList = val; } else if (key.equalsIgnoreCase("femaleNameList")) { femaleNameList = val; } else if (key.equalsIgnoreCase("useSymTags")) { useSymTags = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useSymWordPairs")) { useSymWordPairs = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("printClassifier")) { printClassifier = val; } else if (key.equalsIgnoreCase("printClassifierParam")) { printClassifierParam = Integer.parseInt(val); } else if (key.equalsIgnoreCase("intern")) { intern = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("mergetags")) { mergeTags = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("iobTags")) { iobTags = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useViterbi")) { useViterbi = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("intern2")) { intern2 = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("selfTest")) { selfTest = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("sloppyGazette")) { sloppyGazette = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("cleanGazette")) { cleanGazette = Boolean.parseBoolean(val); } else if 
(key.equalsIgnoreCase("noMidNGrams")) { noMidNGrams = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useReverse")) { useReverse = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("retainEntitySubclassification")) { retainEntitySubclassification = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useLemmas")) { useLemmas = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("usePrevNextLemmas")) { usePrevNextLemmas = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("normalizeTerms")) { normalizeTerms = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("normalizeTimex")) { normalizeTimex = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useNB")) { useNB = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useParenMatching")) { useParenMatching = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useTypeSeqs")) { useTypeSeqs = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useTypeSeqs2")) { useTypeSeqs2 = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useTypeSeqs3")) { useTypeSeqs3 = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useDisjunctive")) { useDisjunctive = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useUndirectedDisjunctive")) { useUndirectedDisjunctive = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("splitSlashHyphenWords")) { try { slashHyphenTreatment = SlashHyphenEnum.valueOf(val.trim().toUpperCase()); } catch (IllegalArgumentException | NullPointerException iae) { slashHyphenTreatment = SlashHyphenEnum.NONE; } } else if (key.equalsIgnoreCase("disjunctionWidth")) { disjunctionWidth = Integer.parseInt(val); } else if (key.equalsIgnoreCase("useDisjunctiveShapeInteraction")) { useDisjunctiveShapeInteraction = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useWideDisjunctive")) { useWideDisjunctive = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("wideDisjunctionWidth")) { 
wideDisjunctionWidth = Integer.parseInt(val); } else if (key.equalsIgnoreCase("useDisjShape")) { useDisjShape = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useTitle")) { useTitle = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useTitle2")) { useTitle2 = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("booleanFeatures")) { booleanFeatures = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useClassFeature")) { useClassFeature = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useShapeConjunctions")) { useShapeConjunctions = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useWordTag")) { useWordTag = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useNPHead")) { useNPHead = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useNPGovernor")) { useNPGovernor = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useHeadGov")) { useHeadGov = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useLastRealWord")) { useLastRealWord = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useNextRealWord")) { useNextRealWord = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useOccurrencePatterns")) { useOccurrencePatterns = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useTypeySequences")) { useTypeySequences = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("justify")) { justify = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("normalize")) { normalize = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("priorType")) { priorType = val; } else if (key.equalsIgnoreCase("sigma")) { sigma = Double.parseDouble(val); } else if (key.equalsIgnoreCase("epsilon")) { epsilon = Double.parseDouble(val); } else if (key.equalsIgnoreCase("beamSize")) { beamSize = Integer.parseInt(val); } else if (key.equalsIgnoreCase("removeTopN")) { removeTopN = Integer.parseInt(val); } else if 
(key.equalsIgnoreCase("removeTopNPercent")) { removeTopNPercent = Double.parseDouble(val); } else if (key.equalsIgnoreCase("randomizedRatio")) { randomizedRatio = Double.parseDouble(val); } else if (key.equalsIgnoreCase("numTimesRemoveTopN")) { numTimesRemoveTopN = Integer.parseInt(val); } else if (key.equalsIgnoreCase("maxLeft")) { maxLeft = Integer.parseInt(val); } else if (key.equalsIgnoreCase("maxRight")) { maxRight = Integer.parseInt(val); } else if (key.equalsIgnoreCase("maxNGramLeng")) { maxNGramLeng = Integer.parseInt(val); } else if (key.equalsIgnoreCase("useGazFeatures")) { useGazFeatures = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useAltGazFeatures")) { useAltGazFeatures = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useMoreGazFeatures")) { useMoreGazFeatures = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useAbbr")) { useAbbr = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useMinimalAbbr")) { useMinimalAbbr = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useAbbr1")) { useAbbr1 = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useMinimalAbbr1")) { useMinimalAbbr1 = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("documentReader")) { log.info("You are using an outdated flag: -documentReader " + val); log.info("Please use -readerAndWriter instead."); } else if (key.equalsIgnoreCase("deleteBlankLines")) { deleteBlankLines = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("answerFile")) { answerFile = val; } else if (key.equalsIgnoreCase("altAnswerFile")) { altAnswerFile = val; } else if (key.equalsIgnoreCase("loadClassifier") || key.equalsIgnoreCase("model")) { loadClassifier = val; } else if (key.equalsIgnoreCase("loadTextClassifier")) { loadTextClassifier = val; } else if (key.equalsIgnoreCase("loadJarClassifier")) { loadJarClassifier = val; } else if (key.equalsIgnoreCase("loadAuxClassifier")) { loadAuxClassifier = val; } else if 
(key.equalsIgnoreCase("serializeTo")) { serializeTo = val; } else if (key.equalsIgnoreCase("serializeToText")) { serializeToText = val; } else if (key.equalsIgnoreCase("serializeDatasetsDir")) { serializeDatasetsDir = val; } else if (key.equalsIgnoreCase("loadDatasetsDir")) { loadDatasetsDir = val; } else if (key.equalsIgnoreCase("pushDir")) { pushDir = val; } else if (key.equalsIgnoreCase("purgeDatasets")) { purgeDatasets = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("keepOBInMemory")) { keepOBInMemory = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("fakeDataset")) { fakeDataset = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("numDatasetsPerFile")) { numDatasetsPerFile = Integer.parseInt(val); } else if (key.equalsIgnoreCase("trainFile")) { trainFile = val; } else if (key.equalsIgnoreCase("biasedTrainFile")) { biasedTrainFile = val; } else if (key.equalsIgnoreCase("classBias")) { classBias = val; } else if (key.equalsIgnoreCase("confusionMatrix")) { confusionMatrix = val; } else if (key.equalsIgnoreCase("adaptFile")) { adaptFile = val; } else if (key.equalsIgnoreCase("devFile")) { devFile = val; } else if (key.equalsIgnoreCase("testFile")) { testFile = val; } else if (key.equalsIgnoreCase("outputFile")) { outputFile = val; } else if (key.equalsIgnoreCase("textFile")) { textFile = val; } else if (key.equalsIgnoreCase("readStdin")) { readStdin = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("initialWeights")) { initialWeights = val; } else if (key.equalsIgnoreCase("interimOutputFreq")) { interimOutputFreq = Integer.parseInt(val); } else if (key.equalsIgnoreCase("inputEncoding")) { inputEncoding = val; } else if (key.equalsIgnoreCase("outputEncoding")) { outputEncoding = val; } else if (key.equalsIgnoreCase("encoding")) { inputEncoding = val; outputEncoding = val; } else if (key.equalsIgnoreCase("gazette")) { useGazettes = true; StringTokenizer st = new StringTokenizer(val, " ,;\t"); if (gazettes == null) { 
gazettes = new ArrayList<>(); } // for after deserialization, as gazettes is transient while (st.hasMoreTokens()) { gazettes.add(st.nextToken()); } } else if (key.equalsIgnoreCase("useQN")) { useQN = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("QNsize")) { QNsize = Integer.parseInt(val); } else if (key.equalsIgnoreCase("QNsize2")) { QNsize2 = Integer.parseInt(val); } else if (key.equalsIgnoreCase("l1reg")) { useQN = false; l1reg = Double.parseDouble(val); } else if (key.equalsIgnoreCase("useFloat")) { useFloat = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("trainMap")) { log.info("trainMap and testMap are no longer valid options - please use map instead."); throw new RuntimeException(); } else if (key.equalsIgnoreCase("testMap")) { log.info("trainMap and testMap are no longer valid options - please use map instead."); throw new RuntimeException(); } else if (key.equalsIgnoreCase("map")) { map = val; } else if (key.equalsIgnoreCase("useMoreAbbr")) { useMoreAbbr = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("usePrevVB")) { usePrevVB = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useNextVB")) { useNextVB = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useVB")) { if (Boolean.parseBoolean(val)) { useVB = true; usePrevVB = true; useNextVB = true; } } else if (key.equalsIgnoreCase("useChunks")) { useChunks = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useChunkySequences")) { useChunkySequences = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("greekifyNGrams")) { greekifyNGrams = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("restrictTransitionsTimit")) { restrictTransitionsTimit = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useMoreTags")) { useMoreTags = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useBeginSent")) { useBeginSent = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("usePosition")) { usePosition = 
Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useGenia")) { useGENIA = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useAbstr")) { useABSTR = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useWeb")) { useWEB = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useAnte")) { useANTE = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useAcr")) { useACR = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useTok")) { useTOK = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useAbgene")) { useABGENE = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useAbstrFreqDict")) { useABSTRFreqDict = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useAbstrFreq")) { useABSTRFreq = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useFreq")) { useFREQ = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("usewebfreqdict")) { useWEBFreqDict = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("bioSubmitOutput")) { bioSubmitOutput = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("subCWGaz")) { subCWGaz = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("splitOnHead")) { splitOnHead = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("featureCountThreshold")) { featureCountThreshold = Integer.parseInt(val); } else if (key.equalsIgnoreCase("useWord")) { useWord = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("memoryThrift")) { memoryThrift = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("timitDatum")) { timitDatum = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("splitDocuments")) { log.info("You are using an outdated flag: -splitDocuments"); log.info("Please use -maxDocSize -1 instead."); splitDocuments = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("featureWeightThreshold")) { featureWeightThreshold = Double.parseDouble(val); } else if 
(key.equalsIgnoreCase("backgroundSymbol")) { backgroundSymbol = val; } else if (key.equalsIgnoreCase("featureFactory")) { // handle multiple feature factories. String[] tokens = val.split("\\s*,\\s*"); // multiple feature factories could be specified and are comma separated. int numFactories = tokens.length; if (numFactories == 1) { // for compatible reason featureFactory = getFeatureFactory(val); } featureFactories = new String[numFactories]; featureFactoriesArgs = new ArrayList<>(numFactories); for (int i = 0; i < numFactories; i++) { featureFactories[i] = getFeatureFactory(tokens[i]); featureFactoriesArgs.add(new Object[0]); } } else if (key.equalsIgnoreCase("printXML")) { log.info("printXML is disused; perhaps try using the -outputFormat xml option."); } else if (key.equalsIgnoreCase("useSeenFeaturesOnly")) { useSeenFeaturesOnly = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useBagOfWords")) { useBagOfWords = Boolean.parseBoolean(val); // chinese word-segmenter features } else if (key.equalsIgnoreCase("useRadical")) { useRadical = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useBigramInTwoClique")) { useBigramInTwoClique = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useReverseAffix")) { useReverseAffix = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("charHalfWindow")) { charHalfWindow = Integer.parseInt(val); } else if (key.equalsIgnoreCase("purgeFeatures")) { purgeFeatures = Integer.parseInt(val); } else if (key.equalsIgnoreCase("ocrFold")) { ocrFold = Integer.parseInt(val); } else if (key.equalsIgnoreCase("morphFeatureFile")) { morphFeatureFile = val; } else if (key.equalsIgnoreCase("svmModelFile")) { svmModelFile = val; /* Dictionary */ } else if (key.equalsIgnoreCase("useDictleng")) { useDictleng = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useDict2")) { useDict2 = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useOutDict2")) { useOutDict2 = Boolean.parseBoolean(val); } 
else if (key.equalsIgnoreCase("outDict2")) { outDict2 = val; } else if (key.equalsIgnoreCase("useDictCTB2")) { useDictCTB2 = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useDictASBC2")) { useDictASBC2 = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useDictPK2")) { useDictPK2 = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useDictHK2")) { useDictHK2 = Boolean.parseBoolean(val); /* N-gram flags */ } else if (key.equalsIgnoreCase("useWord1")) { useWord1 = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useWord2")) { useWord2 = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useWord3")) { useWord3 = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useWord4")) { useWord4 = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useRad1")) { useRad1 = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useRad2")) { useRad2 = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useRad2b")) { useRad2b = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useWordn")) { useWordn = Boolean.parseBoolean(val); /* affix flags */ } else if (key.equalsIgnoreCase("useCTBPre1")) { useCTBPre1 = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useCTBSuf1")) { useCTBSuf1 = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useASBCPre1")) { useASBCPre1 = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useASBCSuf1")) { useASBCSuf1 = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useHKPre1")) { useHKPre1 = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useHKSuf1")) { useHKSuf1 = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("usePKPre1")) { usePKPre1 = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("usePKSuf1")) { usePKSuf1 = Boolean.parseBoolean(val); /* POS flags */ } else if (key.equalsIgnoreCase("useCTBChar2")) { useCTBChar2 = Boolean.parseBoolean(val); } else if 
(key.equalsIgnoreCase("usePrediction")) { usePrediction = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useASBCChar2")) { useASBCChar2 = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useHKChar2")) { useHKChar2 = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("usePKChar2")) { usePKChar2 = Boolean.parseBoolean(val); /* Rule flag */ } else if (key.equalsIgnoreCase("useRule2")) { useRule2 = Boolean.parseBoolean(val); /* ASBC and HK */ } else if (key.equalsIgnoreCase("useBig5")) { useBig5 = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useNegDict2")) { useNegDict2 = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useNegDict3")) { useNegDict3 = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useNegDict4")) { useNegDict4 = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useNegCTBDict2")) { useNegCTBDict2 = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useNegCTBDict3")) { useNegCTBDict3 = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useNegCTBDict4")) { useNegCTBDict4 = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useNegASBCDict2")) { useNegASBCDict2 = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useNegASBCDict3")) { useNegASBCDict3 = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useNegASBCDict4")) { useNegASBCDict4 = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useNegPKDict2")) { useNegPKDict2 = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useNegPKDict3")) { useNegPKDict3 = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useNegPKDict4")) { useNegPKDict4 = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useNegHKDict2")) { useNegHKDict2 = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useNegHKDict3")) { useNegHKDict3 = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useNegHKDict4")) { useNegHKDict4 = Boolean.parseBoolean(val); } else if 
(key.equalsIgnoreCase("usePre")) { usePre = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useSuf")) { useSuf = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useRule")) { useRule = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useAs")) { useAs = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("usePk")) { usePk = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useHk")) { useHk = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useMsr")) { useMsr = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useMSRChar2")) { useMSRChar2 = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useFeaturesC4gram")) { useFeaturesC4gram = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useFeaturesC5gram")) { useFeaturesC5gram = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useFeaturesC6gram")) { useFeaturesC6gram = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useFeaturesCpC4gram")) { useFeaturesCpC4gram = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useFeaturesCpC5gram")) { useFeaturesCpC5gram = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useFeaturesCpC6gram")) { useFeaturesCpC6gram = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useUnicodeType")) { useUnicodeType = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useUnicodeBlock")) { useUnicodeBlock = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useUnicodeType4gram")) { useUnicodeType4gram = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useUnicodeType5gram")) { useUnicodeType5gram = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useShapeStrings1")) { useShapeStrings1 = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useShapeStrings3")) { useShapeStrings3 = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useShapeStrings4")) { useShapeStrings4 = Boolean.parseBoolean(val); } else if 
(key.equalsIgnoreCase("useShapeStrings5")) { useShapeStrings5 = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useWordUTypeConjunctions2")) { useWordUTypeConjunctions2 = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useWordUTypeConjunctions3")) { useWordUTypeConjunctions3 = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useWordShapeConjunctions2")) { useWordShapeConjunctions2 = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useWordShapeConjunctions3")) { useWordShapeConjunctions3 = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useMidDotShape")) { useMidDotShape = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("augmentedDateChars")) { augmentedDateChars = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("suppressMidDotPostprocessing")) { suppressMidDotPostprocessing = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("printNR")) { printNR = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("use4Clique")) { use4Clique = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useFilter")) { useFilter = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("largeChSegFile")) { largeChSegFile = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("keepEnglishWhitespaces")) { keepEnglishWhitespaces = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("keepAllWhitespaces")) { keepAllWhitespaces = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("sighanPostProcessing")) { sighanPostProcessing = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useChPos")) { useChPos = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("sighanCorporaDict")) { sighanCorporaDict = val; // end chinese word-segmenter features } else if (key.equalsIgnoreCase("useObservedSequencesOnly")) { useObservedSequencesOnly = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("maxDocSize")) { maxDocSize = Integer.parseInt(val); splitDocuments 
= true; } else if (key.equalsIgnoreCase("printProbs")) { printProbs = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("printFirstOrderProbs")) { printFirstOrderProbs = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("saveFeatureIndexToDisk")) { saveFeatureIndexToDisk = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("removeBackgroundSingletonFeatures")) { removeBackgroundSingletonFeatures = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("doGibbs")) { doGibbs = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useMUCFeatures")) { useMUCFeatures = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("initViterbi")) { initViterbi = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("checkNameList")) { checkNameList = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useFirstWord")) { useFirstWord = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useUnknown")) { useUnknown = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("cacheNGrams")) { cacheNGrams = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useNumberFeature")) { useNumberFeature = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("annealingRate")) { annealingRate = Double.parseDouble(val); } else if (key.equalsIgnoreCase("annealingType")) { if (val.equalsIgnoreCase("linear") || val.equalsIgnoreCase("exp") || val.equalsIgnoreCase("exponential")) { annealingType = val; } else { log.info("unknown annealingType: " + val + ". 
Please use linear|exp|exponential");
        }
      } else if (key.equalsIgnoreCase("numSamples")) {
        numSamples = Integer.parseInt(val);
      } else if (key.equalsIgnoreCase("inferenceType")) {
        inferenceType = val;
      } else if (key.equalsIgnoreCase("loadProcessedData")) {
        loadProcessedData = val;
      } else if (key.equalsIgnoreCase("normalizationTable")) {
        normalizationTable = val;
      } else if (key.equalsIgnoreCase("dictionary")) {
        // don't set if empty string or spaces or true: revert it to null
        // special case so can empty out dictionary list on command line!
        val = val.trim();
        // BUGFIX: was !"false".equals("val") — a literal-to-literal comparison that is
        // always true, so "-dictionary false" wrongly set dictionary to the string
        // "false" instead of reverting it to null. Compare against the variable val.
        if (val.length() > 0 && !"true".equals(val) && !"null".equals(val) && !"false".equals(val)) {
          dictionary = val;
        } else {
          dictionary = null;
        }
      } else if (key.equalsIgnoreCase("serDictionary")) {
        // don't set if empty string or spaces or true: revert it to null
        // special case so can empty out dictionary list on command line!
        val = val.trim();
        // BUGFIX: same literal-vs-literal comparison as the "dictionary" branch.
        if (val.length() > 0 && !"true".equals(val) && !"null".equals(val) && !"false".equals(val)) {
          serializedDictionary = val;
        } else {
          serializedDictionary = null;
        }
      } else if (key.equalsIgnoreCase("dictionary2")) {
        // don't set if empty string or spaces or true: revert it to null
        // special case so can empty out dictionary list on command line!
        val = val.trim();
        // BUGFIX: same literal-vs-literal comparison as the "dictionary" branch.
        if (val.length() > 0 && !"true".equals(val) && !"null".equals(val) && !"false".equals(val)) {
          dictionary2 = val;
        } else {
          dictionary2 = null;
        }
      } else if (key.equalsIgnoreCase("normTableEncoding")) {
        normTableEncoding = val;
      } else if (key.equalsIgnoreCase("useLemmaAsWord")) {
        useLemmaAsWord = Boolean.parseBoolean(val);
      } else if (key.equalsIgnoreCase("type")) {
        type = val;
      } else if (key.equalsIgnoreCase("readerAndWriter")) {
        readerAndWriter = val;
      } else if (key.equalsIgnoreCase("plainTextDocumentReaderAndWriter")) {
        plainTextDocumentReaderAndWriter = val;
      } else if (key.equalsIgnoreCase("gazFilesFile")) {
        gazFilesFile = val;
      } else if (key.equalsIgnoreCase("baseTrainDir")) {
        baseTrainDir = val;
      } else if (key.equalsIgnoreCase("baseTestDir")) {
        baseTestDir = val;
      } else if (key.equalsIgnoreCase("trainFiles")) {
        trainFiles = val;
      } else if (key.equalsIgnoreCase("trainFileList")) {
        trainFileList = val;
      } else if (key.equalsIgnoreCase("trainDirs")) {
        trainDirs = val;
      } else if (key.equalsIgnoreCase("testDirs")) {
        testDirs = val;
      } else if (key.equalsIgnoreCase("testFiles")) {
        testFiles = val;
      } else if (key.equalsIgnoreCase("textFiles")) {
        textFiles = val;
      } else if (key.equalsIgnoreCase("usePrediction2")) {
        usePrediction2 = Boolean.parseBoolean(val);
      } else if (key.equalsIgnoreCase("useObservedFeaturesOnly")) {
        useObservedFeaturesOnly = Boolean.parseBoolean(val);
      } else if (key.equalsIgnoreCase("iobWrapper")) {
        iobWrapper = Boolean.parseBoolean(val);
      } else if (key.equalsIgnoreCase("useDistSim")) {
        useDistSim = Boolean.parseBoolean(val);
      } else if (key.equalsIgnoreCase("casedDistSim")) {
        casedDistSim = Boolean.parseBoolean(val);
      } else if (key.equalsIgnoreCase("distSimFileFormat")) {
        distSimFileFormat = val;
      } else if (key.equalsIgnoreCase("distSimMaxBits")) {
        distSimMaxBits = Integer.parseInt(val);
      } else if (key.equalsIgnoreCase("numberEquivalenceDistSim")) {
        numberEquivalenceDistSim = Boolean.parseBoolean(val);
      } else if
(key.equalsIgnoreCase("unknownWordDistSimClass")) { unknownWordDistSimClass = val; } else if (key.equalsIgnoreCase("useOnlySeenWeights")) { useOnlySeenWeights = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("predProp")) { predProp = val; } else if (key.equalsIgnoreCase("distSimLexicon")) { distSimLexicon = val; } else if (key.equalsIgnoreCase("useSegmentation")) { useSegmentation = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useInternal")) { useInternal = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useExternal")) { useExternal = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useEitherSideWord")) { useEitherSideWord = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useEitherSideDisjunctive")) { useEitherSideDisjunctive = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("featureDiffThresh")) { featureDiffThresh = Double.parseDouble(val); if (props.getProperty("numTimesPruneFeatures") == null) { numTimesPruneFeatures = 1; } } else if (key.equalsIgnoreCase("numTimesPruneFeatures")) { numTimesPruneFeatures = Integer.parseInt(val); } else if (key.equalsIgnoreCase("newgeneThreshold")) { newgeneThreshold = Double.parseDouble(val); } else if (key.equalsIgnoreCase("adaptFile")) { adaptFile = val; } else if (key.equalsIgnoreCase("doAdaptation")) { doAdaptation = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("selfTrainFile")) { selfTrainFile = val; } else if (key.equalsIgnoreCase("selfTrainIterations")) { selfTrainIterations = Integer.parseInt(val); } else if (key.equalsIgnoreCase("selfTrainWindowSize")) { selfTrainWindowSize = Integer.parseInt(val); } else if (key.equalsIgnoreCase("selfTrainConfidenceThreshold")) { selfTrainConfidenceThreshold = Double.parseDouble(val); } else if (key.equalsIgnoreCase("numFolds")) { numFolds = Integer.parseInt(val); } else if (key.equalsIgnoreCase("startFold")) { startFold = Integer.parseInt(val); } else if (key.equalsIgnoreCase("endFold")) { endFold 
= Integer.parseInt(val); } else if (key.equalsIgnoreCase("adaptSigma")) { adaptSigma = Double.parseDouble(val); } else if (key.startsWith("prop") && !key.equals("prop")) { comboProps.add(val); } else if (key.equalsIgnoreCase("outputFormat")) { outputFormat = val; } else if (key.equalsIgnoreCase("useSMD")) { useSMD = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useScaledSGD")) { useScaledSGD = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("scaledSGDMethod")) { scaledSGDMethod = Integer.parseInt(val); } else if (key.equalsIgnoreCase("tuneSGD")) { tuneSGD = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("StochasticCalculateMethod")) { if (val.equalsIgnoreCase("AlgorithmicDifferentiation")) { stochasticMethod = StochasticCalculateMethods.AlgorithmicDifferentiation; } else if (val.equalsIgnoreCase("IncorporatedFiniteDifference")) { stochasticMethod = StochasticCalculateMethods.IncorporatedFiniteDifference; } else if (val.equalsIgnoreCase("ExternalFinitedifference")) { stochasticMethod = StochasticCalculateMethods.ExternalFiniteDifference; } } else if (key.equalsIgnoreCase("initialGain")) { initialGain = Double.parseDouble(val); } else if (key.equalsIgnoreCase("stochasticBatchSize")) { stochasticBatchSize = Integer.parseInt(val); } else if (key.equalsIgnoreCase("SGD2QNhessSamples")) { SGD2QNhessSamples = Integer.parseInt(val); } else if (key.equalsIgnoreCase("useSGD")) { useSGD = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useInPlaceSGD")) { useInPlaceSGD = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useSGDtoQN")) { useSGDtoQN = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("SGDPasses")) { SGDPasses = Integer.parseInt(val); } else if (key.equalsIgnoreCase("QNPasses")) { QNPasses = Integer.parseInt(val); } else if (key.equalsIgnoreCase("gainSGD")) { gainSGD = Double.parseDouble(val); } else if (key.equalsIgnoreCase("useHybrid")) { useHybrid = Boolean.parseBoolean(val); } else if 
(key.equalsIgnoreCase("hybridCutoffIteration")) { hybridCutoffIteration = Integer.parseInt(val); } else if (key.equalsIgnoreCase("useStochasticQN")) { useStochasticQN = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("outputIterationsToFile")) { outputIterationsToFile = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("testObjFunction")) { testObjFunction = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("testVariance")) { testVariance = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("CRForder")) { CRForder = Integer.parseInt(val); } else if (key.equalsIgnoreCase("CRFwindow")) { CRFwindow = Integer.parseInt(val); } else if (key.equalsIgnoreCase("testHessSamples")) { testHessSamples = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("estimateInitial")) { estimateInitial = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("printLabelValue")) { printLabelValue = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("searchGraphPrefix")) { searchGraphPrefix = val; } else if (key.equalsIgnoreCase("searchGraphPrune")) { searchGraphPrune = Double.parseDouble(val); } else if (key.equalsIgnoreCase("kBest")) { useKBest = true; kBest = Integer.parseInt(val); } else if (key.equalsIgnoreCase("useRobustQN")) { useRobustQN = true; } else if (key.equalsIgnoreCase("combo")) { combo = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("verboseForTrueCasing")) { verboseForTrueCasing = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("trainHierarchical")) { trainHierarchical = val; } else if (key.equalsIgnoreCase("domain")) { domain = val; } else if (key.equalsIgnoreCase("baseline")) { baseline = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("doFE")) { doFE = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("restrictLabels")) { restrictLabels = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("transferSigmas")) { transferSigmas = val; } else if 
(key.equalsIgnoreCase("announceObjectBankEntries")) { announceObjectBankEntries = true; } else if (key.equalsIgnoreCase("mixedCaseMapFile")) { mixedCaseMapFile = val; } else if (key.equalsIgnoreCase("auxTrueCaseModels")) { auxTrueCaseModels = val; } else if (key.equalsIgnoreCase("use2W")) { use2W = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useLC")) { useLC = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useYetMoreCpCShapes")) { useYetMoreCpCShapes = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useIfInteger")) { useIfInteger = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("twoStage")) { twoStage = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("evaluateIters")) { evaluateIters = Integer.parseInt(val); } else if (key.equalsIgnoreCase("evalCmd")) { evalCmd = val; } else if (key.equalsIgnoreCase("evaluateTrain")) { evaluateTrain = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("evaluateBackground")) { evaluateBackground = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("tuneSampleSize")) { tuneSampleSize = Integer.parseInt(val); } else if (key.equalsIgnoreCase("useTopics")) { useTopics = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("usePhraseFeatures")) { usePhraseFeatures = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("usePhraseWords")) { usePhraseWords = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("usePhraseWordTags")) { usePhraseWordTags = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("usePhraseWordSpecialTags")) { usePhraseWordSpecialTags = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useProtoFeatures")) { useProtoFeatures = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useWordnetFeatures")) { useWordnetFeatures = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("wikiFeatureDbFile")) { wikiFeatureDbFile = val; } else if (key.equalsIgnoreCase("tokenizerOptions")) { 
tokenizerOptions = val; } else if (key.equalsIgnoreCase("tokenizerFactory")) { tokenizerFactory = val; } else if (key.equalsIgnoreCase("useCommonWordsFeature")) { useCommonWordsFeature = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useYear")) { useYear = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useSentenceNumber")) { useSentenceNumber = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useLabelSource")) { useLabelSource = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("tokenFactory")) { tokenFactory = val; } else if (key.equalsIgnoreCase("tokensAnnotationClassName")) { tokensAnnotationClassName = val; } else if (key.equalsIgnoreCase("numLopExpert")) { numLopExpert = Integer.parseInt(val); } else if (key.equalsIgnoreCase("initialLopScales")) { initialLopScales = val; } else if (key.equalsIgnoreCase("initialLopWeights")) { initialLopWeights = val; } else if (key.equalsIgnoreCase("includeFullCRFInLOP")) { includeFullCRFInLOP = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("backpropLopTraining")) { backpropLopTraining = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("randomLopWeights")) { randomLopWeights = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("randomLopFeatureSplit")) { randomLopFeatureSplit = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("nonLinearCRF")) { nonLinearCRF = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("secondOrderNonLinear")) { secondOrderNonLinear = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("numHiddenUnits")) { numHiddenUnits = Integer.parseInt(val); } else if (key.equalsIgnoreCase("useOutputLayer")) { useOutputLayer = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useHiddenLayer")) { useHiddenLayer = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("gradientDebug")) { gradientDebug = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("checkGradient")) { checkGradient = 
Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useSigmoid")) { useSigmoid = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("skipOutputRegularization")) { skipOutputRegularization = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("sparseOutputLayer")) { sparseOutputLayer = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("tieOutputLayer")) { tieOutputLayer = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("blockInitialize")) { blockInitialize = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("softmaxOutputLayer")) { softmaxOutputLayer = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("loadBisequenceClassifierEn")) { loadBisequenceClassifierEn = val; } else if (key.equalsIgnoreCase("bisequenceClassifierPropEn")) { bisequenceClassifierPropEn = val; } else if (key.equalsIgnoreCase("loadBisequenceClassifierCh")) { loadBisequenceClassifierCh = val; } else if (key.equalsIgnoreCase("bisequenceClassifierPropCh")) { bisequenceClassifierPropCh = val; } else if (key.equalsIgnoreCase("bisequenceTestFileEn")) { bisequenceTestFileEn = val; } else if (key.equalsIgnoreCase("bisequenceTestFileCh")) { bisequenceTestFileCh = val; } else if (key.equalsIgnoreCase("bisequenceTestOutputEn")) { bisequenceTestOutputEn = val; } else if (key.equalsIgnoreCase("bisequenceTestOutputCh")) { bisequenceTestOutputCh = val; } else if (key.equalsIgnoreCase("bisequenceTestAlignmentFile")) { bisequenceTestAlignmentFile = val; } else if (key.equalsIgnoreCase("bisequenceAlignmentTestOutput")) { bisequenceAlignmentTestOutput = val; } else if (key.equalsIgnoreCase("bisequencePriorType")) { bisequencePriorType = Integer.parseInt(val); } else if (key.equalsIgnoreCase("bisequenceAlignmentPriorPenaltyCh")) { bisequenceAlignmentPriorPenaltyCh = val; } else if (key.equalsIgnoreCase("bisequenceAlignmentPriorPenaltyEn")) { bisequenceAlignmentPriorPenaltyEn = val; } else if (key.equalsIgnoreCase("alignmentPruneThreshold")) { 
alignmentPruneThreshold = Double.parseDouble(val); } else if (key.equalsIgnoreCase("alignmentDecodeThreshold")) { alignmentDecodeThreshold = Double.parseDouble(val); } else if (key.equalsIgnoreCase("factorInAlignmentProb")) { factorInAlignmentProb = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useChromaticSampling")) { useChromaticSampling = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useSequentialScanSampling")) { useSequentialScanSampling = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("maxAllowedChromaticSize")) { maxAllowedChromaticSize = Integer.parseInt(val); } else if (key.equalsIgnoreCase("keepEmptySentences")) { keepEmptySentences = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useBilingualNERPrior")) { useBilingualNERPrior = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("samplingSpeedUpThreshold")) { samplingSpeedUpThreshold = Integer.parseInt(val); } else if (key.equalsIgnoreCase("entityMatrixCh")) { entityMatrixCh = val; } else if (key.equalsIgnoreCase("entityMatrixEn")) { entityMatrixEn = val; } else if (key.equalsIgnoreCase("multiThreadGibbs")) { multiThreadGibbs = Integer.parseInt(val); } else if (key.equalsIgnoreCase("matchNERIncentive")) { matchNERIncentive = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useEmbedding")) { useEmbedding = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("prependEmbedding")) { prependEmbedding = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("embeddingWords")) { embeddingWords = val; } else if (key.equalsIgnoreCase("embeddingVectors")) { embeddingVectors = val; } else if (key.equalsIgnoreCase("transitionEdgeOnly")) { transitionEdgeOnly = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("priorLambda")) { priorLambda = Double.parseDouble(val); } else if (key.equalsIgnoreCase("addCapitalFeatures")) { addCapitalFeatures = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("arbitraryInputLayerSize")) 
{ arbitraryInputLayerSize = Integer.parseInt(val); } else if (key.equalsIgnoreCase("noEdgeFeature")) { noEdgeFeature = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("terminateOnEvalImprovement")) { terminateOnEvalImprovement = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("terminateOnEvalImprovementNumOfEpoch")) { terminateOnEvalImprovementNumOfEpoch = Integer.parseInt(val); } else if (key.equalsIgnoreCase("useMemoryEvaluator")) { useMemoryEvaluator = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("suppressTestDebug")) { suppressTestDebug = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useOWLQN")) { useOWLQN = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("printWeights")) { printWeights = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("totalDataSlice")) { totalDataSlice = Integer.parseInt(val); } else if (key.equalsIgnoreCase("numOfSlices")) { numOfSlices = Integer.parseInt(val); } else if (key.equalsIgnoreCase("regularizeSoftmaxTieParam")) { regularizeSoftmaxTieParam = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("softmaxTieLambda")) { softmaxTieLambda = Double.parseDouble(val); } else if (key.equalsIgnoreCase("totalFeatureSlice")) { totalFeatureSlice = Integer.parseInt(val); } else if (key.equalsIgnoreCase("numOfFeatureSlices")) { numOfFeatureSlices = Integer.parseInt(val); } else if (key.equalsIgnoreCase("addBiasToEmbedding")) { addBiasToEmbedding = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("hardcodeSoftmaxOutputWeights")) { hardcodeSoftmaxOutputWeights = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("entityMatrix")) { entityMatrix = val; } else if (key.equalsIgnoreCase("multiThreadClassifier")) { multiThreadClassifier = Integer.parseInt(val); } else if (key.equalsIgnoreCase("useDualDecomp")) { useDualDecomp = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("biAlignmentPriorIsPMI")) { biAlignmentPriorIsPMI = 
Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("dampDDStepSizeWithAlignmentProb")) { dampDDStepSizeWithAlignmentProb = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("dualDecompAlignment")) { dualDecompAlignment = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("dualDecompInitialStepSizeAlignment")) { dualDecompInitialStepSizeAlignment = Double.parseDouble(val); } else if (key.equalsIgnoreCase("dualDecompNotBIO")) { dualDecompNotBIO = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("berkeleyAlignerLoadPath")) { berkeleyAlignerLoadPath = val; } else if (key.equalsIgnoreCase("useBerkeleyAlignerForViterbi")) { useBerkeleyAlignerForViterbi = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useBerkeleyCompetitivePosterior")) { useBerkeleyCompetitivePosterior = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useDenero")) { useDenero = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("alignDDAlpha")) { alignDDAlpha = Double.parseDouble(val); } else if (key.equalsIgnoreCase("factorInBiEdgePotential")) { factorInBiEdgePotential = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("noNeighborConstraints")) { noNeighborConstraints = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("includeC2EViterbi")) { includeC2EViterbi = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("initWithPosterior")) { initWithPosterior = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("nerSlowerTimes")) { nerSlowerTimes = Integer.parseInt(val); } else if (key.equalsIgnoreCase("nerSkipFirstK")) { nerSkipFirstK = Integer.parseInt(val); } else if (key.equalsIgnoreCase("powerAlignProb")) { powerAlignProb = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("powerAlignProbAsAddition")) { powerAlignProbAsAddition = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("initWithNERPosterior")) { initWithNERPosterior = Boolean.parseBoolean(val); } else if 
(key.equalsIgnoreCase("applyNERPenalty")) { applyNERPenalty = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useGenericFeatures")) { useGenericFeatures = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("printFactorTable")) { printFactorTable = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useAdaGradFOBOS")) { useAdaGradFOBOS = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("initRate")) { initRate = Double.parseDouble(val); } else if (key.equalsIgnoreCase("groupByFeatureTemplate")) { groupByFeatureTemplate = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("groupByOutputClass")) { groupByOutputClass = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("priorAlpha")) { priorAlpha = Double.parseDouble(val); } else if (key.equalsIgnoreCase("splitWordRegex")) { splitWordRegex = val; } else if (key.equalsIgnoreCase("groupByInput")) { groupByInput = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("groupByHiddenUnit")) { groupByHiddenUnit = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("unigramLM")) { unigramLM = val; } else if (key.equalsIgnoreCase("bigramLM")) { bigramLM = val; } else if (key.equalsIgnoreCase("wordSegBeamSize")) { wordSegBeamSize = Integer.parseInt(val); } else if (key.equalsIgnoreCase("vocabFile")) { vocabFile = val; } else if (key.equalsIgnoreCase("normalizedFile")) { normalizedFile = val; } else if (key.equalsIgnoreCase("averagePerceptron")) { averagePerceptron = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("loadCRFSegmenterPath")) { loadCRFSegmenterPath = val; } else if (key.equalsIgnoreCase("loadPCTSegmenterPath")) { loadPCTSegmenterPath = val; } else if (key.equalsIgnoreCase("crfSegmenterProp")) { crfSegmenterProp = val; } else if (key.equalsIgnoreCase("pctSegmenterProp")) { pctSegmenterProp = val; } else if (key.equalsIgnoreCase("dualDecompMaxItr")) { dualDecompMaxItr = Integer.parseInt(val); } else if 
(key.equalsIgnoreCase("dualDecompInitialStepSize")) { dualDecompInitialStepSize = Double.parseDouble(val); } else if (key.equalsIgnoreCase("dualDecompDebug")) { dualDecompDebug = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("intermediateSegmenterOut")) { intermediateSegmenterOut = val; } else if (key.equalsIgnoreCase("intermediateSegmenterModel")) { intermediateSegmenterModel = val; } else if (key.equalsIgnoreCase("useCWSWordFeatures")) { useCWSWordFeatures = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useCWSWordFeaturesAll")) { useCWSWordFeaturesAll = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useCWSWordFeaturesBigram")) { useCWSWordFeaturesBigram = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("pctSegmenterLenAdjust")) { pctSegmenterLenAdjust = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useTrainLexicon")) { useTrainLexicon = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useCWSFeatures")) { useCWSFeatures = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("appendLC")) { appendLC = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("perceptronDebug")) { perceptronDebug = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("pctSegmenterScaleByCRF")) { pctSegmenterScaleByCRF = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("pctSegmenterScale")) { pctSegmenterScale = Double.parseDouble(val); } else if (key.equalsIgnoreCase("separateASCIIandRange")) { separateASCIIandRange = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("dropoutRate")) { dropoutRate = Double.parseDouble(val); } else if (key.equalsIgnoreCase("dropoutScale")) { dropoutScale = Double.parseDouble(val); } else if (key.equalsIgnoreCase("multiThreadGrad")) { multiThreadGrad = Integer.parseInt(val); } else if (key.equalsIgnoreCase("maxQNItr")) { maxQNItr = Integer.parseInt(val); } else if (key.equalsIgnoreCase("dropoutApprox")) { dropoutApprox = Boolean.parseBoolean(val); 
} else if (key.equalsIgnoreCase("unsupDropoutFile")) { unsupDropoutFile = val; } else if (key.equalsIgnoreCase("unsupDropoutScale")) { unsupDropoutScale = Double.parseDouble(val); } else if (key.equalsIgnoreCase("startEvaluateIters")) { startEvaluateIters = Integer.parseInt(val); } else if (key.equalsIgnoreCase("multiThreadPerceptron")) { multiThreadPerceptron = Integer.parseInt(val); } else if (key.equalsIgnoreCase("lazyUpdate")) { lazyUpdate = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("featureCountThresh")) { featureCountThresh = Integer.parseInt(val); } else if (key.equalsIgnoreCase("serializeWeightsTo")) { serializeWeightsTo = val; } else if (key.equalsIgnoreCase("geDebug")) { geDebug = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("doFeatureDiscovery")) { doFeatureDiscovery = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("loadWeightsFrom")) { loadWeightsFrom = val; } else if (key.equalsIgnoreCase("loadClassIndexFrom")) { loadClassIndexFrom = val; } else if (key.equalsIgnoreCase("serializeClassIndexTo")) { serializeClassIndexTo = val; } else if (key.equalsIgnoreCase("learnCHBasedOnEN")) { learnCHBasedOnEN = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("learnENBasedOnCH")) { learnENBasedOnCH = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("loadWeightsFromEN")) { loadWeightsFromEN = val; } else if (key.equalsIgnoreCase("loadWeightsFromCH")) { loadWeightsFromCH = val; } else if (key.equalsIgnoreCase("serializeToEN")) { serializeToEN = val; } else if (key.equalsIgnoreCase("serializeToCH")) { serializeToCH = val; } else if (key.equalsIgnoreCase("testFileEN")) { testFileEN = val; } else if (key.equalsIgnoreCase("testFileCH")) { testFileCH = val; } else if (key.equalsIgnoreCase("unsupFileEN")) { unsupFileEN = val; } else if (key.equalsIgnoreCase("unsupFileCH")) { unsupFileCH = val; } else if (key.equalsIgnoreCase("unsupAlignFile")) { unsupAlignFile = val; } else if 
(key.equalsIgnoreCase("supFileEN")) { supFileEN = val; } else if (key.equalsIgnoreCase("supFileCH")) { supFileCH = val; } else if (key.equalsIgnoreCase("serializeFeatureIndexTo")) { serializeFeatureIndexTo = val; } else if (key.equalsIgnoreCase("loadFeatureIndexFromEN")) { loadFeatureIndexFromEN = val; } else if (key.equalsIgnoreCase("loadFeatureIndexFromCH")) { loadFeatureIndexFromCH = val; } else if (key.equalsIgnoreCase("lambdaEN")) { lambdaEN = Double.parseDouble(val); } else if (key.equalsIgnoreCase("lambdaCH")) { lambdaCH = Double.parseDouble(val); } else if (key.equalsIgnoreCase("alternateTraining")) { alternateTraining = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("weightByEntropy")) { weightByEntropy = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useKL")) { useKL = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useHardGE")) { useHardGE = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useCRFforUnsup")) { useCRFforUnsup = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useGEforSup")) { useGEforSup = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useKnownLCWords")) { log.info("useKnownLCWords is deprecated; see maxAdditionalKnownLCWords (true = -1, false = 0)"); maxAdditionalKnownLCWords = Boolean.parseBoolean(val) ? 
-1 : 0; } else if (key.equalsIgnoreCase("useNoisyLabel")) { useNoisyLabel = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("errorMatrix")) { errorMatrix = val; } else if (key.equalsIgnoreCase("printTrainLabels")) { printTrainLabels = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("labelDictionaryCutoff")) { labelDictionaryCutoff = Integer.parseInt(val); } else if (key.equalsIgnoreCase("useAdaDelta")) { useAdaDelta = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useAdaDiff")) { useAdaDiff = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("adaGradEps")) { adaGradEps = Double.parseDouble(val); } else if (key.equalsIgnoreCase("adaDeltaRho")) { adaDeltaRho = Double.parseDouble(val); } else if (key.equalsIgnoreCase("useRandomSeed")) { useRandomSeed = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("terminateOnAvgImprovement")) { terminateOnAvgImprovement = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("strictGoodCoNLL")) { strictGoodCoNLL = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("removeStrictGoodCoNLLDuplicates")) { removeStrictGoodCoNLLDuplicates = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("priorModelFactory")) { priorModelFactory = val; } else if (key.equalsIgnoreCase("maxAdditionalKnownLCWords")) { maxAdditionalKnownLCWords = Integer.parseInt(val); } else if (key.equalsIgnoreCase("showNCCInfo")) { showNCCInfo = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("showCCInfo")) { showCCInfo = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("crfToExamine")) { crfToExamine = val; } else if (key.equalsIgnoreCase("ner.useSUTime")) { useSUTime = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("ner.applyNumericClassifiers")) { applyNumericClassifiers = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("ner.combinationMode")) { combinationMode = val; } else if (key.equalsIgnoreCase("ner.model")) { nerModel = val; } else if 
(key.equalsIgnoreCase("sutime.language")) {
      // NOTE(review): no field is set for this key — presumably the value is read
      // directly from the Properties by SUTime elsewhere; confirm before removing.
      // ADD VALUE ABOVE HERE
    } else if (!key.isEmpty() && !key.equals("prop")) {
      // Any unrecognized, non-empty property is reported but otherwise ignored.
      log.info("Unknown property: |" + key + '|');
    }
  }
  // Clamp the cross-validation fold bounds so they stay within [1, numFolds].
  if (startFold > numFolds) {
    log.info("startFold > numFolds -> setting startFold to 1");
    startFold = 1;
  }
  if (endFold > numFolds) {
    log.info("endFold > numFolds -> setting to numFolds");
    endFold = numFolds;
  }
  if (combo) {
    // Classifier combination operates on whole documents, so disable splitting.
    splitDocuments = false;
  }
  // Keep a printable record of all the properties that were processed above.
  stringRep = sb.toString();
} // end setProperties()

// Thang Sep13: refactor to be used for multiple factories.
/**
 * Expands a short feature-factory name into its fully qualified class name.
 * A value matching none of the known short names is returned unchanged
 * (it is assumed to already be a fully qualified class name).
 *
 * @param val Short or fully qualified feature factory class name
 * @return The fully qualified feature factory class name
 */
private static String getFeatureFactory(String val) {
  if (val.equalsIgnoreCase("SuperSimpleFeatureFactory")) {
    val = "edu.stanford.nlp.sequences.SuperSimpleFeatureFactory";
  } else if (val.equalsIgnoreCase("NERFeatureFactory")) {
    val = "edu.stanford.nlp.ie.NERFeatureFactory";
  } else if (val.equalsIgnoreCase("GazNERFeatureFactory")) {
    val = "edu.stanford.nlp.sequences.GazNERFeatureFactory";
  } else if (val.equalsIgnoreCase("IncludeAllFeatureFactory")) {
    val = "edu.stanford.nlp.sequences.IncludeAllFeatureFactory";
  } else if (val.equalsIgnoreCase("PhraseFeatureFactory")) {
    val = "edu.stanford.nlp.article.extraction.PhraseFeatureFactory";
  } else if (val.equalsIgnoreCase("EmbeddingFeatureFactory")) {
    val = "edu.stanford.nlp.ie.EmbeddingFeatureFactory";
  }
  return val;
}

/**
 * Print the properties specified by this object.
 *
 * @return A String describing the properties specified by this object.
*/
@Override
public String toString() {
  return stringRep;
}

/**
 * Field types whose values are always printed by {@link #getNotNullTrueStringRep()}:
 * the numeric and char primitives plus their wrapper classes. A null wrapper prints
 * as {@code name=null}, matching the historical behavior of this method.
 */
private static final Set<Class<?>> ALWAYS_PRINTED_TYPES = new HashSet<>(Arrays.asList(
    Double.class, double.class, Integer.class, int.class, Float.class, float.class,
    Byte.class, byte.class, char.class, Long.class, long.class));

/**
 * Returns a newline-separated {@code name=value} listing of the public fields of this
 * object, skipping boolean fields that are false and String fields that are null.
 * Note that this does *not* return a string representation of arrays, lists and enums.
 * Reflection failures are printed to stderr and yield an empty string rather than
 * propagating an exception (so, unlike the old Javadoc suggested, nothing is thrown).
 *
 * @return The (possibly empty) listing; each entry is preceded by a newline
 */
public String getNotNullTrueStringRep() {
  try {
    StringBuilder rep = new StringBuilder();
    String joiner = "\n";
    for (Field ff : this.getClass().getFields()) {
      appendFieldForStringRep(rep, joiner, ff);
    }
    return rep.toString();
  } catch (Exception e) {
    e.printStackTrace();
    return "";
  }
}

/** Appends one field's {@code name=value} entry to {@code rep} if its type and value qualify. */
private void appendFieldForStringRep(StringBuilder rep, String joiner, Field ff)
    throws IllegalAccessException {
  String name = ff.getName();
  Class<?> type = ff.getType();
  if (type.equals(Boolean.class) || type.equals(boolean.class)) {
    // Bug fix: the old code called Field.getBoolean(), which throws
    // IllegalArgumentException for wrapper-typed (Boolean) fields and made the
    // whole method bail out with "". get() + equals handles both forms safely.
    Object boxed = ff.get(this);
    if (Boolean.TRUE.equals(boxed)) {
      rep.append(joiner).append(name).append('=').append(true);
    }
  } else if (type.equals(String.class)) {
    String val = (String) ff.get(this);
    if (val != null) {
      rep.append(joiner).append(name).append('=').append(val);
    }
  } else if (ALWAYS_PRINTED_TYPES.contains(type)) {
    // get() boxes primitives; the box's string form is identical to the
    // primitive's own, so output matches the old per-type append branches.
    rep.append(joiner).append(name).append('=').append(ff.get(this));
  }
}

} // end class SeqClassifierFlags