Java tutorial
/* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. * The ASF licenses this file to You under the Apache License, Version 2.0 * (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package opennlp.tools.parser; import java.io.BufferedReader; import java.io.File; import java.io.IOException; import java.io.InputStream; import java.io.InputStreamReader; import java.io.OutputStream; import java.io.OutputStreamWriter; import java.net.URL; import java.nio.charset.StandardCharsets; import java.nio.file.Path; import java.util.Map; import java.util.Objects; import opennlp.tools.chunker.ChunkerModel; import opennlp.tools.ml.model.AbstractModel; import opennlp.tools.ml.model.MaxentModel; import opennlp.tools.postag.POSModel; import opennlp.tools.util.InvalidFormatException; import opennlp.tools.util.model.ArtifactSerializer; import opennlp.tools.util.model.BaseModel; import opennlp.tools.util.model.ChunkerModelSerializer; import opennlp.tools.util.model.POSModelSerializer; /** * This is an abstract base class for {@link ParserModel} implementations. */ // TODO: Model should validate the artifact map public class ParserModel extends BaseModel { private static class HeadRulesSerializer implements ArtifactSerializer<opennlp.tools.parser.lang.en.HeadRules> { public opennlp.tools.parser.lang.en.HeadRules create(InputStream in) throws IOException, InvalidFormatException { return new opennlp.tools.parser.lang.en.HeadRules( new BufferedReader(new InputStreamReader(in, StandardCharsets.UTF_8))); } public void serialize(opennlp.tools.parser.lang.en.HeadRules artifact, OutputStream out) throws IOException { artifact.serialize(new OutputStreamWriter(out, StandardCharsets.UTF_8)); } } private static final String COMPONENT_NAME = "Parser"; private static final String BUILD_MODEL_ENTRY_NAME = "build.model"; private static final String CHECK_MODEL_ENTRY_NAME = "check.model"; private static final String ATTACH_MODEL_ENTRY_NAME = "attach.model"; private static final String PARSER_TAGGER_MODEL_ENTRY_NAME = "parsertager.postagger"; private static final String CHUNKER_TAGGER_MODEL_ENTRY_NAME = "parserchunker.chunker"; private static final String HEAD_RULES_MODEL_ENTRY_NAME = "head-rules.headrules"; private static final String PARSER_TYPE = "parser-type"; public ParserModel(String languageCode, MaxentModel buildModel, MaxentModel checkModel, MaxentModel attachModel, POSModel parserTagger, ChunkerModel chunkerTagger, opennlp.tools.parser.HeadRules headRules, ParserType modelType, Map<String, String> manifestInfoEntries) { super(COMPONENT_NAME, languageCode, manifestInfoEntries); setManifestProperty(PARSER_TYPE, modelType.name()); artifactMap.put(BUILD_MODEL_ENTRY_NAME, buildModel); artifactMap.put(CHECK_MODEL_ENTRY_NAME, checkModel); if (ParserType.CHUNKING.equals(modelType)) { if (attachModel != null) throw new IllegalArgumentException("attachModel must be null for chunking parser!"); } else if (ParserType.TREEINSERT.equals(modelType)) { Objects.requireNonNull(attachModel, "attachModel must not be null"); artifactMap.put(ATTACH_MODEL_ENTRY_NAME, attachModel); } else { throw new IllegalStateException("Unknown ParserType '" + modelType + "'!"); } artifactMap.put(PARSER_TAGGER_MODEL_ENTRY_NAME, parserTagger); artifactMap.put(CHUNKER_TAGGER_MODEL_ENTRY_NAME, chunkerTagger); artifactMap.put(HEAD_RULES_MODEL_ENTRY_NAME, headRules); checkArtifactMap(); } public ParserModel(String languageCode, MaxentModel buildModel, MaxentModel checkModel, MaxentModel attachModel, POSModel parserTagger, ChunkerModel chunkerTagger, opennlp.tools.parser.HeadRules headRules, ParserType modelType) { this(languageCode, buildModel, checkModel, attachModel, parserTagger, chunkerTagger, headRules, modelType, null); } public ParserModel(String languageCode, MaxentModel buildModel, MaxentModel checkModel, POSModel parserTagger, ChunkerModel chunkerTagger, opennlp.tools.parser.HeadRules headRules, ParserType type, Map<String, String> manifestInfoEntries) { this(languageCode, buildModel, checkModel, null, parserTagger, chunkerTagger, headRules, type, manifestInfoEntries); } public ParserModel(InputStream in) throws IOException { super(COMPONENT_NAME, in); } public ParserModel(File modelFile) throws IOException { super(COMPONENT_NAME, modelFile); } public ParserModel(Path modelPath) throws IOException { this(modelPath.toFile()); } public ParserModel(URL modelURL) throws IOException { super(COMPONENT_NAME, modelURL); } @Override protected void createArtifactSerializers(Map<String, ArtifactSerializer> serializers) { super.createArtifactSerializers(serializers); // In 1.6.x the headrules artifact is serialized with the new API // which uses the Serializeable interface // This change is not backward compatible with the 1.5.x models. // In order to laod 1.5.x model the English headrules serializer must be // put on the serializer map. if (getVersion().getMajor() == 1 && getVersion().getMinor() == 5) { serializers.put("headrules", new HeadRulesSerializer()); } serializers.put("postagger", new POSModelSerializer()); serializers.put("chunker", new ChunkerModelSerializer()); } public ParserType getParserType() { return ParserType.parse(getManifestProperty(PARSER_TYPE)); } public MaxentModel getBuildModel() { return (MaxentModel) artifactMap.get(BUILD_MODEL_ENTRY_NAME); } public MaxentModel getCheckModel() { return (MaxentModel) artifactMap.get(CHECK_MODEL_ENTRY_NAME); } public MaxentModel getAttachModel() { return (MaxentModel) artifactMap.get(ATTACH_MODEL_ENTRY_NAME); } public POSModel getParserTaggerModel() { return (POSModel) artifactMap.get(PARSER_TAGGER_MODEL_ENTRY_NAME); } public ChunkerModel getParserChunkerModel() { return (ChunkerModel) artifactMap.get(CHUNKER_TAGGER_MODEL_ENTRY_NAME); } public opennlp.tools.parser.HeadRules getHeadRules() { return (opennlp.tools.parser.HeadRules) artifactMap.get(HEAD_RULES_MODEL_ENTRY_NAME); } // TODO: Update model methods should make sure properties are copied correctly ... public ParserModel updateBuildModel(MaxentModel buildModel) { return new ParserModel(getLanguage(), buildModel, getCheckModel(), getAttachModel(), getParserTaggerModel(), getParserChunkerModel(), getHeadRules(), getParserType()); } public ParserModel updateCheckModel(MaxentModel checkModel) { return new ParserModel(getLanguage(), getBuildModel(), checkModel, getAttachModel(), getParserTaggerModel(), getParserChunkerModel(), getHeadRules(), getParserType()); } public ParserModel updateTaggerModel(POSModel taggerModel) { return new ParserModel(getLanguage(), getBuildModel(), getCheckModel(), getAttachModel(), taggerModel, getParserChunkerModel(), getHeadRules(), getParserType()); } public ParserModel updateChunkerModel(ChunkerModel chunkModel) { return new ParserModel(getLanguage(), getBuildModel(), getCheckModel(), getAttachModel(), getParserTaggerModel(), chunkModel, getHeadRules(), getParserType()); } @Override protected void validateArtifactMap() throws InvalidFormatException { super.validateArtifactMap(); if (!(artifactMap.get(BUILD_MODEL_ENTRY_NAME) instanceof AbstractModel)) { throw new InvalidFormatException("Missing the build model!"); } ParserType modelType = getParserType(); if (modelType != null) { if (ParserType.CHUNKING.equals(modelType)) { if (artifactMap.get(ATTACH_MODEL_ENTRY_NAME) != null) throw new InvalidFormatException("attachModel must be null for chunking parser!"); } else if (ParserType.TREEINSERT.equals(modelType)) { if (!(artifactMap.get(ATTACH_MODEL_ENTRY_NAME) instanceof AbstractModel)) throw new InvalidFormatException("attachModel must not be null!"); } else { throw new InvalidFormatException("Unknown ParserType '" + modelType + "'!"); } } else { throw new InvalidFormatException("Missing the parser type property!"); } if (!(artifactMap.get(CHECK_MODEL_ENTRY_NAME) instanceof AbstractModel)) { throw new InvalidFormatException("Missing the check model!"); } if (!(artifactMap.get(PARSER_TAGGER_MODEL_ENTRY_NAME) instanceof POSModel)) { throw new InvalidFormatException("Missing the tagger model!"); } if (!(artifactMap.get(CHUNKER_TAGGER_MODEL_ENTRY_NAME) instanceof ChunkerModel)) { throw new InvalidFormatException("Missing the chunker model!"); } if (!(artifactMap.get(HEAD_RULES_MODEL_ENTRY_NAME) instanceof HeadRules)) { throw new InvalidFormatException("Missing the head rules!"); } } }