Java tutorial
/** * This is JUFIT, the Jena UMLS Filter Copyright (C) 2015 JULIE LAB Authors: * Johannes Hellrich and Sven Buechel * * This program is free software; you can redistribute it and/or modify it under * the terms of the GNU General Public License as published by the Free Software * Foundation; either version 2 of the License, or (at your option) any later * version. * * This program is distributed in the hope that it will be useful, but WITHOUT * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more * details. * * You should have received a copy of the GNU General Public License along with * this program; if not, write to the Free Software Foundation, Inc., 51 * Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */ package de.julielab.umlsfilter.rules; import java.util.ArrayList; import java.util.Map; import java.util.regex.Matcher; import java.util.regex.Pattern; import org.apache.commons.lang.ArrayUtils; import com.google.common.base.Joiner; /** * If term contains syntactic Inversion, reestablish normal word order and * return old and new version of term. * * @author hellrich * */ public class RewriteSyntacticInversion extends Rule { private static final Joiner SPACE_JOINER = Joiner.on(" "); private static final Joiner DASH_JOINER = Joiner.on("-"); private static final String RULENAME = "SYN"; private final Matcher containsDash = Pattern.compile("\\P{Z}-\\p{Z}").matcher(""); private final Matcher upperThenLowerFirst = Pattern .compile("^(.* )*\\p{Lu}\\p{javaLowerCase}\\p{javaLowerCase}+-$").matcher(""); private final Matcher upperThenLowerSecond = Pattern.compile("^(.* )*\\p{Lu}\\p{Ll}\\p{Ll}\\p{Ll}+$") .matcher(""); private final Matcher lowerDashLower = Pattern.compile("^\\p{Ll}+-\\p{Ll}+-$").matcher(""); private final Matcher doubleDash = Pattern.compile("-.*-").matcher(""); private final boolean compound; private final boolean destructive; public RewriteSyntacticInversion(final boolean compound, final boolean destructive) { super(RULENAME); this.compound = compound; this.destructive = destructive; } public RewriteSyntacticInversion(final Map<String, String[]> parameters) { super(RULENAME); if (!parameters.containsKey(PARAMETER_COMPOUND) || (parameters.get(PARAMETER_COMPOUND).length != 1) || !parameters.containsKey(PARAMETER_DESTRUCTIVE) || (parameters.get(PARAMETER_DESTRUCTIVE).length != 1)) throw new IllegalArgumentException(); compound = Boolean.parseBoolean(parameters.get(PARAMETER_COMPOUND)[0]); destructive = Boolean.parseBoolean(parameters.get(PARAMETER_DESTRUCTIVE)[0]); } @Override public ArrayList<TermWithSource> applyOnOneTerm(final TermWithSource tws) { ArrayList<TermWithSource> out = null; final String s1 = tws.getTerm(); if (s1.contains(", ") && !s1.substring(s1.indexOf(", ") + 2).contains(", ") && !s1.contains("-, ")) { final String[] strings = s1.split(", "); ArrayUtils.reverse(strings); String s2 = SPACE_JOINER.join(strings).trim(); if (containsDash.reset(s2).find()) if (!compound) s2 = s2.replaceAll("- +", "-"); else if (!doubleDash.reset(strings[0]).find() && upperThenLowerFirst.reset(strings[0]).matches() && upperThenLowerSecond.reset(strings[1]).matches()) s2 = strings[0].substring(0, strings[0].length() - 1) + strings[1].toLowerCase(); else if (!tws.getIsChem() && lowerDashLower.reset(strings[0]).matches() && upperThenLowerSecond.reset(strings[1]).matches()) { final String[] splits2 = strings[0].substring(0, strings[0].length() - 1).split("-"); for (int i = 0; i < splits2.length; ++i) splits2[i] = Character.toUpperCase(splits2[i].charAt(0)) + splits2[i].substring(1, splits2[i].length()); s2 = DASH_JOINER.join(splits2) + "-" + strings[1]; } else s2 = s2.replaceAll("- +", "-"); if (!s1.equals(s2) && !s2.equals("")) { out = new ArrayList<>(); out.add(new TermWithSource(s2, tws.getLanguage(), tws.getIsChem(), tws.getMdifiedByRulesList(), ruleName)); } } if ((out != null) && destructive) tws.supress(); return out; } }