Java tutorial
/* * Copyright 2009-2011 Collaborative Research Centre SFB 632 * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package annis; import annis.dao.AnnotatedMatch; import annis.dao.AnnotatedSpan; import annis.model.Annotation; import java.util.ArrayList; import java.util.HashMap; import java.util.List; import java.util.Map; import java.util.SortedMap; import java.util.SortedSet; import java.util.TreeMap; import java.util.TreeSet; import org.apache.commons.lang3.StringUtils; /** * * @author thomas */ public class WekaHelper { public static String exportAsArff(List<AnnotatedMatch> annotatedMatches) { StringBuilder sb = new StringBuilder(); // header: relation name (unused) sb.append("@relation name\n"); sb.append("\n"); // figure out what annotations are used at each match position SortedMap<Integer, SortedSet<String>> columnsByNodePos = new TreeMap<Integer, SortedSet<String>>(); for (int i = 0; i < annotatedMatches.size(); ++i) { AnnotatedMatch match = annotatedMatches.get(i); for (int j = 0; j < match.size(); ++j) { AnnotatedSpan span = match.get(j); if (columnsByNodePos.get(j) == null) { columnsByNodePos.put(j, new TreeSet<String>()); } for (Annotation annotation : span.getAnnotations()) { columnsByNodePos.get(j).add("anno_" + annotation.getQualifiedName()); } for (Annotation meta : span.getMetadata()) { columnsByNodePos.get(j).add("meta_" + meta.getQualifiedName()); } } } // print column names and data types int count = columnsByNodePos.keySet().size(); for (int j = 0; j < count; ++j) { sb.append("@attribute ").append(fullColumnName(j + 1, "id")).append(" string\n"); sb.append("@attribute ").append(fullColumnName(j + 1, "span")).append(" string\n"); SortedSet<String> annotationNames = columnsByNodePos.get(j); for (String name : annotationNames) { sb.append("@attribute ").append(fullColumnName(j + 1, name)).append(" string\n"); } } sb.append("\n@data\n\n"); // print values for (AnnotatedMatch match : annotatedMatches) { List<String> line = new ArrayList<String>(); int k = 0; for (; k < match.size(); ++k) { AnnotatedSpan span = match.get(k); Map<String, String> valueByName = new HashMap<String, String>(); if (span != null) { if (span.getAnnotations() != null) { for (Annotation annotation : span.getAnnotations()) { valueByName.put("anno_" + annotation.getQualifiedName(), annotation.getValue()); } } if (span.getMetadata() != null) { for (Annotation meta : span.getMetadata()) { valueByName.put("meta_" + meta.getQualifiedName(), meta.getValue()); } } line.add("'" + span.getId() + "'"); line.add("'" + span.getCoveredText().replace("'", "\\'") + "'"); } for (String name : columnsByNodePos.get(k)) { if (valueByName.containsKey(name)) { line.add("'" + valueByName.get(name).replace("'", "\\'") + "'"); } else { line.add("'NULL'"); } } } for (int l = k; l < count; ++l) { line.add("'NULL'"); for (int m = 0; m <= columnsByNodePos.get(l).size(); ++m) { line.add("'NULL'"); } } sb.append(StringUtils.join(line, ",")); sb.append("\n"); } return sb.toString(); } private static String fullColumnName(int i, String name) { return "#" + i + "_" + name; } }