de.fau.cs.osr.hddiff.perfsuite.EditScriptAnalysis.java Source code

Java tutorial

Introduction

Here is the source code for de.fau.cs.osr.hddiff.perfsuite.EditScriptAnalysis.java

Source

/**
 * Copyright 2011 The Open Source Research Group,
 *                University of Erlangen-Nürnberg
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Affero General Public License as
 * published by the Free Software Foundation, either version 3 of the
 * License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU Affero General Public License for more details.
 *
 * You should have received a copy of the GNU Affero General Public License
 * along with this program.  If not, see http://www.gnu.org/licenses/.
 */
package de.fau.cs.osr.hddiff.perfsuite;

import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;

import org.apache.commons.lang3.StringUtils;

import com.sksamuel.diffpatch.DiffMatchPatch;
import com.sksamuel.diffpatch.DiffMatchPatch.Diff;

import de.fau.cs.osr.hddiff.editscript.EditOp;
import de.fau.cs.osr.hddiff.editscript.EditOpDelete;
import de.fau.cs.osr.hddiff.editscript.EditOpInsert;
import de.fau.cs.osr.hddiff.editscript.EditOpMove;
import de.fau.cs.osr.hddiff.editscript.EditOpUpdate;
import de.fau.cs.osr.hddiff.utils.ReportItem;

/**
 * Collects statistics about an edit script.
 *
 * <p>
 * Every recorded {@link Edit} is kept in insertion order and additionally
 * indexed by its operation and by its node label. Alongside, the analysis
 * accumulates the total number of characters inserted and deleted. The
 * figures can be rendered as text ({@link #toString()},
 * {@link #toShortString()}) or handed to a {@link ReportItem}
 * ({@link #report(ReportItem, String)}).
 */
public class EditScriptAnalysis {
    /** Index into the array returned by {@link #countDiffChars(List)}. */
    private static final int INSERTED = 0;

    /** Index into the array returned by {@link #countDiffChars(List)}. */
    private static final int DELETED = 1;

    /** All recorded edits in the order they were added. */
    private final LinkedList<Edit> editScript = new LinkedList<>();

    /** Recorded edits grouped by operation. */
    private final Map<GenericEditOp, List<Edit>> editsByOp = new HashMap<>();

    /** Recorded edits grouped by node label; edits without a label use the {@code null} key. */
    private final Map<String, List<Edit>> editsByLabel = new HashMap<>();

    /** Running total of inserted characters. */
    private int charsInserted;

    /** Running total of deleted characters. */
    private int charsDeleted;

    /** Text differ; only initialized by {@link #EditScriptAnalysis(List)}. */
    private DiffMatchPatch dfp;

    // =========================================================================

    /**
     * Creates an empty analysis that is filled through {@link #add(Edit)}.
     */
    public EditScriptAnalysis() {
    }

    /**
     * Creates an analysis from a plain text diff. Only the character totals
     * are populated; no {@link Edit} is recorded.
     *
     * @param diff
     *            the text diff whose inserted and deleted characters are
     *            counted
     */
    public EditScriptAnalysis(LinkedList<Diff> diff) {
        int[] counts = countDiffChars(diff);
        charsInserted += counts[INSERTED];
        charsDeleted += counts[DELETED];
    }

    /**
     * Creates an analysis from a tree edit script. Insert, delete, move and
     * update operations are recorded; split operations are skipped.
     *
     * @param es
     *            the edit operations to analyze
     */
    public EditScriptAnalysis(List<EditOp> es) {
        dfp = new DiffMatchPatch();
        for (EditOp eo : es) {
            switch (eo.getType()) {
            case INSERT:
                addInsert((EditOpInsert) eo);
                break;
            case DELETE:
                addDelete((EditOpDelete) eo);
                break;
            case MOVE:
                addMove((EditOpMove) eo);
                break;
            case UPDATE:
                addUpdate((EditOpUpdate) eo);
                break;
            case SPLIT:
                // Splits were already applied during diffing.
                break;
            }
        }
    }

    // =========================================================================

    /**
     * Sums the text lengths of the insert and delete entries of a text diff.
     * Entries with any other operation are ignored.
     *
     * @param diff
     *            the text diff to inspect
     * @return a two element array holding the inserted character count at
     *         index {@link #INSERTED} and the deleted character count at
     *         index {@link #DELETED}
     */
    private static int[] countDiffChars(List<Diff> diff) {
        int[] counts = new int[2];
        for (Diff d : diff) {
            switch (d.operation) {
            case DELETE:
                counts[DELETED] += d.text.length();
                break;
            case INSERT:
                counts[INSERTED] += d.text.length();
                break;
            case EQUAL:
            default:
                break;
            }
        }
        return counts;
    }

    /**
     * Records an update. For a text leaf the old and new text are diffed and
     * the edit carries the resulting character counts (and no label); for any
     * other node the edit carries only the node label.
     */
    private void addUpdate(EditOpUpdate upd) {
        if (!upd.getUpdatedNode().isTextLeaf()) {
            add(new Edit(GenericEditOp.UPD, upd.getUpdatedNode().getLabel(), null));
            return;
        }

        LinkedList<Diff> diff = dfp.diff_main(upd.getUpdatedNode().getTextContent(),
                upd.getUpdatedNodeRight().getTextContent(), false);

        // The totals are accumulated by add(); only per-edit counts are
        // computed here.
        int[] counts = countDiffChars(diff);
        add(new Edit(GenericEditOp.UPD, counts[INSERTED], counts[DELETED]));
    }

    /**
     * Records a move under the label of the moved node.
     */
    private void addMove(EditOpMove mov) {
        add(new Edit(GenericEditOp.MOV, mov.getMovedNode().getLabel(), null));
    }

    /**
     * Records a deletion under the label of the deleted node. For a text leaf
     * a second, label-less edit carrying the deleted text is recorded as well.
     */
    private void addDelete(EditOpDelete del) {
        add(new Edit(GenericEditOp.DEL, del.getDeletedNode().getLabel(), null));

        if (del.getDeletedNode().isTextLeaf()) {
            add(new Edit(GenericEditOp.DEL, null, del.getDeletedNode().getTextContent()));
        }
    }

    /**
     * Records an insertion under the label of the inserted node. For a text
     * leaf a second, label-less edit carrying the inserted text is recorded as
     * well.
     */
    private void addInsert(EditOpInsert ins) {
        add(new Edit(GenericEditOp.INS, ins.getInsertedNode().getLabel(), null));

        if (ins.getInsertedNode().isTextLeaf()) {
            add(new Edit(GenericEditOp.INS, null, ins.getInsertedNode().getTextContent()));
        }
    }

    /**
     * Records an edit and updates the character totals: the text length for
     * inserts and deletes that carry text, the explicit counts for updates.
     *
     * @param edit
     *            the edit to record; neither it nor its operation may be
     *            {@code null}
     * @throws NullPointerException
     *             if {@code edit} or {@code edit.op} is {@code null}; the
     *             analysis is left unmodified in that case
     */
    public void add(Edit edit) {
        // Reject invalid input before touching any state. Otherwise the edit
        // would already sit in the list and both indexes by the time the
        // switch below fails on the null operation.
        if (edit == null) {
            throw new NullPointerException("edit must not be null");
        }
        if (edit.op == null) {
            throw new NullPointerException("edit.op must not be null");
        }

        editScript.add(edit);
        addMultiMap(editsByOp, edit.op, edit);
        addMultiMap(editsByLabel, edit.label, edit);

        switch (edit.op) {
        case DEL:
            if (edit.text != null) {
                charsDeleted += edit.text.length();
            }
            break;

        case INS:
            if (edit.text != null) {
                charsInserted += edit.text.length();
            }
            break;

        case UPD:
            charsInserted += edit.ins;
            charsDeleted += edit.del;
            break;

        default:
            break;
        }
    }

    /**
     * @return a read-only view of all recorded edits in insertion order
     */
    public List<Edit> getEditScript() {
        return Collections.unmodifiableList(editScript);
    }

    // =========================================================================

    /**
     * @return the short summary followed by the per-label statistics for
     *         inserts, deletes, moves and updates
     */
    @Override
    public String toString() {
        return toShortString() + printNodeStatsByOp(GenericEditOp.INS) + printNodeStatsByOp(GenericEditOp.DEL)
                + printNodeStatsByOp(GenericEditOp.MOV) + printNodeStatsByOp(GenericEditOp.UPD);
    }

    /**
     * @return a multi-line summary with the edit script length, the number of
     *         edits per operation and the character totals
     */
    public String toShortString() {
        return String.format(
                "" + "GenericEditScriptAnalysis:\n" + "  Edit script length: %d\n" + "    Insertions: %4d\n"
                        + "    Deletions:  %4d\n" + "    Moves:      %4d\n" + "    Updates:    %4d\n"
                        + "    Chars Ins:  %4d\n" + "    Chars Del:  %4d\n",
                editScript.size(), getMultiMapSize(editsByOp, GenericEditOp.INS),
                getMultiMapSize(editsByOp, GenericEditOp.DEL), getMultiMapSize(editsByOp, GenericEditOp.MOV),
                getMultiMapSize(editsByOp, GenericEditOp.UPD), charsInserted, charsDeleted);
    }

    /**
     * Records the collected figures on the given report item.
     *
     * @param item
     *            the report item that receives the figures
     * @param cat
     *            prefix prepended to every figure name
     */
    public void report(ReportItem item, String cat) {
        item.recordFigure(cat + "a) Edit script size", editScript.size(), "#ops");
        item.recordFigure(cat + "b) Inserts", getMultiMapSize(editsByOp, GenericEditOp.INS), "#ops");
        item.recordFigure(cat + "c) Deletes", getMultiMapSize(editsByOp, GenericEditOp.DEL), "#ops");
        item.recordFigure(cat + "d) Moves", getMultiMapSize(editsByOp, GenericEditOp.MOV), "#ops");
        item.recordFigure(cat + "e) Updates", getMultiMapSize(editsByOp, GenericEditOp.UPD), "#ops");
        // NOTE(review): the figure letters skip "f)", and the two character
        // counts below are recorded with the unit "#ops" -- confirm whether
        // both are intended before changing them; consumers may key on these
        // strings.
        item.recordFigure(cat + "g) Chars ins", charsInserted, "#ops");
        item.recordFigure(cat + "h) Chars del", charsDeleted, "#ops");
    }

    /**
     * Renders how many edits with the given operation were recorded per node
     * label. Labels are listed in sorted order, followed by the label-less
     * edits, which are shown as {@code #text}. Labels without a matching edit
     * are omitted.
     *
     * @param needle
     *            the operation to count
     * @return the rendered statistics, or the empty string if no recorded
     *         edit has the given operation
     */
    private String printNodeStatsByOp(GenericEditOp needle) {
        // Sort the real labels and append the null key last so that the
        // label-less bucket is always printed at the end.
        HashSet<String> labels = new HashSet<>(editsByLabel.keySet());
        labels.remove(null);
        LinkedList<String> keys = new LinkedList<>(labels);
        Collections.sort(keys);
        keys.add(null);

        StringBuilder b = new StringBuilder();
        b.append(String.format("    %s by label:\n", needle));
        boolean empty = true;
        for (String key : keys) {
            int count = 0;
            List<Edit> ops = editsByLabel.get(key);
            if (ops != null) {
                for (Edit op : ops) {
                    if (op.op == needle) {
                        ++count;
                    }
                }
            }
            if (count > 0) {
                empty = false;
                String displayKey = (key == null) ? "#text" : key;
                String paddedKey = StringUtils.rightPad(StringUtils.abbreviate(displayKey, 16) + ":", 17);
                b.append(String.format("      %s %4d\n", paddedKey, count));
            }
        }
        return empty ? "" : b.toString();
    }

    // =========================================================================

    /**
     * Appends a value to the list stored under the given key, creating the
     * list on first use.
     */
    private static <K, V> void addMultiMap(Map<K, List<V>> map, K key, V value) {
        List<V> bucket = map.get(key);
        if (bucket == null) {
            bucket = new LinkedList<>();
            map.put(key, bucket);
        }
        bucket.add(value);
    }

    /**
     * @return the number of values stored under the given key, or 0 if the
     *         key has no list
     */
    private static <K, V> int getMultiMapSize(Map<K, List<V>> map, K key) {
        List<V> bucket = map.get(key);
        return (bucket == null) ? 0 : bucket.size();
    }

    // =========================================================================

    /**
     * The operations an {@link Edit} can represent.
     */
    public enum GenericEditOp {
        DEL, INS, MOV, UPD, NONE
    }

    // =========================================================================

    /**
     * One immutable entry of the analyzed edit script. An edit either carries
     * a label and/or text, or explicit inserted/deleted character counts,
     * depending on the constructor used.
     */
    public static final class Edit {
        /** The operation this edit represents. */
        public final GenericEditOp op;

        /** The label of the affected node, or {@code null}. */
        public final String label;

        /** The affected text, or {@code null}. */
        public final String text;

        /** Number of inserted characters; 0 unless set via the count constructor. */
        public final int ins;

        /** Number of deleted characters; 0 unless set via the count constructor. */
        public final int del;

        // ---------------------------------------------------------------------

        /**
         * Creates an edit described by a node label and/or text. Both
         * character counts are set to 0.
         *
         * @param op
         *            the operation
         * @param label
         *            the node label, may be {@code null}
         * @param text
         *            the affected text, may be {@code null}
         */
        public Edit(GenericEditOp op, String label, String text) {
            this.op = op;
            this.label = label;
            this.text = text;
            this.ins = 0;
            this.del = 0;
        }

        /**
         * Creates an edit described by character counts. Label and text are
         * set to {@code null}.
         *
         * @param op
         *            the operation
         * @param ins
         *            the number of inserted characters
         * @param del
         *            the number of deleted characters
         */
        public Edit(GenericEditOp op, int ins, int del) {
            this.op = op;
            this.label = null;
            this.text = null;
            this.ins = ins;
            this.del = del;
        }

        @Override
        public String toString() {
            return "Edit [op=" + op + ", label=" + label + ", text=" + text + ", ins=" + ins + ", del=" + del + "]";
        }
    }
}