AdocDiffMarker.java
package network.ike.docs.plugin.diff;
import com.github.difflib.DiffUtils;
import com.github.difflib.patch.AbstractDelta;
import com.github.difflib.patch.Patch;
import java.util.ArrayList;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
* Marks the differences between two versions of an AsciiDoc fragment as
* inline {@code [.diff-ins]#…#} / {@code [.diff-del]#…#} roles, producing
* a renderable "marked copy" for a doc-diff review packet
* (ike-issues#648, #649).
*
* <p>The marker is deliberately conservative. Constructs that inline role
* markup would corrupt are <em>protected</em> — never marked in place —
* and their changes are reported in a per-topic change-summary
* {@code [NOTE]} block instead:
*
* <ul>
* <li>the header region before the {@code [[anchor]]} line
* (comment header and attribute block);</li>
* <li>attribute-value continuation lines (trailing backslash);</li>
* <li>verbatim, listing, passthrough, and table interiors;</li>
* <li>block-attribute, include, indexterm, anchor, and table-cell
* lines, and block delimiters;</li>
* <li>lines carrying inline macros that word-splicing could break
* ({@code xref:}, {@code k:}, {@code ((…))}, {@code footnote:},
* {@code image:}, …);</li>
* <li>lines containing the role delimiter character {@code #}.</li>
* </ul>
*
* <p>Markable lines get a line-level diff with word-level refinement
* inside changed line pairs. Because IKE fragments use semantic line
* breaks, line-level deltas are already clause-grade. Structural
* prefixes (heading and list markers) are kept outside the role so an
* inserted heading or list item stays syntactically valid.
*
* <p>All methods are pure functions of their inputs — no I/O — so the
* marking rules are unit-testable in isolation.
*/
public final class AdocDiffMarker {
/** Lines never marked inline; changes to them are reported as notes. */
private static final Pattern PROTECTED = Pattern.compile(
"^(:[A-Za-z0-9_-]+:|//|\\[\\[|\\[|include::|indexterm:|\\||<<<|\\+$|''')");
/** Block delimiter lines (also protected). */
private static final Pattern DELIMITERS = Pattern.compile(
"^(----|\\.\\.\\.\\.|====|\\*\\*\\*\\*|\\+\\+\\+\\+|____|\\|===|--)\\s*$");
/** Inline constructs that make word-splicing risky → whole-line fallback. */
private static final Pattern RISKY_INLINE = Pattern.compile(
"(xref:|image:|pass:|k:[A-Za-z]|\\(\\(|footnote:|btn:|kbd:)");
/** Structural prefix kept outside the role (headings, list markers). */
private static final Pattern STRUCTURAL_PREFIX = Pattern.compile(
"^((?:=+\\s)|(?:\\*+\\s)|(?:\\.+\\s)|(?:-\\s)|(?:\\d+\\.\\s))");
/** Delimiters that open a verbatim region (interior never marked). */
private static final Pattern VERBATIM_OPEN = Pattern.compile(
"^(----|\\.\\.\\.\\.|\\+\\+\\+\\+|\\|===)\\s*$");
private AdocDiffMarker() {
}
/**
* The result of marking one fragment: the marked lines, word-level
* insertion/deletion counts, and the notes describing changes to
* protected constructs.
*
* @param lines the marked fragment, line by line, including the
* injected change-summary note block
* @param insWords approximate count of inserted words
* @param delWords approximate count of deleted words
* @param notes human-readable notes for changes the marker could
* not show inline
*/
public record MarkResult(List<String> lines, int insWords, int delWords, List<String> notes) {
}
/**
* Source of STAMP endnote refs for marked regions (ike-issues#656).
* When provided, each change boundary's last marked line gains a
* reusable footnote ref — collapsed to one per paragraph when every
* boundary in the paragraph shares the same stamp.
*/
public interface StampSource {
/**
* The footnote macro for an Active region (insertion or
* replacement) whose last new-side line has the given index.
*
* @param newLineIndex the region's last to-side line index
* @return the {@code footnote:id[text]} macro, or {@code null}
* to skip stamping this region
*/
String activeRef(int newLineIndex);
/**
* The footnote macro for an Inactive region (unpaired deletion).
*
* @return the {@code footnote:id[text]} macro, or {@code null}
*/
String inactiveRef();
}
/**
* Produce a marked copy of a changed fragment, without stamps.
*
* @param oldLines the fragment's lines on the from side
* @param newLines the fragment's lines on the to side
* @return the marked fragment with a change-summary note injected
* after the level-1 title
*/
public static MarkResult mark(List<String> oldLines, List<String> newLines) {
return mark(oldLines, newLines, null);
}
/**
* Produce a marked copy of a changed fragment, with optional STAMP
* endnote refs per change boundary.
*
* @param oldLines the fragment's lines on the from side
* @param newLines the fragment's lines on the to side
* @param stamps stamp-ref source, or {@code null} for no stamps
* @return the marked fragment with a change-summary note injected
* after the level-1 title
*/
public static MarkResult mark(List<String> oldLines, List<String> newLines,
StampSource stamps) {
Patch<String> patch = DiffUtils.diff(oldLines, newLines);
List<String> out = new ArrayList<>();
List<String> notes = new ArrayList<>();
State state = new State();
int[] words = new int[2]; // ins, del
List<int[]> pendingRefs = new ArrayList<>(); // [outIdx]
List<String> pendingRefText = new ArrayList<>();
int newPos = 0;
for (AbstractDelta<String> delta : patch.getDeltas()) {
// Copy the equal region preceding this delta from the new side.
while (newPos < delta.getTarget().getPosition()) {
String l = newLines.get(newPos);
if (l.isBlank()) {
flushParagraphRefs(out, pendingRefs, pendingRefText);
}
emit(out, state, l);
newPos++;
}
List<String> oldRun = delta.getSource().getLines();
List<String> newRun = delta.getTarget().getLines();
int outStart = out.size();
boolean markedAny = applyDelta(oldRun, newRun, out, notes, state, words);
if (stamps != null && markedAny && out.size() > outStart) {
// Stamp the last substantive line: walk back past trailing
// blanks (and the EOF phantom) on both the to side — for
// blame lookup — and the output — for ref attachment.
int lastNew = delta.getTarget().getPosition() + newRun.size() - 1;
while (lastNew > delta.getTarget().getPosition()
&& newLines.get(lastNew).isBlank()) {
lastNew--;
}
String ref = newRun.isEmpty()
? stamps.inactiveRef()
: stamps.activeRef(lastNew);
if (ref != null) {
int attach = out.size() - 1;
while (attach > outStart && out.get(attach).isBlank()) {
attach--;
}
if (!out.get(attach).isBlank()) {
pendingRefs.add(new int[] {attach});
pendingRefText.add(ref);
}
}
}
newPos += newRun.size();
}
while (newPos < newLines.size()) {
String l = newLines.get(newPos);
if (l.isBlank()) {
flushParagraphRefs(out, pendingRefs, pendingRefText);
}
emit(out, state, l);
newPos++;
}
flushParagraphRefs(out, pendingRefs, pendingRefText);
List<String> withNote = injectSummary(out, words[0], words[1], notes);
return new MarkResult(withNote, words[0], words[1], notes);
}
/**
* Attach the paragraph's pending stamp refs: one ref at the last
* boundary when all boundaries share a stamp, otherwise each
* boundary keeps its own.
*
* @param out the marked stream
* @param pendingRefs boundary end positions in the stream
* @param pendingRefText the corresponding footnote macros
*/
private static void flushParagraphRefs(List<String> out, List<int[]> pendingRefs,
List<String> pendingRefText) {
if (pendingRefs.isEmpty()) {
return;
}
boolean uniform = pendingRefText.stream().distinct().count() == 1;
for (int i = 0; i < pendingRefs.size(); i++) {
if (uniform && i < pendingRefs.size() - 1) {
continue;
}
int idx = pendingRefs.get(i)[0];
out.set(idx, out.get(idx) + pendingRefText.get(i));
}
pendingRefs.clear();
pendingRefText.clear();
}
/** Block-attribute line opening a diagram block. */
private static final Pattern DIAGRAM_ATTR = Pattern.compile(
"^\\[(plantuml|graphviz|mermaid)\\b.*]\\s*$");
/**
* One diagram block found in a fragment.
*
* @param kind the diagram kind (plantuml, graphviz, mermaid)
* @param body the lines between the delimiters
* @param closeLine the index of the closing delimiter line
*/
private record DiagramBlock(String kind, List<String> body, int closeLine) {
}
/**
* Append, after every diagram block whose body meaningfully changed
* between the two sides, the compact source delta followed by the
* rendered previous version — so the reader sees the new drawing,
* what changed in the spec, and the old drawing, in that order.
* Blocks are paired per diagram kind in occurrence order; bodies
* are compared whitespace-insensitively, so formatting-only edits
* never produce a pair, and diagrams only present on one side are
* left alone (the change-summary notes already report removed
* lines).
*
* @param oldLines the fragment's from-side lines
* @param markedLines the marked fragment (its verbatim regions carry
* the to-side diagram blocks unmodified)
* @return the marked fragment with delta-plus-previous blocks
* inserted
*/
public static List<String> withDiagramHistory(List<String> oldLines,
List<String> markedLines) {
List<DiagramBlock> oldBlocks = diagramBlocks(oldLines);
List<DiagramBlock> newBlocks = diagramBlocks(markedLines);
List<String> out = new ArrayList<>(markedLines);
// Bottom-up so earlier insertions don't shift later indices.
for (int i = newBlocks.size() - 1; i >= 0; i--) {
DiagramBlock now = newBlocks.get(i);
DiagramBlock before = nthOfKind(oldBlocks, now.kind(), kindIndex(newBlocks, i));
if (before == null
|| normalized(before.body()).equals(normalized(now.body()))) {
continue;
}
List<String> insert = new ArrayList<>();
insert.add("");
insert.add("[.diff-meta]##This diagram changed — source delta, then the previous rendering:##");
insert.add("");
insert.add("[source,diff]");
insert.add("-----");
insert.addAll(bodyDelta(before.body(), now.body()));
insert.add("-----");
insert.add("");
insert.add("[" + now.kind() + "]");
insert.add("----");
insert.addAll(before.body());
insert.add("----");
out.addAll(now.closeLine() + 1, insert);
}
return out;
}
private static List<String> normalized(List<String> body) {
return body.stream().map(String::strip).filter(s -> !s.isEmpty()).toList();
}
private static List<String> bodyDelta(List<String> oldBody, List<String> newBody) {
List<String> out = new ArrayList<>();
for (AbstractDelta<String> d : DiffUtils.diff(oldBody, newBody).getDeltas()) {
d.getSource().getLines().forEach(l -> out.add("- " + l));
d.getTarget().getLines().forEach(l -> out.add("+ " + l));
}
return out;
}
private static List<DiagramBlock> diagramBlocks(List<String> lines) {
List<DiagramBlock> out = new ArrayList<>();
for (int i = 0; i < lines.size(); i++) {
Matcher m = DIAGRAM_ATTR.matcher(lines.get(i).strip());
if (!m.matches()) {
continue;
}
int open = i + 1;
while (open < lines.size() && lines.get(open).isBlank()) {
open++;
}
if (open >= lines.size() || !lines.get(open).strip().equals("----")) {
continue;
}
int close = open + 1;
while (close < lines.size() && !lines.get(close).strip().equals("----")) {
close++;
}
if (close >= lines.size()) {
continue;
}
out.add(new DiagramBlock(m.group(1),
List.copyOf(lines.subList(open + 1, close)), close));
i = close;
}
return out;
}
private static int kindIndex(List<DiagramBlock> blocks, int position) {
String kind = blocks.get(position).kind();
int n = 0;
for (int i = 0; i < position; i++) {
if (blocks.get(i).kind().equals(kind)) {
n++;
}
}
return n;
}
private static DiagramBlock nthOfKind(List<DiagramBlock> blocks, String kind, int n) {
int seen = 0;
for (DiagramBlock b : blocks) {
if (b.kind().equals(kind)) {
if (seen == n) {
return b;
}
seen++;
}
}
return null;
}
/**
* Produce a full-text copy of an added fragment with a banner note
* instead of inline marks.
*
* @param newLines the fragment's lines on the to side
* @param fromRef the from-side ref name, used in the banner text
* @return the banner-annotated fragment
*/
public static MarkResult markAdded(List<String> newLines, String fromRef) {
return markAdded(newLines, fromRef, null);
}
/**
* Produce a full-text copy of an added fragment with a banner note,
* optionally stamped — an added topic is one Active stamp, carried
* on the banner rather than in the flow.
*
* @param newLines the fragment's lines on the to side
* @param fromRef the from-side ref name, used in the banner text
* @param stampRef the stamp footnote macro, or {@code null}
* @return the banner-annotated fragment
*/
public static MarkResult markAdded(List<String> newLines, String fromRef, String stampRef) {
List<String> out = new ArrayList<>();
boolean placed = false;
for (String l : newLines) {
out.add(l);
if (!placed && l.startsWith("= ")) {
out.add("");
out.add("[NOTE]");
out.add("====");
out.add("[.diff-meta]#New topic — did not exist at " + fromRef
+ "; full text follows.#" + (stampRef == null ? "" : stampRef));
out.add("====");
placed = true;
}
}
int ins = newLines.stream().mapToInt(l -> l.split("\\s+").length).sum();
return new MarkResult(out, ins, 0, List.of());
}
private static boolean applyDelta(List<String> oldRun, List<String> newRun,
List<String> out, List<String> notes,
State state, int[] words) {
boolean markedAny = false;
int max = Math.max(oldRun.size(), newRun.size());
for (int k = 0; k < max; k++) {
String o = k < oldRun.size() ? oldRun.get(k) : null;
String n = k < newRun.size() ? newRun.get(k) : null;
if (o != null && n != null) {
boolean ok = inlineOk(o, state);
boolean nk = inlineOk(n, state);
String merged = (ok && nk) ? wordMerge(o, n) : null;
if (merged != null) {
emit(out, state, merged);
markedAny = true;
} else if (ok && nk) {
out.add(wrapPrefixed("diff-del", o));
emit(out, state, wrapPrefixed("diff-ins", n));
markedAny = true;
} else {
emit(out, state, n);
notes.add("changed (shown as new): `" + clip(o, 60)
+ "` -> `" + clip(n, 60) + "`");
}
words[0] += wordCount(n);
words[1] += wordCount(o);
} else if (n != null) {
if (inlineOk(n, state)) {
emit(out, state, wrapPrefixed("diff-ins", n));
markedAny = true;
} else {
emit(out, state, n);
if (!n.isBlank()) {
notes.add("added (shown unmarked): `" + clip(n, 90) + "`");
}
}
words[0] += wordCount(n);
} else if (o != null) {
if (inlineOk(o, state)) {
out.add(wrapPrefixed("diff-del", o));
markedAny = true;
} else if (!o.isBlank()) {
notes.add("removed: `" + clip(o, 90) + "`");
}
words[1] += wordCount(o);
}
}
return markedAny;
}
/**
* Minimum share of a line pair's characters that must be unchanged
* for word-level merging; below this the pair reads better as a
* whole struck line followed by a whole inserted line (rewrites
* produce unreadable interleave when word-merged).
*/
private static final double MERGE_SIMILARITY_FLOOR = 0.4;
/**
* Merge one old/new line pair with word-level ins/del roles, or
* return {@code null} when the pair's structural prefixes differ or
* the pair is too dissimilar (a rewrite), in which case the caller
* falls back to whole-line marking.
*
* @param oldLine the from-side line
* @param newLine the to-side line
* @return the merged line, or {@code null} when no readable merge
* exists
*/
static String wordMerge(String oldLine, String newLine) {
String[] po = splitPrefix(oldLine);
String[] pn = splitPrefix(newLine);
if (!po[0].equals(pn[0])) {
return null;
}
List<String> ot = tokenize(po[1]);
List<String> nt = tokenize(pn[1]);
Patch<String> patch = DiffUtils.diff(ot, nt);
int changed = 0;
for (AbstractDelta<String> d : patch.getDeltas()) {
changed += d.getSource().getLines().stream().mapToInt(String::length).sum()
+ d.getTarget().getLines().stream().mapToInt(String::length).sum();
}
int total = po[1].length() + pn[1].length();
if (total > 0 && (double) (total - changed) / total < MERGE_SIMILARITY_FLOOR) {
return null;
}
StringBuilder sb = new StringBuilder(pn[0]);
int pos = 0;
for (AbstractDelta<String> d : patch.getDeltas()) {
while (pos < d.getTarget().getPosition()) {
sb.append(nt.get(pos));
pos++;
}
String o = String.join("", d.getSource().getLines());
String n = String.join("", d.getTarget().getLines());
if (!o.isBlank()) {
sb.append(wrap("diff-del", o.strip())).append(' ');
} else {
sb.append(o);
}
if (!n.isBlank()) {
sb.append(wrap("diff-ins", n.strip()));
if (n.endsWith(" ")) {
sb.append(' ');
}
} else if (!n.isEmpty() && o.isEmpty()) {
sb.append(n);
}
pos += d.getTarget().getLines().size();
}
while (pos < nt.size()) {
sb.append(nt.get(pos));
pos++;
}
return sb.toString().stripTrailing();
}
/**
* Decide whether a line may carry inline role markup given the
* current stream state.
*
* @param line the candidate line
* @param state the marker's stream state (header, continuation,
* verbatim tracking)
* @return whether inline marking is safe for this line
*/
static boolean inlineOk(String line, State state) {
if (state.inHeader || state.continuation || state.inVerbatim()) {
return false;
}
String s = line.stripTrailing();
if (s.isBlank() || PROTECTED.matcher(s).find() || DELIMITERS.matcher(s).matches()) {
return false;
}
if (s.endsWith("\\") || s.indexOf('#') >= 0) {
return false;
}
return !RISKY_INLINE.matcher(s).find();
}
private static void emit(List<String> out, State state, String line) {
state.feed(line);
out.add(line);
}
private static List<String> injectSummary(List<String> lines, int ins, int del,
List<String> notes) {
List<String> out = new ArrayList<>(lines.size() + 8);
boolean placed = false;
for (String l : lines) {
out.add(l);
if (!placed && l.startsWith("= ")) {
out.add("");
out.add("[NOTE]");
out.add("====");
out.add("[.diff-meta]#Change summary: ~" + ins + " words added, ~"
+ del + " words removed.#");
if (!notes.isEmpty()) {
out.add("");
Set<String> seen = new LinkedHashSet<>(notes);
for (String n : seen) {
out.add("* [.diff-meta]#" + n + "#");
}
}
out.add("====");
placed = true;
}
}
return out;
}
private static String wrap(String role, String text) {
// Unconstrained form (##…##): a constrained role fails to parse at
// non-word boundaries (after ';', ']', …) and leaks literally into
// the render. Content with a literal '#' never reaches here — such
// lines are protected by inlineOk().
return text.isBlank() ? text : "[." + role + "]##" + text + "##";
}
private static String wrapPrefixed(String role, String line) {
String[] p = splitPrefix(line);
return p[0] + wrap(role, p[1]);
}
private static String[] splitPrefix(String s) {
Matcher m = STRUCTURAL_PREFIX.matcher(s);
if (m.find()) {
return new String[] {m.group(0), s.substring(m.end())};
}
return new String[] {"", s};
}
private static List<String> tokenize(String s) {
List<String> tokens = new ArrayList<>();
int i = 0;
while (i < s.length()) {
int start = i;
boolean ws = Character.isWhitespace(s.charAt(i));
while (i < s.length() && Character.isWhitespace(s.charAt(i)) == ws) {
i++;
}
tokens.add(s.substring(start, i));
}
return tokens;
}
private static int wordCount(String s) {
String t = s.strip();
return t.isEmpty() ? 0 : t.split("\\s+").length;
}
private static String clip(String s, int max) {
String t = s.strip();
return t.length() <= max ? t : t.substring(0, max);
}
/**
* Mutable stream state threaded through one marking pass: whether we
* are still in the pre-anchor header region, whether the previous
* emitted line ended in an attribute continuation, and the verbatim
* delimiter currently open (if any).
*/
static final class State {
private boolean inHeader = true;
private boolean continuation;
private String verbatim;
/**
* Update the state from one emitted line.
*
* @param line the line just emitted to the marked stream
*/
void feed(String line) {
if (inHeader && line.startsWith("[[")) {
inHeader = false;
}
continuation = line.stripTrailing().endsWith("\\");
String s = line.strip();
if (verbatim == null) {
if (VERBATIM_OPEN.matcher(s).matches()) {
verbatim = s;
}
} else if (s.equals(verbatim)) {
verbatim = null;
}
}
/**
* Whether the stream is currently inside a verbatim region.
*
* @return whether a verbatim delimiter is open
*/
boolean inVerbatim() {
return verbatim != null;
}
}
}