// OutputValidator.java
package network.ike.docs.plugin;
import java.io.IOException;
import java.net.URLDecoder;
import java.nio.charset.StandardCharsets;
import java.nio.file.*;
import java.nio.file.attribute.BasicFileAttributes;
import java.util.ArrayList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * Scans generated AsciiDoc output for common conversion problems.
 *
 * <p>Detects unresolved attributes, missing includes, broken cross-references,
 * and missing images. Results are returned as a list of {@link Issue} records
 * for the caller to log or fail the build.
 *
 * <p>All checks are line-based text heuristics; the output is never parsed as
 * HTML or XML. The only state carried from one line to the next is whether the
 * scanner is currently inside a verbatim element (see {@code VERBATIM_TAG}).
 */
public class OutputValidator {

    /** Unresolved attribute reference: {some-attribute} in output text. */
    private static final Pattern UNRESOLVED_ATTR =
            Pattern.compile("\\{[a-zA-Z][a-zA-Z0-9_-]*\\}");

    /** AsciidoctorJ's marker for a missing include directive. */
    private static final String UNRESOLVED_INCLUDE = "Unresolved directive in";

    /** Broken cross-reference marker. */
    private static final String BROKEN_XREF = "[broken]";

    /** HTML image tag — group 1 captures the src path. */
    private static final Pattern IMG_SRC =
            Pattern.compile("<img[^>]+src=\"([^\"]+)\"", Pattern.CASE_INSENSITIVE);

    /**
     * Leading URI scheme such as {@code http:}, {@code https:} or {@code data:}.
     * At least two characters are required before the colon so that a Windows
     * drive letter ({@code C:}) is not mistaken for a scheme.
     */
    private static final Pattern URI_SCHEME =
            Pattern.compile("[a-zA-Z][a-zA-Z0-9+.-]+:");

    /**
     * Opening or closing tag of an element whose text content is verbatim, so
     * curly braces inside it are not attribute references: HTML
     * {@code pre}/{@code script}/{@code style} and DocBook
     * {@code programlisting}/{@code screen}/{@code literallayout}.
     *
     * <p>Group 1 is {@code "/"} for a closing tag, group 2 is the element name,
     * and group 3 is {@code "/"} for a self-closing tag. The word boundary keeps
     * longer names such as {@code preface} or {@code screenshot} from matching.
     */
    private static final Pattern VERBATIM_TAG = Pattern.compile(
            "<(/?)(pre|script|style|programlisting|screen|literallayout)\\b[^>]*?(/?)>",
            Pattern.CASE_INSENSITIVE);

    /** Identifier made only of upper-case ASCII letters and underscores. */
    private static final Pattern UPPERCASE_IDENTIFIER = Pattern.compile("[A-Z_]+");

    /**
     * Substrings that mark a whole line as "not prose": any brace expression on
     * such a line is assumed to be code, CSS, a URL template, or similar.
     */
    private static final List<String> FALSE_POSITIVE_LINE_MARKERS = List.of(
            // Code markup and syntax-highlighter class names
            "<code", "</code>", "<pre", "</pre>", "CodeRay", "highlight",
            // AsciiDoc source examples (include:: directives shown as text)
            "include::",
            // CSS: var(--foo) or content: "{bar}"
            "var(--", "content:",
            // Opening tags of script and style blocks
            "<script", "<style",
            // URL templates
            "://",
            // Revision/metadata lines in AsciiDoc source listings
            ":revnumber:", ":revdate:", ":docdate");

    /** Creates a new output validator. */
    public OutputValidator() {}

    /** Severity levels for validation issues. */
    public enum Severity {
        /** Non-fatal issue (e.g., missing image). */
        WARNING,
        /** Fatal issue when strict mode is enabled (e.g., unresolved attribute). */
        ERROR
    }

    /**
     * A single validation issue found in a generated file.
     *
     * @param file the file containing the issue
     * @param line the 1-based line number
     * @param severity WARNING or ERROR
     * @param message human-readable description
     */
    public record Issue(Path file, int line, Severity severity, String message) {
        /**
         * Formats the issue as {@code [SEVERITY] file-name:line — message}.
         * Only the last path element of {@link #file()} is shown.
         */
        @Override
        public String toString() {
            return "[%s] %s:%d — %s".formatted(severity, file.getFileName(), line, message);
        }
    }

    /**
     * Validate all files in the given output directory.
     *
     * <p>The directory is walked recursively. Files are selected by name:
     * {@code *.html} for the HTML backend and {@code *.xml} for DocBook. PDF
     * output is binary and is not scanned.
     *
     * @param outputDir the directory to scan
     * @param backend the backend that produced the output
     * @return list of issues found (empty if clean, if {@code outputDir} is not
     *         a directory, or if the backend's output cannot be scanned)
     * @throws IOException if directory traversal or reading a file fails
     */
    public List<Issue> validate(Path outputDir, Backend backend) throws IOException {
        if (!Files.isDirectory(outputDir)) {
            return List.of();
        }
        List<Issue> issues = new ArrayList<>();
        String extension = switch (backend) {
            case HTML -> "*.html";
            case DOCBOOK -> "*.xml";
            case PDF -> null; // binary — skip
        };
        if (extension == null) {
            return issues;
        }
        PathMatcher matcher = outputDir.getFileSystem().getPathMatcher("glob:" + extension);
        Files.walkFileTree(outputDir, new SimpleFileVisitor<>() {
            @Override
            public FileVisitResult visitFile(Path file, BasicFileAttributes attrs)
                    throws IOException {
                // Match on the bare file name so the glob applies at any depth.
                if (matcher.matches(file.getFileName())) {
                    validateFile(file, backend, issues);
                }
                return FileVisitResult.CONTINUE;
            }
        });
        return issues;
    }

    /**
     * Runs every check against one generated file and appends findings to
     * {@code issues}.
     *
     * @param file the generated file to read
     * @param backend the backend that produced the file; image checks run for HTML only
     * @param issues accumulator that receives any issues found
     * @throws IOException if the file cannot be read
     */
    private void validateFile(Path file, Backend backend, List<Issue> issues) throws IOException {
        List<String> lines = Files.readAllLines(file);
        // Name of the verbatim element we are currently inside, or null when in normal markup.
        String openVerbatim = null;
        for (int i = 0; i < lines.size(); i++) {
            String line = lines.get(i);
            int lineNum = i + 1;

            // A line is verbatim if it starts inside a verbatim element or carries any
            // verbatim tag itself. Interior lines of a multi-line listing have no marker
            // of their own, which is why the state must be carried across lines.
            boolean verbatimLine = openVerbatim != null;
            Matcher tag = VERBATIM_TAG.matcher(line);
            while (tag.find()) {
                verbatimLine = true;
                boolean closing = !tag.group(1).isEmpty();
                boolean selfClosing = !tag.group(3).isEmpty();
                String name = tag.group(2);
                if (openVerbatim == null) {
                    if (!closing && !selfClosing) {
                        openVerbatim = name;
                    }
                } else if (closing && openVerbatim.equalsIgnoreCase(name)) {
                    // Only the matching end tag leaves the element; tag-like text inside
                    // a script or listing must not disturb the state.
                    openVerbatim = null;
                }
            }

            // Unresolved include directives
            if (line.contains(UNRESOLVED_INCLUDE)) {
                issues.add(new Issue(file, lineNum, Severity.ERROR,
                        "Unresolved include directive"));
            }

            // Broken cross-references
            if (line.contains(BROKEN_XREF)) {
                issues.add(new Issue(file, lineNum, Severity.ERROR,
                        "Broken cross-reference"));
            }

            // Unresolved attribute references (never reported for verbatim content)
            if (!verbatimLine) {
                Matcher attrMatcher = UNRESOLVED_ATTR.matcher(line);
                while (attrMatcher.find()) {
                    String attr = attrMatcher.group();
                    if (!isFalsePositive(attr, line)) {
                        issues.add(new Issue(file, lineNum, Severity.ERROR,
                                "Unresolved attribute: " + attr));
                    }
                }
            }

            // Missing images (HTML only)
            if (backend == Backend.HTML) {
                checkImages(file, line, lineNum, issues);
            }
        }
    }

    /** Adds a WARNING for every local {@code <img src>} on the line that is not on disk. */
    private static void checkImages(Path file, String line, int lineNum, List<Issue> issues) {
        Matcher imgMatcher = IMG_SRC.matcher(line);
        while (imgMatcher.find()) {
            String src = imgMatcher.group(1);
            if (isLocalReference(src) && !localImageExists(file, src)) {
                issues.add(new Issue(file, lineNum, Severity.WARNING,
                        "Missing image: " + src));
            }
        }
    }

    /**
     * Returns whether {@code src} points into the local file system, i.e. it has
     * no URI scheme ({@code http:}, {@code data:}, ...) and is not a
     * protocol-relative {@code //host/path} reference.
     */
    private static boolean isLocalReference(String src) {
        return !src.startsWith("//") && !URI_SCHEME.matcher(src).lookingAt();
    }

    /**
     * Returns whether the image referenced by {@code src} exists relative to the
     * directory of {@code htmlFile}.
     *
     * <p>The literal value is tried first. If that fails, the value is retried
     * without its query string or fragment, and then with percent-escapes decoded
     * (for example {@code %20} for a space), since a URL in an HTML attribute may
     * carry either.
     */
    private static boolean localImageExists(Path htmlFile, String src) {
        if (existsBeside(htmlFile, src)) {
            return true;
        }
        String path = stripQueryAndFragment(src);
        if (path.isEmpty()) {
            return false;
        }
        if (!path.equals(src) && existsBeside(htmlFile, path)) {
            return true;
        }
        String decoded = percentDecode(path);
        return !decoded.equals(path) && existsBeside(htmlFile, decoded);
    }

    /** Returns {@code src} up to, but not including, the first {@code ?} or {@code #}. */
    private static String stripQueryAndFragment(String src) {
        int end = src.length();
        int query = src.indexOf('?');
        if (query >= 0) {
            end = query;
        }
        int fragment = src.indexOf('#');
        if (fragment >= 0 && fragment < end) {
            end = fragment;
        }
        return src.substring(0, end);
    }

    /**
     * Decodes {@code %XX} escapes as UTF-8. Returns the input unchanged when it
     * has no escapes or when an escape is malformed.
     */
    private static String percentDecode(String path) {
        if (path.indexOf('%') < 0) {
            return path;
        }
        try {
            // URLDecoder implements form decoding, which would turn '+' into a space;
            // protect literal plus signs so only %XX escapes are affected.
            return URLDecoder.decode(path.replace("+", "%2B"), StandardCharsets.UTF_8);
        } catch (IllegalArgumentException ignored) {
            // Malformed escape such as "100%.png": treat the text as already literal.
            return path;
        }
    }

    /**
     * Returns whether {@code relative}, resolved against the directory that
     * contains {@code file}, exists. A string that is not a valid path on this
     * file system cannot name an existing file, so it yields {@code false}
     * instead of propagating {@link InvalidPathException}.
     */
    private static boolean existsBeside(Path file, String relative) {
        try {
            // resolveSibling also copes with a file that has no parent component.
            return Files.exists(file.resolveSibling(relative));
        } catch (InvalidPathException ignored) {
            return false;
        }
    }

    /**
     * Filter out common false positives for unresolved attributes.
     * Code blocks, CSS, JavaScript, and URL templates all use curly
     * braces legitimately.
     *
     * @param attr the matched text including its braces, e.g. {@code {foo}}
     * @param line the full line the match was found on
     * @return {@code true} if the match should not be reported
     */
    private static boolean isFalsePositive(String attr, String line) {
        for (String marker : FALSE_POSITIVE_LINE_MARKERS) {
            if (line.contains(marker)) {
                return true;
            }
        }

        // Source-code comment lines: "// ..." or the "*" continuation of a block comment
        String trimmed = line.trim();
        if (trimmed.startsWith("//") || trimmed.startsWith("*")) {
            return true;
        }

        String inner = attr.substring(1, attr.length() - 1);

        // All-uppercase identifiers are likely shell variables or symbolic constants
        // (this also covers operator names such as XOR, AND and OR).
        if (UPPERCASE_IDENTIFIER.matcher(inner).matches()) {
            return true;
        }

        return switch (inner) {
            // Java/shell identifiers commonly seen in code samples
            case "static", "return", "this", "super", "new", "class",
                 "void", "null", "true", "false" -> true;
            // Greek/Unicode symbol names used in scientific text
            case "micro", "mu", "alpha", "beta", "gamma", "delta" -> true;
            default -> false;
        };
    }
}