PublishedArtifactSet.java
package network.ike.workspace;
import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.LinkedHashSet;
import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * Scans a Maven subproject root to determine the complete set of
 * published artifacts (groupId:artifactId pairs).
 *
 * <p>This is the "published artifact set" from the handoff design:
 * given a subproject root directory, recursively walk the POM hierarchy
 * (root POM plus all subprojects/modules) and collect every
 * groupId:artifactId pair that the subproject publishes.
 *
 * <p>POM parsing uses simple regex matching (consistent with the
 * {@code ReleaseSupport} pattern) rather than a full XML parser.
 * XML comments are removed first so commented-out markup is never read
 * as live configuration. The {@code <parent>} block is then stripped
 * before extracting the project's own groupId and artifactId; if no
 * groupId is declared outside the parent block, the parent's groupId is
 * inherited.
 *
 * <p>Each POM file is parsed at most once per scan (keyed by its absolute,
 * normalized path), so a module entry that points back at an already
 * visited POM cannot cause unbounded recursion.
 */
public final class PublishedArtifactSet {

    /** Static utility class; not instantiable. */
    private PublishedArtifactSet() {}

    /**
     * A published Maven artifact coordinate.
     *
     * @param groupId    the Maven groupId
     * @param artifactId the Maven artifactId
     */
    public record Artifact(String groupId, String artifactId) {}

    /** XML comment, including multi-line ones; removed before any other matching. */
    private static final Pattern XML_COMMENT =
            Pattern.compile("(?s)<!--.*?-->");

    private static final Pattern GROUP_ID_PATTERN =
            Pattern.compile("<groupId>([^<]+)</groupId>");

    private static final Pattern ARTIFACT_ID_PATTERN =
            Pattern.compile("<artifactId>([^<]+)</artifactId>");

    private static final Pattern SUBPROJECTS_PATTERN =
            Pattern.compile("<subproject>([^<]+)</subproject>");

    private static final Pattern MODULES_PATTERN =
            Pattern.compile("<module>([^<]+)</module>");

    private static final Pattern PARENT_BLOCK =
            Pattern.compile("(?s)<parent>.*?</parent>");

    /** Self-closing {@code <parent/>} — Maven 4.1.0's inferred parent (carries no groupId). */
    private static final Pattern PARENT_SELF_CLOSING =
            Pattern.compile("<parent\\s*/>");

    /**
     * Start of the first POM body section that may itself contain a {@code <groupId>}
     * — a dependency, managed dependency, or plugin. The project's own coordinates
     * conventionally precede these, so own-coordinate extraction stops here so a
     * dependency's groupId is never mistaken for the project's (ike-issues#719).
     *
     * <p>The tag name must be followed by whitespace, {@code /} or {@code >} so that
     * property elements such as {@code <build.timestamp>} are not treated as the
     * start of a {@code <build>} section.
     */
    private static final Pattern BODY_SECTION = Pattern.compile(
            "<(?:dependencies|dependencyManagement|build|reporting|profiles|distributionManagement)(?=[\\s/>])");

    /**
     * Scan a subproject root and return the complete set of published
     * artifacts (groupId:artifactId pairs).
     *
     * <p>Reads the root pom.xml, extracts its coordinates, then
     * recursively descends into each subproject (or module) entry
     * to collect all published artifacts. Artifacts are returned in
     * discovery order.
     *
     * @param subprojectRoot the root directory of the Maven subproject
     * @return the set of all published artifacts; empty if the root has no pom.xml
     * @throws IOException if a POM file cannot be read
     */
    public static Set<Artifact> scan(Path subprojectRoot) throws IOException {
        Set<Artifact> artifacts = new LinkedHashSet<>();
        Path rootPom = subprojectRoot.resolve("pom.xml");
        if (Files.exists(rootPom)) {
            scanPom(rootPom, null, artifacts, new LinkedHashSet<>());
        }
        return artifacts;
    }

    /**
     * Check whether a groupId:artifactId pair is in the published set.
     *
     * @param artifacts  the set from {@link #scan(Path)}
     * @param groupId    the groupId to check
     * @param artifactId the artifactId to check
     * @return true if the pair is in the set
     */
    public static boolean matches(Set<Artifact> artifacts,
                                  String groupId, String artifactId) {
        return artifacts.contains(new Artifact(groupId, artifactId));
    }

    /**
     * Parse a single POM, add its artifact to the set, then recurse
     * into any declared subprojects or modules.
     *
     * @param pomPath        the POM file to parse
     * @param inheritGroupId the reactor parent's groupId to inherit if none is declared
     * @param artifacts      accumulator for discovered artifacts
     * @param visited        absolute, normalized paths of POMs already parsed in this scan
     * @throws IOException if the POM (or a descendant POM) cannot be read
     */
    private static void scanPom(Path pomPath, String inheritGroupId,
                                Set<Artifact> artifacts, Set<Path> visited) throws IOException {
        // Absolutize so getParent() is never null (e.g. when scanning Path.of(""))
        // and normalize so "." / ".." spellings of the same POM compare equal.
        Path pom = pomPath.toAbsolutePath().normalize();
        if (!visited.add(pom)) {
            return; // already parsed — breaks module cycles and avoids duplicate work
        }

        // Drop XML comments up front: a commented-out <module> or <groupId> must
        // not be treated as live configuration.
        String content = XML_COMMENT
                .matcher(Files.readString(pom, StandardCharsets.UTF_8))
                .replaceAll("");

        // groupId declared inside a paired <parent>…</parent> block, if any (used
        // for inheritance). A self-closing <parent/> — Maven 4.1.0's inferred
        // parent — carries none, so the groupId then comes from inheritGroupId.
        String parentGroupId = null;
        Matcher parentMatcher = PARENT_BLOCK.matcher(content);
        if (parentMatcher.find()) {
            parentGroupId = firstCapture(GROUP_ID_PATTERN, parentMatcher.group());
        }

        // Strip the parent block (paired or self-closing) so the parent's own
        // coordinates are never read as the project's.
        String stripped = PARENT_BLOCK.matcher(content).replaceFirst("");
        stripped = PARENT_SELF_CLOSING.matcher(stripped).replaceFirst("");

        // Restrict own-coordinate extraction to the text before the first body
        // section that can carry a <groupId> (dependencies, managed dependencies,
        // plugins), so a dependency's groupId is never mistaken for the
        // project's (ike-issues#719). A module that declares no own groupId then
        // yields null here and inherits below.
        String header = stripped.substring(0, bodySectionStart(stripped));

        // Inherit groupId: prefer own, then the parent block, then the reactor
        // parent passed down the recursion.
        String groupId = firstCapture(GROUP_ID_PATTERN, header);
        if (groupId == null) {
            groupId = parentGroupId;
        }
        if (groupId == null) {
            groupId = inheritGroupId;
        }

        String artifactId = firstCapture(ARTIFACT_ID_PATTERN, header);
        if (groupId != null && artifactId != null) {
            artifacts.add(new Artifact(groupId, artifactId));
        }

        // <subproject> entries (POM 4.1.0) are walked first, then <module>
        // entries (POM 4.0.0); both inherit this POM's effective groupId.
        Path pomDir = pom.getParent();
        scanChildren(SUBPROJECTS_PATTERN, content, pomDir, groupId, artifacts, visited);
        scanChildren(MODULES_PATTERN, content, pomDir, groupId, artifacts, visited);
    }

    /**
     * Recurse into every child entry matched by {@code entryPattern} in {@code content}.
     * Entries whose POM cannot be located on disk are skipped silently.
     */
    private static void scanChildren(Pattern entryPattern, String content, Path pomDir,
                                     String inheritGroupId, Set<Artifact> artifacts,
                                     Set<Path> visited) throws IOException {
        Matcher entries = entryPattern.matcher(content);
        while (entries.find()) {
            Path childPom = resolveChildPom(pomDir, entries.group(1).trim());
            if (childPom != null) {
                scanPom(childPom, inheritGroupId, artifacts, visited);
            }
        }
    }

    /**
     * Resolve a subproject/module entry to its POM file. The entry may name a
     * directory (containing {@code pom.xml}) or a POM file directly.
     *
     * @return the POM path, or null if no such file exists
     */
    private static Path resolveChildPom(Path pomDir, String entry) {
        Path target = pomDir.resolve(entry);
        if (Files.isRegularFile(target)) {
            return target; // entry points straight at a POM file
        }
        Path pom = target.resolve("pom.xml");
        return Files.exists(pom) ? pom : null;
    }

    /** First capture group of {@code pattern} in {@code text}, trimmed, or null. */
    private static String firstCapture(Pattern pattern, String text) {
        Matcher m = pattern.matcher(text);
        return m.find() ? m.group(1).trim() : null;
    }

    /**
     * Index where the first {@code <groupId>}-bearing body section starts, or the
     * full length when there is none — bounding own-coordinate extraction so a
     * dependency/plugin groupId is never read as the project's.
     */
    private static int bodySectionStart(String content) {
        Matcher m = BODY_SECTION.matcher(content);
        return m.find() ? m.start() : content.length();
    }
}