diff --git a/docs/user/reference/cli/azldev_component.md b/docs/user/reference/cli/azldev_component.md index 0516a5e8..1c78ba8d 100644 --- a/docs/user/reference/cli/azldev_component.md +++ b/docs/user/reference/cli/azldev_component.md @@ -42,6 +42,7 @@ components defined in the project configuration. * [azldev component build](azldev_component_build.md) - Build packages for components * [azldev component changed](azldev_component_changed.md) - Detect which components changed between two git refs * [azldev component diff-sources](azldev_component_diff-sources.md) - Show the diff that overlays apply to a component's sources +* [azldev component history](azldev_component_history.md) - Report per-component change activity and customization detail * [azldev component list](azldev_component_list.md) - List components in this project * [azldev component prepare-sources](azldev_component_prepare-sources.md) - Prepare buildable sources for components * [azldev component query](azldev_component_query.md) - Query info for components in this project diff --git a/docs/user/reference/cli/azldev_component_history.md b/docs/user/reference/cli/azldev_component_history.md new file mode 100644 index 00000000..00c58cd5 --- /dev/null +++ b/docs/user/reference/cli/azldev_component_history.md @@ -0,0 +1,74 @@ + + +## azldev component history + +Report per-component change activity and customization detail + +### Synopsis + +Report three independent change-activity signals per component: + + - toml-commits: commits to the component's source TOML file + - customizations: count of explicit customization items in the config + - fingerprint-changes: commits where the lock file's input-fingerprint changed + +Use this to find which packages get the most attention (for documentation, +review prioritization, or refactoring planning). 
+ +When a component shares its source TOML with other components (e.g., a bare +entry in a shared components.toml), the toml-commit count is coarse and the +component is marked 'toml-shared'. Use --shared=omit to drop those rows. + +When exactly one component is selected the customization items are printed +inline below the row, showing kind, value and description — useful for +hand-picking entries to document. + +``` +azldev component history [flags] +``` + +### Examples + +``` + # Heatmap of an entire project + azldev component history -a + + # JSON for downstream tooling + azldev component history -a -O json + + # Drill into a single component (auto-expands customization details) + azldev component history bash +``` + +### Options + +``` + -a, --all-components Include all components + -p, --component stringArray Component name pattern + -g, --component-group stringArray Component group name + -h, --help help for history + --include-bare Include components with zero customizations in the output. By default they are hidden -- their config inherits everything from defaults, and computing their git metrics is the dominant cost on large projects. + --shared string How to report rows for components that share a TOML file with others: show (keep row, count is coarse), omit (drop row). 
(default "show") + -s, --spec-path stringArray Spec path +``` + +### Options inherited from parent commands + +``` + -y, --accept-all accept all prompts + --color mode output colorization mode {always, auto, never} (default auto) + --config-file stringArray additional TOML config file(s) to merge (may be repeated) + -n, --dry-run dry run only (do not take action) + --network-retries int maximum number of attempts for network operations (minimum 1) (default 3) + --no-default-config disable default configuration + -O, --output-format fmt output format {csv, json, markdown, table} (default table) + --permissive-config do not fail on unknown fields in TOML config files + -C, --project string path to Azure Linux project + -q, --quiet only enable minimal output + -v, --verbose enable verbose output +``` + +### SEE ALSO + +* [azldev component](azldev_component.md) - Manage components + diff --git a/internal/app/azldev/cmds/component/component.go b/internal/app/azldev/cmds/component/component.go index df28df19..9cfee2ea 100644 --- a/internal/app/azldev/cmds/component/component.go +++ b/internal/app/azldev/cmds/component/component.go @@ -27,6 +27,7 @@ components defined in the project configuration.`, buildOnAppInit(app, cmd) changedOnAppInit(app, cmd) diffSourcesOnAppInit(app, cmd) + historyOnAppInit(app, cmd) listOnAppInit(app, cmd) prepareOnAppInit(app, cmd) queryOnAppInit(app, cmd) diff --git a/internal/app/azldev/cmds/component/history.go b/internal/app/azldev/cmds/component/history.go new file mode 100644 index 00000000..2ea21ef5 --- /dev/null +++ b/internal/app/azldev/cmds/component/history.go @@ -0,0 +1,465 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. 
+ +package component + +import ( + "context" + "fmt" + "strings" + "time" + + "github.com/microsoft/azure-linux-dev-tools/internal/app/azldev" + "github.com/microsoft/azure-linux-dev-tools/internal/app/azldev/core/components" + "github.com/microsoft/azure-linux-dev-tools/internal/utils/parmap" + "github.com/spf13/cobra" +) + +// HistoryOptions holds options for the component history command. +type HistoryOptions struct { + ComponentFilter components.ComponentFilter + // SharedTomlMode controls how toml-commit counts are reported for + // components that share their source TOML file with at least one other + // component: + // "show" (default): include the row, report the count, set SharedToml=true + // "omit": drop the row entirely + // + // JSON consumers always see the raw TomlCommits + SharedToml fields and + // can apply their own presentation (e.g., zero out shared rows) via jq. + SharedTomlMode string + // IncludeBare, when true, keeps components with zero customizations in + // the output. By default they are filtered out -- they have no + // per-component config worth reporting, and computing their git + // metrics across all selected components is the dominant cost on + // large projects (e.g., azurelinux). + IncludeBare bool +} + +const ( + sharedTomlModeShow = "show" + sharedTomlModeOmit = "omit" +) + +func historyOnAppInit(_ *azldev.App, parentCmd *cobra.Command) { + parentCmd.AddCommand(NewHistoryCmd()) +} + +// NewHistoryCmd constructs a [cobra.Command] for the "component history" CLI subcommand. 
+func NewHistoryCmd() *cobra.Command { + options := &HistoryOptions{ + SharedTomlMode: sharedTomlModeShow, + } + + cmd := &cobra.Command{ + Use: "history", + Aliases: []string{"hist"}, + Short: "Report per-component change activity and customization detail", + Long: `Report three independent change-activity signals per component: + + - toml-commits: commits to the component's source TOML file + - customizations: count of explicit customization items in the config + - fingerprint-changes: commits where the lock file's input-fingerprint changed + +Use this to find which packages get the most attention (for documentation, +review prioritization, or refactoring planning). + +When a component shares its source TOML with other components (e.g., a bare +entry in a shared components.toml), the toml-commit count is coarse and the +component is marked 'toml-shared'. Use --shared=omit to drop those rows. + +When exactly one component is selected the customization items are printed +inline below the row, showing kind, value and description — useful for +hand-picking entries to document.`, + Example: ` # Heatmap of an entire project + azldev component history -a + + # JSON for downstream tooling + azldev component history -a -O json + + # Drill into a single component (auto-expands customization details) + azldev component history bash`, + RunE: azldev.RunFuncWithExtraArgs(func(env *azldev.Env, args []string) (interface{}, error) { + options.ComponentFilter.ComponentNamePatterns = append(args, options.ComponentFilter.ComponentNamePatterns...) + + results, err := ComponentHistory(env, options) + if err != nil { + return nil, err + } + + // Card view side-channel: when exactly one component is being + // reported in a human-readable format, render a vertical card + // ourselves and short-circuit the standard table renderer. 
+ // reportResults treats a `true` return as a no-op (see + // reportResultsViaReflectable in azldev/command.go) which is + // how we suppress the would-be 1-row table. + // + // CAVEAT: the trigger is implicit (single result + human + // format) so a broad -a query that happens to narrow to one + // component silently switches output shape. JSON / CSV + // consumers always get the raw slice unchanged. + if shouldRenderCardView(env, results) { + renderCardView(env.ReportFile(), results[0]) + + return true, nil + } + + return results, nil + }), + ValidArgsFunction: components.GenerateComponentNameCompletions, + } + + components.AddComponentFilterOptionsToCommand(cmd, &options.ComponentFilter) + + cmd.Flags().StringVar(&options.SharedTomlMode, "shared", sharedTomlModeShow, + "How to report rows for components that share a TOML file with others: "+ + "show (keep row, count is coarse), omit (drop row).") + // Shell completion advertises the valid choices. Note: the MCP tool + // schema does not yet derive an `enum` constraint from cobra flag + // completion functions (see internal/app/azldev/core/mcp/mcpserver.go), + // so MCP agents see this as an unconstrained string until that gap is + // closed. Runtime validation happens in [validateSharedTomlMode]. + _ = cmd.RegisterFlagCompletionFunc("shared", + func(_ *cobra.Command, _ []string, _ string) ([]string, cobra.ShellCompDirective) { + return []string{sharedTomlModeShow, sharedTomlModeOmit}, + cobra.ShellCompDirectiveNoFileComp + }) + cmd.Flags().BoolVar(&options.IncludeBare, "include-bare", false, + "Include components with zero customizations in the output. "+ + "By default they are hidden -- their config inherits everything from defaults, "+ + "and computing their git metrics is the dominant cost on large projects.") + + // History is read-only; the lock validation flag is meaningless here. 
+ _ = cmd.Flags().MarkHidden("skip-lock-validation") + + azldev.ExportAsMCPTool(cmd) + + return cmd +} + +// CustomizationItem captures one user-authored customization on a component. +// +// Kind is a dotted-namespace string forming part of the JSON wire contract +// (downstream `jq`/`gjson` consumers key on it). It is either an overlay +// type emitted verbatim (e.g. "spec-remove-tag", "patch-add") or a fixed +// token derived from the structured TOML path. The fixed set: +// +// build.with, build.without, build.defines, build.undefines, +// build.check.skip, spec.source-type, spec.upstream-commit, +// spec.upstream-name, spec.upstream-distro, release.calculation, +// render.skip-file-filter, packages, source-files, +// source-files.replace-upstream +// +// Adding a Kind is non-breaking; renaming or removing one is breaking. +// Value is a short summary suitable for table cells; Description is the +// human-readable rationale from the config (overlay.description, +// check.skip_reason, etc.). +type CustomizationItem struct { + Kind string `json:"kind"` + Value string `json:"value,omitempty"` + Description string `json:"description,omitempty"` +} + +// HistoryResult is the per-component output row. +// +// This is the stable wire contract that downstream tooling pins against: +// adding a field is non-breaking; renaming or removing one is a breaking +// change. Keep JSON tags stable. +type HistoryResult struct { + // Name of the component. We intentionally do *not* tag this with + // 'sortkey' -- the reflectable table writer would otherwise re-sort + // by name and stomp our customizations-first sort. + Name string `json:"name"` + + // TomlCommits is the number of commits touching the component's source + // TOML file. When --shared=omit is in effect and the component shares its + // TOML with another component, [ComponentHistory] drops the row entirely + // -- unless the component was named explicitly, in which case the row is + // kept.
+ TomlCommits int `json:"tomlCommits"` + + // SharedToml is true when at least one other component anywhere in the + // project (not just within the current selection) uses the same source + // TOML file. The TomlCommits count is then coarse because git history + // is on a per-file basis -- the count includes commits that touched the + // shared file for any reason, not just for this component. + SharedToml bool `json:"sharedToml,omitempty"` + + // TomlPath is the repo-relative path of the component's source TOML file. + TomlPath string `json:"tomlPath,omitempty"` + + // LatestCommit is the timestamp of the most recent commit to the TOML + // file. Zero if no commits found. Uses 'omitzero' (Go 1.24+) rather than + // 'omitempty' because the latter is a no-op for struct types and would + // serialize as "0001-01-01T00:00:00Z" for components with no history. + LatestCommit time.Time `json:"latestCommit,omitzero" table:"-"` + + // Customizations is the count of customization items (len of the + // CustomizationItems slice). + Customizations int `json:"customizations"` + + // CustomizationItems are the individual customization records; + // rendered as JSON detail and as inline expansion for single-component + // invocations. + CustomizationItems []CustomizationItem `json:"customizationItems,omitempty" table:"-"` + + // FingerprintChanges is the number of commits where the lock file's + // input-fingerprint actually changed. + FingerprintChanges int `json:"fingerprintChanges"` + + // FingerprintChangeDetails is the per-commit metadata for each fingerprint change + // counted in [FingerprintChanges] (oldest first). Hidden from the human-readable table + // -- use JSON output to consume them (e.g., to hand-author changelog entries). + // [ComponentHistory] clears the slice unless exactly one result row remains. + // + // Each entry is populated from [sources.FingerprintChange] via an + // explicit field-by-field copy in [populateLockMetrics].
The + // gathering algorithm is shared with the synthetic dist-git history + // flow; the wire-level type is local so that: + // - the JSON contract for this command lives in this file, and + // - removing a field from [sources.FingerprintChange] / + // [sources.CommitMetadata] surfaces as a compile error at the + // copy site rather than silently dropping changelog metadata. + // The compile-error guard is one-directional (it catches REMOVED + // upstream fields); a NEWLY ADDED upstream field is caught instead by + // TestFingerprintChangeDTOMirrorsSource. + FingerprintChangeDetails []FingerprintChange `json:"fingerprintChangeDetails,omitempty" table:"-"` + + // HasLock is true when a lock file currently exists for this component. + HasLock bool `json:"hasLock,omitempty" table:"-"` + + // HasImport is true when the lock file records a non-empty + // import-commit (i.e., the component was forked from upstream). + HasImport bool `json:"hasImport,omitempty" table:"-"` + + // ManualBump is the lock file's manual-bump counter. Always emitted + // (no omitempty) so a real bump of 0 isn't indistinguishable from an + // absent field; pair it with HasLock to tell "no lock" from "bump 0". + ManualBump int `json:"manualBump" table:"-"` + + // Warnings collects per-component diagnostics for failure paths that + // were swallowed to keep the overall report rendering. Empty when no + // problems were encountered. Surfaces in the single-component card + // view and in JSON; hidden from the human-readable table. + Warnings []string `json:"warnings,omitempty" table:"-"` +} + +// FingerprintChange is the wire-level representation of one lock-file +// fingerprint change for the [HistoryResult.FingerprintChangeDetails] +// field. It mirrors the fields of [sources.FingerprintChange] (and its +// embedded [sources.CommitMetadata]) that consumers of `azldev component +// history` JSON output care about. 
+// +// The fields are copied explicitly in [populateLockMetrics] rather than +// embedding [sources.FingerprintChange] directly so that: +// - the JSON contract for this command is owned by this package, and +// - dropping a field from the synthetic-history source type produces a +// compile error at the copy site instead of silently emptying the +// downstream changelog data. +type FingerprintChange struct { + Hash string `json:"hash"` + Author string `json:"author"` + AuthorEmail string `json:"authorEmail"` + Timestamp int64 `json:"timestamp"` + Message string `json:"message"` + UpstreamCommit string `json:"upstreamCommit,omitempty"` +} + +// ComponentHistory computes the per-component history data for the components +// matching options.ComponentFilter. Per-component work runs in parallel; a +// progress event tracks completion for the (often slow) -a case. +// +// By default, components with zero customizations are skipped before any +// git work runs (set IncludeBare to keep them). This is the dominant +// performance lever on large projects -- the vast majority of components +// in real distros inherit everything from defaults and have no +// per-component history worth reporting. +// +// When the user explicitly names component(s) (via positional args, --component, +// or --spec-path) the bare filter is force-disabled regardless of IncludeBare, +// so `azldev component history nano` always returns a row for nano even when +// nano has zero customizations. The perf rationale for the default does not +// apply to scope-limiting explicit selections. +func ComponentHistory(env *azldev.Env, options *HistoryOptions) ([]HistoryResult, error) { + if err := validateSharedTomlMode(options.SharedTomlMode); err != nil { + return nil, err + } + + // History is read-only; skip lock validation so stale or missing locks + // don't block reporting. 
+ options.ComponentFilter.SkipLockValidation = true + + resolver := components.NewResolver(env) + + comps, err := resolver.FindComponents(&options.ComponentFilter) + if err != nil { + return nil, fmt.Errorf("resolving components:\n%w", err) + } + + ctx, err := newHistoryContext(env) + if err != nil { + return nil, err + } + + // Phase 0: compute customizations for every selected component + // (sync, fast, no git). When --include-bare is off, drop components + // with zero customizations before any expensive work runs -- unless + // the user explicitly named components, in which case they get what + // they asked for regardless. + explicit := hasExplicitComponentSelection(&options.ComponentFilter) + effectiveIncludeBare := options.IncludeBare || explicit + + stubs := buildHistoryStubs(env, comps.Components(), effectiveIncludeBare) + if len(stubs) == 0 { + return nil, nil + } + + tomlSharing := countTomlSharing(env.Config().Components) + + workerEnv, cancel := env.WithCancel() + defer cancel() + + // Phase A: memoize toml-commit counts per unique source TOML path. + // In real projects (e.g., azurelinux) thousands of components share a + // single components.toml; without this we'd re-run the same `git log` + // thousands of times. + tomlCache, err := precomputeTomlMetricsForStubs(workerEnv, env, ctx, stubs) + if err != nil { + return nil, err + } + + // Phase B: build per-component results in parallel. + progressEvent := env.StartEvent("Computing component history", "count", len(stubs)) + defer progressEvent.End() + + total := int64(len(stubs)) + + parmapResults := parmap.Map( + workerEnv, + // Each worker shells out to git; that's I/O-bound work, matching the + // concurrency model used by render/update on similar workloads. 
+ env.IOBoundConcurrency(), + stubs, + func(done, _ int) { progressEvent.SetProgress(int64(done), total) }, + func(_ context.Context, stub historyStub) HistoryResult { + // workerEnv carries the cancellable ctx; the parmap-supplied + // ctx is identical (parmap derives it from workerEnv) and + // unused here. Mirrors how render.go does this. + return buildHistoryResult( //nolint:contextcheck // env carries the ctx + workerEnv, stub, ctx, tomlSharing, tomlCache, options.SharedTomlMode, explicit, + ) + }, + ) + + results := make([]HistoryResult, 0, len(stubs)) + + for _, parmapRes := range parmapResults { + if parmapRes.Cancelled { + continue + } + + // --shared=omit drops the row entirely for components whose source + // TOML is shared with at least one other component -- unless the user + // explicitly named it, in which case they get the row regardless + // (mirroring the --include-bare override above; sharing is a + // presentation default, not the user's intent). + if options.SharedTomlMode == sharedTomlModeOmit && parmapRes.Value.SharedToml && !explicit { + continue + } + + results = append(results, parmapRes.Value) + } + + // FingerprintChangeDetails is potentially the largest field in the + // payload (one entry per fingerprint change per component, each with + // commit metadata); JSON consumers on -a runs at azurelinux scale would + // otherwise get multi-MB responses. The details exist for drilling into + // a single component to author a changelog, so keep them only when + // exactly one component survives filtering. This is decided AFTER the + // --shared=omit drop above so a single surviving row always carries its + // details (the same len()==1 predicate the card view keys off). 
+ if len(results) != 1 { + for i := range results { + results[i].FingerprintChangeDetails = nil + } + } + + sortHistoryResults(results) + + return results, nil +} + +// historyStub carries the cheap, sync-computed slice of work for one +// component: customization items (pre-collected) plus the underlying +// Component handle for later git-metric work. Stubs travel in a slice, not a map. +type historyStub struct { + component components.Component + customizationItems []CustomizationItem +} + +// buildHistoryStubs computes customization items for every selected +// component synchronously. When includeBare is false, components with no +// customizations are excluded so that the expensive parallel phases +// don't run on them at all. +func buildHistoryStubs( + env *azldev.Env, comps []components.Component, includeBare bool, +) []historyStub { + stubs := make([]historyStub, 0, len(comps)) + + for _, comp := range comps { + name := comp.GetName() + + // Read the raw per-component config (as authored in TOML), not the + // resolved one returned by comp.GetConfig() -- the resolver + // pre-merges project- and group-level defaults, which would + // otherwise look like per-component customizations. + var items []CustomizationItem + if raw, ok := env.Config().Components[name]; ok { + items = collectCustomizations(name, &raw) + } + + if !includeBare && len(items) == 0 { + continue + } + + stubs = append(stubs, historyStub{component: comp, customizationItems: items}) + } + + return stubs +} + +// hasExplicitComponentSelection reports whether the user pinpointed +// individual components (vs asking for everything, a group, or relying on +// no-criteria defaults). Used by [ComponentHistory] to override +// --include-bare (and the --shared=omit row drop) in the explicit +// case so that `azldev component history nano` always returns a row for nano +// with its real count. +// +// Only an *exact* name (or a spec path) counts as explicit. A glob pattern +// (e.g.
+// -p '*') can select the whole project, so it carries no more intent +// than -a or --component-group and must not defeat those filters' perf +// rationale. Wildcard detection mirrors the resolver (see +// [components.Resolver]). +// +// Group selection (--component-group) is likewise NOT treated as explicit -- +// groups can contain hundreds of components. +func hasExplicitComponentSelection(filter *components.ComponentFilter) bool { + for _, pattern := range filter.ComponentNamePatterns { + if !strings.ContainsAny(pattern, "*?[") { + return true + } + } + + return len(filter.SpecPaths) > 0 +} + +// validateSharedTomlMode rejects unrecognized --shared values. +func validateSharedTomlMode(mode string) error { + switch mode { + case sharedTomlModeShow, sharedTomlModeOmit: + return nil + default: + return fmt.Errorf( + "invalid --shared value %#q (want one of: %s, %s)", + mode, sharedTomlModeShow, sharedTomlModeOmit) + } +} diff --git a/internal/app/azldev/cmds/component/history_customizations.go b/internal/app/azldev/cmds/component/history_customizations.go new file mode 100644 index 00000000..f0fa64a2 --- /dev/null +++ b/internal/app/azldev/cmds/component/history_customizations.go @@ -0,0 +1,228 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. + +package component + +import ( + "fmt" + "sort" + "strconv" + + "github.com/microsoft/azure-linux-dev-tools/internal/projectconfig" +) + +// collectCustomizations gathers all customization items declared on the +// component's config into a uniform list. Items are emitted in a stable +// order (overlays first in declared order; then build, spec, release, +// render, packages, source-files in field order) so the output is deterministic.
+func collectCustomizations(name string, config *projectconfig.ComponentConfig) []CustomizationItem { + if config == nil { + return nil + } + + items := make([]CustomizationItem, 0) + + items = appendOverlayItems(items, config.Overlays) + items = appendBuildItems(items, config.Build) + items = appendSpecItems(items, name, config.Spec) + items = appendReleaseItems(items, config.Release) + items = appendRenderItems(items, config.Render) + items = appendPackageItems(items, config.Packages) + items = appendSourceFileItems(items, config.SourceFiles) + + return items +} + +// appendRenderItems flags non-default render-config customizations. +func appendRenderItems( + items []CustomizationItem, render projectconfig.ComponentRenderConfig, +) []CustomizationItem { + if render.SkipFileFilter { + items = append(items, CustomizationItem{ + Kind: "render.skip-file-filter", + Value: strconv.FormatBool(true), + }) + } + + return items +} + +// appendOverlayItems converts each overlay into a CustomizationItem. +func appendOverlayItems( + items []CustomizationItem, overlays []projectconfig.ComponentOverlay, +) []CustomizationItem { + for i := range overlays { + overlay := &overlays[i] + + items = append(items, CustomizationItem{ + Kind: string(overlay.Type), + Value: overlaySummary(overlay), + Description: overlay.Description, + }) + } + + return items +} + +// overlaySummary returns a short human-readable identification of an overlay, +// suitable for the Value field of a CustomizationItem. 
+func overlaySummary(overlay *projectconfig.ComponentOverlay) string { + switch { + case overlay.Tag != "" && overlay.Value != "": + return fmt.Sprintf("%s=%s", overlay.Tag, overlay.Value) + case overlay.Tag != "": + return overlay.Tag + case overlay.EffectiveSourceName() != "": + return overlay.EffectiveSourceName() + case overlay.Filename != "": + return overlay.Filename + case overlay.SectionName != "": + return overlay.SectionName + case overlay.Regex != "": + return overlay.Regex + default: + return "" + } +} + +// appendBuildItems converts non-default build-config fields into items. +func appendBuildItems( + items []CustomizationItem, build projectconfig.ComponentBuildConfig, +) []CustomizationItem { + for _, flag := range build.With { + items = append(items, CustomizationItem{Kind: "build.with", Value: flag}) + } + + for _, flag := range build.Without { + items = append(items, CustomizationItem{Kind: "build.without", Value: flag}) + } + + // Sort define keys so iteration order is deterministic. + defineKeys := make([]string, 0, len(build.Defines)) + for key := range build.Defines { + defineKeys = append(defineKeys, key) + } + + sort.Strings(defineKeys) + + for _, key := range defineKeys { + items = append(items, CustomizationItem{ + Kind: "build.defines", + Value: fmt.Sprintf("%s=%s", key, build.Defines[key]), + }) + } + + for _, macro := range build.Undefines { + items = append(items, CustomizationItem{Kind: "build.undefines", Value: macro}) + } + + if build.Check.Skip { + items = append(items, CustomizationItem{ + Kind: "build.check.skip", + Value: strconv.FormatBool(true), + Description: build.Check.SkipReason, + }) + } + + return items +} + +// appendSpecItems captures spec-source customizations relative to the +// inherited default. We cannot perfectly know the inherited default without +// re-resolving, but we can flag the cases that are unambiguous (commit pin, +// upstream-name renamed away from the component name, upstream-distro set). 
+func appendSpecItems( + items []CustomizationItem, name string, spec projectconfig.SpecSource, +) []CustomizationItem { + // Only surface SourceType when explicitly set in the raw per-component + // config -- components that inherit from group defaults leave it empty, + // so this avoids inflating the customization count for every component. + if spec.SourceType != "" { + items = append(items, CustomizationItem{ + Kind: "spec.source-type", + Value: string(spec.SourceType), + }) + } + + if spec.UpstreamCommit != "" { + items = append(items, CustomizationItem{ + Kind: "spec.upstream-commit", + Value: spec.UpstreamCommit, + }) + } + + if spec.UpstreamName != "" && spec.UpstreamName != name { + items = append(items, CustomizationItem{ + Kind: "spec.upstream-name", + Value: spec.UpstreamName, + }) + } + + // Both Name and Version are real build inputs (only Snapshot carries + // fingerprint:"-"), so a version-only pin is a genuine customization. + if spec.UpstreamDistro.Name != "" || spec.UpstreamDistro.Version != "" { + items = append(items, CustomizationItem{ + Kind: "spec.upstream-distro", + Value: spec.UpstreamDistro.String(), + }) + } + + return items +} + +// appendReleaseItems flags non-default release-calculation modes. +func appendReleaseItems( + items []CustomizationItem, release projectconfig.ReleaseConfig, +) []CustomizationItem { + if release.Calculation == "" || release.Calculation == projectconfig.ReleaseCalculationAuto { + return items + } + + return append(items, CustomizationItem{ + Kind: "release.calculation", + Value: string(release.Calculation), + }) +} + +// appendPackageItems emits one item per binary package override. 
+func appendPackageItems( + items []CustomizationItem, packages map[string]projectconfig.PackageConfig, +) []CustomizationItem { + if len(packages) == 0 { + return items + } + + keys := make([]string, 0, len(packages)) + for key := range packages { + keys = append(keys, key) + } + + sort.Strings(keys) + + for _, key := range keys { + items = append(items, CustomizationItem{Kind: "packages", Value: key}) + } + + return items +} + +// appendSourceFileItems emits one item per declared source-file reference, +// plus a distinct item for the high-signal ReplaceUpstream toggle (which +// actively masks a same-named upstream source and would otherwise be hidden +// behind the plain filename entry). +func appendSourceFileItems( + items []CustomizationItem, sourceFiles []projectconfig.SourceFileReference, +) []CustomizationItem { + for _, sourceFile := range sourceFiles { + items = append(items, CustomizationItem{Kind: "source-files", Value: sourceFile.Filename}) + + if sourceFile.ReplaceUpstream { + items = append(items, CustomizationItem{ + Kind: "source-files.replace-upstream", + Value: sourceFile.Filename, + }) + } + } + + return items +} diff --git a/internal/app/azldev/cmds/component/history_gitmetrics.go b/internal/app/azldev/cmds/component/history_gitmetrics.go new file mode 100644 index 00000000..f89a6b5c --- /dev/null +++ b/internal/app/azldev/cmds/component/history_gitmetrics.go @@ -0,0 +1,387 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. 
+ +package component + +import ( + "context" + "errors" + "fmt" + "time" + + "github.com/microsoft/azure-linux-dev-tools/internal/app/azldev" + "github.com/microsoft/azure-linux-dev-tools/internal/app/azldev/core/components" + "github.com/microsoft/azure-linux-dev-tools/internal/app/azldev/core/sources" + "github.com/microsoft/azure-linux-dev-tools/internal/lockfile" + "github.com/microsoft/azure-linux-dev-tools/internal/projectconfig" + "github.com/microsoft/azure-linux-dev-tools/internal/utils/git" + "github.com/microsoft/azure-linux-dev-tools/internal/utils/parmap" +) + +// historyContext holds resolved repo state shared across components. +type historyContext struct { + repoRoot string + lockDir string +} + +// newHistoryContext opens the project repository once just to resolve the +// worktree root, then discards it: go-git's *Repository is not safe to +// share across goroutines (see synthistory.go which always opens a fresh +// repo per component). Per-worker repos are reopened inline. +func newHistoryContext(env *azldev.Env) (*historyContext, error) { + cfg := env.Config() + if cfg == nil { + return nil, errors.New("no project configuration loaded") + } + + repo, err := git.OpenProjectRepo(env.ProjectDir()) + if err != nil { + return nil, fmt.Errorf("opening project repository:\n%w", err) + } + + worktree, err := repo.Worktree() + if err != nil { + return nil, fmt.Errorf("getting project worktree:\n%w", err) + } + + return &historyContext{ + repoRoot: worktree.Filesystem.Root(), + lockDir: cfg.Project.LockDir, + }, nil +} + +// countTomlSharing returns the number of components that point at each +// source TOML path. Used to detect shared files (where toml-commit counts +// are coarse). 
+func countTomlSharing(allComponents map[string]projectconfig.ComponentConfig) map[string]int { + sharing := make(map[string]int) + + for _, cfg := range allComponents { + if cfg.SourceConfigFile == nil { + continue + } + + path := cfg.SourceConfigFile.SourcePath() + if path == "" { + continue + } + + sharing[path]++ + } + + return sharing +} + +// tomlMetrics is one entry in the precomputed cache populated by +// [precomputeTomlMetrics]. Keyed by repo-relative TOML path. A non-nil err +// records a real `git log` failure so [populateTomlMetrics] can surface a +// warning, keeping it distinguishable from a genuine zero-commit history. +type tomlMetrics struct { + count int + latest time.Time + err error +} + +// precomputeTomlMetricsForStubs runs `git log` once per *unique* source- +// TOML path across the selected stubs and returns the results keyed by +// repo-relative path. This is the central performance optimization: in +// real projects (e.g., azurelinux) thousands of components share a single +// components.toml file, and without de-duplicating we'd re-run the same +// `git log` thousands of times. +// +// Paths that resolve outside the repo are skipped. A `git log` failure is +// cached as a per-path error (not a fatal one) so [populateTomlMetrics] can +// surface a warning, keeping it distinguishable from a genuine zero-commit +// history. 
+func precomputeTomlMetricsForStubs( + workerEnv *azldev.Env, + env *azldev.Env, + ctx *historyContext, + stubs []historyStub, +) (map[string]tomlMetrics, error) { + uniqueRelPaths := collectUniqueTomlRelPathsFromStubs(ctx.repoRoot, stubs) + if len(uniqueRelPaths) == 0 { + return map[string]tomlMetrics{}, nil + } + + progressEvent := env.StartEvent("Counting TOML commit history", "uniqueFiles", len(uniqueRelPaths)) + defer progressEvent.End() + + total := int64(len(uniqueRelPaths)) + + parmapResults := parmap.Map( + workerEnv, + env.IOBoundConcurrency(), + uniqueRelPaths, + func(done, _ int) { progressEvent.SetProgress(int64(done), total) }, + func(_ context.Context, relPath string) tomlMetrics { + count, latest, err := git.CountCommitsTouchingFile( //nolint:contextcheck // env carries the ctx + workerEnv, workerEnv, ctx.repoRoot, relPath, + ) + if err != nil { + // Cache the failure rather than failing the whole command -- + // populateTomlMetrics surfaces it as a warning so a real error + // (corrupt repo, permission denied) stays distinguishable from a + // genuine zero-commit history. + return tomlMetrics{err: err} + } + + return tomlMetrics{count: count, latest: latest} + }, + ) + + cache := make(map[string]tomlMetrics, len(uniqueRelPaths)) + + for idx, parmapRes := range parmapResults { + if parmapRes.Cancelled { + continue + } + + cache[uniqueRelPaths[idx]] = parmapRes.Value + } + + return cache, nil +} + +// collectUniqueTomlRelPathsFromStubs returns the deduplicated set of in- +// repo, repo-relative source-TOML paths across the given stubs. 
+func collectUniqueTomlRelPathsFromStubs(repoRoot string, stubs []historyStub) []string { + seen := make(map[string]struct{}) + + relPaths := make([]string, 0) + + for _, stub := range stubs { + config := stub.component.GetConfig() + if config.SourceConfigFile == nil { + continue + } + + absPath := config.SourceConfigFile.SourcePath() + if absPath == "" { + continue + } + + relPath, err := repoRelPath(repoRoot, absPath) + if err != nil { + continue + } + + if _, dup := seen[relPath]; dup { + continue + } + + seen[relPath] = struct{}{} + + relPaths = append(relPaths, relPath) + } + + return relPaths +} + +// buildHistoryResult assembles a single [HistoryResult] for a stub. The +// stub already carries the precomputed customization items; this function +// fills in the git-driven metrics (toml-commits via cache, fingerprint-changes via +// per-call repo). +func buildHistoryResult( + env *azldev.Env, + stub historyStub, + ctx *historyContext, + tomlSharing map[string]int, + tomlCache map[string]tomlMetrics, + sharedMode string, + explicit bool, +) HistoryResult { + result := HistoryResult{ + Name: stub.component.GetName(), + CustomizationItems: stub.customizationItems, + Customizations: len(stub.customizationItems), + } + + populateTomlMetrics(stub.component, ctx, tomlSharing, tomlCache, sharedMode, explicit, &result) + populateLockMetrics(env, stub.component, ctx, &result) + + return result +} + +// populateTomlMetrics fills in TomlCommits, SharedToml, TomlPath, +// LatestCommit from the precomputed [tomlMetrics] cache. 
func populateTomlMetrics(
	comp components.Component,
	ctx *historyContext,
	tomlSharing map[string]int,
	tomlCache map[string]tomlMetrics,
	sharedMode string,
	explicit bool,
	result *HistoryResult,
) {
	config := comp.GetConfig()

	// No source TOML recorded for this component: leave every TOML field at
	// its zero value.
	if config.SourceConfigFile == nil || config.SourceConfigFile.SourcePath() == "" {
		return
	}

	tomlAbsPath := config.SourceConfigFile.SourcePath()
	// tomlSharing is keyed by the same path value read here; more than one
	// user means the commit count cannot be attributed to this component
	// alone.
	result.SharedToml = tomlSharing[tomlAbsPath] > 1

	tomlRelPath, err := repoRelPath(ctx.repoRoot, tomlAbsPath)
	if err != nil {
		// A TOML file outside the repo isn't a hard error -- record a
		// warning and leave path/commit counts empty.
		result.Warnings = append(result.Warnings,
			fmt.Sprintf("source TOML %q is outside the git repository; toml-commits skipped: %v",
				tomlAbsPath, err))

		return
	}

	result.TomlPath = tomlRelPath

	// --shared=omit suppresses the (coarse) count for shared TOMLs, but an
	// explicitly-named component is the user asking for that component
	// specifically -- give them the real count, mirroring the row-keep
	// override in [ComponentHistory].
	if result.SharedToml && sharedMode == sharedTomlModeOmit && !explicit {
		return
	}

	metrics, ok := tomlCache[tomlRelPath]
	if !ok {
		// The cache holds no entry for this path. Failed counts are cached
		// with their error, so a gap means the path never produced a stored
		// result (precomputeTomlMetricsForStubs skips cancelled results, for
		// instance). Surface it so the user can tell zero-counts apart from
		// missing data.
		result.Warnings = append(result.Warnings,
			fmt.Sprintf("no TOML commit metrics cached for %q; toml-commits left at zero", tomlRelPath))

		return
	}

	if metrics.err != nil {
		// A real `git log` failure was cached during precompute. Surface it
		// rather than silently reporting zero commits (mirrors the lock-path
		// warning behavior).
		result.Warnings = append(result.Warnings,
			fmt.Sprintf("counting TOML commits for %q failed; toml-commits left at zero: %v",
				tomlRelPath, metrics.err))

		return
	}

	result.TomlCommits = metrics.count
	result.LatestCommit = metrics.latest
}

// populateLockMetrics fills in FingerprintChanges, FingerprintChangeDetails,
// HasLock, HasImport, ManualBump.
// A missing lock file is "no data", not an error; a genuine read failure
// (corrupt/unparseable lock) is surfaced via result.Warnings so a
// tomlCommits/fingerprintChanges of 0 can't be silently confused with a
// real failure.
//
// The function never returns an error: every failure path appends a warning
// to result.Warnings and leaves the affected fields at their zero values.
//
// FingerprintChangeDetails is always populated here; the caller strips it
// when more than one component is reported. See [ComponentHistory] for the
// rationale.
func populateLockMetrics(
	env *azldev.Env,
	comp components.Component,
	ctx *historyContext,
	result *HistoryResult,
) {
	name := comp.GetName()

	// Step 1: current lock state, read through the environment's lock
	// reader when one is available.
	lockReader := env.LockReader()
	if lockReader != nil {
		lock, lockErr := lockReader.Get(name)

		switch {
		case lockErr == nil && lock != nil:
			result.HasLock = true
			result.HasImport = lock.ImportCommit != ""
			result.ManualBump = lock.ManualBump
		case lockErr != nil:
			// Distinguish a missing lock ("no data", expected) from a real
			// read failure (corrupt/unparseable lock). Mirror the store's
			// own not-found detection (Exists) since the wrapped fs error
			// isn't reliably errors.Is(os.ErrNotExist)-comparable. Only a
			// genuine failure earns a warning, so a fingerprintChanges of 0
			// can't be silently confused with a load error.
			exists, existsErr := lockReader.Exists(name)
			switch {
			case existsErr != nil:
				result.Warnings = append(result.Warnings,
					fmt.Sprintf("reading lock file for %q: %v (existence check also failed: %v)",
						name, lockErr, existsErr))
			case exists:
				result.Warnings = append(result.Warnings,
					fmt.Sprintf("reading lock file for %q: %v", name, lockErr))
			}
		}
	}

	// Step 2: locate the lock file relative to the repo so its committed
	// history can be examined.
	lockAbsPath, err := lockfile.LockPath(ctx.lockDir, name)
	if err != nil {
		// Invalid component name for path resolution: skip lock metrics
		// rather than failing the whole report.
		result.Warnings = append(result.Warnings,
			fmt.Sprintf("resolving lock path: %v", err))

		return
	}

	lockRelPath, err := repoRelPath(ctx.repoRoot, lockAbsPath)
	if err != nil {
		// Lock dir lives outside the repo: nothing to count.
		result.Warnings = append(result.Warnings,
			fmt.Sprintf("lock file %q is outside the git repository; fingerprint-changes skipped: %v",
				lockAbsPath, err))

		return
	}

	// Step 3: walk the lock file's history for fingerprint changes. The
	// closure keeps the repository handle scoped to this single lookup.
	fingerprintChanges, err := func() ([]sources.FingerprintChange, error) {
		// Open a fresh repo for this call -- go-git's *Repository is not
		// safe for concurrent use. Opening is cheap (just reads .git/config).
		repo, openErr := git.OpenProjectRepo(env.ProjectDir())
		if openErr != nil {
			return nil, fmt.Errorf("opening project repository:\n%w", openErr)
		}

		return sources.FindFingerprintChanges(env.Context(), env, repo, ctx.repoRoot, lockRelPath)
	}()
	if err != nil {
		// A lock file with no committed history is NOT an error here --
		// FindFingerprintChanges returns (nil, nil) in that case. This
		// branch only fires on real failures (git open, blob read, etc.).
		result.Warnings = append(result.Warnings,
			fmt.Sprintf("computing fingerprint changes for %q: %v", lockRelPath, err))

		return
	}

	result.FingerprintChanges = len(fingerprintChanges)
	result.FingerprintChangeDetails = toFingerprintChanges(fingerprintChanges)
}

// toFingerprintChanges copies each [sources.FingerprintChange] into the
// local [FingerprintChange] wire type by naming every field explicitly.
// Removing a field from [sources.FingerprintChange] or
// [sources.CommitMetadata] trips a compile error here, alerting us to a
// quietly-shrunk changelog payload.
//
// An empty or nil input yields nil rather than an empty slice.
func toFingerprintChanges(changes []sources.FingerprintChange) []FingerprintChange {
	if len(changes) == 0 {
		return nil
	}

	out := make([]FingerprintChange, len(changes))
	for i, change := range changes {
		out[i] = FingerprintChange{
			Hash:           change.Hash,
			Author:         change.Author,
			AuthorEmail:    change.AuthorEmail,
			Timestamp:      change.Timestamp,
			Message:        change.Message,
			UpstreamCommit: change.UpstreamCommit,
		}
	}

	return out
}
diff --git a/internal/app/azldev/cmds/component/history_internal_test.go b/internal/app/azldev/cmds/component/history_internal_test.go new file mode 100644 index 00000000..3504f390 --- /dev/null +++ b/internal/app/azldev/cmds/component/history_internal_test.go @@ -0,0 +1,327 @@
// Copyright (c) Microsoft Corporation.
// Licensed under the MIT License.

package component

import (
	"reflect"
	"strings"
	"testing"

	"github.com/microsoft/azure-linux-dev-tools/internal/app/azldev/core/components"
	"github.com/microsoft/azure-linux-dev-tools/internal/app/azldev/core/sources"
	"github.com/microsoft/azure-linux-dev-tools/internal/projectconfig"
	"github.com/stretchr/testify/assert"
)

// TestHasExplicitComponentSelection pins the NEW-1 fix: only an exact name or
// spec path is "explicit". A glob pattern selects broadly and must not defeat
// --include-bare / --shared=omit (it carries no more intent than -a).
+func TestHasExplicitComponentSelection(t *testing.T) { + t.Parallel() + + cases := []struct { + name string + filter components.ComponentFilter + want bool + }{ + {"exact name", components.ComponentFilter{ComponentNamePatterns: []string{"curl"}}, true}, + {"spec path", components.ComponentFilter{SpecPaths: []string{"specs/curl/curl.spec"}}, true}, + {"star glob", components.ComponentFilter{ComponentNamePatterns: []string{"*"}}, false}, + {"prefix glob", components.ComponentFilter{ComponentNamePatterns: []string{"lib*"}}, false}, + {"char-class glob", components.ComponentFilter{ComponentNamePatterns: []string{"cur[lp]"}}, false}, + {"question glob", components.ComponentFilter{ComponentNamePatterns: []string{"cur?"}}, false}, + {"glob plus exact", components.ComponentFilter{ComponentNamePatterns: []string{"*", "curl"}}, true}, + {"nothing", components.ComponentFilter{}, false}, + } + + for _, tc := range cases { + t.Run(tc.name, func(t *testing.T) { + t.Parallel() + assert.Equal(t, tc.want, hasExplicitComponentSelection(&tc.filter)) + }) + } +} + +// TestCustomizationCollectorsCoverEveryFingerprintableField pins the +// "customization vs upstream" split to the existing fingerprint:"-" +// taxonomy: a field counts as a customization iff it contributes to the +// component's input fingerprint (i.e., it changes what we ship). +// +// The collector layer in [collectCustomizations] is hand-written so it can +// produce nice human-readable Kind/Value/Description entries per field. +// This test enforces that every fingerprint-relevant field of +// [projectconfig.ComponentConfig] (and its directly walked sub-structs) +// has been consciously categorized in [expectedCovered]. When a new field +// is added to one of these structs, this test forces a choice: +// +// - Tag it `fingerprint:"-"` (declaring it operational metadata such as +// publish channels, build hints, or maintenance markers). The +// fingerprint test in projectconfig already enforces tag presence. 
//   - Add it here AND wire up a collector in [collectCustomizations].
//
// Structs whose fingerprintable fields are surfaced *wholesale* by a
// single collector ([projectconfig.ComponentOverlay],
// [projectconfig.PackageConfig]) are intentionally NOT walked here -- only
// their parent ComponentConfig field appears in expectedCovered.
// Field-level drift inside those opaque units is still caught by the
// fingerprint exhaustiveness test in projectconfig, at which point a
// human reviewer decides whether richer per-field surfacing belongs in
// history too.
func TestCustomizationCollectorsCoverEveryFingerprintableField(t *testing.T) {
	t.Parallel()

	// Structs walked here are those we want field-level drift detection on,
	// so adding a fingerprintable field forces a conscious decision about
	// surfacing it. A walked field need not map to a *distinct* Kind:
	// DistroReference's two fields both fold into spec.upstream-distro, and
	// SourceFileReference's Hash/HashType fold into the file's entry. Adding
	// a new sub-struct that should get this scrutiny means adding it here.
	walkedStructs := []reflect.Type{
		reflect.TypeFor[projectconfig.ComponentConfig](),
		reflect.TypeFor[projectconfig.ComponentBuildConfig](),
		reflect.TypeFor[projectconfig.CheckConfig](),
		reflect.TypeFor[projectconfig.SpecSource](),
		reflect.TypeFor[projectconfig.DistroReference](),
		reflect.TypeFor[projectconfig.ReleaseConfig](),
		reflect.TypeFor[projectconfig.ComponentRenderConfig](),
		reflect.TypeFor[projectconfig.SourceFileReference](),
	}

	// Maps "StructName.FieldName" -> short note describing how the field
	// surfaces in `component history` output. Every fingerprint-relevant
	// (i.e., NOT `fingerprint:"-"`) field in walkedStructs must appear here.
	// Only the keys are checked below; the notes are for human readers.
	expectedCovered := map[string]string{
		// ComponentConfig -- top-level fields dispatch to sub-collectors
		// or are treated as opaque-unit collections.
		"ComponentConfig.Spec":        "appendSpecItems (per-field via SpecSource walk)",
		"ComponentConfig.Release":     "appendReleaseItems (per-field via ReleaseConfig walk)",
		"ComponentConfig.Overlays":    "appendOverlayItems (opaque unit per overlay)",
		"ComponentConfig.Build":       "appendBuildItems (per-field via ComponentBuildConfig walk)",
		"ComponentConfig.Render":      "appendRenderItems (per-field via ComponentRenderConfig walk)",
		"ComponentConfig.SourceFiles": "appendSourceFileItems (opaque unit per source file)",
		"ComponentConfig.Packages":    "appendPackageItems (opaque unit per package override)",

		// ComponentBuildConfig.
		"ComponentBuildConfig.With":      "build.with",
		"ComponentBuildConfig.Without":   "build.without",
		"ComponentBuildConfig.Defines":   "build.defines",
		"ComponentBuildConfig.Undefines": "build.undefines",
		"ComponentBuildConfig.Check":     "delegates to CheckConfig walk",

		// CheckConfig.
		"CheckConfig.Skip": "build.check.skip",

		// SpecSource.
		"SpecSource.SourceType":     "spec.source-type",
		"SpecSource.UpstreamDistro": "spec.upstream-distro",
		"SpecSource.UpstreamName":   "spec.upstream-name (only when distinct from component name)",
		"SpecSource.UpstreamCommit": "spec.upstream-commit",

		// DistroReference -- both fields fold into the single spec.upstream-distro
		// item emitted by appendSpecItems (DistroReference.String()).
		"DistroReference.Name":    "spec.upstream-distro",
		"DistroReference.Version": "spec.upstream-distro",

		// ReleaseConfig.
		"ReleaseConfig.Calculation": "release.calculation (only when non-auto)",

		// ComponentRenderConfig.
		"ComponentRenderConfig.SkipFileFilter": "render.skip-file-filter",

		// SourceFileReference -- Filename and the ReplaceUpstream toggle each get
		// their own Kind. Hash/HashType are deliberately NOT emitted as output:
		// the file's *presence* is the customization signal, and a checksum-only
		// change is still caught by toml-commits / fingerprint-changes.
		"SourceFileReference.Filename":        "source-files",
		"SourceFileReference.Hash":            "not emitted (checksum change caught via toml-commits/fingerprint)",
		"SourceFileReference.HashType":        "not emitted (ditto Hash)",
		"SourceFileReference.ReplaceUpstream": "source-files.replace-upstream",
	}

	// Records every fingerprint-relevant field actually found by reflection,
	// so the reverse check below can spot stale expectedCovered entries.
	actualFields := make(map[string]bool)

	// Forward: every fingerprint-relevant field must be listed.
	for _, st := range walkedStructs {
		for i := range st.NumField() {
			field := st.Field(i)
			key := st.Name() + "." + field.Name

			// Fields excluded from the fingerprint are operational
			// metadata (publish channels, build hints, maintenance
			// markers, etc.), not modifications to upstream. Skip them.
			if field.Tag.Get("fingerprint") == "-" {
				continue
			}

			actualFields[key] = true

			_, ok := expectedCovered[key]
			assert.Truef(t, ok,
				"field %q is fingerprint-relevant but has no entry in expectedCovered. "+
					"Either tag it `fingerprint:\"-\"` (operational metadata) or add it "+
					"to expectedCovered AND wire a collector in collectCustomizations.", key)
		}
	}

	// Reverse: no stale entries left after a field was removed or
	// re-tagged `fingerprint:"-"`.
	for key := range expectedCovered {
		assert.Truef(t, actualFields[key],
			"expectedCovered entry %q does not correspond to a fingerprint-relevant "+
				"field. Was the field removed, renamed, or tagged `fingerprint:\"-\"`?", key)
	}
}

// TestCollectCustomizationsEmitsEveryKind complements the reflection-based
// coverage test above: that test proves every fingerprintable field is
// *categorized*, this one proves the collectors are actually *wired* by
// invoking collectCustomizations on a config with every customizable field
// populated and asserting each expected Kind appears. Deleting a collector
// call or emptying a collector body turns this red (the reflection test
// alone would stay green).
+func TestCollectCustomizationsEmitsEveryKind(t *testing.T) { + t.Parallel() + + config := projectconfig.ComponentConfig{ + Overlays: []projectconfig.ComponentOverlay{ + {Type: projectconfig.ComponentOverlayAddSpecTag, Tag: "Release", Value: "1"}, + }, + Build: projectconfig.ComponentBuildConfig{ + With: []string{"feature"}, + Without: []string{"docs"}, + Defines: map[string]string{"macro": "value"}, + Undefines: []string{"othermacro"}, + Check: projectconfig.CheckConfig{Skip: true, SkipReason: "flaky"}, + }, + Spec: projectconfig.SpecSource{ + SourceType: projectconfig.SpecSourceTypeUpstream, + UpstreamName: "different-name", + UpstreamCommit: "abc1234", + UpstreamDistro: projectconfig.DistroReference{Name: "fedora", Version: "43"}, + }, + Release: projectconfig.ReleaseConfig{ + Calculation: projectconfig.ReleaseCalculationAutorelease, + }, + Render: projectconfig.ComponentRenderConfig{SkipFileFilter: true}, + Packages: map[string]projectconfig.PackageConfig{ + "libfoo": {}, + }, + SourceFiles: []projectconfig.SourceFileReference{ + {Filename: "extra.tar.gz", ReplaceUpstream: true, ReplaceReason: "vendored fix"}, + }, + } + + wantKinds := []string{ + "spec-add-tag", + "build.with", + "build.without", + "build.defines", + "build.undefines", + "build.check.skip", + "spec.source-type", + "spec.upstream-commit", + "spec.upstream-name", + "spec.upstream-distro", + "release.calculation", + "render.skip-file-filter", + "packages", + "source-files", + "source-files.replace-upstream", + } + + items := collectCustomizations("comp", &config) + + gotKinds := make(map[string]bool, len(items)) + for _, item := range items { + gotKinds[item.Kind] = true + } + + for _, kind := range wantKinds { + assert.Truef(t, gotKinds[kind], + "collectCustomizations did not emit an item of Kind %q; "+ + "a collector for it may be unwired or its trigger condition wrong", kind) + } +} + +// TestFingerprintChangeDTOMirrorsSource guards the direction the explicit +// field-by-field copy in 
[toFingerprintChanges] cannot: a NEW field added to +// [sources.FingerprintChange] / [sources.CommitMetadata] would compile fine +// but silently never reach JSON consumers. This asserts the local DTO carries +// a field of the same type for every exported source field (matched by name), +// so a field addition OR a type change (e.g. int64->int32) trips the test. +func TestFingerprintChangeDTOMirrorsSource(t *testing.T) { + t.Parallel() + + dtoFields := exportedFieldTypes(reflect.TypeFor[FingerprintChange]()) + + for name, srcType := range exportedFieldTypes(reflect.TypeFor[sources.FingerprintChange]()) { + dtoType, ok := dtoFields[name] + if !assert.Truef(t, ok, + "sources.FingerprintChange field %q has no counterpart in the local "+ + "FingerprintChange DTO; add it (and to toFingerprintChanges) so it "+ + "reaches JSON consumers, or it is silently dropped.", name) { + continue + } + + assert.Equalf(t, srcType, dtoType, + "FingerprintChange DTO field %q has type %s but sources.FingerprintChange "+ + "has %s; the explicit copy in toFingerprintChanges would silently "+ + "narrow or mistype the value.", name, dtoType, srcType) + } +} + +// TestRenderCardViewFingerprintHint pins the N6 fix: the single-component +// card omits the per-commit FingerprintChangeDetails (to stay scannable) but +// must point the user at -O json whenever fingerprint changes exist, so the +// details aren't a silent dead end. 
+func TestRenderCardViewFingerprintHint(t *testing.T) { + t.Parallel() + + var withChanges strings.Builder + + renderCardView(&withChanges, HistoryResult{ + Name: "curl", + TomlPath: "azldev.toml", + TomlCommits: 3, + Customizations: 2, + FingerprintChanges: 2, + }) + + out := withChanges.String() + assert.Contains(t, out, "Component: curl") + assert.Contains(t, out, "FP changes: 2") + assert.Contains(t, out, "-O json", + "card should point at -O json when fingerprint changes exist") + + var noChanges strings.Builder + + renderCardView(&noChanges, HistoryResult{Name: "bash"}) + + assert.NotContains(t, noChanges.String(), "-O json", + "no fingerprint changes means no -O json hint") +} + +// exportedFieldTypes returns the exported fields of a struct type keyed by +// name -> type, flattening anonymously-embedded structs (e.g. CommitMetadata) +// into the parent's namespace. +func exportedFieldTypes(t reflect.Type) map[string]reflect.Type { + types := make(map[string]reflect.Type) + + for i := range t.NumField() { + field := t.Field(i) + + if field.Anonymous && field.Type.Kind() == reflect.Struct { + for name, typ := range exportedFieldTypes(field.Type) { + types[name] = typ + } + + continue + } + + if field.IsExported() { + types[field.Name] = field.Type + } + } + + return types +} diff --git a/internal/app/azldev/cmds/component/history_render.go b/internal/app/azldev/cmds/component/history_render.go new file mode 100644 index 00000000..55e9030e --- /dev/null +++ b/internal/app/azldev/cmds/component/history_render.go @@ -0,0 +1,126 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. + +package component + +import ( + "fmt" + "io" + "sort" + "time" + + "github.com/microsoft/azure-linux-dev-tools/internal/app/azldev" +) + +// sortHistoryResults orders results "most-customized first": highest +// customization count first, then fingerprint-changes, then alphabetical by name. 
+// Customizations is the most direct signal of human attention paid to a +// component (and it's deterministic / fast); fingerprint-changes and the name +// tie-break it for stable output. +func sortHistoryResults(results []HistoryResult) { + sort.SliceStable(results, func(left, right int) bool { + if results[left].Customizations != results[right].Customizations { + return results[left].Customizations > results[right].Customizations + } + + if results[left].FingerprintChanges != results[right].FingerprintChanges { + return results[left].FingerprintChanges > results[right].FingerprintChanges + } + + return results[left].Name < results[right].Name + }) +} + +// shouldRenderCardView decides whether to print the per-component "card" +// view instead of falling through to the default table renderer. We only +// switch to the card for exactly one result and only for the plain table +// format; markdown falls through to the reflectable renderer so a +// `-O markdown` consumer gets real markdown structure, and JSON / CSV +// consumers always get the machine-readable slice. +func shouldRenderCardView(env *azldev.Env, results []HistoryResult) bool { + if len(results) != 1 { + return false + } + + switch env.DefaultReportFormat() { + case azldev.ReportFormatTable: + return true + case azldev.ReportFormatCSV, azldev.ReportFormatJSON, azldev.ReportFormatMarkdown: + return false + default: + return false + } +} + +// renderCardView prints a single-component card view: a vertical key/value +// header followed by an indented list of customization items (with their +// descriptions when present). This is what the user sees from +// `azldev component history ` and is intended to be the most useful +// view for hand-picking entries to document. 
func renderCardView(writer io.Writer, result HistoryResult) {
	// Header: component name, then the source TOML path when one is known.
	fmt.Fprintf(writer, "Component: %s\n", result.Name)

	if result.TomlPath != "" {
		fmt.Fprintf(writer, " Source TOML: %s\n", result.TomlPath)
	}

	// Optional suffixes for the TOML-commits line; each stays empty when it
	// does not apply.
	sharedNote := ""
	if result.SharedToml {
		sharedNote = " (shared file -- count is coarse)"
	}

	latestNote := ""
	if !result.LatestCommit.IsZero() {
		// Render in UTC so the same commit shows the same date regardless of
		// the host's local timezone.
		latestNote = ", latest " + result.LatestCommit.UTC().Format(time.DateOnly)
	}

	fmt.Fprintf(writer, " TOML commits: %d%s%s\n", result.TomlCommits, sharedNote, latestNote)
	fmt.Fprintf(writer, " Customizations: %d\n", result.Customizations)
	fmt.Fprintf(writer, " FP changes: %d\n", result.FingerprintChanges)

	// The per-commit FingerprintChangeDetails are populated for a single
	// surviving component but omitted from the card to keep it scannable;
	// point the user at -O json so the changelog records aren't a dead end.
	if result.FingerprintChanges > 0 {
		fmt.Fprintln(writer, " (run with -O json for per-commit details)")
	}

	if result.HasLock {
		fmt.Fprintf(writer,
			" Lock state: locked (manual-bump=%d, has-import=%t)\n",
			result.ManualBump, result.HasImport)
	} else {
		fmt.Fprintln(writer, " Lock state: no lock")
	}

	// Warnings get their own section, separated from the header by a blank
	// line, and are printed before the customization list.
	if len(result.Warnings) > 0 {
		fmt.Fprintln(writer)
		fmt.Fprintln(writer, "Warnings:")

		for _, warning := range result.Warnings {
			fmt.Fprintf(writer, " - %s\n", warning)
		}
	}

	// Nothing more to print when the component has no customization items.
	if len(result.CustomizationItems) == 0 {
		return
	}

	fmt.Fprintln(writer)
	fmt.Fprintln(writer, "Customizations:")

	// Items are numbered from 1; an empty value is shown as a placeholder so
	// the line never ends right after the kind.
	for idx, item := range result.CustomizationItems {
		value := item.Value
		if value == "" {
			value = "(no value)"
		}

		fmt.Fprintf(writer, " %d. [%s] %s\n", idx+1, item.Kind, value)

		if item.Description != "" {
			fmt.Fprintf(writer, " %s\n", item.Description)
		}
	}
}
diff --git a/internal/app/azldev/command.go b/internal/app/azldev/command.go index ab40fed0..1880b332 100644 --- a/internal/app/azldev/command.go +++ b/internal/app/azldev/command.go @@ -234,6 +234,14 @@ func createReflectableOptions(env *Env, format reflectable.Format) *reflectable. // Displays the results of a command to stdout in JSON format. func reportResultsAsJSON(env *Env, results interface{}) error { + // Mirror reportResultsViaReflectable: a nil/bool sentinel means "nothing + // to report" (e.g. a command that already rendered its own output and + // returns true to suppress the framework's). Without this guard such a + // value would marshal to a literal `true`/`false`/`null`. + if results == nil || results == true || results == false { + return nil + } + // Normalize a typed-nil slice/map to an empty one so it marshals as `[]` // or `{}` rather than `null`. This keeps JSON output friendly for // downstream pipelines (e.g., `jq '.[]'` and `jq 'keys'` work whether or diff --git a/internal/utils/git/log.go b/internal/utils/git/log.go new file mode 100644 index 00000000..2f1257e4 --- /dev/null +++ b/internal/utils/git/log.go @@ -0,0 +1,57 @@
// Copyright (c) Microsoft Corporation.
// Licensed under the MIT License.

package git

import (
	"context"
	"fmt"
	"strconv"
	"strings"
	"time"

	"github.com/microsoft/azure-linux-dev-tools/internal/global/opctx"
)

// CountCommitsTouchingFile returns the number of commits that touched relPath
// in the git repository rooted at repoDir, plus the timestamp of the most
// recent such commit (zero when no commits found).
//
// The returned timestamp is on the committer-date axis: we format with %ct so
// it matches the order 'git log' walks (newest committer-date first).
+// +// Shells out to 'git log -- ' because go-git's PathFilter walks the +// entire commit graph in-process and is prohibitively slow on large repos +// (see the commentary on gitLogFileMetadata in +// internal/app/azldev/core/sources/synthistory.go). +func CountCommitsTouchingFile( + ctx context.Context, + cmdFactory opctx.CmdFactory, + repoDir, relPath string, +) (count int, latest time.Time, err error) { + args := []string{"log", "--format=%ct"} + + args = append(args, "--", relPath) + + output, err := RunInDir(ctx, cmdFactory, repoDir, args...) + if err != nil { + return 0, time.Time{}, fmt.Errorf("listing commits for %#q:\n%w", relPath, err) + } + + if output == "" { + return 0, time.Time{}, nil + } + + lines := strings.Split(output, "\n") + + // 'git log' emits newest-first; the first line is the latest commit. + unixSeconds, err := strconv.ParseInt(strings.TrimSpace(lines[0]), 10, 64) + if err != nil { + return 0, time.Time{}, fmt.Errorf("parsing commit timestamp %#q:\n%w", lines[0], err) + } + + // Normalize to UTC so the timestamp serializes identically regardless of + // the host's local timezone (time.Unix defaults to Location: Local, which + // would otherwise make JSON output non-reproducible across machines). 
+ return len(lines), time.Unix(unixSeconds, 0).UTC(), nil +} diff --git a/scenario/__snapshots__/TestMCPServerMode_1.snap.json b/scenario/__snapshots__/TestMCPServerMode_1.snap.json index 356765c2..0972acfe 100755 --- a/scenario/__snapshots__/TestMCPServerMode_1.snap.json +++ b/scenario/__snapshots__/TestMCPServerMode_1.snap.json @@ -200,6 +200,101 @@ }, "name": "component-diff-sources" }, + { + "annotations": { + "destructiveHint": true, + "idempotentHint": false, + "openWorldHint": true, + "readOnlyHint": false + }, + "description": "Report per-component change activity and customization detail", + "inputSchema": { + "properties": { + "accept-all": { + "default": false, + "description": "accept all prompts", + "type": "boolean" + }, + "all-components": { + "default": false, + "description": "Include all components", + "type": "boolean" + }, + "color": { + "description": "output colorization mode {always, auto, never}", + "type": "string" + }, + "component": { + "description": "Component name pattern", + "type": "string" + }, + "component-group": { + "description": "Component group name", + "type": "string" + }, + "config-file": { + "description": "additional TOML config file(s) to merge (may be repeated)", + "type": "string" + }, + "dry-run": { + "default": false, + "description": "dry run only (do not take action)", + "type": "boolean" + }, + "include-bare": { + "default": false, + "description": "Include components with zero customizations in the output. 
By default they are hidden -- their config inherits everything from defaults, and computing their git metrics is the dominant cost on large projects.", + "type": "boolean" + }, + "network-retries": { + "default": 3, + "description": "maximum number of attempts for network operations (minimum 1)", + "type": "number" + }, + "no-default-config": { + "default": false, + "description": "disable default configuration", + "type": "boolean" + }, + "output-format": { + "description": "output format {csv, json, markdown, table}", + "type": "string" + }, + "permissive-config": { + "default": false, + "description": "do not fail on unknown fields in TOML config files", + "type": "boolean" + }, + "project": { + "default": "", + "description": "path to Azure Linux project", + "type": "string" + }, + "quiet": { + "default": false, + "description": "only enable minimal output", + "type": "boolean" + }, + "shared": { + "default": "show", + "description": "How to report rows for components that share a TOML file with others: show (keep row, count is coarse), omit (drop row).", + "type": "string" + }, + "spec-path": { + "description": "Spec path", + "type": "string" + }, + "verbose": { + "default": false, + "description": "enable verbose output", + "type": "boolean" + } + }, + "required": [], + "type": "object" + }, + "name": "component-history" + }, { "annotations": { "destructiveHint": true, diff --git a/scenario/component_history_test.go b/scenario/component_history_test.go new file mode 100644 index 00000000..5d584771 --- /dev/null +++ b/scenario/component_history_test.go @@ -0,0 +1,218 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. 
+ +//go:build scenario + +package scenario_tests + +import ( + "encoding/json" + "os/exec" + "path/filepath" + "testing" + + componentcmds "github.com/microsoft/azure-linux-dev-tools/internal/app/azldev/cmds/component" + "github.com/microsoft/azure-linux-dev-tools/internal/projectconfig" + "github.com/microsoft/azure-linux-dev-tools/scenario/internal/projecttest" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +// runHistory runs `azldev component history` with the given args and returns +// parsed JSON results. Fails the test on any error. Decodes into the real +// [componentcmds.HistoryResult] so the test stays in sync with the command's +// schema automatically. +func runHistory(t *testing.T, azldevBin, projectDir string, extraArgs ...string) []componentcmds.HistoryResult { + t.Helper() + + args := []string{"-C", projectDir, "--no-default-config", "component", "history"} + args = append(args, extraArgs...) + args = append(args, "-q", "-O", "json") + + cmd := exec.CommandContext(t.Context(), azldevBin, args...) + out, err := cmd.CombinedOutput() + require.NoError(t, err, "azldev failed: %s", string(out)) + + var results []componentcmds.HistoryResult + require.NoError(t, json.Unmarshal(out, &results), "failed to parse JSON: %s", string(out)) + + return results +} + +// historyMap converts a slice of [componentcmds.HistoryResult] into a map keyed +// by component name. +func historyMap(results []componentcmds.HistoryResult) map[string]componentcmds.HistoryResult { + m := make(map[string]componentcmds.HistoryResult, len(results)) + for _, r := range results { + m[r.Name] = r + } + + return m +} + +// TestComponentHistory_Smoke exercises the `azldev component history` command +// end-to-end with a real git repository, verifying that: +// - customized components are reported with their customization count and items, +// - bare components are excluded by default, +// - `--include-bare` brings the bare components back into the output. 
//
// This is a smoke test — it doesn't validate every metric, just that the command
// runs, emits valid JSON, and respects the most common filtering controls
// (`--include-bare`, `--shared=omit`, and explicit positional selection).
func TestComponentHistory_Smoke(t *testing.T) {
	t.Parallel()

	if testing.Short() {
		t.Skip("skipping long test")
	}

	azldevBin, projectDir := setupProjectWithGit(t,
		[]*projecttest.TestSpec{
			projecttest.NewSpec(
				projecttest.WithName("curl"),
				projecttest.WithVersion("8.0.0"),
				projecttest.WithRelease("1%{?dist}"),
				projecttest.WithBuildArch(projecttest.NoArch),
			),
		},
		[]*projectconfig.ComponentConfig{
			{
				// curl has explicit customizations so it should appear by default.
				Name: "curl",
				Spec: projectconfig.SpecSource{
					SourceType: projectconfig.SpecSourceTypeLocal,
					Path:       filepath.Join("specs", "curl", "curl.spec"),
				},
				Build: projectconfig.ComponentBuildConfig{
					With: []string{"feature-a"},
				},
			},
			{
				// bash is truly bare: no Spec, no Build, no anything. The
				// collectors emit zero items so it gets filtered out unless
				// --include-bare is passed.
				Name: "bash",
			},
		},
		nil,
	)

	// Commit everything so the project has a git history for toml-commit counting.
	gitInDir(t, projectDir, "add", ".")
	gitInDir(t, projectDir, "-c", "commit.gpgsign=false", "commit", "-m", "initial")

	// Seed two commits to curl's lock file with distinct fingerprints so the
	// fp-change details path has something to report.
	writeFileInDir(t, projectDir, "locks/curl.lock",
		`version = 1`+"\n"+`input-fingerprint = "sha256:curl-v1"`+"\n")
	gitInDir(t, projectDir, "add", "locks/curl.lock")
	gitInDir(t, projectDir, "-c", "commit.gpgsign=false", "commit", "-m", "curl: initial lock")

	writeFileInDir(t, projectDir, "locks/curl.lock",
		`version = 1`+"\n"+`input-fingerprint = "sha256:curl-v2"`+"\n")
	gitInDir(t, projectDir, "add", "locks/curl.lock")
	gitInDir(t, projectDir, "-c", "commit.gpgsign=false", "commit", "-m", "curl: bump fingerprint")

	// Default run: bare components are filtered out.
	results := runHistory(t, azldevBin, projectDir, "-a")
	rm := historyMap(results)

	require.Contains(t, rm, "curl", "customized component should be reported by default")
	require.NotContains(t, rm, "bash", "bare component should be filtered out by default")

	curl := rm["curl"]
	// curl's config sets exactly two fingerprintable fields: Build.With and the
	// explicit Spec.SourceType=local. Pin the exact count and Kinds so a
	// regression in either collector (or an accidental extra emission) is caught
	// rather than masked by a loose >= comparison.
	assert.Equal(t, 2, curl.Customizations, "curl should have exactly two customizations")
	assert.ElementsMatch(t, []string{"build.with", "spec.source-type"}, customizationKinds(curl),
		"curl's customization Kinds should be exactly build.with and spec.source-type")
	assert.NotEmpty(t, curl.TomlPath, "curl's source TOML path should be populated")
	// Both components are defined in the single azldev.toml, so it is shared and
	// the one initial commit that created it is the only one touching it.
	assert.True(t, curl.SharedToml, "curl shares azldev.toml with bash")
	assert.Equal(t, 1, curl.TomlCommits, "only the initial commit touched the shared azldev.toml")

	// Fingerprint-change details: should include both lock commits with full
	// author / message metadata sourced from the synthetic-distgit
	// FingerprintChange type via the local DTO copy.
	require.Equal(t, 2, curl.FingerprintChanges, "expected two fingerprint changes")
	require.Len(t, curl.FingerprintChangeDetails, curl.FingerprintChanges,
		"FingerprintChangeDetails length must match FingerprintChanges count")

	for i, change := range curl.FingerprintChangeDetails {
		assert.NotEmpty(t, change.Hash, "change[%d].Hash should be populated", i)
		assert.NotEmpty(t, change.Author, "change[%d].Author should be populated", i)
		assert.NotEmpty(t, change.Message, "change[%d].Message should be populated", i)
		assert.Positive(t, change.Timestamp, "change[%d].Timestamp should be populated", i)
	}

	// With --include-bare both components show up. With more than one result,
	// FingerprintChangeDetails is suppressed in JSON output to keep responses
	// bounded on -a runs (count is still populated).
	results = runHistory(t, azldevBin, projectDir, "-a", "--include-bare")
	rm = historyMap(results)

	require.Contains(t, rm, "curl", "customized component should still be reported with --include-bare")
	require.Contains(t, rm, "bash", "bare component should be reported with --include-bare")
	assert.Equal(t, 0, rm["bash"].Customizations, "bash has no customizations")
	assert.Equal(t, 2, rm["curl"].FingerprintChanges,
		"FingerprintChanges count should still be populated on multi-result runs")
	assert.Nil(t, rm["curl"].FingerprintChangeDetails,
		"FingerprintChangeDetails should be suppressed when more than one component is reported")

	// Explicit single-component query for a bare component: no --include-bare
	// is passed here, yet the bare-component filter is bypassed for an
	// explicitly named component so the user gets the row they asked for.
	results = runHistory(t, azldevBin, projectDir, "bash")
	rm = historyMap(results)

	require.Contains(t, rm, "bash",
		"explicit positional name should override --include-bare and return the row")
	assert.Equal(t, 0, rm["bash"].Customizations)

	// Explicit single-component query for curl: even though curl shares its TOML,
	// being the only surviving row means FingerprintChangeDetails is retained
	// (the multi-result suppression only kicks in with >1 row).
	results = runHistory(t, azldevBin, projectDir, "curl")
	rm = historyMap(results)
	require.Contains(t, rm, "curl")
	require.Len(t, results, 1, "explicit single-component query returns exactly one row")
	assert.Len(t, rm["curl"].FingerprintChangeDetails, 2,
		"single surviving row retains its FingerprintChangeDetails")

	// --shared=omit without an explicit selection drops shared-TOML rows. Both
	// curl and bash live in the shared azldev.toml, so the omit run is empty.
	results = runHistory(t, azldevBin, projectDir, "-a", "--include-bare", "--shared=omit")
	rm = historyMap(results)
	assert.NotContains(t, rm, "curl", "--shared=omit drops shared-TOML rows without explicit selection")
	assert.NotContains(t, rm, "bash", "--shared=omit drops shared-TOML rows without explicit selection")

	// An explicit positional selection overrides --shared=omit: the user asked
	// for curl by name, so they get it back even though its TOML is shared --
	// and with the real toml-commit count, not a suppressed zero (N3).
	results = runHistory(t, azldevBin, projectDir, "curl", "--shared=omit")
	rm = historyMap(results)
	require.Contains(t, rm, "curl",
		"explicit selection overrides --shared=omit")
	assert.Equal(t, 1, rm["curl"].TomlCommits,
		"explicit --shared=omit survivor keeps its real count, not a suppressed zero")
}

// customizationKinds returns the set of CustomizationItem Kinds in a result,
// deduplicated, for order-independent assertions.
+func customizationKinds(r componentcmds.HistoryResult) []string { + seen := make(map[string]bool, len(r.CustomizationItems)) + kinds := make([]string, 0, len(r.CustomizationItems)) + + for _, item := range r.CustomizationItems { + if seen[item.Kind] { + continue + } + + seen[item.Kind] = true + + kinds = append(kinds, item.Kind) + } + + return kinds +}