diff --git a/docs/user/reference/cli/azldev.md b/docs/user/reference/cli/azldev.md index 00c8b4db..1a7d9ce9 100644 --- a/docs/user/reference/cli/azldev.md +++ b/docs/user/reference/cli/azldev.md @@ -41,5 +41,6 @@ lives), or use -C to point to one. * [azldev image](azldev_image.md) - Manage Azure Linux images * [azldev package](azldev_package.md) - Manage binary package configuration * [azldev project](azldev_project.md) - Manage Azure Linux projects +* [azldev repo](azldev_repo.md) - Query published RPM repositories * [azldev version](azldev_version.md) - Print the CLI version diff --git a/docs/user/reference/cli/azldev_component.md b/docs/user/reference/cli/azldev_component.md index 0516a5e8..2ba04107 100644 --- a/docs/user/reference/cli/azldev_component.md +++ b/docs/user/reference/cli/azldev_component.md @@ -44,7 +44,7 @@ components defined in the project configuration. * [azldev component diff-sources](azldev_component_diff-sources.md) - Show the diff that overlays apply to a component's sources * [azldev component list](azldev_component_list.md) - List components in this project * [azldev component prepare-sources](azldev_component_prepare-sources.md) - Prepare buildable sources for components -* [azldev component query](azldev_component_query.md) - Query info for components in this project +* [azldev component query](azldev_component_query.md) - Query info from locally rendered component specs * [azldev component render](azldev_component_render.md) - Render post-overlay specs and sidecar files to a checked-in directory * [azldev component update](azldev_component_update.md) - Resolve and lock source identities for components diff --git a/docs/user/reference/cli/azldev_component_query.md b/docs/user/reference/cli/azldev_component_query.md index 03a49fe1..688fdc35 100644 --- a/docs/user/reference/cli/azldev_component_query.md +++ b/docs/user/reference/cli/azldev_component_query.md @@ -2,16 +2,21 @@ ## azldev component query -Query info for components in this 
project +Query info from locally rendered component specs ### Synopsis -Query detailed information for components by fetching and parsing their spec files. +Query detailed information for components from their locally rendered specs. -Unlike 'list', which only shows configuration metadata, 'query' resolves -upstream sources and parses the RPM spec to report version, release, -subpackages, dependencies, and other spec-level details. This makes it -slower than 'list' but more informative. +This command reads the post-overlay specs from the project's rendered-specs-dir +(produced by 'azldev component render') and runs rpmspec against them in a +single shared mock chroot, batching all specs into one chroot invocation with +parallel per-spec processing. For each component, it reports the source NEVR +and the list of binary subpackages the spec would produce when built. + +The rendered-specs-dir must exist on disk; if it doesn't, run +'azldev component render' first. Components that previously failed to render +(those with a RENDER_FAILED marker file) are skipped with a warning. ``` azldev component query [flags] @@ -31,6 +36,7 @@ azldev component query [flags] ``` -a, --all-components Include all components + --arch arch Target architecture passed to rpmspec via --target (x86_64, aarch64). Defaults to x86_64. Specs that ExclusiveArch/ExcludeArch-exclude the target are emitted with only the component name populated rather than as errors. (default x86_64) -p, --component stringArray Component name pattern -g, --component-group stringArray Component group name -h, --help help for query diff --git a/docs/user/reference/cli/azldev_repo.md b/docs/user/reference/cli/azldev_repo.md new file mode 100644 index 00000000..0049bd83 --- /dev/null +++ b/docs/user/reference/cli/azldev_repo.md @@ -0,0 +1,42 @@ + + +## azldev repo + +Query published RPM repositories + +### Synopsis + +Query published RPM repositories. 
+ +Subcommands wrap 'dnf repoquery' against an Azure Linux published repo URL +(e.g. an azl4-dev blob storage endpoint) and bucket the results into the +on-disk layout expected by downstream tooling. + +### Options + +``` + -h, --help help for repo +``` + +### Options inherited from parent commands + +``` + -y, --accept-all accept all prompts + --color mode output colorization mode {always, auto, never} (default auto) + --config-file stringArray additional TOML config file(s) to merge (may be repeated) + -n, --dry-run dry run only (do not take action) + --network-retries int maximum number of attempts for network operations (minimum 1) (default 3) + --no-default-config disable default configuration + -O, --output-format fmt output format {csv, json, markdown, table} (default table) + --permissive-config do not fail on unknown fields in TOML config files + -C, --project string path to Azure Linux project + -q, --quiet only enable minimal output + -v, --verbose enable verbose output +``` + +### SEE ALSO + +* [azldev](azldev.md) - 🐧 Azure Linux Dev Tool +* [azldev repo diff](azldev_repo_diff.md) - Diff a published repo against the local project's expected package set +* [azldev repo query](azldev_repo_query.md) - Query a published repo and write per-channel package lists + diff --git a/docs/user/reference/cli/azldev_repo_diff.md b/docs/user/reference/cli/azldev_repo_diff.md new file mode 100644 index 00000000..528b5ead --- /dev/null +++ b/docs/user/reference/cli/azldev_repo_diff.md @@ -0,0 +1,71 @@ + + +## azldev repo diff + +Diff a published repo against the local project's expected package set + +### Synopsis + +Diff a published Azure Linux repo against what the local project would publish. + +For the requested arch this command: + + 1. Runs 'azldev repo query' against --source to capture the published + per-channel package lists (base.txt, sdk.txt, base-srpms.txt, + sdk-srpms.txt) under '<out-dir>/repo/<arch>/'. + 2. 
Runs 'azldev component query' for all components and resolves each + subpackage's publish channel via 'azldev pkg list --rpm-file', producing + the project-side per-channel lists under '<out-dir>/project/<arch>/'. + 3. Diffs the two sides and emits one unified-style '.diff' file per bucket + under '<out-dir>/diff/<arch>/'. Lines beginning with '+' are present only + in the project; lines beginning with '-' are present only in the repo. + +Requires a configured project; component specs must already be rendered (run +'azldev component render' first if needed). + +``` +azldev repo diff --source <url> [--arch x86_64|aarch64] [--out-dir <dir>] [flags] +``` + +### Examples + +``` + # Diff the local project against the beta repo for x86_64 + azldev repo diff --source https://packages.microsoft.com/azurelinux/4.0/beta + + # Diff aarch64 into a custom directory + azldev repo diff \ + --source https://packages.microsoft.com/azurelinux/4.0/beta \ + --arch aarch64 \ + --out-dir /tmp/azl-diff +``` + +### Options + +``` + --arch arch Target architecture for both the repo query and the local component query (x86_64, aarch64). Defaults to x86_64. (default x86_64) + -h, --help help for diff + -o, --out-dir string Directory for repo/, project/, and diff/ output trees. Defaults to '$TMPDIR/azldev-repo-diff/<repoID>' (repoID is the final path segment of --source). 
+ --source string Base URL of the published repo (per-channel URL is '//') +``` + +### Options inherited from parent commands + +``` + -y, --accept-all accept all prompts + --color mode output colorization mode {always, auto, never} (default auto) + --config-file stringArray additional TOML config file(s) to merge (may be repeated) + -n, --dry-run dry run only (do not take action) + --network-retries int maximum number of attempts for network operations (minimum 1) (default 3) + --no-default-config disable default configuration + -O, --output-format fmt output format {csv, json, markdown, table} (default table) + --permissive-config do not fail on unknown fields in TOML config files + -C, --project string path to Azure Linux project + -q, --quiet only enable minimal output + -v, --verbose enable verbose output +``` + +### SEE ALSO + +* [azldev repo](azldev_repo.md) - Query published RPM repositories + diff --git a/docs/user/reference/cli/azldev_repo_query.md b/docs/user/reference/cli/azldev_repo_query.md new file mode 100644 index 00000000..796cf071 --- /dev/null +++ b/docs/user/reference/cli/azldev_repo_query.md @@ -0,0 +1,73 @@ + + +## azldev repo query + +Query a published repo and write per-channel package lists + +### Synopsis + +Query a published Azure Linux repo with 'dnf repoquery' and write the +results into a per-arch, per-channel directory layout. + +For each channel (base, sdk), the per-channel repo URL is constructed as +'//' and queried with: + + dnf repoquery --quiet \ + --repofrompath=, --repo= \ + --forcearch \ + --queryformat '%{name}|%{source_name}\n' + +The binary names go into '//.txt' and the +deduplicated source-package names go into '//-srpms.txt', +each sorted and one name per line. + +This mirrors the 'from-repoquery' enumeration step in the upstream +'scripts/regen-channel-lists.sh' but produces only the per-channel rpm/srpm +lists; channel reconciliation against the local branch is out of scope. 
+ +``` +azldev repo query --source [--arch x86_64|aarch64] [--out-dir ] [flags] +``` + +### Examples + +``` + # Query the default azl4-dev repo for x86_64 + azldev repo query --source https://packages.microsoft.com/azurelinux/4.0/beta + + # Query aarch64 into a custom directory + azldev repo query \ + --source https://packages.microsoft.com/azurelinux/4.0/beta \ + --arch aarch64 \ + --out-dir /tmp/azl-lists +``` + +### Options + +``` + --arch arch Target architecture passed to dnf via --forcearch (x86_64, aarch64). Defaults to x86_64. (default x86_64) + -h, --help help for query + -o, --out-dir string Directory under which '/.txt' and '/-srpms.txt' are written. Defaults to '$TMPDIR/azldev-repo-query/' (repoID is the final path segment of --source). + --source string Base URL of the published repo (per-channel URL is '//') +``` + +### Options inherited from parent commands + +``` + -y, --accept-all accept all prompts + --color mode output colorization mode {always, auto, never} (default auto) + --config-file stringArray additional TOML config file(s) to merge (may be repeated) + -n, --dry-run dry run only (do not take action) + --network-retries int maximum number of attempts for network operations (minimum 1) (default 3) + --no-default-config disable default configuration + -O, --output-format fmt output format {csv, json, markdown, table} (default table) + --permissive-config do not fail on unknown fields in TOML config files + -C, --project string path to Azure Linux project + -q, --quiet only enable minimal output + -v, --verbose enable verbose output +``` + +### SEE ALSO + +* [azldev repo](azldev_repo.md) - Query published RPM repositories + diff --git a/internal/app/azldev/cmds/component/mockproc.go b/internal/app/azldev/cmds/component/mockproc.go new file mode 100644 index 00000000..8f458fc8 --- /dev/null +++ b/internal/app/azldev/cmds/component/mockproc.go @@ -0,0 +1,107 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. 
+ +package component + +import ( + "log/slog" + + "github.com/microsoft/azure-linux-dev-tools/internal/app/azldev" + "github.com/microsoft/azure-linux-dev-tools/internal/app/azldev/core/sources" +) + +// Required-package presets for the shared MockProcessor. +// +// Render needs rpmautospec (macro expansion), rpmdevtools (spectool), and git +// (required for rpmautospec to read commit history). python3-click is required +// by rpmautospec but not declared as an RPM dependency. Ecosystem macro +// packages (go-srpm-macros, etc.) are already present via @buildsys-build → +// azurelinux-rpm-config. +// +// Query needs rpm-build for the `rpmspec` binary. It's typically already +// pulled in via @buildsys-build, but we install it explicitly so we don't +// depend on a particular buildgroup composition. +func mockPackagesForRender() []string { + return []string{"rpmautospec", "rpmdevtools", "git", "python3-click"} +} + +func mockPackagesForQuery() []string { + // rpm-build provides rpmspec; python3 is needed to run query_process.py. + // (The render path gets python3 transitively via python3-click, but the + // query path doesn't install rpmautospec/python3-click.) + // + // Additional macro packages are installed so that build-time macros + // affecting %files / %package expansion (and therefore --builtrpms + // output) resolve during rpmspec parsing. Without these, --builtrpms + // under-reports subpackages for specs that generate their %files + // sections via macros, or that use macros like %pyproject_extras_subpkg + // to emit whole subpackage stanzas at parse time. + // + // Curated list of common macro packages that emit %package / %files in + // the Azure Linux spec corpus: + // * fonts-rpm-macros — %fontfiles, %fontfamily_subpkg, etc. 
+ // * pyproject-rpm-macros — %pyproject_extras_subpkg + // * java-srpm-macros, javapackages-tools, javapackages-common — + // %mvn_package, %mvn_install, + // %javadoc_package (auto + // -javadoc subpackages, from + // macros.fjava in + // javapackages-common), + // jp_minimal bcond default. + // javapackages-common is + // normally pulled in via + // javapackages-tools, but we + // install it explicitly so + // %javadoc_package never + // silently disappears. + // * ghc-rpm-macros — %ghc_lib_subpackage and ghc_prof/haddock + // bcond defaults. Requires the + // ghc_version_override define set by + // query_process.py to avoid shelling out + // to a `ghc` binary that isn't installed + // in the chroot. + // + // We install `java-srpm-macros` (the actual binary RPM) rather than + // `java-rpm-macros`, which is the SRPM name; the latter has no + // `%files` section for the main package and is not a buildable binary. + // + // Macros that only affect %prep/%build/%install (e.g. %cargo_install, + // %py3_build) don't need to be added — they don't change which binary + // RPMs would be built. + return []string{ + "rpm-build", + "python3", + "fonts-rpm-macros", + "pyproject-rpm-macros", + "java-srpm-macros", + "javapackages-tools", + "javapackages-common", + "ghc-rpm-macros", + } +} + +// createMockProcessor creates a [sources.MockProcessor] using the project's +// mock config. Returns nil if the mock config is not available (e.g., no project +// config loaded, or no mock config path configured). +// +// requiredPackages is the set of packages to install in the chroot on first +// use. Use one of the mockPackagesFor* presets above to pick the right set +// for the calling command. 
+func createMockProcessor(env *azldev.Env, requiredPackages []string) *sources.MockProcessor { + _, distroVerDef, err := env.Distro() + if err != nil { + slog.Info("Mock processor unavailable; could not resolve distro", "error", err) + + return nil + } + + if distroVerDef.MockConfigPath == "" { + slog.Info("Mock processor unavailable; no mock config path configured") + + return nil + } + + slog.Info("Mock processor available", "mockConfig", distroVerDef.MockConfigPath) + + return sources.NewMockProcessor(env, distroVerDef.MockConfigPath, requiredPackages) +} diff --git a/internal/app/azldev/cmds/component/query.go b/internal/app/azldev/cmds/component/query.go index 59985a73..38b67b3a 100644 --- a/internal/app/azldev/cmds/component/query.go +++ b/internal/app/azldev/cmds/component/query.go @@ -4,11 +4,19 @@ package component import ( + "errors" "fmt" + "log/slog" + "path/filepath" "github.com/microsoft/azure-linux-dev-tools/internal/app/azldev" "github.com/microsoft/azure-linux-dev-tools/internal/app/azldev/core/components" + "github.com/microsoft/azure-linux-dev-tools/internal/app/azldev/core/sources" "github.com/microsoft/azure-linux-dev-tools/internal/app/azldev/core/specs" + "github.com/microsoft/azure-linux-dev-tools/internal/rpm" + "github.com/microsoft/azure-linux-dev-tools/internal/utils/fileperms" + "github.com/microsoft/azure-linux-dev-tools/internal/utils/fileutils" + "github.com/microsoft/azure-linux-dev-tools/internal/utils/qemu" "github.com/spf13/cobra" ) @@ -16,6 +24,12 @@ import ( type QueryComponentsOptions struct { // Standard filter for selecting components. ComponentFilter components.ComponentFilter + + // Target architecture passed to rpmspec via --target. Defaults to + // x86_64. Drives ExclusiveArch/ExcludeArch evaluation; specs that + // exclude the target are emitted with only SpecInfo.Name populated + // (no Version/Subpackages) rather than as errors. 
+ Arch qemu.Arch } func queryOnAppInit(_ *azldev.App, parentCmd *cobra.Command) { @@ -24,17 +38,24 @@ func queryOnAppInit(_ *azldev.App, parentCmd *cobra.Command) { // Constructs a [cobra.Command] for "component query" CLI subcommand. func NewComponentQueryCommand() *cobra.Command { - options := &QueryComponentsOptions{} + options := &QueryComponentsOptions{ + Arch: qemu.Arch(qemu.ArchX86_64), + } cmd := &cobra.Command{ Use: "query", - Short: "Query info for components in this project", - Long: `Query detailed information for components by fetching and parsing their spec files. + Short: "Query info from locally rendered component specs", + Long: `Query detailed information for components from their locally rendered specs. + +This command reads the post-overlay specs from the project's rendered-specs-dir +(produced by 'azldev component render') and runs rpmspec against them in a +single shared mock chroot, batching all specs into one chroot invocation with +parallel per-spec processing. For each component, it reports the source NEVR +and the list of binary subpackages the spec would produce when built. -Unlike 'list', which only shows configuration metadata, 'query' resolves -upstream sources and parses the RPM spec to report version, release, -subpackages, dependencies, and other spec-level details. This makes it -slower than 'list' but more informative.`, +The rendered-specs-dir must exist on disk; if it doesn't, run +'azldev component render' first. Components that previously failed to render +(those with a RENDER_FAILED marker file) are skipped with a warning.`, Example: ` # Query a single component azldev component query -p curl @@ -50,43 +71,251 @@ slower than 'list' but more informative.`, components.AddComponentFilterOptionsToCommand(cmd, &options.ComponentFilter) + cmd.Flags().Var(&options.Arch, "arch", + "Target architecture passed to rpmspec via --target (x86_64, aarch64). "+ + "Defaults to x86_64. 
Specs that ExclusiveArch/ExcludeArch-exclude the "+ + "target are emitted with only the component name populated rather than "+ + "as errors.") + _ = cmd.RegisterFlagCompletionFunc("arch", + func(_ *cobra.Command, _ []string, _ string) ([]string, cobra.ShellCompDirective) { + return qemu.SupportedArchitectures(), cobra.ShellCompDirectiveNoFileComp + }) + return cmd } -// componentDetails encapsulates detailed information about a component. -type componentDetails struct { +// ComponentDetails encapsulates detailed information about a component. +// +// Arch records the target arch the query ran against. Components that the +// spec excludes for the requested arch (ExclusiveArch/ExcludeArch) are +// emitted with only the embedded SpecInfo.Name populated (Version and +// Subpackages stay at their zero values); the per-arch summary is reported +// via the excludedCount log line. +type ComponentDetails struct { + Arch string specs.ComponentSpecDetails } -// Queries env for component details, in accordance with options. Returns the found components. +// QueryComponents queries info for selected components by reading the locally +// rendered specs and running rpmspec against them in a single shared mock +// chroot. Returns one entry per successfully queried component, in the order +// returned by the resolver. Components with a RENDER_FAILED marker are +// skipped with a loud warning. Per-component rpmspec failures are surfaced +// as warnings; the corresponding entry is omitted from the result list, but +// the returned error stays nil so the successful results are still reported. +// +//nolint:cyclop,funlen // Linear pipeline; further splitting hurts readability. 
func QueryComponents( env *azldev.Env, options *QueryComponentsOptions, -) (results []*componentDetails, err error) { - var comps *components.ComponentSet +) ([]*ComponentDetails, error) { + renderedSpecsDir := env.Config().Project.RenderedSpecsDir + if renderedSpecsDir == "" { + return nil, errors.New( + "project.rendered-specs-dir is not configured; " + + "set it in the project config and run 'azldev component render' first") + } + + dirExists, err := fileutils.DirExists(env.FS(), renderedSpecsDir) + if err != nil { + return nil, fmt.Errorf("checking rendered-specs-dir %#q:\n%w", renderedSpecsDir, err) + } + + if !dirExists { + return nil, fmt.Errorf( + "rendered-specs-dir %#q does not exist; run 'azldev component render' first", + renderedSpecsDir) + } resolver := components.NewResolver(env) - comps, err = resolver.FindComponents(&options.ComponentFilter) + comps, err := resolver.FindComponents(&options.ComponentFilter) + if err != nil { + return nil, fmt.Errorf("failed to resolve components:\n%w", err) + } + + if comps.Len() == 0 { + return nil, errors.New("no components were selected; " + + "please use command-line options to indicate which components to query") + } + + inputs, skipped, err := buildSpecQueryInputs(env, comps.Components(), renderedSpecsDir) if err != nil { - return results, fmt.Errorf("failed to resolve components:\n%w", err) + return nil, err + } + + if len(inputs) == 0 { + return nil, fmt.Errorf("no components have a rendered spec on disk; skipped %d", skipped) } - allDetails := make([]*componentDetails, 0, comps.Len()) + mockProcessor := createMockProcessor(env, mockPackagesForQuery()) + if mockProcessor == nil { + return nil, errors.New( + "mock config required for querying; ensure the project has a valid distro with mock config") + } + + defer mockProcessor.Destroy(env) + + if err := env.FS().MkdirAll(env.WorkDir(), fileperms.PublicDir); err != nil { + return nil, fmt.Errorf("creating work directory:\n%w", err) + } + + scratchDir, err := 
fileutils.MkdirTemp(env.FS(), env.WorkDir(), "azldev-query-scratch-") + if err != nil { + return nil, fmt.Errorf("creating scratch directory:\n%w", err) + } + + defer func() { + if removeErr := env.FS().RemoveAll(scratchDir); removeErr != nil { + slog.Debug("Failed to clean up scratch directory", "path", scratchDir, "error", removeErr) + } + }() + + archStr := options.Arch.String() + + queryResults, err := mockProcessor.BatchQuerySpecs( + env, env, renderedSpecsDir, scratchDir, archStr, + inputs, env.FS(), env.CPUBoundConcurrency(), + ) + if err != nil { + return nil, fmt.Errorf("batch-querying rendered specs:\n%w", err) + } + + allDetails := make([]*ComponentDetails, 0, len(queryResults)) + + var ( + failed int + excluded int + ) + + for _, queryResult := range queryResults { + if queryResult.Error != nil { + slog.Warn("Failed to query rendered spec", + "component", queryResult.Name, "error", queryResult.Error) - for _, comp := range comps.Components() { - spec := comp.GetSpec() + failed++ - specInfo, err := spec.Parse() - if err != nil { - return nil, fmt.Errorf("failed to parse spec for component %q:\n%w", comp.GetName(), err) + continue } - details := &componentDetails{ - ComponentSpecDetails: *specInfo, + if queryResult.ExcludedFromArch { + // Per-component logging here would flood stderr on cross-arch + // queries (e.g. --arch aarch64 against an x86_64-heavy distro + // excludes thousands of specs); a single summary log is emitted + // below the loop instead. 
+ excluded++ + + allDetails = append(allDetails, &ComponentDetails{ + Arch: archStr, + ComponentSpecDetails: specs.ComponentSpecDetails{ + SpecInfo: rpm.SpecInfo{Name: queryResult.Name}, + }, + }) + + continue } - allDetails = append(allDetails, details) + allDetails = append(allDetails, &ComponentDetails{ + Arch: archStr, + ComponentSpecDetails: specs.ComponentSpecDetails{ + SpecInfo: *queryResult.Info, + }, + }) + } + + if excluded > 0 { + slog.Info("Some components excluded from arch by spec", + "arch", archStr, "excludedCount", excluded) + } + + if failed > 0 { + // Intentionally return nil error: returning an error would suppress + // the results table (runFuncInternal skips reportResults on error), + // hiding the successfully-queried components. Per-component failures + // are already surfaced via the slog.Warn above. + slog.Error("Some components failed to query", "failedCount", failed) } return allDetails, nil } + +// buildSpecQueryInputs walks the resolved components and constructs the list +// of [sources.SpecQueryInput] entries to pass to BatchQuerySpecs. Components +// whose rendered spec directory carries a RENDER_FAILED marker (or whose +// rendered .spec file is missing) are skipped with a loud warning and counted +// toward `skipped`. 
+func buildSpecQueryInputs( + env *azldev.Env, + componentList []components.Component, + renderedSpecsDir string, +) (inputs []sources.SpecQueryInput, skipped int, err error) { + inputs = make([]sources.SpecQueryInput, 0, len(componentList)) + + for _, comp := range componentList { + name := comp.GetName() + cfg := comp.GetConfig() + + if cfg.RenderedSpecDir == "" { + return nil, 0, fmt.Errorf( + "component %#q has no rendered-spec dir; ensure project.rendered-specs-dir is set", + name) + } + + if hasMarker, markerErr := hasRenderFailedMarker(env, cfg.RenderedSpecDir); markerErr != nil { + return nil, 0, fmt.Errorf("checking RENDER_FAILED marker for %#q:\n%w", name, markerErr) + } else if hasMarker { + slog.Warn( + "Skipping component: RENDER_FAILED marker present; run 'azldev component render' to refresh", + "component", name, "dir", cfg.RenderedSpecDir) + + skipped++ + + continue + } + + specPath := filepath.Join(cfg.RenderedSpecDir, name+".spec") + + specExists, statErr := fileutils.Exists(env.FS(), specPath) + if statErr != nil { + return nil, 0, fmt.Errorf("checking rendered spec %#q:\n%w", specPath, statErr) + } + + if !specExists { + slog.Warn( + "Skipping component: rendered spec not found; run 'azldev component render' to produce it", + "component", name, "expectedSpec", specPath) + + skipped++ + + continue + } + + relSpecPath, relErr := filepath.Rel(renderedSpecsDir, specPath) + if relErr != nil { + return nil, 0, fmt.Errorf("relativizing spec path %#q against %#q:\n%w", + specPath, renderedSpecsDir, relErr) + } + + inputs = append(inputs, sources.SpecQueryInput{ + Name: name, + SpecRelPath: relSpecPath, + With: cfg.Build.With, + Without: cfg.Build.Without, + Defines: cfg.Build.Defines, + }) + } + + return inputs, skipped, nil +} + +// hasRenderFailedMarker reports whether the given rendered-spec dir carries +// the marker file written by 'component render' on failure. 
+func hasRenderFailedMarker(env *azldev.Env, renderedSpecDir string) (bool, error) { + markerPath := filepath.Join(renderedSpecDir, renderErrorMarkerFile) + + exists, err := fileutils.Exists(env.FS(), markerPath) + if err != nil { + return false, fmt.Errorf("checking %#q:\n%w", markerPath, err) + } + + return exists, nil +} diff --git a/internal/app/azldev/cmds/component/query_internal_test.go b/internal/app/azldev/cmds/component/query_internal_test.go new file mode 100644 index 00000000..012d1f63 --- /dev/null +++ b/internal/app/azldev/cmds/component/query_internal_test.go @@ -0,0 +1,153 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. + +package component + +import ( + "path/filepath" + "testing" + + "github.com/microsoft/azure-linux-dev-tools/internal/app/azldev/core/components" + "github.com/microsoft/azure-linux-dev-tools/internal/app/azldev/core/testutils" + "github.com/microsoft/azure-linux-dev-tools/internal/projectconfig" + "github.com/microsoft/azure-linux-dev-tools/internal/utils/fileperms" + "github.com/microsoft/azure-linux-dev-tools/internal/utils/fileutils" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +// resolveComponents is a small helper to drive the component resolver in +// internal tests so we can call buildSpecQueryInputs with realistic inputs. 
+func resolveComponents(t *testing.T, testEnv *testutils.TestEnv, names ...string) []components.Component { + t.Helper() + + resolver := components.NewResolver(testEnv.Env) + + comps, err := resolver.FindComponents(&components.ComponentFilter{ + ComponentNamePatterns: names, + }) + require.NoError(t, err) + + return comps.Components() +} + +func TestBuildSpecQueryInputs_Happy(t *testing.T) { + const ( + componentName = "curl" + renderedSpecsDir = "/project/specs" + ) + + testEnv := testutils.NewTestEnv(t) + testEnv.Config.Project.RenderedSpecsDir = renderedSpecsDir + testEnv.Config.Components[componentName] = projectconfig.ComponentConfig{ + Name: componentName, + Spec: projectconfig.SpecSource{ + SourceType: projectconfig.SpecSourceTypeLocal, + Path: "/project/curl.spec", + }, + Build: projectconfig.ComponentBuildConfig{ + With: []string{"foo"}, + Without: []string{"bar"}, + Defines: map[string]string{"key": "value"}, + }, + } + + // Spec source (for resolver) and rendered spec (for our path). 
+ require.NoError(t, fileutils.WriteFile( + testEnv.FS(), "/project/curl.spec", []byte(""), fileperms.PublicFile, + )) + + renderedDir := filepath.Join(renderedSpecsDir, "c", componentName) + require.NoError(t, fileutils.MkdirAll(testEnv.FS(), renderedDir)) + require.NoError(t, fileutils.WriteFile( + testEnv.FS(), filepath.Join(renderedDir, componentName+".spec"), + []byte(""), fileperms.PublicFile, + )) + + resolved := resolveComponents(t, testEnv, componentName) + require.Len(t, resolved, 1) + + inputs, skipped, err := buildSpecQueryInputs(testEnv.Env, resolved, renderedSpecsDir) + require.NoError(t, err) + assert.Zero(t, skipped) + require.Len(t, inputs, 1) + + assert.Equal(t, componentName, inputs[0].Name) + assert.Equal(t, filepath.Join("c", componentName, componentName+".spec"), inputs[0].SpecRelPath) + assert.Equal(t, []string{"foo"}, inputs[0].With) + assert.Equal(t, []string{"bar"}, inputs[0].Without) + assert.Equal(t, map[string]string{"key": "value"}, inputs[0].Defines) +} + +func TestBuildSpecQueryInputs_SkipsRenderFailedMarker(t *testing.T) { + const ( + componentName = "curl" + renderedSpecsDir = "/project/specs" + ) + + testEnv := testutils.NewTestEnv(t) + testEnv.Config.Project.RenderedSpecsDir = renderedSpecsDir + testEnv.Config.Components[componentName] = projectconfig.ComponentConfig{ + Name: componentName, + Spec: projectconfig.SpecSource{ + SourceType: projectconfig.SpecSourceTypeLocal, + Path: "/project/curl.spec", + }, + } + + require.NoError(t, fileutils.WriteFile( + testEnv.FS(), "/project/curl.spec", []byte(""), fileperms.PublicFile, + )) + + renderedDir := filepath.Join(renderedSpecsDir, "c", componentName) + require.NoError(t, fileutils.MkdirAll(testEnv.FS(), renderedDir)) + // Spec file exists, but so does the marker — the marker wins. 
+ require.NoError(t, fileutils.WriteFile( + testEnv.FS(), filepath.Join(renderedDir, componentName+".spec"), + []byte(""), fileperms.PublicFile, + )) + require.NoError(t, fileutils.WriteFile( + testEnv.FS(), filepath.Join(renderedDir, renderErrorMarkerFile), + []byte("RENDER FAILED"), fileperms.PublicFile, + )) + + resolved := resolveComponents(t, testEnv, componentName) + + inputs, skipped, err := buildSpecQueryInputs(testEnv.Env, resolved, renderedSpecsDir) + require.NoError(t, err) + assert.Empty(t, inputs) + assert.Equal(t, 1, skipped) +} + +func TestBuildSpecQueryInputs_SkipsMissingSpec(t *testing.T) { + const ( + componentName = "curl" + renderedSpecsDir = "/project/specs" + ) + + testEnv := testutils.NewTestEnv(t) + testEnv.Config.Project.RenderedSpecsDir = renderedSpecsDir + testEnv.Config.Components[componentName] = projectconfig.ComponentConfig{ + Name: componentName, + Spec: projectconfig.SpecSource{ + SourceType: projectconfig.SpecSourceTypeLocal, + Path: "/project/curl.spec", + }, + } + + require.NoError(t, fileutils.WriteFile( + testEnv.FS(), "/project/curl.spec", []byte(""), fileperms.PublicFile, + )) + + // Rendered dir exists but the .spec inside it does not. 
+ require.NoError(t, fileutils.MkdirAll( + testEnv.FS(), filepath.Join(renderedSpecsDir, "c", componentName), + )) + + resolved := resolveComponents(t, testEnv, componentName) + + inputs, skipped, err := buildSpecQueryInputs(testEnv.Env, resolved, renderedSpecsDir) + require.NoError(t, err) + assert.Empty(t, inputs) + assert.Equal(t, 1, skipped) +} diff --git a/internal/app/azldev/cmds/component/query_test.go b/internal/app/azldev/cmds/component/query_test.go index 9d605e02..8f61c6ee 100644 --- a/internal/app/azldev/cmds/component/query_test.go +++ b/internal/app/azldev/cmds/component/query_test.go @@ -4,15 +4,11 @@ package component_test import ( - "os/exec" "testing" "github.com/microsoft/azure-linux-dev-tools/internal/app/azldev/cmds/component" "github.com/microsoft/azure-linux-dev-tools/internal/app/azldev/core/components" "github.com/microsoft/azure-linux-dev-tools/internal/app/azldev/core/testutils" - "github.com/microsoft/azure-linux-dev-tools/internal/projectconfig" - "github.com/microsoft/azure-linux-dev-tools/internal/rpm/mock" - "github.com/microsoft/azure-linux-dev-tools/internal/utils/fileperms" "github.com/microsoft/azure-linux-dev-tools/internal/utils/fileutils" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" @@ -38,45 +34,56 @@ func TestComponentQueryCmd_NoMatch(t *testing.T) { require.Error(t, err) } -func TestQueryComponents_OneComponent(t *testing.T) { - const ( - testComponentName = "test-component" - testSpecPath = "/path/to/spec" - ) - +func TestQueryComponents_MissingRenderedSpecsDir(t *testing.T) { testEnv := testutils.NewTestEnv(t) - testEnv.Config.Components[testComponentName] = projectconfig.ComponentConfig{ - Name: testComponentName, - Spec: projectconfig.SpecSource{ - SourceType: projectconfig.SpecSourceTypeLocal, - Path: testSpecPath, + + // Test env constructProjectConfig leaves RenderedSpecsDir empty. 
+ options := component.QueryComponentsOptions{ + ComponentFilter: components.ComponentFilter{ + ComponentNamePatterns: []string{"any"}, }, } - // Pretend mock is present. - testEnv.CmdFactory.RegisterCommandInSearchPath(mock.MockBinary) + _, err := component.QueryComponents(testEnv.Env, &options) + require.Error(t, err) + assert.Contains(t, err.Error(), "rendered-specs-dir is not configured") +} + +func TestQueryComponents_RenderedSpecsDirDoesNotExist(t *testing.T) { + const renderedSpecsDir = "/project/specs" - // Mock the rpmspec command to return valid output - // NOTE: This takes a dependency on knowing how rpmspec gets invoked. - testEnv.CmdFactory.RunAndGetOutputHandler = func(cmd *exec.Cmd) (string, error) { - // Return mock rpmspec output in the expected format: name|epoch|version|release - return "name=test-component\nepoch=0\nversion=1.0.0\nrelease=1.azl3\n", nil - } + testEnv := testutils.NewTestEnv(t) + testEnv.Config.Project.RenderedSpecsDir = renderedSpecsDir + // Do NOT create the directory on the test filesystem. options := component.QueryComponentsOptions{ ComponentFilter: components.ComponentFilter{ - ComponentNamePatterns: []string{testComponentName}, + ComponentNamePatterns: []string{"any"}, }, } - // Simulate the spec file existing. - err := fileutils.WriteFile(testEnv.FS(), testSpecPath, []byte("test spec content"), fileperms.PublicFile) - require.NoError(t, err) + _, err := component.QueryComponents(testEnv.Env, &options) + require.Error(t, err) + assert.Contains(t, err.Error(), "does not exist") +} + +// Smoke test: when filter matches no components, the resolver surfaces an +// error before any rendered-spec validation runs. 
+func TestQueryComponents_NoComponentsSelected(t *testing.T) { + const renderedSpecsDir = "/project/specs" + + testEnv := testutils.NewTestEnv(t) + testEnv.Config.Project.RenderedSpecsDir = renderedSpecsDir + + require.NoError(t, fileutils.MkdirAll(testEnv.FS(), renderedSpecsDir)) - results, err := component.QueryComponents(testEnv.Env, &options) - require.NoError(t, err) - require.Len(t, results, 1) + // No components configured at all. + options := component.QueryComponentsOptions{ + ComponentFilter: components.ComponentFilter{ + ComponentNamePatterns: []string{"nonexistent"}, + }, + } - result := results[0] - assert.Equal(t, testComponentName, result.Name) + _, err := component.QueryComponents(testEnv.Env, &options) + require.Error(t, err) } diff --git a/internal/app/azldev/cmds/component/render.go b/internal/app/azldev/cmds/component/render.go index 2f2cc54f..79477548 100644 --- a/internal/app/azldev/cmds/component/render.go +++ b/internal/app/azldev/cmds/component/render.go @@ -173,7 +173,7 @@ func RenderComponents(env *azldev.Env, options *RenderOptions) ([]*RenderResult, } // Create mock processor for rpmautospec/spectool. - mockProcessor := createMockProcessor(env) + mockProcessor := createMockProcessor(env, mockPackagesForRender()) if mockProcessor == nil { return nil, errors.New( "mock config required for rendering; ensure the project has a valid distro with mock config") @@ -1126,28 +1126,6 @@ func writeFailureMarkers( } } -// createMockProcessor creates a [sources.MockProcessor] using the project's -// mock config. Returns nil if the mock config is not available (e.g., no project -// config loaded, or no mock config path configured). 
-func createMockProcessor(env *azldev.Env) *sources.MockProcessor { - _, distroVerDef, err := env.Distro() - if err != nil { - slog.Info("Mock processor unavailable; could not resolve distro", "error", err) - - return nil - } - - if distroVerDef.MockConfigPath == "" { - slog.Info("Mock processor unavailable; no mock config path configured") - - return nil - } - - slog.Info("Mock processor available", "mockConfig", distroVerDef.MockConfigPath) - - return sources.NewMockProcessor(env, distroVerDef.MockConfigPath) -} - // validateCleanStaleOptions enforces the constraints around --clean-stale. // Extracted from RenderComponents to keep its complexity below the linter's // cyclomatic threshold. diff --git a/internal/app/azldev/cmds/repo/diff.go b/internal/app/azldev/cmds/repo/diff.go new file mode 100644 index 00000000..b74a5516 --- /dev/null +++ b/internal/app/azldev/cmds/repo/diff.go @@ -0,0 +1,552 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. + +package repo + +import ( + "encoding/json" + "errors" + "fmt" + "log/slog" + "net/url" + "os" + "path" + "path/filepath" + "sort" + "strings" + + "github.com/microsoft/azure-linux-dev-tools/internal/app/azldev" + "github.com/microsoft/azure-linux-dev-tools/internal/app/azldev/cmds/component" + pkgcmd "github.com/microsoft/azure-linux-dev-tools/internal/app/azldev/cmds/pkg" + "github.com/microsoft/azure-linux-dev-tools/internal/app/azldev/core/components" + "github.com/microsoft/azure-linux-dev-tools/internal/utils/fileperms" + "github.com/microsoft/azure-linux-dev-tools/internal/utils/qemu" + "github.com/spf13/afero" + "github.com/spf13/cobra" +) + +// Bucket names used in both repo-side and project-side per-arch output dirs. +const ( + bucketBase = "base" + bucketSDK = "sdk" + bucketBaseSRPM = "base-srpms" + bucketSDKSRPM = "sdk-srpms" + bucketBaseDebuginfo = "base-debuginfo" + bucketSDKDebuginfo = "sdk-debuginfo" +) + +// allBuckets is the fixed set of bucket names diffed by [DiffRepo]. 
+// +//nolint:gochecknoglobals // Fixed bucket order; kept at package scope for clarity. +var allBuckets = []string{ + bucketBase, bucketSDK, + bucketBaseSRPM, bucketSDKSRPM, + bucketBaseDebuginfo, bucketSDKDebuginfo, +} + +// RepoDiffOptions controls a single 'azldev repo diff' run. +type RepoDiffOptions struct { + // Source is the base URL of the published repo (same semantics as + // [RepoQueryOptions.Source]). + Source string + + // Arch is the target architecture for both the repo query and the local + // component query. Defaults to x86_64. + Arch qemu.Arch + + // OutDir is the directory under which '/' and '/' sub-trees + // are written. When empty, defaults to '/azldev-repo-diff/'. + OutDir string +} + +// repoDiffBucketResult is one row reported per bucket. +type repoDiffBucketResult struct { + Bucket string `json:"bucket" table:"Bucket"` + InBoth int `json:"inBoth" table:"In Both"` + OnlyInProject int `json:"onlyInProject" table:"Only in Project"` + OnlyInRepo int `json:"onlyInRepo" table:"Only in Repo"` + DiffFile string `json:"diffFile" table:"Diff File"` +} + +func diffOnAppInit(_ *azldev.App, parent *cobra.Command) { + parent.AddCommand(NewRepoDiffCommand()) +} + +// NewRepoDiffCommand constructs the cobra command for "repo diff". +// +//nolint:dupl // Parallel cobra setup with NewRepoQueryCommand; merging would obscure each subcommand. +func NewRepoDiffCommand() *cobra.Command { + options := &RepoDiffOptions{ + Arch: qemu.Arch(qemu.ArchX86_64), + } + + cmd := &cobra.Command{ + Use: "diff --source [--arch x86_64|aarch64] [--out-dir ]", + Short: "Diff a published repo against the local project's expected package set", + Long: `Diff a published Azure Linux repo against what the local project would publish. + +For the requested arch this command: + + 1. Runs 'azldev repo query' against --source to capture the published + per-channel package lists (base.txt, sdk.txt, base-srpms.txt, + sdk-srpms.txt) under '/repo//'. + 2. 
Runs 'azldev component query' for all components and resolves each + subpackage's publish channel via 'azldev pkg list --rpm-file', producing + the project-side per-channel lists under '/project//'. + 3. Diffs the two sides and emits one unified-style '.diff' file per bucket + under '/diff//'. Lines beginning with '+' are present only + in the project; lines beginning with '-' are present only in the repo. + +Requires a configured project; component specs must already be rendered (run +'azldev component render' first if needed).`, + Example: ` # Diff the local project against the beta repo for x86_64 + azldev repo diff --source https://packages.microsoft.com/azurelinux/4.0/beta + + # Diff aarch64 into a custom directory + azldev repo diff \ + --source https://packages.microsoft.com/azurelinux/4.0/beta \ + --arch aarch64 \ + --out-dir /tmp/azl-diff`, + RunE: azldev.RunFunc(func(env *azldev.Env) (interface{}, error) { + return DiffRepo(env, options) + }), + } + + cmd.Flags().StringVar(&options.Source, "source", "", + "Base URL of the published repo (per-channel URL is '//')") + cmd.Flags().Var(&options.Arch, "arch", + "Target architecture for both the repo query and the local component query (x86_64, aarch64). Defaults to x86_64.") + cmd.Flags().StringVarP(&options.OutDir, "out-dir", "o", "", + "Directory for repo/, project/, and diff/ output trees. "+ + "Defaults to '$TMPDIR/azldev-repo-diff/' "+ + "(repoID is the final path segment of --source).") + + _ = cmd.MarkFlagRequired("source") + _ = cmd.RegisterFlagCompletionFunc("arch", + func(_ *cobra.Command, _ []string, _ string) ([]string, cobra.ShellCompDirective) { + return qemu.SupportedArchitectures(), cobra.ShellCompDirectiveNoFileComp + }) + + return cmd +} + +// DiffRepo orchestrates the three steps described in the command help and +// returns one [repoDiffBucketResult] per bucket. 
+func DiffRepo(env *azldev.Env, options *RepoDiffOptions) ([]*repoDiffBucketResult, error) { + repoID, arch, outDir, err := resolveDiffPaths(options) + if err != nil { + return nil, err + } + + repoDir := filepath.Join(outDir, "repo") + projectDir := filepath.Join(outDir, "project") + diffDir := filepath.Join(outDir, "diff", arch) + + if err := env.FS().MkdirAll(diffDir, fileperms.PublicDir); err != nil { + return nil, fmt.Errorf("creating diff directory %#q:\n%w", diffDir, err) + } + + slog.Info("Diffing project against published repo", + "repo", repoID, "arch", arch, "source", options.Source, "outDir", outDir) + + step1 := env.StartEvent("repo diff: step 1/3 — query published repo", + "repo", repoID, "arch", arch, "outDir", repoDir) + + repoBuckets, err := collectRepoBuckets(env, options.Source, options.Arch, repoDir, arch) + + step1.End() + + if err != nil { + return nil, fmt.Errorf("step 1/3 (query published repo) failed:\n%w", err) + } + + slog.Info("Step 1/3 complete: repo-side lists captured", bucketCountArgs(repoBuckets)...) + + step2 := env.StartEvent("repo diff: step 2/3 — build project package lists", + "arch", arch, "outDir", projectDir) + + projectBuckets, err := collectProjectBuckets(env, options.Arch, projectDir, arch) + + step2.End() + + if err != nil { + return nil, fmt.Errorf("step 2/3 (build project package lists) failed:\n%w", err) + } + + slog.Info("Step 2/3 complete: project-side lists built", bucketCountArgs(projectBuckets)...) + + step3 := env.StartEvent("repo diff: step 3/3 — diff repo vs project", + "arch", arch, "outDir", diffDir) + defer step3.End() + + results, err := writeBucketDiffs(env.FS(), diffDir, arch, repoBuckets, projectBuckets) + if err != nil { + return nil, err + } + + logDiffSummary(arch, diffDir, results) + + return results, nil +} + +// resolveDiffPaths derives the repoID, arch, and outDir from options, applying +// defaults and validating the source URL. 
+func resolveDiffPaths(options *RepoDiffOptions) (repoID, arch, outDir string, err error) { + if options.Source == "" { + return "", "", "", errors.New("--source is required") + } + + parsedSource, parseErr := url.ParseRequestURI(options.Source) + if parseErr != nil { + return "", "", "", fmt.Errorf("invalid --source URL %#q:\n%w", options.Source, parseErr) + } + + repoID = path.Base(strings.TrimRight(parsedSource.Path, "/")) + if repoID == "" || repoID == "." || repoID == "/" { + return "", "", "", fmt.Errorf( + "cannot derive repo id from --source %#q (URL path has no trailing segment)", + options.Source) + } + + arch = options.Arch.String() + if arch == "" { + arch = qemu.ArchX86_64 + } + + outDir = options.OutDir + if outDir == "" { + outDir = filepath.Join(os.TempDir(), "azldev-repo-diff", repoID) + } + + return repoID, arch, outDir, nil +} + +// bucketCountArgs returns the slog key/value pairs for every bucket in allBuckets. +func bucketCountArgs(buckets map[string]map[string]struct{}) []any { + args := make([]any, 0, len(allBuckets)*2) //nolint:mnd // 2 entries per bucket: key + value. + for _, bucket := range allBuckets { + args = append(args, bucket, len(buckets[bucket])) + } + + return args +} + +func logDiffSummary(arch, diffDir string, results []*repoDiffBucketResult) { + var totalOnlyProject, totalOnlyRepo, totalInBoth int + + for _, r := range results { + totalOnlyProject += r.OnlyInProject + totalOnlyRepo += r.OnlyInRepo + totalInBoth += r.InBoth + } + + slog.Info("Step 3/3 complete: diff written", + "arch", arch, + "inBoth", totalInBoth, + "onlyInProject", totalOnlyProject, + "onlyInRepo", totalOnlyRepo, + "diffDir", diffDir) +} + +// writeBucketDiffs runs [diffSets] for each known bucket, writes the per-bucket +// '.diff' file, and returns one [repoDiffBucketResult] per bucket. 
+func writeBucketDiffs( + fileSystem afero.Fs, + diffDir, arch string, + repoBuckets, projectBuckets map[string]map[string]struct{}, +) ([]*repoDiffBucketResult, error) { + results := make([]*repoDiffBucketResult, 0, len(allBuckets)) + + for _, bucket := range allBuckets { + onlyProj, onlyRepo, both := diffSets(projectBuckets[bucket], repoBuckets[bucket]) + + diffFile := filepath.Join(diffDir, bucket+".diff") + if err := writeBucketDiff(fileSystem, diffFile, bucket, arch, onlyProj, onlyRepo, both); err != nil { + return nil, fmt.Errorf("writing %#q:\n%w", diffFile, err) + } + + slog.Info("Bucket diff", + "bucket", bucket, "arch", arch, + "inBoth", both, + "onlyInProject", len(onlyProj), + "onlyInRepo", len(onlyRepo), + "diffFile", diffFile) + + results = append(results, &repoDiffBucketResult{ + Bucket: bucket, + InBoth: both, + OnlyInProject: len(onlyProj), + OnlyInRepo: len(onlyRepo), + DiffFile: diffFile, + }) + } + + return results, nil +} + +// collectRepoBuckets invokes [QueryRepo] and reads one list file per bucket into sets. +func collectRepoBuckets( + env *azldev.Env, source string, arch qemu.Arch, repoOutDir, archStr string, +) (map[string]map[string]struct{}, error) { + if _, err := QueryRepo(env, &RepoQueryOptions{ + Source: source, + Arch: arch, + OutDir: repoOutDir, + }); err != nil { + return nil, err + } + + archDir := filepath.Join(repoOutDir, archStr) + buckets := make(map[string]map[string]struct{}, len(allBuckets)) + + for _, bucket := range allBuckets { + names, err := readLinesAsSet(env.FS(), filepath.Join(archDir, bucket+".txt")) + if err != nil { + return nil, err + } + + buckets[bucket] = names + } + + return buckets, nil +} + +// rpmSourceEntry mirrors the on-disk schema consumed by [pkgcmd.ListPackages] via '--rpm-file'. 
+type rpmSourceEntry struct { + PackageName string `json:"packageName"` + SourcePackageName string `json:"sourcePackageName"` +} + +// collectProjectBuckets runs 'component query' for all components, builds the rpm +// source map, resolves channels via [pkgcmd.ListPackages], buckets the results, +// and writes one list file per bucket. Returns the in-memory sets. +func collectProjectBuckets( + env *azldev.Env, arch qemu.Arch, projectOutDir, archStr string, +) (map[string]map[string]struct{}, error) { + compResults, err := component.QueryComponents(env, &component.QueryComponentsOptions{ + ComponentFilter: components.ComponentFilter{IncludeAllComponents: true}, + Arch: arch, + }) + if err != nil { + return nil, fmt.Errorf("querying components:\n%w", err) + } + + entries := buildRPMSourceEntries(compResults) + + archDir := filepath.Join(projectOutDir, archStr) + if err := env.FS().MkdirAll(archDir, fileperms.PublicDir); err != nil { + return nil, fmt.Errorf("creating project arch directory %#q:\n%w", archDir, err) + } + + mapFile := filepath.Join(projectOutDir, "rpm-source-map.json") + + mapJSON, err := json.MarshalIndent(entries, "", " ") + if err != nil { + return nil, fmt.Errorf("marshalling rpm source map:\n%w", err) + } + + if err := afero.WriteFile(env.FS(), mapFile, mapJSON, fileperms.PublicFile); err != nil { + return nil, fmt.Errorf("writing %#q:\n%w", mapFile, err) + } + + pkgResults, err := pkgcmd.ListPackages(env, &pkgcmd.ListPackageOptions{RPMFile: mapFile}) + if err != nil { + return nil, fmt.Errorf("resolving packages from rpm source map:\n%w", err) + } + + buckets := bucketPackageResults(pkgResults) + + for _, bucket := range allBuckets { + file := filepath.Join(archDir, bucket+".txt") + if err := writeSortedLines(env.FS(), file, setToSortedSlice(buckets[bucket])); err != nil { + return nil, fmt.Errorf("writing %#q:\n%w", file, err) + } + } + + return buckets, nil +} + +// buildRPMSourceEntries converts component-query results into the 'rpm-file' 
schema. +// Components whose spec excludes the target arch (ExclusiveArch/ExcludeArch) are +// reported by 'component query' with an empty Subpackages list; they contribute +// neither an RPM nor an SRPM to the per-arch repo and are skipped here. +func buildRPMSourceEntries(compResults []*component.ComponentDetails) []rpmSourceEntry { + entries := make([]rpmSourceEntry, 0, len(compResults)) + + for _, comp := range compResults { + if comp.Name == "" || len(comp.Subpackages) == 0 { + continue + } + + for _, sub := range comp.Subpackages { + entries = append(entries, rpmSourceEntry{ + PackageName: sub, + SourcePackageName: comp.Name, + }) + } + } + + return entries +} + +// bucketPackageResults assigns each [pkgcmd.PackageListResult] to one of the +// six diff buckets via [channelToBucket]. Subpackages whose name ends in +// '-debuginfo' or '-debugsource' are routed to the base-debuginfo/sdk-debuginfo +// buckets based on their channel; all other subpackages go to the regular +// base/sdk (or base-srpms/sdk-srpms for source packages) buckets. +func bucketPackageResults(pkgResults []pkgcmd.PackageListResult) map[string]map[string]struct{} { + buckets := map[string]map[string]struct{}{ + bucketBase: {}, + bucketSDK: {}, + bucketBaseSRPM: {}, + bucketSDKSRPM: {}, + bucketBaseDebuginfo: {}, + bucketSDKDebuginfo: {}, + } + + var unmapped int + + for _, row := range pkgResults { + bucket := channelToBucket(row.Channel, row.Type) + if bucket == "" { + unmapped++ + + slog.Debug("Skipping package with unmapped channel", + "package", row.PackageName, "type", row.Type, "channel", row.Channel) + + continue + } + + buckets[bucket][row.PackageName] = struct{}{} + } + + if unmapped > 0 { + slog.Warn("Some packages had channels that did not map to base/sdk", + "count", unmapped) + } + + return buckets +} + +// channelToBucket maps a (channel, type) pair to one of the six diff buckets. 
+// Returns "" for packages that should be excluded from the diff (empty +// channel, "none", or a channel that contains none of "base-debuginfo", +// "sdk-debuginfo", "sdk", or "base"). +// +// The mapping is intentionally permissive so projects whose channel strings +// embed the bucket name (e.g. "rpm-sdk-srpm") map correctly without an +// explicit table. The '-debuginfo' cases are checked first because their +// channel strings also contain the bare "base"/"sdk" substrings. +func channelToBucket(channel, pkgType string) string { + channel = strings.ToLower(strings.TrimSpace(channel)) + if channel == "" || channel == "none" { + return "" + } + + var bucket string + + switch { + case strings.Contains(channel, "base-debuginfo"): + return bucketBaseDebuginfo + case strings.Contains(channel, "sdk-debuginfo"): + return bucketSDKDebuginfo + case strings.Contains(channel, "sdk"): + bucket = bucketSDK + case strings.Contains(channel, "base"): + bucket = bucketBase + + default: + return "" + } + + if pkgType == pkgcmd.PackageTypeSRPM { + return bucket + "-srpms" + } + + return bucket +} + +// diffSets returns (onlyInLeft, onlyInRight, intersectionCount). The two +// slices are sorted for deterministic output. 
+func diffSets(left, right map[string]struct{}) (onlyLeft, onlyRight []string, both int) { + onlyLeft = make([]string, 0) + onlyRight = make([]string, 0) + + for name := range left { + if _, ok := right[name]; ok { + both++ + } else { + onlyLeft = append(onlyLeft, name) + } + } + + for name := range right { + if _, ok := left[name]; !ok { + onlyRight = append(onlyRight, name) + } + } + + sort.Strings(onlyLeft) + sort.Strings(onlyRight) + + return onlyLeft, onlyRight, both +} + +func writeBucketDiff( + fileSystem afero.Fs, + filePath, bucket, arch string, + onlyProject, onlyRepo []string, + both int, +) error { + var buf strings.Builder + + fmt.Fprintf(&buf, "# Bucket: %s (%s)\n", bucket, arch) + fmt.Fprintf(&buf, "# in-both: %d\n", both) + fmt.Fprintf(&buf, "# only-in-project: %d\n", len(onlyProject)) + fmt.Fprintf(&buf, "# only-in-repo: %d\n", len(onlyRepo)) + + for _, name := range onlyProject { + buf.WriteString("+ ") + buf.WriteString(name) + buf.WriteByte('\n') + } + + for _, name := range onlyRepo { + buf.WriteString("- ") + buf.WriteString(name) + buf.WriteByte('\n') + } + + if err := afero.WriteFile(fileSystem, filePath, []byte(buf.String()), fileperms.PublicFile); err != nil { + return fmt.Errorf("writing %#q:\n%w", filePath, err) + } + + return nil +} + +// readLinesAsSet reads a file produced by [writeSortedLines] back into a set. +// A missing file is treated as an empty set, matching the case where the +// upstream repoquery returned no rows for that bucket. 
+func readLinesAsSet(fileSystem afero.Fs, filePath string) (map[string]struct{}, error) { + data, err := afero.ReadFile(fileSystem, filePath) + if err != nil { + if errors.Is(err, os.ErrNotExist) { + return map[string]struct{}{}, nil + } + + return nil, fmt.Errorf("reading %#q:\n%w", filePath, err) + } + + set := make(map[string]struct{}) + + for _, line := range strings.Split(string(data), "\n") { + line = strings.TrimSpace(line) + if line == "" || strings.HasPrefix(line, "#") { + continue + } + + set[line] = struct{}{} + } + + return set, nil +} diff --git a/internal/app/azldev/cmds/repo/query.go b/internal/app/azldev/cmds/repo/query.go new file mode 100644 index 00000000..2487aabe --- /dev/null +++ b/internal/app/azldev/cmds/repo/query.go @@ -0,0 +1,289 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. + +package repo + +import ( + "errors" + "fmt" + "log/slog" + "net/url" + "os" + "os/exec" + "path" + "path/filepath" + "sort" + "strings" + + "github.com/microsoft/azure-linux-dev-tools/internal/app/azldev" + "github.com/microsoft/azure-linux-dev-tools/internal/utils/fileperms" + "github.com/microsoft/azure-linux-dev-tools/internal/utils/qemu" + "github.com/spf13/afero" + "github.com/spf13/cobra" +) + +// queryChannel describes one channel queried per invocation: its logical name +// (used for output filenames and bucket keys) and its URL sub-path under the +// repo source. For most channels these match, but the debuginfo channels live +// at '/debuginfo' rather than '-debuginfo'. +type queryChannel struct { + name string + urlPath string +} + +// Channels queried for each invocation. The per-channel repo URL is +// constructed as '//'. +// +//nolint:gochecknoglobals // Fixed list of channels; kept at package scope for clarity. 
+var queryChannels = []queryChannel{ + {name: "base", urlPath: "base"}, + {name: "sdk", urlPath: "sdk"}, + {name: "base-debuginfo", urlPath: "base/debuginfo"}, + {name: "sdk-debuginfo", urlPath: "sdk/debuginfo"}, +} + +// RepoQueryOptions controls a single 'azldev repo query' run. +type RepoQueryOptions struct { + // Source is the base URL of the published repo (e.g. + // "https://packages.microsoft.com/azurelinux/4.0/beta"). + // Per-channel URLs are constructed as "//". + Source string + + // Arch is the target architecture passed to dnf via --forcearch. + // Defaults to x86_64. + Arch qemu.Arch + + // OutDir is the directory under which per-arch list files are written. + // When empty, defaults to '/azldev-repo-query/', + // where repoID is the final path segment of Source. + OutDir string +} + +// repoQueryChannelResult is one row reported per (arch, channel) pair. +type repoQueryChannelResult struct { + Arch string `json:"arch" table:"Arch"` + Channel string `json:"channel" table:"Channel"` + RPMCount int `json:"rpmCount" table:"RPMs"` + SRPMCount int `json:"srpmCount" table:"SRPMs"` + RPMFile string `json:"rpmFile" table:"RPM File"` + SRPMFile string `json:"srpmFile" table:"SRPM File"` +} + +func queryOnAppInit(_ *azldev.App, parent *cobra.Command) { + parent.AddCommand(NewRepoQueryCommand()) +} + +// NewRepoQueryCommand constructs the cobra command for "repo query". +// +//nolint:dupl // Parallel cobra setup with NewRepoDiffCommand; merging would obscure each subcommand. +func NewRepoQueryCommand() *cobra.Command { + options := &RepoQueryOptions{ + Arch: qemu.Arch(qemu.ArchX86_64), + } + + cmd := &cobra.Command{ + Use: "query --source [--arch x86_64|aarch64] [--out-dir ]", + Short: "Query a published repo and write per-channel package lists", + Long: `Query a published Azure Linux repo with 'dnf repoquery' and write the +results into a per-arch, per-channel directory layout. 
+ +For each channel (base, sdk), the per-channel repo URL is constructed as +'//' and queried with: + + dnf repoquery --quiet \ + --repofrompath=, --repo= \ + --forcearch \ + --queryformat '%{name}|%{source_name}\n' + +The binary names go into '//.txt' and the +deduplicated source-package names go into '//-srpms.txt', +each sorted and one name per line. + +This mirrors the 'from-repoquery' enumeration step in the upstream +'scripts/regen-channel-lists.sh' but produces only the per-channel rpm/srpm +lists; channel reconciliation against the local branch is out of scope.`, + Example: ` # Query the default azl4-dev repo for x86_64 + azldev repo query --source https://packages.microsoft.com/azurelinux/4.0/beta + + # Query aarch64 into a custom directory + azldev repo query \ + --source https://packages.microsoft.com/azurelinux/4.0/beta \ + --arch aarch64 \ + --out-dir /tmp/azl-lists`, + RunE: azldev.RunFuncWithoutRequiredConfig(func(env *azldev.Env) (interface{}, error) { + return QueryRepo(env, options) + }), + } + + cmd.Flags().StringVar(&options.Source, "source", "", + "Base URL of the published repo (per-channel URL is '//')") + cmd.Flags().Var(&options.Arch, "arch", + "Target architecture passed to dnf via --forcearch (x86_64, aarch64). Defaults to x86_64.") + cmd.Flags().StringVarP(&options.OutDir, "out-dir", "o", "", + "Directory under which '/.txt' and '/-srpms.txt' "+ + "are written. Defaults to '$TMPDIR/azldev-repo-query/' "+ + "(repoID is the final path segment of --source).") + + _ = cmd.MarkFlagRequired("source") + _ = cmd.RegisterFlagCompletionFunc("arch", + func(_ *cobra.Command, _ []string, _ string) ([]string, cobra.ShellCompDirective) { + return qemu.SupportedArchitectures(), cobra.ShellCompDirectiveNoFileComp + }) + + return cmd +} + +// QueryRepo runs 'dnf repoquery' for each hardcoded channel against the +// requested arch and writes the bucketed RPM and SRPM name lists under +// options.OutDir. Returns one result entry per channel. 
+func QueryRepo(env *azldev.Env, options *RepoQueryOptions) ([]*repoQueryChannelResult, error) { + if options.Source == "" { + return nil, errors.New("--source is required") + } + + parsedSource, err := url.ParseRequestURI(options.Source) + if err != nil { + return nil, fmt.Errorf("invalid --source URL %#q:\n%w", options.Source, err) + } + + repoID := path.Base(strings.TrimRight(parsedSource.Path, "/")) + if repoID == "" || repoID == "." || repoID == "/" { + return nil, fmt.Errorf( + "cannot derive repo id from --source %#q (URL path has no trailing segment)", + options.Source) + } + + arch := options.Arch.String() + if arch == "" { + arch = qemu.ArchX86_64 + } + + outDir := options.OutDir + if outDir == "" { + outDir = filepath.Join(os.TempDir(), "azldev-repo-query", repoID) + } + + archDir := filepath.Join(outDir, arch) + if err := env.FS().MkdirAll(archDir, fileperms.PublicDir); err != nil { + return nil, fmt.Errorf("creating output directory %#q:\n%w", archDir, err) + } + + source := strings.TrimRight(options.Source, "/") + results := make([]*repoQueryChannelResult, 0, len(queryChannels)) + + for _, channel := range queryChannels { + repoURL := fmt.Sprintf("%s/%s/%s", source, channel.urlPath, arch) + + slog.Info("Running dnf repoquery", "repo", repoID, "channel", channel.name, "arch", arch, "url", repoURL) + + rpms, srpms, err := runRepoquery(env, repoID, repoURL, arch) + if err != nil { + return nil, fmt.Errorf("repoquery for %#q failed:\n%w", repoID, err) + } + + rpmFile := filepath.Join(archDir, channel.name+".txt") + srpmFile := filepath.Join(archDir, channel.name+"-srpms.txt") + + if err := writeSortedLines(env.FS(), rpmFile, rpms); err != nil { + return nil, fmt.Errorf("writing %#q:\n%w", rpmFile, err) + } + + if err := writeSortedLines(env.FS(), srpmFile, srpms); err != nil { + return nil, fmt.Errorf("writing %#q:\n%w", srpmFile, err) + } + + results = append(results, &repoQueryChannelResult{ + Arch: arch, + Channel: channel.name, + RPMCount: len(rpms), 
+ SRPMCount: len(srpms), + RPMFile: rpmFile, + SRPMFile: srpmFile, + }) + } + + return results, nil +} + +// runRepoquery invokes 'dnf repoquery' for a single (repo, arch) pair and +// returns the deduplicated, sorted name lists. +func runRepoquery(env *azldev.Env, repoID, repoURL, arch string) (rpms, srpms []string, err error) { + args := []string{ + "repoquery", "--quiet", + "--setopt=skip_if_unavailable=false", + fmt.Sprintf("--repofrompath=%s,%s", repoID, repoURL), + "--repo=" + repoID, + "--forcearch", arch, + "--queryformat", `%{name}|%{source_name}\n`, + } + + var stderr strings.Builder + + cmd := exec.CommandContext(env, "dnf", args...) + cmd.Stderr = &stderr + + wrapped, wrapErr := env.Command(cmd) + if wrapErr != nil { + return nil, nil, fmt.Errorf("preparing dnf command:\n%w", wrapErr) + } + + wrapped.SetDescription(fmt.Sprintf("dnf repoquery (%s)", repoID)) + + stdout, runErr := wrapped.RunAndGetOutput(env) + if runErr != nil { + return nil, nil, fmt.Errorf( + "executing dnf:\n%w\nstderr:\n%s", runErr, stderr.String()) + } + + rpmSet := make(map[string]struct{}) + srpmSet := make(map[string]struct{}) + + for _, line := range strings.Split(stdout, "\n") { + line = strings.TrimSpace(line) + if line == "" { + continue + } + + name, source, ok := strings.Cut(line, "|") + if !ok { + slog.Warn("Skipping malformed repoquery line", "line", line) + + continue + } + + if name != "" { + rpmSet[name] = struct{}{} + } + + if source != "" { + srpmSet[source] = struct{}{} + } + } + + return setToSortedSlice(rpmSet), setToSortedSlice(srpmSet), nil +} + +func setToSortedSlice(set map[string]struct{}) []string { + out := make([]string, 0, len(set)) + for k := range set { + out = append(out, k) + } + + sort.Strings(out) + + return out +} + +func writeSortedLines(fileSystem afero.Fs, path string, lines []string) error { + var buf strings.Builder + for _, line := range lines { + buf.WriteString(line) + buf.WriteByte('\n') + } + + if err := afero.WriteFile(fileSystem, 
path, []byte(buf.String()), fileperms.PublicFile); err != nil { + return fmt.Errorf("writing %#q:\n%w", path, err) + } + + return nil +} diff --git a/internal/app/azldev/cmds/repo/repo.go b/internal/app/azldev/cmds/repo/repo.go new file mode 100644 index 00000000..c44cfb3d --- /dev/null +++ b/internal/app/azldev/cmds/repo/repo.go @@ -0,0 +1,26 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. + +package repo + +import ( + "github.com/microsoft/azure-linux-dev-tools/internal/app/azldev" + "github.com/spf13/cobra" +) + +// OnAppInit is called once when the app is initialized; registers the "repo" command tree. +func OnAppInit(app *azldev.App) { + cmd := &cobra.Command{ + Use: "repo", + Short: "Query published RPM repositories", + Long: `Query published RPM repositories. + +Subcommands wrap 'dnf repoquery' against an Azure Linux published repo URL +(e.g. an azl4-dev blob storage endpoint) and bucket the results into the +on-disk layout expected by downstream tooling.`, + } + + app.AddTopLevelCommand(cmd) + queryOnAppInit(app, cmd) + diffOnAppInit(app, cmd) +} diff --git a/internal/app/azldev/core/sources/mockprocessor.go b/internal/app/azldev/core/sources/mockprocessor.go index 0951779e..964a7594 100644 --- a/internal/app/azldev/core/sources/mockprocessor.go +++ b/internal/app/azldev/core/sources/mockprocessor.go @@ -9,6 +9,7 @@ import ( "encoding/json" "fmt" "log/slog" + "path" "path/filepath" "strconv" "strings" @@ -29,18 +30,22 @@ var renderProcessScript []byte // first use and supports batch processing of multiple components in a single // mock invocation. type MockProcessor struct { - mu sync.Mutex - runner *mock.Runner - initialized bool - initErr error + mu sync.Mutex + runner *mock.Runner + requiredPackages []string + initialized bool + initErr error } // NewMockProcessor creates a new processor that will lazily initialize // a mock chroot using the given config path. 
The runner is created eagerly -// but the chroot is only initialized on first use. -func NewMockProcessor(ctx opctx.Ctx, mockConfigPath string) *MockProcessor { +// but the chroot is only initialized on first use. requiredPackages are +// installed inside the chroot on first use; pass nil/empty to skip the +// install step (rely on whatever the buildroot ships by default). +func NewMockProcessor(ctx opctx.Ctx, mockConfigPath string, requiredPackages []string) *MockProcessor { return &MockProcessor{ - runner: mock.NewRunner(ctx, mockConfigPath), + runner: mock.NewRunner(ctx, mockConfigPath), + requiredPackages: append([]string(nil), requiredPackages...), } } @@ -99,7 +104,7 @@ func (p *MockProcessor) initOnce(ctx context.Context) error { return p.initErr } - slog.Info("Initializing mock chroot for rendering") + slog.Info("Initializing mock chroot") p.runner.EnableNetwork() @@ -110,21 +115,18 @@ func (p *MockProcessor) initOnce(ctx context.Context) error { return p.initErr } - // Install rpmautospec (macro expansion), rpmdevtools (spectool), and git - // (required for rpmautospec to read commit history). - // python3-click is required by rpmautospec but not declared as an RPM dependency. - // Ecosystem macro packages (go-srpm-macros, etc.) are already present via - // @buildsys-build → azurelinux-rpm-config. 
- if err := p.runner.InstallPackages(ctx, []string{"rpmautospec", "rpmdevtools", "git", "python3-click"}); err != nil { - p.initErr = fmt.Errorf("failed to install packages in mock chroot:\n%w", err) - p.initialized = true + if len(p.requiredPackages) > 0 { + if err := p.runner.InstallPackages(ctx, p.requiredPackages); err != nil { + p.initErr = fmt.Errorf("failed to install packages in mock chroot:\n%w", err) + p.initialized = true - return p.initErr + return p.initErr + } } p.initialized = true - slog.Info("Mock chroot ready for rendering") + slog.Info("Mock chroot ready") return nil } @@ -141,9 +143,6 @@ func (p *MockProcessor) BatchProcess( ctx context.Context, events opctx.EventListener, stagingDir string, inputs []ComponentInput, fs opctx.FS, maxWorkers int, ) ([]ComponentMockResult, error) { - p.mu.Lock() - defer p.mu.Unlock() - if len(inputs) == 0 { return nil, nil } @@ -152,37 +151,125 @@ func (p *MockProcessor) BatchProcess( return nil, err } - if err := p.initOnce(ctx); err != nil { - return nil, err + jsonInputs := make([]componentInputJSON, len(inputs)) + for idx, input := range inputs { + jsonInputs[idx] = componentInputJSON(input) + } + + inputsBytes, err := json.Marshal(jsonInputs) + if err != nil { + return nil, fmt.Errorf("marshaling inputs:\n%w", err) } slog.Info("Batch processing components in mock chroot", "count", len(inputs)) - // Write the Python script and inputs manifest to the staging directory. 
- scriptPath := filepath.Join(stagingDir, "render_process.py") - if err := fileutils.WriteFile(fs, scriptPath, renderProcessScript, fileperms.PublicExecutable); err != nil { - return nil, fmt.Errorf("writing render script:\n%w", err) + const chrootStagingPath = "/tmp/render" + + workers := strconv.Itoa(max(1, maxWorkers)) // 1x CPU; mock work is CPU-bound + + rawResults, err := p.runBatchScript(ctx, events, runBatchScriptOptions{ + Mounts: []batchBindMount{{Host: stagingDir, InChroot: chrootStagingPath}}, + ScratchHost: stagingDir, + ScratchInChroot: chrootStagingPath, + ScriptName: "render_process.py", + ScriptBytes: renderProcessScript, + InputsJSON: inputsBytes, + ResultsName: "results.json", + ScriptArgs: []string{chrootStagingPath, workers}, + ProgressLabel: "Processing specs in mock chroot", + ProgressTotal: int64(len(inputs)), + FS: fs, + }) + if err != nil { + return nil, err } - if err := writeInputsManifest(fs, stagingDir, inputs); err != nil { + return parseBatchJSON(string(rawResults), inputs) +} + +// batchBindMount describes one host-to-chroot bind mount used by runBatchScript. +type batchBindMount struct { + Host string + InChroot string +} + +// runBatchScriptOptions parameterizes a single batch-script invocation. +type runBatchScriptOptions struct { + // Mounts is the full set of host-to-chroot bind mounts to add to the runner. + // The scratch dir must be reachable via one of these mounts (typically the + // first entry), so the script can locate its inputs and write results. + Mounts []batchBindMount + // ScratchHost is the host-side directory where the script, inputs manifest, + // and results file are read and written. + ScratchHost string + // ScratchInChroot is the in-chroot path that maps to ScratchHost. + ScratchInChroot string + // ScriptName is the basename used when writing the embedded Python script + // into ScratchHost (e.g. "render_process.py"). 
+ ScriptName string + ScriptBytes []byte + // InputsJSON is the JSON-encoded inputs manifest, written as + // /inputs.json. + InputsJSON []byte + // ResultsName is the basename of the results file the script is expected + // to write into ScratchHost (e.g. "results.json"). + ResultsName string + // ScriptArgs is appended to the python3 invocation after the script path. + ScriptArgs []string + // ProgressLabel labels the progress event surfaced to the user. + ProgressLabel string + // ProgressTotal is the total used for progress reporting from PROGRESS lines. + ProgressTotal int64 + FS opctx.FS +} + +// runBatchScript executes a batched, parallelizable Python helper inside the +// shared mock chroot. It owns the lock + lazy init, writes the script and +// inputs into the host-side scratch dir, runs the script (which is expected to +// emit "PROGRESS / " lines and write a results file), and +// returns the raw results bytes. +// +// This is the shared scaffolding for BatchProcess (rendering) and +// BatchQuerySpecs (querying). Per-operation concerns (input/result shape, +// embedded script, result parsing) live in the callers. +// + +func (p *MockProcessor) runBatchScript( + ctx context.Context, events opctx.EventListener, opts runBatchScriptOptions, +) ([]byte, error) { + p.mu.Lock() + defer p.mu.Unlock() + + if err := p.initOnce(ctx); err != nil { return nil, err } - // Clone the runner and add a single bind mount for the staging directory. + // Write the Python script and inputs manifest to the scratch directory. 
+ scriptHostPath := filepath.Join(opts.ScratchHost, opts.ScriptName) + if err := fileutils.WriteFile(opts.FS, scriptHostPath, opts.ScriptBytes, fileperms.PublicExecutable); err != nil { + return nil, fmt.Errorf("writing script %#q:\n%w", opts.ScriptName, err) + } + + inputsHostPath := filepath.Join(opts.ScratchHost, "inputs.json") + if err := fileutils.WriteFile(opts.FS, inputsHostPath, opts.InputsJSON, fileperms.PublicFile); err != nil { + return nil, fmt.Errorf("writing inputs manifest:\n%w", err) + } + + // Clone the runner and add the requested bind mounts. // WithUnprivileged drops to the mockbuild user for chroot commands, // matching how mock builds run and avoiding root-owned files in the - // bind-mounted staging directory. This is safe because mock defaults + // bind-mounted scratch directory. This is safe because mock defaults // chrootuid to os.getuid() — the mockbuild user inside the chroot has // the same UID as the host user, so bind-mounted files remain writable. runner := p.runner.Clone() runner.WithUnprivileged() - const chrootStagingPath = "/tmp/render" - runner.AddBindMount(stagingDir, chrootStagingPath) + for _, mount := range opts.Mounts { + runner.AddBindMount(mount.Host, mount.InChroot) + } - chrootScript := filepath.Join(chrootStagingPath, "render_process.py") - workers := strconv.Itoa(max(1, maxWorkers)) // 1x CPU; mock work is CPU-bound - args := []string{"python3", chrootScript, chrootStagingPath, workers} + scriptInChroot := path.Join(opts.ScratchInChroot, opts.ScriptName) + args := append([]string{"python3", scriptInChroot}, opts.ScriptArgs...) cmd, err := runner.CmdInChroot(ctx, args, false) if err != nil { @@ -193,19 +280,17 @@ func (p *MockProcessor) BatchProcess( // The script prints "PROGRESS / " to stderr, but // mock --chroot merges the inner command's stderr into stdout, so we // listen on stdout. 
- mockProgress := events.StartEvent("Processing specs in mock chroot", "count", len(inputs)) - mockProgress.SetLongRunning("Processing specs in mock chroot") - - defer mockProgress.End() + progress := events.StartEvent(opts.ProgressLabel, "count", opts.ProgressTotal) + progress.SetLongRunning(opts.ProgressLabel) - total := int64(len(inputs)) + defer progress.End() if listenerErr := cmd.SetRealTimeStdoutListener(func(_ context.Context, line string) { // Parse "PROGRESS / " lines. if after, found := strings.CutPrefix(line, "PROGRESS "); found { if slashIdx := strings.Index(after, "/"); slashIdx > 0 { if completed, parseErr := strconv.ParseInt(after[:slashIdx], 10, 64); parseErr == nil { - mockProgress.SetProgress(completed, total) + progress.SetProgress(completed, opts.ProgressTotal) } } } @@ -222,14 +307,14 @@ func (p *MockProcessor) BatchProcess( // Read results from the file written by the Python script. // Using a file avoids bufio.Scanner token size limits that would truncate // large JSON payloads when capturing stdout (e.g., 7k components ≈ 560KB). - resultsPath := filepath.Join(stagingDir, "results.json") + resultsHostPath := filepath.Join(opts.ScratchHost, opts.ResultsName) - resultsData, readErr := fileutils.ReadFile(fs, resultsPath) + resultsData, readErr := fileutils.ReadFile(opts.FS, resultsHostPath) if readErr != nil { - return nil, fmt.Errorf("reading batch results from %#q:\n%w", resultsPath, readErr) + return nil, fmt.Errorf("reading batch results from %#q:\n%w", resultsHostPath, readErr) } - return parseBatchJSON(string(resultsData), inputs) + return resultsData, nil } // componentInputJSON is the JSON-serializable form written to inputs.json. @@ -284,27 +369,6 @@ func parseBatchJSON(stdout string, inputs []ComponentInput) ([]ComponentMockResu return results, nil } -// writeInputsManifest writes the inputs.json manifest to the staging directory -// so it can be read by the Python script inside the mock chroot. 
-func writeInputsManifest(fs opctx.FS, stagingDir string, inputs []ComponentInput) error { - jsonInputs := make([]componentInputJSON, len(inputs)) - for idx, input := range inputs { - jsonInputs[idx] = componentInputJSON(input) - } - - data, err := json.Marshal(jsonInputs) - if err != nil { - return fmt.Errorf("marshaling inputs:\n%w", err) - } - - inputsPath := filepath.Join(stagingDir, "inputs.json") - if err := fileutils.WriteFile(fs, inputsPath, data, fileperms.PublicFile); err != nil { - return fmt.Errorf("writing inputs manifest:\n%w", err) - } - - return nil -} - // Destroy cleans up the mock chroot. Should be called when rendering is complete. // The processor must not be reused after Destroy — create a new MockProcessor if needed. // Attempts cleanup even if initialization partially failed (e.g., InitRoot succeeded diff --git a/internal/app/azldev/core/sources/query_process.py b/internal/app/azldev/core/sources/query_process.py new file mode 100644 index 00000000..3fe08379 --- /dev/null +++ b/internal/app/azldev/core/sources/query_process.py @@ -0,0 +1,422 @@ +#!/usr/bin/env python3 +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + +"""Query RPM specs inside a mock chroot: run rpmspec twice per component (once +with --srpm for source NEVR, once without for binary subpackage names) and +write per-component results to a JSON file in the scratch directory. + +This script is embedded in the azldev Go binary and executed inside a mock chroot +during ``azldev component query``. It mirrors render_process.py's shape (a +ThreadPoolExecutor over per-component work, PROGRESS lines on stderr, a +results.json file in the scratch dir) so the Go-side plumbing can be shared. 
+ +Usage:: + + python3 query_process.py + +The scratch directory must contain an ``inputs.json`` file:: + + [ + { + "name": "curl", + "specRelPath": "c/curl/curl.spec", + "srpmQueryFormat": "name=%{name}\\n...", + "subpackagesQueryFormat": "subpkg=%{name}\\n", + "with": ["foo"], + "without": ["bar"], + "defines": {"_sourcedir": "/some/path"} + }, + ... + ] + +Results are written to ``/results.json``:: + + [ + {"name": "curl", "srpmOut": "name=curl\\n...", "binOut": "subpkg=curl\\n...", "error": null}, + {"name": "broken", "srpmOut": "", "binOut": "", "error": "rpmspec --srpm failed: ..."} + ] + +Progress is reported to stderr as ``PROGRESS / ``. +""" + +import json +import os +import subprocess +import sys +from concurrent.futures import ThreadPoolExecutor, as_completed + + +def _rpmspec_args(spec_path, query_format, srpm, with_, without, defines, arch): + """Compose an rpmspec command line. + + Always overrides _sourcedir and _specdir to the spec's own directory so + that sidecar files (e.g. `Source1: foo.azl.macros`) loaded with + `%{SOURCEN}` or `%{load:...}` resolve against the rendered spec tree + rather than mock's default /builddir/build/SOURCES. Also sets + `with_check 0` to match the legacy per-component rpmspec path. + + `_ghc_version_cache` short-circuits `%ghc_version` in ghc-rpm-macros, + which would otherwise run `ghc --numeric-version`. We don't install the + ghc compiler in the query chroot, so the lookup would fail with + "command not found", producing parse errors like: + error: line N: Version required: Requires: ghc-compiler = + We set `_ghc_version_cache` rather than the higher-priority + `ghc_version_override` because some specs (notably ghc.spec itself) + redefine `ghc_version_override` via `%global`; command-line -D macros + are sticky and would block those overrides. 
`_ghc_version_cache` is + consulted after `ghc_version_override` inside the macro, so any spec + setting the latter still wins, and we only intercept the shell-out + path that's broken for us. The exact value only feeds Requires/Provides + version tags; subpackage names don't depend on it, so a placeholder is + fine for our purpose. + + `arch`, when non-empty, is passed as --target=. This drives the + %_target_cpu macro family inside rpmspec so ExclusiveArch/ExcludeArch + checks and arch-conditional %ifarch blocks evaluate for the requested + target rather than the host arch. + + User-provided defines win on the rpmspec side (rpmspec honors the last + -D for a given macro), so we list ours first. + """ + spec_dir = os.path.dirname(spec_path) + args = ["rpmspec", "-q"] + if srpm: + args.append("--srpm") + if arch: + args.append(f"--target={arch}") + args += ["--queryformat", query_format] + args += ["-D", f"_sourcedir {spec_dir}"] + args += ["-D", f"_specdir {spec_dir}"] + args += ["-D", "with_check 0"] + args += ["-D", "_ghc_version_cache 0.0.0"] + for w in with_: + args += ["--with", w] + for w in without: + args += ["--without", w] + for key, value in defines.items(): + args += ["-D", f"{key} {value}"] + args.append(spec_path) + return args + + +# Per-spec rewrites that work around quirks no -D override can fix. +# +# Each entry maps a spec basename to a list of (find, replace) tuples +# applied to the spec text before rpmspec is invoked. The rewrite happens +# on a scratch copy in the scratch dir; the original file in the rendered +# specs tree is never modified. +_SPEC_REWRITES = { + "ghc.spec": [ + # ghc.spec %undefines _ghcdynlibdir (line ~475) which defeats any + # -D _ghcdynlibdir override. 
The %post/%postun scriptlets that + # depend on it are then emitted inside `%if "%{?_ghcdynlibdir}" != + # "%_libdir"` and break rpmspec parsing with "package ghc-base does + # not exist" when ghc-rpm-macros is loaded but the ghc compiler + # isn't installed in our query chroot. We comment these scriptlets + # out — they don't affect subpackage enumeration. + ("%post base -p /sbin/ldconfig", "# patched-out-for-azldev-query: %post base"), + ("%postun base -p /sbin/ldconfig", "# patched-out-for-azldev-query: %postun base"), + ], +} + + +def _maybe_rewrite_spec(spec_path, scratch_dir, comp_name): + """If spec_path needs known patches to parse under rpmspec, write a + rewritten copy into scratch_dir and return its path. Otherwise return + spec_path unchanged. + """ + rewrites = _SPEC_REWRITES.get(os.path.basename(spec_path)) + if not rewrites: + return spec_path + + with open(spec_path, encoding="utf-8", errors="replace") as src: + content = src.read() + + for find, replace in rewrites: + content = content.replace(find, replace) + + out_path = os.path.join(scratch_dir, f"{comp_name}.patched.spec") + with open(out_path, "w", encoding="utf-8") as dst: + dst.write(content) + + return out_path + + +# Per-invocation timeout for rpmspec, in seconds. rpmspec on a healthy spec +# completes in well under a second; this generous cap exists only to bound +# pathological cases (recursive macros, macros that shell out and block) so +# one wedged spec can't hang the whole batch. +_RPMSPEC_TIMEOUT_SECONDS = 180 + + +class _RpmspecTimeout(Exception): + """Raised when rpmspec exceeds _RPMSPEC_TIMEOUT_SECONDS.""" + + +def _run_rpmspec(args): + """Run rpmspec and return (stdout, stderr, returncode). + + Raises _RpmspecTimeout if rpmspec doesn't finish within + _RPMSPEC_TIMEOUT_SECONDS. On timeout, the child process is killed before + re-raising so it doesn't linger inside the mock chroot. 
+ """ + try: + proc = subprocess.run( + args, + capture_output=True, + text=True, + encoding="utf-8", + errors="replace", + timeout=_RPMSPEC_TIMEOUT_SECONDS, + ) + except subprocess.TimeoutExpired as exc: + # subprocess.run already terminated the child by the time TimeoutExpired + # is raised, but stdout/stderr captured up to the timeout are on the + # exception. + stdout = exc.stdout or "" + stderr = exc.stderr or "" + if isinstance(stdout, bytes): + stdout = stdout.decode("utf-8", errors="replace") + if isinstance(stderr, bytes): + stderr = stderr.decode("utf-8", errors="replace") + raise _RpmspecTimeout( + f"rpmspec timed out after {_RPMSPEC_TIMEOUT_SECONDS}s; " + f"last stderr: {stderr.strip()[-512:]}" + ) from exc + return proc.stdout, proc.stderr, proc.returncode + + +# rpmspec (unlike rpmbuild) does NOT enforce ExclusiveArch/ExcludeArch on +# its own: both --srpm and --builtrpms queries return rc=0 against a spec +# whose ExclusiveArch excludes the --target arch. To honor those tags we +# read them out of the spec via an extra block wrapped into the srpm +# queryformat and evaluate the policy ourselves before running the binary +# phase. The wrapper uses sentinel lines so we can split the probe data +# back out and hand the caller-supplied portion of srpmOut through clean. +# +# `[%{Tag} ]` queryformat lists each value separated by a space; an empty +# tag yields an empty string, so absent ExclusiveArch/ExcludeArch parses +# as an empty list (== no restriction). +_ARCH_PROBE_BEGIN = "__AZL_ARCH_PROBE_BEGIN__\n" +_ARCH_PROBE_END = "__AZL_ARCH_PROBE_END__\n" +_ARCH_PROBE_FORMAT = ( + _ARCH_PROBE_BEGIN + + "EA=[%{ExclusiveArch} ]\n" + + "XA=[%{ExcludeArch} ]\n" + + _ARCH_PROBE_END +) + + +def _wrap_srpm_format_with_arch_probe(query_format): + """Prepend the arch-probe block to the caller's srpm queryformat.""" + return _ARCH_PROBE_FORMAT + query_format + + +def _split_arch_probe(srpm_out): + """Extract (exclusive_arch_list, exclude_arch_list, cleaned_srpm_out). 
+ + If the probe markers are absent (older callers, malformed output) the + arch lists are empty and srpm_out is returned unchanged. Lowercase the + arch tokens because rpm normalizes arch names that way and our target + arch (qemu.Arch) is always lowercase. + """ + start = srpm_out.find(_ARCH_PROBE_BEGIN) + end = srpm_out.find(_ARCH_PROBE_END) + if start < 0 or end < 0 or end < start: + return [], [], srpm_out + probe = srpm_out[start + len(_ARCH_PROBE_BEGIN):end] + cleaned = srpm_out[:start] + srpm_out[end + len(_ARCH_PROBE_END):] + ea, xa = [], [] + for line in probe.splitlines(): + if line.startswith("EA="): + ea = line[len("EA="):].lower().split() + elif line.startswith("XA="): + xa = line[len("XA="):].lower().split() + return ea, xa, cleaned + + +def _is_arch_excluded(arch, exclusive_arch, exclude_arch): + """Return True iff target arch is excluded by ExclusiveArch/ExcludeArch. + + `noarch` in ExclusiveArch means "any arch" and never excludes. With an + empty target arch (caller opted out of arch filtering) we never + exclude. + """ + if not arch: + return False + arch = arch.lower() + if exclusive_arch and "noarch" not in exclusive_arch and arch not in exclusive_arch: + return True + if arch in exclude_arch: + return True + return False + + +def process_component(specs_dir, scratch_dir, comp, arch): + """Run rpmspec --srpm + rpmspec (no --srpm) for one component. + + Trust boundary: comp["name"] and comp["specRelPath"] are validated by + BatchQuerySpecs in mockprocessor.go before this script is invoked. + arch is a target arch (e.g. "x86_64"); when non-empty it is passed to + rpmspec via --target. Specs that ExclusiveArch/ExcludeArch-exclude the + target are returned with excludedFromArch=True (not an error). 
+ """ + name = comp["name"] + spec_path = os.path.join(specs_dir, comp["specRelPath"]) + with_ = comp.get("with", []) or [] + without = comp.get("without", []) or [] + defines = comp.get("defines", {}) or {} + + if not os.path.isfile(spec_path): + return { + "name": name, + "srpmOut": "", + "binOut": "", + "error": f"spec file not found: {comp['specRelPath']}", + } + + # Apply per-spec rewrites (e.g. ghc.spec) to a scratch copy if needed. + # _sourcedir/_specdir stay pinned to the original spec's directory via + # _rpmspec_args, so sidecar files still resolve correctly. + effective_spec = _maybe_rewrite_spec(spec_path, scratch_dir, name) + + # Source-level query (--srpm). The caller's srpmQueryFormat is wrapped + # with an arch-policy probe block (see _wrap_srpm_format_with_arch_probe); + # we split that probe back out before returning srpm_out to Go so the + # downstream parser only sees the caller-requested fields. + srpm_args = _rpmspec_args( + effective_spec, + _wrap_srpm_format_with_arch_probe(comp["srpmQueryFormat"]), + True, + with_, + without, + defines, + arch, + ) + try: + srpm_out, srpm_err, srpm_rc = _run_rpmspec(srpm_args) + except _RpmspecTimeout as exc: + return { + "name": name, + "srpmOut": "", + "binOut": "", + "error": f"rpmspec --srpm {exc}", + } + if srpm_rc != 0: + return { + "name": name, + "srpmOut": srpm_out, + "binOut": "", + "error": f"rpmspec --srpm failed: {srpm_err.strip()}", + } + + exclusive_arch, exclude_arch, srpm_out = _split_arch_probe(srpm_out) + if _is_arch_excluded(arch, exclusive_arch, exclude_arch): + return { + "name": name, + "srpmOut": srpm_out, + "binOut": "", + "error": None, + "excludedFromArch": True, + } + + # Binary subpackage enumeration (no --srpm). + # + # `--builtrpms` (vs the default `--rpms`) restricts the listing to binary + # packages that *would actually be built*, i.e. those with a `%files` + # section. 
This matters for specs like `wayland` whose main package has + # no `%files` and produces no binary RPM — only its subpackages + # (libwayland-client, etc.) do. Using `--builtrpms` makes the output a + # ground-truth list of the binary RPMs the spec would produce. + bin_args = _rpmspec_args( + effective_spec, + comp["subpackagesQueryFormat"], + False, + with_, + without, + defines, + arch, + ) + # Insert --builtrpms right after `-q` so it associates with the query. + bin_args.insert(2, "--builtrpms") + try: + bin_out, bin_err, bin_rc = _run_rpmspec(bin_args) + except _RpmspecTimeout as exc: + return { + "name": name, + "srpmOut": srpm_out, + "binOut": "", + "error": f"rpmspec (binary) {exc}", + } + if bin_rc != 0: + return { + "name": name, + "srpmOut": srpm_out, + "binOut": bin_out, + "error": f"rpmspec failed: {bin_err.strip()}", + } + + return { + "name": name, + "srpmOut": srpm_out, + "binOut": bin_out, + "error": None, + } + + +def main() -> int: + if len(sys.argv) != 5: + print( + f"usage: {sys.argv[0]} ", + file=sys.stderr, + ) + return 1 + + scratch_dir = sys.argv[1] + specs_dir = sys.argv[2] + max_workers = int(sys.argv[3]) + arch = sys.argv[4] + inputs_path = os.path.join(scratch_dir, "inputs.json") + + with open(inputs_path) as f: + inputs = json.load(f) + + total = len(inputs) + + with ThreadPoolExecutor(max_workers=max_workers) as pool: + futures = { + pool.submit(process_component, specs_dir, scratch_dir, comp, arch): comp["name"] + for comp in inputs + } + + # Report progress to stderr as each component completes. + # Note: mock --chroot merges the inner command's stderr into stdout, + # so the Go caller uses SetRealTimeStdoutListener to receive these. 
+ completed_results = {} + for idx, future in enumerate(as_completed(futures), 1): + name = futures[future] + try: + completed_results[name] = future.result() + except Exception as exc: + completed_results[name] = { + "name": name, + "srpmOut": "", + "binOut": "", + "error": str(exc), + } + + print(f"PROGRESS {idx}/{total} {name}", file=sys.stderr, flush=True) + + # Collect results in input order (as_completed returns in completion order). + results = [completed_results[comp["name"]] for comp in inputs] + + results_path = os.path.join(scratch_dir, "results.json") + with open(results_path, "w") as results_file: + json.dump(results, results_file) + + return 0 + + +if __name__ == "__main__": + sys.exit(main()) diff --git a/internal/app/azldev/core/sources/specquery.go b/internal/app/azldev/core/sources/specquery.go new file mode 100644 index 00000000..03260eb9 --- /dev/null +++ b/internal/app/azldev/core/sources/specquery.go @@ -0,0 +1,258 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. + +package sources + +import ( + "context" + _ "embed" + "encoding/json" + "errors" + "fmt" + "log/slog" + "path/filepath" + "strconv" + "strings" + + "github.com/microsoft/azure-linux-dev-tools/internal/global/opctx" + "github.com/microsoft/azure-linux-dev-tools/internal/rpm" + "github.com/microsoft/azure-linux-dev-tools/internal/utils/fileutils" +) + +//go:embed query_process.py +var queryProcessScript []byte + +// SpecQueryInput describes a single rendered spec to query in the mock chroot. +// SpecRelPath is the path of the .spec relative to the specs directory bind +// mounted into the chroot (e.g. "c/curl/curl.spec"). +type SpecQueryInput struct { + Name string + SpecRelPath string + With []string + Without []string + Defines map[string]string +} + +// SpecQueryResult holds the batch-query result for one spec. 
+// +// Exactly one of Info, ExcludedFromArch, or Error indicates the outcome: +// - Info is populated (and Error is nil, ExcludedFromArch is false) when the +// spec was successfully queried for the requested arch. +// - ExcludedFromArch is true when rpmspec refused to evaluate the spec for +// the requested arch (ExclusiveArch/ExcludeArch policy). This is not an +// error; the component simply isn't built for that arch. +// - Error is non-nil for any other failure (rpmspec parse error, missing +// spec, timeout, etc). +type SpecQueryResult struct { + Name string + Info *rpm.SpecInfo + ExcludedFromArch bool + Error error +} + +// validateSpecQueryInputs rejects empty names, path-traversal in spec +// relative paths, absolute spec paths, and duplicate component names. +func validateSpecQueryInputs(inputs []SpecQueryInput) error { + seen := make(map[string]bool, len(inputs)) + + for _, input := range inputs { + if err := fileutils.ValidateFilename(input.Name); err != nil { + return fmt.Errorf("invalid component name %#q:\n%w", input.Name, err) + } + + if err := validateSpecRelPath(input.SpecRelPath); err != nil { + return fmt.Errorf("invalid spec path %#q for component %#q:\n%w", + input.SpecRelPath, input.Name, err) + } + + if seen[input.Name] { + return fmt.Errorf("duplicate component name %#q", input.Name) + } + + seen[input.Name] = true + } + + return nil +} + +// validateSpecRelPath rejects spec relative paths that could escape the +// specs-dir bind mount or contain control characters. 
+func validateSpecRelPath(relPath string) error { + if relPath == "" { + return errors.New("spec relative path cannot be empty") + } + + if filepath.IsAbs(relPath) { + return fmt.Errorf("spec path %#q must be relative", relPath) + } + + cleaned := filepath.Clean(relPath) + if cleaned != relPath { + return fmt.Errorf("spec path %#q must be in canonical form", relPath) + } + + if strings.Contains(cleaned, "..") { + return fmt.Errorf("spec path %#q must not contain path traversal", relPath) + } + + if strings.ContainsRune(relPath, 0) { + return fmt.Errorf("spec path %#q must not contain null bytes", relPath) + } + + return nil +} + +// specQueryInputJSON is the JSON-serializable form of [SpecQueryInput] +// written into inputs.json for the embedded Python helper. +type specQueryInputJSON struct { + Name string `json:"name"` + SpecRelPath string `json:"specRelPath"` + SrpmQueryFormat string `json:"srpmQueryFormat"` + SubpackagesQueryFormat string `json:"subpackagesQueryFormat"` + With []string `json:"with,omitempty"` + Without []string `json:"without,omitempty"` + Defines map[string]string `json:"defines,omitempty"` +} + +// specQueryResultJSON mirrors the per-component JSON shape written by +// query_process.py. +type specQueryResultJSON struct { + Name string `json:"name"` + SrpmOut string `json:"srpmOut"` + BinOut string `json:"binOut"` + Error *string `json:"error"` + ExcludedFromArch bool `json:"excludedFromArch,omitempty"` +} + +// BatchQuerySpecs runs `rpmspec` against multiple rendered spec files inside +// the shared mock chroot, parallelizing the per-spec invocations via an +// embedded Python helper. Returns one [SpecQueryResult] per input, in input +// order. +// +// specsDir is the host directory containing the rendered specs tree (i.e. +// the project's rendered-specs-dir). Each input's SpecRelPath is resolved +// relative to specsDir. 
scratchDir is a small host-side scratch directory +// used to ferry the script + inputs.json + results.json in and out of the +// chroot; it must be writable by the user the chroot runs as (mock's +// chrootuid defaults to os.getuid()). +// +// arch sets the rpmspec build target (e.g. "x86_64", "aarch64") via +// --target=. When empty, rpmspec uses its built-in default (the host +// arch). Specs that ExclusiveArch/ExcludeArch-exclude the target arch are +// surfaced via [SpecQueryResult.ExcludedFromArch] rather than as errors. +func (p *MockProcessor) BatchQuerySpecs( + ctx context.Context, events opctx.EventListener, + specsDir, scratchDir, arch string, + inputs []SpecQueryInput, + fs opctx.FS, maxWorkers int, +) ([]SpecQueryResult, error) { + if len(inputs) == 0 { + return nil, nil + } + + if err := validateSpecQueryInputs(inputs); err != nil { + return nil, err + } + + jsonInputs := make([]specQueryInputJSON, len(inputs)) + for idx, input := range inputs { + jsonInputs[idx] = specQueryInputJSON{ + Name: input.Name, + SpecRelPath: input.SpecRelPath, + SrpmQueryFormat: rpm.SrpmQueryFormat, + SubpackagesQueryFormat: rpm.SubpackagesQueryFormat, + With: input.With, + Without: input.Without, + Defines: input.Defines, + } + } + + inputsBytes, err := json.Marshal(jsonInputs) + if err != nil { + return nil, fmt.Errorf("marshaling spec query inputs:\n%w", err) + } + + slog.Info("Batch-querying rendered specs in mock chroot", "count", len(inputs)) + + const ( + chrootScratchPath = "/tmp/query" + chrootSpecsPath = "/tmp/specs" + ) + + workers := strconv.Itoa(max(1, maxWorkers)) + + rawResults, err := p.runBatchScript(ctx, events, runBatchScriptOptions{ + Mounts: []batchBindMount{ + {Host: scratchDir, InChroot: chrootScratchPath}, + {Host: specsDir, InChroot: chrootSpecsPath}, + }, + ScratchHost: scratchDir, + ScratchInChroot: chrootScratchPath, + ScriptName: "query_process.py", + ScriptBytes: queryProcessScript, + InputsJSON: inputsBytes, + ResultsName: "results.json", + 
ScriptArgs: []string{chrootScratchPath, chrootSpecsPath, workers, arch}, + ProgressLabel: "Querying specs in mock chroot", + ProgressTotal: int64(len(inputs)), + FS: fs, + }) + if err != nil { + return nil, err + } + + return parseSpecQueryBatchJSON(rawResults, inputs) +} + +// parseSpecQueryBatchJSON parses the JSON array produced by query_process.py +// into [SpecQueryResult] values. Per-component rpmspec failures are surfaced +// as a non-nil Error on the result; parse failures of an otherwise-successful +// rpmspec invocation are likewise surfaced per component. +func parseSpecQueryBatchJSON(raw []byte, inputs []SpecQueryInput) ([]SpecQueryResult, error) { + var jsonResults []specQueryResultJSON + if err := json.Unmarshal(raw, &jsonResults); err != nil { + return nil, fmt.Errorf("parsing spec query batch results JSON:\n%w", err) + } + + resultMap := make(map[string]*specQueryResultJSON, len(jsonResults)) + for idx := range jsonResults { + resultMap[jsonResults[idx].Name] = &jsonResults[idx] + } + + results := make([]SpecQueryResult, len(inputs)) + + for idx, input := range inputs { + results[idx].Name = input.Name + + compResult, ok := resultMap[input.Name] + if !ok { + results[idx].Error = fmt.Errorf("no result returned for %#q", input.Name) + + continue + } + + if compResult.Error != nil { + results[idx].Error = fmt.Errorf("%s", *compResult.Error) + + continue + } + + if compResult.ExcludedFromArch { + results[idx].ExcludedFromArch = true + + continue + } + + info, parseErr := rpm.ParseSrpmQueryOutput(input.SpecRelPath, compResult.SrpmOut) + if parseErr != nil { + results[idx].Error = fmt.Errorf("parsing rpmspec --srpm output:\n%w", parseErr) + + continue + } + + info.Subpackages = rpm.ParseSubpackagesOutput(compResult.BinOut) + results[idx].Info = info + } + + return results, nil +} diff --git a/internal/app/azldev/core/sources/specquery_test.go b/internal/app/azldev/core/sources/specquery_test.go new file mode 100644 index 00000000..99414fcb --- /dev/null 
+++ b/internal/app/azldev/core/sources/specquery_test.go @@ -0,0 +1,214 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. + +//nolint:testpackage // Testing unexported parseSpecQueryBatchJSON. +package sources + +import ( + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +func TestParseSpecQueryBatchJSON_Success(t *testing.T) { + t.Parallel() + + raw := []byte(`[{ + "name": "curl", + "srpmOut": "name=curl\nepoch=(none)\nversion=8.5.0\nrelease=1.azl3\n", + "binOut": "subpkg=curl\nsubpkg=libcurl\nsubpkg=curl-devel\n", + "error": null + }]`) + + inputs := []SpecQueryInput{{Name: "curl", SpecRelPath: "c/curl/curl.spec"}} + + results, err := parseSpecQueryBatchJSON(raw, inputs) + require.NoError(t, err) + require.Len(t, results, 1) + require.NoError(t, results[0].Error) + require.NotNil(t, results[0].Info) + + assert.Equal(t, "curl", results[0].Info.Name) + assert.Equal(t, "8.5.0", results[0].Info.Version.Version()) + assert.Equal(t, "1.azl3", results[0].Info.Version.Release()) + assert.Equal(t, []string{"curl", "libcurl", "curl-devel"}, results[0].Info.Subpackages) +} + +func TestParseSpecQueryBatchJSON_PerComponentError(t *testing.T) { + t.Parallel() + + raw := []byte(`[ + {"name":"broken","srpmOut":"","binOut":"","error":"rpmspec --srpm failed: bad spec"} + ]`) + + inputs := []SpecQueryInput{{Name: "broken", SpecRelPath: "b/broken/broken.spec"}} + + results, err := parseSpecQueryBatchJSON(raw, inputs) + require.NoError(t, err) + require.Len(t, results, 1) + require.Error(t, results[0].Error) + assert.Contains(t, results[0].Error.Error(), "rpmspec --srpm failed") + assert.Nil(t, results[0].Info) +} + +func TestParseSpecQueryBatchJSON_MissingComponent(t *testing.T) { + t.Parallel() + + raw := []byte(`[]`) + inputs := []SpecQueryInput{{Name: "ghost", SpecRelPath: "g/ghost/ghost.spec"}} + + results, err := parseSpecQueryBatchJSON(raw, inputs) + require.NoError(t, err) + require.Len(t, results, 
1) + require.Error(t, results[0].Error) + assert.Contains(t, results[0].Error.Error(), "no result returned") +} + +func TestParseSpecQueryBatchJSON_SrpmParseFailure(t *testing.T) { + t.Parallel() + + // srpmOut is missing required fields, so the per-component parser fails. + raw := []byte(`[{ + "name": "weird", + "srpmOut": "name=weird\n", + "binOut": "subpkg=weird\n", + "error": null + }]`) + + inputs := []SpecQueryInput{{Name: "weird", SpecRelPath: "w/weird/weird.spec"}} + + results, err := parseSpecQueryBatchJSON(raw, inputs) + require.NoError(t, err) + require.Len(t, results, 1) + require.Error(t, results[0].Error) + assert.Contains(t, results[0].Error.Error(), "parsing rpmspec --srpm output") + assert.Nil(t, results[0].Info) +} + +func TestParseSpecQueryBatchJSON_MultipleComponents(t *testing.T) { + t.Parallel() + + raw := []byte(`[ + {"name":"good","srpmOut":"name=good\nepoch=0\nversion=1.0\nrelease=1\n","binOut":"subpkg=good\n","error":null}, + {"name":"bad","srpmOut":"","binOut":"","error":"rpmspec failed: boom"} + ]`) + + inputs := []SpecQueryInput{ + {Name: "good", SpecRelPath: "g/good/good.spec"}, + {Name: "bad", SpecRelPath: "b/bad/bad.spec"}, + } + + results, err := parseSpecQueryBatchJSON(raw, inputs) + require.NoError(t, err) + require.Len(t, results, 2) + require.NoError(t, results[0].Error) + require.NotNil(t, results[0].Info) + assert.Equal(t, []string{"good"}, results[0].Info.Subpackages) + require.Error(t, results[1].Error) + assert.Contains(t, results[1].Error.Error(), "boom") +} + +func TestParseSpecQueryBatchJSON_InvalidJSON(t *testing.T) { + t.Parallel() + + inputs := []SpecQueryInput{{Name: "any", SpecRelPath: "a/any/any.spec"}} + + _, err := parseSpecQueryBatchJSON([]byte("not json{{{"), inputs) + require.Error(t, err) + assert.Contains(t, err.Error(), "parsing spec query batch results JSON") +} + +func TestParseSpecQueryBatchJSON_ExcludedFromArch(t *testing.T) { + t.Parallel() + + raw := []byte(`[{ + "name": "shim", + "srpmOut": "", + 
"binOut": "", + "error": null, + "excludedFromArch": true + }]`) + + inputs := []SpecQueryInput{{Name: "shim", SpecRelPath: "s/shim/shim.spec"}} + + results, err := parseSpecQueryBatchJSON(raw, inputs) + require.NoError(t, err) + require.Len(t, results, 1) + require.NoError(t, results[0].Error) + assert.True(t, results[0].ExcludedFromArch) + assert.Nil(t, results[0].Info) +} + +func TestValidateSpecQueryInputs(t *testing.T) { + t.Parallel() + + tests := []struct { + name string + inputs []SpecQueryInput + wantErr bool + errMsg string + }{ + { + name: "valid", + inputs: []SpecQueryInput{{Name: "curl", SpecRelPath: "c/curl/curl.spec"}}, + }, + { + name: "empty name", + inputs: []SpecQueryInput{{Name: "", SpecRelPath: "c/curl/curl.spec"}}, + wantErr: true, errMsg: "invalid component name", + }, + { + name: "slash in name", + inputs: []SpecQueryInput{{Name: "c/curl", SpecRelPath: "c/curl/curl.spec"}}, + wantErr: true, errMsg: "invalid component name", + }, + { + name: "empty rel path", + inputs: []SpecQueryInput{{Name: "curl", SpecRelPath: ""}}, + wantErr: true, errMsg: "spec relative path cannot be empty", + }, + { + name: "absolute rel path", + inputs: []SpecQueryInput{{Name: "curl", SpecRelPath: "/c/curl/curl.spec"}}, + wantErr: true, errMsg: "must be relative", + }, + { + name: "traversal in rel path", + inputs: []SpecQueryInput{{Name: "curl", SpecRelPath: "c/curl/../../etc/passwd"}}, + wantErr: true, errMsg: "must be in canonical form", + }, + { + name: "canonical traversal in rel path", + inputs: []SpecQueryInput{{Name: "curl", SpecRelPath: "../etc/passwd"}}, + wantErr: true, errMsg: "must not contain path traversal", + }, + { + name: "non-canonical rel path", + inputs: []SpecQueryInput{{Name: "curl", SpecRelPath: "c//curl/curl.spec"}}, + wantErr: true, errMsg: "must be in canonical form", + }, + { + name: "duplicate name", + inputs: []SpecQueryInput{ + {Name: "curl", SpecRelPath: "c/curl/curl.spec"}, + {Name: "curl", SpecRelPath: "c/curl/curl.spec"}, + }, + 
wantErr: true, errMsg: "duplicate component name", + }, + } + + for _, testCase := range tests { + t.Run(testCase.name, func(t *testing.T) { + t.Parallel() + + err := validateSpecQueryInputs(testCase.inputs) + if testCase.wantErr { + require.Error(t, err) + assert.Contains(t, err.Error(), testCase.errMsg) + } else { + require.NoError(t, err) + } + }) + } +} diff --git a/internal/rpm/specquery.go b/internal/rpm/specquery.go index 4fda0017..f4766fe5 100644 --- a/internal/rpm/specquery.go +++ b/internal/rpm/specquery.go @@ -26,6 +26,10 @@ type SpecInfo struct { Name string Version Version RequiredFiles []string + // Subpackages lists the binary package names the spec produces, in the + // order rpmspec reports them. Empty when not queried (e.g., the + // per-component buildenv path that only requests --srpm output). + Subpackages []string } // NewSpecQuerier constructs a new [SpecQuerier] instance that will use the provided [buildenv.BuildEnv] @@ -105,7 +109,7 @@ func (q *SpecQuerier) composeRpmspecCmdline(specPath string) (result []string) { "-D", "_specdir " + specDirPath, "-D", "with_check 0", "--queryformat", - "name=%{name}\nepoch=%{epoch}\nversion=%{version}\nrelease=%{release}\n[source=%{SOURCE}\n][patch=%{PATCH}\n]", + SrpmQueryFormat, } for _, name := range q.buildOptions.With { @@ -125,6 +129,59 @@ func (q *SpecQuerier) composeRpmspecCmdline(specPath string) (result []string) { return result } +// Constants for the rpmspec queryformat strings used by both the per-component +// (legacy) path and the batched query path. Exported so the batched path in +// [github.com/microsoft/azure-linux-dev-tools/internal/app/azldev/core/sources] +// can build the same command lines without duplicating the format strings. +const ( + // SrpmQueryFormat extracts the SRPM-level NEVR plus the source/patch files + // referenced by the spec. Used with rpmspec -q --srpm. 
// ParseSubpackagesOutput extracts binary subpackage names from the stdout of
// `rpmspec -q --queryformat SubpackagesQueryFormat <spec>`, preserving the
// order in which rpmspec emitted them.
//
// Each line is whitespace-trimmed before inspection. Blank lines, lines that
// do not start with `subpkg=`, and `subpkg=` lines with an empty value are
// dropped. Lines starting with "error: " or "warning: " are dropped as well,
// after being logged at debug level. The result is nil when no name is found.
func ParseSubpackagesOutput(output string) []string {
	const subpkgPrefix = "subpkg="

	var names []string

	for _, rawLine := range strings.Split(output, "\n") {
		line := strings.TrimSpace(rawLine)

		switch {
		case line == "":
			// Nothing on this line.
		case strings.HasPrefix(line, "error: "), strings.HasPrefix(line, "warning: "):
			slog.Debug("Ignoring rpmspec error", "line", line)
		case strings.HasPrefix(line, subpkgPrefix):
			if name := strings.TrimPrefix(line, subpkgPrefix); name != "" {
				names = append(names, name)
			}
		}
	}

	return names
}
func parseRpmspecOutput(specPath, output string) (specInfo *SpecInfo, err error) { var name, epoch, version, release string diff --git a/internal/rpm/specquery_test.go b/internal/rpm/specquery_test.go index 2cf1a894..735fcfdb 100644 --- a/internal/rpm/specquery_test.go +++ b/internal/rpm/specquery_test.go @@ -382,3 +382,79 @@ func requireNewVersion(t *testing.T, versionStr string) rpm.Version { return *version } + +func TestParseSubpackagesOutput(t *testing.T) { + t.Parallel() + + tests := []struct { + name string + output string + want []string + }{ + { + name: "empty output", + output: "", + want: nil, + }, + { + name: "single subpackage", + output: "subpkg=curl\n", + want: []string{"curl"}, + }, + { + name: "multiple subpackages with whitespace", + output: "subpkg=curl\nsubpkg=libcurl\n\nsubpkg=curl-devel\n", + want: []string{"curl", "libcurl", "curl-devel"}, + }, + { + name: "ignores warnings and errors", + output: "warning: some macro thing\nerror: another\nsubpkg=foo\n", + want: []string{"foo"}, + }, + { + name: "ignores unknown lines", + output: "garbage line\nsubpkg=foo\nother=bar\n", + want: []string{"foo"}, + }, + { + name: "skips empty values", + output: "subpkg=\nsubpkg=valid\n", + want: []string{"valid"}, + }, + } + + for _, testCase := range tests { + t.Run(testCase.name, func(t *testing.T) { + t.Parallel() + + got := rpm.ParseSubpackagesOutput(testCase.output) + assert.Equal(t, testCase.want, got) + }) + } +} + +func TestParseSrpmQueryOutput_Success(t *testing.T) { + t.Parallel() + + output := "name=curl\nepoch=(none)\nversion=8.5.0\nrelease=1.azl3\n" + + "source=https://example.com/curl-8.5.0.tar.xz\npatch=fix.patch\n" + + info, err := rpm.ParseSrpmQueryOutput("/specs/c/curl/curl.spec", output) + require.NoError(t, err) + assert.Equal(t, "curl", info.Name) + assert.Equal(t, "8.5.0", info.Version.Version()) + assert.Equal(t, "1.azl3", info.Version.Release()) + assert.Equal(t, []string{"https://example.com/curl-8.5.0.tar.xz", "fix.patch"}, 
info.RequiredFiles) + assert.Empty(t, info.Subpackages, "Subpackages is populated by the caller, not the parser") +} + +func TestParseSrpmQueryOutput_MissingField(t *testing.T) { + t.Parallel() + + // Missing release line. + output := "name=curl\nepoch=0\nversion=8.5.0\n" + + _, err := rpm.ParseSrpmQueryOutput("/specs/c/curl/curl.spec", output) + require.Error(t, err) + assert.Contains(t, err.Error(), "missing required fields") +} diff --git a/pkg/app/azldev_cli/azldev.go b/pkg/app/azldev_cli/azldev.go index 90bd99fe..54872aa9 100644 --- a/pkg/app/azldev_cli/azldev.go +++ b/pkg/app/azldev_cli/azldev.go @@ -14,6 +14,7 @@ import ( "github.com/microsoft/azure-linux-dev-tools/internal/app/azldev/cmds/image" "github.com/microsoft/azure-linux-dev-tools/internal/app/azldev/cmds/pkg" "github.com/microsoft/azure-linux-dev-tools/internal/app/azldev/cmds/project" + "github.com/microsoft/azure-linux-dev-tools/internal/app/azldev/cmds/repo" "github.com/microsoft/azure-linux-dev-tools/internal/app/azldev/cmds/version" ) @@ -41,6 +42,7 @@ func InstantiateApp() *azldev.App { image.OnAppInit(app) pkg.OnAppInit(app) project.OnAppInit(app) + repo.OnAppInit(app) version.OnAppInit(app) return app diff --git a/pkg/app/azldev_cli/azldev_test.go b/pkg/app/azldev_cli/azldev_test.go index 5a539ff7..acbcc855 100644 --- a/pkg/app/azldev_cli/azldev_test.go +++ b/pkg/app/azldev_cli/azldev_test.go @@ -29,6 +29,7 @@ func TestInstantiateApp(t *testing.T) { "image", "package", "project", + "repo", "version", }, ) diff --git a/scenario/component_query_test.go b/scenario/component_query_test.go index b8340d32..68240ba5 100644 --- a/scenario/component_query_test.go +++ b/scenario/component_query_test.go @@ -13,7 +13,8 @@ import ( "github.com/stretchr/testify/require" ) -// We test running `azldev query component` to make sure that spec parsing works as expected. 
+// We test running `azldev component query` to make sure that batch rpmspec +// processing against the rendered specs tree works as expected. func TestQueryingAComponent(t *testing.T) { t.Parallel() @@ -22,23 +23,28 @@ func TestQueryingAComponent(t *testing.T) { t.Skip("skipping long test") } - // Create a simple spec with a known name and version. + // Create a simple spec with a known name and version. Add a subpackage + // so we can also verify that 'query' reports the binary subpackages. spec := projecttest.NewSpec( projecttest.WithName("test-component"), projecttest.WithVersion("3.1.4.159"), + projecttest.WithSubpackage("extra"), ) - // Create a simple project with the spec, using test default configs for distro and mock configurations. + // Create a simple project with the spec, using test default configs for + // distro and mock configurations. project := projecttest.NewDynamicTestProject( projecttest.AddSpec(spec), projecttest.UseTestDefaultConfigs(), ) - // Run the component query command with test default configs copied into the container. + // 'component query' now reads from the rendered specs tree, so render + // first as a pre-command and then query. results := projecttest.NewProjectTest( project, []string{"component", "query", spec.GetName()}, projecttest.WithTestDefaultConfigs(), + projecttest.WithPreCommand("component", "render", "-a"), ).RunInContainer(t) // Get the parsed JSON output. @@ -61,4 +67,22 @@ func TestQueryingAComponent(t *testing.T) { require.True(t, ok, "Version field is not a map") require.Contains(t, versionMap, "Version") assert.Equal(t, spec.GetVersion(), versionMap["Version"]) + + // Check that subpackages were extracted. 
// WithSubpackage returns a [TestSpecOption] that appends suffix to the spec's
// list of additional binary subpackages. When the spec is rendered, each
// suffix yields a `%package <suffix>` section, so the resulting binary package
// is named "<name>-<suffix>", plus a matching `%files <suffix>` section that
// lists `%{_datadir}/test-component`. The option may be applied several times
// to add several subpackages.
func WithSubpackage(suffix string) TestSpecOption {
	return func(s *TestSpec) {
		s.subpackages = append(s.subpackages, suffix)
	}
}
+ + for _, sub := range s.subpackages { + lines = append(lines, []string{ + "%package " + sub, + "Summary: A test subpackage", + "", + "%description " + sub, + "Subpackage " + sub + " for testing.", + "", + }...) + } + + lines = append(lines, []string{ "%build", "echo hello >file.txt", "", @@ -112,5 +136,13 @@ func (s *TestSpec) Render() string { "", }...) + for _, sub := range s.subpackages { + lines = append(lines, []string{ + "%files " + sub, + "%{_datadir}/test-component", + "", + }...) + } + return strings.Join(lines, "\n") }