From 5043a571653e84bbf028416cac76c990c8a391ef Mon Sep 17 00:00:00 2001 From: reuben olinsky Date: Mon, 18 May 2026 23:30:06 +0000 Subject: [PATCH 1/2] draft: bulk spec querying --- docs/user/reference/cli/azldev.md | 2 +- docs/user/reference/cli/azldev_advanced.md | 2 +- docs/user/reference/cli/azldev_completion.md | 2 +- docs/user/reference/cli/azldev_component.md | 4 +- .../reference/cli/azldev_component_query.md | 17 +- docs/user/reference/cli/azldev_config.md | 2 +- docs/user/reference/cli/azldev_docs.md | 2 +- docs/user/reference/cli/azldev_image.md | 2 +- docs/user/reference/cli/azldev_package.md | 2 +- docs/user/reference/cli/azldev_project.md | 2 +- docs/user/reference/cli/azldev_version.md | 2 +- .../app/azldev/cmds/component/mockproc.go | 96 +++++++ internal/app/azldev/cmds/component/query.go | 209 ++++++++++++-- .../cmds/component/query_internal_test.go | 153 ++++++++++ .../app/azldev/cmds/component/query_test.go | 71 ++--- internal/app/azldev/cmds/component/render.go | 24 +- .../app/azldev/core/sources/mockprocessor.go | 196 ++++++++----- .../app/azldev/core/sources/query_process.py | 271 ++++++++++++++++++ internal/app/azldev/core/sources/specquery.go | 237 +++++++++++++++ .../app/azldev/core/sources/specquery_test.go | 193 +++++++++++++ internal/rpm/specquery.go | 59 +++- internal/rpm/specquery_test.go | 76 +++++ scenario/component_query_test.go | 32 ++- scenario/internal/projecttest/testspec.go | 40 ++- 24 files changed, 1529 insertions(+), 167 deletions(-) create mode 100644 internal/app/azldev/cmds/component/mockproc.go create mode 100644 internal/app/azldev/cmds/component/query_internal_test.go create mode 100644 internal/app/azldev/core/sources/query_process.py create mode 100644 internal/app/azldev/core/sources/specquery.go create mode 100644 internal/app/azldev/core/sources/specquery_test.go diff --git a/docs/user/reference/cli/azldev.md b/docs/user/reference/cli/azldev.md index 00c8b4db..54145fed 100644 --- a/docs/user/reference/cli/azldev.md 
+++ b/docs/user/reference/cli/azldev.md @@ -2,7 +2,7 @@ ## azldev -🐧 Azure Linux Dev Tool +🐧 Azure Linux Dev Tool 0.0.0-devel ### Synopsis diff --git a/docs/user/reference/cli/azldev_advanced.md b/docs/user/reference/cli/azldev_advanced.md index 85dc88d0..51dbd9f4 100644 --- a/docs/user/reference/cli/azldev_advanced.md +++ b/docs/user/reference/cli/azldev_advanced.md @@ -36,7 +36,7 @@ output but fully supported. ### SEE ALSO -* [azldev](azldev.md) - 🐧 Azure Linux Dev Tool +* [azldev](azldev.md) - 🐧 Azure Linux Dev Tool 0.0.0-devel * [azldev advanced download-sources](azldev_advanced_download-sources.md) - Download source files listed in a Fedora-format sources file * [azldev advanced mcp](azldev_advanced_mcp.md) - Run in MCP server mode * [azldev advanced mock](azldev_advanced_mock.md) - Run RPM mock tool diff --git a/docs/user/reference/cli/azldev_completion.md b/docs/user/reference/cli/azldev_completion.md index 77e5c161..3afb0763 100644 --- a/docs/user/reference/cli/azldev_completion.md +++ b/docs/user/reference/cli/azldev_completion.md @@ -34,7 +34,7 @@ See each sub-command's help for details on how to use the generated script. ### SEE ALSO -* [azldev](azldev.md) - 🐧 Azure Linux Dev Tool +* [azldev](azldev.md) - 🐧 Azure Linux Dev Tool 0.0.0-devel * [azldev completion bash](azldev_completion_bash.md) - Generate the autocompletion script for bash * [azldev completion fish](azldev_completion_fish.md) - Generate the autocompletion script for fish * [azldev completion powershell](azldev_completion_powershell.md) - Generate the autocompletion script for powershell diff --git a/docs/user/reference/cli/azldev_component.md b/docs/user/reference/cli/azldev_component.md index 0516a5e8..be0a70e7 100644 --- a/docs/user/reference/cli/azldev_component.md +++ b/docs/user/reference/cli/azldev_component.md @@ -37,14 +37,14 @@ components defined in the project configuration. 
### SEE ALSO -* [azldev](azldev.md) - 🐧 Azure Linux Dev Tool +* [azldev](azldev.md) - 🐧 Azure Linux Dev Tool 0.0.0-devel * [azldev component add](azldev_component_add.md) - Add component(s) to this project * [azldev component build](azldev_component_build.md) - Build packages for components * [azldev component changed](azldev_component_changed.md) - Detect which components changed between two git refs * [azldev component diff-sources](azldev_component_diff-sources.md) - Show the diff that overlays apply to a component's sources * [azldev component list](azldev_component_list.md) - List components in this project * [azldev component prepare-sources](azldev_component_prepare-sources.md) - Prepare buildable sources for components -* [azldev component query](azldev_component_query.md) - Query info for components in this project +* [azldev component query](azldev_component_query.md) - Query info from locally rendered component specs * [azldev component render](azldev_component_render.md) - Render post-overlay specs and sidecar files to a checked-in directory * [azldev component update](azldev_component_update.md) - Resolve and lock source identities for components diff --git a/docs/user/reference/cli/azldev_component_query.md b/docs/user/reference/cli/azldev_component_query.md index 03a49fe1..92f2aa1e 100644 --- a/docs/user/reference/cli/azldev_component_query.md +++ b/docs/user/reference/cli/azldev_component_query.md @@ -2,16 +2,21 @@ ## azldev component query -Query info for components in this project +Query info from locally rendered component specs ### Synopsis -Query detailed information for components by fetching and parsing their spec files. +Query detailed information for components from their locally rendered specs. -Unlike 'list', which only shows configuration metadata, 'query' resolves -upstream sources and parses the RPM spec to report version, release, -subpackages, dependencies, and other spec-level details. 
This makes it -slower than 'list' but more informative. +This command reads the post-overlay specs from the project's rendered-specs-dir +(produced by 'azldev component render') and runs rpmspec against them in a +single shared mock chroot, batching all specs into one chroot invocation with +parallel per-spec processing. For each component, it reports the source NEVR +and the list of binary subpackages the spec would produce when built. + +The rendered-specs-dir must exist on disk; if it doesn't, run +'azldev component render' first. Components that previously failed to render +(those with a RENDER_FAILED marker file) are skipped with a warning. ``` azldev component query [flags] diff --git a/docs/user/reference/cli/azldev_config.md b/docs/user/reference/cli/azldev_config.md index f162e679..97254fcf 100644 --- a/docs/user/reference/cli/azldev_config.md +++ b/docs/user/reference/cli/azldev_config.md @@ -35,7 +35,7 @@ JSON schema used for validating TOML config files. ### SEE ALSO -* [azldev](azldev.md) - 🐧 Azure Linux Dev Tool +* [azldev](azldev.md) - 🐧 Azure Linux Dev Tool 0.0.0-devel * [azldev config dump](azldev_config_dump.md) - Dump the current configuration * [azldev config generate-schema](azldev_config_generate-schema.md) - Generates JSON schema for validating .toml config files diff --git a/docs/user/reference/cli/azldev_docs.md b/docs/user/reference/cli/azldev_docs.md index 4b82fab1..a2e38165 100644 --- a/docs/user/reference/cli/azldev_docs.md +++ b/docs/user/reference/cli/azldev_docs.md @@ -35,6 +35,6 @@ command tree, suitable for inclusion in the user guide. 
### SEE ALSO -* [azldev](azldev.md) - 🐧 Azure Linux Dev Tool +* [azldev](azldev.md) - 🐧 Azure Linux Dev Tool 0.0.0-devel * [azldev docs markdown](azldev_docs_markdown.md) - Generates Markdown (.md) docs for this tool diff --git a/docs/user/reference/cli/azldev_image.md b/docs/user/reference/cli/azldev_image.md index 92f12315..6adbfca4 100644 --- a/docs/user/reference/cli/azldev_image.md +++ b/docs/user/reference/cli/azldev_image.md @@ -36,7 +36,7 @@ can be customized using Azure Linux Image Customizer. ### SEE ALSO -* [azldev](azldev.md) - 🐧 Azure Linux Dev Tool +* [azldev](azldev.md) - 🐧 Azure Linux Dev Tool 0.0.0-devel * [azldev image boot](azldev_image_boot.md) - Boot an Azure Linux image in a QEMU VM * [azldev image build](azldev_image_build.md) - Build an image using kiwi-ng * [azldev image customize](azldev_image_customize.md) - Customizes a pre-built Azure Linux image diff --git a/docs/user/reference/cli/azldev_package.md b/docs/user/reference/cli/azldev_package.md index d3fefb25..23ee54c6 100644 --- a/docs/user/reference/cli/azldev_package.md +++ b/docs/user/reference/cli/azldev_package.md @@ -36,6 +36,6 @@ publish channel assignments derived from package groups and component overrides. ### SEE ALSO -* [azldev](azldev.md) - 🐧 Azure Linux Dev Tool +* [azldev](azldev.md) - 🐧 Azure Linux Dev Tool 0.0.0-devel * [azldev package list](azldev_package_list.md) - List resolved configuration for packages (RPMs and SRPMs) diff --git a/docs/user/reference/cli/azldev_project.md b/docs/user/reference/cli/azldev_project.md index fed57377..6cfccbb0 100644 --- a/docs/user/reference/cli/azldev_project.md +++ b/docs/user/reference/cli/azldev_project.md @@ -35,7 +35,7 @@ as an Azure Linux project with a basic configuration. 
### SEE ALSO -* [azldev](azldev.md) - 🐧 Azure Linux Dev Tool +* [azldev](azldev.md) - 🐧 Azure Linux Dev Tool 0.0.0-devel * [azldev project init](azldev_project_init.md) - Initialize the current working directory with a basic Azure Linux project config * [azldev project new](azldev_project_new.md) - Create a new Azure Linux project with basic config diff --git a/docs/user/reference/cli/azldev_version.md b/docs/user/reference/cli/azldev_version.md index f24b9ea1..5397d5a8 100644 --- a/docs/user/reference/cli/azldev_version.md +++ b/docs/user/reference/cli/azldev_version.md @@ -40,5 +40,5 @@ azldev version -O json ### SEE ALSO -* [azldev](azldev.md) - 🐧 Azure Linux Dev Tool +* [azldev](azldev.md) - 🐧 Azure Linux Dev Tool 0.0.0-devel diff --git a/internal/app/azldev/cmds/component/mockproc.go b/internal/app/azldev/cmds/component/mockproc.go new file mode 100644 index 00000000..bffa9fed --- /dev/null +++ b/internal/app/azldev/cmds/component/mockproc.go @@ -0,0 +1,96 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. + +package component + +import ( + "log/slog" + + "github.com/microsoft/azure-linux-dev-tools/internal/app/azldev" + "github.com/microsoft/azure-linux-dev-tools/internal/app/azldev/core/sources" +) + +// Required-package presets for the shared MockProcessor. +// +// Render needs rpmautospec (macro expansion), rpmdevtools (spectool), and git +// (required for rpmautospec to read commit history). python3-click is required +// by rpmautospec but not declared as an RPM dependency. Ecosystem macro +// packages (go-srpm-macros, etc.) are already present via @buildsys-build → +// azurelinux-rpm-config. +// +// Query needs rpm-build for the `rpmspec` binary. It's typically already +// pulled in via @buildsys-build, but we install it explicitly so we don't +// depend on a particular buildgroup composition. 
+func mockPackagesForRender() []string { + return []string{"rpmautospec", "rpmdevtools", "git", "python3-click"} +} + +func mockPackagesForQuery() []string { + // rpm-build provides rpmspec; python3 is needed to run query_process.py. + // (The render path gets python3 transitively via python3-click, but the + // query path doesn't install rpmautospec/python3-click.) + // + // Additional macro packages are installed so that build-time macros + // affecting %files / %package expansion (and therefore --builtrpms + // output) resolve during rpmspec parsing. Without these, --builtrpms + // under-reports subpackages for specs that generate their %files + // sections via macros, or that use macros like %pyproject_extras_subpkg + // to emit whole subpackage stanzas at parse time. + // + // Curated list of common macro packages that emit %package / %files in + // the Azure Linux spec corpus: + // * fonts-rpm-macros — %fontfiles, %fontfamily_subpkg, etc. + // * pyproject-rpm-macros — %pyproject_extras_subpkg + // * java-srpm-macros, javapackages-tools — %mvn_package, %mvn_install, + // auto -javadoc subpackages, + // jp_minimal bcond default + // * ghc-rpm-macros — %ghc_lib_subpackage and ghc_prof/haddock + // bcond defaults. Requires the + // ghc_version_override define set by + // query_process.py to avoid shelling out + // to a `ghc` binary that isn't installed + // in the chroot. + // + // We install `java-srpm-macros` (the actual binary RPM) rather than + // `java-rpm-macros`, which is the SRPM name; the latter has no + // `%files` section for the main package and is not a buildable binary. + // + // Macros that only affect %prep/%build/%install (e.g. %cargo_install, + // %py3_build) don't need to be added — they don't change which binary + // RPMs would be built. 
+ return []string{ + "rpm-build", + "python3", + "fonts-rpm-macros", + "pyproject-rpm-macros", + "java-srpm-macros", + "javapackages-tools", + "ghc-rpm-macros", + } +} + +// createMockProcessor creates a [sources.MockProcessor] using the project's +// mock config. Returns nil if the mock config is not available (e.g., no project +// config loaded, or no mock config path configured). +// +// requiredPackages is the set of packages to install in the chroot on first +// use. Use one of the mockPackagesFor* presets above to pick the right set +// for the calling command. +func createMockProcessor(env *azldev.Env, requiredPackages []string) *sources.MockProcessor { + _, distroVerDef, err := env.Distro() + if err != nil { + slog.Info("Mock processor unavailable; could not resolve distro", "error", err) + + return nil + } + + if distroVerDef.MockConfigPath == "" { + slog.Info("Mock processor unavailable; no mock config path configured") + + return nil + } + + slog.Info("Mock processor available", "mockConfig", distroVerDef.MockConfigPath) + + return sources.NewMockProcessor(env, distroVerDef.MockConfigPath, requiredPackages) +} diff --git a/internal/app/azldev/cmds/component/query.go b/internal/app/azldev/cmds/component/query.go index 59985a73..f02f7c75 100644 --- a/internal/app/azldev/cmds/component/query.go +++ b/internal/app/azldev/cmds/component/query.go @@ -4,11 +4,17 @@ package component import ( + "errors" "fmt" + "log/slog" + "path/filepath" "github.com/microsoft/azure-linux-dev-tools/internal/app/azldev" "github.com/microsoft/azure-linux-dev-tools/internal/app/azldev/core/components" + "github.com/microsoft/azure-linux-dev-tools/internal/app/azldev/core/sources" "github.com/microsoft/azure-linux-dev-tools/internal/app/azldev/core/specs" + "github.com/microsoft/azure-linux-dev-tools/internal/utils/fileperms" + "github.com/microsoft/azure-linux-dev-tools/internal/utils/fileutils" "github.com/spf13/cobra" ) @@ -28,13 +34,18 @@ func NewComponentQueryCommand() 
*cobra.Command { cmd := &cobra.Command{ Use: "query", - Short: "Query info for components in this project", - Long: `Query detailed information for components by fetching and parsing their spec files. + Short: "Query info from locally rendered component specs", + Long: `Query detailed information for components from their locally rendered specs. -Unlike 'list', which only shows configuration metadata, 'query' resolves -upstream sources and parses the RPM spec to report version, release, -subpackages, dependencies, and other spec-level details. This makes it -slower than 'list' but more informative.`, +This command reads the post-overlay specs from the project's rendered-specs-dir +(produced by 'azldev component render') and runs rpmspec against them in a +single shared mock chroot, batching all specs into one chroot invocation with +parallel per-spec processing. For each component, it reports the source NEVR +and the list of binary subpackages the spec would produce when built. + +The rendered-specs-dir must exist on disk; if it doesn't, run +'azldev component render' first. Components that previously failed to render +(those with a RENDER_FAILED marker file) are skipped with a warning.`, Example: ` # Query a single component azldev component query -p curl @@ -58,35 +69,193 @@ type componentDetails struct { specs.ComponentSpecDetails } -// Queries env for component details, in accordance with options. Returns the found components. +// QueryComponents queries info for selected components by reading the locally +// rendered specs and running rpmspec against them in a single shared mock +// chroot. Returns one entry per successfully queried component, in the order +// returned by the resolver. Components with a RENDER_FAILED marker are +// skipped with a loud warning. Per-component rpmspec failures are surfaced +// as warnings; the corresponding entry is omitted from the result list and +// the function returns an aggregated error after attempting every component. 
+// +//nolint:cyclop,funlen // Linear pipeline; further splitting hurts readability. func QueryComponents( env *azldev.Env, options *QueryComponentsOptions, -) (results []*componentDetails, err error) { - var comps *components.ComponentSet +) ([]*componentDetails, error) { + renderedSpecsDir := env.Config().Project.RenderedSpecsDir + if renderedSpecsDir == "" { + return nil, errors.New( + "project.rendered-specs-dir is not configured; " + + "set it in the project config and run 'azldev component render' first") + } + + dirExists, err := fileutils.DirExists(env.FS(), renderedSpecsDir) + if err != nil { + return nil, fmt.Errorf("checking rendered-specs-dir %#q:\n%w", renderedSpecsDir, err) + } + + if !dirExists { + return nil, fmt.Errorf( + "rendered-specs-dir %#q does not exist; run 'azldev component render' first", + renderedSpecsDir) + } resolver := components.NewResolver(env) - comps, err = resolver.FindComponents(&options.ComponentFilter) + comps, err := resolver.FindComponents(&options.ComponentFilter) if err != nil { - return results, fmt.Errorf("failed to resolve components:\n%w", err) + return nil, fmt.Errorf("failed to resolve components:\n%w", err) + } + + if comps.Len() == 0 { + return nil, errors.New("no components were selected; " + + "please use command-line options to indicate which components to query") + } + + inputs, skipped, err := buildSpecQueryInputs(env, comps.Components(), renderedSpecsDir) + if err != nil { + return nil, err + } + + if len(inputs) == 0 { + return nil, fmt.Errorf("no components have a rendered spec on disk; skipped %d", skipped) } - allDetails := make([]*componentDetails, 0, comps.Len()) + mockProcessor := createMockProcessor(env, mockPackagesForQuery()) + if mockProcessor == nil { + return nil, errors.New( + "mock config required for querying; ensure the project has a valid distro with mock config") + } + + defer mockProcessor.Destroy(env) - for _, comp := range comps.Components() { - spec := comp.GetSpec() + if err := 
env.FS().MkdirAll(env.WorkDir(), fileperms.PublicDir); err != nil { + return nil, fmt.Errorf("creating work directory:\n%w", err) + } - specInfo, err := spec.Parse() - if err != nil { - return nil, fmt.Errorf("failed to parse spec for component %q:\n%w", comp.GetName(), err) + scratchDir, err := fileutils.MkdirTemp(env.FS(), env.WorkDir(), "azldev-query-scratch-") + if err != nil { + return nil, fmt.Errorf("creating scratch directory:\n%w", err) + } + + defer func() { + if removeErr := env.FS().RemoveAll(scratchDir); removeErr != nil { + slog.Debug("Failed to clean up scratch directory", "path", scratchDir, "error", removeErr) } + }() + + queryResults, err := mockProcessor.BatchQuerySpecs( + env, env, renderedSpecsDir, scratchDir, inputs, env.FS(), env.CPUBoundConcurrency(), + ) + if err != nil { + return nil, fmt.Errorf("batch-querying rendered specs:\n%w", err) + } + + allDetails := make([]*componentDetails, 0, len(queryResults)) + + var failed int - details := &componentDetails{ - ComponentSpecDetails: *specInfo, + for _, queryResult := range queryResults { + if queryResult.Error != nil { + slog.Warn("Failed to query rendered spec", + "component", queryResult.Name, "error", queryResult.Error) + + failed++ + + continue } - allDetails = append(allDetails, details) + allDetails = append(allDetails, &componentDetails{ + ComponentSpecDetails: specs.ComponentSpecDetails{ + SpecInfo: *queryResult.Info, + }, + }) + } + + if failed > 0 { + return allDetails, fmt.Errorf("%d component(s) failed to query (see warnings)", failed) } return allDetails, nil } + +// buildSpecQueryInputs walks the resolved components and constructs the list +// of [sources.SpecQueryInput] entries to pass to BatchQuerySpecs. Components +// whose rendered spec directory carries a RENDER_FAILED marker (or whose +// rendered .spec file is missing) are skipped with a loud warning and counted +// toward `skipped`. 
+func buildSpecQueryInputs( + env *azldev.Env, + componentList []components.Component, + renderedSpecsDir string, +) (inputs []sources.SpecQueryInput, skipped int, err error) { + inputs = make([]sources.SpecQueryInput, 0, len(componentList)) + + for _, comp := range componentList { + name := comp.GetName() + cfg := comp.GetConfig() + + if cfg.RenderedSpecDir == "" { + return nil, 0, fmt.Errorf( + "component %#q has no rendered-spec dir; ensure project.rendered-specs-dir is set", + name) + } + + if hasMarker, markerErr := hasRenderFailedMarker(env, cfg.RenderedSpecDir); markerErr != nil { + return nil, 0, fmt.Errorf("checking RENDER_FAILED marker for %#q:\n%w", name, markerErr) + } else if hasMarker { + slog.Warn( + "Skipping component: RENDER_FAILED marker present; run 'azldev component render' to refresh", + "component", name, "dir", cfg.RenderedSpecDir) + + skipped++ + + continue + } + + specPath := filepath.Join(cfg.RenderedSpecDir, name+".spec") + + specExists, statErr := fileutils.Exists(env.FS(), specPath) + if statErr != nil { + return nil, 0, fmt.Errorf("checking rendered spec %#q:\n%w", specPath, statErr) + } + + if !specExists { + slog.Warn( + "Skipping component: rendered spec not found; run 'azldev component render' to produce it", + "component", name, "expectedSpec", specPath) + + skipped++ + + continue + } + + relSpecPath, relErr := filepath.Rel(renderedSpecsDir, specPath) + if relErr != nil { + return nil, 0, fmt.Errorf("relativizing spec path %#q against %#q:\n%w", + specPath, renderedSpecsDir, relErr) + } + + inputs = append(inputs, sources.SpecQueryInput{ + Name: name, + SpecRelPath: relSpecPath, + With: cfg.Build.With, + Without: cfg.Build.Without, + Defines: cfg.Build.Defines, + }) + } + + return inputs, skipped, nil +} + +// hasRenderFailedMarker reports whether the given rendered-spec dir carries +// the marker file written by 'component render' on failure. 
+func hasRenderFailedMarker(env *azldev.Env, renderedSpecDir string) (bool, error) { + markerPath := filepath.Join(renderedSpecDir, renderErrorMarkerFile) + + exists, err := fileutils.Exists(env.FS(), markerPath) + if err != nil { + return false, fmt.Errorf("checking %#q:\n%w", markerPath, err) + } + + return exists, nil +} diff --git a/internal/app/azldev/cmds/component/query_internal_test.go b/internal/app/azldev/cmds/component/query_internal_test.go new file mode 100644 index 00000000..012d1f63 --- /dev/null +++ b/internal/app/azldev/cmds/component/query_internal_test.go @@ -0,0 +1,153 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. + +package component + +import ( + "path/filepath" + "testing" + + "github.com/microsoft/azure-linux-dev-tools/internal/app/azldev/core/components" + "github.com/microsoft/azure-linux-dev-tools/internal/app/azldev/core/testutils" + "github.com/microsoft/azure-linux-dev-tools/internal/projectconfig" + "github.com/microsoft/azure-linux-dev-tools/internal/utils/fileperms" + "github.com/microsoft/azure-linux-dev-tools/internal/utils/fileutils" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +// resolveComponents is a small helper to drive the component resolver in +// internal tests so we can call buildSpecQueryInputs with realistic inputs. 
+func resolveComponents(t *testing.T, testEnv *testutils.TestEnv, names ...string) []components.Component { + t.Helper() + + resolver := components.NewResolver(testEnv.Env) + + comps, err := resolver.FindComponents(&components.ComponentFilter{ + ComponentNamePatterns: names, + }) + require.NoError(t, err) + + return comps.Components() +} + +func TestBuildSpecQueryInputs_Happy(t *testing.T) { + const ( + componentName = "curl" + renderedSpecsDir = "/project/specs" + ) + + testEnv := testutils.NewTestEnv(t) + testEnv.Config.Project.RenderedSpecsDir = renderedSpecsDir + testEnv.Config.Components[componentName] = projectconfig.ComponentConfig{ + Name: componentName, + Spec: projectconfig.SpecSource{ + SourceType: projectconfig.SpecSourceTypeLocal, + Path: "/project/curl.spec", + }, + Build: projectconfig.ComponentBuildConfig{ + With: []string{"foo"}, + Without: []string{"bar"}, + Defines: map[string]string{"key": "value"}, + }, + } + + // Spec source (for resolver) and rendered spec (for our path). 
+ require.NoError(t, fileutils.WriteFile( + testEnv.FS(), "/project/curl.spec", []byte(""), fileperms.PublicFile, + )) + + renderedDir := filepath.Join(renderedSpecsDir, "c", componentName) + require.NoError(t, fileutils.MkdirAll(testEnv.FS(), renderedDir)) + require.NoError(t, fileutils.WriteFile( + testEnv.FS(), filepath.Join(renderedDir, componentName+".spec"), + []byte(""), fileperms.PublicFile, + )) + + resolved := resolveComponents(t, testEnv, componentName) + require.Len(t, resolved, 1) + + inputs, skipped, err := buildSpecQueryInputs(testEnv.Env, resolved, renderedSpecsDir) + require.NoError(t, err) + assert.Zero(t, skipped) + require.Len(t, inputs, 1) + + assert.Equal(t, componentName, inputs[0].Name) + assert.Equal(t, filepath.Join("c", componentName, componentName+".spec"), inputs[0].SpecRelPath) + assert.Equal(t, []string{"foo"}, inputs[0].With) + assert.Equal(t, []string{"bar"}, inputs[0].Without) + assert.Equal(t, map[string]string{"key": "value"}, inputs[0].Defines) +} + +func TestBuildSpecQueryInputs_SkipsRenderFailedMarker(t *testing.T) { + const ( + componentName = "curl" + renderedSpecsDir = "/project/specs" + ) + + testEnv := testutils.NewTestEnv(t) + testEnv.Config.Project.RenderedSpecsDir = renderedSpecsDir + testEnv.Config.Components[componentName] = projectconfig.ComponentConfig{ + Name: componentName, + Spec: projectconfig.SpecSource{ + SourceType: projectconfig.SpecSourceTypeLocal, + Path: "/project/curl.spec", + }, + } + + require.NoError(t, fileutils.WriteFile( + testEnv.FS(), "/project/curl.spec", []byte(""), fileperms.PublicFile, + )) + + renderedDir := filepath.Join(renderedSpecsDir, "c", componentName) + require.NoError(t, fileutils.MkdirAll(testEnv.FS(), renderedDir)) + // Spec file exists, but so does the marker — the marker wins. 
+ require.NoError(t, fileutils.WriteFile( + testEnv.FS(), filepath.Join(renderedDir, componentName+".spec"), + []byte(""), fileperms.PublicFile, + )) + require.NoError(t, fileutils.WriteFile( + testEnv.FS(), filepath.Join(renderedDir, renderErrorMarkerFile), + []byte("RENDER FAILED"), fileperms.PublicFile, + )) + + resolved := resolveComponents(t, testEnv, componentName) + + inputs, skipped, err := buildSpecQueryInputs(testEnv.Env, resolved, renderedSpecsDir) + require.NoError(t, err) + assert.Empty(t, inputs) + assert.Equal(t, 1, skipped) +} + +func TestBuildSpecQueryInputs_SkipsMissingSpec(t *testing.T) { + const ( + componentName = "curl" + renderedSpecsDir = "/project/specs" + ) + + testEnv := testutils.NewTestEnv(t) + testEnv.Config.Project.RenderedSpecsDir = renderedSpecsDir + testEnv.Config.Components[componentName] = projectconfig.ComponentConfig{ + Name: componentName, + Spec: projectconfig.SpecSource{ + SourceType: projectconfig.SpecSourceTypeLocal, + Path: "/project/curl.spec", + }, + } + + require.NoError(t, fileutils.WriteFile( + testEnv.FS(), "/project/curl.spec", []byte(""), fileperms.PublicFile, + )) + + // Rendered dir exists but the .spec inside it does not. 
+ require.NoError(t, fileutils.MkdirAll( + testEnv.FS(), filepath.Join(renderedSpecsDir, "c", componentName), + )) + + resolved := resolveComponents(t, testEnv, componentName) + + inputs, skipped, err := buildSpecQueryInputs(testEnv.Env, resolved, renderedSpecsDir) + require.NoError(t, err) + assert.Empty(t, inputs) + assert.Equal(t, 1, skipped) +} diff --git a/internal/app/azldev/cmds/component/query_test.go b/internal/app/azldev/cmds/component/query_test.go index 9d605e02..8f61c6ee 100644 --- a/internal/app/azldev/cmds/component/query_test.go +++ b/internal/app/azldev/cmds/component/query_test.go @@ -4,15 +4,11 @@ package component_test import ( - "os/exec" "testing" "github.com/microsoft/azure-linux-dev-tools/internal/app/azldev/cmds/component" "github.com/microsoft/azure-linux-dev-tools/internal/app/azldev/core/components" "github.com/microsoft/azure-linux-dev-tools/internal/app/azldev/core/testutils" - "github.com/microsoft/azure-linux-dev-tools/internal/projectconfig" - "github.com/microsoft/azure-linux-dev-tools/internal/rpm/mock" - "github.com/microsoft/azure-linux-dev-tools/internal/utils/fileperms" "github.com/microsoft/azure-linux-dev-tools/internal/utils/fileutils" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" @@ -38,45 +34,56 @@ func TestComponentQueryCmd_NoMatch(t *testing.T) { require.Error(t, err) } -func TestQueryComponents_OneComponent(t *testing.T) { - const ( - testComponentName = "test-component" - testSpecPath = "/path/to/spec" - ) - +func TestQueryComponents_MissingRenderedSpecsDir(t *testing.T) { testEnv := testutils.NewTestEnv(t) - testEnv.Config.Components[testComponentName] = projectconfig.ComponentConfig{ - Name: testComponentName, - Spec: projectconfig.SpecSource{ - SourceType: projectconfig.SpecSourceTypeLocal, - Path: testSpecPath, + + // Test env constructProjectConfig leaves RenderedSpecsDir empty. 
+ options := component.QueryComponentsOptions{ + ComponentFilter: components.ComponentFilter{ + ComponentNamePatterns: []string{"any"}, }, } - // Pretend mock is present. - testEnv.CmdFactory.RegisterCommandInSearchPath(mock.MockBinary) + _, err := component.QueryComponents(testEnv.Env, &options) + require.Error(t, err) + assert.Contains(t, err.Error(), "rendered-specs-dir is not configured") +} + +func TestQueryComponents_RenderedSpecsDirDoesNotExist(t *testing.T) { + const renderedSpecsDir = "/project/specs" - // Mock the rpmspec command to return valid output - // NOTE: This takes a dependency on knowing how rpmspec gets invoked. - testEnv.CmdFactory.RunAndGetOutputHandler = func(cmd *exec.Cmd) (string, error) { - // Return mock rpmspec output in the expected format: name|epoch|version|release - return "name=test-component\nepoch=0\nversion=1.0.0\nrelease=1.azl3\n", nil - } + testEnv := testutils.NewTestEnv(t) + testEnv.Config.Project.RenderedSpecsDir = renderedSpecsDir + // Do NOT create the directory on the test filesystem. options := component.QueryComponentsOptions{ ComponentFilter: components.ComponentFilter{ - ComponentNamePatterns: []string{testComponentName}, + ComponentNamePatterns: []string{"any"}, }, } - // Simulate the spec file existing. - err := fileutils.WriteFile(testEnv.FS(), testSpecPath, []byte("test spec content"), fileperms.PublicFile) - require.NoError(t, err) + _, err := component.QueryComponents(testEnv.Env, &options) + require.Error(t, err) + assert.Contains(t, err.Error(), "does not exist") +} + +// Smoke test: when filter matches no components, the resolver surfaces an +// error before any rendered-spec validation runs. 
+func TestQueryComponents_NoComponentsSelected(t *testing.T) { + const renderedSpecsDir = "/project/specs" + + testEnv := testutils.NewTestEnv(t) + testEnv.Config.Project.RenderedSpecsDir = renderedSpecsDir + + require.NoError(t, fileutils.MkdirAll(testEnv.FS(), renderedSpecsDir)) - results, err := component.QueryComponents(testEnv.Env, &options) - require.NoError(t, err) - require.Len(t, results, 1) + // No components configured at all. + options := component.QueryComponentsOptions{ + ComponentFilter: components.ComponentFilter{ + ComponentNamePatterns: []string{"nonexistent"}, + }, + } - result := results[0] - assert.Equal(t, testComponentName, result.Name) + _, err := component.QueryComponents(testEnv.Env, &options) + require.Error(t, err) } diff --git a/internal/app/azldev/cmds/component/render.go b/internal/app/azldev/cmds/component/render.go index 2f2cc54f..79477548 100644 --- a/internal/app/azldev/cmds/component/render.go +++ b/internal/app/azldev/cmds/component/render.go @@ -173,7 +173,7 @@ func RenderComponents(env *azldev.Env, options *RenderOptions) ([]*RenderResult, } // Create mock processor for rpmautospec/spectool. - mockProcessor := createMockProcessor(env) + mockProcessor := createMockProcessor(env, mockPackagesForRender()) if mockProcessor == nil { return nil, errors.New( "mock config required for rendering; ensure the project has a valid distro with mock config") @@ -1126,28 +1126,6 @@ func writeFailureMarkers( } } -// createMockProcessor creates a [sources.MockProcessor] using the project's -// mock config. Returns nil if the mock config is not available (e.g., no project -// config loaded, or no mock config path configured). 
-func createMockProcessor(env *azldev.Env) *sources.MockProcessor { - _, distroVerDef, err := env.Distro() - if err != nil { - slog.Info("Mock processor unavailable; could not resolve distro", "error", err) - - return nil - } - - if distroVerDef.MockConfigPath == "" { - slog.Info("Mock processor unavailable; no mock config path configured") - - return nil - } - - slog.Info("Mock processor available", "mockConfig", distroVerDef.MockConfigPath) - - return sources.NewMockProcessor(env, distroVerDef.MockConfigPath) -} - // validateCleanStaleOptions enforces the constraints around --clean-stale. // Extracted from RenderComponents to keep its complexity below the linter's // cyclomatic threshold. diff --git a/internal/app/azldev/core/sources/mockprocessor.go b/internal/app/azldev/core/sources/mockprocessor.go index 0951779e..964a7594 100644 --- a/internal/app/azldev/core/sources/mockprocessor.go +++ b/internal/app/azldev/core/sources/mockprocessor.go @@ -9,6 +9,7 @@ import ( "encoding/json" "fmt" "log/slog" + "path" "path/filepath" "strconv" "strings" @@ -29,18 +30,22 @@ var renderProcessScript []byte // first use and supports batch processing of multiple components in a single // mock invocation. type MockProcessor struct { - mu sync.Mutex - runner *mock.Runner - initialized bool - initErr error + mu sync.Mutex + runner *mock.Runner + requiredPackages []string + initialized bool + initErr error } // NewMockProcessor creates a new processor that will lazily initialize // a mock chroot using the given config path. The runner is created eagerly -// but the chroot is only initialized on first use. -func NewMockProcessor(ctx opctx.Ctx, mockConfigPath string) *MockProcessor { +// but the chroot is only initialized on first use. requiredPackages are +// installed inside the chroot on first use; pass nil/empty to skip the +// install step (rely on whatever the buildroot ships by default). 
+func NewMockProcessor(ctx opctx.Ctx, mockConfigPath string, requiredPackages []string) *MockProcessor { return &MockProcessor{ - runner: mock.NewRunner(ctx, mockConfigPath), + runner: mock.NewRunner(ctx, mockConfigPath), + requiredPackages: append([]string(nil), requiredPackages...), } } @@ -99,7 +104,7 @@ func (p *MockProcessor) initOnce(ctx context.Context) error { return p.initErr } - slog.Info("Initializing mock chroot for rendering") + slog.Info("Initializing mock chroot") p.runner.EnableNetwork() @@ -110,21 +115,18 @@ func (p *MockProcessor) initOnce(ctx context.Context) error { return p.initErr } - // Install rpmautospec (macro expansion), rpmdevtools (spectool), and git - // (required for rpmautospec to read commit history). - // python3-click is required by rpmautospec but not declared as an RPM dependency. - // Ecosystem macro packages (go-srpm-macros, etc.) are already present via - // @buildsys-build → azurelinux-rpm-config. - if err := p.runner.InstallPackages(ctx, []string{"rpmautospec", "rpmdevtools", "git", "python3-click"}); err != nil { - p.initErr = fmt.Errorf("failed to install packages in mock chroot:\n%w", err) - p.initialized = true + if len(p.requiredPackages) > 0 { + if err := p.runner.InstallPackages(ctx, p.requiredPackages); err != nil { + p.initErr = fmt.Errorf("failed to install packages in mock chroot:\n%w", err) + p.initialized = true - return p.initErr + return p.initErr + } } p.initialized = true - slog.Info("Mock chroot ready for rendering") + slog.Info("Mock chroot ready") return nil } @@ -141,9 +143,6 @@ func (p *MockProcessor) BatchProcess( ctx context.Context, events opctx.EventListener, stagingDir string, inputs []ComponentInput, fs opctx.FS, maxWorkers int, ) ([]ComponentMockResult, error) { - p.mu.Lock() - defer p.mu.Unlock() - if len(inputs) == 0 { return nil, nil } @@ -152,37 +151,125 @@ func (p *MockProcessor) BatchProcess( return nil, err } - if err := p.initOnce(ctx); err != nil { - return nil, err + jsonInputs := 
make([]componentInputJSON, len(inputs)) + for idx, input := range inputs { + jsonInputs[idx] = componentInputJSON(input) + } + + inputsBytes, err := json.Marshal(jsonInputs) + if err != nil { + return nil, fmt.Errorf("marshaling inputs:\n%w", err) } slog.Info("Batch processing components in mock chroot", "count", len(inputs)) - // Write the Python script and inputs manifest to the staging directory. - scriptPath := filepath.Join(stagingDir, "render_process.py") - if err := fileutils.WriteFile(fs, scriptPath, renderProcessScript, fileperms.PublicExecutable); err != nil { - return nil, fmt.Errorf("writing render script:\n%w", err) + const chrootStagingPath = "/tmp/render" + + workers := strconv.Itoa(max(1, maxWorkers)) // 1x CPU; mock work is CPU-bound + + rawResults, err := p.runBatchScript(ctx, events, runBatchScriptOptions{ + Mounts: []batchBindMount{{Host: stagingDir, InChroot: chrootStagingPath}}, + ScratchHost: stagingDir, + ScratchInChroot: chrootStagingPath, + ScriptName: "render_process.py", + ScriptBytes: renderProcessScript, + InputsJSON: inputsBytes, + ResultsName: "results.json", + ScriptArgs: []string{chrootStagingPath, workers}, + ProgressLabel: "Processing specs in mock chroot", + ProgressTotal: int64(len(inputs)), + FS: fs, + }) + if err != nil { + return nil, err } - if err := writeInputsManifest(fs, stagingDir, inputs); err != nil { + return parseBatchJSON(string(rawResults), inputs) +} + +// batchBindMount describes one host-to-chroot bind mount used by runBatchScript. +type batchBindMount struct { + Host string + InChroot string +} + +// runBatchScriptOptions parameterizes a single batch-script invocation. +type runBatchScriptOptions struct { + // Mounts is the full set of host-to-chroot bind mounts to add to the runner. + // The scratch dir must be reachable via one of these mounts (typically the + // first entry), so the script can locate its inputs and write results. 
+ Mounts []batchBindMount + // ScratchHost is the host-side directory where the script, inputs manifest, + // and results file are read and written. + ScratchHost string + // ScratchInChroot is the in-chroot path that maps to ScratchHost. + ScratchInChroot string + // ScriptName is the basename used when writing the embedded Python script + // into ScratchHost (e.g. "render_process.py"). + ScriptName string + ScriptBytes []byte + // InputsJSON is the JSON-encoded inputs manifest, written as + // ScratchHost/inputs.json. + InputsJSON []byte + // ResultsName is the basename of the results file the script is expected + // to write into ScratchHost (e.g. "results.json"). + ResultsName string + // ScriptArgs is appended to the python3 invocation after the script path. + ScriptArgs []string + // ProgressLabel labels the progress event surfaced to the user. + ProgressLabel string + // ProgressTotal is the total used for progress reporting from PROGRESS lines. + ProgressTotal int64 + FS opctx.FS +} + +// runBatchScript executes a batched, parallelizable Python helper inside the +// shared mock chroot. It owns the lock + lazy init, writes the script and +// inputs into the host-side scratch dir, runs the script (which is expected to +// emit "PROGRESS N/TOTAL NAME" lines and write a results file), and +// returns the raw results bytes. +// +// This is the shared scaffolding for BatchProcess (rendering) and +// BatchQuerySpecs (querying). Per-operation concerns (input/result shape, +// embedded script, result parsing) live in the callers. +// + +func (p *MockProcessor) runBatchScript( + ctx context.Context, events opctx.EventListener, opts runBatchScriptOptions, +) ([]byte, error) { + p.mu.Lock() + defer p.mu.Unlock() + + if err := p.initOnce(ctx); err != nil { return nil, err } - // Clone the runner and add a single bind mount for the staging directory. + // Write the Python script and inputs manifest to the scratch directory. 
+ scriptHostPath := filepath.Join(opts.ScratchHost, opts.ScriptName) + if err := fileutils.WriteFile(opts.FS, scriptHostPath, opts.ScriptBytes, fileperms.PublicExecutable); err != nil { + return nil, fmt.Errorf("writing script %#q:\n%w", opts.ScriptName, err) + } + + inputsHostPath := filepath.Join(opts.ScratchHost, "inputs.json") + if err := fileutils.WriteFile(opts.FS, inputsHostPath, opts.InputsJSON, fileperms.PublicFile); err != nil { + return nil, fmt.Errorf("writing inputs manifest:\n%w", err) + } + + // Clone the runner and add the requested bind mounts. // WithUnprivileged drops to the mockbuild user for chroot commands, // matching how mock builds run and avoiding root-owned files in the - // bind-mounted staging directory. This is safe because mock defaults + // bind-mounted scratch directory. This is safe because mock defaults // chrootuid to os.getuid() — the mockbuild user inside the chroot has // the same UID as the host user, so bind-mounted files remain writable. runner := p.runner.Clone() runner.WithUnprivileged() - const chrootStagingPath = "/tmp/render" - runner.AddBindMount(stagingDir, chrootStagingPath) + for _, mount := range opts.Mounts { + runner.AddBindMount(mount.Host, mount.InChroot) + } - chrootScript := filepath.Join(chrootStagingPath, "render_process.py") - workers := strconv.Itoa(max(1, maxWorkers)) // 1x CPU; mock work is CPU-bound - args := []string{"python3", chrootScript, chrootStagingPath, workers} + scriptInChroot := path.Join(opts.ScratchInChroot, opts.ScriptName) + args := append([]string{"python3", scriptInChroot}, opts.ScriptArgs...) cmd, err := runner.CmdInChroot(ctx, args, false) if err != nil { @@ -193,19 +280,17 @@ func (p *MockProcessor) BatchProcess( // The script prints "PROGRESS / " to stderr, but // mock --chroot merges the inner command's stderr into stdout, so we // listen on stdout. 
- mockProgress := events.StartEvent("Processing specs in mock chroot", "count", len(inputs)) - mockProgress.SetLongRunning("Processing specs in mock chroot") - - defer mockProgress.End() + progress := events.StartEvent(opts.ProgressLabel, "count", opts.ProgressTotal) + progress.SetLongRunning(opts.ProgressLabel) - total := int64(len(inputs)) + defer progress.End() if listenerErr := cmd.SetRealTimeStdoutListener(func(_ context.Context, line string) { // Parse "PROGRESS / " lines. if after, found := strings.CutPrefix(line, "PROGRESS "); found { if slashIdx := strings.Index(after, "/"); slashIdx > 0 { if completed, parseErr := strconv.ParseInt(after[:slashIdx], 10, 64); parseErr == nil { - mockProgress.SetProgress(completed, total) + progress.SetProgress(completed, opts.ProgressTotal) } } } @@ -222,14 +307,14 @@ func (p *MockProcessor) BatchProcess( // Read results from the file written by the Python script. // Using a file avoids bufio.Scanner token size limits that would truncate // large JSON payloads when capturing stdout (e.g., 7k components ≈ 560KB). - resultsPath := filepath.Join(stagingDir, "results.json") + resultsHostPath := filepath.Join(opts.ScratchHost, opts.ResultsName) - resultsData, readErr := fileutils.ReadFile(fs, resultsPath) + resultsData, readErr := fileutils.ReadFile(opts.FS, resultsHostPath) if readErr != nil { - return nil, fmt.Errorf("reading batch results from %#q:\n%w", resultsPath, readErr) + return nil, fmt.Errorf("reading batch results from %#q:\n%w", resultsHostPath, readErr) } - return parseBatchJSON(string(resultsData), inputs) + return resultsData, nil } // componentInputJSON is the JSON-serializable form written to inputs.json. @@ -284,27 +369,6 @@ func parseBatchJSON(stdout string, inputs []ComponentInput) ([]ComponentMockResu return results, nil } -// writeInputsManifest writes the inputs.json manifest to the staging directory -// so it can be read by the Python script inside the mock chroot. 
-func writeInputsManifest(fs opctx.FS, stagingDir string, inputs []ComponentInput) error { - jsonInputs := make([]componentInputJSON, len(inputs)) - for idx, input := range inputs { - jsonInputs[idx] = componentInputJSON(input) - } - - data, err := json.Marshal(jsonInputs) - if err != nil { - return fmt.Errorf("marshaling inputs:\n%w", err) - } - - inputsPath := filepath.Join(stagingDir, "inputs.json") - if err := fileutils.WriteFile(fs, inputsPath, data, fileperms.PublicFile); err != nil { - return fmt.Errorf("writing inputs manifest:\n%w", err) - } - - return nil -} - // Destroy cleans up the mock chroot. Should be called when rendering is complete. // The processor must not be reused after Destroy — create a new MockProcessor if needed. // Attempts cleanup even if initialization partially failed (e.g., InitRoot succeeded diff --git a/internal/app/azldev/core/sources/query_process.py b/internal/app/azldev/core/sources/query_process.py new file mode 100644 index 00000000..33d0849b --- /dev/null +++ b/internal/app/azldev/core/sources/query_process.py @@ -0,0 +1,271 @@ +#!/usr/bin/env python3 +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + +"""Query RPM specs inside a mock chroot: run rpmspec twice per component (once +with --srpm for source NEVR, once without for binary subpackage names) and +write per-component results to a JSON file in the scratch directory. + +This script is embedded in the azldev Go binary and executed inside a mock chroot +during ``azldev component query``. It mirrors render_process.py's shape (a +ThreadPoolExecutor over per-component work, PROGRESS lines on stderr, a +results.json file in the scratch dir) so the Go-side plumbing can be shared. 
+ +Usage:: + + python3 query_process.py SCRATCH_DIR SPECS_DIR MAX_WORKERS + +The scratch directory must contain an ``inputs.json`` file:: + + [ + { + "name": "curl", + "specRelPath": "c/curl/curl.spec", + "srpmQueryFormat": "name=%{name}\\n...", + "subpackagesQueryFormat": "subpkg=%{name}\\n", + "with": ["foo"], + "without": ["bar"], + "defines": {"_sourcedir": "/some/path"} + }, + ... + ] + +Results are written to ``SCRATCH_DIR/results.json``:: + + [ + {"name": "curl", "srpmOut": "name=curl\\n...", "binOut": "subpkg=curl\\n...", "error": null}, + {"name": "broken", "srpmOut": "", "binOut": "", "error": "rpmspec --srpm failed: ..."} + ] + +Progress is reported to stderr as ``PROGRESS N/TOTAL NAME``. +""" + +import json +import os +import subprocess +import sys +from concurrent.futures import ThreadPoolExecutor, as_completed + + +def _rpmspec_args(spec_path, query_format, srpm, with_, without, defines): + """Compose an rpmspec command line. + + Always overrides _sourcedir and _specdir to the spec's own directory so + that sidecar files (e.g. `Source1: foo.azl.macros`) loaded with + `%{SOURCEN}` or `%{load:...}` resolve against the rendered spec tree + rather than mock's default /builddir/build/SOURCES. Also sets + `with_check 0` to match the legacy per-component rpmspec path. + + `_ghc_version_cache` short-circuits `%ghc_version` in ghc-rpm-macros, + which would otherwise run `ghc --numeric-version`. We don't install the + ghc compiler in the query chroot, so the lookup would fail with + "command not found", producing parse errors like: + error: line N: Version required: Requires: ghc-compiler = + We set `_ghc_version_cache` rather than the higher-priority + `ghc_version_override` because some specs (notably ghc.spec itself) + redefine `ghc_version_override` via `%global`; command-line -D macros + are sticky and would block those overrides. 
`_ghc_version_cache` is + consulted after `ghc_version_override` inside the macro, so any spec + setting the latter still wins, and we only intercept the shell-out + path that's broken for us. The exact value only feeds Requires/Provides + version tags; subpackage names don't depend on it, so a placeholder is + fine for our purpose. + + User-provided defines win on the rpmspec side (rpmspec honors the last + -D for a given macro), so we list ours first. + """ + spec_dir = os.path.dirname(spec_path) + args = ["rpmspec", "-q"] + if srpm: + args.append("--srpm") + args += ["--queryformat", query_format] + args += ["-D", f"_sourcedir {spec_dir}"] + args += ["-D", f"_specdir {spec_dir}"] + args += ["-D", "with_check 0"] + args += ["-D", "_ghc_version_cache 0.0.0"] + for w in with_: + args += ["--with", w] + for w in without: + args += ["--without", w] + for key, value in defines.items(): + args += ["-D", f"{key} {value}"] + args.append(spec_path) + return args + + +# Per-spec rewrites that work around quirks no -D override can fix. +# +# Each entry maps a spec basename to a list of (find, replace) tuples +# applied to the spec text before rpmspec is invoked. The rewrite happens +# on a scratch copy in the scratch dir; the original file in the rendered +# specs tree is never modified. +_SPEC_REWRITES = { + "ghc.spec": [ + # ghc.spec %undefines _ghcdynlibdir (line ~475) which defeats any + # -D _ghcdynlibdir override. The %post/%postun scriptlets that + # depend on it are then emitted inside `%if "%{?_ghcdynlibdir}" != + # "%_libdir"` and break rpmspec parsing with "package ghc-base does + # not exist" when ghc-rpm-macros is loaded but the ghc compiler + # isn't installed in our query chroot. We comment these scriptlets + # out — they don't affect subpackage enumeration. 
+ ("%post base -p /sbin/ldconfig", "# patched-out-for-azldev-query: %post base"), + ("%postun base -p /sbin/ldconfig", "# patched-out-for-azldev-query: %postun base"), + ], +} + + +def _maybe_rewrite_spec(spec_path, scratch_dir, comp_name): + """If spec_path needs known patches to parse under rpmspec, write a + rewritten copy into scratch_dir and return its path. Otherwise return + spec_path unchanged. + """ + rewrites = _SPEC_REWRITES.get(os.path.basename(spec_path)) + if not rewrites: + return spec_path + + with open(spec_path) as src: + content = src.read() + + for find, replace in rewrites: + content = content.replace(find, replace) + + out_path = os.path.join(scratch_dir, f"{comp_name}.patched.spec") + with open(out_path, "w") as dst: + dst.write(content) + + return out_path + + +def _run_rpmspec(args): + """Run rpmspec and return (stdout, stderr, returncode).""" + proc = subprocess.run(args, capture_output=True, text=True) + return proc.stdout, proc.stderr, proc.returncode + + +def process_component(specs_dir, scratch_dir, comp): + """Run rpmspec --srpm + rpmspec (no --srpm) for one component. + + Trust boundary: comp["name"] and comp["specRelPath"] are validated by + BatchQuerySpecs in specquery.go before this script is invoked. + """ + name = comp["name"] + spec_path = os.path.join(specs_dir, comp["specRelPath"]) + with_ = comp.get("with", []) or [] + without = comp.get("without", []) or [] + defines = comp.get("defines", {}) or {} + + if not os.path.isfile(spec_path): + return { + "name": name, + "srpmOut": "", + "binOut": "", + "error": f"spec file not found: {comp['specRelPath']}", + } + + # Apply per-spec rewrites (e.g. ghc.spec) to a scratch copy if needed. + # NOTE(review): _rpmspec_args derives _sourcedir/_specdir from the path it + # is given, so a rewritten copy resolves sidecar files against scratch_dir. + effective_spec = _maybe_rewrite_spec(spec_path, scratch_dir, name) + + # Source-level query (--srpm). 
+ srpm_args = _rpmspec_args( + effective_spec, comp["srpmQueryFormat"], True, with_, without, defines + ) + srpm_out, srpm_err, srpm_rc = _run_rpmspec(srpm_args) + if srpm_rc != 0: + return { + "name": name, + "srpmOut": srpm_out, + "binOut": "", + "error": f"rpmspec --srpm failed: {srpm_err.strip()}", + } + + # Binary subpackage enumeration (no --srpm). + # + # `--builtrpms` (vs the default `--rpms`) restricts the listing to binary + # packages that *would actually be built*, i.e. those with a `%files` + # section. This matters for specs like `wayland` whose main package has + # no `%files` and produces no binary RPM — only its subpackages + # (libwayland-client, etc.) do. Using `--builtrpms` makes the output a + # ground-truth list of the binary RPMs the spec would produce. + bin_args = _rpmspec_args( + effective_spec, + comp["subpackagesQueryFormat"], + False, + with_, + without, + defines, + ) + # Insert --builtrpms right after `-q` so it associates with the query. + bin_args.insert(2, "--builtrpms") + bin_out, bin_err, bin_rc = _run_rpmspec(bin_args) + if bin_rc != 0: + return { + "name": name, + "srpmOut": srpm_out, + "binOut": bin_out, + "error": f"rpmspec failed: {bin_err.strip()}", + } + + return { + "name": name, + "srpmOut": srpm_out, + "binOut": bin_out, + "error": None, + } + + +def main() -> int: + if len(sys.argv) != 4: + print( + f"usage: {sys.argv[0]} ", + file=sys.stderr, + ) + return 1 + + scratch_dir = sys.argv[1] + specs_dir = sys.argv[2] + max_workers = int(sys.argv[3]) + inputs_path = os.path.join(scratch_dir, "inputs.json") + + with open(inputs_path) as f: + inputs = json.load(f) + + total = len(inputs) + + with ThreadPoolExecutor(max_workers=max_workers) as pool: + futures = { + pool.submit(process_component, specs_dir, scratch_dir, comp): comp["name"] + for comp in inputs + } + + # Report progress to stderr as each component completes. 
+ # Note: mock --chroot merges the inner command's stderr into stdout, + # so the Go caller uses SetRealTimeStdoutListener to receive these. + completed_results = {} + for idx, future in enumerate(as_completed(futures), 1): + name = futures[future] + try: + completed_results[name] = future.result() + except Exception as exc: + completed_results[name] = { + "name": name, + "srpmOut": "", + "binOut": "", + "error": str(exc), + } + + print(f"PROGRESS {idx}/{total} {name}", file=sys.stderr, flush=True) + + # Collect results in input order (as_completed returns in completion order). + results = [completed_results[comp["name"]] for comp in inputs] + + results_path = os.path.join(scratch_dir, "results.json") + with open(results_path, "w") as results_file: + json.dump(results, results_file) + + return 0 + + +if __name__ == "__main__": + sys.exit(main()) diff --git a/internal/app/azldev/core/sources/specquery.go b/internal/app/azldev/core/sources/specquery.go new file mode 100644 index 00000000..d139adae --- /dev/null +++ b/internal/app/azldev/core/sources/specquery.go @@ -0,0 +1,237 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. + +package sources + +import ( + "context" + _ "embed" + "encoding/json" + "errors" + "fmt" + "log/slog" + "path/filepath" + "strconv" + "strings" + + "github.com/microsoft/azure-linux-dev-tools/internal/global/opctx" + "github.com/microsoft/azure-linux-dev-tools/internal/rpm" + "github.com/microsoft/azure-linux-dev-tools/internal/utils/fileutils" +) + +//go:embed query_process.py +var queryProcessScript []byte + +// SpecQueryInput describes a single rendered spec to query in the mock chroot. +// SpecRelPath is the path of the .spec relative to the specs directory bind +// mounted into the chroot (e.g. "c/curl/curl.spec"). +type SpecQueryInput struct { + Name string + SpecRelPath string + With []string + Without []string + Defines map[string]string +} + +// SpecQueryResult holds the batch-query result for one spec. 
+// Info is populated when Error is nil, and includes Subpackages. +type SpecQueryResult struct { + Name string + Info *rpm.SpecInfo + Error error +} + +// validateSpecQueryInputs rejects empty names, path-traversal in spec +// relative paths, absolute spec paths, and duplicate component names. +func validateSpecQueryInputs(inputs []SpecQueryInput) error { + seen := make(map[string]bool, len(inputs)) + + for _, input := range inputs { + if err := fileutils.ValidateFilename(input.Name); err != nil { + return fmt.Errorf("invalid component name %#q:\n%w", input.Name, err) + } + + if err := validateSpecRelPath(input.SpecRelPath); err != nil { + return fmt.Errorf("invalid spec path %#q for component %#q:\n%w", + input.SpecRelPath, input.Name, err) + } + + if seen[input.Name] { + return fmt.Errorf("duplicate component name %#q", input.Name) + } + + seen[input.Name] = true + } + + return nil +} + +// validateSpecRelPath rejects spec relative paths that are empty, absolute, +// non-canonical, contain ".." anywhere, or contain a null byte. +func validateSpecRelPath(relPath string) error { + if relPath == "" { + return errors.New("spec relative path cannot be empty") + } + + if filepath.IsAbs(relPath) { + return fmt.Errorf("spec path %#q must be relative", relPath) + } + + cleaned := filepath.Clean(relPath) + if cleaned != relPath { + return fmt.Errorf("spec path %#q must be in canonical form", relPath) + } + + if strings.Contains(cleaned, "..") { + return fmt.Errorf("spec path %#q must not contain path traversal", relPath) + } + + if strings.ContainsRune(relPath, 0) { + return fmt.Errorf("spec path %#q must not contain null bytes", relPath) + } + + return nil +} + +// specQueryInputJSON is the JSON-serializable form of [SpecQueryInput] +// written into inputs.json for the embedded Python helper. 
+type specQueryInputJSON struct { + Name string `json:"name"` + SpecRelPath string `json:"specRelPath"` + SrpmQueryFormat string `json:"srpmQueryFormat"` + SubpackagesQueryFormat string `json:"subpackagesQueryFormat"` + With []string `json:"with,omitempty"` + Without []string `json:"without,omitempty"` + Defines map[string]string `json:"defines,omitempty"` +} + +// specQueryResultJSON mirrors the per-component JSON shape written by +// query_process.py. +type specQueryResultJSON struct { + Name string `json:"name"` + SrpmOut string `json:"srpmOut"` + BinOut string `json:"binOut"` + Error *string `json:"error"` +} + +// BatchQuerySpecs runs `rpmspec` against multiple rendered spec files inside +// the shared mock chroot, parallelizing the per-spec invocations via an +// embedded Python helper. Returns one [SpecQueryResult] per input, in input +// order. +// +// specsDir is the host directory containing the rendered specs tree (i.e. +// the project's rendered-specs-dir). Each input's SpecRelPath is resolved +// relative to specsDir. scratchDir is a small host-side scratch directory +// used to ferry the script + inputs.json + results.json in and out of the +// chroot; it must be writable by the user the chroot runs as (mock's +// chrootuid defaults to os.getuid()). 
+func (p *MockProcessor) BatchQuerySpecs( + ctx context.Context, events opctx.EventListener, + specsDir, scratchDir string, + inputs []SpecQueryInput, + fs opctx.FS, maxWorkers int, +) ([]SpecQueryResult, error) { + if len(inputs) == 0 { + return nil, nil + } + + if err := validateSpecQueryInputs(inputs); err != nil { + return nil, err + } + + jsonInputs := make([]specQueryInputJSON, len(inputs)) + for idx, input := range inputs { + jsonInputs[idx] = specQueryInputJSON{ + Name: input.Name, + SpecRelPath: input.SpecRelPath, + SrpmQueryFormat: rpm.SrpmQueryFormat, + SubpackagesQueryFormat: rpm.SubpackagesQueryFormat, + With: input.With, + Without: input.Without, + Defines: input.Defines, + } + } + + inputsBytes, err := json.Marshal(jsonInputs) + if err != nil { + return nil, fmt.Errorf("marshaling spec query inputs:\n%w", err) + } + + slog.Info("Batch-querying rendered specs in mock chroot", "count", len(inputs)) + + const ( + chrootScratchPath = "/tmp/query" + chrootSpecsPath = "/tmp/specs" + ) + + workers := strconv.Itoa(max(1, maxWorkers)) + + rawResults, err := p.runBatchScript(ctx, events, runBatchScriptOptions{ + Mounts: []batchBindMount{ + {Host: scratchDir, InChroot: chrootScratchPath}, + {Host: specsDir, InChroot: chrootSpecsPath}, + }, + ScratchHost: scratchDir, + ScratchInChroot: chrootScratchPath, + ScriptName: "query_process.py", + ScriptBytes: queryProcessScript, + InputsJSON: inputsBytes, + ResultsName: "results.json", + ScriptArgs: []string{chrootScratchPath, chrootSpecsPath, workers}, + ProgressLabel: "Querying specs in mock chroot", + ProgressTotal: int64(len(inputs)), + FS: fs, + }) + if err != nil { + return nil, err + } + + return parseSpecQueryBatchJSON(rawResults, inputs) +} + +// parseSpecQueryBatchJSON parses the JSON array produced by query_process.py +// into [SpecQueryResult] values. 
Per-component rpmspec failures are surfaced +// as a non-nil Error on the result; parse failures of an otherwise-successful +// rpmspec invocation are likewise surfaced per component. +func parseSpecQueryBatchJSON(raw []byte, inputs []SpecQueryInput) ([]SpecQueryResult, error) { + var jsonResults []specQueryResultJSON + if err := json.Unmarshal(raw, &jsonResults); err != nil { + return nil, fmt.Errorf("parsing spec query batch results JSON:\n%w", err) + } + + resultMap := make(map[string]*specQueryResultJSON, len(jsonResults)) + for idx := range jsonResults { + resultMap[jsonResults[idx].Name] = &jsonResults[idx] + } + + results := make([]SpecQueryResult, len(inputs)) + + for idx, input := range inputs { + results[idx].Name = input.Name + + compResult, ok := resultMap[input.Name] + if !ok { + results[idx].Error = fmt.Errorf("no result returned for %#q", input.Name) + + continue + } + + if compResult.Error != nil { + results[idx].Error = fmt.Errorf("%s", *compResult.Error) + + continue + } + + info, parseErr := rpm.ParseSrpmQueryOutput(input.SpecRelPath, compResult.SrpmOut) + if parseErr != nil { + results[idx].Error = fmt.Errorf("parsing rpmspec --srpm output:\n%w", parseErr) + + continue + } + + info.Subpackages = rpm.ParseSubpackagesOutput(compResult.BinOut) + results[idx].Info = info + } + + return results, nil +} diff --git a/internal/app/azldev/core/sources/specquery_test.go b/internal/app/azldev/core/sources/specquery_test.go new file mode 100644 index 00000000..d3853269 --- /dev/null +++ b/internal/app/azldev/core/sources/specquery_test.go @@ -0,0 +1,193 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. + +//nolint:testpackage // Testing unexported parseSpecQueryBatchJSON. 
+package sources + +import ( + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +func TestParseSpecQueryBatchJSON_Success(t *testing.T) { + t.Parallel() + + raw := []byte(`[{ + "name": "curl", + "srpmOut": "name=curl\nepoch=(none)\nversion=8.5.0\nrelease=1.azl3\n", + "binOut": "subpkg=curl\nsubpkg=libcurl\nsubpkg=curl-devel\n", + "error": null + }]`) + + inputs := []SpecQueryInput{{Name: "curl", SpecRelPath: "c/curl/curl.spec"}} + + results, err := parseSpecQueryBatchJSON(raw, inputs) + require.NoError(t, err) + require.Len(t, results, 1) + require.NoError(t, results[0].Error) + require.NotNil(t, results[0].Info) + + assert.Equal(t, "curl", results[0].Info.Name) + assert.Equal(t, "8.5.0", results[0].Info.Version.Version()) + assert.Equal(t, "1.azl3", results[0].Info.Version.Release()) + assert.Equal(t, []string{"curl", "libcurl", "curl-devel"}, results[0].Info.Subpackages) +} + +func TestParseSpecQueryBatchJSON_PerComponentError(t *testing.T) { + t.Parallel() + + raw := []byte(`[ + {"name":"broken","srpmOut":"","binOut":"","error":"rpmspec --srpm failed: bad spec"} + ]`) + + inputs := []SpecQueryInput{{Name: "broken", SpecRelPath: "b/broken/broken.spec"}} + + results, err := parseSpecQueryBatchJSON(raw, inputs) + require.NoError(t, err) + require.Len(t, results, 1) + require.Error(t, results[0].Error) + assert.Contains(t, results[0].Error.Error(), "rpmspec --srpm failed") + assert.Nil(t, results[0].Info) +} + +func TestParseSpecQueryBatchJSON_MissingComponent(t *testing.T) { + t.Parallel() + + raw := []byte(`[]`) + inputs := []SpecQueryInput{{Name: "ghost", SpecRelPath: "g/ghost/ghost.spec"}} + + results, err := parseSpecQueryBatchJSON(raw, inputs) + require.NoError(t, err) + require.Len(t, results, 1) + require.Error(t, results[0].Error) + assert.Contains(t, results[0].Error.Error(), "no result returned") +} + +func TestParseSpecQueryBatchJSON_SrpmParseFailure(t *testing.T) { + t.Parallel() + + // srpmOut is missing 
required fields, so the per-component parser fails. + raw := []byte(`[{ + "name": "weird", + "srpmOut": "name=weird\n", + "binOut": "subpkg=weird\n", + "error": null + }]`) + + inputs := []SpecQueryInput{{Name: "weird", SpecRelPath: "w/weird/weird.spec"}} + + results, err := parseSpecQueryBatchJSON(raw, inputs) + require.NoError(t, err) + require.Len(t, results, 1) + require.Error(t, results[0].Error) + assert.Contains(t, results[0].Error.Error(), "parsing rpmspec --srpm output") + assert.Nil(t, results[0].Info) +} + +func TestParseSpecQueryBatchJSON_MultipleComponents(t *testing.T) { + t.Parallel() + + raw := []byte(`[ + {"name":"good","srpmOut":"name=good\nepoch=0\nversion=1.0\nrelease=1\n","binOut":"subpkg=good\n","error":null}, + {"name":"bad","srpmOut":"","binOut":"","error":"rpmspec failed: boom"} + ]`) + + inputs := []SpecQueryInput{ + {Name: "good", SpecRelPath: "g/good/good.spec"}, + {Name: "bad", SpecRelPath: "b/bad/bad.spec"}, + } + + results, err := parseSpecQueryBatchJSON(raw, inputs) + require.NoError(t, err) + require.Len(t, results, 2) + require.NoError(t, results[0].Error) + require.NotNil(t, results[0].Info) + assert.Equal(t, []string{"good"}, results[0].Info.Subpackages) + require.Error(t, results[1].Error) + assert.Contains(t, results[1].Error.Error(), "boom") +} + +func TestParseSpecQueryBatchJSON_InvalidJSON(t *testing.T) { + t.Parallel() + + inputs := []SpecQueryInput{{Name: "any", SpecRelPath: "a/any/any.spec"}} + + _, err := parseSpecQueryBatchJSON([]byte("not json{{{"), inputs) + require.Error(t, err) + assert.Contains(t, err.Error(), "parsing spec query batch results JSON") +} + +func TestValidateSpecQueryInputs(t *testing.T) { + t.Parallel() + + tests := []struct { + name string + inputs []SpecQueryInput + wantErr bool + errMsg string + }{ + { + name: "valid", + inputs: []SpecQueryInput{{Name: "curl", SpecRelPath: "c/curl/curl.spec"}}, + }, + { + name: "empty name", + inputs: []SpecQueryInput{{Name: "", SpecRelPath: "c/curl/curl.spec"}}, 
+ wantErr: true, errMsg: "invalid component name", + }, + { + name: "slash in name", + inputs: []SpecQueryInput{{Name: "c/curl", SpecRelPath: "c/curl/curl.spec"}}, + wantErr: true, errMsg: "invalid component name", + }, + { + name: "empty rel path", + inputs: []SpecQueryInput{{Name: "curl", SpecRelPath: ""}}, + wantErr: true, errMsg: "spec relative path cannot be empty", + }, + { + name: "absolute rel path", + inputs: []SpecQueryInput{{Name: "curl", SpecRelPath: "/c/curl/curl.spec"}}, + wantErr: true, errMsg: "must be relative", + }, + { + name: "traversal in rel path", + inputs: []SpecQueryInput{{Name: "curl", SpecRelPath: "c/curl/../../etc/passwd"}}, + wantErr: true, errMsg: "must be in canonical form", + }, + { + name: "canonical traversal in rel path", + inputs: []SpecQueryInput{{Name: "curl", SpecRelPath: "../etc/passwd"}}, + wantErr: true, errMsg: "must not contain path traversal", + }, + { + name: "non-canonical rel path", + inputs: []SpecQueryInput{{Name: "curl", SpecRelPath: "c//curl/curl.spec"}}, + wantErr: true, errMsg: "must be in canonical form", + }, + { + name: "duplicate name", + inputs: []SpecQueryInput{ + {Name: "curl", SpecRelPath: "c/curl/curl.spec"}, + {Name: "curl", SpecRelPath: "c/curl/curl.spec"}, + }, + wantErr: true, errMsg: "duplicate component name", + }, + } + + for _, testCase := range tests { + t.Run(testCase.name, func(t *testing.T) { + t.Parallel() + + err := validateSpecQueryInputs(testCase.inputs) + if testCase.wantErr { + require.Error(t, err) + assert.Contains(t, err.Error(), testCase.errMsg) + } else { + require.NoError(t, err) + } + }) + } +} diff --git a/internal/rpm/specquery.go b/internal/rpm/specquery.go index 4fda0017..f4766fe5 100644 --- a/internal/rpm/specquery.go +++ b/internal/rpm/specquery.go @@ -26,6 +26,10 @@ type SpecInfo struct { Name string Version Version RequiredFiles []string + // Subpackages lists the binary package names the spec produces, in the + // order rpmspec reports them. 
Empty when not queried (e.g., the + // per-component buildenv path that only requests --srpm output). + Subpackages []string } // NewSpecQuerier constructs a new [SpecQuerier] instance that will use the provided [buildenv.BuildEnv] @@ -105,7 +109,7 @@ func (q *SpecQuerier) composeRpmspecCmdline(specPath string) (result []string) { "-D", "_specdir " + specDirPath, "-D", "with_check 0", "--queryformat", - "name=%{name}\nepoch=%{epoch}\nversion=%{version}\nrelease=%{release}\n[source=%{SOURCE}\n][patch=%{PATCH}\n]", + SrpmQueryFormat, } for _, name := range q.buildOptions.With { @@ -125,6 +129,59 @@ func (q *SpecQuerier) composeRpmspecCmdline(specPath string) (result []string) { return result } +// Constants for the rpmspec queryformat strings used by both the per-component +// (legacy) path and the batched query path. Exported so the batched path in +// [github.com/microsoft/azure-linux-dev-tools/internal/app/azldev/core/sources] +// can build the same command lines without duplicating the format strings. +const ( + // SrpmQueryFormat extracts the SRPM-level NEVR plus the source/patch files + // referenced by the spec. Used with rpmspec -q --srpm. + SrpmQueryFormat = "name=%{name}\n" + + "epoch=%{epoch}\n" + + "version=%{version}\n" + + "release=%{release}\n" + + "[source=%{SOURCE}\n]" + + "[patch=%{PATCH}\n]" + + // SubpackagesQueryFormat enumerates the binary subpackage names a spec + // would produce, one per line. Used with rpmspec -q (no --srpm). + SubpackagesQueryFormat = "subpkg=%{name}\n" +) + +// ParseSrpmQueryOutput parses the stdout of `rpmspec -q --srpm --queryformat +// SrpmQueryFormat <spec>` and returns the populated [SpecInfo] (without +// Subpackages). Exposed so batched callers can reuse the parsing logic.
+func ParseSrpmQueryOutput(specPath, output string) (*SpecInfo, error) { + return parseRpmspecOutput(specPath, output) +} + +// ParseSubpackagesOutput parses the stdout of `rpmspec -q --queryformat +// SubpackagesQueryFormat <spec>` into the list of binary subpackage names in +// the order rpmspec emitted them. Blank lines, empty `subpkg=` values, and other +// non-`subpkg=` lines are skipped; `error: `/`warning: ` lines are debug-logged. +func ParseSubpackagesOutput(output string) []string { + var result []string + + for _, line := range strings.Split(output, "\n") { + trimmed := strings.TrimSpace(line) + if trimmed == "" { + continue + } + + if strings.HasPrefix(trimmed, "error: ") || strings.HasPrefix(trimmed, "warning: ") { + slog.Debug("Ignoring rpmspec error", "line", trimmed) + + continue + } + + if after, ok := strings.CutPrefix(trimmed, "subpkg="); ok && after != "" { + result = append(result, after) + } + } + + return result +} + //nolint:cyclop // This function's complexity is due to the if/else-if cases for parsing.
func parseRpmspecOutput(specPath, output string) (specInfo *SpecInfo, err error) { var name, epoch, version, release string diff --git a/internal/rpm/specquery_test.go b/internal/rpm/specquery_test.go index 66532af5..5dedaaea 100644 --- a/internal/rpm/specquery_test.go +++ b/internal/rpm/specquery_test.go @@ -421,3 +421,79 @@ func requireNewVersion(t *testing.T, versionStr string) rpm.Version { return *version } + +func TestParseSubpackagesOutput(t *testing.T) { + t.Parallel() + + tests := []struct { + name string + output string + want []string + }{ + { + name: "empty output", + output: "", + want: nil, + }, + { + name: "single subpackage", + output: "subpkg=curl\n", + want: []string{"curl"}, + }, + { + name: "multiple subpackages with whitespace", + output: "subpkg=curl\nsubpkg=libcurl\n\nsubpkg=curl-devel\n", + want: []string{"curl", "libcurl", "curl-devel"}, + }, + { + name: "ignores warnings and errors", + output: "warning: some macro thing\nerror: another\nsubpkg=foo\n", + want: []string{"foo"}, + }, + { + name: "ignores unknown lines", + output: "garbage line\nsubpkg=foo\nother=bar\n", + want: []string{"foo"}, + }, + { + name: "skips empty values", + output: "subpkg=\nsubpkg=valid\n", + want: []string{"valid"}, + }, + } + + for _, testCase := range tests { + t.Run(testCase.name, func(t *testing.T) { + t.Parallel() + + got := rpm.ParseSubpackagesOutput(testCase.output) + assert.Equal(t, testCase.want, got) + }) + } +} + +func TestParseSrpmQueryOutput_Success(t *testing.T) { + t.Parallel() + + output := "name=curl\nepoch=(none)\nversion=8.5.0\nrelease=1.azl3\n" + + "source=https://example.com/curl-8.5.0.tar.xz\npatch=fix.patch\n" + + info, err := rpm.ParseSrpmQueryOutput("/specs/c/curl/curl.spec", output) + require.NoError(t, err) + assert.Equal(t, "curl", info.Name) + assert.Equal(t, "8.5.0", info.Version.Version()) + assert.Equal(t, "1.azl3", info.Version.Release()) + assert.Equal(t, []string{"https://example.com/curl-8.5.0.tar.xz", "fix.patch"}, 
info.RequiredFiles) + assert.Empty(t, info.Subpackages, "Subpackages is populated by the caller, not the parser") +} + +func TestParseSrpmQueryOutput_MissingField(t *testing.T) { + t.Parallel() + + // Missing release line. + output := "name=curl\nepoch=0\nversion=8.5.0\n" + + _, err := rpm.ParseSrpmQueryOutput("/specs/c/curl/curl.spec", output) + require.Error(t, err) + assert.Contains(t, err.Error(), "missing required fields") +} diff --git a/scenario/component_query_test.go b/scenario/component_query_test.go index b8340d32..68240ba5 100644 --- a/scenario/component_query_test.go +++ b/scenario/component_query_test.go @@ -13,7 +13,8 @@ import ( "github.com/stretchr/testify/require" ) -// We test running `azldev query component` to make sure that spec parsing works as expected. +// We test running `azldev component query` to make sure that batch rpmspec +// processing against the rendered specs tree works as expected. func TestQueryingAComponent(t *testing.T) { t.Parallel() @@ -22,23 +23,28 @@ func TestQueryingAComponent(t *testing.T) { t.Skip("skipping long test") } - // Create a simple spec with a known name and version. + // Create a simple spec with a known name and version. Add a subpackage + // so we can also verify that 'query' reports the binary subpackages. spec := projecttest.NewSpec( projecttest.WithName("test-component"), projecttest.WithVersion("3.1.4.159"), + projecttest.WithSubpackage("extra"), ) - // Create a simple project with the spec, using test default configs for distro and mock configurations. + // Create a simple project with the spec, using test default configs for + // distro and mock configurations. project := projecttest.NewDynamicTestProject( projecttest.AddSpec(spec), projecttest.UseTestDefaultConfigs(), ) - // Run the component query command with test default configs copied into the container. + // 'component query' now reads from the rendered specs tree, so render + // first as a pre-command and then query. 
results := projecttest.NewProjectTest( project, []string{"component", "query", spec.GetName()}, projecttest.WithTestDefaultConfigs(), + projecttest.WithPreCommand("component", "render", "-a"), ).RunInContainer(t) // Get the parsed JSON output. @@ -61,4 +67,22 @@ func TestQueryingAComponent(t *testing.T) { require.True(t, ok, "Version field is not a map") require.Contains(t, versionMap, "Version") assert.Equal(t, spec.GetVersion(), versionMap["Version"]) + + // Check that subpackages were extracted. + require.Contains(t, componentOutput, "Subpackages") + subpackages, ok := componentOutput["Subpackages"].([]interface{}) + require.True(t, ok, "Subpackages should be a list") + + subpkgNames := make([]string, 0, len(subpackages)) + for _, sp := range subpackages { + name, ok := sp.(string) + require.True(t, ok, "Subpackage entry should be a string") + + subpkgNames = append(subpkgNames, name) + } + + assert.Contains(t, subpkgNames, spec.GetName(), + "Subpackages should include the main package") + assert.Contains(t, subpkgNames, spec.GetName()+"-extra", + "Subpackages should include the explicitly-added subpackage") } diff --git a/scenario/internal/projecttest/testspec.go b/scenario/internal/projecttest/testspec.go index 6104beb3..15858b09 100644 --- a/scenario/internal/projecttest/testspec.go +++ b/scenario/internal/projecttest/testspec.go @@ -15,10 +15,11 @@ const NoArch = "noarch" // TestSpec represents an RPM spec being composed for testing purposes. type TestSpec struct { - name string - version string - release string - buildArch string + name string + version string + release string + buildArch string + subpackages []string } // NewSpec creates a new [TestSpec] with the specified options. @@ -81,6 +82,15 @@ func WithBuildArch(arch string) TestSpecOption { } } +// WithSubpackage appends an additional binary subpackage (named +// "<name>-<suffix>") to the spec. The subpackage shares the main +// package's installed file so that rpmbuild would also be happy with it.
+func WithSubpackage(suffix string) TestSpecOption { + return func(s *TestSpec) { + s.subpackages = append(s.subpackages, suffix) + } +} + // Render generates the spec file content as a string. func (s *TestSpec) Render() string { lines := []string{ @@ -100,6 +110,20 @@ func (s *TestSpec) Render() string { "%description", "Test component for, you know, testing.", "", + }...) + + for _, sub := range s.subpackages { + lines = append(lines, []string{ + "%package " + sub, + "Summary: A test subpackage", + "", + "%description " + sub, + "Subpackage " + sub + " for testing.", + "", + }...) + } + + lines = append(lines, []string{ "%build", "echo hello >file.txt", "", @@ -112,5 +136,13 @@ func (s *TestSpec) Render() string { "", }...) + for _, sub := range s.subpackages { + lines = append(lines, []string{ + "%files " + sub, + "%{_datadir}/test-component", + "", + }...) + } + return strings.Join(lines, "\n") } From 090fa3137555eab3980e875978d214218fa8b61f Mon Sep 17 00:00:00 2001 From: Nan Liu Date: Thu, 21 May 2026 22:12:02 +0000 Subject: [PATCH 2/2] feat(component query): add --arch flag with ExclusiveArch/ExcludeArch enforcement Add an --arch flag to 'component query' (default x86_64) that drives rpmspec's --target and enforces ExclusiveArch/ExcludeArch policy via a probe queryformat wrapped around the srpm query. Specs excluded by the selected arch surface as ExcludedFromArch entries and are summarized in a per-run log line. Harden the per-spec query subprocess: - 180s rpmspec timeout with a dedicated _RpmspecTimeout exception - UTF-8 decoding with errors=replace on file IO and subprocess output Plumb the arch parameter through BatchQuerySpecs and add ExcludedFromArch to SpecQueryResult / its JSON mapping. Teach runFuncInternal to render partial results when the inner func returns both a value and an error, so per-component query failures still print the successful rows while exiting non-zero. 
Add javapackages-common to the mock package list so %javadoc_package (from macros.fjava) doesn't silently disappear. Bump scenario AZL 4.0 upstream + mock releasever to Fedora 43; java-srpm-macros is missing from f42-build koji. Regenerate CLI docs. --- docs/user/reference/cli/azldev.md | 2 +- docs/user/reference/cli/azldev_advanced.md | 2 +- docs/user/reference/cli/azldev_completion.md | 2 +- docs/user/reference/cli/azldev_component.md | 2 +- .../reference/cli/azldev_component_query.md | 1 + docs/user/reference/cli/azldev_config.md | 2 +- docs/user/reference/cli/azldev_docs.md | 2 +- docs/user/reference/cli/azldev_image.md | 2 +- docs/user/reference/cli/azldev_package.md | 2 +- docs/user/reference/cli/azldev_project.md | 2 +- docs/user/reference/cli/azldev_version.md | 2 +- .../app/azldev/cmds/component/mockproc.go | 27 ++- internal/app/azldev/cmds/component/query.go | 62 +++++- internal/app/azldev/command.go | 27 +++ .../app/azldev/core/sources/query_process.py | 199 ++++++++++++++++-- internal/app/azldev/core/sources/specquery.go | 41 +++- .../app/azldev/core/sources/specquery_test.go | 21 ++ scenario/internal/projecttest/testspec.go | 15 +- .../distros/azl/azl.distro.toml | 2 +- .../distros/azl/mock/azurelinux-4.tpl | 2 +- 20 files changed, 367 insertions(+), 50 deletions(-) diff --git a/docs/user/reference/cli/azldev.md b/docs/user/reference/cli/azldev.md index 54145fed..00c8b4db 100644 --- a/docs/user/reference/cli/azldev.md +++ b/docs/user/reference/cli/azldev.md @@ -2,7 +2,7 @@ ## azldev -🐧 Azure Linux Dev Tool 0.0.0-devel +🐧 Azure Linux Dev Tool ### Synopsis diff --git a/docs/user/reference/cli/azldev_advanced.md b/docs/user/reference/cli/azldev_advanced.md index 51dbd9f4..85dc88d0 100644 --- a/docs/user/reference/cli/azldev_advanced.md +++ b/docs/user/reference/cli/azldev_advanced.md @@ -36,7 +36,7 @@ output but fully supported. 
### SEE ALSO -* [azldev](azldev.md) - 🐧 Azure Linux Dev Tool 0.0.0-devel +* [azldev](azldev.md) - 🐧 Azure Linux Dev Tool * [azldev advanced download-sources](azldev_advanced_download-sources.md) - Download source files listed in a Fedora-format sources file * [azldev advanced mcp](azldev_advanced_mcp.md) - Run in MCP server mode * [azldev advanced mock](azldev_advanced_mock.md) - Run RPM mock tool diff --git a/docs/user/reference/cli/azldev_completion.md b/docs/user/reference/cli/azldev_completion.md index 3afb0763..77e5c161 100644 --- a/docs/user/reference/cli/azldev_completion.md +++ b/docs/user/reference/cli/azldev_completion.md @@ -34,7 +34,7 @@ See each sub-command's help for details on how to use the generated script. ### SEE ALSO -* [azldev](azldev.md) - 🐧 Azure Linux Dev Tool 0.0.0-devel +* [azldev](azldev.md) - 🐧 Azure Linux Dev Tool * [azldev completion bash](azldev_completion_bash.md) - Generate the autocompletion script for bash * [azldev completion fish](azldev_completion_fish.md) - Generate the autocompletion script for fish * [azldev completion powershell](azldev_completion_powershell.md) - Generate the autocompletion script for powershell diff --git a/docs/user/reference/cli/azldev_component.md b/docs/user/reference/cli/azldev_component.md index be0a70e7..2ba04107 100644 --- a/docs/user/reference/cli/azldev_component.md +++ b/docs/user/reference/cli/azldev_component.md @@ -37,7 +37,7 @@ components defined in the project configuration. 
### SEE ALSO -* [azldev](azldev.md) - 🐧 Azure Linux Dev Tool 0.0.0-devel +* [azldev](azldev.md) - 🐧 Azure Linux Dev Tool * [azldev component add](azldev_component_add.md) - Add component(s) to this project * [azldev component build](azldev_component_build.md) - Build packages for components * [azldev component changed](azldev_component_changed.md) - Detect which components changed between two git refs diff --git a/docs/user/reference/cli/azldev_component_query.md b/docs/user/reference/cli/azldev_component_query.md index 92f2aa1e..688fdc35 100644 --- a/docs/user/reference/cli/azldev_component_query.md +++ b/docs/user/reference/cli/azldev_component_query.md @@ -36,6 +36,7 @@ azldev component query [flags] ``` -a, --all-components Include all components + --arch arch Target architecture passed to rpmspec via --target (x86_64, aarch64). Defaults to x86_64. Specs that ExclusiveArch/ExcludeArch-exclude the target are emitted with only the component name populated rather than as errors. (default x86_64) -p, --component stringArray Component name pattern -g, --component-group stringArray Component group name -h, --help help for query diff --git a/docs/user/reference/cli/azldev_config.md b/docs/user/reference/cli/azldev_config.md index 97254fcf..f162e679 100644 --- a/docs/user/reference/cli/azldev_config.md +++ b/docs/user/reference/cli/azldev_config.md @@ -35,7 +35,7 @@ JSON schema used for validating TOML config files. 
### SEE ALSO -* [azldev](azldev.md) - 🐧 Azure Linux Dev Tool 0.0.0-devel +* [azldev](azldev.md) - 🐧 Azure Linux Dev Tool * [azldev config dump](azldev_config_dump.md) - Dump the current configuration * [azldev config generate-schema](azldev_config_generate-schema.md) - Generates JSON schema for validating .toml config files diff --git a/docs/user/reference/cli/azldev_docs.md b/docs/user/reference/cli/azldev_docs.md index a2e38165..4b82fab1 100644 --- a/docs/user/reference/cli/azldev_docs.md +++ b/docs/user/reference/cli/azldev_docs.md @@ -35,6 +35,6 @@ command tree, suitable for inclusion in the user guide. ### SEE ALSO -* [azldev](azldev.md) - 🐧 Azure Linux Dev Tool 0.0.0-devel +* [azldev](azldev.md) - 🐧 Azure Linux Dev Tool * [azldev docs markdown](azldev_docs_markdown.md) - Generates Markdown (.md) docs for this tool diff --git a/docs/user/reference/cli/azldev_image.md b/docs/user/reference/cli/azldev_image.md index 6adbfca4..92f12315 100644 --- a/docs/user/reference/cli/azldev_image.md +++ b/docs/user/reference/cli/azldev_image.md @@ -36,7 +36,7 @@ can be customized using Azure Linux Image Customizer. ### SEE ALSO -* [azldev](azldev.md) - 🐧 Azure Linux Dev Tool 0.0.0-devel +* [azldev](azldev.md) - 🐧 Azure Linux Dev Tool * [azldev image boot](azldev_image_boot.md) - Boot an Azure Linux image in a QEMU VM * [azldev image build](azldev_image_build.md) - Build an image using kiwi-ng * [azldev image customize](azldev_image_customize.md) - Customizes a pre-built Azure Linux image diff --git a/docs/user/reference/cli/azldev_package.md b/docs/user/reference/cli/azldev_package.md index 23ee54c6..d3fefb25 100644 --- a/docs/user/reference/cli/azldev_package.md +++ b/docs/user/reference/cli/azldev_package.md @@ -36,6 +36,6 @@ publish channel assignments derived from package groups and component overrides. 
### SEE ALSO -* [azldev](azldev.md) - 🐧 Azure Linux Dev Tool 0.0.0-devel +* [azldev](azldev.md) - 🐧 Azure Linux Dev Tool * [azldev package list](azldev_package_list.md) - List resolved configuration for packages (RPMs and SRPMs) diff --git a/docs/user/reference/cli/azldev_project.md b/docs/user/reference/cli/azldev_project.md index 6cfccbb0..fed57377 100644 --- a/docs/user/reference/cli/azldev_project.md +++ b/docs/user/reference/cli/azldev_project.md @@ -35,7 +35,7 @@ as an Azure Linux project with a basic configuration. ### SEE ALSO -* [azldev](azldev.md) - 🐧 Azure Linux Dev Tool 0.0.0-devel +* [azldev](azldev.md) - 🐧 Azure Linux Dev Tool * [azldev project init](azldev_project_init.md) - Initialize the current working directory with a basic Azure Linux project config * [azldev project new](azldev_project_new.md) - Create a new Azure Linux project with basic config diff --git a/docs/user/reference/cli/azldev_version.md b/docs/user/reference/cli/azldev_version.md index 5397d5a8..f24b9ea1 100644 --- a/docs/user/reference/cli/azldev_version.md +++ b/docs/user/reference/cli/azldev_version.md @@ -40,5 +40,5 @@ azldev version -O json ### SEE ALSO -* [azldev](azldev.md) - 🐧 Azure Linux Dev Tool 0.0.0-devel +* [azldev](azldev.md) - 🐧 Azure Linux Dev Tool diff --git a/internal/app/azldev/cmds/component/mockproc.go b/internal/app/azldev/cmds/component/mockproc.go index bffa9fed..6c83a91f 100644 --- a/internal/app/azldev/cmds/component/mockproc.go +++ b/internal/app/azldev/cmds/component/mockproc.go @@ -41,15 +41,25 @@ func mockPackagesForQuery() []string { // the Azure Linux spec corpus: // * fonts-rpm-macros — %fontfiles, %fontfamily_subpkg, etc. 
// * pyproject-rpm-macros — %pyproject_extras_subpkg - // * java-srpm-macros, javapackages-tools — %mvn_package, %mvn_install, - // auto -javadoc subpackages, - // jp_minimal bcond default + // * java-srpm-macros, javapackages-tools, javapackages-common — + // %mvn_package, %mvn_install, + // %javadoc_package (auto + // -javadoc subpackages, from + // macros.fjava in + // javapackages-common), + // jp_minimal bcond default. + // javapackages-common is + // normally pulled in via + // javapackages-tools, but we + // install it explicitly so + // %javadoc_package never + // silently disappears. // * ghc-rpm-macros — %ghc_lib_subpackage and ghc_prof/haddock - // bcond defaults. Requires the - // ghc_version_override define set by - // query_process.py to avoid shelling out - // to a `ghc` binary that isn't installed - // in the chroot. + // bcond defaults. Requires + // query_process.py to prime + // _ghc_version_cache so the macros don't + // shell out to a `ghc` binary that isn't + // installed in the chroot. 
// // We install `java-srpm-macros` (the actual binary RPM) rather than // `java-rpm-macros`, which is the SRPM name; the latter has no @@ -65,6 +75,7 @@ func mockPackagesForQuery() []string { "pyproject-rpm-macros", "java-srpm-macros", "javapackages-tools", + "javapackages-common", "ghc-rpm-macros", } } diff --git a/internal/app/azldev/cmds/component/query.go b/internal/app/azldev/cmds/component/query.go index f02f7c75..e94d5056 100644 --- a/internal/app/azldev/cmds/component/query.go +++ b/internal/app/azldev/cmds/component/query.go @@ -13,8 +13,10 @@ import ( "github.com/microsoft/azure-linux-dev-tools/internal/app/azldev/core/components" "github.com/microsoft/azure-linux-dev-tools/internal/app/azldev/core/sources" "github.com/microsoft/azure-linux-dev-tools/internal/app/azldev/core/specs" + "github.com/microsoft/azure-linux-dev-tools/internal/rpm" "github.com/microsoft/azure-linux-dev-tools/internal/utils/fileperms" "github.com/microsoft/azure-linux-dev-tools/internal/utils/fileutils" + "github.com/microsoft/azure-linux-dev-tools/internal/utils/qemu" "github.com/spf13/cobra" ) @@ -22,6 +24,12 @@ import ( type QueryComponentsOptions struct { // Standard filter for selecting components. ComponentFilter components.ComponentFilter + + // Target architecture passed to rpmspec via --target. Defaults to + // x86_64. Drives ExclusiveArch/ExcludeArch evaluation; specs that + // exclude the target are emitted with only SpecInfo.Name populated + // (no Version/Subpackages) rather than as errors. + Arch qemu.Arch } func queryOnAppInit(_ *azldev.App, parentCmd *cobra.Command) { @@ -30,7 +38,9 @@ func queryOnAppInit(_ *azldev.App, parentCmd *cobra.Command) { // Constructs a [cobra.Command] for "component query" CLI subcommand. 
func NewComponentQueryCommand() *cobra.Command { - options := &QueryComponentsOptions{} + options := &QueryComponentsOptions{ + Arch: qemu.Arch(qemu.ArchX86_64), + } cmd := &cobra.Command{ Use: "query", @@ -61,10 +71,25 @@ The rendered-specs-dir must exist on disk; if it doesn't, run components.AddComponentFilterOptionsToCommand(cmd, &options.ComponentFilter) + cmd.Flags().Var(&options.Arch, "arch", + "Target architecture passed to rpmspec via --target (x86_64, aarch64). "+ + "Defaults to x86_64. Specs that ExclusiveArch/ExcludeArch-exclude the "+ + "target are emitted with only the component name populated rather than "+ + "as errors.") + _ = cmd.RegisterFlagCompletionFunc("arch", + func(_ *cobra.Command, _ []string, _ string) ([]string, cobra.ShellCompDirective) { + return qemu.SupportedArchitectures(), cobra.ShellCompDirectiveNoFileComp + }) + return cmd } // componentDetails encapsulates detailed information about a component. +// +// Components that the spec excludes for the requested arch +// (ExclusiveArch/ExcludeArch) are emitted with only the embedded SpecInfo.Name +// populated (Version and Subpackages stay at their zero values); the per-arch +// summary is reported via the excludedCount log line. 
type componentDetails struct { specs.ComponentSpecDetails } @@ -143,8 +168,11 @@ func QueryComponents( } }() + archStr := options.Arch.String() + queryResults, err := mockProcessor.BatchQuerySpecs( - env, env, renderedSpecsDir, scratchDir, inputs, env.FS(), env.CPUBoundConcurrency(), + env, env, renderedSpecsDir, scratchDir, archStr, + inputs, env.FS(), env.CPUBoundConcurrency(), ) if err != nil { return nil, fmt.Errorf("batch-querying rendered specs:\n%w", err) @@ -152,7 +180,10 @@ func QueryComponents( allDetails := make([]*componentDetails, 0, len(queryResults)) - var failed int + var ( + failed int + excluded int + ) for _, queryResult := range queryResults { if queryResult.Error != nil { @@ -164,6 +195,22 @@ func QueryComponents( continue } + if queryResult.ExcludedFromArch { + // Per-component logging here would flood stderr on cross-arch + // queries (e.g. --arch aarch64 against an x86_64-heavy distro + // excludes thousands of specs); a single summary log is emitted + // below the loop instead. + excluded++ + + allDetails = append(allDetails, &componentDetails{ + ComponentSpecDetails: specs.ComponentSpecDetails{ + SpecInfo: rpm.SpecInfo{Name: queryResult.Name}, + }, + }) + + continue + } + allDetails = append(allDetails, &componentDetails{ ComponentSpecDetails: specs.ComponentSpecDetails{ SpecInfo: *queryResult.Info, @@ -171,7 +218,16 @@ func QueryComponents( }) } + if excluded > 0 { + slog.Info("Some components excluded from arch by spec", + "arch", archStr, "excludedCount", excluded) + } + if failed > 0 { + // Return the successfully-queried components alongside the error so + // runFuncInternal still renders the partial results table. Per-component + // failures are already surfaced via the slog.Warn above; this return + // ensures the process exits non-zero so scripts/CI can detect it. 
return allDetails, fmt.Errorf("%d component(s) failed to query (see warnings)", failed) } diff --git a/internal/app/azldev/command.go b/internal/app/azldev/command.go index ab40fed0..68182827 100644 --- a/internal/app/azldev/command.go +++ b/internal/app/azldev/command.go @@ -124,6 +124,16 @@ func runFuncInternal(innerFunc CmdWithExtraArgsFuncType, requireConfig bool) cob command.SilenceUsage = false } + // Inner funcs may return partial results alongside an error + // (e.g. some items succeeded, some failed). Render what we have + // before propagating so callers see the partial output and the + // process still exits non-zero. + if results != nil && !isNilValue(results) { + if reportErr := reportResults(env, results); reportErr != nil { + return errors.Join(err, reportErr) + } + } + return err } @@ -131,6 +141,23 @@ func runFuncInternal(innerFunc CmdWithExtraArgsFuncType, requireConfig bool) cob } } +// isNilValue returns true if the interface holds a typed nil (e.g. a nil +// slice, map, or pointer). reportResults' reflectable path panics on typed +// nils, so we guard partial-result rendering against them. +func isNilValue(v interface{}) bool { + if v == nil { + return true + } + + rv := reflect.ValueOf(v) + switch rv.Kind() { //nolint:exhaustive // only nilable kinds matter; others fall through to false + case reflect.Chan, reflect.Func, reflect.Map, reflect.Ptr, reflect.Slice, reflect.Interface: + return rv.IsNil() + default: + return false + } +} + // Helper to retrieve the [Env] from the context of a [cobra.Command]. 
func GetEnvFromCommand(cmd *cobra.Command) (*Env, error) { ctx := cmd.Context() diff --git a/internal/app/azldev/core/sources/query_process.py b/internal/app/azldev/core/sources/query_process.py index 33d0849b..d4245884 100644 --- a/internal/app/azldev/core/sources/query_process.py +++ b/internal/app/azldev/core/sources/query_process.py @@ -47,7 +47,7 @@ from concurrent.futures import ThreadPoolExecutor, as_completed -def _rpmspec_args(spec_path, query_format, srpm, with_, without, defines): +def _rpmspec_args(spec_path, query_format, srpm, with_, without, defines, arch): """Compose an rpmspec command line. Always overrides _sourcedir and _specdir to the spec's own directory so @@ -71,6 +71,11 @@ def _rpmspec_args(spec_path, query_format, srpm, with_, without, defines): version tags; subpackage names don't depend on it, so a placeholder is fine for our purpose. + `arch`, when non-empty, is passed as --target=<arch>. This drives the + %_target_cpu macro family inside rpmspec so ExclusiveArch/ExcludeArch + checks and arch-conditional %ifarch blocks evaluate for the requested + target rather than the host arch. + User-provided defines win on the rpmspec side (rpmspec honors the last -D for a given macro), so we list ours first. 
""" @@ -78,6 +83,8 @@ def _rpmspec_args(spec_path, query_format, srpm, with_, without, defines): args = ["rpmspec", "-q"] if srpm: args.append("--srpm") + if arch: + args.append(f"--target={arch}") args += ["--queryformat", query_format] args += ["-D", f"_sourcedir {spec_dir}"] args += ["-D", f"_specdir {spec_dir}"] @@ -123,30 +130,159 @@ def _maybe_rewrite_spec(spec_path, scratch_dir, comp_name): if not rewrites: return spec_path - with open(spec_path) as src: + with open(spec_path, encoding="utf-8", errors="replace") as src: content = src.read() for find, replace in rewrites: content = content.replace(find, replace) out_path = os.path.join(scratch_dir, f"{comp_name}.patched.spec") - with open(out_path, "w") as dst: + with open(out_path, "w", encoding="utf-8") as dst: dst.write(content) return out_path +# Per-invocation timeout for rpmspec, in seconds. rpmspec on a healthy spec +# completes in well under a second; this generous cap exists only to bound +# pathological cases (recursive macros, macros that shell out and block) so +# one wedged spec can't hang the whole batch. +_RPMSPEC_TIMEOUT_SECONDS = 180 + + +class _RpmspecTimeout(Exception): + """Raised when rpmspec exceeds _RPMSPEC_TIMEOUT_SECONDS.""" + + def _run_rpmspec(args): - """Run rpmspec and return (stdout, stderr, returncode).""" - proc = subprocess.run(args, capture_output=True, text=True) + """Run rpmspec and return (stdout, stderr, returncode). + + Raises _RpmspecTimeout if rpmspec doesn't finish within + _RPMSPEC_TIMEOUT_SECONDS. On timeout, the child process is killed before + re-raising so it doesn't linger inside the mock chroot. + """ + try: + proc = subprocess.run( + args, + capture_output=True, + text=True, + encoding="utf-8", + errors="replace", + timeout=_RPMSPEC_TIMEOUT_SECONDS, + ) + except subprocess.TimeoutExpired as exc: + # subprocess.run already terminated the child by the time TimeoutExpired + # is raised, but stdout/stderr captured up to the timeout are on the + # exception. 
+ stdout = exc.stdout or "" + stderr = exc.stderr or "" + if isinstance(stdout, bytes): + stdout = stdout.decode("utf-8", errors="replace") + if isinstance(stderr, bytes): + stderr = stderr.decode("utf-8", errors="replace") + raise _RpmspecTimeout( + f"rpmspec timed out after {_RPMSPEC_TIMEOUT_SECONDS}s; " + f"last stderr: {stderr.strip()[-512:]}" + ) from exc return proc.stdout, proc.stderr, proc.returncode -def process_component(specs_dir, scratch_dir, comp): +# rpmspec (unlike rpmbuild) does NOT enforce ExclusiveArch/ExcludeArch on +# its own: both --srpm and --builtrpms queries return rc=0 against a spec +# whose ExclusiveArch excludes the --target arch. To honor those tags we +# read them out of the spec via an extra block wrapped into the srpm +# queryformat and evaluate the policy ourselves before running the binary +# phase. The wrapper uses sentinel lines so we can split the probe data +# back out and hand the caller-supplied portion of srpmOut through clean. +# +# `[%{Tag} ]` queryformat lists each value separated by a space; an empty +# tag yields an empty string, so absent ExclusiveArch/ExcludeArch parses +# as an empty list (== no restriction). +_ARCH_PROBE_BEGIN = "__AZL_ARCH_PROBE_BEGIN__\n" +_ARCH_PROBE_END = "__AZL_ARCH_PROBE_END__\n" +_ARCH_PROBE_FORMAT = ( + _ARCH_PROBE_BEGIN + + "EA=[%{ExclusiveArch} ]\n" + + "XA=[%{ExcludeArch} ]\n" + + _ARCH_PROBE_END +) + + +def _wrap_srpm_format_with_arch_probe(query_format): + """Prepend the arch-probe block to the caller's srpm queryformat.""" + return _ARCH_PROBE_FORMAT + query_format + + +def _split_arch_probe(srpm_out): + """Extract (exclusive_arch_list, exclude_arch_list, cleaned_srpm_out). + + If the probe markers are absent (older callers, malformed output) the + arch lists are empty and srpm_out is returned unchanged. Lowercase the + arch tokens because rpm normalizes arch names that way and our target + arch (qemu.Arch) is always lowercase. 
+ """ + start = srpm_out.find(_ARCH_PROBE_BEGIN) + end = srpm_out.find(_ARCH_PROBE_END) + if start < 0 or end < 0 or end < start: + return [], [], srpm_out + probe = srpm_out[start + len(_ARCH_PROBE_BEGIN):end] + cleaned = srpm_out[:start] + srpm_out[end + len(_ARCH_PROBE_END):] + ea, xa = [], [] + for line in probe.splitlines(): + if line.startswith("EA="): + ea = line[len("EA="):].lower().split() + elif line.startswith("XA="): + xa = line[len("XA="):].lower().split() + return ea, xa, cleaned + + +# rpm canonicalizes a handful of arch aliases before comparing against +# ExclusiveArch/ExcludeArch. Mirror just the pairs that matter for arches +# azldev supports today. +_ARCH_ALIASES = { + "amd64": "x86_64", + "arm64": "aarch64", +} + + +def _canonicalize_arch_token(token): + return _ARCH_ALIASES.get(token, token) + + +def _is_arch_excluded(arch, exclusive_arch, exclude_arch): + """Return True iff target arch is excluded by ExclusiveArch/ExcludeArch. + + `noarch` in ExclusiveArch means "any arch" and never excludes. With an + empty target arch (caller opted out of arch filtering) we never + exclude. + + Spec tokens and the target arch are canonicalized through a small alias + map first (amd64 -> x86_64, arm64 -> aarch64) so specs that spell their + arches the Debian/Ubuntu way aren't silently dropped. We mirror only + the pairs that matter for arches azldev supports + (qemu.SupportedArchitectures: x86_64, aarch64); the ix86 family and + other rpm aliases are intentionally not modeled. 
+ """ + if not arch: + return False + arch = _canonicalize_arch_token(arch.lower()) + exclusive_arch = [_canonicalize_arch_token(a) for a in exclusive_arch] + exclude_arch = [_canonicalize_arch_token(a) for a in exclude_arch] + if exclusive_arch and "noarch" not in exclusive_arch and arch not in exclusive_arch: + return True + if arch in exclude_arch: + return True + return False + + +def process_component(specs_dir, scratch_dir, comp, arch): """Run rpmspec --srpm + rpmspec (no --srpm) for one component. Trust boundary: comp["name"] and comp["specRelPath"] are validated by BatchQuerySpecs in mockprocessor.go before this script is invoked. + arch is a target arch (e.g. "x86_64"); when non-empty it is passed to + rpmspec via --target. Specs that ExclusiveArch/ExcludeArch-exclude the + target are returned with excludedFromArch=True (not an error). """ name = comp["name"] spec_path = os.path.join(specs_dir, comp["specRelPath"]) @@ -167,11 +303,28 @@ def process_component(specs_dir, scratch_dir, comp): # _rpmspec_args, so sidecar files still resolve correctly. effective_spec = _maybe_rewrite_spec(spec_path, scratch_dir, name) - # Source-level query (--srpm). + # Source-level query (--srpm). The caller's srpmQueryFormat is wrapped + # with an arch-policy probe block (see _wrap_srpm_format_with_arch_probe); + # we split that probe back out before returning srpm_out to Go so the + # downstream parser only sees the caller-requested fields. 
srpm_args = _rpmspec_args( - effective_spec, comp["srpmQueryFormat"], True, with_, without, defines + effective_spec, + _wrap_srpm_format_with_arch_probe(comp["srpmQueryFormat"]), + True, + with_, + without, + defines, + arch, ) - srpm_out, srpm_err, srpm_rc = _run_rpmspec(srpm_args) + try: + srpm_out, srpm_err, srpm_rc = _run_rpmspec(srpm_args) + except _RpmspecTimeout as exc: + return { + "name": name, + "srpmOut": "", + "binOut": "", + "error": f"rpmspec --srpm {exc}", + } if srpm_rc != 0: return { "name": name, @@ -180,6 +333,16 @@ def process_component(specs_dir, scratch_dir, comp): "error": f"rpmspec --srpm failed: {srpm_err.strip()}", } + exclusive_arch, exclude_arch, srpm_out = _split_arch_probe(srpm_out) + if _is_arch_excluded(arch, exclusive_arch, exclude_arch): + return { + "name": name, + "srpmOut": srpm_out, + "binOut": "", + "error": None, + "excludedFromArch": True, + } + # Binary subpackage enumeration (no --srpm). # # `--builtrpms` (vs the default `--rpms`) restricts the listing to binary @@ -195,10 +358,19 @@ def process_component(specs_dir, scratch_dir, comp): with_, without, defines, + arch, ) # Insert --builtrpms right after `-q` so it associates with the query. 
bin_args.insert(2, "--builtrpms") - bin_out, bin_err, bin_rc = _run_rpmspec(bin_args) + try: + bin_out, bin_err, bin_rc = _run_rpmspec(bin_args) + except _RpmspecTimeout as exc: + return { + "name": name, + "srpmOut": srpm_out, + "binOut": "", + "error": f"rpmspec (binary) {exc}", + } if bin_rc != 0: return { "name": name, @@ -216,9 +388,9 @@ def process_component(specs_dir, scratch_dir, comp): def main() -> int: - if len(sys.argv) != 4: + if len(sys.argv) != 5: print( - f"usage: {sys.argv[0]} ", + f"usage: {sys.argv[0]} ", file=sys.stderr, ) return 1 @@ -226,6 +398,7 @@ def main() -> int: scratch_dir = sys.argv[1] specs_dir = sys.argv[2] max_workers = int(sys.argv[3]) + arch = sys.argv[4] inputs_path = os.path.join(scratch_dir, "inputs.json") with open(inputs_path) as f: @@ -235,7 +408,7 @@ def main() -> int: with ThreadPoolExecutor(max_workers=max_workers) as pool: futures = { - pool.submit(process_component, specs_dir, scratch_dir, comp): comp["name"] + pool.submit(process_component, specs_dir, scratch_dir, comp, arch): comp["name"] for comp in inputs } diff --git a/internal/app/azldev/core/sources/specquery.go b/internal/app/azldev/core/sources/specquery.go index d139adae..03260eb9 100644 --- a/internal/app/azldev/core/sources/specquery.go +++ b/internal/app/azldev/core/sources/specquery.go @@ -34,11 +34,20 @@ type SpecQueryInput struct { } // SpecQueryResult holds the batch-query result for one spec. -// Info is populated when Error is nil, and includes Subpackages. +// +// Exactly one of Info, ExcludedFromArch, or Error indicates the outcome: +// - Info is populated (and Error is nil, ExcludedFromArch is false) when the +// spec was successfully queried for the requested arch. +// - ExcludedFromArch is true when the spec's ExclusiveArch/ExcludeArch tags +// exclude the requested arch (evaluated by query_process.py; rpmspec itself +// does not enforce them). This is not an error; the component simply isn't built for that arch. 
+// - Error is non-nil for any other failure (rpmspec parse error, missing +// spec, timeout, etc). type SpecQueryResult struct { - Name string - Info *rpm.SpecInfo - Error error + Name string + Info *rpm.SpecInfo + ExcludedFromArch bool + Error error } // validateSpecQueryInputs rejects empty names, path-traversal in spec @@ -108,10 +117,11 @@ type specQueryInputJSON struct { // specQueryResultJSON mirrors the per-component JSON shape written by // query_process.py. type specQueryResultJSON struct { - Name string `json:"name"` - SrpmOut string `json:"srpmOut"` - BinOut string `json:"binOut"` - Error *string `json:"error"` + Name string `json:"name"` + SrpmOut string `json:"srpmOut"` + BinOut string `json:"binOut"` + Error *string `json:"error"` + ExcludedFromArch bool `json:"excludedFromArch,omitempty"` } // BatchQuerySpecs runs `rpmspec` against multiple rendered spec files inside @@ -125,9 +135,14 @@ type specQueryResultJSON struct { // used to ferry the script + inputs.json + results.json in and out of the // chroot; it must be writable by the user the chroot runs as (mock's // chrootuid defaults to os.getuid()). +// +// arch sets the rpmspec build target (e.g. "x86_64", "aarch64") via +// --target=<arch>. When empty, rpmspec uses its built-in default (the host +// arch). Specs that ExclusiveArch/ExcludeArch-exclude the target arch are +// surfaced via [SpecQueryResult.ExcludedFromArch] rather than as errors. 
func (p *MockProcessor) BatchQuerySpecs( ctx context.Context, events opctx.EventListener, - specsDir, scratchDir string, + specsDir, scratchDir, arch string, inputs []SpecQueryInput, fs opctx.FS, maxWorkers int, ) ([]SpecQueryResult, error) { @@ -177,7 +192,7 @@ func (p *MockProcessor) BatchQuerySpecs( ScriptBytes: queryProcessScript, InputsJSON: inputsBytes, ResultsName: "results.json", - ScriptArgs: []string{chrootScratchPath, chrootSpecsPath, workers}, + ScriptArgs: []string{chrootScratchPath, chrootSpecsPath, workers, arch}, ProgressLabel: "Querying specs in mock chroot", ProgressTotal: int64(len(inputs)), FS: fs, @@ -222,6 +237,12 @@ func parseSpecQueryBatchJSON(raw []byte, inputs []SpecQueryInput) ([]SpecQueryRe continue } + if compResult.ExcludedFromArch { + results[idx].ExcludedFromArch = true + + continue + } + info, parseErr := rpm.ParseSrpmQueryOutput(input.SpecRelPath, compResult.SrpmOut) if parseErr != nil { results[idx].Error = fmt.Errorf("parsing rpmspec --srpm output:\n%w", parseErr) diff --git a/internal/app/azldev/core/sources/specquery_test.go b/internal/app/azldev/core/sources/specquery_test.go index d3853269..99414fcb 100644 --- a/internal/app/azldev/core/sources/specquery_test.go +++ b/internal/app/azldev/core/sources/specquery_test.go @@ -119,6 +119,27 @@ func TestParseSpecQueryBatchJSON_InvalidJSON(t *testing.T) { assert.Contains(t, err.Error(), "parsing spec query batch results JSON") } +func TestParseSpecQueryBatchJSON_ExcludedFromArch(t *testing.T) { + t.Parallel() + + raw := []byte(`[{ + "name": "shim", + "srpmOut": "", + "binOut": "", + "error": null, + "excludedFromArch": true + }]`) + + inputs := []SpecQueryInput{{Name: "shim", SpecRelPath: "s/shim/shim.spec"}} + + results, err := parseSpecQueryBatchJSON(raw, inputs) + require.NoError(t, err) + require.Len(t, results, 1) + require.NoError(t, results[0].Error) + assert.True(t, results[0].ExcludedFromArch) + assert.Nil(t, results[0].Info) +} + func TestValidateSpecQueryInputs(t 
*testing.T) { t.Parallel() diff --git a/scenario/internal/projecttest/testspec.go b/scenario/internal/projecttest/testspec.go index 15858b09..810bfefb 100644 --- a/scenario/internal/projecttest/testspec.go +++ b/scenario/internal/projecttest/testspec.go @@ -128,18 +128,25 @@ func (s *TestSpec) Render() string { "echo hello >file.txt", "", "%install", - "mkdir -p %{buildroot}/%{_datadir}/test-component", - "cp file.txt %{buildroot}/%{_datadir}/test-component/file.txt", + "mkdir -p %{buildroot}/%{_datadir}/" + s.name, + "cp file.txt %{buildroot}/%{_datadir}/" + s.name + "/file.txt", + }...) + for _, sub := range s.subpackages { + lines = append(lines, "echo "+sub+" >%{buildroot}/%{_datadir}/"+s.name+"/"+sub+".txt") + } + + lines = append(lines, []string{ "", "%files", - "%{_datadir}/test-component", + "%dir %{_datadir}/" + s.name, + "%{_datadir}/" + s.name + "/file.txt", "", }...) for _, sub := range s.subpackages { lines = append(lines, []string{ "%files " + sub, - "%{_datadir}/test-component", + "%{_datadir}/" + s.name + "/" + sub + ".txt", "", }...) 
} diff --git a/scenario/testdata/defaultconfigs/distros/azl/azl.distro.toml b/scenario/testdata/defaultconfigs/distros/azl/azl.distro.toml index be529e28..29722e33 100644 --- a/scenario/testdata/defaultconfigs/distros/azl/azl.distro.toml +++ b/scenario/testdata/defaultconfigs/distros/azl/azl.distro.toml @@ -19,7 +19,7 @@ mock-config-x86_64 = "mock/azurelinux-4-x86_64.cfg" mock-config-aarch64 = "mock/azurelinux-4-aarch64.cfg" [distros.azurelinux.versions.'4.0'.default-component-config] -spec = { type = "upstream", upstream-distro = { name = "fedora", version = "42" } } +spec = { type = "upstream", upstream-distro = { name = "fedora", version = "43" } } [distros.azurelinux.versions.'4.0'.default-component-config.build.defines] dist = ".azl4" diff --git a/scenario/testdata/defaultconfigs/distros/azl/mock/azurelinux-4.tpl b/scenario/testdata/defaultconfigs/distros/azl/mock/azurelinux-4.tpl index f6e476ce..4b0c0ffd 100644 --- a/scenario/testdata/defaultconfigs/distros/azl/mock/azurelinux-4.tpl +++ b/scenario/testdata/defaultconfigs/distros/azl/mock/azurelinux-4.tpl @@ -16,7 +16,7 @@ config_opts['cleanup_on_failure'] = False config_opts['dist'] = 'azl4' config_opts['extra_chroot_dirs'] = ['/run/lock'] -config_opts['releasever'] = '42' +config_opts['releasever'] = '43' config_opts['package_manager'] = 'dnf5' config_opts['update_before_build'] = False config_opts['root'] = 'azl-4.0-{{ target_arch }}'