diff --git a/.github/workflows/build-image.yml b/.github/workflows/build-image.yml
new file mode 100644
index 0000000000..61219c536f
--- /dev/null
+++ b/.github/workflows/build-image.yml
@@ -0,0 +1,39 @@
+name: Build CI image
+
+on:
+  push:
+    branches:
+      - main
+    paths:
+      - docker/Dockerfile.ci
+      - pyproject.toml
+      - uv.lock
+      - build/requirements.sh
+  workflow_dispatch:
+
+jobs:
+  build:
+    runs-on: ubuntu-latest
+    permissions:
+      contents: read
+      packages: write
+
+    steps:
+      - uses: actions/checkout@v4
+
+      - name: Log in to GitHub Container Registry
+        uses: docker/login-action@v3
+        with:
+          registry: ghcr.io
+          username: ${{ github.actor }}
+          password: ${{ secrets.GITHUB_TOKEN }}
+
+      - name: Build and push CI image
+        uses: docker/build-push-action@v6
+        with:
+          context: .
+          file: docker/Dockerfile.ci
+          push: true
+          tags: ghcr.io/${{ github.repository }}:latest
+          cache-from: type=registry,ref=ghcr.io/${{ github.repository }}:latest
+          cache-to: type=inline
diff --git a/.github/workflows/prod.yml b/.github/workflows/prod.yml
index e448bdf42b..adfc10eccc 100644
--- a/.github/workflows/prod.yml
+++ b/.github/workflows/prod.yml
@@ -4,67 +4,224 @@ on:
   push:
     branches:
       - main
-      - dev
+      - parallel
 
 jobs:
-  pages:
-    name: Render-Blog
+
+  # ── 1. Parallel chunk rendering ─────────────────────────────────────────────
+  # Each chunk runs independently: installs deps, executes Python in its .qmd
+  # files, and uploads the resulting _freeze/ entries as an artifact.
+  # All 41 files from _quarto-prod.yml are covered across the 7 chunks.
+  render-chunk:
+    name: Render (${{ matrix.chunk }})
     runs-on: ubuntu-latest
     if: ${{ !github.event.pull_request.head.repo.fork }}
-    steps:
+    container:
+      image: ghcr.io/${{ github.repository }}:latest
+      credentials:
+        username: ${{ github.actor }}
+        password: ${{ secrets.GITHUB_TOKEN }}
+    strategy:
+      matrix:
+        include:
+          - chunk: light
+            files: >-
+              index.qmd
+              404.qmd
+              content/getting-started/index.qmd
+              content/getting-started/01_environment.qmd
+              content/getting-started/02_data_analysis.qmd
+              content/getting-started/03_revisions.qmd
+              content/annexes/about.qmd
+              content/annexes/evaluation.qmd
+              content/annexes/corrections.qmd
+              content/git/index.qmd
+              content/git/introgit.qmd
+              content/git/exogit.qmd
+
+          - chunk: manip-1
+            files: >-
+              content/manipulation/index.qmd
+              content/manipulation/01_numpy.qmd
+              content/manipulation/02_pandas_intro.qmd
+              content/manipulation/02_pandas_suite.qmd
+              content/manipulation/02a_pandas_tutorial.qmd
+              content/manipulation/02b_pandas_TP.qmd
+
+          - chunk: manip-2
+            files: >-
+              content/manipulation/03_geopandas_intro.qmd
+              content/manipulation/03_geopandas_tutorial.qmd
+              content/manipulation/03_geopandas_TP.qmd
+              content/manipulation/04a_webscraping_TP.qmd
+              content/manipulation/04c_API_TP.qmd
+              content/manipulation/04b_regex_TP.qmd
+              content/manipulation/05_parquet_s3.qmd
+
+          - chunk: visu
+            files: >-
+              content/visualisation/index.qmd
+              content/visualisation/matplotlib.qmd
+              content/visualisation/maps.qmd
+
+          - chunk: model-1
+            files: >-
+              content/modelisation/index.qmd
+              content/modelisation/0_preprocessing.qmd
+              content/modelisation/1_modelevaluation.qmd
+              content/modelisation/2_classification.qmd
+
+          - chunk: model-2
+            files: >-
+              content/modelisation/3_regression.qmd
+              content/modelisation/4_featureselection.qmd
+              content/modelisation/5_clustering.qmd
+              content/modelisation/6_pipeline.qmd
+              content/modelisation/7_mlapi.qmd
+
+          - chunk: nlp
+            files: >-
+              content/NLP/index.qmd
+              content/NLP/01_intro.qmd
+              content/NLP/02_exoclean.qmd
+              content/NLP/03_embedding.qmd
 
+    steps:
       - uses: actions/checkout@v4
         with:
           fetch-depth: 0
           ref: ${{ github.event.pull_request.head.ref }}
-          repository: ${{github.event.pull_request.head.repo.full_name}}
+          repository: ${{ github.event.pull_request.head.repo.full_name }}
 
-      - name: Configure safe.directory  # Workaround for actions/checkout#760
+      - name: Configure safe.directory
         run: git config --global --add safe.directory /__w/python-datascientist/python-datascientist
 
-      - name: Install system dependencies
+      - name: Restore environment
+        run: uv sync
+
+      - name: Install SpaCy corpus and check Quarto version
         run: |
-          sudo ./build/requirements.sh
+          uv run spacy download en_core_web_sm
+          uv run quarto --version
 
-      - name: Install the latest version of uv
-        uses: astral-sh/setup-uv@v7
+      - name: Prepare config
+        run: |
+          rm _quarto.yml
+          cp _quarto-prod.yml _quarto.yml
+
+      - name: Prepare environment metadata
+        env:
+          TOKEN_API_INSEE: ${{ secrets.TOKEN_API_INSEE }}
+        run: uv run build/append-environment/append_environment.py
+
+      - name: Render chunk files
+        env:
+          TOKEN_API_INSEE: ${{ secrets.TOKEN_API_INSEE }}
+        run: |
+          for file in ${{ matrix.files }}; do
+            echo "::group::Rendering $file"
+            uv run quarto render "$file" --profile fr
+            echo "::endgroup::"
+          done
+
+      # _freeze/ entries are the execution results Quarto needs to skip
+      # re-execution in the assembly step. Profile is not part of the freeze
+      # path, so fr freeze entries are reused by the en render as well.
+      - name: Upload freeze artifacts
+        uses: actions/upload-artifact@v4
         with:
-          version: "latest"
+          name: freeze-chunk-${{ matrix.chunk }}
+          path: _freeze
+          if-no-files-found: warn
+
+  # ── 2. Site assembly and publication ────────────────────────────────────────
+  # Downloads all freeze artifacts, merges them, then runs the two-profile
+  # site render without re-executing any Python (--freeze flag).
+  # sidebar.py modifies .qmd frontmatter between the two renders; --freeze
+  # forces cache use regardless of source-file hash, which is required here
+  # because append_environment.py and sidebar.py both modify the .qmd files.
+  pages:
+    name: Assemble and publish
+    runs-on: ubuntu-latest
+    needs: render-chunk
+    if: ${{ !github.event.pull_request.head.repo.fork }}
+    container:
+      image: ghcr.io/${{ github.repository }}:latest
+      credentials:
+        username: ${{ github.actor }}
+        password: ${{ secrets.GITHUB_TOKEN }}
+    steps:
+      - uses: actions/checkout@v4
+        with:
+          fetch-depth: 0
+          ref: ${{ github.event.pull_request.head.ref }}
+          repository: ${{ github.event.pull_request.head.repo.full_name }}
+
+      - name: Configure safe.directory
+        run: git config --global --add safe.directory /__w/python-datascientist/python-datascientist
 
       - name: Restore environment
         run: uv sync
 
-      # Step 3: Set up Quarto
-      - name: Set up quarto
-        uses: quarto-dev/quarto-actions/setup@v2
-        with:
-          version: 1.8.26
-
-      - name: Install SpaCy corpus and check quarto version
+      - name: Install SpaCy corpus and check Quarto version
         run: |
           uv run spacy download en_core_web_sm
           uv run quarto --version
-
-      - name: Prepare directory
+
+      - name: Prepare config
         run: |
           rm _quarto.yml
           cp _quarto-prod.yml _quarto.yml
 
-      - name: Render website
+      - name: Prepare environment metadata
         env:
           TOKEN_API_INSEE: ${{ secrets.TOKEN_API_INSEE }}
+        run: uv run build/append-environment/append_environment.py
+
+      - name: Download all freeze artifacts
+        uses: actions/download-artifact@v4
+        with:
+          pattern: freeze-chunk-*
+          path: freeze-artifacts/
+          merge-multiple: false
+
+      - name: Merge freeze artifacts into _freeze/
         run: |
-          uv run build/append-environment/append_environment.py
-          uv run quarto render --profile fr --to html
-          uv run build/sidebar.py --to english
-          uv run quarto render --profile en --to html
-          uv run build/sidebar.py --to french
+          # upload-artifact strips the top-level directory of `path: _freeze`,
+          # so each downloaded artifact directory is itself a freeze tree.
+          # A nested _freeze/ is still accepted in case that layout changes.
+          mkdir -p _freeze
+          for dir in freeze-artifacts/freeze-chunk-*/; do
+            [ -d "$dir" ] || continue
+            if [ -d "${dir}_freeze" ]; then
+              cp -r "${dir}_freeze/." _freeze/
+            else
+              cp -r "${dir}." _freeze/
+            fi
+          done
+
+      - name: Render website (fr)
+        env:
+          TOKEN_API_INSEE: ${{ secrets.TOKEN_API_INSEE }}
+        run: uv run quarto render --profile fr --to html --freeze
+
+      - name: Switch sidebar to English
+        run: uv run build/sidebar.py --to english
+
+      - name: Render website (en)
+        env:
+          TOKEN_API_INSEE: ${{ secrets.TOKEN_API_INSEE }}
+        run: uv run quarto render --profile en --to html --freeze
+
+      - name: Reset sidebar to French
+        run: uv run build/sidebar.py --to french
+
       - name: Archive build as artifacts
         uses: actions/upload-artifact@v4
         with:
           name: sitedir
-          path: |
-            _site
+          path: _site
+
       - name: Publish to Pages
         if: github.ref == 'refs/heads/main'
         run: |
@@ -72,24 +229,23 @@ jobs:
           git config --global user.name "Quarto GHA Workflow Runner"
           quarto publish gh-pages . --no-render --no-browser
 
+  # ── 3. Exercises and corrections (unchanged, run in parallel with above) ────
   enonces:
     name: Render exercises
     runs-on: ubuntu-latest
     if: ${{ !github.event.pull_request.head.repo.fork }}
     steps:
-
       - uses: actions/checkout@v4
         with:
           fetch-depth: 0
           ref: ${{ github.event.pull_request.head.ref }}
           repository: ${{github.event.pull_request.head.repo.full_name}}
 
-      - name: Configure safe.directory  # Workaround for actions/checkout#760
+      - name: Configure safe.directory
         run: git config --global --add safe.directory /__w/python-datascientist/python-datascientist
 
       - name: Install system dependencies
-        run: |
-          sudo ./build/requirements.sh
+        run: sudo ./build/requirements.sh
 
       - name: Install the latest version of uv
         uses: astral-sh/setup-uv@v7
@@ -99,7 +255,6 @@ jobs:
       - name: Restore environment
         run: uv sync
 
-      # Step 3: Set up Quarto
       - name: Set up quarto
         uses: quarto-dev/quarto-actions/setup@v2
         with:
@@ -110,7 +265,6 @@ jobs:
           uv run spacy download en_core_web_sm
           uv run quarto --version
 
-
       - name: Build and push notebooks
         uses: linogaliana/python-datascientist-actions@main
         with:
@@ -124,19 +278,17 @@ jobs:
     runs-on: ubuntu-latest
     if: ${{ !github.event.pull_request.head.repo.fork }}
     steps:
-
       - uses: actions/checkout@v4
         with:
           fetch-depth: 0
           ref: ${{ github.event.pull_request.head.ref }}
           repository: ${{github.event.pull_request.head.repo.full_name}}
 
-      - name: Configure safe.directory  # Workaround for actions/checkout#760
+      - name: Configure safe.directory
         run: git config --global --add safe.directory /__w/python-datascientist/python-datascientist
 
       - name: Install system dependencies
-        run: |
-          sudo ./build/requirements.sh
+        run: sudo ./build/requirements.sh
 
       - name: Install the latest version of uv
         uses: astral-sh/setup-uv@v7
@@ -146,7 +298,6 @@ jobs:
       - name: Restore environment
         run: uv sync
 
-      # Step 3: Set up Quarto
       - name: Set up quarto
         uses: quarto-dev/quarto-actions/setup@v2
         with:
@@ -157,7 +308,6 @@ jobs:
           uv run spacy download en_core_web_sm
           uv run quarto --version
 
-
       - name: Build and push notebooks
         uses: linogaliana/python-datascientist-actions-corrections@main
         with:
@@ -166,5 +316,3 @@ jobs:
         env:
           TOKEN_API_INSEE: ${{ secrets.TOKEN_API_INSEE }}
           API_TOKEN_GITHUB: ${{ secrets.API_TOKEN_GITHUB }}
-
-
diff --git a/docker/Dockerfile.ci b/docker/Dockerfile.ci
new file mode 100644
index 0000000000..316fe1b766
--- /dev/null
+++ b/docker/Dockerfile.ci
@@ -0,0 +1,38 @@
+FROM ubuntu:24.04
+
+ENV DEBIAN_FRONTEND=noninteractive
+
+# Mirror build/requirements.sh + add curl for uv install
+RUN apt-get update && apt-get install -y \
+    graphviz wget curl git \
+    build-essential libmagic-dev libgdal-dev \
+    imagemagick && \
+    rm -rf /var/lib/apt/lists/*
+
+# uv manages Python 3.13 itself (Ubuntu 24.04 ships 3.12)
+RUN curl -LsSf https://astral.sh/uv/install.sh | sh
+ENV PATH="/root/.local/bin:$PATH"
+
+# GitHub Actions container jobs run with HOME=/github/home, so uv's default
+# cache and managed-Python directories under /root would not be found at job
+# time. Pin both to HOME-independent locations.
+ENV UV_CACHE_DIR=/opt/uv/cache \
+    UV_PYTHON_INSTALL_DIR=/opt/uv/python
+
+# Quarto
+ARG QUARTO_VERSION=1.8.26
+RUN wget -q "https://github.com/quarto-dev/quarto-cli/releases/download/v${QUARTO_VERSION}/quarto-${QUARTO_VERSION}-linux-amd64.tar.gz" && \
+    tar xzf "quarto-${QUARTO_VERSION}-linux-amd64.tar.gz" && \
+    mv "quarto-${QUARTO_VERSION}" /opt/quarto && \
+    ln -s /opt/quarto/bin/quarto /usr/local/bin/quarto && \
+    rm "quarto-${QUARTO_VERSION}-linux-amd64.tar.gz"
+
+# Pre-populate the uv download cache from the lock file.
+# The cache lands in $UV_CACHE_DIR and persists in the image layers.
+# CI jobs that run uv sync will install from this cache (no PyPI download).
+# Only pyproject.toml and uv.lock are copied, so skip installing the project.
+WORKDIR /tmp/build
+COPY pyproject.toml uv.lock ./
+RUN uv sync --frozen --no-install-project
+
+WORKDIR /