Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
39 changes: 39 additions & 0 deletions .github/workflows/build-image.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
# Builds the container image described by docker/Dockerfile.ci and pushes it
# to GitHub Container Registry as ghcr.io/<owner>/<repo>:latest.
name: Build CI image

on:
  push:
    branches:
      - main
    # Only pushes that touch one of these files trigger a rebuild.
    paths:
      - docker/Dockerfile.ci
      - pyproject.toml
      - uv.lock
      - build/requirements.sh
  # Allows the image to be rebuilt manually.
  workflow_dispatch:

jobs:
  build:
    runs-on: ubuntu-latest
    permissions:
      contents: read
      # Required so GITHUB_TOKEN can push the image to ghcr.io.
      packages: write

    steps:
      - uses: actions/checkout@v4

      - name: Log in to GitHub Container Registry
        uses: docker/login-action@v3
        with:
          registry: ghcr.io
          username: ${{ github.actor }}
          password: ${{ secrets.GITHUB_TOKEN }}

      - name: Build and push CI image
        uses: docker/build-push-action@v6
        with:
          context: .
          file: docker/Dockerfile.ci
          push: true
          tags: ghcr.io/${{ github.repository }}:latest
          # Layer cache is read from the previously pushed :latest image and
          # written back inline, i.e. embedded in the image being pushed.
          cache-from: type=registry,ref=ghcr.io/${{ github.repository }}:latest
          cache-to: type=inline
226 changes: 184 additions & 42 deletions .github/workflows/prod.yml
Original file line number Diff line number Diff line change
Expand Up @@ -4,92 +4,242 @@ on:
push:
branches:
- main
- dev
- parallel

jobs:
pages:
name: Render-Blog

# ── 1. Parallel chunk rendering ─────────────────────────────────────────────
# Each chunk runs independently: installs deps, executes Python in its .qmd
# files, and uploads the resulting _freeze/ entries as an artifact.
# All 41 files from _quarto-prod.yml are covered across the 7 chunks.
# Matrix job: every matrix entry renders its own list of .qmd files inside the
# CI container image, then uploads the resulting _freeze/ directory as the
# artifact freeze-chunk-<chunk>.
render-chunk:
  name: Render (${{ matrix.chunk }})
  runs-on: ubuntu-latest
  if: ${{ !github.event.pull_request.head.repo.fork }}
  container:
    image: ghcr.io/${{ github.repository }}:latest
    credentials:
      username: ${{ github.actor }}
      password: ${{ secrets.GITHUB_TOKEN }}
  strategy:
    matrix:
      # `files` is a folded scalar: the paths end up on a single line,
      # separated by spaces, which is what the render loop iterates over.
      include:
        - chunk: light
          files: >-
            index.qmd
            404.qmd
            content/getting-started/index.qmd
            content/getting-started/01_environment.qmd
            content/getting-started/02_data_analysis.qmd
            content/getting-started/03_revisions.qmd
            content/annexes/about.qmd
            content/annexes/evaluation.qmd
            content/annexes/corrections.qmd
            content/git/index.qmd
            content/git/introgit.qmd
            content/git/exogit.qmd

        - chunk: manip-1
          files: >-
            content/manipulation/index.qmd
            content/manipulation/01_numpy.qmd
            content/manipulation/02_pandas_intro.qmd
            content/manipulation/02_pandas_suite.qmd
            content/manipulation/02a_pandas_tutorial.qmd
            content/manipulation/02b_pandas_TP.qmd

        - chunk: manip-2
          files: >-
            content/manipulation/03_geopandas_intro.qmd
            content/manipulation/03_geopandas_tutorial.qmd
            content/manipulation/03_geopandas_TP.qmd
            content/manipulation/04a_webscraping_TP.qmd
            content/manipulation/04c_API_TP.qmd
            content/manipulation/04b_regex_TP.qmd
            content/manipulation/05_parquet_s3.qmd

        - chunk: visu
          files: >-
            content/visualisation/index.qmd
            content/visualisation/matplotlib.qmd
            content/visualisation/maps.qmd

        - chunk: model-1
          files: >-
            content/modelisation/index.qmd
            content/modelisation/0_preprocessing.qmd
            content/modelisation/1_modelevaluation.qmd
            content/modelisation/2_classification.qmd

        - chunk: model-2
          files: >-
            content/modelisation/3_regression.qmd
            content/modelisation/4_featureselection.qmd
            content/modelisation/5_clustering.qmd
            content/modelisation/6_pipeline.qmd
            content/modelisation/7_mlapi.qmd

        - chunk: nlp
          files: >-
            content/NLP/index.qmd
            content/NLP/01_intro.qmd
            content/NLP/02_exoclean.qmd
            content/NLP/03_embedding.qmd

  steps:
    - uses: actions/checkout@v4
      with:
        fetch-depth: 0
        ref: ${{ github.event.pull_request.head.ref }}
        repository: ${{ github.event.pull_request.head.repo.full_name }}

    - name: Configure safe.directory
      run: git config --global --add safe.directory /__w/python-datascientist/python-datascientist

    - name: Restore environment
      run: uv sync

    - name: Install SpaCy corpus and check Quarto version
      run: |
        uv run spacy download en_core_web_sm
        uv run quarto --version

    # Swap the production project configuration in as _quarto.yml.
    - name: Prepare config
      run: |
        rm _quarto.yml
        cp _quarto-prod.yml _quarto.yml

    - name: Prepare environment metadata
      env:
        TOKEN_API_INSEE: ${{ secrets.TOKEN_API_INSEE }}
      run: uv run build/append-environment/append_environment.py

    - name: Render chunk files
      env:
        TOKEN_API_INSEE: ${{ secrets.TOKEN_API_INSEE }}
        # The file list reaches the shell through an environment variable
        # rather than being pasted into the script text by `${{ }}`.
        CHUNK_FILES: ${{ matrix.files }}
      run: |
        # $CHUNK_FILES is intentionally unquoted: word splitting turns the
        # space-separated list into one loop iteration per file.
        for file in $CHUNK_FILES; do
          echo "::group::Rendering $file"
          uv run quarto render "$file" --profile fr
          echo "::endgroup::"
        done

    # _freeze/ entries are the execution results Quarto needs to skip
    # re-execution in the assembly step. Profile is not part of the freeze
    # path, so fr freeze entries are reused by the en render as well.
    - name: Upload freeze artifacts
      uses: actions/upload-artifact@v4
      with:
        name: freeze-chunk-${{ matrix.chunk }}
        path: _freeze
        # A chunk that produces no _freeze/ content only logs a warning.
        if-no-files-found: warn

# ── 2. Site assembly and publication ────────────────────────────────────────
# Downloads all freeze artifacts, merges them, then runs the two-profile
# site render without re-executing any Python (--freeze flag).
# sidebar.py modifies .qmd frontmatter between the two renders; --freeze
# forces cache use regardless of source-file hash, which is required here
# because append_environment.py and sidebar.py both modify the .qmd files.
# Assembly job: waits for every render-chunk matrix entry, rebuilds _freeze/
# from the uploaded artifacts, renders the site with the fr and en profiles,
# archives _site and, on main only, publishes it to the gh-pages branch.
pages:
  name: Assemble and publish
  runs-on: ubuntu-latest
  needs: render-chunk
  if: ${{ !github.event.pull_request.head.repo.fork }}
  container:
    image: ghcr.io/${{ github.repository }}:latest
    credentials:
      username: ${{ github.actor }}
      password: ${{ secrets.GITHUB_TOKEN }}
  steps:
    - uses: actions/checkout@v4
      with:
        fetch-depth: 0
        ref: ${{ github.event.pull_request.head.ref }}
        repository: ${{ github.event.pull_request.head.repo.full_name }}

    - name: Configure safe.directory
      run: git config --global --add safe.directory /__w/python-datascientist/python-datascientist

    - name: Restore environment
      run: uv sync

    - name: Install SpaCy corpus and check Quarto version
      run: |
        uv run spacy download en_core_web_sm
        uv run quarto --version

    # Swap the production project configuration in as _quarto.yml.
    - name: Prepare config
      run: |
        rm _quarto.yml
        cp _quarto-prod.yml _quarto.yml

    - name: Prepare environment metadata
      env:
        TOKEN_API_INSEE: ${{ secrets.TOKEN_API_INSEE }}
      run: uv run build/append-environment/append_environment.py

    # With merge-multiple: false each artifact is extracted into its own
    # directory: freeze-artifacts/freeze-chunk-<chunk>/.
    - name: Download all freeze artifacts
      uses: actions/download-artifact@v4
      with:
        pattern: freeze-chunk-*
        path: freeze-artifacts/
        merge-multiple: false

    - name: Merge freeze artifacts into _freeze/
      run: |
        mkdir -p _freeze
        for dir in freeze-artifacts/freeze-chunk-*/; do
          # When no artifact was downloaded the glob stays unexpanded.
          [ -d "$dir" ] || continue
          if [ -d "${dir}_freeze" ]; then
            # Layout where the artifact kept a leading _freeze/ directory.
            cp -r "${dir}_freeze/." _freeze/
          else
            # upload-artifact roots a single-directory upload at that
            # directory, so the artifact holds the *contents* of _freeze/
            # directly. Looking only for a nested _freeze/ would copy nothing.
            cp -r "${dir}." _freeze/
          fi
        done

    # NOTE(review): confirm that the Quarto version shipped in the CI image
    # accepts `--freeze`; the flag could not be verified from this workflow.
    - name: Render website (fr)
      env:
        TOKEN_API_INSEE: ${{ secrets.TOKEN_API_INSEE }}
      run: uv run quarto render --profile fr --to html --freeze

    - name: Switch sidebar to English
      run: uv run build/sidebar.py --to english

    - name: Render website (en)
      env:
        TOKEN_API_INSEE: ${{ secrets.TOKEN_API_INSEE }}
      run: uv run quarto render --profile en --to html --freeze

    - name: Reset sidebar to French
      run: uv run build/sidebar.py --to french

    - name: Archive build as artifacts
      uses: actions/upload-artifact@v4
      with:
        name: sitedir
        path: _site

    - name: Publish to Pages
      if: github.ref == 'refs/heads/main'
      run: |
        git config --global user.email quarto-github-actions-publish@example.com
        git config --global user.name "Quarto GHA Workflow Runner"
        # Invoked through `uv run` like every other Quarto call in this job,
        # so it resolves the same quarto executable.
        uv run quarto publish gh-pages . --no-render --no-browser

# ── 3. Exercises and corrections (unchanged, run in parallel with above) ────
enonces:
name: Render exercises
runs-on: ubuntu-latest
if: ${{ !github.event.pull_request.head.repo.fork }}
steps:

- uses: actions/checkout@v4
with:
fetch-depth: 0
ref: ${{ github.event.pull_request.head.ref }}
repository: ${{github.event.pull_request.head.repo.full_name}}

- name: Configure safe.directory # Workaround for actions/checkout#760
- name: Configure safe.directory
run: git config --global --add safe.directory /__w/python-datascientist/python-datascientist

- name: Install system dependencies
run: |
sudo ./build/requirements.sh
run: sudo ./build/requirements.sh

- name: Install the latest version of uv
uses: astral-sh/setup-uv@v7
Expand All @@ -99,7 +249,6 @@ jobs:
- name: Restore environment
run: uv sync

# Step 3: Set up Quarto
- name: Set up quarto
uses: quarto-dev/quarto-actions/setup@v2
with:
Expand All @@ -110,7 +259,6 @@ jobs:
uv run spacy download en_core_web_sm
uv run quarto --version


- name: Build and push notebooks
uses: linogaliana/python-datascientist-actions@main
with:
Expand All @@ -124,19 +272,17 @@ jobs:
runs-on: ubuntu-latest
if: ${{ !github.event.pull_request.head.repo.fork }}
steps:

- uses: actions/checkout@v4
with:
fetch-depth: 0
ref: ${{ github.event.pull_request.head.ref }}
repository: ${{github.event.pull_request.head.repo.full_name}}

- name: Configure safe.directory # Workaround for actions/checkout#760
- name: Configure safe.directory
run: git config --global --add safe.directory /__w/python-datascientist/python-datascientist

- name: Install system dependencies
run: |
sudo ./build/requirements.sh
run: sudo ./build/requirements.sh

- name: Install the latest version of uv
uses: astral-sh/setup-uv@v7
Expand All @@ -146,7 +292,6 @@ jobs:
- name: Restore environment
run: uv sync

# Step 3: Set up Quarto
- name: Set up quarto
uses: quarto-dev/quarto-actions/setup@v2
with:
Expand All @@ -157,7 +302,6 @@ jobs:
uv run spacy download en_core_web_sm
uv run quarto --version


- name: Build and push notebooks
uses: linogaliana/python-datascientist-actions-corrections@main
with:
Expand All @@ -166,5 +310,3 @@ jobs:
env:
TOKEN_API_INSEE: ${{ secrets.TOKEN_API_INSEE }}
API_TOKEN_GITHUB: ${{ secrets.API_TOKEN_GITHUB }}


Loading
Loading