From d8dc8730d44eaf1828fce48071ddbd77711a0ab6 Mon Sep 17 00:00:00 2001
From: Radovan Fuchs
Date: Fri, 5 Jun 2026 10:29:22 +0200
Subject: [PATCH 1/2] prevent HF from downloading

Cache the sentence-transformers/all-mpnet-base-v2 embedding model in the
e2e workflow, bind-mount that cache into the compose services and default
HF_HUB_OFFLINE to 1 so the containers do not contact the HuggingFace Hub.

The model is downloaded only on a cache miss, so jobs with a warm cache
make no Hub requests. HF_CACHE_PATH is exported on every run because the
compose files mount it. HF_HUB_OFFLINE stays overridable from the
environment, like the other compose variables.

---
 .github/workflows/e2e_tests.yaml | 21 +++++++++++++++++++++
 docker-compose-library.yaml      |  3 +++
 docker-compose.yaml              |  3 +++
 3 files changed, 27 insertions(+)

diff --git a/.github/workflows/e2e_tests.yaml b/.github/workflows/e2e_tests.yaml
index ab53a66b3..377154950 100644
--- a/.github/workflows/e2e_tests.yaml
+++ b/.github/workflows/e2e_tests.yaml
@@ -114,6 +114,27 @@ jobs:
           echo "=== lightspeed-stack.yaml ==="
           grep -A 3 "llama_stack:" lightspeed-stack.yaml
 
+      - name: Cache HuggingFace embedding model
+        id: hf-cache
+        uses: actions/cache@v4
+        with:
+          path: /tmp/hf-cache
+          key: hf-sentence-transformers-all-mpnet-base-v2
+
+      # Only contact the HuggingFace Hub on a cache miss, so parallel jobs with
+      # a warm cache make no Hub requests (avoids HTTP 429 rate-limiting).
+      - name: Pre-download HuggingFace embedding model
+        if: steps.hf-cache.outputs.cache-hit != 'true'
+        env:
+          HF_HOME: /tmp/hf-cache
+        run: |
+          pip install -q sentence-transformers
+          python -c "from sentence_transformers import SentenceTransformer; SentenceTransformer('sentence-transformers/all-mpnet-base-v2')"
+
+      # Always export the path: the compose files bind-mount HF_CACHE_PATH.
+      - name: Export HuggingFace cache path
+        run: echo "HF_CACHE_PATH=/tmp/hf-cache" >> "$GITHUB_ENV"
+
       - name: Docker Login for quay access
         if: matrix.mode == 'server'
         env:
diff --git a/docker-compose-library.yaml b/docker-compose-library.yaml
index e268e4aef..f0e075848 100755
--- a/docker-compose-library.yaml
+++ b/docker-compose-library.yaml
@@ -19,6 +19,7 @@ services:
       - ./run.yaml:/app-root/run.yaml:Z
       - ${GCP_KEYS_PATH:-./tmp/.gcp-keys-dummy}:/opt/app-root/.gcp-keys:ro
       - ./tests/e2e/rag:/opt/app-root/src/.llama/storage/rag:Z
+      - ${HF_CACHE_PATH:-./tmp/.hf-cache}:/opt/app-root/src/.cache/huggingface:z
       - ./tests/e2e/secrets/mcp-token:/tmp/mcp-token:ro,z
       - ./tests/e2e/secrets/invalid-mcp-token:/tmp/invalid-mcp-token:ro,z
     environment:
@@ -57,6 +58,8 @@ services:
       - LLAMA_STACK_LOGGING=${LLAMA_STACK_LOGGING:-}
       # FAISS test and inline RAG config
       - FAISS_VECTOR_STORE_ID=${FAISS_VECTOR_STORE_ID:-}
+      # Offline by default (avoids HF Hub HTTP 429 rate-limiting in CI); set HF_HUB_OFFLINE=0 to allow downloads.
+      - HF_HUB_OFFLINE=${HF_HUB_OFFLINE:-1}
     healthcheck:
       test: ["CMD", "curl", "-f", "http://localhost:8080/liveness"]
       interval: 10s # how often to run the check
diff --git a/docker-compose.yaml b/docker-compose.yaml
index d65f7158c..3551c0e8a 100755
--- a/docker-compose.yaml
+++ b/docker-compose.yaml
@@ -21,6 +21,7 @@ services:
       - llama-storage:/opt/app-root/src/.llama/storage
       - ./tests/e2e/rag:/opt/app-root/src/.llama/storage/rag:z
       - mock-tls-certs:/certs:ro
+      - ${HF_CACHE_PATH:-./tmp/.hf-cache}:/opt/app-root/src/.cache/huggingface:z
     environment:
       - BRAVE_SEARCH_API_KEY=${BRAVE_SEARCH_API_KEY:-}
       - TAVILY_SEARCH_API_KEY=${TAVILY_SEARCH_API_KEY:-}
@@ -57,6 +58,8 @@ services:
       - LLAMA_STACK_LOGGING=${LLAMA_STACK_LOGGING:-}
       # FAISS test
       - FAISS_VECTOR_STORE_ID=${FAISS_VECTOR_STORE_ID:-}
+      # Offline by default (avoids HF Hub HTTP 429 rate-limiting in CI); set HF_HUB_OFFLINE=0 to allow downloads.
+      - HF_HUB_OFFLINE=${HF_HUB_OFFLINE:-1}
       # OKP/Solr RAG
       - RH_SERVER_OKP=${RH_SERVER_OKP:-}
       - SOLR_URL=${SOLR_URL:-}

From 36fd4ff9fedebf31a3db017bd368b8d54467db20 Mon Sep 17 00:00:00 2001
From: Radovan Fuchs
Date: Fri, 5 Jun 2026 10:37:28 +0200
Subject: [PATCH 2/2] added other workflows

Add the same HuggingFace embedding model cache, conditional pre-download
and HF_CACHE_PATH export steps to the providers, rhaiis and rhelai e2e
workflows.

---
 .github/workflows/e2e_tests_providers.yaml | 21 +++++++++++++++++++++
 .github/workflows/e2e_tests_rhaiis.yaml    | 21 +++++++++++++++++++++
 .github/workflows/e2e_tests_rhelai.yaml    | 21 +++++++++++++++++++++
 3 files changed, 63 insertions(+)

diff --git a/.github/workflows/e2e_tests_providers.yaml b/.github/workflows/e2e_tests_providers.yaml
index f42051edd..1aaaf9aa2 100644
--- a/.github/workflows/e2e_tests_providers.yaml
+++ b/.github/workflows/e2e_tests_providers.yaml
@@ -192,6 +192,27 @@ jobs:
           echo "=== lightspeed-stack.yaml ==="
           grep -A 3 "llama_stack:" lightspeed-stack.yaml
 
+      - name: Cache HuggingFace embedding model
+        id: hf-cache
+        uses: actions/cache@v4
+        with:
+          path: /tmp/hf-cache
+          key: hf-sentence-transformers-all-mpnet-base-v2
+
+      # Only contact the HuggingFace Hub on a cache miss, so parallel jobs with
+      # a warm cache make no Hub requests (avoids HTTP 429 rate-limiting).
+      - name: Pre-download HuggingFace embedding model
+        if: steps.hf-cache.outputs.cache-hit != 'true'
+        env:
+          HF_HOME: /tmp/hf-cache
+        run: |
+          pip install -q sentence-transformers
+          python -c "from sentence_transformers import SentenceTransformer; SentenceTransformer('sentence-transformers/all-mpnet-base-v2')"
+
+      # Always export the path: the compose files bind-mount HF_CACHE_PATH.
+      - name: Export HuggingFace cache path
+        run: echo "HF_CACHE_PATH=/tmp/hf-cache" >> "$GITHUB_ENV"
+
       - name: Docker Login for quay access
         env:
           QUAY_ROBOT_USERNAME: ${{ secrets.QUAY_DOWNSTREAM_USERNAME }}
diff --git a/.github/workflows/e2e_tests_rhaiis.yaml b/.github/workflows/e2e_tests_rhaiis.yaml
index 9e2b3fc48..0e847ff44 100644
--- a/.github/workflows/e2e_tests_rhaiis.yaml
+++ b/.github/workflows/e2e_tests_rhaiis.yaml
@@ -129,6 +129,27 @@ jobs:
           echo "$BODY"
           [ "$HTTP_CODE" = "200" ]
 
+      - name: Cache HuggingFace embedding model
+        id: hf-cache
+        uses: actions/cache@v4
+        with:
+          path: /tmp/hf-cache
+          key: hf-sentence-transformers-all-mpnet-base-v2
+
+      # Only contact the HuggingFace Hub on a cache miss, so parallel jobs with
+      # a warm cache make no Hub requests (avoids HTTP 429 rate-limiting).
+      - name: Pre-download HuggingFace embedding model
+        if: steps.hf-cache.outputs.cache-hit != 'true'
+        env:
+          HF_HOME: /tmp/hf-cache
+        run: |
+          pip install -q sentence-transformers
+          python -c "from sentence_transformers import SentenceTransformer; SentenceTransformer('sentence-transformers/all-mpnet-base-v2')"
+
+      # Always export the path: the compose files bind-mount HF_CACHE_PATH.
+      - name: Export HuggingFace cache path
+        run: echo "HF_CACHE_PATH=/tmp/hf-cache" >> "$GITHUB_ENV"
+
       - name: Docker Login for quay access
         if: matrix.mode == 'server'
         env:
diff --git a/.github/workflows/e2e_tests_rhelai.yaml b/.github/workflows/e2e_tests_rhelai.yaml
index b57614350..25110d7e0 100644
--- a/.github/workflows/e2e_tests_rhelai.yaml
+++ b/.github/workflows/e2e_tests_rhelai.yaml
@@ -143,6 +143,27 @@ jobs:
           echo "$BODY"
           [ "$HTTP_CODE" = "200" ]
 
+      - name: Cache HuggingFace embedding model
+        id: hf-cache
+        uses: actions/cache@v4
+        with:
+          path: /tmp/hf-cache
+          key: hf-sentence-transformers-all-mpnet-base-v2
+
+      # Only contact the HuggingFace Hub on a cache miss, so parallel jobs with
+      # a warm cache make no Hub requests (avoids HTTP 429 rate-limiting).
+      - name: Pre-download HuggingFace embedding model
+        if: steps.hf-cache.outputs.cache-hit != 'true'
+        env:
+          HF_HOME: /tmp/hf-cache
+        run: |
+          pip install -q sentence-transformers
+          python -c "from sentence_transformers import SentenceTransformer; SentenceTransformer('sentence-transformers/all-mpnet-base-v2')"
+
+      # Always export the path: the compose files bind-mount HF_CACHE_PATH.
+      - name: Export HuggingFace cache path
+        run: echo "HF_CACHE_PATH=/tmp/hf-cache" >> "$GITHUB_ENV"
+
       - name: Docker Login for quay access
         env:
           QUAY_ROBOT_USERNAME: ${{ secrets.QUAY_DOWNSTREAM_USERNAME }}