Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
39 changes: 39 additions & 0 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,45 @@ jobs:
- name: Validate raw manifests
run: find manifests/ -name '*.yaml' -print0 | xargs -0 kubeconform -strict -summary -ignore-missing-schemas

gitops:
  name: GitOps (rollout + app-of-apps render)
  runs-on: ubuntu-latest
  defaults:
    run:
      # Name the shell explicitly. With `shell: bash` GitHub Actions runs
      # each step as `bash --noprofile --norc -eo pipefail {0}`; the
      # implicit default is `bash -e {0}` WITHOUT pipefail, under which a
      # failing `helm template` on the left of `| kubeconform` is masked
      # by kubeconform's own exit status.
      shell: bash
  steps:
    - uses: actions/checkout@v4

    - name: Install kubeconform + helm
      # NOTE(review): kubeconform is fetched from releases/latest while
      # helm is pinned to v3.15.0 — consider pinning kubeconform too so
      # CI results are reproducible.
      run: |
        # -f on both downloads: an HTTP error fails the step right here
        # instead of saving an error page for tar to choke on.
        curl -fsSL -o /tmp/kubeconform.tar.gz https://github.com/yannh/kubeconform/releases/latest/download/kubeconform-linux-amd64.tar.gz
        tar -xzf /tmp/kubeconform.tar.gz -C /tmp/
        chmod +x /tmp/kubeconform && sudo mv /tmp/kubeconform /usr/local/bin/kubeconform
        curl -fsSL -o /tmp/helm.tar.gz https://get.helm.sh/helm-v3.15.0-linux-amd64.tar.gz
        tar -xzf /tmp/helm.tar.gz -C /tmp/
        chmod +x /tmp/linux-amd64/helm && sudo mv /tmp/linux-amd64/helm /usr/local/bin/helm

    - name: Lint + render infra wrapper charts (argo-rollouts, argocd)
      # These pin the upstream chart via Chart.yaml dependencies; build
      # the dependency before lint/template. -ignore-missing-schemas
      # because the upstream charts ship CRDs (Rollout, Application).
      run: |
        # Both wrapper charts pin upstream charts from the argo-helm repo;
        # register it so `helm dependency build` can resolve them.
        helm repo add argo https://argoproj.github.io/argo-helm
        helm repo update
        for chart in infra/argo-rollouts infra/argocd; do
          helm dependency build "$chart"
          helm lint "$chart"
          # pipefail (see job defaults) makes a render failure fail the step.
          helm template "$chart" | kubeconform -strict -summary -ignore-missing-schemas
        done

    - name: Render rollout-enabled buyerchat + validate
      # values.dev.yaml sets rollout.enabled=true, so this renders the
      # Argo Rollout + AnalysisTemplate path (CRD-typed → needs
      # -ignore-missing-schemas). Relies on pipefail from the job defaults
      # so a failed render is not hidden behind kubeconform.
      run: helm template helm/buyerchat -f helm/buyerchat/values.dev.yaml | kubeconform -strict -summary -ignore-missing-schemas

    - name: Validate app-of-apps manifests
      run: kubeconform -strict -summary -ignore-missing-schemas argocd/root-app.yaml argocd/apps/*.yaml

build-docs:
name: Build docs site
runs-on: ubuntu-latest
Expand Down
8 changes: 7 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -5,4 +5,10 @@ dist/
.DS_Store
coverage/
*.tmp
kubeconform-cache/
kubeconform-cache/

# Vendored Helm chart dependencies (pulled by `helm dependency build`).
# Chart.lock is committed to pin exact versions; the .tgz archives are
# build artifacts and re-fetched in CI.
infra/*/charts/
helm/*/charts/
32 changes: 26 additions & 6 deletions Makefile
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
.PHONY: up down smoke lint help
.PHONY: up down smoke lint rollout-status help

KIND_CLUSTER := stackup
HELM_CHART := helm/buyerchat
Expand All @@ -7,10 +7,11 @@ NAMESPACE := app
help:
@echo "stackup Makefile"
@echo ""
@echo " make up Full bring-up: create kind cluster + install all platform components + buyerchat"
@echo " make down Tear down: delete kind cluster (clean)"
@echo " make smoke Run smoke tests (requires cluster up)"
@echo " make lint Lint all YAML files + Helm charts"
@echo " make up Full bring-up: create kind cluster + install all platform components + buyerchat"
@echo " make down Tear down: delete kind cluster (clean)"
@echo " make smoke Run smoke tests (requires cluster up)"
@echo " make lint Lint all YAML files + Helm charts"
@echo " make rollout-status Watch the buyerchat Argo Rollout canary progress"
@echo ""
@echo "Prerequisites: docker, kind, helm >=3.15, kubectl, git"

Expand All @@ -30,12 +31,28 @@ up:
helm upgrade --install --create-namespace --namespace $$(basename $$chart) $$chart $$chart --timeout 120s --wait --debug 2>&1 | tail -3 || true; \
done

@echo "=== Installing Argo Rollouts + ArgoCD ==="
@# Wrapper charts (Chart.yaml dependency on the upstream chart) — pull
@# the pinned dependency, then install. argo-rollouts first so the
@# Rollout CRDs exist before buyerchat renders a Rollout; argocd last.
@#
@# The release name is the chart directory's basename (argo-rollouts,
@# argocd), not the chart path: Helm rejects release names containing
@# "/", so passing infra/argo-rollouts as the release made the install
@# fail, and the trailing "|| true" hid that failure. The basename also
@# equals the namespace and, for argo-rollouts, the Application name in
@# argocd/apps/argo-rollouts.yaml.
@for chart in infra/argo-rollouts infra/argocd; do \
	release=$$(basename $$chart); \
	echo " Installing $$chart..."; \
	helm dependency build $$chart >/dev/null 2>&1 || true; \
	helm upgrade --install --create-namespace --namespace $$release $$release $$chart --timeout 300s --wait --debug 2>&1 | tail -3 || true; \
done

@echo "=== Installing buyerchat Helm chart ==="
helm upgrade --install buyerchat $(HELM_CHART) \
--namespace $(NAMESPACE) --create-namespace \
--values $(HELM_CHART)/values.dev.yaml \
--timeout 180s --wait

@echo "=== Registering the ArgoCD app-of-apps root ==="
@# From here on ArgoCD reconciles every component from git (automated
@# sync + prune + self-heal). The helm installs above bootstrap the
@# cluster on a clean machine; root-app.yaml is the GitOps takeover.
kubectl apply -f argocd/root-app.yaml

@echo ""
@echo "=== Cluster ready ==="
@# grep -v exits 0 only when it prints a line, i.e. when at least one pod
@# is neither Running nor Completed. The success message therefore
@# belongs in the else branch; chaining it with && printed
@# "All pods running" exactly when some pods were not. Still best-effort:
@# this never fails the target.
@if kubectl get pods -A --no-headers | grep -v -e Running -e Completed; then \
	echo "Some pods are not Running/Completed yet (listed above)"; \
else \
	echo "All pods running ✓"; \
fi
Expand Down Expand Up @@ -66,4 +83,7 @@ lint:
@echo ""
@echo "=== Helm lint ==="
@helm lint $(HELM_CHART) --quiet && echo "✓ helm lint passed" || echo "✗ helm lint failed"
@helm template buyerchat $(HELM_CHART) > /dev/null 2>&1 && echo "✓ helm template passed" || echo "✗ helm template failed"
@helm template buyerchat $(HELM_CHART) > /dev/null 2>&1 && echo "✓ helm template passed" || echo "✗ helm template failed"

# Follow the buyerchat Rollout in $(NAMESPACE) until interrupted.
# Uses the `kubectl argo rollouts` plugin command.
rollout-status:
	kubectl argo rollouts get rollout buyerchat --namespace $(NAMESPACE) --watch
28 changes: 16 additions & 12 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@

Managed Kubernetes costs $200+/month minimum on cloud providers. Stackup runs the full production stack on kind, on your laptop, for free.

What "full production stack" means: a real ArgoCD app-of-apps with 8 child applications, Argo Rollouts canary progressive delivery, Prometheus + Loki + Tempo observability, cert-manager TLS, Sealed Secrets encrypted in git, Calico NetworkPolicy enforcement, and Pod Security Standards `restricted` on every workload namespace.
What "full production stack" means: a real ArgoCD app-of-apps with 6 child applications, Argo Rollouts canary progressive delivery, Prometheus + Grafana observability, cert-manager TLS, Sealed Secrets encrypted in git, Calico NetworkPolicy enforcement, and Pod Security Standards `restricted` on every workload namespace.

The buyerchat workload deliberately runs degraded (no DB). That's intentional. The cluster is the demo — not the app.

Expand All @@ -23,8 +23,8 @@ The buyerchat workload deliberately runs degraded (no DB). That's intentional. T
|---|---|---|
| **Cluster** | kind on Docker | 3-node K8s in containers |
| **CNI** | Calico | NetworkPolicy enforcement |
| **GitOps** | ArgoCD (app-of-apps) | One root app manages 8 children; automated sync + prune + self-heal |
| **Progressive delivery** | Argo Rollouts | Canary 25→50→75→100%, auto-rollback on error spike |
| **GitOps** | ArgoCD (app-of-apps) | One root app manages 6 children; automated sync + prune + self-heal |
| **Progressive delivery** | Argo Rollouts | Canary 25→50→75→100%, analysis gate at 25% with auto-rollback |
| **Ingress** | ingress-nginx | TLS termination, hostPort 80/443 |
| **TLS** | cert-manager | Self-signed ClusterIssuer (swap to ACME in one line for prod) |
| **Secrets** | Sealed Secrets | Encrypted secrets in git, decrypted in-cluster |
Expand Down Expand Up @@ -56,19 +56,22 @@ Then open:

- **[https://buyerchat.local.stackup.dev](https://buyerchat.local.stackup.dev)** — workload, returns 503 degraded (no DB — expected)
- **[https://grafana.local.stackup.dev](https://grafana.local.stackup.dev)** — Grafana dashboards with RED metrics from Prometheus
- **[https://argocd.local.stackup.dev](https://argocd.local.stackup.dev)** — GitOps tree of 8 child apps
- **[https://argocd.local.stackup.dev](https://argocd.local.stackup.dev)** — GitOps tree of 6 child apps

---

## What it actually shows you

Push a commit that bumps `helm/buyerchat/values.yaml` image.tag. ArgoCD notices. Argo Rollouts applies the new Rollout resource. Watch:
Push a commit that bumps `helm/buyerchat/values.yaml` image.tag. ArgoCD notices and syncs. Argo Rollouts applies the new Rollout revision. Watch it advance:

```bash
kubectl argo rollouts get rollout buyerchat -n app --watch
make rollout-status
# same as: kubectl argo rollouts get rollout buyerchat -n app --watch
```

The canary scales to 25% replicas. Prometheus watches error rate for 60 seconds. If clean, advances to 50%. Then 75%. Then 100%. If error rate spikes, automatic rollback. This is the pattern Lyft and Netflix run in production. Running on your laptop. Free.
The canary shifts 25% of traffic to the new version, pauses, then runs an analysis step: an `AnalysisTemplate` queries Prometheus three times over 90 seconds. If the success condition holds, the rollout advances to 50%, then 75%, then 100%. If the analysis fails, Argo Rollouts aborts and rolls back to the previous revision. This is the canary pattern teams run in production, on your laptop, for free.

The current analysis query is a conservative liveness check (is the canary up and being scraped). Once the buyerchat image exports request counters on `/api/metrics`, swap it for a real success-rate ratio — the template carries a `TODO` marking the one line to change.

---

Expand All @@ -81,7 +84,7 @@ graph TD
Kind --> W1[Worker 1]
Kind --> W2[Worker 2]
CP --> Argo[ArgoCD]
Argo --> Apps[8 child apps]
Argo --> Apps[6 child apps]
Apps --> Rollout[Argo Rollouts CRD]
Rollout --> Pods[Canary pods]
Pods --> Prom[Prometheus]
Expand All @@ -100,10 +103,11 @@ For full topology + sequence diagrams, see [docs/architecture.md](docs/architect

```bash
make help # Show all targets
make up # Full bring-up: create cluster + install platform + buyerchat
make down # Tear down kind cluster (clean)
make smoke # Run smoke tests (requires cluster up)
make lint # Lint all YAML + Helm charts
make up # Full bring-up: create cluster + install platform + buyerchat
make down # Tear down kind cluster (clean)
make smoke # Run smoke tests (requires cluster up)
make lint # Lint all YAML + Helm charts
make rollout-status # Watch the buyerchat Argo Rollout canary progress
```

---
Expand Down
27 changes: 27 additions & 0 deletions argocd/apps/argo-rollouts.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
# Argo CD child Application: argo-rollouts.
#
# Source is the in-repo wrapper chart under infra/argo-rollouts, whose
# Chart.yaml pins the upstream chart as a dependency. Argo CD builds that
# dependency and renders the chart in place.
apiVersion: argoproj.io/v1alpha1
kind: Application
metadata:
  name: argo-rollouts
  namespace: argocd
  labels:
    app.kubernetes.io/part-of: stackup
spec:
  project: default
  destination:
    server: https://kubernetes.default.svc
    namespace: argo-rollouts
  source:
    repoURL: https://github.com/ykstorm/stackup
    targetRevision: main
    path: infra/argo-rollouts
  syncPolicy:
    # Fully automated: prune resources removed from git, revert drift.
    automated:
      prune: true
      selfHeal: true
    syncOptions:
      - CreateNamespace=true
32 changes: 32 additions & 0 deletions argocd/apps/buyerchat.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
# Argo CD child Application: buyerchat.
#
# Renders this repo's helm/buyerchat chart with values.dev.yaml. That
# values file sets rollout.enabled=true, so the output is the Argo Rollout
# + AnalysisTemplate rather than the plain Deployment.
#
# The target namespace `app` matches the Makefile's NAMESPACE and the
# documented `kubectl argo rollouts get rollout buyerchat -n app` command.
apiVersion: argoproj.io/v1alpha1
kind: Application
metadata:
  name: buyerchat
  namespace: argocd
  labels:
    app.kubernetes.io/part-of: stackup
spec:
  project: default
  destination:
    server: https://kubernetes.default.svc
    namespace: app
  source:
    repoURL: https://github.com/ykstorm/stackup
    targetRevision: main
    path: helm/buyerchat
    helm:
      valueFiles:
        - values.dev.yaml
  syncPolicy:
    # Fully automated: prune resources removed from git, revert drift.
    automated:
      prune: true
      selfHeal: true
    syncOptions:
      - CreateNamespace=true
33 changes: 33 additions & 0 deletions argocd/apps/cert-manager.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
# Argo CD child Application: cert-manager.
#
# Installs the pinned upstream chart with installCRDs=true, the same
# setting as the documented install in infra/cert-manager/README.md.
#
# Not covered here: the selfsigned ClusterIssuer
# (infra/cert-manager/clusterissuer-selfsigned.yaml). It is a cert-manager
# CRD object rather than part of this chart's render, so bring-up applies
# it out of band.
apiVersion: argoproj.io/v1alpha1
kind: Application
metadata:
  name: cert-manager
  namespace: argocd
  labels:
    app.kubernetes.io/part-of: stackup
spec:
  project: default
  destination:
    server: https://kubernetes.default.svc
    namespace: cert-manager
  source:
    repoURL: https://charts.jetstack.io
    chart: cert-manager
    targetRevision: v1.20.2
    helm:
      parameters:
        # Quoted on purpose: Helm parameter values are strings.
        # NOTE(review): upstream documents installCRDs as deprecated in
        # favour of crds.enabled — confirm it is still honoured by this
        # chart version before the next bump.
        - name: installCRDs
          value: "true"
  syncPolicy:
    # Fully automated: prune resources removed from git, revert drift.
    automated:
      prune: true
      selfHeal: true
    syncOptions:
      - CreateNamespace=true
34 changes: 34 additions & 0 deletions argocd/apps/ingress-nginx.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
# Argo CD child Application: ingress-nginx.
#
# Two sources are combined:
#   1. the pinned upstream ingress-nginx chart, and
#   2. this repo, exposed under the ref name `values`, so the chart can
#      load the values-only overlay at infra/ingress-nginx/values.yaml
#      through the $values prefix.
# This mirrors the documented install in infra/ingress-nginx/README.md.
apiVersion: argoproj.io/v1alpha1
kind: Application
metadata:
  name: ingress-nginx
  namespace: argocd
  labels:
    app.kubernetes.io/part-of: stackup
spec:
  project: default
  destination:
    server: https://kubernetes.default.svc
    namespace: ingress-nginx
  sources:
    # Upstream chart, rendered with the overlay from the second source.
    - repoURL: https://kubernetes.github.io/ingress-nginx
      chart: ingress-nginx
      targetRevision: "4.15.1"
      helm:
        valueFiles:
          - $values/infra/ingress-nginx/values.yaml
    # Values-only reference to this repo (declares no path or chart).
    - repoURL: https://github.com/ykstorm/stackup
      targetRevision: main
      ref: values
  syncPolicy:
    # Fully automated: prune resources removed from git, revert drift.
    automated:
      prune: true
      selfHeal: true
    syncOptions:
      - CreateNamespace=true
37 changes: 37 additions & 0 deletions argocd/apps/kube-prometheus-stack.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
# Argo CD child Application: kube-prometheus-stack.
#
# Two sources: the pinned upstream chart, plus this repo as the `values`
# ref supplying the overlay at infra/kube-prometheus-stack/values.yaml.
#
# Do not rename the release. `kps` is load-bearing:
#   - ServiceMonitors elsewhere carry the label `release: kps`
#     (see infra/kube-prometheus-stack/README.md);
#   - the AnalysisTemplate's Prometheus address depends on it.
apiVersion: argoproj.io/v1alpha1
kind: Application
metadata:
  name: kube-prometheus-stack
  namespace: argocd
  labels:
    app.kubernetes.io/part-of: stackup
spec:
  project: default
  destination:
    server: https://kubernetes.default.svc
    namespace: monitoring
  sources:
    # Upstream chart, rendered with the overlay from the second source.
    - repoURL: https://prometheus-community.github.io/helm-charts
      chart: kube-prometheus-stack
      targetRevision: "84.5.0"
      helm:
        releaseName: kps
        valueFiles:
          - $values/infra/kube-prometheus-stack/values.yaml
    # Values-only reference to this repo (declares no path or chart).
    - repoURL: https://github.com/ykstorm/stackup
      targetRevision: main
      ref: values
  syncPolicy:
    # Fully automated: prune resources removed from git, revert drift.
    automated:
      prune: true
      selfHeal: true
    syncOptions:
      - CreateNamespace=true
      # NOTE(review): presumably needed because this chart's CRDs are too
      # large for client-side apply — confirm.
      - ServerSideApply=true
27 changes: 27 additions & 0 deletions argocd/apps/sealed-secrets.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
# Argo CD child Application: sealed-secrets.
#
# Installs the pinned upstream chart into kube-system. Both the release
# name `sealed-secrets` and the namespace `kube-system` are load-bearing:
# the repo's kubeseal invocations assume them
# (see infra/sealed-secrets/README.md).
apiVersion: argoproj.io/v1alpha1
kind: Application
metadata:
  name: sealed-secrets
  namespace: argocd
  labels:
    app.kubernetes.io/part-of: stackup
spec:
  project: default
  destination:
    server: https://kubernetes.default.svc
    namespace: kube-system
  source:
    repoURL: https://bitnami-labs.github.io/sealed-secrets
    chart: sealed-secrets
    targetRevision: "2.18.6"
    helm:
      releaseName: sealed-secrets
  syncPolicy:
    # Fully automated: prune resources removed from git, revert drift.
    automated:
      prune: true
      selfHeal: true
Loading
Loading