From f614c2b90045642875dc04de3e1cc7ddebe9fe75 Mon Sep 17 00:00:00 2001 From: Cursor Agent Date: Thu, 21 May 2026 09:16:58 +0000 Subject: [PATCH] Handle GitHub metadata timeouts Co-authored-by: Armen Zambrano G. --- src/github_sdk.py | 95 ++++++++++++++++++++++++++++++---------- tests/test_github_sdk.py | 61 ++++++++++++++++++++++++++ 2 files changed, 133 insertions(+), 23 deletions(-) diff --git a/src/github_sdk.py b/src/github_sdk.py index cf28d01..d648f7b 100644 --- a/src/github_sdk.py +++ b/src/github_sdk.py @@ -6,11 +6,14 @@ import logging import uuid from datetime import datetime +from urllib.parse import urlparse import requests from sentry_sdk.envelope import Envelope from sentry_sdk.utils import format_timestamp +GITHUB_API_TIMEOUT = (3.05, 10) + class GithubSentryError(Exception): pass @@ -42,35 +45,47 @@ def __init__(self, token, dsn, dry_run=False) -> None: def _fetch_github(self, url): headers = {"Authorization": f"token {self.token}"} - req = requests.get(url, headers=headers) + req = requests.get(url, headers=headers, timeout=GITHUB_API_TIMEOUT) req.raise_for_status() return req def _get_extra_metadata(self, job): # XXX: This is the slowest call - runs = self._fetch_github(job["run_url"]).json() - workflow = self._fetch_github(runs["workflow_url"]).json() + meta = _metadata_from_job(job) + try: + runs = self._fetch_github(job["run_url"]).json() + except requests.exceptions.Timeout: + logging.warning( + "Timed out fetching Github run metadata for job %s.", + job.get("id"), + ) + return meta + + workflow_name = runs.get("name") + try: + workflow = self._fetch_github(runs["workflow_url"]).json() + workflow_name = workflow["path"].rsplit("/")[-1] + except requests.exceptions.Timeout: + logging.warning( + "Timed out fetching Github workflow metadata for run %s.", + runs.get("id"), + ) + repo = runs["repository"]["full_name"] - meta = { - # "workflow_name": workflow["name"], - "author": runs["head_commit"]["author"], - # https://getsentry.atlassian.net/browse/TET-22 - # Tags are not linkified externally, plain text data can be selected in browsers and opened - "data": { - "job": job["html_url"], - }, - "tags": { - # e.g. success, failure, skipped - "job_status": job["conclusion"], - "branch": runs["head_branch"], - "commit": runs["head_sha"], - "repo": repo, - "run_attempt": runs["run_attempt"], # Rerunning a job - "event": runs["event"], - # It allows querying jobs within the same workflow (e.g. foo.yml) - "workflow": workflow["path"].rsplit("/")[-1], - }, - } + meta["author"] = runs["head_commit"]["author"] + meta["tags"].update( + _without_empty_values( + { + "branch": runs["head_branch"], + "commit": runs["head_sha"], + "repo": repo, + "run_attempt": runs["run_attempt"], # Rerunning a job + "event": runs["event"], + # It allows querying jobs within the same workflow (e.g. foo.yml) + "workflow": workflow_name, + } + ) + ) if runs.get("pull_requests"): pr_number = runs["pull_requests"][0]["number"] meta["data"]["pr"] = f"https://github.com/{repo}/pull/{pr_number}" @@ -178,6 +193,40 @@ def _base_transaction(job): } +def _metadata_from_job(job): + # https://getsentry.atlassian.net/browse/TET-22 + # Tags are not linkified externally, plain text data can be selected in browsers and opened + return { + "author": {}, + "data": { + "job": job["html_url"], + }, + "tags": _without_empty_values( + { + # e.g. success, failure, skipped + "job_status": job["conclusion"], + "commit": job.get("head_sha"), + "repo": _repo_from_run_url(job.get("run_url")), + "run_attempt": job.get("run_attempt"), # Rerunning a job + "workflow": job.get("workflow_name"), + } + ), + } + + +def _repo_from_run_url(run_url): + if not run_url: + return None + path_parts = urlparse(run_url).path.strip("/").split("/") + if len(path_parts) < 3 or path_parts[0] != "repos": + return None + return f"{path_parts[1]}/{path_parts[2]}" + + +def _without_empty_values(items): + return {key: value for key, value in items.items() if value is not None} + + # https://develop.sentry.dev/sdk/event-payloads/span/ def _generate_spans(steps, parent_span_id, trace_id): spans = [] diff --git a/tests/test_github_sdk.py b/tests/test_github_sdk.py index 7f7e401..c4a98d2 100644 --- a/tests/test_github_sdk.py +++ b/tests/test_github_sdk.py @@ -11,6 +11,7 @@ from requests import HTTPError from sentry_sdk.utils import format_timestamp +from src.github_sdk import GITHUB_API_TIMEOUT from src.github_sdk import GithubClient DSN = "https://foo@random.ingest.sentry.io/bar" @@ -59,6 +60,21 @@ def test_ensure_raise_error_on_github_api_failure(): ) +@patch("src.github_sdk.requests.get") +def test_fetch_github_sets_timeout(mock_get): + url = "https://api.github.com/repos/getsentry/sentry/actions/runs/2104746951" + client = GithubClient(dsn=DSN, token=TOKEN) + + client._fetch_github(url) + + mock_get.assert_called_once_with( + url, + headers={"Authorization": f"token {TOKEN}"}, + timeout=GITHUB_API_TIMEOUT, + ) + mock_get.return_value.raise_for_status.assert_called_once() + + @freeze_time() @responses.activate @patch("src.github_sdk.get_uuid") @@ -110,6 +126,51 @@ def test_trace_generation_with_failing_steps( assert trace["tags"]["event"] == "push" +@responses.activate +def test_trace_generation_with_run_metadata_timeout(jobA_job): + responses.get( + jobA_job["run_url"], + body=requests.exceptions.ConnectTimeout(), + ) + + client = GithubClient(dsn=DSN, token=TOKEN) + trace = client._generate_trace(jobA_job) + + assert trace["user"] == {} + assert trace["contexts"]["trace"]["data"] == {"job": jobA_job["html_url"]} + assert trace["tags"] == { + "job_status": "success", + "commit": jobA_job["head_sha"], + "repo": "getsentry/sentry", + "run_attempt": 1, + } + + +@responses.activate +def test_trace_generation_with_workflow_metadata_timeout( + jobA_job, + jobA_runs, +): + responses.get( + jobA_job["run_url"], + json=jobA_runs, + ) + responses.get( + jobA_runs["workflow_url"], + body=requests.exceptions.ConnectTimeout(), + ) + + client = GithubClient(dsn=DSN, token=TOKEN) + trace = client._generate_trace(jobA_job) + + assert trace["user"] == jobA_runs["head_commit"]["author"] + assert trace["tags"]["branch"] == jobA_runs["head_branch"] + assert trace["tags"]["commit"] == jobA_runs["head_sha"] + assert trace["tags"]["event"] == jobA_runs["event"] + assert trace["tags"]["repo"] == jobA_runs["repository"]["full_name"] + assert trace["tags"]["workflow"] == jobA_runs["name"] + + @freeze_time() @responses.activate @patch("src.github_sdk.get_uuid")