From be20c48e9d66cd8f27f36fb265a3955fe6594ae7 Mon Sep 17 00:00:00 2001 From: Shivang Trivedi <69757440+Shivang0@users.noreply.github.com> Date: Mon, 22 Jun 2026 19:31:22 +0200 Subject: [PATCH] Add social-vision plugin (Marketing Growth) --- .claude-plugin/marketplace.json | 34 +- README.md | 3 +- .../social-vision/.claude-plugin/plugin.json | 21 + plugins/social-vision/.gitignore | 3 + plugins/social-vision/LICENSE | 21 + plugins/social-vision/README.md | 147 +++++++ plugins/social-vision/commands/analyze.md | 15 + plugins/social-vision/scripts/analyze.py | 378 ++++++++++++++++++ plugins/social-vision/scripts/bootstrap.py | 240 +++++++++++ plugins/social-vision/scripts/transcribe.py | 111 +++++ .../skills/analyze-social/SKILL.md | 60 +++ 11 files changed, 1025 insertions(+), 8 deletions(-) create mode 100644 plugins/social-vision/.claude-plugin/plugin.json create mode 100644 plugins/social-vision/.gitignore create mode 100644 plugins/social-vision/LICENSE create mode 100644 plugins/social-vision/README.md create mode 100644 plugins/social-vision/commands/analyze.md create mode 100644 plugins/social-vision/scripts/analyze.py create mode 100644 plugins/social-vision/scripts/bootstrap.py create mode 100644 plugins/social-vision/scripts/transcribe.py create mode 100644 plugins/social-vision/skills/analyze-social/SKILL.md diff --git a/.claude-plugin/marketplace.json b/.claude-plugin/marketplace.json index 6f29b6b..1917cf9 100644 --- a/.claude-plugin/marketplace.json +++ b/.claude-plugin/marketplace.json @@ -5,7 +5,7 @@ "email": "support@claudecodeplugins.dev" }, "metadata": { - "description": "Awesome Claude Code plugins — a curated list of slash commands, subagents, MCP servers, and hooks for Claude Code", + "description": "Awesome Claude Code plugins \u2014 a curated list of slash commands, subagents, MCP servers, and hooks for Claude Code", "version": "0.0.1", "homepage": "https://claudecodeplugins.dev" }, @@ -73,7 +73,7 @@ { "name": "ultrathink", "source": 
"./plugins/ultrathink", - "description": "Use /ultrathink to launch a Coordinator Agent that directs four specialist sub-agents—Architect, Research, Coder, and Tester—to analyze, design, implement, and validate your coding task. The process breaks the task into clear steps, gathers insights, and synthesizes a cohesive solution with actionable outputs. Relevant files can be referenced ad-hoc using @ filename syntax.", + "description": "Use /ultrathink to launch a Coordinator Agent that directs four specialist sub-agents\u2014Architect, Research, Coder, and Tester\u2014to analyze, design, implement, and validate your coding task. The process breaks the task into clear steps, gathers insights, and synthesizes a cohesive solution with actionable outputs. Relevant files can be referenced ad-hoc using @ filename syntax.", "version": "1.0.0", "author": { "name": "Jeronim Morina" @@ -847,7 +847,7 @@ "description": "Use this agent when setting up CI/CD pipelines, configuring Docker containers, deploying applications to cloud platforms, setting up Kubernetes clusters, implementing infrastructure as code, or automating deployment workflows. Examples: Context: User is setting up a new project and needs deployment automation. user: \"I've built a FastAPI application and need to deploy it to production with proper CI/CD\" assistant: \"I'll use the deployment-engineer agent to set up a complete deployment pipeline with Docker, GitHub Actions, and production-ready configurations.\" Context: User mentions containerization or deployment issues. user: \"Our deployment process is manual and error-prone. 
We need to automate it.\" assistant: \"Let me use the deployment-engineer agent to design an automated CI/CD pipeline that eliminates manual steps and ensures reliable deployments.\"", "version": "1.0.0", "author": { - "name": "Jure Šunić" + "name": "Jure \u0160uni\u0107" }, "category": "Automation DevOps", "homepage": "https://github.com/ccplugins/awesome-claude-code-plugins/tree/main/plugins/deployment-engineer", @@ -1141,7 +1141,7 @@ "description": "Use this agent when you need to design, build, or validate n8n automation workflows. This agent specializes in creating efficient n8n workflows using proper validation techniques and MCP tools integration.\\n\\nExamples:\\n- \\n Context: User wants to create a Slack notification workflow when a new GitHub issue is created.\\n user: \"I need to create an n8n workflow that sends a Slack message whenever a new GitHub issue is opened\"\\n assistant: \"I'll use the n8n-workflow-builder agent to design and build this GitHub-to-Slack automation workflow with proper validation.\"\\n \\n The user needs n8n workflow creation, so use the n8n-workflow-builder agent to handle the complete workflow design, validation, and deployment process.\\n \\n\\n- \\n Context: User has an existing n8n workflow that needs debugging and optimization.\\n user: \"My n8n workflow keeps failing at the HTTP Request node, can you help me fix it?\"\\n assistant: \"I'll use the n8n-workflow-builder agent to analyze and debug your workflow, focusing on the HTTP Request node configuration.\"\\n \\n Since this involves n8n workflow troubleshooting and validation, use the n8n-workflow-builder agent to diagnose and fix the issue.\\n \\n\\n- \\n Context: User wants to understand n8n best practices and available nodes for a specific use case.\\n user: \"What are the best n8n nodes for processing CSV data and sending email reports?\"\\n assistant: \"I'll use the n8n-workflow-builder agent to explore the available nodes and recommend the best approach for CSV 
processing and email automation.\"\\n \\n This requires n8n expertise and node discovery, so use the n8n-workflow-builder agent to provide comprehensive guidance.\\n \\n", "version": "1.0.0", "author": { - "name": "Jure Šunić" + "name": "Jure \u0160uni\u0107" }, "category": "Automation DevOps", "homepage": "https://github.com/ccplugins/awesome-claude-code-plugins/tree/main/plugins/n8n-workflow-builder", @@ -1197,7 +1197,7 @@ "description": "Use this agent when you need to create comprehensive Product Requirements Documents (PRDs) that combine business strategy, technical architecture, and user research. Examples: Context: The user needs to create a PRD for a new feature or product launch. user: \"I need to create a PRD for our new user authentication system that will support SSO and multi-factor authentication\" assistant: \"I'll use the prd-specialist agent to create a comprehensive PRD that covers the strategic foundation, technical requirements, and implementation blueprint for your authentication system.\" Context: The user is planning a major product initiative and needs strategic documentation. user: \"We're launching a mobile app for our e-commerce platform and need a detailed PRD to guide development\" assistant: \"Let me engage the prd-specialist agent to develop a thorough PRD that includes market analysis, user research integration, technical architecture, and implementation roadmap for your mobile app initiative.\"", "version": "1.0.0", "author": { - "name": "Jure Šunić" + "name": "Jure \u0160uni\u0107" }, "category": "Project & Product Management", "homepage": "https://github.com/ccplugins/awesome-claude-code-plugins/tree/main/plugins/prd-specialist", @@ -1281,7 +1281,7 @@ "description": "Use this agent when working with Python code that requires advanced features, performance optimization, or comprehensive refactoring. Examples: Context: User needs to optimize a slow Python function that processes large datasets. 
user: \"This function is taking too long to process our data, can you help optimize it?\" assistant: \"I'll use the python-expert agent to analyze and optimize your Python code with advanced techniques and performance profiling.\" Context: User wants to implement async/await patterns in their existing synchronous Python code. user: \"I need to convert this synchronous code to use async/await for better performance\" assistant: \"Let me use the python-expert agent to refactor your code with proper async/await patterns and concurrent programming techniques.\" Context: User needs help implementing complex Python design patterns. user: \"I want to implement a factory pattern with decorators for my API endpoints\" assistant: \"I'll use the python-expert agent to implement advanced Python patterns with decorators and proper design principles.\"", "version": "1.0.0", "author": { - "name": "Jure Šunić" + "name": "Jure \u0160uni\u0107" }, "category": "Development Engineering", "homepage": "https://github.com/ccplugins/awesome-claude-code-plugins/tree/main/plugins/python-expert", @@ -1670,6 +1670,26 @@ "security", "compliance" ] + }, + { + "name": "social-vision", + "source": "./plugins/social-vision", + "description": "Paste an Instagram, TikTok, YouTube (Shorts), or X/Twitter link and Claude watches it for you \u2014 transcribes the audio, reads on-screen text and visuals, and explains what it's about. 
Local and cross-platform.", + "version": "1.0.0", + "author": { + "name": "Shivang Trivedi", + "url": "https://github.com/Shivang0" + }, + "category": "Marketing Growth", + "homepage": "https://github.com/ccplugins/awesome-claude-code-plugins/tree/main/plugins/social-vision", + "keywords": [ + "video", + "instagram", + "tiktok", + "youtube", + "twitter", + "transcription" + ] } ] -} \ No newline at end of file +} diff --git a/README.md b/README.md index e4de615..7f74f6e 100644 --- a/README.md +++ b/README.md @@ -165,6 +165,7 @@ Install or disable them dynamically with the `/plugin` command — enabling you - [growth-hacker](./plugins/growth-hacker) - [instagram-curator](./plugins/instagram-curator) - [reddit-community-builder](./plugins/reddit-community-builder) +- [social-vision](./plugins/social-vision) - [tiktok-strategist](./plugins/tiktok-strategist) - [twitter-engager](./plugins/twitter-engager) @@ -212,4 +213,4 @@ Example: ## Contributing Contributions are welcome! - You can add your favorite plugins, share best practices, or submit your own marketplace. \ No newline at end of file + You can add your favorite plugins, share best practices, or submit your own marketplace. diff --git a/plugins/social-vision/.claude-plugin/plugin.json b/plugins/social-vision/.claude-plugin/plugin.json new file mode 100644 index 0000000..ab06d66 --- /dev/null +++ b/plugins/social-vision/.claude-plugin/plugin.json @@ -0,0 +1,21 @@ +{ + "name": "social-vision", + "version": "1.0.0", + "description": "Paste an Instagram, TikTok, YouTube (Shorts), or X/Twitter link and Claude watches it for you — transcribes the audio, reads the on-screen text and visuals, and explains what it's actually about. 
Runs locally and cross-platform.", + "author": { + "name": "Shivang Trivedi" + }, + "homepage": "https://github.com/Shivang0/social-vision", + "repository": "https://github.com/Shivang0/social-vision", + "license": "MIT", + "keywords": [ + "video", + "instagram", + "tiktok", + "youtube", + "twitter", + "transcription", + "whisper", + "vision" + ] +} diff --git a/plugins/social-vision/.gitignore b/plugins/social-vision/.gitignore new file mode 100644 index 0000000..b908d4c --- /dev/null +++ b/plugins/social-vision/.gitignore @@ -0,0 +1,3 @@ +__pycache__/ +*.pyc +.DS_Store diff --git a/plugins/social-vision/LICENSE b/plugins/social-vision/LICENSE new file mode 100644 index 0000000..4233b4e --- /dev/null +++ b/plugins/social-vision/LICENSE @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2026 Shivang Trivedi + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. 
diff --git a/plugins/social-vision/README.md b/plugins/social-vision/README.md new file mode 100644 index 0000000..296c701 --- /dev/null +++ b/plugins/social-vision/README.md @@ -0,0 +1,147 @@ +# 🎬 social-vision + +**Paste a social media link. Claude watches the video for you.** + +social-vision is a Claude Code plugin that lets you drop in an Instagram, TikTok, +YouTube, or X (Twitter) link — and Claude will *actually understand it*. It +listens to the audio, reads the text and graphics on screen, reads the caption, +and then tells you what the video is really about. + +No copy-pasting transcripts. No "I can't watch videos." Just paste and ask. + +--- + +## What it can do + +- 🎧 **Transcribe the audio** — word for word, with timestamps. +- 👀 **See what's on screen** — on-screen captions, charts, graphics, products, the setting — Claude looks at the actual frames. +- 📝 **Read the caption** — including hashtags, mentions, and links. +- 🧠 **Explain it** — a full breakdown: what's said, what's shown, and what it's trying to get you to do. +- 🖼️ **Handle photo posts too** — Instagram carousels and X image posts are read slide by slide. + +### Supported links + +| Platform | Reels / Videos / Shorts | Photo posts & carousels | +|----------|:----------------------:|:-----------------------:| +| Instagram | ✅ | ✅ | +| TikTok | ✅ | — | +| YouTube + Shorts | ✅ | — | +| X / Twitter | ✅ | ✅ | +| A video file on your computer | ✅ | — | + +--- + +## Install + +In Claude Code: + +``` +/plugin marketplace add Shivang0/social-vision +/plugin install social-vision@social-vision +/reload-plugins +``` + +That's it. No keys, no accounts, no sign-ups. + +> Prefer to run it from a local checkout? Clone the repo and +> `/plugin marketplace add /path/to/social-vision` instead. 
+ +--- + +## Use it + +Just paste a link into Claude Code: + +> analyze https://www.instagram.com/reel/XXXXXXXX/ + +or use the command: + +> /analyze https://www.tiktok.com/@someone/video/123456789 + +Claude takes care of the rest and gives you the full breakdown. You can then ask +follow-ups like *"just give me the hook,"* *"list every stat it mentioned,"* or +*"summarize it in 3 bullets."* + +### Example + +**You:** + +> /analyze https://www.instagram.com/reel/XXXXXXXX/ + +**social-vision** (after ~30–60s): + +> **Reel by @creator — "How the new LinkedIn algorithm works"** +> Instagram · 99s · 1,975 likes +> +> **TL;DR** — A talking-head reel arguing likes no longer matter; saves, comments, and dwell time now drive reach. +> +> **Full transcript (verbatim, with timestamps)** +> `[00:00]` I grew my LinkedIn followers to over 210,000 by using the new algorithm to my advantage… +> `[00:16]` LinkedIn replaced its engagement-based system with a 150-billion-parameter model… +> *…complete, nothing dropped…* +> +> **On screen** — creator at a podcast mic; text overlays "saves 5×", "comments 15×"; a ranked chart: Text 2–4% ‹ Video 5.6% ‹ Carousel 6.6%. +> +> **Caption** — "LinkedIn's new algorithm doesn't care how many likes you get… comment PLAYBOOK 👇 #linkedintips" +> +> **Takeaway** — Educational hook + lead magnet: optimize for saves / long comments / carousels; "comment PLAYBOOK" funnels to a paid program. + +Then keep asking: *"list every stat,"* *"just the hook,"* *"summarize in 3 bullets,"* *"what's the call to action?"* + +> Works the same for a **TikTok**, **YouTube Short**, **X post**, or a local `.mp4`. Image carousels come back slide-by-slide. + +### First run takes a few minutes ⏳ + +The very first time you use it, social-vision quietly sets itself up — it +installs the small tools it needs to download and transcribe videos. You may be +asked to approve an install or two. **This happens once.** Every run after that +is fast. 
+ +It automatically picks the best transcription engine for your computer (Apple +Silicon Macs, NVIDIA GPUs, and regular laptops are all handled), and everything +runs **on your own machine** — your videos are never uploaded anywhere. + +--- + +## 🔑 Private or login-only content + +Some posts can only be viewed when you're logged in — most **Instagram photo +posts and carousels**, private accounts, and many **X/Twitter posts**. + +To analyze those, **stay logged into that platform in your normal web browser** +(Chrome, Firefox, Safari, Edge, or Brave). When Claude hits a login wall, it will +ask which browser you use, then borrow your existing login to fetch the post. + +- It only does this **when you say yes** — it never touches your browser otherwise. +- On a Mac, your system may pop up a keychain prompt to allow it; that's normal. +- TikTok and YouTube are usually public, so they work without any of this. + +--- + +## 🔒 Privacy + +- Everything runs **locally on your computer**. Videos and audio are not sent to any third party. +- Your browser login is only used when you explicitly approve it for a private post, and it stays on your machine. +- Downloaded videos and results are saved under `~/.social-vision/` so you can find or delete them anytime. + +--- + +## Troubleshooting + +| Problem | Fix | +|---------|-----| +| "ffmpeg could not be installed" | Run the one-line command Claude shows you (e.g. `brew install ffmpeg` on Mac, `winget install Gyan.FFmpeg` on Windows, `sudo apt install ffmpeg` on Linux). | +| A link suddenly stops downloading | Platforms change often. Ask Claude to update the downloader — it can refresh the tool automatically. | +| "Needs login" on a public-looking post | Tell Claude which browser you're logged into; it'll retry using your session. | +| Transcript looks repetitive/empty | The video probably has little or no speech (music only). The on-screen visuals still get analyzed. 
| +--- + +## Notes + +- This tool downloads content for your own personal analysis. Respect each + platform's terms of service and the rights of content creators. + +## License + +MIT — see [LICENSE](LICENSE). diff --git a/plugins/social-vision/commands/analyze.md b/plugins/social-vision/commands/analyze.md new file mode 100644 index 0000000..6f8a46c --- /dev/null +++ b/plugins/social-vision/commands/analyze.md @@ -0,0 +1,15 @@ +--- +description: Watch & analyze a social video/post — Instagram, TikTok, YouTube (Shorts), or X/Twitter URL, or a local video file +argument-hint: <url-or-file-path> +--- + +Analyze the social video/post at: $ARGUMENTS + +Follow the `analyze-social` skill workflow: + +1. Run `python3 "${CLAUDE_PLUGIN_ROOT}/scripts/analyze.py" "$ARGUMENTS"` (warn the user the first run auto-installs tools and may take a few minutes). +2. Handle any `ERROR_CODE` (e.g. `NEEDS_LOGIN` → ask which browser they're logged into, re-run with `--cookies-from-browser <browser>`). +3. Read `manifest.json`, `meta.txt`, `transcript.txt`, and view the frames from the bundle. +4. Produce the thorough breakdown defined in the skill's output template (header, TL;DR, full verbatim transcript with timestamps, on-screen text & visuals, verbatim caption, beat-by-beat, takeaway & intent, notable details), then stay open for follow-ups. + +If `$ARGUMENTS` is empty, ask the user for a link or file path. diff --git a/plugins/social-vision/scripts/analyze.py b/plugins/social-vision/scripts/analyze.py new file mode 100644 index 0000000..12e9492 --- /dev/null +++ b/plugins/social-vision/scripts/analyze.py @@ -0,0 +1,378 @@ +#!/usr/bin/env python3 +""" +analyze.py — social-vision pipeline orchestrator (pure standard library). 
+ +Takes one Instagram / TikTok / YouTube(-Shorts) / X(Twitter) URL or a local video +file and produces a "bundle" directory that Claude then reads: + + <bundle>/ + frames/frame_###.jpg sampled video frames or carousel slides + transcript.txt / .srt spoken audio (if any) + meta.txt human-readable metadata + caption + manifest.json machine-readable map of the bundle (+ error codes) + SUMMARY.md index + +It shells out to the tools installed by bootstrap.py (yt-dlp, gallery-dl, ffmpeg, +and the venv's transcribe.py). It never hard-crashes: if it can salvage any +frames or a caption it writes a partial bundle and exits 0. +""" + +import argparse +import json +import re +import shutil +import subprocess +import sys +from datetime import datetime +from pathlib import Path + +import bootstrap # same directory + +SCRIPT_DIR = Path(__file__).resolve().parent +VIDEO_EXTS = (".mp4", ".mkv", ".webm", ".mov", ".m4v") +IMAGE_EXTS = (".jpg", ".jpeg", ".png", ".webp") + +LOGIN_PATTERNS = [ + "login required", "log in", "logged in", "sign in to confirm", "use --cookies", + "requested content is not available", "account is private", "this account is private", + "only available for registered", "rate-limit reached", "cookies are no longer valid", +] +RATE_PATTERNS = ["429", "too many requests", "please wait a few minutes", "rate limit", "rate-limit"] +STALE_PATTERNS = ["unable to extract", "unsupported url", "unable to download webpage", + "failed to parse json"] + + +def run(cmd): + return subprocess.run([str(c) for c in cmd], capture_output=True, text=True) + + +def platform_of(url): + u = url.lower() + if "instagram.com" in u: + return "Instagram" + if "tiktok.com" in u: + return "TikTok" + if "youtube.com" in u or "youtu.be" in u: + return "YouTube" + if "x.com" in u or "twitter.com" in u: + return "X" + return "Web" + + +def classify_error(stderr): + s = (stderr or "").lower() + if any(p in s for p in RATE_PATTERNS): + return "RATE_LIMITED" + if any(p in s for p in LOGIN_PATTERNS): + 
return "NEEDS_LOGIN" + if any(p in s for p in STALE_PATTERNS): + return "EXTRACTOR_STALE" + return None + + +# --------------------------------------------------------------------------- # +# metadata + routing +# --------------------------------------------------------------------------- # +def ytdlp_json(deps, url, cookies): + cmd = [deps["yt_dlp"], "-J", "--no-warnings", "--no-playlist", *cookies, url] + proc = run(cmd) + if proc.returncode == 0 and proc.stdout.strip(): + try: + return json.loads(proc.stdout), None + except json.JSONDecodeError: + return None, "EXTRACTOR_STALE" + return None, classify_error(proc.stderr) or "EXTRACTOR_STALE" + + +def has_video(meta): + if not meta: + return False + if meta.get("duration"): + return True + for f in meta.get("formats", []) or []: + if f.get("vcodec") and f.get("vcodec") != "none": + return True + return meta.get("_type") == "video" + + +def meta_fields(meta, url): + return { + "title": (meta or {}).get("title"), + "uploader": (meta or {}).get("uploader") or (meta or {}).get("channel"), + "upload_date": (meta or {}).get("upload_date"), + "duration": (meta or {}).get("duration"), + "like_count": (meta or {}).get("like_count"), + "view_count": (meta or {}).get("view_count"), + "caption": (meta or {}).get("description") or "", + "source_url": url, + } + + +# --------------------------------------------------------------------------- # +# acquisition +# --------------------------------------------------------------------------- # +def download_video(deps, url, cookies, bundle): + cmd = [deps["yt_dlp"], "-f", "bv*+ba/best", "--merge-output-format", "mp4", + "--no-playlist", "--no-warnings", *cookies, + "-o", str(bundle / "video.%(ext)s"), url] + proc = run(cmd) + for ext in VIDEO_EXTS: + hit = list(bundle.glob("video" + ext)) + if hit: + return hit[0], None + return None, classify_error(proc.stderr) or "DOWNLOAD_FAILED" + + +def download_images(deps, url, cookies, bundle): + media = bundle / "media" + 
media.mkdir(exist_ok=True) + proc = run([deps["gallery_dl"], "--dest", str(media), "-o", "directory=[]", + *cookies, url]) + imgs = sorted(p for p in media.iterdir() if p.suffix.lower() in IMAGE_EXTS) + if imgs: + return imgs, None + return [], classify_error(proc.stderr) or "DOWNLOAD_FAILED" + + +def gallery_caption(deps, url, cookies): + proc = run([deps["gallery_dl"], "-j", *cookies, url]) + if proc.returncode != 0 or not proc.stdout.strip(): + return {} + try: + data = json.loads(proc.stdout) + except json.JSONDecodeError: + return {} + for item in data: + if isinstance(item, list) and len(item) >= 2 and isinstance(item[-1], dict): + d = item[-1] + return { + "caption": d.get("description") or d.get("content") or d.get("tweet_text") or "", + "uploader": d.get("username") or d.get("uploader") or d.get("author", {}).get("name"), + "title": d.get("title"), + } + return {} + + +# --------------------------------------------------------------------------- # +# frames + transcription +# --------------------------------------------------------------------------- # +def probe_duration(deps, video): + proc = run([deps["ffprobe"], "-v", "error", "-show_entries", "format=duration", + "-of", "csv=p=0", str(video)]) + try: + return float(proc.stdout.strip()) + except ValueError: + return 0.0 + + +def has_audio(deps, video): + proc = run([deps["ffprobe"], "-v", "error", "-select_streams", "a", + "-show_entries", "stream=index", "-of", "csv=p=0", str(video)]) + return bool(proc.stdout.strip()) + + +def extract_frames(deps, video, frames_dir, target_frames, width): + dur = probe_duration(deps, video) + fps = 1.0 if dur <= 0 else max(0.2, min(2.0, target_frames / dur)) + run([deps["ffmpeg"], "-hide_banner", "-loglevel", "error", "-y", "-i", str(video), + "-vf", f"fps={fps:.4f},scale={width}:-2", "-q:v", "3", + str(frames_dir / "frame_%03d.jpg")]) + return dur, sorted(frames_dir.glob("frame_*.jpg")) + + +def slides_to_frames(deps, images, frames_dir, width): + out = [] + for i, 
img in enumerate(images, 1): + dst = frames_dir / f"frame_{i:03d}.jpg" + run([deps["ffmpeg"], "-hide_banner", "-loglevel", "error", "-y", "-i", str(img), + "-vf", f"scale={width}:-2", "-q:v", "3", str(dst)]) + if dst.exists(): + out.append(dst) + return out + + +def transcribe(deps, video, bundle, lang): + proc = run([deps["venv_python"], str(SCRIPT_DIR / "transcribe.py"), + str(video), str(bundle), lang or "auto"]) + if proc.returncode == 0 and (bundle / "transcript.txt").exists(): + return True + print(proc.stderr, file=sys.stderr) + return False + + +# --------------------------------------------------------------------------- # +# bundle writers +# --------------------------------------------------------------------------- # +def write_bundle(bundle, manifest): + (bundle / "manifest.json").write_text(json.dumps(manifest, indent=2), encoding="utf-8") + + m = manifest + lines = [] + for k in ("title", "uploader", "upload_date", "duration", + "like_count", "view_count", "source_url"): + if m.get(k) not in (None, ""): + lines.append(f"{k}: {m[k]}") + if m.get("caption"): + lines.append("\n--- caption / description ---\n" + m["caption"]) + (bundle / "meta.txt").write_text("\n".join(lines) + "\n", encoding="utf-8") + + summary = [ + "# social-vision bundle", "", + f"- platform: {m.get('platform')}", + f"- type: {m.get('type')}", + f"- source: {m.get('source_url')}", + f"- frames: {m.get('n_frames')} -> {m.get('frames_dir')}", + f"- transcript: {m.get('transcript_path') or '(none)'}", + f"- meta: {bundle / 'meta.txt'}", + ] + if m.get("error_code"): + summary.append(f"- error_code: {m['error_code']}") + (bundle / "SUMMARY.md").write_text("\n".join(summary) + "\n", encoding="utf-8") + + +def emit(manifest, bundle=None): + print("\n=== social-vision result ===") + print(f"ERROR_CODE: {manifest.get('error_code') or 'none'}") + if bundle: + print(f"BUNDLE: {bundle}") + print(f"MANIFEST: {bundle / 'manifest.json'}") + print(f"TYPE: {manifest.get('type')}") + 
print(f"FRAMES: {manifest.get('n_frames', 0)}") + print(f"TRANSCRIPT: {'yes' if manifest.get('transcript_path') else 'no'}") + if manifest.get("message"): + print("MESSAGE: " + manifest["message"]) + + +# --------------------------------------------------------------------------- # +# main +# --------------------------------------------------------------------------- # +def main(): + ap = argparse.ArgumentParser(description="Analyze a social video/post for Claude.") + ap.add_argument("input", help="URL (Instagram/TikTok/YouTube/X) or local video file") + ap.add_argument("--cookies-from-browser", dest="cookies", default=None, + help="chrome|firefox|safari|edge|brave — for login-walled posts") + ap.add_argument("--target-frames", type=int, default=40) + ap.add_argument("--frame-width", type=int, default=512) + ap.add_argument("--image-width", type=int, default=768) + ap.add_argument("--model", default=None, help="override transcription model") + ap.add_argument("--lang", default=None, help="force language code (else auto)") + ap.add_argument("--no-transcribe", action="store_true") + ap.add_argument("--outdir", default=None) + args = ap.parse_args() + + deps = bootstrap.ensure_ready() + if not deps["ok"]: + print("\n=== social-vision result ===") + print("ERROR_CODE: SETUP_FAILED") + print("MESSAGE: " + (deps.get("error") or "dependency setup failed")) + sys.exit(1) + if args.model: + deps["model"] = args.model # informational; transcribe.py reads engine.json + + cookies = ["--cookies-from-browser", args.cookies] if args.cookies else [] + + base = Path(args.outdir) if args.outdir else Path(deps["home"]) / "out" + bundle = base / datetime.now().strftime("%Y%m%d-%H%M%S") + frames_dir = bundle / "frames" + frames_dir.mkdir(parents=True, exist_ok=True) + + is_local = Path(args.input).is_file() + platform = "Local" if is_local else platform_of(args.input) + errors = [] + + # ---- local file: straight to frames + transcribe ---- + if is_local: + video = bundle / ("video" + 
Path(args.input).suffix.lower()) + shutil.copy(args.input, video) + meta = {"source_url": args.input, "caption": "", "title": Path(args.input).name} + return finish_video(args, deps, bundle, frames_dir, video, meta, platform, errors) + + # ---- remote: metadata + routing ---- + meta_json, err = ytdlp_json(deps, args.input, cookies) + + if err == "EXTRACTOR_STALE": # try a one-shot yt-dlp self-update, then retry + run([deps["venv_python"], "-m", "pip", "install", "-q", "-U", "yt-dlp"]) + meta_json, err = ytdlp_json(deps, args.input, cookies) + + if err in ("NEEDS_LOGIN", "RATE_LIMITED") and not cookies: + manifest = {**meta_fields(meta_json, args.input), "platform": platform, + "type": "text", "n_frames": 0, "transcript_path": None, + "error_code": err, + "message": ("This content needs you to be logged in. Re-run with " + "--cookies-from-browser <browser>." + if err == "NEEDS_LOGIN" else + "Rate-limited by the platform. Wait a few minutes, or pass " + "--cookies-from-browser <browser> to use your logged-in session.")} + emit(manifest) + sys.exit(0) + + fields = meta_fields(meta_json, args.input) + + if has_video(meta_json): + video, derr = download_video(deps, args.input, cookies, bundle) + if not video: + errors.append(derr) + ec = derr if derr in ("NEEDS_LOGIN", "RATE_LIMITED") else "DOWNLOAD_FAILED" + manifest = {**fields, "platform": platform, "type": "text", "n_frames": 0, + "transcript_path": None, "error_code": ec, "errors": errors, + "message": "Could not download the video."} + write_bundle(bundle, manifest) + emit(manifest, bundle) + sys.exit(0) + return finish_video(args, deps, bundle, frames_dir, video, fields, platform, errors) + + # ---- image post (carousel / photo tweet) ---- + images, derr = download_images(deps, args.input, cookies, bundle) + if not images: + ec = derr if derr in ("NEEDS_LOGIN", "RATE_LIMITED") else "NO_MEDIA" + msg = ("This post needs login — re-run with --cookies-from-browser <browser>." 
+ if ec == "NEEDS_LOGIN" else + "No downloadable images or video were found at this URL.") + if not fields["caption"]: + fields.update({k: v for k, v in gallery_caption(deps, args.input, cookies).items() if v}) + manifest = {**fields, "platform": platform, + "type": "text" if fields.get("caption") else "text", + "n_frames": 0, "transcript_path": None, "error_code": ec, + "errors": errors, "message": msg} + write_bundle(bundle, manifest) + emit(manifest, bundle) + sys.exit(0) + + if not fields["caption"]: + fields.update({k: v for k, v in gallery_caption(deps, args.input, cookies).items() if v}) + + frames = slides_to_frames(deps, images, frames_dir, args.image_width) + manifest = {**fields, "platform": platform, "type": "image", + "n_frames": len(frames), "frames_dir": str(frames_dir), + "frame_files": [f.name for f in frames], + "has_audio": False, "transcript_path": None, "errors": errors, + "error_code": None} + write_bundle(bundle, manifest) + emit(manifest, bundle) + + +def finish_video(args, deps, bundle, frames_dir, video, fields, platform, errors): + dur, frames = extract_frames(deps, video, frames_dir, + args.target_frames, args.frame_width) + audio = has_audio(deps, video) + transcript_path = None + if audio and not args.no_transcribe: + if transcribe(deps, video, bundle, args.lang): + transcript_path = str(bundle / "transcript.txt") + else: + errors.append("ASR_FAILED") + fields["duration"] = fields.get("duration") or round(dur, 2) + manifest = {**fields, "platform": platform, "type": "video", + "n_frames": len(frames), "frames_dir": str(frames_dir), + "frame_files": [f.name for f in frames], + "has_audio": audio, "transcript_path": transcript_path, + "transcript_srt": str(bundle / "transcript.srt") if transcript_path else None, + "engine": deps["engine"], "model": deps["model"], + "errors": errors, + "error_code": "ASR_FAILED" if ("ASR_FAILED" in errors and not transcript_path) else None} + write_bundle(bundle, manifest) + emit(manifest, bundle) + + +if 
#!/usr/bin/env python3
"""
bootstrap.py — first-run dependency setup for social-vision.

Pure standard library. Detects the platform, creates a virtualenv under
~/.social-vision, installs the Python tools (yt-dlp, gallery-dl, and the best
Whisper engine for this machine), and makes sure ffmpeg is available.
Idempotent: after the first successful run it just reads the cached engine.json.

Engine choice:
  - macOS Apple Silicon      -> mlx-whisper     (Metal / Neural Engine)
  - NVIDIA GPU (Win/Linux)   -> faster-whisper  (CUDA, float16)
  - everything else (CPU)    -> faster-whisper  (CPU, int8)
"""

import json
import os
import platform
import shutil
import subprocess
import sys
from pathlib import Path

VERSION = "1.0.0"  # bump to force a re-bootstrap on upgrade

HOME = Path.home() / ".social-vision"
VENV = HOME / "venv"
ENGINE_JSON = HOME / "engine.json"
MARKER = HOME / ".deps-ok"

IS_WINDOWS = os.name == "nt"

# Keys that _paths() reads from the cached engine description.
_ENGINE_KEYS = ("engine", "model", "device", "compute_type")


# --------------------------------------------------------------------------- #
# small helpers
# --------------------------------------------------------------------------- #
def log(msg):
    """Print a ``[bootstrap]``-prefixed progress line to stderr."""
    print(f"[bootstrap] {msg}", file=sys.stderr, flush=True)


def venv_bin(name):
    """Path to an executable inside the venv (handles Windows layout)."""
    if IS_WINDOWS:
        exe = VENV / "Scripts" / (name + ".exe")
        return exe if exe.exists() else VENV / "Scripts" / name
    return VENV / "bin" / name


def venv_python():
    """Return the venv's Python interpreter path as a string."""
    return str(venv_bin("python"))


def run(cmd, **kw):
    """Run a command with captured text output, without raising on failure.

    Returns a ``subprocess.CompletedProcess``. A non-zero exit status is
    reported through ``returncode``. If the program cannot be launched at all
    (missing executable, permission error), a synthetic result with
    ``returncode`` 127, empty ``stdout`` and the OS error text in ``stderr``
    is returned, so callers can treat "not installed" like any other failure.
    """
    try:
        return subprocess.run(cmd, capture_output=True, text=True, **kw)
    except OSError as exc:
        return subprocess.CompletedProcess(cmd, 127, stdout="", stderr=str(exc))


def total_ram_gb():
    """Best-effort total RAM in GB (stdlib only). Defaults to 8 if unknown."""
    gb = None
    try:
        if sys.platform == "darwin":
            out = run(["sysctl", "-n", "hw.memsize"]).stdout.strip()
            gb = int(out) / (1024 ** 3)
        elif sys.platform.startswith("linux"):
            pages = os.sysconf("SC_PHYS_PAGES")
            page_size = os.sysconf("SC_PAGE_SIZE")
            gb = pages * page_size / (1024 ** 3)
        elif IS_WINDOWS:
            import ctypes

            class MemStatus(ctypes.Structure):
                _fields_ = [
                    ("dwLength", ctypes.c_ulong),
                    ("dwMemoryLoad", ctypes.c_ulong),
                    ("ullTotalPhys", ctypes.c_ulonglong),
                    ("ullAvailPhys", ctypes.c_ulonglong),
                    ("ullTotalPageFile", ctypes.c_ulonglong),
                    ("ullAvailPageFile", ctypes.c_ulonglong),
                    ("ullTotalVirtual", ctypes.c_ulonglong),
                    ("ullAvailVirtual", ctypes.c_ulonglong),
                    ("ullAvailExtendedVirtual", ctypes.c_ulonglong),
                ]

            stat = MemStatus()
            stat.dwLength = ctypes.sizeof(MemStatus)
            # The call returns zero on failure; only trust the struct on success.
            if ctypes.windll.kernel32.GlobalMemoryStatusEx(ctypes.byref(stat)):
                gb = stat.ullTotalPhys / (1024 ** 3)
    except Exception:
        # Deliberate best-effort: any probing problem means "unknown".
        gb = None
    # A zero/negative reading is as useless as no reading.
    return gb if gb and gb > 0 else 8.0


def has_nvidia_gpu():
    """Return True when ``nvidia-smi`` is on PATH and exits successfully."""
    if shutil.which("nvidia-smi") is None:
        return False
    return run(["nvidia-smi"]).returncode == 0


# --------------------------------------------------------------------------- #
# engine selection
# --------------------------------------------------------------------------- #
def choose_engine():
    """Pick the Whisper engine/model for this machine.

    Returns a dict with ``engine``, ``model``, ``device``, ``compute_type``
    and ``pip`` (the list of packages to install for that engine).
    """
    ram = total_ram_gb()
    is_apple_silicon = sys.platform == "darwin" and platform.machine() == "arm64"

    if is_apple_silicon:
        model = (
            "mlx-community/whisper-large-v3-turbo"
            if ram >= 8
            else "mlx-community/whisper-small-mlx"
        )
        return {"engine": "mlx", "model": model, "device": "metal",
                "compute_type": "float16", "pip": ["mlx-whisper"]}

    if has_nvidia_gpu():
        return {"engine": "faster-whisper", "model": "large-v3", "device": "cuda",
                "compute_type": "float16", "pip": ["faster-whisper"]}

    # CPU fallback — small is fast and decent; base on low-RAM boxes
    model = "small" if ram >= 8 else "base"
    return {"engine": "faster-whisper", "model": model, "device": "cpu",
            "compute_type": "int8", "pip": ["faster-whisper"]}


# --------------------------------------------------------------------------- #
# ffmpeg
# --------------------------------------------------------------------------- #
def ensure_ffmpeg():
    """Return (ok, message). Auto-installs only via non-sudo managers."""
    if shutil.which("ffmpeg") and shutil.which("ffprobe"):
        return True, "ffmpeg present"

    log("ffmpeg not found — attempting install")
    if sys.platform == "darwin" and shutil.which("brew"):
        run(["brew", "install", "ffmpeg"])
    elif IS_WINDOWS and shutil.which("winget"):
        run(["winget", "install", "--silent", "--accept-package-agreements",
             "--accept-source-agreements", "-e", "--id", "Gyan.FFmpeg"])
    elif IS_WINDOWS and shutil.which("scoop"):
        run(["scoop", "install", "ffmpeg"])
    # Linux package managers need sudo — don't run silently.

    # Success is judged by PATH lookup, not by the installer's exit status.
    # NOTE(review): a fresh winget/scoop install may not be visible on this
    # process's PATH until a new shell is started — confirm on Windows.
    if shutil.which("ffmpeg") and shutil.which("ffprobe"):
        return True, "ffmpeg installed"

    if sys.platform.startswith("linux"):
        cmd = "sudo apt install -y ffmpeg # (or: sudo dnf install ffmpeg / sudo pacman -S ffmpeg)"
    elif sys.platform == "darwin":
        cmd = "brew install ffmpeg # (install Homebrew first: https://brew.sh)"
    else:
        cmd = "winget install Gyan.FFmpeg # (or: scoop install ffmpeg)"
    return False, f"ffmpeg is required but could not be auto-installed. Please run:\n {cmd}"


# --------------------------------------------------------------------------- #
# venv + python tools
# --------------------------------------------------------------------------- #
def ensure_venv():
    """Create the virtualenv if it is missing and upgrade its pip.

    Returns True when the venv interpreter exists afterwards, False when
    ``python -m venv`` failed (for example because the ``venv``/``ensurepip``
    modules are not installed for the host Python).
    """
    if not venv_bin("python").exists():
        log("creating virtualenv")
        proc = run([sys.executable, "-m", "venv", str(VENV)])
        if proc.returncode != 0 or not venv_bin("python").exists():
            log("virtualenv creation failed:\n" + (proc.stderr or proc.stdout))
            return False
    # Best effort: a stale pip is not fatal, so the result is not checked.
    run([venv_python(), "-m", "pip", "install", "-q", "--upgrade", "pip"])
    return True


def pip_install(packages):
    """Install/upgrade ``packages`` into the venv; return True on success."""
    log("installing: " + ", ".join(packages))
    proc = run([venv_python(), "-m", "pip", "install", "-q", "--upgrade", *packages])
    if proc.returncode != 0:
        log("pip install failed:\n" + (proc.stderr or proc.stdout))
    return proc.returncode == 0


# --------------------------------------------------------------------------- #
# orchestration
# --------------------------------------------------------------------------- #
def already_ok():
    """True when a previous run of this VERSION finished and the venv still exists."""
    if not MARKER.exists() or not ENGINE_JSON.exists():
        return False
    try:
        return MARKER.read_text().strip() == VERSION and venv_bin("python").exists()
    except Exception:
        return False


def _load_cached_engine():
    """Return the engine dict cached in engine.json, or None if it is unusable."""
    try:
        engine = json.loads(ENGINE_JSON.read_text())
    except (OSError, ValueError):
        return None
    if isinstance(engine, dict) and all(key in engine for key in _ENGINE_KEYS):
        return engine
    return None


def ensure_ready(force=False):
    """
    Make sure everything is installed. Returns a dict:
      {ok, error, engine, model, device, compute_type, venv_python,
       yt_dlp, gallery_dl, ffmpeg, ffprobe, home}

    With ``force=True`` the cached state is ignored and setup runs again.
    Failures are reported through ``ok=False`` and ``error`` rather than raised.
    """
    HOME.mkdir(parents=True, exist_ok=True)

    if not force and already_ok():
        engine = _load_cached_engine()
        if engine is not None:
            return _paths(engine, ok=True)
        # A corrupt/partial cache must not crash; redo the setup instead.
        log("cached engine.json is unreadable — re-running setup")

    engine = choose_engine()
    log(f"platform={sys.platform}/{platform.machine()} -> engine={engine['engine']} "
        f"model={engine['model']} device={engine['device']}")

    venv_ok = ensure_venv()
    # Without a venv interpreter there is nothing to run pip with.
    pip_ok = venv_ok and pip_install(["yt-dlp", "gallery-dl", *engine["pip"]])
    ff_ok, ff_msg = ensure_ffmpeg()

    ENGINE_JSON.write_text(json.dumps(engine, indent=2))

    if pip_ok and ff_ok:
        MARKER.write_text(VERSION)
        return _paths(engine, ok=True)

    err = []
    if not venv_ok:
        err.append(f"Failed to create the virtualenv at {VENV} "
                   "(is the Python 'venv' module installed?).")
    elif not pip_ok:
        err.append("Failed to install Python tools into the virtualenv.")
    if not ff_ok:
        err.append(ff_msg)
    return _paths(engine, ok=False, error="\n".join(err))


def _paths(engine, ok, error=None):
    """Combine the engine description with resolved tool paths into the result dict."""
    return {
        "ok": ok,
        "error": error,
        "engine": engine["engine"],
        "model": engine["model"],
        "device": engine["device"],
        "compute_type": engine["compute_type"],
        "venv_python": venv_python(),
        "yt_dlp": str(venv_bin("yt-dlp")),
        "gallery_dl": str(venv_bin("gallery-dl")),
        # Fall back to the bare name so callers still get something runnable-looking.
        "ffmpeg": shutil.which("ffmpeg") or "ffmpeg",
        "ffprobe": shutil.which("ffprobe") or "ffprobe",
        "home": str(HOME),
    }


if __name__ == "__main__":
    force = "--force" in sys.argv
    result = ensure_ready(force=force)
    print(json.dumps(result, indent=2))
    sys.exit(0 if result["ok"] else 1)
#!/usr/bin/env python3
"""
transcribe.py — runs INSIDE the social-vision venv (so it can import the
Whisper engine). Reads ~/.social-vision/engine.json, transcribes one audio/video
file, and writes transcript.txt + transcript.srt into the output directory.

Usage:
    transcribe.py <media> <out_dir> [language]

Both engines are configured to avoid the classic Whisper "silence hallucination"
(repeated phantom lines) via VAD / no-condition-on-previous-text settings.
"""

import json
import sys
from pathlib import Path

HOME = Path.home() / ".social-vision"
ENGINE_JSON = HOME / "engine.json"


def fmt_ts(seconds):
    """Format a time in seconds as an SRT timestamp ``HH:MM:SS,mmm``.

    ``None`` and negative values are clamped to zero.
    """
    if seconds is None or seconds < 0:
        seconds = 0
    ms = int(round(seconds * 1000))
    h, ms = divmod(ms, 3600_000)
    m, ms = divmod(ms, 60_000)
    s, ms = divmod(ms, 1000)
    return f"{h:02d}:{m:02d}:{s:02d},{ms:03d}"


def write_outputs(out_dir, segments):
    """Write ``transcript.txt`` and ``transcript.srt`` into ``out_dir``.

    segments: iterable of (start, end, text). Segments whose text is empty
    after stripping are dropped. The directory is created if missing.
    Returns the number of transcript lines written.
    """
    out_dir = Path(out_dir)
    out_dir.mkdir(parents=True, exist_ok=True)
    txt_lines, srt_blocks = [], []
    for start, end, text in segments:
        text = (text or "").strip()
        if not text:
            continue
        txt_lines.append(text)
        # Number cues by what is actually emitted so skipped (empty) segments
        # do not leave gaps in the SRT counter.
        cue = len(srt_blocks) + 1
        srt_blocks.append(f"{cue}\n{fmt_ts(start)} --> {fmt_ts(end)}\n{text}\n")
    (out_dir / "transcript.txt").write_text("\n".join(txt_lines) + "\n", encoding="utf-8")
    (out_dir / "transcript.srt").write_text("\n".join(srt_blocks) + "\n", encoding="utf-8")
    return len(txt_lines)


def transcribe_mlx(media, model, language):
    """Transcribe ``media`` with mlx-whisper; return a list of (start, end, text)."""
    import mlx_whisper
    result = mlx_whisper.transcribe(
        str(media),
        path_or_hf_repo=model,
        language=language,
        condition_on_previous_text=False,  # kills the repeat-line hallucination
        no_speech_threshold=0.6,
        compression_ratio_threshold=2.4,
        # NOTE(review): in Whisper this option is documented as applying only
        # when word_timestamps=True — confirm it has any effect here.
        hallucination_silence_threshold=2.0,
        verbose=False,
    )
    segs = [(s.get("start"), s.get("end"), s.get("text")) for s in result.get("segments", [])]
    if not segs and result.get("text"):
        # No per-segment timing available: emit the whole text as one cue.
        segs = [(0, 0, result["text"])]
    return segs


def transcribe_faster(media, model, device, compute_type, language):
    """Transcribe ``media`` with faster-whisper; return a list of (start, end, text).

    If the model cannot be loaded on the requested device/compute type, it is
    reloaded on CPU with int8.
    """
    from faster_whisper import WhisperModel

    def load(dev, ct):
        return WhisperModel(model, device=dev, compute_type=ct)

    try:
        wm = load(device, compute_type)
    except Exception as e:
        # Deliberate broad catch: any load failure triggers the CPU fallback.
        print(f"[transcribe] {device}/{compute_type} unavailable ({e}); "
              f"falling back to CPU int8", file=sys.stderr)
        wm = load("cpu", "int8")

    segments, _info = wm.transcribe(
        str(media),
        language=language,
        vad_filter=True,  # drop silence -> no hallucinated lines
        condition_on_previous_text=False,
        no_speech_threshold=0.6,
    )
    return [(s.start, s.end, s.text) for s in segments]


def main():
    """CLI entry point: ``transcribe.py <media> <out_dir> [language]``.

    Exits with status 2 on bad usage and 1 when engine.json cannot be read.
    A language of ``""`` or ``"auto"`` means auto-detect.
    """
    if len(sys.argv) < 3:
        print("usage: transcribe.py <media> <out_dir> [language]", file=sys.stderr)
        sys.exit(2)

    media, out_dir = sys.argv[1], sys.argv[2]
    language = sys.argv[3] if len(sys.argv) > 3 and sys.argv[3] not in ("", "auto") else None

    try:
        engine = json.loads(ENGINE_JSON.read_text())
        eng = engine["engine"]
        model = engine["model"]
    except (OSError, ValueError, KeyError, TypeError) as exc:
        print(f"[transcribe] cannot read engine config {ENGINE_JSON}: {exc}",
              file=sys.stderr)
        sys.exit(1)

    if eng == "mlx":
        segs = transcribe_mlx(media, model, language)
    else:
        segs = transcribe_faster(media, model, engine["device"],
                                 engine["compute_type"], language)

    n = write_outputs(out_dir, segs)
    print(f"[transcribe] wrote {n} lines using {eng}:{model}", file=sys.stderr)


if __name__ == "__main__":
    main()
+--- + +# Analyze Social (watch a video/post for the user) + +## Overview + +Turns a social link (or local video) into material you can actually read: sampled +frames you view with native vision, a full transcript, and the post's caption. +Then you write a thorough breakdown. A bundled cross-platform Python pipeline does +the fetching/transcribing; you do the understanding. + +## When to use + +- The user pastes an **Instagram / TikTok / YouTube / YouTube Shorts / X (Twitter)** URL. +- The user gives a **local video file** path. +- The user asks to **watch / analyze / transcribe / summarize** a video, reel, short, or post. + +## Workflow + +1. **Run the pipeline** (use the Bash tool): + ```bash + python3 "${CLAUDE_PLUGIN_ROOT}/scripts/analyze.py" "<url-or-path>" + ``` + - The **first run auto-installs** its tools (yt-dlp, gallery-dl, a Whisper engine, and ffmpeg). Tell the user this one-time setup can take a few minutes and may ask them to approve installs. Subsequent runs are fast. + - Useful flags: `--no-transcribe` (frames + caption only, faster), `--target-frames N`, `--lang <code>`. + +2. **Read the printed result block.** It ends with `ERROR_CODE`, `BUNDLE`, `TYPE`, `FRAMES`, `TRANSCRIPT`. Handle the error code: + - `NEEDS_LOGIN` → the content is private/login-walled. Ask the user: *"Which browser are you logged into that platform on? (chrome / firefox / safari / edge / brave)"* then re-run with `--cookies-from-browser <browser>`. Only do this when the user has opted in — reading browser cookies is sensitive and may trigger a keychain prompt. + - `RATE_LIMITED` → tell them to wait a few minutes, or retry with `--cookies-from-browser <browser>` to use their logged-in session. + - `SETUP_FAILED` → show the `MESSAGE` (usually a one-line command to install ffmpeg) and offer to run it. + - `none` → success, continue. + +3. **Load the bundle.** Read `manifest.json` (the contract) from the `BUNDLE` path, then: + - Read `meta.txt` (caption + metadata) and, if present, `transcript.txt`.
+ - **View the frames** with the Read tool — a spread across the timeline (first, several middle, last) for video; **all slides in order** for an image carousel. You read on-screen text/captions directly from the frames; no separate OCR. + +4. **Write the analysis** using the template below. + +## Output template (be thorough — a faithful record, not a teaser) + +- **Header** — platform · creator handle/name · date · type · duration · engagement (views/likes/comments if present) · URL. +- **TL;DR** — 1–2 sentences: what this is. +- **Full transcript — verbatim, everything.** The complete spoken audio word-for-word, with timestamps from `transcript.srt`. Do not summarize or drop lines. The only allowed cleanup is collapsing obvious hallucinated repeat-lines (e.g. "Okay. Okay. Okay."), never real speech. If there's no audio/speech, say so. +- **On-screen text & visuals (timeline)** — everything visible across the frames: text overlays/captions transcribed, graphics, charts/numbers, b-roll, setting, what the creator is doing, products/profiles shown. For carousels: every slide in order, with its full text + a description of its image. +- **Caption / description — verbatim** — exactly as posted, including hashtags, @mentions, links. +- **Beat-by-beat structure** (video) — how it's built, with timestamps: hook → point 1 → … → CTA. +- **Takeaway & intent** — core message, what it's actually sharing, the goal (educate / sell / lead-gen / entertain), and any explicit CTA. +- **Notable details** — names, tools, stats, claims, prices, links, anything quotable. + +Adapt per `type`: a plain text tweet skips transcript/frames; an image carousel leans on the visuals section. + +Then stay open for follow-ups ("just the hook," "list every stat," "summarize in 3 bullets," etc.). + +## Notes + +- Everything runs locally; nothing is uploaded. Browser cookies are read **only** when the user opts in for login-walled content. +- TikTok and YouTube are usually public (no login). 
Instagram posts/carousels and many X posts require being logged in.