From be20c48e9d66cd8f27f36fb265a3955fe6594ae7 Mon Sep 17 00:00:00 2001
From: Shivang Trivedi <69757440+Shivang0@users.noreply.github.com>
Date: Mon, 22 Jun 2026 19:31:22 +0200
Subject: [PATCH] Add social-vision plugin (Marketing Growth)

---
 .claude-plugin/marketplace.json               |  34 +-
 README.md                                     |   3 +-
 .../social-vision/.claude-plugin/plugin.json  |  21 +
 plugins/social-vision/.gitignore              |   3 +
 plugins/social-vision/LICENSE                 |  21 +
 plugins/social-vision/README.md               | 147 +++++++
 plugins/social-vision/commands/analyze.md     |  15 +
 plugins/social-vision/scripts/analyze.py      | 378 ++++++++++++++++++
 plugins/social-vision/scripts/bootstrap.py    | 240 +++++++++++
 plugins/social-vision/scripts/transcribe.py   | 111 +++++
 .../skills/analyze-social/SKILL.md            |  60 +++
 11 files changed, 1025 insertions(+), 8 deletions(-)
 create mode 100644 plugins/social-vision/.claude-plugin/plugin.json
 create mode 100644 plugins/social-vision/.gitignore
 create mode 100644 plugins/social-vision/LICENSE
 create mode 100644 plugins/social-vision/README.md
 create mode 100644 plugins/social-vision/commands/analyze.md
 create mode 100644 plugins/social-vision/scripts/analyze.py
 create mode 100644 plugins/social-vision/scripts/bootstrap.py
 create mode 100644 plugins/social-vision/scripts/transcribe.py
 create mode 100644 plugins/social-vision/skills/analyze-social/SKILL.md
diff --git a/.claude-plugin/marketplace.json b/.claude-plugin/marketplace.json
index 6f29b6b..1917cf9 100644
--- a/.claude-plugin/marketplace.json
+++ b/.claude-plugin/marketplace.json
@@ -5,7 +5,7 @@
     "email": "support@claudecodeplugins.dev"
   },
   "metadata": {
-    "description": "Awesome Claude Code plugins — a curated list of slash commands, subagents, MCP servers, and hooks for Claude Code",
+    "description": "Awesome Claude Code plugins \u2014 a curated list of slash commands, subagents, MCP servers, and hooks for Claude Code",
     "version": "0.0.1",
     "homepage": "https://claudecodeplugins.dev"
   },
@@ -73,7 +73,7 @@
     {
       "name": "ultrathink",
       "source": "./plugins/ultrathink",
-      "description": "Use /ultrathink <TASK_DESCRIPTION> to launch a Coordinator Agent that directs four specialist sub-agents—Architect, Research, Coder, and Tester—to analyze, design, implement, and validate your coding task. The process breaks the task into clear steps, gathers insights, and synthesizes a cohesive solution with actionable outputs. Relevant files can be referenced ad-hoc using @ filename syntax.",
+      "description": "Use /ultrathink <TASK_DESCRIPTION> to launch a Coordinator Agent that directs four specialist sub-agents\u2014Architect, Research, Coder, and Tester\u2014to analyze, design, implement, and validate your coding task. The process breaks the task into clear steps, gathers insights, and synthesizes a cohesive solution with actionable outputs. Relevant files can be referenced ad-hoc using @ filename syntax.",
       "version": "1.0.0",
       "author": {
         "name": "Jeronim Morina"
@@ -847,7 +847,7 @@
       "description": "Use this agent when setting up CI/CD pipelines, configuring Docker containers, deploying applications to cloud platforms, setting up Kubernetes clusters, implementing infrastructure as code, or automating deployment workflows. Examples: <example>Context: User is setting up a new project and needs deployment automation. user: \"I've built a FastAPI application and need to deploy it to production with proper CI/CD\" assistant: \"I'll use the deployment-engineer agent to set up a complete deployment pipeline with Docker, GitHub Actions, and production-ready configurations.\"</example> <example>Context: User mentions containerization or deployment issues. user: \"Our deployment process is manual and error-prone. We need to automate it.\" assistant: \"Let me use the deployment-engineer agent to design an automated CI/CD pipeline that eliminates manual steps and ensures reliable deployments.\"</example>",
       "version": "1.0.0",
       "author": {
-        "name": "Jure Šunić"
+        "name": "Jure \u0160uni\u0107"
       },
       "category": "Automation DevOps",
       "homepage": "https://github.com/ccplugins/awesome-claude-code-plugins/tree/main/plugins/deployment-engineer",
@@ -1141,7 +1141,7 @@
       "description": "Use this agent when you need to design, build, or validate n8n automation workflows. This agent specializes in creating efficient n8n workflows using proper validation techniques and MCP tools integration.\\n\\nExamples:\\n- <example>\\n  Context: User wants to create a Slack notification workflow when a new GitHub issue is created.\\n  user: \"I need to create an n8n workflow that sends a Slack message whenever a new GitHub issue is opened\"\\n  assistant: \"I'll use the n8n-workflow-builder agent to design and build this GitHub-to-Slack automation workflow with proper validation.\"\\n  <commentary>\\n  The user needs n8n workflow creation, so use the n8n-workflow-builder agent to handle the complete workflow design, validation, and deployment process.\\n  </commentary>\\n</example>\\n- <example>\\n  Context: User has an existing n8n workflow that needs debugging and optimization.\\n  user: \"My n8n workflow keeps failing at the HTTP Request node, can you help me fix it?\"\\n  assistant: \"I'll use the n8n-workflow-builder agent to analyze and debug your workflow, focusing on the HTTP Request node configuration.\"\\n  <commentary>\\n  Since this involves n8n workflow troubleshooting and validation, use the n8n-workflow-builder agent to diagnose and fix the issue.\\n  </commentary>\\n</example>\\n- <example>\\n  Context: User wants to understand n8n best practices and available nodes for a specific use case.\\n  user: \"What are the best n8n nodes for processing CSV data and sending email reports?\"\\n  assistant: \"I'll use the n8n-workflow-builder agent to explore the available nodes and recommend the best approach for CSV processing and email automation.\"\\n  <commentary>\\n  This requires n8n expertise and node discovery, so use the n8n-workflow-builder agent to provide comprehensive guidance.\\n  </commentary>\\n</example>",
       "version": "1.0.0",
       "author": {
-        "name": "Jure Šunić"
+        "name": "Jure \u0160uni\u0107"
       },
       "category": "Automation DevOps",
       "homepage": "https://github.com/ccplugins/awesome-claude-code-plugins/tree/main/plugins/n8n-workflow-builder",
@@ -1197,7 +1197,7 @@
       "description": "Use this agent when you need to create comprehensive Product Requirements Documents (PRDs) that combine business strategy, technical architecture, and user research. Examples: <example>Context: The user needs to create a PRD for a new feature or product launch. user: \"I need to create a PRD for our new user authentication system that will support SSO and multi-factor authentication\" assistant: \"I'll use the prd-specialist agent to create a comprehensive PRD that covers the strategic foundation, technical requirements, and implementation blueprint for your authentication system.\"</example> <example>Context: The user is planning a major product initiative and needs strategic documentation. user: \"We're launching a mobile app for our e-commerce platform and need a detailed PRD to guide development\" assistant: \"Let me engage the prd-specialist agent to develop a thorough PRD that includes market analysis, user research integration, technical architecture, and implementation roadmap for your mobile app initiative.\"</example>",
       "version": "1.0.0",
       "author": {
-        "name": "Jure Šunić"
+        "name": "Jure \u0160uni\u0107"
       },
       "category": "Project & Product Management",
       "homepage": "https://github.com/ccplugins/awesome-claude-code-plugins/tree/main/plugins/prd-specialist",
@@ -1281,7 +1281,7 @@
       "description": "Use this agent when working with Python code that requires advanced features, performance optimization, or comprehensive refactoring. Examples: <example>Context: User needs to optimize a slow Python function that processes large datasets. user: \"This function is taking too long to process our data, can you help optimize it?\" assistant: \"I'll use the python-expert agent to analyze and optimize your Python code with advanced techniques and performance profiling.\"</example> <example>Context: User wants to implement async/await patterns in their existing synchronous Python code. user: \"I need to convert this synchronous code to use async/await for better performance\" assistant: \"Let me use the python-expert agent to refactor your code with proper async/await patterns and concurrent programming techniques.\"</example> <example>Context: User needs help implementing complex Python design patterns. user: \"I want to implement a factory pattern with decorators for my API endpoints\" assistant: \"I'll use the python-expert agent to implement advanced Python patterns with decorators and proper design principles.\"</example>",
       "version": "1.0.0",
       "author": {
-        "name": "Jure Šunić"
+        "name": "Jure \u0160uni\u0107"
       },
       "category": "Development Engineering",
       "homepage": "https://github.com/ccplugins/awesome-claude-code-plugins/tree/main/plugins/python-expert",
@@ -1670,6 +1670,26 @@
         "security",
         "compliance"
       ]
+    },
+    {
+      "name": "social-vision",
+      "source": "./plugins/social-vision",
+      "description": "Paste an Instagram, TikTok, YouTube (Shorts), or X/Twitter link and Claude watches it for you \u2014 transcribes the audio, reads on-screen text and visuals, and explains what it's about. Local and cross-platform.",
+      "version": "1.0.0",
+      "author": {
+        "name": "Shivang Trivedi",
+        "url": "https://github.com/Shivang0"
+      },
+      "category": "Marketing Growth",
+      "homepage": "https://github.com/ccplugins/awesome-claude-code-plugins/tree/main/plugins/social-vision",
+      "keywords": [
+        "video",
+        "instagram",
+        "tiktok",
+        "youtube",
+        "twitter",
+        "transcription"
+      ]
     }
   ]
-}
\ No newline at end of file
+}
diff --git a/README.md b/README.md
index e4de615..7f74f6e 100644
--- a/README.md
+++ b/README.md
@@ -165,6 +165,7 @@ Install or disable them dynamically with the `/plugin` command — enabling you
 - [growth-hacker](./plugins/growth-hacker)
 - [instagram-curator](./plugins/instagram-curator)
 - [reddit-community-builder](./plugins/reddit-community-builder)
+- [social-vision](./plugins/social-vision)
 - [tiktok-strategist](./plugins/tiktok-strategist)
 - [twitter-engager](./plugins/twitter-engager)
 
@@ -212,4 +213,4 @@ Example:
 ## Contributing
 
 Contributions are welcome!
- You can add your favorite plugins, share best practices, or submit your own marketplace.
\ No newline at end of file
+ You can add your favorite plugins, share best practices, or submit your own marketplace.
diff --git a/plugins/social-vision/.claude-plugin/plugin.json b/plugins/social-vision/.claude-plugin/plugin.json
new file mode 100644
index 0000000..ab06d66
--- /dev/null
+++ b/plugins/social-vision/.claude-plugin/plugin.json
@@ -0,0 +1,21 @@
+{
+  "name": "social-vision",
+  "version": "1.0.0",
+  "description": "Paste an Instagram, TikTok, YouTube (Shorts), or X/Twitter link and Claude watches it for you — transcribes the audio, reads the on-screen text and visuals, and explains what it's actually about. Runs locally and cross-platform.",
+  "author": {
+    "name": "Shivang Trivedi"
+  },
+  "homepage": "https://github.com/Shivang0/social-vision",
+  "repository": "https://github.com/Shivang0/social-vision",
+  "license": "MIT",
+  "keywords": [
+    "video",
+    "instagram",
+    "tiktok",
+    "youtube",
+    "twitter",
+    "transcription",
+    "whisper",
+    "vision"
+  ]
+}
diff --git a/plugins/social-vision/.gitignore b/plugins/social-vision/.gitignore
new file mode 100644
index 0000000..b908d4c
--- /dev/null
+++ b/plugins/social-vision/.gitignore
@@ -0,0 +1,3 @@
+__pycache__/
+*.pyc
+.DS_Store
diff --git a/plugins/social-vision/LICENSE b/plugins/social-vision/LICENSE
new file mode 100644
index 0000000..4233b4e
--- /dev/null
+++ b/plugins/social-vision/LICENSE
@@ -0,0 +1,21 @@
+MIT License
+
+Copyright (c) 2026 Shivang Trivedi
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
diff --git a/plugins/social-vision/README.md b/plugins/social-vision/README.md
new file mode 100644
index 0000000..296c701
--- /dev/null
+++ b/plugins/social-vision/README.md
@@ -0,0 +1,147 @@
+# 🎬 social-vision
+
+**Paste a social media link. Claude watches the video for you.**
+
+social-vision is a Claude Code plugin that lets you drop in an Instagram, TikTok,
+YouTube, or X (Twitter) link — and Claude will *actually understand it*. It
+listens to the audio, reads the text and graphics on screen, reads the caption,
+and then tells you what the video is really about.
+
+No copy-pasting transcripts. No "I can't watch videos." Just paste and ask.
+
+---
+
+## What it can do
+
+- 🎧 **Transcribe the audio** — word for word, with timestamps.
+- 👀 **See what's on screen** — on-screen captions, charts, graphics, products, the setting — Claude looks at the actual frames.
+- 📝 **Read the caption** — including hashtags, mentions, and links.
+- 🧠 **Explain it** — a full breakdown: what's said, what's shown, and what it's trying to get you to do.
+- 🖼️ **Handle photo posts too** — Instagram carousels and X image posts are read slide by slide.
+
+### Supported links
+
+| Platform | Reels / Videos / Shorts | Photo posts & carousels |
+|----------|:----------------------:|:-----------------------:|
+| Instagram | ✅ | ✅ |
+| TikTok | ✅ | — |
+| YouTube + Shorts | ✅ | — |
+| X / Twitter | ✅ | ✅ |
+| A video file on your computer | ✅ | — |
+
+---
+
+## Install
+
+In Claude Code:
+
+```
+/plugin marketplace add Shivang0/social-vision
+/plugin install social-vision@social-vision
+/reload-plugins
+```
+
+That's it. No keys, no accounts, no sign-ups.
+
+> Prefer to run it from a local checkout? Clone the repo and
+> `/plugin marketplace add /path/to/social-vision` instead.
+
+---
+
+## Use it
+
+Just paste a link into Claude Code:
+
+> analyze https://www.instagram.com/reel/XXXXXXXX/
+
+or use the command:
+
+> /analyze https://www.tiktok.com/@someone/video/123456789
+
+Claude takes care of the rest and gives you the full breakdown. You can then ask
+follow-ups like *"just give me the hook,"* *"list every stat it mentioned,"* or
+*"summarize it in 3 bullets."*
+
+### Example
+
+**You:**
+
+> /analyze https://www.instagram.com/reel/XXXXXXXX/
+
+**social-vision** (after ~30–60s):
+
+> **Reel by @creator — "How the new LinkedIn algorithm works"**
+> Instagram · 99s · 1,975 likes
+>
+> **TL;DR** — A talking-head reel arguing likes no longer matter; saves, comments, and dwell time now drive reach.
+>
+> **Full transcript (verbatim, with timestamps)**
+> `[00:00]` I grew my LinkedIn followers to over 210,000 by using the new algorithm to my advantage…
+> `[00:16]` LinkedIn replaced its engagement-based system with a 150-billion-parameter model…
+> *…complete, nothing dropped…*
+>
+> **On screen** — creator at a podcast mic; text overlays "saves 5×", "comments 15×"; a ranked chart: Text 2–4% ‹ Video 5.6% ‹ Carousel 6.6%.
+>
+> **Caption** — "LinkedIn's new algorithm doesn't care how many likes you get… comment PLAYBOOK 👇 #linkedintips"
+>
+> **Takeaway** — Educational hook + lead magnet: optimize for saves / long comments / carousels; "comment PLAYBOOK" funnels to a paid program.
+
+Then keep asking: *"list every stat,"* *"just the hook,"* *"summarize in 3 bullets,"* *"what's the call to action?"*
+
+> Works the same for a **TikTok**, **YouTube Short**, **X post**, or a local `.mp4`. Image carousels come back slide-by-slide.
+
+### First run takes a few minutes ⏳
+
+The very first time you use it, social-vision quietly sets itself up — it
+installs the small tools it needs to download and transcribe videos. You may be
+asked to approve an install or two. **This happens once.** Every run after that
+is fast.
+
+It automatically picks the best transcription engine for your computer (Apple
+Silicon Macs, NVIDIA GPUs, and regular laptops are all handled), and everything
+runs **on your own machine** — your videos are never uploaded anywhere.
+
+---
+
+## 🔑 Private or login-only content
+
+Some posts can only be viewed when you're logged in — most **Instagram photo
+posts and carousels**, private accounts, and many **X/Twitter posts**.
+
+To analyze those, **stay logged into that platform in your normal web browser**
+(Chrome, Firefox, Safari, Edge, or Brave). When Claude hits a login wall, it will
+ask which browser you use, then borrow your existing login to fetch the post.
+
+- It only does this **when you say yes** — it never touches your browser otherwise.
+- On a Mac, your system may pop up a keychain prompt to allow it; that's normal.
+- TikTok and YouTube are usually public, so they work without any of this.
+
+---
+
+## 🔒 Privacy
+
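+- Downloading, frame extraction, and transcription all run **locally on your computer**. The video and audio files themselves are not sent to any third party; the extracted frames, transcript, and caption are then read by Claude in your Claude Code session so it can analyze them.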
+- Your browser login is only used when you explicitly approve it for a private post, and it stays on your machine.
+- Downloaded videos and results are saved under `~/.social-vision/` so you can find or delete them anytime.
+
+---
+
+## Troubleshooting
+
+| Problem | Fix |
+|---------|-----|
+| "ffmpeg could not be installed" | Run the one-line command Claude shows you (e.g. `brew install ffmpeg` on Mac, `winget install Gyan.FFmpeg` on Windows, `sudo apt install ffmpeg` on Linux). |
+| A link suddenly stops downloading | Platforms change often. Ask Claude to update the downloader — it can refresh the tool automatically. |
+| "Needs login" on a public-looking post | Tell Claude which browser you're logged into; it'll retry using your session. |
+| Transcript looks repetitive/empty | The video probably has little or no speech (music only). The on-screen visuals still get analyzed. |
+
+---
+
+## Notes
+
+- This tool downloads content for your own personal analysis. Respect each
+  platform's terms of service and the rights of content creators.
+
+## License
+
+MIT — see [LICENSE](LICENSE).
diff --git a/plugins/social-vision/commands/analyze.md b/plugins/social-vision/commands/analyze.md
new file mode 100644
index 0000000..6f8a46c
--- /dev/null
+++ b/plugins/social-vision/commands/analyze.md
@@ -0,0 +1,15 @@
+---
+description: Watch & analyze a social video/post — Instagram, TikTok, YouTube (Shorts), or X/Twitter URL, or a local video file
+argument-hint: <url-or-file>
+---
+
+Analyze the social video/post at: $ARGUMENTS
+
+Follow the `analyze-social` skill workflow:
+
+1. Run `python3 "${CLAUDE_PLUGIN_ROOT}/scripts/analyze.py" "$ARGUMENTS"` (warn the user the first run auto-installs tools and may take a few minutes).
+2. Handle any `ERROR_CODE` (e.g. `NEEDS_LOGIN` → ask which browser they're logged into, re-run with `--cookies-from-browser <browser>`).
+3. Read `manifest.json`, `meta.txt`, `transcript.txt`, and view the frames from the bundle.
+4. Produce the thorough breakdown defined in the skill's output template (header, TL;DR, full verbatim transcript with timestamps, on-screen text & visuals, verbatim caption, beat-by-beat, takeaway & intent, notable details), then stay open for follow-ups.
+
+If `$ARGUMENTS` is empty, ask the user for a link or file path.
diff --git a/plugins/social-vision/scripts/analyze.py b/plugins/social-vision/scripts/analyze.py
new file mode 100644
index 0000000..12e9492
--- /dev/null
+++ b/plugins/social-vision/scripts/analyze.py
@@ -0,0 +1,378 @@
+#!/usr/bin/env python3
+"""
+analyze.py — social-vision pipeline orchestrator (pure standard library).
+
+Takes one Instagram / TikTok / YouTube(-Shorts) / X(Twitter) URL or a local video
+file and produces a "bundle" directory that Claude then reads:
+
+    <bundle>/
+      frames/frame_###.jpg   sampled video frames or carousel slides
+      transcript.txt / .srt  spoken audio (if any)
+      meta.txt               human-readable metadata + caption
+      manifest.json          machine-readable map of the bundle (+ error codes)
+      SUMMARY.md             index
+
+It shells out to the tools installed by bootstrap.py (yt-dlp, gallery-dl, ffmpeg,
+and the venv's transcribe.py). It never hard-crashes: if it can salvage any
+frames or a caption it writes a partial bundle and exits 0.
+"""
+
+import argparse
+import json
+import re
+import shutil
+import subprocess
+import sys
+from datetime import datetime
+from pathlib import Path
+
+import bootstrap  # same directory
+
+SCRIPT_DIR = Path(__file__).resolve().parent  # directory of this script; transcribe.py is looked up here
+VIDEO_EXTS = (".mp4", ".mkv", ".webm", ".mov", ".m4v")  # extensions download_video probes for, in this order
+IMAGE_EXTS = (".jpg", ".jpeg", ".png", ".webp")  # suffixes download_images accepts (compared lowercased)
+
+LOGIN_PATTERNS = [  # substrings of lowercased tool stderr that classify_error reports as NEEDS_LOGIN
+    "login required", "log in", "logged in", "sign in to confirm", "use --cookies",
+    "requested content is not available", "account is private", "this account is private",
+    "only available for registered", "rate-limit reached", "cookies are no longer valid",
+]
+RATE_PATTERNS = ["429", "too many requests", "please wait a few minutes", "rate limit", "rate-limit"]  # -> RATE_LIMITED
+STALE_PATTERNS = ["unable to extract", "unsupported url", "unable to download webpage",  # -> EXTRACTOR_STALE
+                  "failed to parse json"]
+
+
+def run(cmd):  # run cmd (items coerced to str), capturing text stdout/stderr; undecodable bytes become U+FFFD, not an error
+    return subprocess.run([str(c) for c in cmd], capture_output=True, text=True, errors="replace")
+
+
+def platform_of(url):
+    """Name the platform a URL belongs to, judged by its hostname only.
+
+    Substring tests on the whole URL misfile hosts such as netflix.com (contains "x.com") and any
+    URL that merely mentions a platform in its path or query; unknown hosts map to "Web".
+    """
+    m = re.match(r"(?:(?:[a-z][a-z0-9+.-]*:)?//)?(?:[^/?#@]*@)?([^/:?#]+)", url.strip().lower())
+    host = m.group(1).rstrip(".") if m else ""
+    owners = {"instagram.com": "Instagram", "tiktok.com": "TikTok", "youtube.com": "YouTube",
+              "youtu.be": "YouTube", "x.com": "X", "twitter.com": "X"}
+    return next((name for dom, name in owners.items() if host == dom or host.endswith("." + dom)), "Web")
+
+
+def classify_error(stderr):
+    """Map tool stderr to NEEDS_LOGIN / RATE_LIMITED / EXTRACTOR_STALE, or None if unrecognised."""
+    s = (stderr or "").lower()
+    # Login is tested first: LOGIN_PATTERNS lists "rate-limit reached", which the RATE_PATTERNS entry
+    # "rate-limit" would otherwise always claim. All-digit patterns ("429") must stand alone, not sit in an ID.
+    table = (("NEEDS_LOGIN", LOGIN_PATTERNS), ("RATE_LIMITED", RATE_PATTERNS),
+             ("EXTRACTOR_STALE", STALE_PATTERNS))
+    return next((code for code, pats in table if any(
+        re.search(rf"(?<!\d){p}(?!\d)", s) if p.isdigit() else p in s for p in pats)), None)
+
+
+# --------------------------------------------------------------------------- #
+# metadata + routing
+# --------------------------------------------------------------------------- #
+def ytdlp_json(deps, url, cookies):
+    """Fetch yt-dlp's JSON metadata for `url`; returns (meta, None) or (None, error_code)."""
+    result = run([deps["yt_dlp"], "-J", "--no-warnings", "--no-playlist", *cookies, url])
+    if result.returncode != 0 or not result.stdout.strip():
+        return None, classify_error(result.stderr) or "EXTRACTOR_STALE"
+    try:
+        return json.loads(result.stdout), None
+    except json.JSONDecodeError:  # exit status 0 but unparseable output is reported as a stale extractor
+        return None, "EXTRACTOR_STALE"
+
+
+def has_video(meta):
+    """Report whether yt-dlp metadata describes a video; None or empty metadata counts as no video."""
+    if not meta:
+        return False
+    # Any one signal suffices: a truthy duration, or a format whose vcodec is set and not "none".
+    if meta.get("duration") or any((fmt.get("vcodec") or "none") != "none"
+                                   for fmt in (meta.get("formats") or [])):
+        return True
+    return meta.get("_type") == "video"
+
+
+def meta_fields(meta, url):
+    """Pick the manifest's display fields out of yt-dlp metadata.
+
+    `meta` may be None or empty; every field is then None except caption ("") and source_url.
+    """
+    info = meta or {}
+    picked = {name: info.get(name) for name in
+              ("title", "uploader", "upload_date", "duration", "like_count", "view_count")}
+    picked["uploader"] = picked["uploader"] or info.get("channel")  # channel is the fallback name
+    picked.update(caption=info.get("description") or "", source_url=url)
+    return picked
+
+
+# --------------------------------------------------------------------------- #
+# acquisition
+# --------------------------------------------------------------------------- #
+def download_video(deps, url, cookies, bundle):
+    """Download `url` with yt-dlp into bundle/video.<ext>; returns (path, None) or (None, error_code)."""
+    result = run([deps["yt_dlp"], "-f", "bv*+ba/best", "--merge-output-format", "mp4",
+                  "--no-playlist", "--no-warnings", *cookies,
+                  "-o", str(bundle / "video.%(ext)s"), url])
+    # Success is judged by the file on disk, not the exit status; VIDEO_EXTS order sets preference.
+    found = next((p for ext in VIDEO_EXTS for p in bundle.glob("video" + ext)), None)
+    if found is not None:
+        return found, None
+    return None, classify_error(result.stderr) or "DOWNLOAD_FAILED"
+
+
+def download_images(deps, url, cookies, bundle):
+    """Fetch a post's images with gallery-dl into bundle/media; returns (sorted_paths, None) or ([], error_code)."""
+    dest = bundle / "media"
+    dest.mkdir(exist_ok=True)
+    result = run([deps["gallery_dl"], "--dest", str(dest), "-o", "directory=[]", *cookies, url])
+    slides = sorted(entry for entry in dest.iterdir() if entry.suffix.lower() in IMAGE_EXTS)
+    if not slides:
+        return [], classify_error(result.stderr) or "DOWNLOAD_FAILED"
+    return slides, None
+
+
+def gallery_caption(deps, url, cookies):
+    """Best-effort caption/uploader/title from `gallery-dl -j`; returns {} when nothing usable is found."""
+    proc = run([deps["gallery_dl"], "-j", *cookies, url])
+    try:
+        data = json.loads(proc.stdout) if proc.returncode == 0 and proc.stdout.strip() else []
+    except json.JSONDecodeError:
+        return {}
+    for item in data:
+        if isinstance(item, list) and len(item) >= 2 and isinstance(item[-1], dict):
+            d = item[-1]
+            author = d.get("author")  # not guaranteed to be a dict: None or a plain string must not crash
+            if isinstance(author, dict):
+                author = author.get("name")
+            return {"caption": d.get("description") or d.get("content") or d.get("tweet_text") or "",
+                    "uploader": d.get("username") or d.get("uploader") or author,
+                    "title": d.get("title")}
+    return {}
+
+
+# --------------------------------------------------------------------------- #
+# frames + transcription
+# --------------------------------------------------------------------------- #
+def probe_duration(deps, video):  # duration of `video` as a float via ffprobe, or 0.0 when no number is printed
+    proc = run([deps["ffprobe"], "-v", "error", "-show_entries", "format=duration",
+                "-of", "csv=p=0", str(video)])
+    try:
+        return float(proc.stdout.strip())
+    except ValueError:  # empty or non-numeric ffprobe output
+        return 0.0
+
+
+def has_audio(deps, video):  # True when ffprobe, restricted to audio streams, prints anything for `video`
+    proc = run([deps["ffprobe"], "-v", "error", "-select_streams", "a",
+                "-show_entries", "stream=index", "-of", "csv=p=0", str(video)])
+    return bool(proc.stdout.strip())  # empty output is taken to mean no audio stream
+
+
+def extract_frames(deps, video, frames_dir, target_frames, width):
+    """Sample JPEG frames at target_frames/duration fps (clamped to 0.2-2.0); returns (duration, ordered frame paths)."""
+    dur = probe_duration(deps, video)
+    fps = 1.0 if dur <= 0 else max(0.2, min(2.0, target_frames / dur))  # unknown duration falls back to 1 fps
+    run([deps["ffmpeg"], "-hide_banner", "-loglevel", "error", "-y", "-i", str(video), "-vf",
+         f"fps={fps:.4f},scale={width}:-2", "-q:v", "3", str(frames_dir / "frame_%03d.jpg")])
+    return dur, sorted(frames_dir.glob("frame_*.jpg"), key=lambda p: (len(p.name), p.name))  # %03d widens past 999
+
+
+def slides_to_frames(deps, images, frames_dir, width):  # re-encode each image as frames_dir/frame_NNN.jpg scaled to `width`
+    out = []
+    for i, img in enumerate(images, 1):  # numbering starts at 1
+        dst = frames_dir / f"frame_{i:03d}.jpg"
+        run([deps["ffmpeg"], "-hide_banner", "-loglevel", "error", "-y", "-i", str(img),
+             "-vf", f"scale={width}:-2", "-q:v", "3", str(dst)])
+        if dst.exists():  # an image that produced no output file is skipped, leaving a gap in the numbering
+            out.append(dst)
+    return out
+
+
+def transcribe(deps, video, bundle, lang):  # True only if transcribe.py exits 0 and bundle/transcript.txt exists
+    proc = run([deps["venv_python"], str(SCRIPT_DIR / "transcribe.py"),
+                str(video), str(bundle), lang or "auto"])  # a missing language is passed as "auto"
+    if proc.returncode == 0 and (bundle / "transcript.txt").exists():
+        return True
+    print(proc.stderr, file=sys.stderr)  # forward the helper's error output to our own stderr
+    return False
+
+
+# --------------------------------------------------------------------------- #
+# bundle writers
+# --------------------------------------------------------------------------- #
+def write_bundle(bundle, manifest):
+    """Write manifest.json, meta.txt and SUMMARY.md into `bundle` from the manifest dict."""
+    (bundle / "manifest.json").write_text(json.dumps(manifest, indent=2), encoding="utf-8")
+
+    # meta.txt: one "key: value" line per field that is neither None nor "",
+    # followed by the caption block when a caption is present.
+    shown = ("title", "uploader", "upload_date", "duration",
+             "like_count", "view_count", "source_url")
+    meta_lines = [f"{key}: {manifest[key]}" for key in shown
+                  if manifest.get(key) not in (None, "")]
+    if manifest.get("caption"):
+        meta_lines.append("\n--- caption / description ---\n" + manifest["caption"])
+    (bundle / "meta.txt").write_text("\n".join(meta_lines) + "\n", encoding="utf-8")
+
+    # SUMMARY.md: a short index of what the bundle contains.
+    index = ["# social-vision bundle", ""]
+    index.append(f"- platform: {manifest.get('platform')}")
+    index.append(f"- type: {manifest.get('type')}")
+    index.append(f"- source: {manifest.get('source_url')}")
+    index.append(f"- frames: {manifest.get('n_frames')} -> {manifest.get('frames_dir')}")
+    index.append(f"- transcript: {manifest.get('transcript_path') or '(none)'}")
+    index.append(f"- meta: {bundle / 'meta.txt'}")
+    if manifest.get("error_code"):
+        index.append(f"- error_code: {manifest['error_code']}")
+    (bundle / "SUMMARY.md").write_text("\n".join(index) + "\n", encoding="utf-8")
+
+
+def emit(manifest, bundle=None):
+    """Print the final "=== social-vision result ===" block as KEY: value lines on stdout."""
+    report = ["\n=== social-vision result ===",
+              f"ERROR_CODE: {manifest.get('error_code') or 'none'}"]
+    if bundle:  # the bundle and manifest paths are listed only when a bundle is passed in
+        report += [f"BUNDLE: {bundle}", f"MANIFEST: {bundle / 'manifest.json'}"]
+    report += [f"TYPE: {manifest.get('type')}", f"FRAMES: {manifest.get('n_frames', 0)}",
+               f"TRANSCRIPT: {'yes' if manifest.get('transcript_path') else 'no'}"]
+    if manifest.get("message"):
+        report.append("MESSAGE: " + manifest["message"])
+    print("\n".join(report))
+
+
+# --------------------------------------------------------------------------- #
+# main
+# --------------------------------------------------------------------------- #
+def main():
+    ap = argparse.ArgumentParser(description="Analyze a social video/post for Claude.")
+    ap.add_argument("input", help="URL (Instagram/TikTok/YouTube/X) or local video file")
+    ap.add_argument("--cookies-from-browser", dest="cookies", default=None,
+                    help="chrome|firefox|safari|edge|brave — for login-walled posts")
+    ap.add_argument("--target-frames", type=int, default=40)
+    ap.add_argument("--frame-width", type=int, default=512)
+    ap.add_argument("--image-width", type=int, default=768)
+    ap.add_argument("--model", default=None, help="override transcription model")
+    ap.add_argument("--lang", default=None, help="force language code (else auto)")
+    ap.add_argument("--no-transcribe", action="store_true")
+    ap.add_argument("--outdir", default=None)
+    args = ap.parse_args()
+
+    deps = bootstrap.ensure_ready()
+    if not deps["ok"]:
+        print("\n=== social-vision result ===")
+        print("ERROR_CODE: SETUP_FAILED")
+        print("MESSAGE: " + (deps.get("error") or "dependency setup failed"))
+        sys.exit(1)
+    if args.model:
+        deps["model"] = args.model  # informational; transcribe.py reads engine.json
+
+    cookies = ["--cookies-from-browser", args.cookies] if args.cookies else []
+
+    base = Path(args.outdir) if args.outdir else Path(deps["home"]) / "out"
+    bundle = base / datetime.now().strftime("%Y%m%d-%H%M%S")
+    frames_dir = bundle / "frames"
+    frames_dir.mkdir(parents=True, exist_ok=True)
+
+    is_local = Path(args.input).is_file()
+    platform = "Local" if is_local else platform_of(args.input)
+    errors = []
+
+    # ---- local file: straight to frames + transcribe ----
+    if is_local:
+        video = bundle / ("video" + Path(args.input).suffix.lower())
+        shutil.copy(args.input, video)
+        meta = {"source_url": args.input, "caption": "", "title": Path(args.input).name}
+        return finish_video(args, deps, bundle, frames_dir, video, meta, platform, errors)
+
+    # ---- remote: metadata + routing ----
+    meta_json, err = ytdlp_json(deps, args.input, cookies)
+
+    if err == "EXTRACTOR_STALE":          # try a one-shot yt-dlp self-update, then retry
+        run([deps["venv_python"], "-m", "pip", "install", "-q", "-U", "yt-dlp"])
+        meta_json, err = ytdlp_json(deps, args.input, cookies)
+
+    if err in ("NEEDS_LOGIN", "RATE_LIMITED") and not cookies:
+        manifest = {**meta_fields(meta_json, args.input), "platform": platform,
+                    "type": "text", "n_frames": 0, "transcript_path": None,
+                    "error_code": err,
+                    "message": ("This content needs you to be logged in. Re-run with "
+                                "--cookies-from-browser <chrome|firefox|safari|edge|brave>."
+                                if err == "NEEDS_LOGIN" else
+                                "Rate-limited by the platform. Wait a few minutes, or pass "
+                                "--cookies-from-browser to use your logged-in session.")}
+        emit(manifest)
+        sys.exit(0)
+
+    fields = meta_fields(meta_json, args.input)
+
+    if has_video(meta_json):
+        video, derr = download_video(deps, args.input, cookies, bundle)
+        if not video:
+            errors.append(derr)
+            ec = derr if derr in ("NEEDS_LOGIN", "RATE_LIMITED") else "DOWNLOAD_FAILED"
+            manifest = {**fields, "platform": platform, "type": "text", "n_frames": 0,
+                        "transcript_path": None, "error_code": ec, "errors": errors,
+                        "message": "Could not download the video."}
+            write_bundle(bundle, manifest)
+            emit(manifest, bundle)
+            sys.exit(0)
+        return finish_video(args, deps, bundle, frames_dir, video, fields, platform, errors)
+
+    # ---- image post (carousel / photo tweet) ----
+    images, derr = download_images(deps, args.input, cookies, bundle)
+    if not images:
+        ec = derr if derr in ("NEEDS_LOGIN", "RATE_LIMITED") else "NO_MEDIA"
+        msg = ("This post needs login — re-run with --cookies-from-browser <browser>."
+               if ec == "NEEDS_LOGIN" else
+               "No downloadable images or video were found at this URL.")
+        if not fields["caption"]:
+            fields.update({k: v for k, v in gallery_caption(deps, args.input, cookies).items() if v})
+        manifest = {**fields, "platform": platform,
+                    "type": "text" if fields.get("caption") else "text",
+                    "n_frames": 0, "transcript_path": None, "error_code": ec,
+                    "errors": errors, "message": msg}
+        write_bundle(bundle, manifest)
+        emit(manifest, bundle)
+        sys.exit(0)
+
+    if not fields["caption"]:
+        fields.update({k: v for k, v in gallery_caption(deps, args.input, cookies).items() if v})
+
+    frames = slides_to_frames(deps, images, frames_dir, args.image_width)
+    manifest = {**fields, "platform": platform, "type": "image",
+                "n_frames": len(frames), "frames_dir": str(frames_dir),
+                "frame_files": [f.name for f in frames],
+                "has_audio": False, "transcript_path": None, "errors": errors,
+                "error_code": None}
+    write_bundle(bundle, manifest)
+    emit(manifest, bundle)
+
+
+def finish_video(args, deps, bundle, frames_dir, video, fields, platform, errors):
+    dur, frames = extract_frames(deps, video, frames_dir,
+                                 args.target_frames, args.frame_width)
+    audio = has_audio(deps, video)
+    transcript_path = None
+    if audio and not args.no_transcribe:
+        if transcribe(deps, video, bundle, args.lang):
+            transcript_path = str(bundle / "transcript.txt")
+        else:
+            errors.append("ASR_FAILED")
+    fields["duration"] = fields.get("duration") or round(dur, 2)
+    manifest = {**fields, "platform": platform, "type": "video",
+                "n_frames": len(frames), "frames_dir": str(frames_dir),
+                "frame_files": [f.name for f in frames],
+                "has_audio": audio, "transcript_path": transcript_path,
+                "transcript_srt": str(bundle / "transcript.srt") if transcript_path else None,
+                "engine": deps["engine"], "model": deps["model"],
+                "errors": errors,
+                "error_code": "ASR_FAILED" if ("ASR_FAILED" in errors and not transcript_path) else None}
+    write_bundle(bundle, manifest)
+    emit(manifest, bundle)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/plugins/social-vision/scripts/bootstrap.py b/plugins/social-vision/scripts/bootstrap.py
new file mode 100644
index 0000000..08ffdd9
--- /dev/null
+++ b/plugins/social-vision/scripts/bootstrap.py
@@ -0,0 +1,240 @@
+#!/usr/bin/env python3
+"""
+bootstrap.py — first-run dependency setup for social-vision.
+
+Pure standard library. Detects the platform, creates a plugin-local virtualenv,
+installs the Python tools (yt-dlp, gallery-dl, and the best Whisper engine for
+this machine), and makes sure ffmpeg is available. Idempotent: after the first
+successful run it just reads the cached engine.json.
+
+Engine choice:
+  - macOS Apple Silicon      -> mlx-whisper       (Metal / Neural Engine)
+  - NVIDIA GPU (Win/Linux)   -> faster-whisper    (CUDA, float16)
+  - everything else (CPU)    -> faster-whisper    (CPU, int8)
+"""
+
+import json
+import os
+import platform
+import shutil
+import subprocess
+import sys
+from pathlib import Path
+
+VERSION = "1.0.0"  # bump to force a re-bootstrap on upgrade
+
+HOME = Path.home() / ".social-vision"  # per-user state directory for the plugin
+VENV = HOME / "venv"  # plugin-local virtualenv holding the Python tools
+ENGINE_JSON = HOME / "engine.json"  # cached engine choice, written by ensure_ready()
+MARKER = HOME / ".deps-ok"  # contains VERSION once a bootstrap has fully succeeded
+
+IS_WINDOWS = os.name == "nt"
+
+
+# --------------------------------------------------------------------------- #
+# small helpers
+# --------------------------------------------------------------------------- #
+def log(msg):  # progress messages go to stderr; this script prints its JSON result on stdout
+    print(f"[bootstrap] {msg}", file=sys.stderr, flush=True)
+
+
+def venv_bin(name):
+    """Path to an executable inside the venv (handles Windows layout)."""
+    if IS_WINDOWS:
+        exe = VENV / "Scripts" / (name + ".exe")
+        return exe if exe.exists() else VENV / "Scripts" / name
+    return VENV / "bin" / name
+
+
+def venv_python():  # the venv interpreter as a str, as passed to run([...]) in this file
+    return str(venv_bin("python"))
+
+
+def run(cmd, **kw):
+    """Run cmd with stdout/stderr captured as text; a non-zero exit status does not raise."""
+    return subprocess.run(cmd, capture_output=True, text=True, **kw)
+
+
+def total_ram_gb():
+    """Best-effort total RAM in GB (stdlib only). Defaults to 8 if unknown."""
+    try:
+        if sys.platform == "darwin":
+            out = run(["sysctl", "-n", "hw.memsize"]).stdout.strip()
+            return int(out) / (1024 ** 3)
+        if sys.platform.startswith("linux"):
+            pages = os.sysconf("SC_PHYS_PAGES")
+            page_size = os.sysconf("SC_PAGE_SIZE")
+            return pages * page_size / (1024 ** 3)
+        if IS_WINDOWS:
+            import ctypes
+
+            class MemStatus(ctypes.Structure):
+                _fields_ = [
+                    ("dwLength", ctypes.c_ulong),
+                    ("dwMemoryLoad", ctypes.c_ulong),
+                    ("ullTotalPhys", ctypes.c_ulonglong),
+                    ("ullAvailPhys", ctypes.c_ulonglong),
+                    ("ullTotalPageFile", ctypes.c_ulonglong),
+                    ("ullAvailPageFile", ctypes.c_ulonglong),
+                    ("ullTotalVirtual", ctypes.c_ulonglong),
+                    ("ullAvailVirtual", ctypes.c_ulonglong),
+                    ("ullAvailExtendedVirtual", ctypes.c_ulonglong),
+                ]
+
+            stat = MemStatus()
+            stat.dwLength = ctypes.sizeof(MemStatus)
+            ctypes.windll.kernel32.GlobalMemoryStatusEx(ctypes.byref(stat))
+            return stat.ullTotalPhys / (1024 ** 3)
+    except Exception:
+        pass
+    return 8.0
+
+
+def has_nvidia_gpu():
+    if shutil.which("nvidia-smi") is None:
+        return False
+    return run(["nvidia-smi"]).returncode == 0
+
+
+# --------------------------------------------------------------------------- #
+# engine selection
+# --------------------------------------------------------------------------- #
+def choose_engine():
+    ram = total_ram_gb()
+    is_apple_silicon = sys.platform == "darwin" and platform.machine() == "arm64"
+
+    if is_apple_silicon:
+        model = (
+            "mlx-community/whisper-large-v3-turbo"
+            if ram >= 8
+            else "mlx-community/whisper-small-mlx"
+        )
+        return {"engine": "mlx", "model": model, "device": "metal",
+                "compute_type": "float16", "pip": ["mlx-whisper"]}
+
+    if has_nvidia_gpu():
+        return {"engine": "faster-whisper", "model": "large-v3", "device": "cuda",
+                "compute_type": "float16", "pip": ["faster-whisper"]}
+
+    # CPU fallback — small is fast and decent; base on low-RAM boxes
+    model = "small" if ram >= 8 else "base"
+    return {"engine": "faster-whisper", "model": model, "device": "cpu",
+            "compute_type": "int8", "pip": ["faster-whisper"]}
+
+
+# --------------------------------------------------------------------------- #
+# ffmpeg
+# --------------------------------------------------------------------------- #
+def ensure_ffmpeg():
+    """Return (ok, message). Auto-installs only via non-sudo managers."""
+    if shutil.which("ffmpeg") and shutil.which("ffprobe"):
+        return True, "ffmpeg present"
+
+    log("ffmpeg not found — attempting install")
+    if sys.platform == "darwin" and shutil.which("brew"):
+        run(["brew", "install", "ffmpeg"])
+    elif IS_WINDOWS and shutil.which("winget"):
+        run(["winget", "install", "--silent", "--accept-package-agreements",
+             "--accept-source-agreements", "-e", "--id", "Gyan.FFmpeg"])
+    elif IS_WINDOWS and shutil.which("scoop"):
+        run(["scoop", "install", "ffmpeg"])
+    # Linux package managers need sudo — don't run silently.
+
+    if shutil.which("ffmpeg") and shutil.which("ffprobe"):
+        return True, "ffmpeg installed"
+
+    if sys.platform.startswith("linux"):
+        cmd = "sudo apt install -y ffmpeg   # (or: sudo dnf install ffmpeg / sudo pacman -S ffmpeg)"
+    elif sys.platform == "darwin":
+        cmd = "brew install ffmpeg   # (install Homebrew first: https://brew.sh)"
+    else:
+        cmd = "winget install Gyan.FFmpeg   # (or: scoop install ffmpeg)"
+    return False, f"ffmpeg is required but could not be auto-installed. Please run:\n    {cmd}"
+
+
+# --------------------------------------------------------------------------- #
+# venv + python tools
+# --------------------------------------------------------------------------- #
+def ensure_venv():  # create the plugin venv (and upgrade its pip) when its python is missing
+    if not venv_bin("python").exists():  # only the interpreter's presence is checked
+        log("creating virtualenv")
+        run([sys.executable, "-m", "venv", str(VENV)])  # NOTE(review): exit status is ignored
+        run([venv_python(), "-m", "pip", "install", "-q", "--upgrade", "pip"])
+
+
+def pip_install(packages):  # install/upgrade packages with the venv's pip; returns success
+    log("installing: " + ", ".join(packages))
+    proc = run([venv_python(), "-m", "pip", "install", "-q", "--upgrade", *packages])
+    if proc.returncode != 0:
+        log("pip install failed:\n" + (proc.stderr or proc.stdout))  # stdout when stderr is empty
+    return proc.returncode == 0  # True only when pip exited with status 0
+
+
+# --------------------------------------------------------------------------- #
+# orchestration
+# --------------------------------------------------------------------------- #
+def already_ok():
+    if not MARKER.exists() or not ENGINE_JSON.exists():
+        return False
+    try:
+        return MARKER.read_text().strip() == VERSION and venv_bin("python").exists()
+    except Exception:
+        return False
+
+
+def ensure_ready(force=False):
+    """
+    Make sure everything is installed. Returns a dict:
+      {ok, engine, model, device, compute_type, venv_python,
+       yt_dlp, gallery_dl, ffmpeg, ffprobe, error}
+    """
+    HOME.mkdir(parents=True, exist_ok=True)
+
+    if not force and already_ok():
+        engine = json.loads(ENGINE_JSON.read_text())
+        return _paths(engine, ok=True)
+
+    engine = choose_engine()
+    log(f"platform={sys.platform}/{platform.machine()} -> engine={engine['engine']} "
+        f"model={engine['model']} device={engine['device']}")
+
+    ensure_venv()
+    pip_ok = pip_install(["yt-dlp", "gallery-dl", *engine["pip"]])
+    ff_ok, ff_msg = ensure_ffmpeg()
+
+    ENGINE_JSON.write_text(json.dumps(engine, indent=2))
+
+    if pip_ok and ff_ok:
+        MARKER.write_text(VERSION)
+        return _paths(engine, ok=True)
+
+    err = []
+    if not pip_ok:
+        err.append("Failed to install Python tools into the virtualenv.")
+    if not ff_ok:
+        err.append(ff_msg)
+    return _paths(engine, ok=False, error="\n".join(err))
+
+
+def _paths(engine, ok, error=None):
+    """Assemble the dict ensure_ready() returns from the engine config and resolved tool paths."""
+    info = {"ok": ok, "error": error}
+    for key in ("engine", "model", "device", "compute_type"):
+        info[key] = engine[key]
+    info.update({
+        "venv_python": venv_python(),
+        "yt_dlp": str(venv_bin("yt-dlp")),
+        "gallery_dl": str(venv_bin("gallery-dl")),
+        # fall back to the bare command name when the tool is not found on PATH
+        "ffmpeg": shutil.which("ffmpeg") or "ffmpeg",
+        "ffprobe": shutil.which("ffprobe") or "ffprobe",
+        "home": str(HOME),
+    })
+    return info
+
+
+if __name__ == "__main__":
+    force = "--force" in sys.argv  # re-run setup even if a previous bootstrap succeeded
+    result = ensure_ready(force=force)
+    print(json.dumps(result, indent=2))
+    sys.exit(0 if result["ok"] else 1)  # non-zero exit status when setup failed
diff --git a/plugins/social-vision/scripts/transcribe.py b/plugins/social-vision/scripts/transcribe.py
new file mode 100644
index 0000000..a5c9cf5
--- /dev/null
+++ b/plugins/social-vision/scripts/transcribe.py
@@ -0,0 +1,111 @@
+#!/usr/bin/env python3
+"""
+transcribe.py — runs INSIDE the social-vision venv (so it can import the
+Whisper engine). Reads ~/.social-vision/engine.json, transcribes one audio/video
+file, and writes transcript.txt + transcript.srt into the output directory.
+
+Usage:
+    <venv python> transcribe.py <media_path> <out_dir> [language]
+
+Both engines are configured to avoid the classic Whisper "silence hallucination"
+(repeated phantom lines) via VAD / no-condition-on-previous-text settings.
+"""
+
+import json
+import sys
+from pathlib import Path
+
+HOME = Path.home() / ".social-vision"
+ENGINE_JSON = HOME / "engine.json"  # engine/model settings that main() reads
+
+
+def fmt_ts(seconds):
+    if seconds is None or seconds < 0:
+        seconds = 0
+    ms = int(round(seconds * 1000))
+    h, ms = divmod(ms, 3600_000)
+    m, ms = divmod(ms, 60_000)
+    s, ms = divmod(ms, 1000)
+    return f"{h:02d}:{m:02d}:{s:02d},{ms:03d}"
+
+
+def write_outputs(out_dir, segments):
+    """segments: list of (start, end, text)."""
+    out_dir = Path(out_dir)
+    txt_lines, srt_blocks = [], []
+    for i, (start, end, text) in enumerate(segments, 1):
+        text = (text or "").strip()
+        if not text:
+            continue
+        txt_lines.append(text)
+        srt_blocks.append(f"{i}\n{fmt_ts(start)} --> {fmt_ts(end)}\n{text}\n")
+    (out_dir / "transcript.txt").write_text("\n".join(txt_lines) + "\n", encoding="utf-8")
+    (out_dir / "transcript.srt").write_text("\n".join(srt_blocks) + "\n", encoding="utf-8")
+    return len(txt_lines)
+
+
+def transcribe_mlx(media, model, language):
+    import mlx_whisper
+    result = mlx_whisper.transcribe(
+        str(media),
+        path_or_hf_repo=model,
+        language=language,
+        condition_on_previous_text=False,      # kills the repeat-line hallucination
+        no_speech_threshold=0.6,
+        compression_ratio_threshold=2.4,
+        hallucination_silence_threshold=2.0,
+        verbose=False,
+    )
+    segs = [(s.get("start"), s.get("end"), s.get("text")) for s in result.get("segments", [])]
+    if not segs and result.get("text"):
+        segs = [(0, 0, result["text"])]
+    return segs
+
+
+def transcribe_faster(media, model, device, compute_type, language):
+    from faster_whisper import WhisperModel
+
+    def load(dev, ct):
+        return WhisperModel(model, device=dev, compute_type=ct)
+
+    try:
+        wm = load(device, compute_type)
+    except Exception as e:
+        print(f"[transcribe] {device}/{compute_type} unavailable ({e}); "
+              f"falling back to CPU int8", file=sys.stderr)
+        wm = load("cpu", "int8")
+
+    segments, _info = wm.transcribe(
+        str(media),
+        language=language,
+        vad_filter=True,                        # drop silence -> no hallucinated lines
+        condition_on_previous_text=False,
+        no_speech_threshold=0.6,
+    )
+    return [(s.start, s.end, s.text) for s in segments]
+
+
+def main():
+    if len(sys.argv) < 3:
+        print("usage: transcribe.py <media> <out_dir> [language]", file=sys.stderr)
+        sys.exit(2)
+
+    media, out_dir = sys.argv[1], sys.argv[2]
+    language = sys.argv[3] if len(sys.argv) > 3 and sys.argv[3] not in ("", "auto") else None
+
+    engine = json.loads(ENGINE_JSON.read_text())
+    eng = engine["engine"]
+    model = engine["model"]
+
+    if eng == "mlx":
+        segs = transcribe_mlx(media, model, language)
+    else:
+        segs = transcribe_faster(media, model, engine["device"],
+                                 engine["compute_type"], language)
+
+    n = write_outputs(out_dir, segs)
+    print(f"[transcribe] wrote {n} lines using {eng}:{model}", file=sys.stderr)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/plugins/social-vision/skills/analyze-social/SKILL.md b/plugins/social-vision/skills/analyze-social/SKILL.md
new file mode 100644
index 0000000..fc684d1
--- /dev/null
+++ b/plugins/social-vision/skills/analyze-social/SKILL.md
@@ -0,0 +1,60 @@
+---
+name: analyze-social
+description: Use when the user shares a social video/post link or local video and wants it understood — an Instagram (instagram.com), TikTok (tiktok.com), YouTube or YouTube Shorts (youtube.com/youtu.be), or X/Twitter (x.com/twitter.com) URL, or a local .mp4/.mov file, OR when they say "analyze/watch/transcribe this video/reel/post." Downloads it, transcribes the audio, extracts frames Claude can see, and produces a thorough breakdown.
+---
+
+# Analyze Social (watch a video/post for the user)
+
+## Overview
+
+Turns a social link (or local video) into material you can actually read: sampled
+frames you view with native vision, a full transcript, and the post's caption.
+Then you write a thorough breakdown. A bundled cross-platform Python pipeline does
+the fetching/transcribing; you do the understanding.
+
+## When to use
+
+- The user pastes an **Instagram / TikTok / YouTube / YouTube Shorts / X (Twitter)** URL.
+- The user gives a **local video file** path.
+- The user asks to **watch / analyze / transcribe / summarize** a video, reel, short, or post.
+
+## Workflow
+
+1. **Run the pipeline** (use the Bash tool):
+   ```bash
+   python3 "${CLAUDE_PLUGIN_ROOT}/scripts/analyze.py" "<URL or local file path>"
+   ```
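+   - The **first run auto-installs** its tools (yt-dlp, gallery-dl, a Whisper engine, and — on macOS/Windows, via brew/winget/scoop — ffmpeg). On Linux ffmpeg is not installed automatically: if it is missing, the run ends with `SETUP_FAILED` and the command to run. Tell the user this one-time setup can take a few minutes and may ask them to approve installs. Subsequent runs are fast.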
+   - Useful flags: `--no-transcribe` (frames + caption only, faster), `--target-frames N`, `--lang <code>`.
+
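+2. **Read the printed result block.** It ends with `ERROR_CODE`, `BUNDLE`, `TYPE`, `FRAMES`, `TRANSCRIPT`. Handle the error code (for a code not listed below, such as `DOWNLOAD_FAILED`, `NO_MEDIA`, or `ASR_FAILED`, tell the user what failed and work with whatever the bundle still contains):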
+   - `NEEDS_LOGIN` → the content is private/login-walled. Ask the user: *"Which browser are you logged into that platform on? (chrome / firefox / safari / edge / brave)"* then re-run with `--cookies-from-browser <browser>`. Only do this when the user has opted in — reading browser cookies is sensitive and may trigger a keychain prompt.
+   - `RATE_LIMITED` → tell them to wait a few minutes, or retry with `--cookies-from-browser` to use their logged-in session.
+   - `SETUP_FAILED` → show the `MESSAGE` (usually a one-line command to install ffmpeg) and offer to run it.
+   - `none` → success, continue.
+
+3. **Load the bundle.** Read `manifest.json` (the contract) from the `BUNDLE` path, then:
+   - Read `meta.txt` (caption + metadata) and, if present, `transcript.txt`.
+   - **View the frames** with the Read tool — a spread across the timeline (first, several middle, last) for video; **all slides in order** for an image carousel. You read on-screen text/captions directly from the frames; no separate OCR.
+
+4. **Write the analysis** using the template below.
+
+## Output template (be thorough — a faithful record, not a teaser)
+
+- **Header** — platform · creator handle/name · date · type · duration · engagement (views/likes/comments if present) · URL.
+- **TL;DR** — 1–2 sentences: what this is.
+- **Full transcript — verbatim, everything.** The complete spoken audio word-for-word, with timestamps from `transcript.srt`. Do not summarize or drop lines. The only allowed cleanup is collapsing obvious hallucinated repeat-lines (e.g. "Okay. Okay. Okay."), never real speech. If there's no audio/speech, say so.
+- **On-screen text & visuals (timeline)** — everything visible across the frames: text overlays/captions transcribed, graphics, charts/numbers, b-roll, setting, what the creator is doing, products/profiles shown. For carousels: every slide in order, with its full text + a description of its image.
+- **Caption / description — verbatim** — exactly as posted, including hashtags, @mentions, links.
+- **Beat-by-beat structure** (video) — how it's built, with timestamps: hook → point 1 → … → CTA.
+- **Takeaway & intent** — core message, what it's actually sharing, the goal (educate / sell / lead-gen / entertain), and any explicit CTA.
+- **Notable details** — names, tools, stats, claims, prices, links, anything quotable.
+
+Adapt per `type`: a plain text tweet skips transcript/frames; an image carousel leans on the visuals section.
+
+Then stay open for follow-ups ("just the hook," "list every stat," "summarize in 3 bullets," etc.).
+
+## Notes
+
+- Everything runs locally; nothing is uploaded. Browser cookies are read **only** when the user opts in for login-walled content.
+- TikTok and YouTube are usually public (no login). Instagram posts/carousels and many X posts require being logged in.