Merge pull request #37 from jkawamoto/video-info

jkawamoto · web-flow · commit 1597f65c0500 · 2025-08-26T01:54:09.000-06:00
Add `get_video_info` tool and integrate yt-dlp for video handling
diff --git a/README.md b/README.md
@@ -21,6 +21,12 @@ Fetches the transcript of a specified YouTube video.
 - **lang** *(string, optional)*: The desired language for the transcript. Defaults to `en` if not specified.
 - **next_cursor** *(string, optional)*: Cursor to retrieve the next page of the transcript.
 
+### `get_video_info`
+Fetches the metadata (title, description, and uploader) of a specified YouTube video.
+
+#### Parameters
+- **url** *(string)*: The full URL of the YouTube video. This field is required.
+
 ## Installation
 > [!NOTE]
 > You'll need [`uv`](https://docs.astral.sh/uv) installed on your system to use `uvx` command.
diff --git a/pyproject.toml b/pyproject.toml
@@ -29,6 +29,7 @@ dependencies = [
     "requests>=2.32.3",
     "rich-click>=1.8.8",
     "youtube-transcript-api>=1.1",
+    "yt-dlp>=2025.8.22",
 ]
 
 scripts.mcp-youtube-transcript = "mcp_youtube_transcript.cli:main"
@@ -68,3 +69,9 @@ replace = 'version = "{new_version}"'
 warn_return_any = true
 warn_unused_configs = true
 disallow_untyped_defs = true
+
+[[tool.mypy.overrides]]
+module = [
+    "yt_dlp.*",
+]
+ignore_missing_imports = true
diff --git a/src/mcp_youtube_transcript/__init__.py b/src/mcp_youtube_transcript/__init__.py
@@ -21,19 +21,39 @@
 from pydantic import Field, BaseModel
 from youtube_transcript_api import YouTubeTranscriptApi
 from youtube_transcript_api.proxies import WebshareProxyConfig, GenericProxyConfig, ProxyConfig
+from yt_dlp import YoutubeDL
+from yt_dlp.extractor.youtube import YoutubeIE
 
 
 @dataclass(frozen=True)
 class AppContext:
     http_client: requests.Session
     ytt_api: YouTubeTranscriptApi
+    dlp: YoutubeDL
 
 
 @asynccontextmanager
 async def _app_lifespan(_server: FastMCP, proxy_config: ProxyConfig | None) -> AsyncIterator[AppContext]:
-    with requests.Session() as http_client:
+    with requests.Session() as http_client, YoutubeDL(params={"quiet": True}, auto_init=False) as dlp:
         ytt_api = YouTubeTranscriptApi(http_client=http_client, proxy_config=proxy_config)
-        yield AppContext(http_client=http_client, ytt_api=ytt_api)
+        dlp.add_info_extractor(YoutubeIE())
+        yield AppContext(http_client=http_client, ytt_api=ytt_api, dlp=dlp)
+
+
+class Transcript(BaseModel):
+    """Transcript of a YouTube video."""
+
+    title: str = Field(description="Title of the video")
+    transcript: str = Field(description="Transcript of the video")
+    next_cursor: str | None = Field(description="Cursor to retrieve the next page of the transcript", default=None)
+
+
+class VideoInfo(BaseModel):
+    """Video information."""
+
+    title: str = Field(description="Title of the video")
+    description: str = Field(description="Description of the video")
+    uploader: str = Field(description="Uploader of the video")
 
 
 @lru_cache
@@ -54,12 +74,10 @@ def _get_transcript(ctx: AppContext, video_id: str, lang: str) -> Tuple[str, lis
     return title, [item.text for item in transcripts]
 
 
-class Transcript(BaseModel):
-    """Transcript of a YouTube video."""
-
-    title: str = Field(description="Title of the video")
-    transcript: str = Field(description="Transcript of the video")
-    next_cursor: str | None = Field(description="Cursor to retrieve the next page of the transcript", default=None)
+@lru_cache
+def _get_video_info(ctx: AppContext, video_url: str) -> VideoInfo:
+    res = ctx.dlp.extract_info(video_url, download=False)
+    return VideoInfo(title=res["title"], description=res["description"], uploader=res["uploader"])
 
 
 def server(
@@ -111,7 +129,15 @@ async def get_transcript(
 
         return Transcript(title=title, transcript=res[:-1], next_cursor=cursor)
 
+    @mcp.tool()
+    def get_video_info(
+        ctx: Context[ServerSession, AppContext],
+        url: str = Field(description="The URL of the YouTube video"),
+    ) -> VideoInfo:
+        """Retrieves the video information."""
+        return _get_video_info(ctx.request_context.lifespan_context, url)
+
     return mcp
 
 
-__all__: Final = ["server", "Transcript"]
+__all__: Final = ["server", "Transcript", "VideoInfo"]
diff --git a/tests/test_mcp.py b/tests/test_mcp.py
@@ -14,8 +14,10 @@
 from mcp import StdioServerParameters, stdio_client, ClientSession
 from mcp.types import TextContent
 from youtube_transcript_api import YouTubeTranscriptApi
+import yt_dlp
+from yt_dlp.extractor.youtube import YoutubeIE
 
-from mcp_youtube_transcript import Transcript
+from mcp_youtube_transcript import Transcript, VideoInfo
 
 
 def fetch_title(url: str, lang: str) -> str:
@@ -192,3 +194,26 @@ async def test_get_transcript_with_response_limit(mcp_client_session_with_respon
 
     assert t.title == expect.title
     assert transcript[:-1] == expect.transcript
+
+
+@pytest.mark.skipif(os.getenv("CI") == "true", reason="Skipping this test on CI")
+@pytest.mark.anyio
+async def test_get_video_info(mcp_client_session: ClientSession) -> None:
+    video_id = "LPZh9BOjkQs"
+
+    dlp = yt_dlp.YoutubeDL(params={"quiet": True}, auto_init=False)
+    dlp.add_info_extractor(YoutubeIE())
+    dlp_res = dlp.extract_info(f"https://www.youtube.com/watch?v={video_id}", download=False)
+    expect = VideoInfo(title=dlp_res["title"], description=dlp_res["description"], uploader=dlp_res["uploader"])
+
+    res = await mcp_client_session.call_tool(
+        "get_video_info",
+        arguments={
+            "url": f"https://www.youtube.com/watch?v={video_id}",
+        },
+    )
+    assert isinstance(res.content[0], TextContent)
+
+    info = VideoInfo.model_validate_json(res.content[0].text)
+    assert info == expect
+    assert not res.isError
diff --git a/uv.lock b/uv.lock