Merge pull request #48 from jkawamoto/timeline

jkawamoto · web-flow · commit 580f72c9b2e6 · 2025-09-30T02:41:54.000-06:00
Add transcript fetching with timestamps
diff --git a/manifest.json b/manifest.json
@@ -32,6 +32,10 @@
       "name": "get_transcript",
       "description": "Fetches the transcript of a specified YouTube video."
     },
+    {
+      "name": "get_timed_transcript",
+      "description": "Fetches the transcript of a specified YouTube video with timestamps."
+    },
     {
       "name": "get_video_info",
       "description": "Fetches the metadata of a specified YouTube video."
diff --git a/src/mcp_youtube_transcript/__init__.py b/src/mcp_youtube_transcript/__init__.py
@@ -5,6 +5,8 @@
 #  This software is released under the MIT License.
 #
 #  http://opensource.org/licenses/mit-license.php
+from __future__ import annotations
+
 from contextlib import asynccontextmanager
 from dataclasses import dataclass
 from datetime import datetime, timedelta
@@ -21,7 +23,7 @@
 from mcp.server import FastMCP
 from mcp.server.fastmcp import Context
 from pydantic import Field, BaseModel
-from youtube_transcript_api import YouTubeTranscriptApi
+from youtube_transcript_api import YouTubeTranscriptApi, FetchedTranscriptSnippet
 from youtube_transcript_api.proxies import WebshareProxyConfig, GenericProxyConfig, ProxyConfig
 from yt_dlp import YoutubeDL
 from yt_dlp.extractor.youtube import YoutubeIE
@@ -50,6 +52,31 @@ class Transcript(BaseModel):
     next_cursor: str | None = Field(description="Cursor to retrieve the next page of the transcript", default=None)
 
 
+class TranscriptSnippet(BaseModel):
+    """Transcript snippet of a YouTube video."""
+
+    text: str = Field(description="Text of the transcript snippet")
+    start: float = Field(description="The timestamp at which this transcript snippet appears on screen in seconds.")
+    duration: float = Field(description="How long the snippet stays on screen in seconds.")
+
+    def __len__(self) -> int:
+        """Return the length of this snippet's JSON serialization."""
+        return len(self.model_dump_json())
+
+    @classmethod
+    def from_fetched_transcript_snippet(cls, snippet: FetchedTranscriptSnippet) -> TranscriptSnippet:
+        """Build a snippet by copying ``text``, ``start`` and ``duration`` from a fetched snippet."""
+        return cls(text=snippet.text, start=snippet.start, duration=snippet.duration)
+
+
+class TimedTranscript(BaseModel):
+    """Transcript of a YouTube video with timestamps."""
+
+    title: str = Field(description="Title of the video")
+    snippets: list[TranscriptSnippet] = Field(description="Transcript snippets of the video")
+    next_cursor: str | None = Field(description="Cursor to retrieve the next page of the transcript", default=None)
+
+
 class VideoInfo(BaseModel):
     """Video information."""
 
@@ -68,8 +95,19 @@ def _parse_time_info(date: int, timestamp: int, duration: int) -> Tuple[datetime
     return upload_date, duration_str
 
 
+def _parse_video_id(url: str) -> str:
+    """Return the video ID from a youtu.be path or the ``v`` query parameter; raise ValueError if missing."""
+    parsed_url = urlparse(url)
+    is_short_link = parsed_url.hostname == "youtu.be"
+    # Both URL forms share one emptiness check, so a bare "https://youtu.be/" is rejected as well.
+    q = [parsed_url.path.strip("/")] if is_short_link else parse_qs(parsed_url.query).get("v")
+    if not q or not q[0]:
+        raise ValueError(f"couldn't find a video ID from the provided URL: {url}.")
+    return q[0]
+
+
 @lru_cache
-def _get_transcript(ctx: AppContext, video_id: str, lang: str) -> Tuple[str, list[str]]:
+def _get_transcript_snippets(ctx: AppContext, video_id: str, lang: str) -> Tuple[str, list[FetchedTranscriptSnippet]]:
     if lang == "en":
         languages = ["en"]
     else:
@@ -83,7 +121,7 @@ def _get_transcript(ctx: AppContext, video_id: str, lang: str) -> Tuple[str, lis
     title = soup.title.string if soup.title and soup.title.string else "Transcript"
 
     transcripts = ctx.ytt_api.fetch(video_id, languages=languages)
-    return title, [item.text for item in transcripts]
+    return title, transcripts.snippets
 
 
 @lru_cache
@@ -124,16 +162,9 @@ async def get_transcript(
         next_cursor: str | None = Field(description="Cursor to retrieve the next page of the transcript", default=None),
     ) -> Transcript:
         """Retrieves the transcript of a YouTube video."""
-        parsed_url = urlparse(url)
-        if parsed_url.hostname == "youtu.be":
-            video_id = parsed_url.path.lstrip("/")
-        else:
-            q = parse_qs(parsed_url.query).get("v")
-            if q is None:
-                raise ValueError(f"couldn't find a video ID from the provided URL: {url}.")
-            video_id = q[0]
 
-        title, transcripts = _get_transcript(ctx.request_context.lifespan_context, video_id, lang)
+        title, snippets = _get_transcript_snippets(ctx.request_context.lifespan_context, _parse_video_id(url), lang)
+        transcripts = (item.text for item in snippets)
 
         if response_limit is None or response_limit <= 0:
             return Transcript(title=title, transcript="\n".join(transcripts))
@@ -148,6 +179,34 @@ async def get_transcript(
 
         return Transcript(title=title, transcript=res[:-1], next_cursor=cursor)
 
+    @mcp.tool()
+    async def get_timed_transcript(
+        ctx: Context[ServerSession, AppContext],
+        url: str = Field(description="The URL of the YouTube video"),
+        lang: str = Field(description="The preferred language for the transcript", default="en"),
+        next_cursor: str | None = Field(description="Cursor to retrieve the next page of the transcript", default=None),
+    ) -> TimedTranscript:
+        """Retrieves the transcript of a YouTube video with timestamps."""
+        title, snippets = _get_transcript_snippets(ctx.request_context.lifespan_context, _parse_video_id(url), lang)
+
+        if response_limit is None or response_limit <= 0:
+            return TimedTranscript(
+                title=title, snippets=[TranscriptSnippet.from_fetched_transcript_snippet(s) for s in snippets]
+            )
+
+        res = []
+        size = len(title) + 1
+        cursor = None
+        for i, s in islice(enumerate(snippets), int(next_cursor or 0), None):
+            snippet = TranscriptSnippet.from_fetched_transcript_snippet(s)
+            if size + len(snippet) + 1 > response_limit:
+                cursor = str(i)
+                break
+            res.append(snippet)
+            size += len(snippet) + 1  # keep the running page size; without it the limit is never reached
+
+        return TimedTranscript(title=title, snippets=res, next_cursor=cursor)
+
     @mcp.tool()
     def get_video_info(
         ctx: Context[ServerSession, AppContext],
@@ -159,4 +218,4 @@ def get_video_info(
     return mcp
 
 
-__all__: Final = ["server", "Transcript", "VideoInfo"]
+__all__: Final = ["server", "Transcript", "TimedTranscript", "TranscriptSnippet", "VideoInfo"]
diff --git a/tests/test_mcp.py b/tests/test_mcp.py
@@ -19,7 +19,7 @@
 import yt_dlp
 from yt_dlp.extractor.youtube import YoutubeIE
 
-from mcp_youtube_transcript import Transcript, VideoInfo, _parse_time_info
+from mcp_youtube_transcript import Transcript, VideoInfo, _parse_time_info, TimedTranscript, TranscriptSnippet
 
 
 def fetch_title(url: str, lang: str) -> str:
@@ -37,10 +37,20 @@ async def mcp_client_session() -> AsyncGenerator[ClientSession, None]:
             yield session
 
 
+@pytest.fixture(scope="module")
+async def mcp_client_session_with_response_limit() -> AsyncGenerator[ClientSession, None]:
+    params = StdioServerParameters(command="uv", args=["run", "mcp-youtube-transcript", "--response-limit", "3000"])
+    async with stdio_client(params) as streams:
+        async with ClientSession(streams[0], streams[1]) as session:
+            await session.initialize()
+            yield session
+
+
 @pytest.mark.anyio
 async def test_list_tools(mcp_client_session: ClientSession) -> None:
     res = await mcp_client_session.list_tools()
     assert any(tool.name == "get_transcript" for tool in res.tools)
+    assert any(tool.name == "get_timed_transcript" for tool in res.tools)
     assert any(tool.name == "get_video_info" for tool in res.tools)
 
 
@@ -158,15 +168,6 @@ async def test_get_transcript_with_short_url(mcp_client_session: ClientSession)
     assert not res.isError
 
 
-@pytest.fixture(scope="module")
-async def mcp_client_session_with_response_limit() -> AsyncGenerator[ClientSession, None]:
-    params = StdioServerParameters(command="uv", args=["run", "mcp-youtube-transcript", "--response-limit", "3000"])
-    async with stdio_client(params) as streams:
-        async with ClientSession(streams[0], streams[1]) as session:
-            await session.initialize()
-            yield session
-
-
 @pytest.mark.skipif(os.getenv("CI") == "true", reason="Skipping this test on CI")
 @pytest.mark.default_cassette("LPZh9BOjkQs.yaml")
 @pytest.mark.vcr
@@ -199,6 +200,157 @@ async def test_get_transcript_with_response_limit(mcp_client_session_with_respon
     assert transcript[:-1] == expect.transcript
 
 
+@pytest.mark.skipif(os.getenv("CI") == "true", reason="Skipping this test on CI")
+@pytest.mark.default_cassette("LPZh9BOjkQs.yaml")
+@pytest.mark.vcr
+@pytest.mark.anyio
+async def test_get_timed_transcript(mcp_client_session: ClientSession) -> None:
+    video_id = "LPZh9BOjkQs"
+
+    expect = TimedTranscript(
+        title=fetch_title(video_id, "en"),
+        snippets=[TranscriptSnippet.from_fetched_transcript_snippet(s) for s in YouTubeTranscriptApi().fetch(video_id)],
+    )
+
+    res = await mcp_client_session.call_tool(
+        "get_timed_transcript",
+        arguments={"url": f"https://www.youtube.com/watch?v={video_id}"},
+    )
+    assert isinstance(res.content[0], TextContent)
+
+    transcript = TimedTranscript.model_validate_json(res.content[0].text)
+    assert transcript == expect
+    assert not res.isError
+
+
+@pytest.mark.skipif(os.getenv("CI") == "true", reason="Skipping this test on CI")
+@pytest.mark.default_cassette("WjAXZkQSE2U.yaml")
+@pytest.mark.vcr
+@pytest.mark.anyio
+async def test_get_timed_transcript_with_language(mcp_client_session: ClientSession) -> None:
+    video_id = "WjAXZkQSE2U"
+
+    # The expected snippets are fetched with the same explicit "ja" language that the tool call requests.
+    expect = TimedTranscript(
+        title=fetch_title(video_id, "ja"),
+        snippets=[
+            TranscriptSnippet.from_fetched_transcript_snippet(s) for s in YouTubeTranscriptApi().fetch(video_id, ["ja"])
+        ],
+    )
+
+    res = await mcp_client_session.call_tool(
+        "get_timed_transcript",
+        arguments={"url": f"https://www.youtube.com/watch?v={video_id}", "lang": "ja"},
+    )
+    assert isinstance(res.content[0], TextContent)
+
+    transcript = TimedTranscript.model_validate_json(res.content[0].text)
+    assert transcript == expect
+    assert not res.isError
+
+
+@pytest.mark.skipif(os.getenv("CI") == "true", reason="Skipping this test on CI")
+@pytest.mark.default_cassette("LPZh9BOjkQs.yaml")
+@pytest.mark.vcr
+@pytest.mark.anyio
+async def test_get_timed_transcript_fallback_language(
+    mcp_client_session: ClientSession,
+) -> None:
+    video_id = "LPZh9BOjkQs"
+
+    expect = TimedTranscript(
+        title=fetch_title(video_id, "en"),
+        snippets=[TranscriptSnippet.from_fetched_transcript_snippet(s) for s in YouTubeTranscriptApi().fetch(video_id)],
+    )
+
+    res = await mcp_client_session.call_tool(
+        "get_timed_transcript",
+        arguments={
+            "url": f"https://www.youtube.com/watch?v={video_id}",
+            "lang": "unknown",
+        },
+    )
+    assert isinstance(res.content[0], TextContent)
+
+    transcript = TimedTranscript.model_validate_json(res.content[0].text)
+    assert transcript == expect
+    assert not res.isError
+
+
+@pytest.mark.anyio
+async def test_get_timed_transcript_invalid_url(mcp_client_session: ClientSession) -> None:
+    res = await mcp_client_session.call_tool(
+        "get_timed_transcript", arguments={"url": "https://www.youtube.com/watch?vv=abcdefg"}
+    )
+    assert res.isError
+
+
+@pytest.mark.skipif(os.getenv("CI") == "true", reason="Skipping this test on CI")
+@pytest.mark.default_cassette("error.yaml")
+@pytest.mark.vcr
+@pytest.mark.anyio
+async def test_get_timed_transcript_not_found(mcp_client_session: ClientSession) -> None:
+    res = await mcp_client_session.call_tool(
+        "get_timed_transcript", arguments={"url": "https://www.youtube.com/watch?v=a"}
+    )
+    assert res.isError
+
+
+@pytest.mark.skipif(os.getenv("CI") == "true", reason="Skipping this test on CI")
+@pytest.mark.default_cassette("LPZh9BOjkQs.yaml")
+@pytest.mark.vcr
+@pytest.mark.anyio
+async def test_get_timed_transcript_with_short_url(mcp_client_session: ClientSession) -> None:
+    video_id = "LPZh9BOjkQs"
+
+    expect = TimedTranscript(
+        title=fetch_title(video_id, "en"),
+        snippets=[TranscriptSnippet.from_fetched_transcript_snippet(s) for s in YouTubeTranscriptApi().fetch(video_id)],
+    )
+
+    res = await mcp_client_session.call_tool(
+        "get_timed_transcript",
+        arguments={"url": f"https://youtu.be/{video_id}"},
+    )
+    assert isinstance(res.content[0], TextContent)
+
+    transcript = TimedTranscript.model_validate_json(res.content[0].text)
+    assert transcript == expect
+    assert not res.isError
+
+
+@pytest.mark.skipif(os.getenv("CI") == "true", reason="Skipping this test on CI")
+@pytest.mark.default_cassette("LPZh9BOjkQs.yaml")
+@pytest.mark.vcr
+@pytest.mark.anyio
+async def test_get_timed_transcript_with_response_limit(mcp_client_session_with_response_limit: ClientSession) -> None:
+    video_id = "LPZh9BOjkQs"
+
+    expect = TimedTranscript(
+        title=fetch_title(video_id, "en"),
+        snippets=[TranscriptSnippet.from_fetched_transcript_snippet(s) for s in YouTubeTranscriptApi().fetch(video_id)],
+    )
+
+    snippets = []
+    cursor = None
+    while True:
+        res = await mcp_client_session_with_response_limit.call_tool(
+            "get_timed_transcript",
+            arguments={"url": f"https://www.youtube.com/watch?v={video_id}", "next_cursor": cursor},
+        )
+        assert not res.isError
+        assert isinstance(res.content[0], TextContent)
+
+        t = TimedTranscript.model_validate_json(res.content[0].text)
+        snippets.extend(t.snippets)
+        if t.next_cursor is None:
+            break
+        cursor = t.next_cursor
+
+    assert t.title == expect.title
+    assert snippets == expect.snippets
+
+
 @pytest.mark.skipif(os.getenv("CI") == "true", reason="Skipping this test on CI")
 @pytest.mark.anyio
 async def test_get_video_info(mcp_client_session: ClientSession) -> None: