Merge pull request #31 from jkawamoto/pagination

jkawamoto · web-flow · commit d3637db0deff · 2025-08-11T22:46:25.000-06:00
Add response limit and transcript pagination support
diff --git a/README.md b/README.md
@@ -19,6 +19,7 @@ Fetches the transcript of a specified YouTube video.
 #### Parameters
 - **url** *(string)*: The full URL of the YouTube video. This field is required.
 - **lang** *(string, optional)*: The desired language for the transcript. Defaults to `en` if not specified.
+- **next_cursor** *(string, optional)*: Cursor to retrieve the next page of the transcript.
 
 ## Installation
 > [!NOTE]
@@ -67,6 +68,30 @@ npx -y @smithery/cli list clients
 
 Refer to the [Smithery CLI documentation](https://github.com/smithery-ai/cli) for additional details.
 
+## Response Pagination
+When retrieving transcripts for longer videos, the content may exceed the token size limits of the LLM.
+To avoid this issue, this server splits transcripts that exceed 50,000 characters.
+If a transcript is split, the response will include a `next_cursor`.
+To retrieve the next part, include this `next_cursor` value in your request.
+
+The token size limits vary depending on the LLM and language you are using. If you need to split responses into smaller chunks, you can adjust this using the `--response-limit` command line argument. For example, the configuration below splits responses to contain no more than 15,000 characters each:
+
+```json
+{
+  "mcpServers": {
+    "youtube-transcript": {
+      "command": "uvx",
+      "args": [
+        "--from",
+        "git+https://github.com/jkawamoto/mcp-youtube-transcript",
+        "mcp-youtube-transcript",
+        "--response-limit",
+        "15000"
+      ]
+    }
+  }
+}
+```
 
 ## Using Proxy Servers
 In environments where access to YouTube is restricted, you can use proxy servers.
diff --git a/src/mcp_youtube_transcript/__init__.py b/src/mcp_youtube_transcript/__init__.py
@@ -8,6 +8,7 @@
 from contextlib import asynccontextmanager
 from dataclasses import dataclass
 from functools import lru_cache, partial
+from itertools import islice
 from typing import AsyncIterator, Tuple
 from typing import Final
 from urllib.parse import urlparse, parse_qs
@@ -35,7 +36,7 @@ async def _app_lifespan(_server: FastMCP, proxy_config: ProxyConfig | None) -> A
 
 
 @lru_cache
-def _get_transcript(ctx: AppContext, video_id: str, lang: str) -> Tuple[str, str]:
+def _get_transcript(ctx: AppContext, video_id: str, lang: str) -> Tuple[str, list[str]]:
     if lang == "en":
         languages = ["en"]
     else:
@@ -49,17 +50,19 @@ def _get_transcript(ctx: AppContext, video_id: str, lang: str) -> Tuple[str, str
     title = soup.title.string if soup.title and soup.title.string else "Transcript"
 
     transcripts = ctx.ytt_api.fetch(video_id, languages=languages)
-    return title, "\n".join((item.text for item in transcripts))
+    return title, [item.text for item in transcripts]
 
 
 class Transcript(BaseModel):
     """Transcript of a YouTube video."""
 
     title: str = Field(description="Title of the video")
     transcript: str = Field(description="Transcript of the video")
+    next_cursor: str | None = Field(description="Cursor to retrieve the next page of the transcript", default=None)
 
 
 def server(
+    response_limit: int | None = None,
     webshare_proxy_username: str | None = None,
     webshare_proxy_password: str | None = None,
     http_proxy: str | None = None,
@@ -80,6 +83,7 @@ async def get_transcript(
         ctx: Context,
         url: str = Field(description="The URL of the YouTube video"),
         lang: str = Field(description="The preferred language for the transcript", default="en"),
+        next_cursor: str | None = Field(description="Cursor to retrieve the next page of the transcript", default=None),
     ) -> Transcript:
         """Retrieves the transcript of a YouTube video."""
         parsed_url = urlparse(url)
@@ -92,8 +96,20 @@ async def get_transcript(
             video_id = q[0]
 
         app_ctx: AppContext = ctx.request_context.lifespan_context  # type: ignore
-        title, transcript = _get_transcript(app_ctx, video_id, lang)
-        return Transcript(title=title, transcript=transcript)
+        title, transcripts = _get_transcript(app_ctx, video_id, lang)
+
+        if response_limit is None or response_limit <= 0:
+            return Transcript(title=title, transcript="\n".join(transcripts))
+
+        res = ""
+        cursor = None
+        for i, line in islice(enumerate(transcripts), int(next_cursor or 0), None):
+            if len(res) + len(line) + 1 > response_limit:
+                cursor = str(i)
+                break
+            res += f"{line}\n"
+
+        return Transcript(title=title, transcript=res[:-1], next_cursor=cursor)
 
     return mcp
 
diff --git a/src/mcp_youtube_transcript/cli.py b/src/mcp_youtube_transcript/cli.py
@@ -13,6 +13,12 @@
 
 
 @click.command()
+@click.option(
+    "--response-limit",
+    type=int,
+    help="Maximum number of characters each response contains. Set a negative value to disable pagination.",
+    default=50000,
+)
 @click.option(
     "--webshare-proxy-username",
     metavar="NAME",
@@ -29,6 +35,7 @@
 @click.option("--https-proxy", metavar="URL", envvar="HTTPS_PROXY", help="HTTPS proxy server URL.")
 @click.version_option()
 def main(
+    response_limit: int | None,
     webshare_proxy_username: str | None,
     webshare_proxy_password: str | None,
     http_proxy: str | None,
@@ -40,6 +47,5 @@ def main(
     logger = logging.getLogger(__name__)
 
     logger.info("starting Youtube Transcript MCP server")
-    mcp = server(webshare_proxy_username, webshare_proxy_password, http_proxy, https_proxy)
-    mcp.run()
+    server(response_limit, webshare_proxy_username, webshare_proxy_password, http_proxy, https_proxy).run()
     logger.info("closed Youtube Transcript MCP server")
diff --git a/tests/test_mcp.py b/tests/test_mcp.py
@@ -17,8 +17,6 @@
 
 from mcp_youtube_transcript import Transcript
 
-params = StdioServerParameters(command="uv", args=["run", "mcp-youtube-transcript"])
-
 
 def fetch_title(url: str, lang: str) -> str:
     res = requests.get(f"https://www.youtube.com/watch?v={url}", headers={"Accept-Language": lang})
@@ -28,6 +26,7 @@ def fetch_title(url: str, lang: str) -> str:
 
 @pytest.fixture(scope="module")
 async def mcp_client_session() -> AsyncGenerator[ClientSession, None]:
+    params = StdioServerParameters(command="uv", args=["run", "mcp-youtube-transcript", "--response-limit", "-1"])
     async with stdio_client(params) as streams:
         async with ClientSession(streams[0], streams[1]) as session:
             await session.initialize()
@@ -49,7 +48,8 @@ async def test_get_transcript(mcp_client_session: ClientSession) -> None:
 
     title = fetch_title(video_id, "en")
     expect = Transcript(
-        title=title, transcript="\n".join((item.text for item in YouTubeTranscriptApi().fetch(video_id)))
+        title=title,
+        transcript="\n".join((item.text for item in YouTubeTranscriptApi().fetch(video_id))),
     )
 
     res = await mcp_client_session.call_tool(
@@ -72,7 +72,8 @@ async def test_get_transcript_with_language(mcp_client_session: ClientSession) -
 
     title = fetch_title(video_id, "ja")
     expect = Transcript(
-        title=title, transcript="\n".join((item.text for item in YouTubeTranscriptApi().fetch(video_id, ["ja"])))
+        title=title,
+        transcript="\n".join((item.text for item in YouTubeTranscriptApi().fetch(video_id, ["ja"]))),
     )
 
     res = await mcp_client_session.call_tool(
@@ -97,7 +98,8 @@ async def test_get_transcript_fallback_language(
 
     title = fetch_title(video_id, "en")
     expect = Transcript(
-        title=title, transcript="\n".join((item.text for item in YouTubeTranscriptApi().fetch(video_id)))
+        title=title,
+        transcript="\n".join((item.text for item in YouTubeTranscriptApi().fetch(video_id))),
     )
 
     res = await mcp_client_session.call_tool(
@@ -140,7 +142,8 @@ async def test_get_transcript_with_short_url(mcp_client_session: ClientSession)
 
     title = fetch_title(video_id, "en")
     expect = Transcript(
-        title=title, transcript="\n".join((item.text for item in YouTubeTranscriptApi().fetch(video_id)))
+        title=title,
+        transcript="\n".join((item.text for item in YouTubeTranscriptApi().fetch(video_id))),
     )
 
     res = await mcp_client_session.call_tool(
@@ -152,3 +155,44 @@ async def test_get_transcript_with_short_url(mcp_client_session: ClientSession)
     transcript = Transcript.model_validate_json(res.content[0].text)
     assert transcript == expect
     assert not res.isError
+
+
+@pytest.fixture(scope="module")
+async def mcp_client_session_with_response_limit() -> AsyncGenerator[ClientSession, None]:
+    params = StdioServerParameters(command="uv", args=["run", "mcp-youtube-transcript", "--response-limit", "3000"])
+    async with stdio_client(params) as streams:
+        async with ClientSession(streams[0], streams[1]) as session:
+            await session.initialize()
+            yield session
+
+
+@pytest.mark.skipif(os.getenv("CI") == "true", reason="Skipping this test on CI")
+@pytest.mark.default_cassette("LPZh9BOjkQs.yaml")
+@pytest.mark.vcr
+@pytest.mark.anyio
+async def test_get_transcript_with_response_limit(mcp_client_session_with_response_limit: ClientSession) -> None:
+    video_id = "LPZh9BOjkQs"
+
+    expect = Transcript(
+        title=fetch_title(video_id, "en"),
+        transcript="\n".join((item.text for item in YouTubeTranscriptApi().fetch(video_id))),
+    )
+
+    transcript = ""
+    cursor = None
+    while True:
+        res = await mcp_client_session_with_response_limit.call_tool(
+            "get_transcript",
+            arguments={"url": f"https://www.youtube.com/watch?v={video_id}", "next_cursor": cursor},
+        )
+        assert not res.isError
+        assert isinstance(res.content[0], TextContent)
+
+        t = Transcript.model_validate_json(res.content[0].text)
+        transcript += t.transcript + "\n"
+        if t.next_cursor is None:
+            break
+        cursor = t.next_cursor
+
+    assert t.title == expect.title
+    assert transcript[:-1] == expect.transcript