Skip to content

Commit 7006daa

Browse files
committed
feat: add video title fetching to transcript generation
Integrate video title retrieval using Beautiful Soup and Requests to enrich the transcript output. The video title is now prepended to the generated transcript as a `# <title>` heading line, giving the transcript more context. Tests are adjusted accordingly to validate the updated behavior.
1 parent 1784c0a commit 7006daa

File tree

5 files changed

+71
-6
lines changed

5 files changed

+71
-6
lines changed

.pre-commit-config.yaml

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -26,7 +26,7 @@ repos:
2626
hooks:
2727
- id: mypy
2828
args: []
29-
additional_dependencies: ["mcp>=1.3,<1.4", "youtube-transcript-api>=1.0.1", "pytest>=8.3.5", "pytest-mock>=3.14"]
29+
additional_dependencies: ["mcp>=1.3,<1.4", "youtube-transcript-api>=1.0.1", "beautifulsoup4>=4.13.3", "pytest>=8.3.5", "pytest-mock>=3.14", "types-requests>=2.32.0.20250306"]
3030
- repo: local
3131
hooks:
3232
- id: pytest

pyproject.toml

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -22,9 +22,11 @@ classifiers = [
2222
"Programming Language :: Python :: 3.13",
2323
]
2424
dependencies = [
25+
"beautifulsoup4>=4.13.3",
2526
"click>=8.1.8",
2627
"mcp>=1.3,<1.4",
2728
"pydantic>=2.10.6",
29+
"requests>=2.32.3",
2830
"youtube-transcript-api>=1.0.1",
2931
]
3032

@@ -37,6 +39,7 @@ dev = [
3739
"pre-commit-uv>=4.1.4",
3840
"pytest>=8.3.5",
3941
"pytest-mock>=3.14",
42+
"types-requests>=2.32.0.20250306",
4043
]
4144

4245
[tool.ruff]

src/mcp_youtube_transcript/server.py

Lines changed: 11 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -8,6 +8,8 @@
88

99
from urllib.parse import urlparse, parse_qs
1010

11+
import requests
12+
from bs4 import BeautifulSoup
1113
from mcp.server import FastMCP
1214
from pydantic import Field
1315
from youtube_transcript_api import YouTubeTranscriptApi
@@ -52,8 +54,16 @@ def get_transcript(
5254
languages = ["en"]
5355
else:
5456
languages = [lang, "en"]
57+
58+
page = requests.get(
59+
f"https://www.youtube.com/watch?v={video_id}", headers={"Accept-Language": ",".join(languages)}
60+
)
61+
page.raise_for_status()
62+
soup = BeautifulSoup(page.text, "html.parser")
63+
title = soup.title.string if soup.title else ""
64+
5565
transcripts = ytt_api.fetch(video_id, languages=languages)
5666

57-
return "\n".join((item.text for item in transcripts))
67+
return f"# {title}\n" + "\n".join((item.text for item in transcripts))
5868

5969
return mcp

tests/test_mcp.py

Lines changed: 16 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -9,13 +9,21 @@
99
from typing import AsyncGenerator
1010

1111
import pytest
12+
import requests
13+
from bs4 import BeautifulSoup
1214
from mcp import StdioServerParameters, stdio_client, ClientSession
1315
from mcp.types import TextContent
1416
from youtube_transcript_api import YouTubeTranscriptApi
1517

1618
params = StdioServerParameters(command="uv", args=["run", "mcp-youtube-transcript"])
1719

1820

21+
def fetch_title(url: str, lang: str) -> str:
22+
res = requests.get(f"https://www.youtube.com/watch?v={url}", headers={"Accept-Language": lang})
23+
soup = BeautifulSoup(res.text, "html.parser")
24+
return soup.title.string or "" if soup.title else ""
25+
26+
1927
@pytest.fixture(scope="module")
2028
def anyio_backend() -> str:
2129
return "asyncio"
@@ -40,7 +48,8 @@ async def test_list_tools(mcp_client_session: ClientSession) -> None:
4048
async def test_get_transcript(mcp_client_session: ClientSession) -> None:
4149
video_id = "LPZh9BOjkQs"
4250

43-
expect = "\n".join((item.text for item in YouTubeTranscriptApi().fetch(video_id)))
51+
title = fetch_title(video_id, "en")
52+
expect = f"# {title}\n" + "\n".join((item.text for item in YouTubeTranscriptApi().fetch(video_id)))
4453

4554
res = await mcp_client_session.call_tool(
4655
"get_transcript",
@@ -56,7 +65,8 @@ async def test_get_transcript(mcp_client_session: ClientSession) -> None:
5665
async def test_get_transcript_with_language(mcp_client_session: ClientSession) -> None:
5766
video_id = "WjAXZkQSE2U"
5867

59-
expect = "\n".join((item.text for item in YouTubeTranscriptApi().fetch(video_id, ["ja"])))
68+
title = fetch_title(video_id, "ja")
69+
expect = f"# {title}\n" + "\n".join((item.text for item in YouTubeTranscriptApi().fetch(video_id, ["ja"])))
6070

6171
res = await mcp_client_session.call_tool(
6272
"get_transcript",
@@ -74,7 +84,8 @@ async def test_get_transcript_fallback_language(
7484
) -> None:
7585
video_id = "LPZh9BOjkQs"
7686

77-
expect = "\n".join((item.text for item in YouTubeTranscriptApi().fetch(video_id)))
87+
title = fetch_title(video_id, "en")
88+
expect = f"# {title}\n" + "\n".join((item.text for item in YouTubeTranscriptApi().fetch(video_id)))
7889

7990
res = await mcp_client_session.call_tool(
8091
"get_transcript",
@@ -108,7 +119,8 @@ async def test_get_transcript_not_found(mcp_client_session: ClientSession) -> No
108119
async def test_get_transcript_with_short_url(mcp_client_session: ClientSession) -> None:
109120
video_id = "LPZh9BOjkQs"
110121

111-
expect = "\n".join((item.text for item in YouTubeTranscriptApi().fetch(video_id)))
122+
title = fetch_title(video_id, "en")
123+
expect = f"# {title}\n" + "\n".join((item.text for item in YouTubeTranscriptApi().fetch(video_id)))
112124

113125
res = await mcp_client_session.call_tool(
114126
"get_transcript",

uv.lock

Lines changed: 40 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments
 (0)