Skip to content

Commit cee92d6

Browse files
authored
Merge pull request #13 from jkawamoto/title
Add video title fetching to transcript generation
2 parents 1784c0a + 7006daa commit cee92d6

File tree

5 files changed, with 71 additions (+71) and 6 deletions (-6).

5 files changed, with 71 additions (+71) and 6 deletions (-6).

.pre-commit-config.yaml

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -26,7 +26,7 @@ repos:
2626
hooks:
2727
- id: mypy
2828
args: []
29-
additional_dependencies: ["mcp>=1.3,<1.4", "youtube-transcript-api>=1.0.1", "pytest>=8.3.5", "pytest-mock>=3.14"]
29+
additional_dependencies: ["mcp>=1.3,<1.4", "youtube-transcript-api>=1.0.1", "beautifulsoup4>=4.13.3", "pytest>=8.3.5", "pytest-mock>=3.14", "types-requests>=2.32.0.20250306"]
3030
- repo: local
3131
hooks:
3232
- id: pytest

pyproject.toml

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -22,9 +22,11 @@ classifiers = [
2222
"Programming Language :: Python :: 3.13",
2323
]
2424
dependencies = [
25+
"beautifulsoup4>=4.13.3",
2526
"click>=8.1.8",
2627
"mcp>=1.3,<1.4",
2728
"pydantic>=2.10.6",
29+
"requests>=2.32.3",
2830
"youtube-transcript-api>=1.0.1",
2931
]
3032

@@ -37,6 +39,7 @@ dev = [
3739
"pre-commit-uv>=4.1.4",
3840
"pytest>=8.3.5",
3941
"pytest-mock>=3.14",
42+
"types-requests>=2.32.0.20250306",
4043
]
4144

4245
[tool.ruff]

src/mcp_youtube_transcript/server.py

Lines changed: 11 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -8,6 +8,8 @@
88

99
from urllib.parse import urlparse, parse_qs
1010

11+
import requests
12+
from bs4 import BeautifulSoup
1113
from mcp.server import FastMCP
1214
from pydantic import Field
1315
from youtube_transcript_api import YouTubeTranscriptApi
@@ -52,8 +54,16 @@ def get_transcript(
5254
languages = ["en"]
5355
else:
5456
languages = [lang, "en"]
57+
58+
page = requests.get(
59+
f"https://www.youtube.com/watch?v={video_id}", headers={"Accept-Language": ",".join(languages)}
60+
)
61+
page.raise_for_status()
62+
soup = BeautifulSoup(page.text, "html.parser")
63+
title = soup.title.string if soup.title else ""
64+
5565
transcripts = ytt_api.fetch(video_id, languages=languages)
5666

57-
return "\n".join((item.text for item in transcripts))
67+
return f"# {title}\n" + "\n".join((item.text for item in transcripts))
5868

5969
return mcp

tests/test_mcp.py

Lines changed: 16 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -9,13 +9,21 @@
99
from typing import AsyncGenerator
1010

1111
import pytest
12+
import requests
13+
from bs4 import BeautifulSoup
1214
from mcp import StdioServerParameters, stdio_client, ClientSession
1315
from mcp.types import TextContent
1416
from youtube_transcript_api import YouTubeTranscriptApi
1517

1618
params = StdioServerParameters(command="uv", args=["run", "mcp-youtube-transcript"])
1719

1820

21+
def fetch_title(url: str, lang: str) -> str:
22+
res = requests.get(f"https://www.youtube.com/watch?v={url}", headers={"Accept-Language": lang})
23+
soup = BeautifulSoup(res.text, "html.parser")
24+
return soup.title.string or "" if soup.title else ""
25+
26+
1927
@pytest.fixture(scope="module")
2028
def anyio_backend() -> str:
2129
return "asyncio"
@@ -40,7 +48,8 @@ async def test_list_tools(mcp_client_session: ClientSession) -> None:
4048
async def test_get_transcript(mcp_client_session: ClientSession) -> None:
4149
video_id = "LPZh9BOjkQs"
4250

43-
expect = "\n".join((item.text for item in YouTubeTranscriptApi().fetch(video_id)))
51+
title = fetch_title(video_id, "en")
52+
expect = f"# {title}\n" + "\n".join((item.text for item in YouTubeTranscriptApi().fetch(video_id)))
4453

4554
res = await mcp_client_session.call_tool(
4655
"get_transcript",
@@ -56,7 +65,8 @@ async def test_get_transcript(mcp_client_session: ClientSession) -> None:
5665
async def test_get_transcript_with_language(mcp_client_session: ClientSession) -> None:
5766
video_id = "WjAXZkQSE2U"
5867

59-
expect = "\n".join((item.text for item in YouTubeTranscriptApi().fetch(video_id, ["ja"])))
68+
title = fetch_title(video_id, "ja")
69+
expect = f"# {title}\n" + "\n".join((item.text for item in YouTubeTranscriptApi().fetch(video_id, ["ja"])))
6070

6171
res = await mcp_client_session.call_tool(
6272
"get_transcript",
@@ -74,7 +84,8 @@ async def test_get_transcript_fallback_language(
7484
) -> None:
7585
video_id = "LPZh9BOjkQs"
7686

77-
expect = "\n".join((item.text for item in YouTubeTranscriptApi().fetch(video_id)))
87+
title = fetch_title(video_id, "en")
88+
expect = f"# {title}\n" + "\n".join((item.text for item in YouTubeTranscriptApi().fetch(video_id)))
7889

7990
res = await mcp_client_session.call_tool(
8091
"get_transcript",
@@ -108,7 +119,8 @@ async def test_get_transcript_not_found(mcp_client_session: ClientSession) -> No
108119
async def test_get_transcript_with_short_url(mcp_client_session: ClientSession) -> None:
109120
video_id = "LPZh9BOjkQs"
110121

111-
expect = "\n".join((item.text for item in YouTubeTranscriptApi().fetch(video_id)))
122+
title = fetch_title(video_id, "en")
123+
expect = f"# {title}\n" + "\n".join((item.text for item in YouTubeTranscriptApi().fetch(video_id)))
112124

113125
res = await mcp_client_session.call_tool(
114126
"get_transcript",

uv.lock

Lines changed: 40 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments (0)