Skip to content

Commit 7748842

Browse files
committed
refactor: update transcript fetching to use FetchedTranscriptSnippet
- Replaced plain text transcripts with `FetchedTranscriptSnippet` in `_get_transcript_snippets`.
- Adjusted transcript parsing to return snippets for improved data handling.
1 parent 9dd29cc commit 7748842

File tree

1 file changed

+5
-4
lines changed

1 file changed

+5
-4
lines changed

src/mcp_youtube_transcript/__init__.py

Lines changed: 5 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -21,7 +21,7 @@
2121
from mcp.server import FastMCP
2222
from mcp.server.fastmcp import Context
2323
from pydantic import Field, BaseModel
24-
from youtube_transcript_api import YouTubeTranscriptApi
24+
from youtube_transcript_api import YouTubeTranscriptApi, FetchedTranscriptSnippet
2525
from youtube_transcript_api.proxies import WebshareProxyConfig, GenericProxyConfig, ProxyConfig
2626
from yt_dlp import YoutubeDL
2727
from yt_dlp.extractor.youtube import YoutubeIE
@@ -69,7 +69,7 @@ def _parse_time_info(date: int, timestamp: int, duration: int) -> Tuple[datetime
6969

7070

7171
@lru_cache
72-
def _get_transcript(ctx: AppContext, video_id: str, lang: str) -> Tuple[str, list[str]]:
72+
def _get_transcript_snippets(ctx: AppContext, video_id: str, lang: str) -> Tuple[str, list[FetchedTranscriptSnippet]]:
7373
if lang == "en":
7474
languages = ["en"]
7575
else:
@@ -83,7 +83,7 @@ def _get_transcript(ctx: AppContext, video_id: str, lang: str) -> Tuple[str, lis
8383
title = soup.title.string if soup.title and soup.title.string else "Transcript"
8484

8585
transcripts = ctx.ytt_api.fetch(video_id, languages=languages)
86-
return title, [item.text for item in transcripts]
86+
return title, transcripts.snippets
8787

8888

8989
@lru_cache
@@ -133,7 +133,8 @@ async def get_transcript(
133133
raise ValueError(f"couldn't find a video ID from the provided URL: {url}.")
134134
video_id = q[0]
135135

136-
title, transcripts = _get_transcript(ctx.request_context.lifespan_context, video_id, lang)
136+
title, snippets = _get_transcript_snippets(ctx.request_context.lifespan_context, video_id, lang)
137+
transcripts = (item.text for item in snippets)
137138

138139
if response_limit is None or response_limit <= 0:
139140
return Transcript(title=title, transcript="\n".join(transcripts))

0 commit comments

Comments
 (0)