@@ -144,7 +144,7 @@ def fetch_branches(owner: str, repo: str):
144144 """Get brancshes of the repository"""
145145
146146 url = f"https://api.github.com/repos/{ owner } /{ repo } /branches"
147- response = requests .get (url , headers = headers )
147+ response = requests .get (url , headers = headers , timeout = ( 30 , 30 ) )
148148
149149 if response .status_code == 404 :
150150 if not token :
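A note on the new argument: requests interprets a two-element timeout as separate connect and read limits, and raises requests.exceptions.Timeout (or one of its subclasses) when either is exceeded. A minimal sketch of a caller guarding against that, with a hypothetical helper name:

```python
import requests

def get_with_timeout(url, headers=None):
    # timeout=(connect, read): up to 30 s to open the connection,
    # then up to 30 s between received chunks of the response
    try:
        return requests.get(url, headers=headers, timeout=(30, 30))
    except requests.exceptions.ConnectTimeout:
        print(f"Connecting to {url} timed out")
    except requests.exceptions.ReadTimeout:
        print(f"Reading from {url} timed out")
    return None
```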
@@ -165,7 +165,7 @@ def check_tree(owner: str, repo: str, tree: str):
165165 """Check the repository has the given tree"""
166166
167167 url = f"https://api.github.com/repos/{ owner } /{ repo } /git/trees/{ tree } "
168- response = requests .get (url , headers = headers )
168+ response = requests .get (url , headers = headers , timeout = ( 30 , 30 ) )
169169
170170 return True if response .status_code == 200 else False
171171
@@ -216,7 +216,7 @@ def fetch_contents(path):
     url = f"https://api.github.com/repos/{owner}/{repo}/contents/{path}"
     params = {"ref": ref} if ref is not None else {}
 
-    response = requests.get(url, headers=headers, params=params)
+    response = requests.get(url, headers=headers, params=params, timeout=(30, 30))
 
     if response.status_code == 403 and 'rate limit exceeded' in response.text.lower():
         reset_time = int(response.headers.get('X-RateLimit-Reset', 0))
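For context on the rate-limit branch: GitHub's X-RateLimit-Reset header carries a UTC epoch timestamp for when the quota window resets, so the handler can sleep until then. A sketch of that calculation (the helper is hypothetical, not part of this patch):

```python
import time

def seconds_until_reset(reset_epoch: int) -> float:
    # Clamp at zero in case the window has already reset
    # by the time the delta is computed
    return max(reset_epoch - time.time(), 0.0)

# Inside the 403 handler, with reset_time taken from the header:
# time.sleep(seconds_until_reset(reset_time))
```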
@@ -276,7 +276,7 @@ def fetch_contents(path):
             # For files, get raw content
             if "download_url" in item and item["download_url"]:
                 file_url = item["download_url"]
-                file_response = requests.get(file_url, headers=headers)
+                file_response = requests.get(file_url, headers=headers, timeout=(30, 30))
 
                 # Final size check in case content-length header is available but differs from metadata
                 content_length = int(file_response.headers.get('content-length', 0))
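The "final size check" comment implies a guard along these lines; max_file_size and local_path are assumed names for illustration, not values from this patch:

```python
# Hypothetical size guard; max_file_size and local_path are assumptions
max_file_size = 1_000_000  # bytes

content_length = int(file_response.headers.get('content-length', 0))
if content_length > max_file_size:
    print(f"Skipping large file ({content_length} bytes)")
else:
    with open(local_path, "wb") as f:
        f.write(file_response.content)
```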
@@ -292,7 +292,7 @@ def fetch_contents(path):
292292 print (f"Failed to download { rel_path } : { file_response .status_code } " )
293293 else :
294294 # Alternative method if download_url is not available
295- content_response = requests .get (item ["url" ], headers = headers )
295+ content_response = requests .get (item ["url" ], headers = headers , timeout = ( 30 , 30 ) )
296296 if content_response .status_code == 200 :
297297 content_data = content_response .json ()
298298 if content_data .get ("encoding" ) == "base64" and "content" in content_data :
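The contents API returns file bodies as newline-wrapped base64 under the "content" key; the standard-library decoder discards the embedded newlines by default, roughly:

```python
import base64

# b64decode (validate=False, the default) ignores the newline wrapping
raw_bytes = base64.b64decode(content_data["content"])
text = raw_bytes.decode("utf-8", errors="replace")
```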
@@ -312,7 +312,16 @@ def fetch_contents(path):
                     print(f"Failed to get content for {rel_path}: {content_response.status_code}")
 
         elif item["type"] == "dir":
-            # Recursively process subdirectories
+            # Check whether the directory matches an exclusion pattern
+            # before recursing, so excluded trees are never fetched
+            if exclude_patterns:
+                dir_excluded = any(fnmatch.fnmatch(item_path, pattern) or
+                                   fnmatch.fnmatch(rel_path, pattern)
+                                   for pattern in exclude_patterns)
+                if dir_excluded:
+                    continue
+
+            # Recurse only into directories that are not excluded
             fetch_contents(item_path)
 
     # Start crawling from the specified path
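On the new exclusion check: fnmatch matches shell-style wildcards against the entire string, and its `*` crosses path separators, which is why both item_path and rel_path are tested. A quick illustration with hypothetical patterns:

```python
import fnmatch

# Hypothetical patterns; the real ones come from exclude_patterns
print(fnmatch.fnmatch("src/node_modules", "*/node_modules"))  # True
print(fnmatch.fnmatch("node_modules", "*/node_modules"))      # False: nothing before "/"
print(fnmatch.fnmatch("docs/api/index.md", "docs/*"))         # True: "*" spans "/"
```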