mirror of
https://cdm-project.com/Download-Tools/udemy-downloader.git
synced 2025-04-30 02:24:25 +02:00
New Experimental Downloader, bug fixes, and small updates
+ Updated cleanup function to remove the entire temporary lecture folder instead of just leaving behind tons of empty folders + Fixed typo in mux function + Segment count is now properly calculated from segment timeline + Manifest is now parsed from the URL instead of being downloaded; this should be better for downloading multiple courses at once. + Fixed a bug where audio content_type would try to find a max quality + New Downloader: Threaded Downloader uses multiple threads to download files, which should improve download speeds greatly. By default, the threaded downloader is not used; you can enable it by passing ``--use-threaded-downloader``. By default, it uses 10 threads; you can set a custom number of threads with the ``--threads`` option
This commit is contained in:
parent
88a411d708
commit
aab19bf66f
16
README.md
16
README.md
@ -68,8 +68,8 @@ You can now run `python main.py` to start downloading. The course will download
|
||||
# Advanced Usage
|
||||
|
||||
```
|
||||
usage: main.py [-h] -c COURSE_URL [-b BEARER_TOKEN] [-d] [-q QUALITY] [-l LANG] [--skip-lectures] [--download-assets]
|
||||
[--download-captions]
|
||||
usage: main.py [-h] -c COURSE_URL [-b BEARER_TOKEN] [-q QUALITY] [-t THREADS] [-l LANG] [--skip-lectures] [--download-assets]
|
||||
[--download-captions] [--use-threaded-downloader] [-d]
|
||||
|
||||
Udemy Downloader
|
||||
|
||||
@ -79,13 +79,15 @@ optional arguments:
|
||||
The URL of the course to download
|
||||
-b BEARER_TOKEN, --bearer BEARER_TOKEN
|
||||
The Bearer token to use
|
||||
-d, --debug Use test_data.json rather than fetch from the udemy api.
|
||||
-q QUALITY, --quality QUALITY
|
||||
Download specific video quality. (144, 360, 480, 720, 1080)
|
||||
-t THREADS, --threads THREADS
|
||||
Max number of threads to use when using the threaded downloader (default 10)
|
||||
-l LANG, --lang LANG The language to download for captions (Default is en)
|
||||
--skip-lectures If specified, lectures won't be downloaded.
|
||||
--download-assets If specified, lecture assets will be downloaded.
|
||||
--download-captions If specified, captions will be downloaded.
|
||||
--use-threaded-downloader If specified, the experimental threaded downloader will be used
|
||||
```
|
||||
|
||||
- Passing a Bearer Token and Course ID as an argument
|
||||
@ -107,8 +109,12 @@ optional arguments:
|
||||
- `python main.py -c <Course URL> --download-captions -l all` - Downloads all subtitles
|
||||
- etc
|
||||
- Skip downloading lecture videos
|
||||
- `python main.py --skip-lectures --download-captions` - Downloads only captions
|
||||
- `python main.py --skip-lectures --download-assets` - Downloads only assets
|
||||
- `python main.py -c <Course URL> --skip-lectures --download-captions` - Downloads only captions
|
||||
- `python main.py -c <Course URL> --skip-lectures --download-assets` - Downloads only assets
|
||||
- Use threaded downloader
|
||||
- `python main.py -c <Course URL> --use-threaded-downloader`
|
||||
- Use threaded downloader with custom max threads
|
||||
- `python main.py -c <Course URL> --use-threaded-downloader --threads 15`
|
||||
|
||||
# Credits
|
||||
|
||||
|
120
downloader.py
Normal file
120
downloader.py
Normal file
@ -0,0 +1,120 @@
|
||||
import os, threading, requests
|
||||
from tqdm import tqdm
|
||||
|
||||
|
||||
class FileDownloader():
    """
    Download files on background threads, bounded by a semaphore.

    Every call to ``get_file`` starts one thread. At most ``max_threads`` of
    those threads transfer data at the same time; the others block on the
    semaphore until a slot frees up. A file that already exists on disk but is
    shorter than the server's ``content-length`` is resumed with an HTTP
    ``Range`` request instead of being downloaded again.

    @source: https://gist.github.com/stefanfortuin/9dbfe8618701507d0ef2b5515b165c5f
    """
    def __init__(self, max_threads=10):
        """
        :param max_threads: maximum number of simultaneous transfers
        """
        print("> Threaded downloader using {} threads.".format(
            str(max_threads)))
        self.sema = threading.Semaphore(value=max_threads)
        self.headers = {
            'user-agent':
            'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/62.0.3202.94 Safari/537.36'
        }
        # Chunk size (in bytes) requested from the response stream.
        self.block_size = 1024

    def t_getfile(self, link, filepath, filename, bar, session):
        """
        Thread body: download (or resume) ``link`` into ``filepath``.

        :param link: URL to fetch
        :param filepath: destination path on disk
        :param filename: label for the progress bar created when ``bar`` is None
        :param bar: progress bar to advance, or None to create a byte bar
        :param session: optional requests session used for fresh downloads
        """
        # ``with`` guarantees the slot is released on every exit path,
        # including exceptions, so a failed request cannot starve the pool.
        with self.sema:
            target_dir = os.path.dirname(filepath)
            if target_dir:  # os.makedirs("") raises for bare file names
                os.makedirs(target_dir, exist_ok=True)

            # One HEAD request is enough; the timeout keeps a dead server
            # from holding a semaphore slot forever.
            try:
                head_headers = requests.head(link, timeout=30).headers
            except requests.exceptions.RequestException as e:
                print(e)
                return

            if 'content-length' not in head_headers:
                print(f"server doesn't support content-length for {link}")
                return
            total_bytes = int(head_headers['content-length'])

            # Compare against None explicitly: a tqdm bar whose total is 0
            # is falsy and would otherwise be replaced by a new bar.
            own_bar = bar is None

            if not os.path.isfile(filepath):
                if own_bar:
                    bar = tqdm(total=total_bytes,
                               initial=0,
                               unit='B',
                               unit_scale=True,
                               desc=filename)
                self.download_new_file(link, filename, filepath, total_bytes,
                                       bar, session)
                return

            current_bytes = os.stat(filepath).st_size
            if own_bar:
                bar = tqdm(total=total_bytes,
                           initial=current_bytes,
                           unit='B',
                           unit_scale=True,
                           desc=filename)

            if current_bytes < total_bytes:
                self.continue_file_download(link, filename, filepath,
                                            current_bytes, total_bytes, bar)
            elif bar.unit != "B":
                # Per-file bar: an already complete file counts as one item.
                bar.update(1)
            elif not own_bar:
                # Caller-supplied byte bar: account for the bytes on disk.
                # A bar created here already started at ``current_bytes``.
                bar.update(current_bytes)

    def download_new_file(self, link, filename, filepath, total_bytes, bar,
                          session):
        """
        Download ``link`` from scratch, overwriting ``filepath``.

        Uses ``session`` when given, otherwise a plain request carrying this
        downloader's headers. Request errors are printed, not raised.
        """
        try:
            if session is None:
                response = requests.get(link,
                                        headers=self.headers,
                                        timeout=30,
                                        stream=True)
            else:
                response = session.get(link, stream=True)
            # Do not store an HTTP error page as if it were the file.
            response.raise_for_status()
            self.write_file(response, filepath, 'wb', bar)
        except requests.exceptions.RequestException as e:
            print(e)

    def continue_file_download(self, link, filename, filepath, current_bytes,
                               total_bytes, bar):
        """
        Resume a partial download by requesting the bytes after ``current_bytes``.

        Request errors are printed, not raised.
        """
        range_header = self.headers.copy()
        # Byte ranges are inclusive, so "-{total_bytes}" pointed one past the
        # last byte; an open-ended range asks for "everything that is left".
        range_header['Range'] = f"bytes={current_bytes}-"

        try:
            response = requests.get(link,
                                    headers=range_header,
                                    timeout=30,
                                    stream=True)
            response.raise_for_status()
            # 206 means the server honoured the range. Any other success
            # status carries the whole file, which must replace the partial
            # data instead of being appended to it.
            writemode = 'ab' if response.status_code == 206 else 'wb'
            self.write_file(response, filepath, writemode, bar)
        except requests.exceptions.RequestException as e:
            print(e)

    def write_file(self, content, filepath, writemode, bar):
        """
        Stream ``content`` to ``filepath`` and advance ``bar``.

        Byte bars (``bar.unit == "B"``) advance by the number of bytes
        actually written; any other bar advances by one once the file is
        complete.

        :param content: object exposing ``iter_content(chunk_size=...)``
        :param filepath: destination path
        :param writemode: mode passed to ``open`` (``'wb'`` or ``'ab'``)
        :param bar: progress bar exposing ``unit`` and ``update``
        """
        counts_bytes = bar.unit == "B"
        with open(filepath, writemode) as f:
            for chunk in content.iter_content(chunk_size=self.block_size):
                if chunk:
                    f.write(chunk)
                    if counts_bytes:
                        # The final chunk is usually shorter than block_size.
                        bar.update(len(chunk))

        if not counts_bytes:
            bar.update(1)

    def get_file(self, link, path, filename, bar=None, session=None):
        """
        Start downloading ``link`` to ``path`` on a new thread.

        :return: the started ``threading.Thread``; join it to wait for the file
        """
        thread = threading.Thread(target=self.t_getfile,
                                  args=(link, path, filename, bar, session))
        thread.start()
        return thread
|
304
main.py
304
main.py
@ -5,6 +5,7 @@ from dotenv import load_dotenv
|
||||
from mpegdash.parser import MPEGDASHParser
|
||||
from utils import extract_kid
|
||||
from vtt_to_srt import convert
|
||||
from downloader import FileDownloader
|
||||
|
||||
download_dir = os.path.join(os.getcwd(), "out_dir")
|
||||
working_dir = os.path.join(os.getcwd(), "working_dir")
|
||||
@ -12,6 +13,7 @@ retry = 3
|
||||
home_dir = os.getcwd()
|
||||
keyfile_path = os.path.join(os.getcwd(), "keyfile.json")
|
||||
valid_qualities = [144, 360, 480, 720, 1080]
|
||||
downloader = None
|
||||
|
||||
if not os.path.exists(working_dir):
|
||||
os.makedirs(working_dir)
|
||||
@ -115,13 +117,12 @@ def cleanup(path):
|
||||
@author Jayapraveen
|
||||
"""
|
||||
leftover_files = glob.glob(path + '/*.mp4', recursive=True)
|
||||
mpd_files = glob.glob(path + '/*.mpd', recursive=True)
|
||||
leftover_files = leftover_files + mpd_files
|
||||
for file_list in leftover_files:
|
||||
try:
|
||||
os.remove(file_list)
|
||||
except OSError:
|
||||
print(f"Error deleting file: {file_list}")
|
||||
os.removedirs(path)
|
||||
|
||||
|
||||
def mux_process(video_title, lecture_working_dir, outfile):
|
||||
@ -147,30 +148,30 @@ def decrypt(kid, filename, lecture_working_dir):
|
||||
"""
|
||||
try:
|
||||
key = keyfile[kid.lower()]
|
||||
except KeyError as error:
|
||||
except KeyError:
|
||||
exit("Key not found")
|
||||
if (os.name == "nt"):
|
||||
os.system("mp4decrypt --key 1:{} \"{}\" \"{}\"".format(
|
||||
code = os.system("mp4decrypt --key 1:{0} \"{1}\" \"{2}\"".format(
|
||||
key,
|
||||
os.path.join(lecture_working_dir,
|
||||
"encrypted_{}.mp4".format(filename)),
|
||||
"encrypted_{0}.mp4".format(filename)),
|
||||
os.path.join(lecture_working_dir,
|
||||
"decrypted{}.mp4".format(filename))))
|
||||
"decrypted_{0}.mp4".format(filename))))
|
||||
else:
|
||||
os.system("nice -n 7 mp4decrypt --key 1:{} \"{}\" \"{}\"".format(
|
||||
os.system("nice -n 7 mp4decrypt --key 1:{0} \"{1}\" \"{2}\"".format(
|
||||
key,
|
||||
os.path.join(lecture_working_dir,
|
||||
"encrypted_{}.mp4".format(filename)),
|
||||
"encrypted_{0}.mp4".format(filename)),
|
||||
os.path.join(lecture_working_dir,
|
||||
"decrypted{}.mp4".format(filename))))
|
||||
"decrypted_{0}.mp4".format(filename))))
|
||||
|
||||
|
||||
def handle_irregular_segments(media_info, video_title, lecture_working_dir,
|
||||
output_path):
|
||||
def handle_segments(media_info, video_title, lecture_working_dir, output_path):
|
||||
"""
|
||||
@author Jayapraveen
|
||||
"""
|
||||
no_segment, video_url, video_init, video_extension, no_segment, audio_url, audio_init, audio_extension = media_info
|
||||
no_segment += 10 # because the download_media function relies on hittin a 404 to know when to finish
|
||||
download_media("video_0.seg.mp4", video_init, lecture_working_dir)
|
||||
video_kid = extract_kid(
|
||||
os.path.join(lecture_working_dir, "video_0.seg.mp4"))
|
||||
@ -179,7 +180,7 @@ def handle_irregular_segments(media_info, video_title, lecture_working_dir,
|
||||
audio_kid = extract_kid(
|
||||
os.path.join(lecture_working_dir, "audio_0.seg.mp4"))
|
||||
print("KID for audio file is: " + audio_kid)
|
||||
for count in range(1, no_segment):
|
||||
for count in range(1, no_segment + 4):
|
||||
video_segment_url = video_url.replace("$Number$", str(count))
|
||||
audio_segment_url = audio_url.replace("$Number$", str(count))
|
||||
video_status = download_media(
|
||||
@ -214,73 +215,138 @@ def handle_irregular_segments(media_info, video_title, lecture_working_dir,
|
||||
decrypt(audio_kid, "audio", lecture_working_dir)
|
||||
os.chdir(home_dir)
|
||||
mux_process(video_title, lecture_working_dir, output_path)
|
||||
cleanup(lecture_working_dir)
|
||||
break
|
||||
|
||||
|
||||
def manifest_parser(mpd_url):
|
||||
def handle_segments_threaded(media_info, video_title, lecture_working_dir,
                             output_path):
    """
    Download a segmented lecture with the threaded downloader, then
    concatenate, decrypt and mux it.

    :param media_info: 8-item sequence: segment count, media URL template,
        init-segment URL and file extension for video, followed by the same
        four values for audio
    :param video_title: lecture title, used for progress bars and muxing
    :param lecture_working_dir: temporary directory for this lecture's
        segments; it is handed to ``cleanup`` at the end
    :param output_path: path passed to ``mux_process`` for the final file

    @author Jayapraveen
    """
    # Distinct names for the two counts: unpacking both into one name kept
    # only the audio count and silently discarded the video count.
    (video_segments, video_url, video_init, video_extension, audio_segments,
     audio_url, audio_init, audio_extension) = media_info

    # Segment 0 is the init segment; the key ID is read from it.
    download_media("video_0.seg.mp4", video_init, lecture_working_dir)
    video_kid = extract_kid(
        os.path.join(lecture_working_dir, "video_0.seg.mp4"))
    print("KID for video file is: " + video_kid)
    download_media("audio_0.seg.mp4", audio_init, lecture_working_dir)
    audio_kid = extract_kid(
        os.path.join(lecture_working_dir, "audio_0.seg.mp4"))
    print("KID for audio file is: " + audio_kid)

    # initial=1 accounts for the init segment downloaded above.
    vbar = tqdm(total=video_segments,
                initial=1,
                unit='Video Segments',
                desc=video_title + " (Video)")
    abar = tqdm(total=audio_segments,
                initial=1,
                unit='Audio Segments',
                desc=video_title + " (Audio)")

    threads = []

    # NOTE(review): the ranges stop at count - 1, as before. If the count
    # excludes the init segment and $Number$ starts at 1, the final media
    # segment is never requested -- confirm against a real manifest.
    for number in range(1, video_segments):
        video_filename = f"video_{number}.seg.{video_extension}"
        video_path = os.path.join(lecture_working_dir, video_filename)
        video_segment_url = video_url.replace("$Number$", str(number))
        threads.append(
            downloader.get_file(video_segment_url, video_path,
                                video_filename, vbar))

    for number in range(1, audio_segments):
        audio_filename = f"audio_{number}.seg.{audio_extension}"
        audio_path = os.path.join(lecture_working_dir, audio_filename)
        audio_segment_url = audio_url.replace("$Number$", str(number))
        threads.append(
            downloader.get_file(audio_segment_url, audio_path,
                                audio_filename, abar))

    # Every segment must be on disk before concatenation starts.
    for worker in threads:
        worker.join()

    os.chdir(lecture_working_dir)
    try:
        # The commands are built from the segment counts, not from a leaked
        # loop variable: that variable was undefined when no media segment
        # was requested and left the last downloaded segment out.
        os.system(
            _segment_concat_command("video", video_extension, video_segments,
                                    "encrypted_video.mp4"))
        os.system(
            _segment_concat_command("audio", audio_extension, audio_segments,
                                    "encrypted_audio.mp4"))
        decrypt(video_kid, "video", lecture_working_dir)
        decrypt(audio_kid, "audio", lecture_working_dir)
    finally:
        # Restore the working directory even if a step above aborts.
        os.chdir(home_dir)
    mux_process(video_title, lecture_working_dir, output_path)
    cleanup(lecture_working_dir)


def _segment_concat_command(kind, extension, segment_total, output_name):
    """Build the shell command joining segments 0..segment_total-1 of ``kind``."""
    # Always include segment 0 so the command never runs without inputs
    # (a bare ``cat`` would block reading stdin).
    names = [
        f"{kind}_{i}.seg.{extension}" for i in range(0, max(segment_total, 1))
    ]
    if os.name == "nt":
        return "copy /b " + "+".join(names) + " " + output_name
    return "cat " + " ".join(names) + " > " + output_name
|
||||
|
||||
|
||||
def manifest_parser(mpd_url, quality):
|
||||
"""
|
||||
@author Jayapraveen
|
||||
"""
|
||||
video = []
|
||||
audio = []
|
||||
manifest = requests.get(mpd_url).text
|
||||
with open("manifest.mpd", 'w') as manifest_handler:
|
||||
manifest_handler.write(manifest)
|
||||
mpd = MPEGDASHParser.parse("./manifest.mpd")
|
||||
running_time = durationtoseconds(mpd.media_presentation_duration)
|
||||
mpd = MPEGDASHParser.parse(mpd_url)
|
||||
for period in mpd.periods:
|
||||
for adapt_set in period.adaptation_sets:
|
||||
print("Processing " + adapt_set.mime_type)
|
||||
content_type = adapt_set.mime_type
|
||||
if quality and content_type == "video/mp4":
|
||||
print(adapt_set.representations[0].height, quality)
|
||||
repr = next((x for x in adapt_set.representations
|
||||
if x.height == quality), None)
|
||||
if not repr:
|
||||
qualities = []
|
||||
for rep in adapt_set.representations:
|
||||
qualities.append(rep.height)
|
||||
print(quality, qualities)
|
||||
if quality < qualities[0]:
|
||||
# they want a lower quality than whats available
|
||||
repr = adapt_set.representations[0] # Lowest Quality
|
||||
elif quality > qualities[-1]:
|
||||
# they want a higher quality than whats available
|
||||
repr = adapt_set.representations[-1] # Max Quality
|
||||
print(
|
||||
"> Could not find video with requested quality, falling back to closest!"
|
||||
)
|
||||
print("> Using quality of %s" % repr.height)
|
||||
if content_type == "video/mp4":
|
||||
if quality:
|
||||
repr = next((x for x in adapt_set.representations
|
||||
if x.height == quality), None)
|
||||
if not repr:
|
||||
qualities = []
|
||||
for rep in adapt_set.representations:
|
||||
qualities.append(rep.height)
|
||||
if quality < qualities[0]:
|
||||
# they want a lower quality than whats available
|
||||
repr = adapt_set.representations[
|
||||
0] # Lowest Quality
|
||||
elif quality > qualities[-1]:
|
||||
# they want a higher quality than whats available
|
||||
repr = adapt_set.representations[-1] # Max Quality
|
||||
print(
|
||||
"> Could not find video with requested quality, falling back to closest!"
|
||||
)
|
||||
print("> Using quality of %s" % repr.height)
|
||||
else:
|
||||
print("> Found MPD representation with quality %s" %
|
||||
repr.height)
|
||||
else:
|
||||
print("> Found MPD representation with quality %s" %
|
||||
repr.height)
|
||||
else:
|
||||
repr = adapt_set.representations[-1] # Max Quality
|
||||
print("> Using max quality of %s" % repr.height)
|
||||
for segment in repr.segment_templates:
|
||||
if (segment.duration):
|
||||
print("Media segments are of equal timeframe")
|
||||
segment_time = segment.duration / segment.timescale
|
||||
total_segments = running_time / segment_time
|
||||
else:
|
||||
print("Media segments are of inequal timeframe")
|
||||
repr = adapt_set.representations[-1] # Max Quality
|
||||
print("> Using max quality of %s" % repr.height)
|
||||
segment_count = 0
|
||||
|
||||
segment = repr.segment_templates[0]
|
||||
timeline = segment.segment_timelines[0]
|
||||
segment_count += len(timeline.Ss)
|
||||
for s in timeline.Ss:
|
||||
if s.r:
|
||||
segment_count += s.r
|
||||
|
||||
print("Expected No of segments:", segment_count)
|
||||
if (content_type == "audio/mp4"):
|
||||
segment_extension = segment.media.split(".")[-1]
|
||||
audio.append(segment_count)
|
||||
audio.append(segment.media)
|
||||
audio.append(segment.initialization)
|
||||
audio.append(segment_extension)
|
||||
elif (content_type == "video/mp4"):
|
||||
segment_extension = segment.media.split(".")[-1]
|
||||
video.append(segment_count)
|
||||
video.append(segment.media)
|
||||
video.append(segment.initialization)
|
||||
video.append(segment_extension)
|
||||
|
||||
approx_no_segments = round(
|
||||
running_time /
|
||||
6) + 10 # aproximate of 6 sec per segment
|
||||
print("Expected No of segments:", approx_no_segments)
|
||||
if (content_type == "audio/mp4"):
|
||||
segment_extension = segment.media.split(".")[-1]
|
||||
audio.append(approx_no_segments)
|
||||
audio.append(segment.media)
|
||||
audio.append(segment.initialization)
|
||||
audio.append(segment_extension)
|
||||
elif (content_type == "video/mp4"):
|
||||
segment_extension = segment.media.split(".")[-1]
|
||||
video.append(approx_no_segments)
|
||||
video.append(segment.media)
|
||||
video.append(segment.initialization)
|
||||
video.append(segment_extension)
|
||||
return video + audio
|
||||
|
||||
|
||||
@ -316,6 +382,8 @@ def process_caption(caption,
|
||||
lecture_index,
|
||||
lecture_title,
|
||||
lecture_dir,
|
||||
use_threaded_downloader,
|
||||
threads,
|
||||
tries=0):
|
||||
filename = f"%s. %s_%s.%s" % (lecture_index, sanitize(lecture_title),
|
||||
caption.get("locale_id"), caption.get("ext"))
|
||||
@ -328,7 +396,12 @@ def process_caption(caption,
|
||||
else:
|
||||
print(f"> Downloading captions: '%s'" % filename)
|
||||
try:
|
||||
download(caption.get("url"), filepath, filename)
|
||||
if use_threaded_downloader:
|
||||
thread = downloader.get_file(caption.get("url"), filepath,
|
||||
filename)
|
||||
thread.join()
|
||||
else:
|
||||
download(caption.get("url"), filepath, filename)
|
||||
except Exception as e:
|
||||
if tries >= 3:
|
||||
print(
|
||||
@ -340,7 +413,8 @@ def process_caption(caption,
|
||||
f"> Error downloading captions: {e}. Will retry {3-tries} more times."
|
||||
)
|
||||
process_caption(caption, lecture_index, lecture_title,
|
||||
lecture_dir, tries + 1)
|
||||
lecture_dir, use_threaded_downloader, threads,
|
||||
tries + 1)
|
||||
if caption.get("ext") == "vtt":
|
||||
try:
|
||||
print("> Converting captions to SRT format...")
|
||||
@ -352,7 +426,8 @@ def process_caption(caption,
|
||||
|
||||
|
||||
def process_lecture(lecture, lecture_index, lecture_path, lecture_dir, quality,
|
||||
skip_lectures, dl_assets, dl_captions, caption_locale):
|
||||
skip_lectures, dl_assets, dl_captions, caption_locale,
|
||||
use_threaded_downloader):
|
||||
lecture_title = lecture["title"]
|
||||
lecture_asset = lecture["asset"]
|
||||
if not skip_lectures:
|
||||
@ -371,7 +446,12 @@ def process_lecture(lecture, lecture_index, lecture_path, lecture_dir, quality,
|
||||
|
||||
if not os.path.isfile(lecture_path):
|
||||
try:
|
||||
download(lecture_url, lecture_path, lecture_title)
|
||||
if use_threaded_downloader:
|
||||
thread = downloader.get_file(lecture_url, lecture_path,
|
||||
lecture_title)
|
||||
thread.join()
|
||||
else:
|
||||
download(lecture_url, lecture_path, lecture_title)
|
||||
except Exception as e:
|
||||
# We could add a retry here
|
||||
print(f"> Error downloading lecture: {e}. Skipping...")
|
||||
@ -396,10 +476,13 @@ def process_lecture(lecture, lecture_index, lecture_path, lecture_dir, quality,
|
||||
"> Couldn't find dash url for lecture '%s', skipping...",
|
||||
lecture_title)
|
||||
return
|
||||
media_info = manifest_parser(mpd_url)
|
||||
handle_irregular_segments(media_info, lecture_title,
|
||||
lecture_working_dir, lecture_path)
|
||||
cleanup(lecture_working_dir)
|
||||
media_info = manifest_parser(mpd_url, quality)
|
||||
if use_threaded_downloader:
|
||||
handle_segments_threaded(media_info, lecture_title,
|
||||
lecture_working_dir, lecture_path)
|
||||
else:
|
||||
handle_segments(media_info, lecture_title,
|
||||
lecture_working_dir, lecture_path)
|
||||
else:
|
||||
print("> Lecture '%s' is already downloaded, skipping..." %
|
||||
lecture_title)
|
||||
@ -418,9 +501,16 @@ def process_lecture(lecture, lecture_index, lecture_path, lecture_dir, quality,
|
||||
if x["label"] == "download"), None)
|
||||
if download_url:
|
||||
try:
|
||||
download(download_url,
|
||||
os.path.join(lecture_dir, asset_filename),
|
||||
asset_filename)
|
||||
if use_threaded_downloader:
|
||||
thread = downloader.get_file(
|
||||
download_url,
|
||||
os.path.join(lecture_dir, asset_filename),
|
||||
asset_filename)
|
||||
thread.join()
|
||||
else:
|
||||
download(download_url,
|
||||
os.path.join(lecture_dir, asset_filename),
|
||||
asset_filename)
|
||||
except Exception as e:
|
||||
print(
|
||||
f"> Error downloading lecture asset: {e}. Skipping"
|
||||
@ -472,11 +562,12 @@ def process_lecture(lecture, lecture_index, lecture_path, lecture_dir, quality,
|
||||
})
|
||||
|
||||
for caption in captions:
|
||||
process_caption(caption, lecture_index, lecture_title, lecture_dir)
|
||||
process_caption(caption, lecture_index, lecture_title, lecture_dir,
|
||||
use_threaded_downloader)
|
||||
|
||||
|
||||
def parse(data, course_id, course_name, skip_lectures, dl_assets, dl_captions,
|
||||
quality, caption_locale):
|
||||
quality, caption_locale, use_threaded_downloader):
|
||||
course_dir = os.path.join(download_dir, course_name)
|
||||
if not os.path.exists(course_dir):
|
||||
os.mkdir(course_dir)
|
||||
@ -498,9 +589,18 @@ def parse(data, course_id, course_name, skip_lectures, dl_assets, dl_captions,
|
||||
lecture_path = os.path.join(
|
||||
course_dir, "{}. {}.mp4".format(lecture_index,
|
||||
sanitize(obj["title"])))
|
||||
process_lecture(obj, lecture_index, lecture_path, download_dir,
|
||||
quality, skip_lectures, dl_assets, dl_captions,
|
||||
caption_locale)
|
||||
process_lecture(
|
||||
obj,
|
||||
lecture_index,
|
||||
lecture_path,
|
||||
download_dir,
|
||||
quality,
|
||||
skip_lectures,
|
||||
dl_assets,
|
||||
dl_captions,
|
||||
caption_locale,
|
||||
use_threaded_downloader,
|
||||
)
|
||||
|
||||
for chapter in chapters:
|
||||
chapter_dir = os.path.join(
|
||||
@ -516,7 +616,7 @@ def parse(data, course_id, course_name, skip_lectures, dl_assets, dl_captions,
|
||||
sanitize(lecture["title"])))
|
||||
process_lecture(lecture, lecture_index, lecture_path, chapter_dir,
|
||||
quality, skip_lectures, dl_assets, dl_captions,
|
||||
caption_locale)
|
||||
caption_locale, use_threaded_downloader)
|
||||
print("\n\n\n\n\n\n\n\n=====================")
|
||||
print("All downloads completed for course!")
|
||||
print("=====================")
|
||||
@ -571,13 +671,6 @@ if __name__ == "__main__":
|
||||
type=str,
|
||||
help="The Bearer token to use",
|
||||
)
|
||||
parser.add_argument(
|
||||
"-d",
|
||||
"--debug",
|
||||
dest="debug",
|
||||
action="store_true",
|
||||
help="Use test_data.json rather than fetch from the udemy api.",
|
||||
)
|
||||
parser.add_argument(
|
||||
"-q",
|
||||
"--quality",
|
||||
@ -585,6 +678,14 @@ if __name__ == "__main__":
|
||||
type=int,
|
||||
help="Download specific video quality. (144, 360, 480, 720, 1080)",
|
||||
)
|
||||
parser.add_argument(
|
||||
"-t",
|
||||
"--threads",
|
||||
dest="threads",
|
||||
type=int,
|
||||
help=
|
||||
"Max number of threads to use when using the threaded downloader (default 10)",
|
||||
)
|
||||
parser.add_argument(
|
||||
"-l",
|
||||
"--lang",
|
||||
@ -610,6 +711,19 @@ if __name__ == "__main__":
|
||||
action="store_true",
|
||||
help="If specified, captions will be downloaded.",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--use-threaded-downloader",
|
||||
dest="use_threaded_downloader",
|
||||
action="store_true",
|
||||
help="If specified, the experimental threaded downloader will be used",
|
||||
)
|
||||
parser.add_argument(
|
||||
"-d",
|
||||
"--debug",
|
||||
dest="debug",
|
||||
action="store_true",
|
||||
help="Use test_data.json rather than fetch from the udemy api.",
|
||||
)
|
||||
|
||||
dl_assets = False
|
||||
skip_lectures = False
|
||||
@ -619,6 +733,8 @@ if __name__ == "__main__":
|
||||
bearer_token = None
|
||||
portal_name = None
|
||||
course_name = None
|
||||
use_threaded_downloader = False
|
||||
threads = 10
|
||||
|
||||
args = parser.parse_args()
|
||||
if args.download_assets:
|
||||
@ -635,6 +751,11 @@ if __name__ == "__main__":
|
||||
sys.exit(1)
|
||||
else:
|
||||
quality = args.quality
|
||||
if args.use_threaded_downloader:
|
||||
use_threaded_downloader = args.use_threaded_downloader
|
||||
if args.threads:
|
||||
threads = args.threads
|
||||
downloader = FileDownloader(max_threads=threads)
|
||||
|
||||
load_dotenv()
|
||||
if args.bearer_token:
|
||||
@ -693,7 +814,8 @@ if __name__ == "__main__":
|
||||
course_data = json.loads(f.read())
|
||||
parse(course_data["results"], course_id, course_name,
|
||||
skip_lectures, dl_assets, dl_captions, quality,
|
||||
caption_locale)
|
||||
caption_locale, use_threaded_downloader)
|
||||
else:
|
||||
parse(course_data["results"], course_id, course_name, skip_lectures,
|
||||
dl_assets, dl_captions, quality, caption_locale)
|
||||
dl_assets, dl_captions, quality, caption_locale,
|
||||
use_threaded_downloader)
|
||||
|
Loading…
x
Reference in New Issue
Block a user