mirror of
https://cdm-project.com/Download-Tools/udemy-downloader.git
synced 2025-05-31 07:00:14 +02:00
Bug fixes
- Fixed captions not being downloaded - Fixed trying to load keyfile even if it doesn't exist - Moved asset and subtitle download processing into lecture processing function (in preparation of subtitle merging) - Fixed an error in ffmpeg command when not using h265 - no longer need to specify full path to UdemyDownloader.py, also updated readme to reflect this
This commit is contained in:
parent
ec6ac28d0b
commit
f3a32a2dd6
42
README.md
42
README.md
@ -104,41 +104,41 @@ optional arguments:
|
|||||||
```
|
```
|
||||||
|
|
||||||
- Passing a Bearer Token and Course ID as an argument
|
- Passing a Bearer Token and Course ID as an argument
|
||||||
- `python udemy_downloader\UdemyDownloader.py -c <Course URL> -b <Bearer Token>`
|
- `python udemy_downloader -c <Course URL> -b <Bearer Token>`
|
||||||
- `python udemy_downloader\UdemyDownloader.py -c https://www.udemy.com/courses/myawesomecourse -b <Bearer Token>`
|
- `python udemy_downloader -c https://www.udemy.com/courses/myawesomecourse -b <Bearer Token>`
|
||||||
- Download a specific quality
|
- Download a specific quality
|
||||||
- `python udemy_downloader\UdemyDownloader.py -c <Course URL> -q 720`
|
- `python udemy_downloader -c <Course URL> -q 720`
|
||||||
- Download assets along with lectures
|
- Download assets along with lectures
|
||||||
- `python udemy_downloader\UdemyDownloader.py -c <Course URL> --download-assets`
|
- `python udemy_downloader -c <Course URL> --download-assets`
|
||||||
- Download assets and specify a quality
|
- Download assets and specify a quality
|
||||||
- `python udemy_downloader\UdemyDownloader.py -c <Course URL> -q 360 --download-assets`
|
- `python udemy_downloader -c <Course URL> -q 360 --download-assets`
|
||||||
- Download captions (Defaults to English)
|
- Download captions (Defaults to English)
|
||||||
- `python udemy_downloader\UdemyDownloader.py -c <Course URL> --download-captions`
|
- `python udemy_downloader -c <Course URL> --download-captions`
|
||||||
- Download captions with specific language
|
- Download captions with specific language
|
||||||
- `python udemy_downloader\UdemyDownloader.py -c <Course URL> --download-captions -l en` - English subtitles
|
- `python udemy_downloader -c <Course URL> --download-captions -l en` - English subtitles
|
||||||
- `python udemy_downloader\UdemyDownloader.py -c <Course URL> --download-captions -l es` - Spanish subtitles
|
- `python udemy_downloader -c <Course URL> --download-captions -l es` - Spanish subtitles
|
||||||
- `python udemy_downloader\UdemyDownloader.py -c <Course URL> --download-captions -l it` - Italian subtitles
|
- `python udemy_downloader -c <Course URL> --download-captions -l it` - Italian subtitles
|
||||||
- `python udemy_downloader\UdemyDownloader.py -c <Course URL> --download-captions -l pl` - Polish Subtitles
|
- `python udemy_downloader -c <Course URL> --download-captions -l pl` - Polish Subtitles
|
||||||
- `python udemy_downloader\UdemyDownloader.py -c <Course URL> --download-captions -l all` - Downloads all subtitles
|
- `python udemy_downloader -c <Course URL> --download-captions -l all` - Downloads all subtitles
|
||||||
- etc
|
- etc
|
||||||
- Skip downloading lecture videos
|
- Skip downloading lecture videos
|
||||||
- `python udemy_downloader\UdemyDownloader.py -c <Course URL> --skip-lectures --download-captions` - Downloads only captions
|
- `python udemy_downloader -c <Course URL> --skip-lectures --download-captions` - Downloads only captions
|
||||||
- `python udemy_downloader\UdemyDownloader.py -c <Course URL> --skip-lectures --download-assets` - Downloads only assets
|
- `python udemy_downloader -c <Course URL> --skip-lectures --download-assets` - Downloads only assets
|
||||||
- Keep .VTT caption files:
|
- Keep .VTT caption files:
|
||||||
- `python udemy_downloader\UdemyDownloader.py -c <Course URL> --download-captions --keep-vtt`
|
- `python udemy_downloader -c <Course URL> --download-captions --keep-vtt`
|
||||||
- Skip parsing HLS Streams (HLS streams usually contain 1080p quality for Non-DRM lectures):
|
- Skip parsing HLS Streams (HLS streams usually contain 1080p quality for Non-DRM lectures):
|
||||||
- `python udemy_downloader\UdemyDownloader.py -c <Course URL> --skip-hls`
|
- `python udemy_downloader -c <Course URL> --skip-hls`
|
||||||
- Print course information only:
|
- Print course information only:
|
||||||
- `python udemy_downloader\UdemyDownloader.py -c <Course URL> --info`
|
- `python udemy_downloader -c <Course URL> --info`
|
||||||
- Specify max number of concurrent downloads:
|
- Specify max number of concurrent downloads:
|
||||||
- `python udemy_downloader\UdemyDownloader.py -c <Course URL> --concurrent-downloads 20`
|
- `python udemy_downloader -c <Course URL> --concurrent-downloads 20`
|
||||||
- `python udemy_downloader\UdemyDownloader.py -c <Course URL> -cd 20`
|
- `python udemy_downloader -c <Course URL> -cd 20`
|
||||||
- Encode in H.265:
|
- Encode in H.265:
|
||||||
- `python udemy_downloader\UdemyDownloader.py -c <Course URL> --use-h265`
|
- `python udemy_downloader -c <Course URL> --use-h265`
|
||||||
- Encode in H.265 with custom CRF:
|
- Encode in H.265 with custom CRF:
|
||||||
- `python udemy_downloader\UdemyDownloader.py -c <Course URL> --use-h265 --h265-crf 20`
|
- `python udemy_downloader -c <Course URL> --use-h265 --h265-crf 20`
|
||||||
- Encode in H.265 with custom preset:
|
- Encode in H.265 with custom preset:
|
||||||
- `python udemy_downloader\UdemyDownloader.py -c <Course URL> --use-h265 --h265-preset faster`
|
- `python udemy_downloader -c <Course URL> --use-h265 --h265-preset faster`
|
||||||
|
|
||||||
# Credits
|
# Credits
|
||||||
|
|
||||||
|
@ -51,6 +51,7 @@ _udemy_path = os.path.join(saved_dir, "_udemy.json")
|
|||||||
|
|
||||||
udemy = None
|
udemy = None
|
||||||
parser = None
|
parser = None
|
||||||
|
iknowwhatimdoing = False
|
||||||
retry = 3
|
retry = 3
|
||||||
_udemy = {}
|
_udemy = {}
|
||||||
course_url = None
|
course_url = None
|
||||||
@ -82,6 +83,7 @@ use_h265 = False
|
|||||||
h265_crf = 28
|
h265_crf = 28
|
||||||
h265_preset = "medium"
|
h265_preset = "medium"
|
||||||
|
|
||||||
|
|
||||||
def download_segments(url, format_id, video_title, output_path, lecture_file_name, chapter_dir):
|
def download_segments(url, format_id, video_title, output_path, lecture_file_name, chapter_dir):
|
||||||
os.chdir(os.path.join(chapter_dir))
|
os.chdir(os.path.join(chapter_dir))
|
||||||
file_name = lecture_file_name.replace("%", "").replace(".mp4", "")
|
file_name = lecture_file_name.replace("%", "").replace(".mp4", "")
|
||||||
@ -103,7 +105,6 @@ def download_segments(url, format_id, video_title, output_path, lecture_file_nam
|
|||||||
print("Return code from the downloader was non-0 (error), skipping!")
|
print("Return code from the downloader was non-0 (error), skipping!")
|
||||||
return
|
return
|
||||||
|
|
||||||
|
|
||||||
# tries to decrypt audio and video, and then merge them
|
# tries to decrypt audio and video, and then merge them
|
||||||
try:
|
try:
|
||||||
# tries to decrypt audio
|
# tries to decrypt audio
|
||||||
@ -113,9 +114,11 @@ def download_segments(url, format_id, video_title, output_path, lecture_file_nam
|
|||||||
audio_key = keys[audio_kid.lower()]
|
audio_key = keys[audio_kid.lower()]
|
||||||
|
|
||||||
print("> Decrypting audio...")
|
print("> Decrypting audio...")
|
||||||
ret_code = decrypt(audio_key, audio_filepath_enc, audio_filepath_dec)
|
ret_code = decrypt(
|
||||||
|
audio_key, audio_filepath_enc, audio_filepath_dec)
|
||||||
if(ret_code != 0):
|
if(ret_code != 0):
|
||||||
print("WARN: Decrypting returned a non-0 result code which usually indicated an error!")
|
print(
|
||||||
|
"WARN: Decrypting returned a non-0 result code which usually indicated an error!")
|
||||||
else:
|
else:
|
||||||
print("Decryption complete")
|
print("Decryption complete")
|
||||||
except KeyError:
|
except KeyError:
|
||||||
@ -129,28 +132,31 @@ def download_segments(url, format_id, video_title, output_path, lecture_file_nam
|
|||||||
video_key = keys[video_kid.lower()]
|
video_key = keys[video_kid.lower()]
|
||||||
|
|
||||||
print("> Decrypting video...")
|
print("> Decrypting video...")
|
||||||
ret_code2 = decrypt(video_key, video_filepath_enc, video_filepath_dec)
|
ret_code2 = decrypt(
|
||||||
|
video_key, video_filepath_enc, video_filepath_dec)
|
||||||
if(ret_code2 != 0):
|
if(ret_code2 != 0):
|
||||||
print("WARN: Decrypting returned a non-0 result code which usually indicated an error!")
|
print(
|
||||||
|
"WARN: Decrypting returned a non-0 result code which usually indicated an error!")
|
||||||
else:
|
else:
|
||||||
print("Decryption complete")
|
print("Decryption complete")
|
||||||
except KeyError:
|
except KeyError:
|
||||||
print("Video key not found!")
|
print("Video key not found!")
|
||||||
raise RuntimeError("No video key")
|
raise RuntimeError("No video key")
|
||||||
|
|
||||||
|
|
||||||
# tries to merge audio and video
|
# tries to merge audio and video
|
||||||
# this should run only if both audio and video decryption returned 0 codes
|
# this should run only if both audio and video decryption returned 0 codes
|
||||||
print("> Merging audio and video files...")
|
print("> Merging audio and video files...")
|
||||||
ret_code3 = merge(video_title=video_title, video_filepath=video_filepath_dec, audio_filepath=audio_filepath_dec, output_path=output_path, use_h265=use_h265, h265_crf=h265_crf, h265_preset=h265_preset)
|
ret_code3 = merge(video_title=video_title, video_filepath=video_filepath_dec, audio_filepath=audio_filepath_dec,
|
||||||
|
output_path=output_path, use_h265=use_h265, h265_crf=h265_crf, h265_preset=h265_preset)
|
||||||
if(ret_code3 != 0):
|
if(ret_code3 != 0):
|
||||||
print("WARN: Merging returned a non-0 result code which usually indicated an error!")
|
print(
|
||||||
|
"WARN: Merging returned a non-0 result code which usually indicated an error!")
|
||||||
|
|
||||||
if(ret_code == 0 and ret_code2 == 0 and ret_code3 == 0):
|
if(ret_code == 0 and ret_code2 == 0 and ret_code3 == 0):
|
||||||
print("> Cleaning up...")
|
print("> Cleaning up...")
|
||||||
# remove all the temporary files left over after decryption and merging if there were no errors
|
# remove all the temporary files left over after decryption and merging if there were no errors
|
||||||
remove_files((video_filepath_enc, video_filepath_dec, audio_filepath_enc, audio_filepath_dec))
|
remove_files((video_filepath_enc, video_filepath_dec,
|
||||||
|
audio_filepath_enc, audio_filepath_dec))
|
||||||
print("> Cleanup complete")
|
print("> Cleanup complete")
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
print(e)
|
print(e)
|
||||||
@ -200,7 +206,7 @@ def download_aria(url, file_dir, filename):
|
|||||||
print("Return code: " + str(ret_code))
|
print("Return code: " + str(ret_code))
|
||||||
|
|
||||||
|
|
||||||
def process_caption(caption, lecture_title, lecture_dir, keep_vtt, tries=0):
|
def process_caption(caption, lecture_title, lecture_dir, tries=0):
|
||||||
filename = f"%s_%s.%s" % (sanitize(lecture_title), caption.get("language"),
|
filename = f"%s_%s.%s" % (sanitize(lecture_title), caption.get("language"),
|
||||||
caption.get("extension"))
|
caption.get("extension"))
|
||||||
filename_no_ext = f"%s_%s" % (sanitize(lecture_title),
|
filename_no_ext = f"%s_%s" % (sanitize(lecture_title),
|
||||||
@ -241,127 +247,6 @@ def process_lecture(lecture, lecture_path, lecture_file_name, chapter_dir):
|
|||||||
is_encrypted = lecture.get("is_encrypted")
|
is_encrypted = lecture.get("is_encrypted")
|
||||||
lecture_sources = lecture.get("video_sources")
|
lecture_sources = lecture.get("video_sources")
|
||||||
|
|
||||||
if is_encrypted:
|
|
||||||
if len(lecture_sources) > 0:
|
|
||||||
source = lecture_sources[-1] # last index is the best quality
|
|
||||||
if isinstance(quality, int):
|
|
||||||
source = min(
|
|
||||||
lecture_sources,
|
|
||||||
key=lambda x: abs(int(x.get("height")) - quality))
|
|
||||||
print(f" > Lecture '%s' has DRM, attempting to download" %
|
|
||||||
lecture_title)
|
|
||||||
download_segments(source.get("download_url"),
|
|
||||||
source.get(
|
|
||||||
"format_id"), lecture_title, lecture_path, lecture_file_name, chapter_dir)
|
|
||||||
else:
|
|
||||||
print(f" > Lecture '%s' is missing media links" %
|
|
||||||
lecture_title)
|
|
||||||
print(len(lecture_sources))
|
|
||||||
else:
|
|
||||||
sources = lecture.get("sources")
|
|
||||||
sources = sorted(sources,
|
|
||||||
key=lambda x: int(x.get("height")),
|
|
||||||
reverse=True)
|
|
||||||
if sources:
|
|
||||||
if not os.path.isfile(lecture_path):
|
|
||||||
print(
|
|
||||||
" > Lecture doesn't have DRM, attempting to download..."
|
|
||||||
)
|
|
||||||
source = sources[0] # first index is the best quality
|
|
||||||
if isinstance(quality, int):
|
|
||||||
source = min(
|
|
||||||
sources,
|
|
||||||
key=lambda x: abs(int(x.get("height")) - quality))
|
|
||||||
try:
|
|
||||||
print(" ====== Selected quality: ",
|
|
||||||
source.get("type"), source.get("height"))
|
|
||||||
url = source.get("download_url")
|
|
||||||
source_type = source.get("type")
|
|
||||||
if source_type == "hls":
|
|
||||||
temp_filepath = lecture_path.replace(
|
|
||||||
".mp4", ".%(ext)s")
|
|
||||||
ret_code = subprocess.Popen([
|
|
||||||
"yt-dlp", "--force-generic-extractor",
|
|
||||||
"--concurrent-fragments",
|
|
||||||
f"{concurrent_connections}", "--downloader",
|
|
||||||
"aria2c", "-o", f"{temp_filepath}", f"{url}"
|
|
||||||
]).wait()
|
|
||||||
if ret_code == 0:
|
|
||||||
# os.rename(temp_filepath, lecture_path)
|
|
||||||
print(" > HLS Download success")
|
|
||||||
else:
|
|
||||||
download_aria(url, chapter_dir, lecture_title + ".mp4")
|
|
||||||
except EnvironmentError as e:
|
|
||||||
print(f" > Error downloading lecture: ", e)
|
|
||||||
else:
|
|
||||||
print(
|
|
||||||
" > Lecture '%s' is already downloaded, skipping..." %
|
|
||||||
lecture_title)
|
|
||||||
else:
|
|
||||||
print(" > Missing sources for lecture", lecture)
|
|
||||||
|
|
||||||
|
|
||||||
def parse():
|
|
||||||
total_chapters = _udemy.get("total_chapters")
|
|
||||||
total_lectures = _udemy.get("total_lectures")
|
|
||||||
print(f"Chapter(s) ({total_chapters})")
|
|
||||||
print(f"Lecture(s) ({total_lectures})")
|
|
||||||
|
|
||||||
course_name = _udemy.get("course_title")
|
|
||||||
course_dir = os.path.join(download_dir, course_name)
|
|
||||||
if not os.path.exists(course_dir):
|
|
||||||
os.mkdir(course_dir)
|
|
||||||
|
|
||||||
for chapter in _udemy.get("chapters"):
|
|
||||||
chapter_title = chapter.get("chapter_title")
|
|
||||||
chapter_index = chapter.get("chapter_index")
|
|
||||||
chapter_dir = os.path.join(course_dir, chapter_title)
|
|
||||||
if not os.path.exists(chapter_dir):
|
|
||||||
os.mkdir(chapter_dir)
|
|
||||||
print(
|
|
||||||
f"======= Processing chapter {chapter_index} of {total_chapters} ======="
|
|
||||||
)
|
|
||||||
|
|
||||||
for lecture in chapter.get("lectures"):
|
|
||||||
lecture_title = lecture.get("lecture_title")
|
|
||||||
lecture_index = lecture.get("lecture_index")
|
|
||||||
lecture_extension = lecture.get("extension")
|
|
||||||
extension = "mp4" # video lectures dont have an extension property, so we assume its mp4
|
|
||||||
if lecture_extension != None:
|
|
||||||
# if the lecture extension property isnt none, set the extension to the lecture extension
|
|
||||||
extension = lecture_extension
|
|
||||||
lecture_file_name = sanitize(lecture_title + "." + extension)
|
|
||||||
lecture_path = os.path.join(
|
|
||||||
chapter_dir,
|
|
||||||
lecture_file_name)
|
|
||||||
|
|
||||||
print(
|
|
||||||
f" > Processing lecture {lecture_index} of {total_lectures}")
|
|
||||||
if not skip_lectures:
|
|
||||||
print(lecture_file_name)
|
|
||||||
# Check if the lecture is already downloaded
|
|
||||||
if os.path.isfile(lecture_path):
|
|
||||||
print(
|
|
||||||
" > Lecture '%s' is already downloaded, skipping..." %
|
|
||||||
lecture_title)
|
|
||||||
continue
|
|
||||||
else:
|
|
||||||
# Check if the file is an html file
|
|
||||||
if extension == "html":
|
|
||||||
html_content = lecture.get("html_content").encode(
|
|
||||||
"ascii", "ignore").decode("utf8")
|
|
||||||
lecture_path = os.path.join(
|
|
||||||
chapter_dir, "{}.html".format(sanitize(lecture_title)))
|
|
||||||
try:
|
|
||||||
with open(lecture_path, 'w') as f:
|
|
||||||
f.write(html_content)
|
|
||||||
f.close()
|
|
||||||
except Exception as e:
|
|
||||||
print(" > Failed to write html file: ", e)
|
|
||||||
continue
|
|
||||||
else:
|
|
||||||
process_lecture(lecture, lecture_path, lecture_file_name, chapter_dir)
|
|
||||||
|
|
||||||
if dl_assets:
|
if dl_assets:
|
||||||
assets = lecture.get("assets")
|
assets = lecture.get("assets")
|
||||||
print(" > Processing {} asset(s) for lecture...".format(
|
print(" > Processing {} asset(s) for lecture...".format(
|
||||||
@ -425,11 +310,134 @@ def parse():
|
|||||||
|
|
||||||
subtitles = lecture.get("subtitles")
|
subtitles = lecture.get("subtitles")
|
||||||
if dl_captions and subtitles:
|
if dl_captions and subtitles:
|
||||||
|
selected_subtitles = []
|
||||||
print("Processing {} caption(s)...".format(len(subtitles)))
|
print("Processing {} caption(s)...".format(len(subtitles)))
|
||||||
for subtitle in subtitles:
|
for subtitle in subtitles:
|
||||||
lang = subtitle.get("language")
|
lang = subtitle.get("language")
|
||||||
if lang == caption_locale or caption_locale == "all":
|
if lang == caption_locale or caption_locale == "all":
|
||||||
|
selected_subtitles.append(subtitle)
|
||||||
process_caption(subtitle, lecture_title, chapter_dir)
|
process_caption(subtitle, lecture_title, chapter_dir)
|
||||||
|
print("Selected {} captions".format(len(selected_subtitles)))
|
||||||
|
|
||||||
|
if is_encrypted:
|
||||||
|
if len(lecture_sources) > 0:
|
||||||
|
source = lecture_sources[-1] # last index is the best quality
|
||||||
|
if isinstance(quality, int):
|
||||||
|
source = min(
|
||||||
|
lecture_sources,
|
||||||
|
key=lambda x: abs(int(x.get("height")) - quality))
|
||||||
|
print(f" > Lecture '%s' has DRM, attempting to download" %
|
||||||
|
lecture_title)
|
||||||
|
download_segments(source.get("download_url"),
|
||||||
|
source.get(
|
||||||
|
"format_id"), lecture_title, lecture_path, lecture_file_name, chapter_dir)
|
||||||
|
else:
|
||||||
|
print(f" > Lecture '%s' is missing media links" %
|
||||||
|
lecture_title)
|
||||||
|
print(len(lecture_sources))
|
||||||
|
else:
|
||||||
|
sources = lecture.get("sources")
|
||||||
|
sources = sorted(sources,
|
||||||
|
key=lambda x: int(x.get("height")),
|
||||||
|
reverse=True)
|
||||||
|
if sources:
|
||||||
|
if not os.path.isfile(lecture_path):
|
||||||
|
print(
|
||||||
|
" > Lecture doesn't have DRM, attempting to download..."
|
||||||
|
)
|
||||||
|
source = sources[0] # first index is the best quality
|
||||||
|
if isinstance(quality, int):
|
||||||
|
source = min(
|
||||||
|
sources,
|
||||||
|
key=lambda x: abs(int(x.get("height")) - quality))
|
||||||
|
try:
|
||||||
|
print(" ====== Selected quality: ",
|
||||||
|
source.get("type"), source.get("height"))
|
||||||
|
url = source.get("download_url")
|
||||||
|
source_type = source.get("type")
|
||||||
|
if source_type == "hls":
|
||||||
|
temp_filepath = lecture_path.replace(
|
||||||
|
".mp4", ".%(ext)s")
|
||||||
|
ret_code = subprocess.Popen([
|
||||||
|
"yt-dlp", "--force-generic-extractor",
|
||||||
|
"--concurrent-fragments",
|
||||||
|
f"{concurrent_connections}", "--downloader",
|
||||||
|
"aria2c", "-o", f"{temp_filepath}", f"{url}"
|
||||||
|
]).wait()
|
||||||
|
if ret_code == 0:
|
||||||
|
print(" > HLS Download success")
|
||||||
|
else:
|
||||||
|
download_aria(url, chapter_dir, lecture_title + ".mp4")
|
||||||
|
except EnvironmentError as e:
|
||||||
|
print(f" > Error downloading lecture: ", e)
|
||||||
|
else:
|
||||||
|
print(
|
||||||
|
" > Lecture '%s' is already downloaded, skipping..." %
|
||||||
|
lecture_title)
|
||||||
|
else:
|
||||||
|
print(" > Missing sources for lecture", lecture)
|
||||||
|
|
||||||
|
|
||||||
|
def parse():
|
||||||
|
total_chapters = _udemy.get("total_chapters")
|
||||||
|
total_lectures = _udemy.get("total_lectures")
|
||||||
|
print(f"Chapter(s) ({total_chapters})")
|
||||||
|
print(f"Lecture(s) ({total_lectures})")
|
||||||
|
|
||||||
|
course_name = _udemy.get("course_title")
|
||||||
|
course_dir = os.path.join(download_dir, course_name)
|
||||||
|
if not os.path.exists(course_dir):
|
||||||
|
os.mkdir(course_dir)
|
||||||
|
|
||||||
|
for chapter in _udemy.get("chapters"):
|
||||||
|
chapter_title = chapter.get("chapter_title")
|
||||||
|
chapter_index = chapter.get("chapter_index")
|
||||||
|
chapter_dir = os.path.join(course_dir, chapter_title)
|
||||||
|
if not os.path.exists(chapter_dir):
|
||||||
|
os.mkdir(chapter_dir)
|
||||||
|
print(
|
||||||
|
f"======= Processing chapter {chapter_index} of {total_chapters} ======="
|
||||||
|
)
|
||||||
|
|
||||||
|
for lecture in chapter.get("lectures"):
|
||||||
|
lecture_title = lecture.get("lecture_title")
|
||||||
|
lecture_index = lecture.get("lecture_index")
|
||||||
|
lecture_extension = lecture.get("extension")
|
||||||
|
extension = "mp4" # video lectures dont have an extension property, so we assume its mp4
|
||||||
|
if lecture_extension != None:
|
||||||
|
# if the lecture extension property isnt none, set the extension to the lecture extension
|
||||||
|
extension = lecture_extension
|
||||||
|
lecture_file_name = sanitize(lecture_title + "." + extension)
|
||||||
|
lecture_path = os.path.join(
|
||||||
|
chapter_dir,
|
||||||
|
lecture_file_name)
|
||||||
|
|
||||||
|
print(
|
||||||
|
f" > Processing lecture {lecture_index} of {total_lectures}")
|
||||||
|
if not skip_lectures:
|
||||||
|
# Check if the lecture is already downloaded
|
||||||
|
if os.path.isfile(lecture_path):
|
||||||
|
print(
|
||||||
|
" > Lecture '%s' is already downloaded, skipping..." %
|
||||||
|
lecture_title)
|
||||||
|
continue
|
||||||
|
else:
|
||||||
|
# Check if the file is an html file
|
||||||
|
if extension == "html":
|
||||||
|
html_content = lecture.get("html_content").encode(
|
||||||
|
"ascii", "ignore").decode("utf8")
|
||||||
|
lecture_path = os.path.join(
|
||||||
|
chapter_dir, "{}.html".format(sanitize(lecture_title)))
|
||||||
|
try:
|
||||||
|
with open(lecture_path, 'w') as f:
|
||||||
|
f.write(html_content)
|
||||||
|
f.close()
|
||||||
|
except Exception as e:
|
||||||
|
print(" > Failed to write html file: ", e)
|
||||||
|
continue
|
||||||
|
else:
|
||||||
|
process_lecture(lecture, lecture_path,
|
||||||
|
lecture_file_name, chapter_dir)
|
||||||
|
|
||||||
|
|
||||||
def process_course():
|
def process_course():
|
||||||
@ -641,6 +649,7 @@ def process_course():
|
|||||||
if entry
|
if entry
|
||||||
])
|
])
|
||||||
|
|
||||||
|
|
||||||
def get_course_information():
|
def get_course_information():
|
||||||
global course_info, course_id, title, course_title, portal_name
|
global course_info, course_id, title, course_title, portal_name
|
||||||
if(load_from_file):
|
if(load_from_file):
|
||||||
@ -658,6 +667,7 @@ def get_course_information():
|
|||||||
course_title = course_info.get("published_title")
|
course_title = course_info.get("published_title")
|
||||||
portal_name = course_info.get("portal_name")
|
portal_name = course_info.get("portal_name")
|
||||||
|
|
||||||
|
|
||||||
def get_course_content():
|
def get_course_content():
|
||||||
global course_content
|
global course_content
|
||||||
if load_from_file:
|
if load_from_file:
|
||||||
@ -666,18 +676,22 @@ def get_course_content():
|
|||||||
course_content = json.loads(f.read())
|
course_content = json.loads(f.read())
|
||||||
else:
|
else:
|
||||||
print("course_content.json not found, falling back to fetching")
|
print("course_content.json not found, falling back to fetching")
|
||||||
course_content = udemy._extract_course_json(course_url, course_id, portal_name)
|
course_content = udemy._extract_course_json(
|
||||||
|
course_url, course_id, portal_name)
|
||||||
else:
|
else:
|
||||||
course_content = udemy._extract_course_json(course_url, course_id, portal_name)
|
course_content = udemy._extract_course_json(
|
||||||
|
course_url, course_id, portal_name)
|
||||||
|
|
||||||
|
|
||||||
def parse_data():
|
def parse_data():
|
||||||
global _udemy
|
global _udemy
|
||||||
if load_from_file:
|
if load_from_file and os.path.exists(_udemy_path):
|
||||||
f = open(_udemy_path, 'r')
|
f = open(_udemy_path, 'r')
|
||||||
_udemy = json.loads(f.read())
|
_udemy = json.loads(f.read())
|
||||||
else:
|
else:
|
||||||
process_course()
|
process_course()
|
||||||
|
|
||||||
|
|
||||||
def _print_course_info(course_data):
|
def _print_course_info(course_data):
|
||||||
print("\n\n\n\n")
|
print("\n\n\n\n")
|
||||||
course_title = course_data.get("title")
|
course_title = course_data.get("title")
|
||||||
@ -746,6 +760,7 @@ def _print_course_info(course_data):
|
|||||||
if chapter_index != chapter_count:
|
if chapter_index != chapter_count:
|
||||||
print("\n\n")
|
print("\n\n")
|
||||||
|
|
||||||
|
|
||||||
def setup_parser():
|
def setup_parser():
|
||||||
global parser
|
global parser
|
||||||
parser = argparse.ArgumentParser(description='Udemy Downloader')
|
parser = argparse.ArgumentParser(description='Udemy Downloader')
|
||||||
@ -839,6 +854,12 @@ def setup_parser():
|
|||||||
default="medium",
|
default="medium",
|
||||||
help="Set a custom preset value for H.265 encoding. FFMPEG default is medium",
|
help="Set a custom preset value for H.265 encoding. FFMPEG default is medium",
|
||||||
)
|
)
|
||||||
|
parser.add_argument(
|
||||||
|
"--iknowwhatimdoing",
|
||||||
|
dest="iknowwhatimdoing",
|
||||||
|
action="store_true",
|
||||||
|
help=argparse.SUPPRESS,
|
||||||
|
)
|
||||||
parser.add_argument(
|
parser.add_argument(
|
||||||
"--save-to-file",
|
"--save-to-file",
|
||||||
dest="save_to_file",
|
dest="save_to_file",
|
||||||
@ -856,7 +877,7 @@ def setup_parser():
|
|||||||
|
|
||||||
|
|
||||||
def process_args(args):
|
def process_args(args):
|
||||||
global course_url, bearer_token, dl_assets, caption_locale, skip_lectures, quality, keep_vtt, skip_hls, print_info, load_from_file, save_to_file, concurrent_connections, use_h265, h265_crf, h265_preset
|
global course_url, bearer_token, dl_assets, dl_captions, caption_locale, skip_lectures, quality, keep_vtt, skip_hls, print_info, load_from_file, save_to_file, concurrent_connections, use_h265, h265_crf, h265_preset, iknowwhatimdoing
|
||||||
|
|
||||||
course_url = args.course_url
|
course_url = args.course_url
|
||||||
if args.download_assets:
|
if args.download_assets:
|
||||||
@ -893,6 +914,8 @@ def process_args(args):
|
|||||||
h265_crf = args.h265_crf
|
h265_crf = args.h265_crf
|
||||||
if args.h265_preset:
|
if args.h265_preset:
|
||||||
h265_preset = args.h265_preset
|
h265_preset = args.h265_preset
|
||||||
|
if args.iknowwhatimdoing:
|
||||||
|
iknowwhatimdoing = args.iknowwhatimdoing
|
||||||
|
|
||||||
if args.load_from_file:
|
if args.load_from_file:
|
||||||
print(
|
print(
|
||||||
@ -907,6 +930,7 @@ def process_args(args):
|
|||||||
else:
|
else:
|
||||||
bearer_token = os.getenv("UDEMY_BEARER")
|
bearer_token = os.getenv("UDEMY_BEARER")
|
||||||
|
|
||||||
|
|
||||||
def ensure_dependencies_installed():
|
def ensure_dependencies_installed():
|
||||||
aria_ret_val = check_for_aria()
|
aria_ret_val = check_for_aria()
|
||||||
if not aria_ret_val:
|
if not aria_ret_val:
|
||||||
@ -925,6 +949,7 @@ def ensure_dependencies_installed():
|
|||||||
)
|
)
|
||||||
sys.exit(1)
|
sys.exit(1)
|
||||||
|
|
||||||
|
|
||||||
def check_dirs():
|
def check_dirs():
|
||||||
if not os.path.exists(saved_dir):
|
if not os.path.exists(saved_dir):
|
||||||
os.makedirs(saved_dir)
|
os.makedirs(saved_dir)
|
||||||
@ -932,29 +957,17 @@ def check_dirs():
|
|||||||
if not os.path.exists(download_dir):
|
if not os.path.exists(download_dir):
|
||||||
os.makedirs(download_dir)
|
os.makedirs(download_dir)
|
||||||
|
|
||||||
def load_keys():
|
|
||||||
|
def try_load_keys():
|
||||||
global keys
|
global keys
|
||||||
f = open(keyfile_path, 'r')
|
f = open(keyfile_path, 'r')
|
||||||
keys = json.loads(f.read())
|
keys = json.loads(f.read())
|
||||||
|
|
||||||
|
|
||||||
def UdemyDownloader():
|
def UdemyDownloader():
|
||||||
global udemy, course, resource
|
global udemy, course, resource
|
||||||
check_dirs()
|
check_dirs()
|
||||||
|
|
||||||
# warn that the keyfile is not found
|
|
||||||
if not os.path.isfile(keyfile_path):
|
|
||||||
print("!!! Keyfile not found! This means you probably didn't rename the keyfile correctly, DRM lecture decryption will fail! If you aren't downloading DRM encrypted courses, you can ignore this message. !!!")
|
|
||||||
print("Waiting for 10 seconds...")
|
|
||||||
time.sleep(10)
|
|
||||||
|
|
||||||
load_keys()
|
|
||||||
|
|
||||||
# ensure 3rd party binaries are installed
|
|
||||||
ensure_dependencies_installed();
|
|
||||||
|
|
||||||
# loads the .env file
|
|
||||||
load_dotenv()
|
|
||||||
|
|
||||||
# Creates a new parser and sets up the arguments
|
# Creates a new parser and sets up the arguments
|
||||||
setup_parser()
|
setup_parser()
|
||||||
|
|
||||||
@ -962,6 +975,22 @@ def UdemyDownloader():
|
|||||||
args = parser.parse_args()
|
args = parser.parse_args()
|
||||||
process_args(args=args)
|
process_args(args=args)
|
||||||
|
|
||||||
|
# warn that the keyfile is not found
|
||||||
|
if not os.path.exists(keyfile_path):
|
||||||
|
print("!!! Keyfile not found! This means you probably didn't rename the keyfile correctly, DRM lecture decryption will fail! If you aren't downloading DRM encrypted courses, you can ignore this message. !!!")
|
||||||
|
if not iknowwhatimdoing:
|
||||||
|
print("Waiting for 10 seconds...")
|
||||||
|
time.sleep(10)
|
||||||
|
|
||||||
|
else:
|
||||||
|
try_load_keys()
|
||||||
|
|
||||||
|
# ensure 3rd party binaries are installed
|
||||||
|
ensure_dependencies_installed()
|
||||||
|
|
||||||
|
# loads the .env file
|
||||||
|
load_dotenv()
|
||||||
|
|
||||||
udemy = Udemy(access_token=bearer_token)
|
udemy = Udemy(access_token=bearer_token)
|
||||||
|
|
||||||
print("> Fetching course information, this may take a minute...")
|
print("> Fetching course information, this may take a minute...")
|
||||||
|
4
udemy_downloader/__main__.py
Normal file
4
udemy_downloader/__main__.py
Normal file
@ -0,0 +1,4 @@
|
|||||||
|
from UdemyDownloader import UdemyDownloader
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
UdemyDownloader()
|
@ -9,6 +9,7 @@ from mp4parse import F4VParser
|
|||||||
from widevine_pssh_pb2 import WidevinePsshData
|
from widevine_pssh_pb2 import WidevinePsshData
|
||||||
from sanitize import sanitize, slugify, SLUG_OK
|
from sanitize import sanitize, slugify, SLUG_OK
|
||||||
|
|
||||||
|
|
||||||
def extract_kid(mp4_file):
|
def extract_kid(mp4_file):
|
||||||
"""
|
"""
|
||||||
Parameters
|
Parameters
|
||||||
@ -26,7 +27,8 @@ def extract_kid(mp4_file):
|
|||||||
boxes = F4VParser.parse(filename=mp4_file)
|
boxes = F4VParser.parse(filename=mp4_file)
|
||||||
for box in boxes:
|
for box in boxes:
|
||||||
if box.header.box_type == 'moov':
|
if box.header.box_type == 'moov':
|
||||||
pssh_box = next(x for x in box.pssh if x.system_id == "edef8ba979d64acea3c827dcd51d21ed")
|
pssh_box = next(x for x in box.pssh if x.system_id ==
|
||||||
|
"edef8ba979d64acea3c827dcd51d21ed")
|
||||||
hex = codecs.decode(pssh_box.payload, "hex")
|
hex = codecs.decode(pssh_box.payload, "hex")
|
||||||
|
|
||||||
pssh = WidevinePsshData()
|
pssh = WidevinePsshData()
|
||||||
@ -37,6 +39,7 @@ def extract_kid(mp4_file):
|
|||||||
# No Moof or PSSH header found
|
# No Moof or PSSH header found
|
||||||
return None
|
return None
|
||||||
|
|
||||||
|
|
||||||
def _clean(text):
|
def _clean(text):
|
||||||
ok = re.compile(r'[^\\/:*?!"<>|]')
|
ok = re.compile(r'[^\\/:*?!"<>|]')
|
||||||
text = "".join(x if ok.match(x) else "_" for x in text)
|
text = "".join(x if ok.match(x) else "_" for x in text)
|
||||||
@ -49,6 +52,7 @@ def _sanitize(self, unsafetext):
|
|||||||
slugify(unsafetext, lower=False, spaces=True, ok=SLUG_OK + "().[]")))
|
slugify(unsafetext, lower=False, spaces=True, ok=SLUG_OK + "().[]")))
|
||||||
return text
|
return text
|
||||||
|
|
||||||
|
|
||||||
def durationtoseconds(period):
|
def durationtoseconds(period):
|
||||||
"""
|
"""
|
||||||
@author Jayapraveen
|
@author Jayapraveen
|
||||||
@ -74,6 +78,7 @@ def durationtoseconds(period):
|
|||||||
print("Duration Format Error")
|
print("Duration Format Error")
|
||||||
return None
|
return None
|
||||||
|
|
||||||
|
|
||||||
def cleanup(path):
|
def cleanup(path):
|
||||||
"""
|
"""
|
||||||
@author Jayapraveen
|
@author Jayapraveen
|
||||||
@ -86,26 +91,33 @@ def cleanup(path):
|
|||||||
print(f"Error deleting file: {file_list}")
|
print(f"Error deleting file: {file_list}")
|
||||||
os.removedirs(path)
|
os.removedirs(path)
|
||||||
|
|
||||||
|
|
||||||
def remove_files(files):
    """
    Delete every path yielded by ``files`` from the filesystem.

    Paths are removed one at a time, in iteration order, with ``os.remove``;
    any exception it raises propagates to the caller and stops the loop.
    """
    for target_path in files:
        os.remove(target_path)
|
||||||
|
|
||||||
|
|
||||||
def merge(video_title, video_filepath, audio_filepath, output_path, use_h265, h265_crf, h265_preset):
    """
    @author Jayapraveen

    Combine a video file and an audio file into ``output_path`` using ffmpeg.

    The audio stream is always copied as-is (``-c:a copy``). The video stream
    is copied as-is unless ``use_h265`` is truthy, in which case it is
    re-encoded with libx265 using ``h265_crf`` and ``h265_preset``. Metadata
    from the inputs is not carried over (``-map_metadata -1``); only the
    ``title`` tag is written, set to ``video_title``.

    Parameters
    ----------
    video_title : value written to the output's ``title`` metadata tag
    video_filepath : path of the video input
    audio_filepath : path of the audio input
    output_path : path of the merged file (overwritten if present, ``-y``)
    use_h265 : re-encode the video with libx265 when truthy
    h265_crf : value passed to ``-crf`` (only used when ``use_h265``)
    h265_preset : value passed to ``-preset`` (only used when ``use_h265``)

    Returns
    -------
    The value returned by ``os.system`` for the ffmpeg invocation.
    """
    if use_h265:
        video_args = "-c:v libx265 -crf {} -preset {}".format(
            h265_crf, h265_preset)
    else:
        video_args = "-c:v copy"

    # Outside Windows the job is started through nice(1) at niceness 7.
    # This used to be spelled "nide", which is not a command, so the shell
    # failed before ffmpeg was ever started.
    launcher = "" if os.name == "nt" else "nice -n 7 "

    # NOTE: the values are interpolated into a shell command line and are only
    # wrapped in double quotes; a title or path that itself contains a double
    # quote will break the command.
    command = (
        "{}ffmpeg -y -i \"{}\" -i \"{}\" {} -c:a copy -fflags +bitexact "
        "-map_metadata -1 -metadata title=\"{}\" \"{}\""
    ).format(launcher, video_filepath, audio_filepath, video_args,
             video_title, output_path)
    return os.system(command)
|
||||||
|
|
||||||
|
|
||||||
def decrypt(key, in_filepath, out_filepath):
|
def decrypt(key, in_filepath, out_filepath):
|
||||||
"""
|
"""
|
||||||
@author Jayapraveen
|
@author Jayapraveen
|
||||||
@ -119,6 +131,7 @@ def decrypt(key, in_filepath, out_filepath):
|
|||||||
|
|
||||||
return ret_code
|
return ret_code
|
||||||
|
|
||||||
|
|
||||||
def check_for_aria():
|
def check_for_aria():
|
||||||
try:
|
try:
|
||||||
subprocess.Popen(["aria2c", "-v"],
|
subprocess.Popen(["aria2c", "-v"],
|
||||||
|
Loading…
x
Reference in New Issue
Block a user