mirror of
https://cdm-project.com/Download-Tools/udemy-downloader.git
synced 2025-04-30 22:04:27 +02:00
Feat: Info argument
Added an `--info` argument that prints course information without downloading anything, and updated the spacing of some output text to be more tree-like and easier to read.
This commit is contained in:
parent
66aad0dc50
commit
1ad4f1edde
1
.gitignore
vendored
1
.gitignore
vendored
@ -122,3 +122,4 @@ manifest.mpd
|
|||||||
.vscode
|
.vscode
|
||||||
saved
|
saved
|
||||||
*.aria2
|
*.aria2
|
||||||
|
info.py
|
@ -69,7 +69,7 @@ You can now run `python main.py` to start downloading. The course will download
|
|||||||
|
|
||||||
```
|
```
|
||||||
usage: main.py [-h] -c COURSE_URL [-b BEARER_TOKEN] [-q QUALITY] [-l LANG] [--skip-lectures] [--download-assets] [--download-captions]
|
usage: main.py [-h] -c COURSE_URL [-b BEARER_TOKEN] [-q QUALITY] [-l LANG] [--skip-lectures] [--download-assets] [--download-captions]
|
||||||
[--keep-vtt] [--skip-hls]
|
[--keep-vtt] [--skip-hls] [--info]
|
||||||
|
|
||||||
Udemy Downloader
|
Udemy Downloader
|
||||||
|
|
||||||
@ -89,6 +89,7 @@ optional arguments:
|
|||||||
--keep-vtt If specified, .vtt files won't be removed
|
--keep-vtt If specified, .vtt files won't be removed
|
||||||
--skip-hls If specified, hls streams will be skipped (faster fetching) (hls streams usually contain 1080p quality for non-drm
|
--skip-hls If specified, hls streams will be skipped (faster fetching) (hls streams usually contain 1080p quality for non-drm
|
||||||
lectures)
|
lectures)
|
||||||
|
--info If specified, only course information will be printed, nothing will be downloaded
|
||||||
```
|
```
|
||||||
|
|
||||||
- Passing a Bearer Token and Course ID as an argument
|
- Passing a Bearer Token and Course ID as an argument
|
||||||
@ -116,6 +117,8 @@ optional arguments:
|
|||||||
- `python main.py -c <Course URL> --download-captions --keep-vtt`
|
- `python main.py -c <Course URL> --download-captions --keep-vtt`
|
||||||
- Skip parsing HLS Streams (HLS streams usually contain 1080p quality for Non-DRM lectures):
|
- Skip parsing HLS Streams (HLS streams usually contain 1080p quality for Non-DRM lectures):
|
||||||
- `python main.py -c <Course URL> --skip-hls`
|
- `python main.py -c <Course URL> --skip-hls`
|
||||||
|
- Print course information only:
|
||||||
|
- `python main.py -c <Course URL> --info`
|
||||||
|
|
||||||
# Credits
|
# Credits
|
||||||
|
|
||||||
|
140
main.py
140
main.py
@ -986,12 +986,12 @@ def download_aria(url, file_dir, filename):
|
|||||||
"""
|
"""
|
||||||
@author Puyodead1
|
@author Puyodead1
|
||||||
"""
|
"""
|
||||||
print("> Downloading File...")
|
print(" > Downloading File...")
|
||||||
ret_code = subprocess.Popen([
|
ret_code = subprocess.Popen([
|
||||||
"aria2c", url, "-o", filename, "-d", file_dir, "-j16", "-s20", "-x16",
|
"aria2c", url, "-o", filename, "-d", file_dir, "-j16", "-s20", "-x16",
|
||||||
"-c", "--auto-file-renaming=false", "--summary-interval=0"
|
"-c", "--auto-file-renaming=false", "--summary-interval=0"
|
||||||
]).wait()
|
]).wait()
|
||||||
print("> File Downloaded")
|
print(" > File Downloaded")
|
||||||
|
|
||||||
print("Return code: " + str(ret_code))
|
print("Return code: " + str(ret_code))
|
||||||
|
|
||||||
@ -1004,32 +1004,32 @@ def process_caption(caption, lecture_title, lecture_dir, keep_vtt, tries=0):
|
|||||||
filepath = os.path.join(lecture_dir, filename)
|
filepath = os.path.join(lecture_dir, filename)
|
||||||
|
|
||||||
if os.path.isfile(filepath):
|
if os.path.isfile(filepath):
|
||||||
print("> Caption '%s' already downloaded." % filename)
|
print(" > Caption '%s' already downloaded." % filename)
|
||||||
else:
|
else:
|
||||||
print(f"> Downloading caption: '%s'" % filename)
|
print(f" > Downloading caption: '%s'" % filename)
|
||||||
try:
|
try:
|
||||||
download_aria(caption.get("download_url"), lecture_dir, filename)
|
download_aria(caption.get("download_url"), lecture_dir, filename)
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
if tries >= 3:
|
if tries >= 3:
|
||||||
print(
|
print(
|
||||||
f"> Error downloading caption: {e}. Exceeded retries, skipping."
|
f" > Error downloading caption: {e}. Exceeded retries, skipping."
|
||||||
)
|
)
|
||||||
return
|
return
|
||||||
else:
|
else:
|
||||||
print(
|
print(
|
||||||
f"> Error downloading caption: {e}. Will retry {3-tries} more times."
|
f" > Error downloading caption: {e}. Will retry {3-tries} more times."
|
||||||
)
|
)
|
||||||
process_caption(caption, lecture_title, lecture_dir, keep_vtt,
|
process_caption(caption, lecture_title, lecture_dir, keep_vtt,
|
||||||
tries + 1)
|
tries + 1)
|
||||||
if caption.get("extension") == "vtt":
|
if caption.get("extension") == "vtt":
|
||||||
try:
|
try:
|
||||||
print("> Converting caption to SRT format...")
|
print(" > Converting caption to SRT format...")
|
||||||
convert(lecture_dir, filename_no_ext)
|
convert(lecture_dir, filename_no_ext)
|
||||||
print("> Caption conversion complete.")
|
print(" > Caption conversion complete.")
|
||||||
if not keep_vtt:
|
if not keep_vtt:
|
||||||
os.remove(filepath)
|
os.remove(filepath)
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
print(f"> Error converting caption: {e}")
|
print(f" > Error converting caption: {e}")
|
||||||
|
|
||||||
|
|
||||||
def process_lecture(lecture, lecture_path, lecture_dir, quality, access_token):
|
def process_lecture(lecture, lecture_path, lecture_dir, quality, access_token):
|
||||||
@ -1040,8 +1040,6 @@ def process_lecture(lecture, lecture_path, lecture_dir, quality, access_token):
|
|||||||
|
|
||||||
if is_encrypted:
|
if is_encrypted:
|
||||||
if len(lecture_audio_sources) > 0 and len(lecture_video_sources) > 0:
|
if len(lecture_audio_sources) > 0 and len(lecture_video_sources) > 0:
|
||||||
print(f"> Lecture '%s' has DRM, attempting to download" %
|
|
||||||
lecture_title)
|
|
||||||
lecture_working_dir = os.path.join(working_dir,
|
lecture_working_dir = os.path.join(working_dir,
|
||||||
str(lecture.get("asset_id")))
|
str(lecture.get("asset_id")))
|
||||||
|
|
||||||
@ -1055,16 +1053,19 @@ def process_lecture(lecture, lecture_path, lecture_dir, quality, access_token):
|
|||||||
key=lambda x: abs(int(x.get("height")) - quality))
|
key=lambda x: abs(int(x.get("height")) - quality))
|
||||||
if not os.path.exists(lecture_working_dir):
|
if not os.path.exists(lecture_working_dir):
|
||||||
os.mkdir(lecture_working_dir)
|
os.mkdir(lecture_working_dir)
|
||||||
|
print(f" > Lecture '%s' has DRM, attempting to download" %
|
||||||
|
lecture_title)
|
||||||
handle_segments(video_source, audio_source, lecture_title,
|
handle_segments(video_source, audio_source, lecture_title,
|
||||||
lecture_working_dir, lecture_path)
|
lecture_working_dir, lecture_path)
|
||||||
else:
|
else:
|
||||||
print("> Lecture '%s' is already downloaded, skipping..." %
|
print(
|
||||||
|
" > Lecture '%s' is already downloaded, skipping..." %
|
||||||
lecture_title)
|
lecture_title)
|
||||||
else:
|
else:
|
||||||
print(f"> Lecture '%s' is missing media links" % lecture_title)
|
print(f" > Lecture '%s' is missing media links" %
|
||||||
|
lecture_title)
|
||||||
print(len(lecture_audio_sources), len(lecture_video_sources))
|
print(len(lecture_audio_sources), len(lecture_video_sources))
|
||||||
else:
|
else:
|
||||||
print("> Lecture doesn't have DRM, attempting to download...")
|
|
||||||
sources = lecture.get("sources")
|
sources = lecture.get("sources")
|
||||||
sources = sorted(sources,
|
sources = sorted(sources,
|
||||||
key=lambda x: int(x.get("height")),
|
key=lambda x: int(x.get("height")),
|
||||||
@ -1075,14 +1076,17 @@ def process_lecture(lecture, lecture_path, lecture_dir, quality, access_token):
|
|||||||
if not os.path.exists(lecture_working_dir):
|
if not os.path.exists(lecture_working_dir):
|
||||||
os.mkdir(lecture_working_dir)
|
os.mkdir(lecture_working_dir)
|
||||||
if not os.path.isfile(lecture_path):
|
if not os.path.isfile(lecture_path):
|
||||||
|
print(
|
||||||
|
" > Lecture doesn't have DRM, attempting to download..."
|
||||||
|
)
|
||||||
source = sources[0] # first index is the best quality
|
source = sources[0] # first index is the best quality
|
||||||
if isinstance(quality, int):
|
if isinstance(quality, int):
|
||||||
source = min(
|
source = min(
|
||||||
sources,
|
sources,
|
||||||
key=lambda x: abs(int(x.get("height")) - quality))
|
key=lambda x: abs(int(x.get("height")) - quality))
|
||||||
try:
|
try:
|
||||||
print("====== Selected quality: ", source.get("type"),
|
print(" ====== Selected quality: ",
|
||||||
source.get("height"))
|
source.get("type"), source.get("height"))
|
||||||
url = source.get("download_url")
|
url = source.get("download_url")
|
||||||
source_type = source.get("type")
|
source_type = source.get("type")
|
||||||
if source_type == "hls":
|
if source_type == "hls":
|
||||||
@ -1092,16 +1096,17 @@ def process_lecture(lecture, lecture_path, lecture_dir, quality, access_token):
|
|||||||
temp_filepath).download()
|
temp_filepath).download()
|
||||||
if retVal:
|
if retVal:
|
||||||
os.rename(temp_filepath, lecture_path)
|
os.rename(temp_filepath, lecture_path)
|
||||||
print("> HLS Download success")
|
print(" > HLS Download success")
|
||||||
else:
|
else:
|
||||||
download_aria(url, lecture_dir, lecture_title + ".mp4")
|
download_aria(url, lecture_dir, lecture_title + ".mp4")
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
print(f"> Error downloading lecture: ", e)
|
print(f" > Error downloading lecture: ", e)
|
||||||
else:
|
else:
|
||||||
print("> Lecture '%s' is already downloaded, skipping..." %
|
print(
|
||||||
|
" > Lecture '%s' is already downloaded, skipping..." %
|
||||||
lecture_title)
|
lecture_title)
|
||||||
else:
|
else:
|
||||||
print("Missing sources for lecture", lecture)
|
print(" > Missing sources for lecture", lecture)
|
||||||
|
|
||||||
|
|
||||||
def parse_new(_udemy, quality, skip_lectures, dl_assets, dl_captions,
|
def parse_new(_udemy, quality, skip_lectures, dl_assets, dl_captions,
|
||||||
@ -1131,7 +1136,8 @@ def parse_new(_udemy, quality, skip_lectures, dl_assets, dl_captions,
|
|||||||
lecture_index = lecture.get("lecture_index")
|
lecture_index = lecture.get("lecture_index")
|
||||||
|
|
||||||
extension = lecture.get("extension")
|
extension = lecture.get("extension")
|
||||||
print(f"> Processing lecture {lecture_index} of {total_lectures}")
|
print(
|
||||||
|
f" > Processing lecture {lecture_index} of {total_lectures}")
|
||||||
if not skip_lectures:
|
if not skip_lectures:
|
||||||
if extension == "html":
|
if extension == "html":
|
||||||
html_content = lecture.get("html_content").encode(
|
html_content = lecture.get("html_content").encode(
|
||||||
@ -1143,7 +1149,7 @@ def parse_new(_udemy, quality, skip_lectures, dl_assets, dl_captions,
|
|||||||
f.write(html_content)
|
f.write(html_content)
|
||||||
f.close()
|
f.close()
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
print("Failed to write html file: ", e)
|
print(" > Failed to write html file: ", e)
|
||||||
continue
|
continue
|
||||||
else:
|
else:
|
||||||
lecture_path = os.path.join(
|
lecture_path = os.path.join(
|
||||||
@ -1153,7 +1159,7 @@ def parse_new(_udemy, quality, skip_lectures, dl_assets, dl_captions,
|
|||||||
|
|
||||||
if dl_assets:
|
if dl_assets:
|
||||||
assets = lecture.get("assets")
|
assets = lecture.get("assets")
|
||||||
print("> Processing {} asset(s) for lecture...".format(
|
print(" > Processing {} asset(s) for lecture...".format(
|
||||||
len(assets)))
|
len(assets)))
|
||||||
|
|
||||||
for asset in assets:
|
for asset in assets:
|
||||||
@ -1220,6 +1226,75 @@ def parse_new(_udemy, quality, skip_lectures, dl_assets, dl_captions,
|
|||||||
keep_vtt)
|
keep_vtt)
|
||||||
|
|
||||||
|
|
||||||
|
def _format_qualities(sources):
    """Return ``type@WIDTHxHEIGHT`` labels for *sources*, tallest first.

    ``sources`` may be ``None`` or empty, in which case an empty list is
    returned. A missing ``height`` sorts as 0 instead of raising.
    """
    ordered = sorted(sources or [],
                     key=lambda src: int(src.get("height") or 0),
                     reverse=True)
    return [
        "{}@{}x{}".format(src.get("type"), src.get("width"),
                          src.get("height")) for src in ordered
    ]


def course_info(course_data):
    """Print a summary of a parsed course; nothing is downloaded.

    Prints the course title and totals, then for every chapter its title
    and position, and for every lecture without an ``extension`` value its
    title, DRM flag, asset count, caption languages and available
    qualities.

    Args:
        course_data: dict read with ``.get`` for the keys ``title``,
            ``total_chapters``, ``total_lectures`` and ``chapters``. Each
            chapter is read for ``chapter_title``, ``chapter_index``,
            ``lecture_count`` and ``lectures``; each lecture for
            ``lecture_title``, ``index``, ``assets_count``,
            ``is_encrypted``, ``subtitles``, ``extension``, ``sources``
            and ``video_sources``. Missing or ``None`` collections are
            treated as empty.

    Returns:
        None. All output goes to stdout.

    NOTE(review): the ``__main__`` section of this file assigns the result
    of ``udemy._extract_course_info(...)`` to a variable that is also
    named ``course_info``. If that assignment runs before this function is
    called, the name no longer refers to this function -- confirm and
    rename the variable there.
    """
    print("\n\n\n\n")
    chapter_count = course_data.get("total_chapters")

    print("> Course: {}".format(course_data.get("title")))
    print("> Total Chapters: {}".format(chapter_count))
    print("> Total Lectures: {}".format(course_data.get("total_lectures")))
    print("\n")

    for chapter in course_data.get("chapters") or []:
        chapter_index = chapter.get("chapter_index")
        chapter_lecture_count = chapter.get("lecture_count")

        print("> Chapter: {} ({} of {})".format(chapter.get("chapter_title"),
                                                chapter_index,
                                                chapter_count))

        for lecture in chapter.get("lectures") or []:
            # Lectures carrying an extension are not listed; skip them
            # before touching their sources so malformed source data on an
            # unlisted lecture cannot abort the whole report.
            if lecture.get("extension"):
                continue

            lecture_is_encrypted = lecture.get("is_encrypted")

            # DRM lectures expose their streams under "video_sources",
            # everything else under "sources". Always rebuild the list so
            # a lecture without sources shows [] rather than reusing the
            # previous lecture's qualities (or raising NameError).
            if lecture_is_encrypted:
                lecture_qualities = _format_qualities(
                    lecture.get("video_sources"))
            else:
                lecture_qualities = _format_qualities(lecture.get("sources"))

            caption_languages = [
                sub.get("language") for sub in lecture.get("subtitles") or []
            ]

            print(" > Lecture: {} ({} of {})".format(
                lecture.get("lecture_title"), lecture.get("index"),
                chapter_lecture_count))
            print(" > DRM: {}".format(lecture_is_encrypted))
            print(" > Asset Count: {}".format(lecture.get("assets_count")))
            print(" > Captions: {}".format(caption_languages))
            print(" > Qualities: {}".format(lecture_qualities))

        # Blank lines separate chapters; none after the last one.
        if chapter_index != chapter_count:
            print("\n\n")
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
parser = argparse.ArgumentParser(description='Udemy Downloader')
|
parser = argparse.ArgumentParser(description='Udemy Downloader')
|
||||||
parser.add_argument("-c",
|
parser.add_argument("-c",
|
||||||
@ -1282,6 +1357,13 @@ if __name__ == "__main__":
|
|||||||
help=
|
help=
|
||||||
"If specified, hls streams will be skipped (faster fetching) (hls streams usually contain 1080p quality for non-drm lectures)",
|
"If specified, hls streams will be skipped (faster fetching) (hls streams usually contain 1080p quality for non-drm lectures)",
|
||||||
)
|
)
|
||||||
|
parser.add_argument(
|
||||||
|
"--info",
|
||||||
|
dest="info",
|
||||||
|
action="store_true",
|
||||||
|
help=
|
||||||
|
"If specified, only course information will be printed, nothing will be downloaded",
|
||||||
|
)
|
||||||
|
|
||||||
parser.add_argument(
|
parser.add_argument(
|
||||||
"--save-to-file",
|
"--save-to-file",
|
||||||
@ -1360,8 +1442,9 @@ if __name__ == "__main__":
|
|||||||
access_token = os.getenv("UDEMY_BEARER")
|
access_token = os.getenv("UDEMY_BEARER")
|
||||||
|
|
||||||
udemy = Udemy(access_token)
|
udemy = Udemy(access_token)
|
||||||
print("> Fetching course information, this may take a minute...")
|
|
||||||
|
|
||||||
|
print("> Fetching course information, this may take a minute...")
|
||||||
|
if not args.load_from_file:
|
||||||
course_id, course_info = udemy._extract_course_info(args.course_url)
|
course_id, course_info = udemy._extract_course_info(args.course_url)
|
||||||
print("> Course information retrieved!")
|
print("> Course information retrieved!")
|
||||||
if course_info and isinstance(course_info, dict):
|
if course_info and isinstance(course_info, dict):
|
||||||
@ -1374,6 +1457,9 @@ if __name__ == "__main__":
|
|||||||
course_json = json.loads(
|
course_json = json.loads(
|
||||||
open(os.path.join(os.getcwd(), "saved", "course_content.json"),
|
open(os.path.join(os.getcwd(), "saved", "course_content.json"),
|
||||||
'r').read())
|
'r').read())
|
||||||
|
title = course_json.get("title")
|
||||||
|
course_title = course_json.get("published_title")
|
||||||
|
portal_name = course_json.get("portal_name")
|
||||||
else:
|
else:
|
||||||
course_json = udemy._extract_course_json(args.course_url, course_id,
|
course_json = udemy._extract_course_json(args.course_url, course_id,
|
||||||
portal_name)
|
portal_name)
|
||||||
@ -1390,6 +1476,9 @@ if __name__ == "__main__":
|
|||||||
if args.load_from_file:
|
if args.load_from_file:
|
||||||
_udemy = json.loads(
|
_udemy = json.loads(
|
||||||
open(os.path.join(os.getcwd(), "saved", "_udemy.json")).read())
|
open(os.path.join(os.getcwd(), "saved", "_udemy.json")).read())
|
||||||
|
if args.info:
|
||||||
|
course_info(_udemy)
|
||||||
|
else:
|
||||||
parse_new(_udemy, quality, skip_lectures, dl_assets, dl_captions,
|
parse_new(_udemy, quality, skip_lectures, dl_assets, dl_captions,
|
||||||
caption_locale, keep_vtt, access_token)
|
caption_locale, keep_vtt, access_token)
|
||||||
else:
|
else:
|
||||||
@ -1620,5 +1709,8 @@ if __name__ == "__main__":
|
|||||||
f.close()
|
f.close()
|
||||||
print("Saved parsed data to json")
|
print("Saved parsed data to json")
|
||||||
|
|
||||||
|
if args.info:
|
||||||
|
course_info(_udemy)
|
||||||
|
else:
|
||||||
parse_new(_udemy, quality, skip_lectures, dl_assets, dl_captions,
|
parse_new(_udemy, quality, skip_lectures, dl_assets, dl_captions,
|
||||||
caption_locale, keep_vtt, access_token)
|
caption_locale, keep_vtt, access_token)
|
||||||
|
Loading…
x
Reference in New Issue
Block a user