Feat: Info argument

+ Added info argument to print course information
+ Updated spacing of some text to be more 'tree' like and easier to read
This commit is contained in:
Puyodead1 2021-05-28 16:59:52 -04:00
parent 66aad0dc50
commit 1ad4f1edde
3 changed files with 134 additions and 38 deletions

1
.gitignore vendored
View File

@ -122,3 +122,4 @@ manifest.mpd
.vscode
saved
*.aria2
info.py

View File

@ -69,7 +69,7 @@ You can now run `python main.py` to start downloading. The course will download
```
usage: main.py [-h] -c COURSE_URL [-b BEARER_TOKEN] [-q QUALITY] [-l LANG] [--skip-lectures] [--download-assets] [--download-captions]
[--keep-vtt] [--skip-hls]
[--keep-vtt] [--skip-hls] [--info]
Udemy Downloader
@ -89,6 +89,7 @@ optional arguments:
--keep-vtt If specified, .vtt files won't be removed
--skip-hls If specified, hls streams will be skipped (faster fetching) (hls streams usually contain 1080p quality for non-drm
lectures)
--info If specified, only course information will be printed, nothing will be downloaded
```
- Passing a Bearer Token and Course ID as an argument
@ -116,6 +117,8 @@ optional arguments:
- `python main.py -c <Course URL> --download-captions --keep-vtt`
- Skip parsing HLS Streams (HLS streams usually contain 1080p quality for Non-DRM lectures):
- `python main.py -c <Course URL> --skip-hls`
- Print course information only:
- `python main.py -c <Course URL> --info`
# Credits

116
main.py
View File

@ -1040,8 +1040,6 @@ def process_lecture(lecture, lecture_path, lecture_dir, quality, access_token):
if is_encrypted:
if len(lecture_audio_sources) > 0 and len(lecture_video_sources) > 0:
print(f"> Lecture '%s' has DRM, attempting to download" %
lecture_title)
lecture_working_dir = os.path.join(working_dir,
str(lecture.get("asset_id")))
@ -1055,16 +1053,19 @@ def process_lecture(lecture, lecture_path, lecture_dir, quality, access_token):
key=lambda x: abs(int(x.get("height")) - quality))
if not os.path.exists(lecture_working_dir):
os.mkdir(lecture_working_dir)
print(f" > Lecture '%s' has DRM, attempting to download" %
lecture_title)
handle_segments(video_source, audio_source, lecture_title,
lecture_working_dir, lecture_path)
else:
print("> Lecture '%s' is already downloaded, skipping..." %
print(
" > Lecture '%s' is already downloaded, skipping..." %
lecture_title)
else:
print(f"> Lecture '%s' is missing media links" % lecture_title)
print(f" > Lecture '%s' is missing media links" %
lecture_title)
print(len(lecture_audio_sources), len(lecture_video_sources))
else:
print("> Lecture doesn't have DRM, attempting to download...")
sources = lecture.get("sources")
sources = sorted(sources,
key=lambda x: int(x.get("height")),
@ -1075,14 +1076,17 @@ def process_lecture(lecture, lecture_path, lecture_dir, quality, access_token):
if not os.path.exists(lecture_working_dir):
os.mkdir(lecture_working_dir)
if not os.path.isfile(lecture_path):
print(
" > Lecture doesn't have DRM, attempting to download..."
)
source = sources[0] # first index is the best quality
if isinstance(quality, int):
source = min(
sources,
key=lambda x: abs(int(x.get("height")) - quality))
try:
print("====== Selected quality: ", source.get("type"),
source.get("height"))
print(" ====== Selected quality: ",
source.get("type"), source.get("height"))
url = source.get("download_url")
source_type = source.get("type")
if source_type == "hls":
@ -1098,10 +1102,11 @@ def process_lecture(lecture, lecture_path, lecture_dir, quality, access_token):
except Exception as e:
print(f" > Error downloading lecture: ", e)
else:
print("> Lecture '%s' is already downloaded, skipping..." %
print(
" > Lecture '%s' is already downloaded, skipping..." %
lecture_title)
else:
print("Missing sources for lecture", lecture)
print(" > Missing sources for lecture", lecture)
def parse_new(_udemy, quality, skip_lectures, dl_assets, dl_captions,
@ -1131,7 +1136,8 @@ def parse_new(_udemy, quality, skip_lectures, dl_assets, dl_captions,
lecture_index = lecture.get("lecture_index")
extension = lecture.get("extension")
print(f"> Processing lecture {lecture_index} of {total_lectures}")
print(
f" > Processing lecture {lecture_index} of {total_lectures}")
if not skip_lectures:
if extension == "html":
html_content = lecture.get("html_content").encode(
@ -1143,7 +1149,7 @@ def parse_new(_udemy, quality, skip_lectures, dl_assets, dl_captions,
f.write(html_content)
f.close()
except Exception as e:
print("Failed to write html file: ", e)
print(" > Failed to write html file: ", e)
continue
else:
lecture_path = os.path.join(
@ -1220,6 +1226,75 @@ def parse_new(_udemy, quality, skip_lectures, dl_assets, dl_captions,
keep_vtt)
def course_info(course_data):
    """Print a tree-style summary of a parsed course; nothing is downloaded.

    Reads ``title``, ``total_chapters``, ``total_lectures`` and ``chapters``
    from *course_data*. For every chapter it prints a header line, then for
    every lecture that has no ``extension`` value it prints the title, DRM
    flag, asset count, caption languages and available qualities.

    Qualities come from ``video_sources`` when the lecture is flagged
    ``is_encrypted`` and from ``sources`` otherwise. They are listed tallest
    first as ``type@WIDTHxHEIGHT``. A lecture with no usable source list
    prints an empty list.

    NOTE(review): the ``__main__`` section assigns the result of
    ``udemy._extract_course_info(...)`` to a variable that is also named
    ``course_info``. At module scope that rebinds this function's name, so
    the later ``course_info(_udemy)`` call on the non ``--load-from-file``
    path would no longer reach this function. Renaming that variable looks
    necessary -- confirm against the full file.

    Args:
        course_data: dict describing the course, as built elsewhere in this
            file (schema assumed from the keys read here).

    Returns:
        None. All output goes to stdout.
    """
    print("\n\n\n\n")
    course_title = course_data.get("title")
    chapter_count = course_data.get("total_chapters")
    lecture_count = course_data.get("total_lectures")

    print("> Course: {}".format(course_title))
    print("> Total Chapters: {}".format(chapter_count))
    print("> Total Lectures: {}".format(lecture_count))
    print("\n")

    # Missing containers are treated as empty instead of crashing the loops.
    for chapter in course_data.get("chapters") or []:
        chapter_title = chapter.get("chapter_title")
        chapter_index = chapter.get("chapter_index")
        chapter_lecture_count = chapter.get("lecture_count")

        print("> Chapter: {} ({} of {})".format(chapter_title, chapter_index,
                                                chapter_count))

        for lecture in chapter.get("lectures") or []:
            # Lectures carrying an extension are not listed; skip them before
            # doing any sorting or formatting work.
            if lecture.get("extension"):
                continue

            lecture_is_encrypted = lecture.get("is_encrypted")
            # Computed fresh for every lecture so a lecture without sources
            # never raises NameError or reports the previous lecture's list.
            if lecture_is_encrypted:
                lecture_qualities = _describe_qualities(
                    lecture.get("video_sources"))
            else:
                lecture_qualities = _describe_qualities(lecture.get("sources"))

            caption_languages = [
                x.get("language") for x in lecture.get("subtitles") or []
            ]

            print(" > Lecture: {} ({} of {})".format(
                lecture.get("lecture_title"), lecture.get("index"),
                chapter_lecture_count))
            print(" > DRM: {}".format(lecture_is_encrypted))
            print(" > Asset Count: {}".format(lecture.get("assets_count")))
            print(" > Captions: {}".format(caption_languages))
            print(" > Qualities: {}".format(lecture_qualities))

        # Blank lines separate chapters, but are not printed after the last.
        if chapter_index != chapter_count:
            print("\n\n")


def _describe_qualities(sources):
    """Return ``type@WIDTHxHEIGHT`` labels for *sources*, tallest first."""
    ordered = sorted(sources or [],
                     key=lambda x: int(x.get("height") or 0),
                     reverse=True)
    return [
        "{}@{}x{}".format(x.get("type"), x.get("width"), x.get("height"))
        for x in ordered
    ]
if __name__ == "__main__":
parser = argparse.ArgumentParser(description='Udemy Downloader')
parser.add_argument("-c",
@ -1282,6 +1357,13 @@ if __name__ == "__main__":
help=
"If specified, hls streams will be skipped (faster fetching) (hls streams usually contain 1080p quality for non-drm lectures)",
)
parser.add_argument(
"--info",
dest="info",
action="store_true",
help=
"If specified, only course information will be printed, nothing will be downloaded",
)
parser.add_argument(
"--save-to-file",
@ -1360,8 +1442,9 @@ if __name__ == "__main__":
access_token = os.getenv("UDEMY_BEARER")
udemy = Udemy(access_token)
print("> Fetching course information, this may take a minute...")
print("> Fetching course information, this may take a minute...")
if not args.load_from_file:
course_id, course_info = udemy._extract_course_info(args.course_url)
print("> Course information retrieved!")
if course_info and isinstance(course_info, dict):
@ -1374,6 +1457,9 @@ if __name__ == "__main__":
course_json = json.loads(
open(os.path.join(os.getcwd(), "saved", "course_content.json"),
'r').read())
title = course_json.get("title")
course_title = course_json.get("published_title")
portal_name = course_json.get("portal_name")
else:
course_json = udemy._extract_course_json(args.course_url, course_id,
portal_name)
@ -1390,6 +1476,9 @@ if __name__ == "__main__":
if args.load_from_file:
_udemy = json.loads(
open(os.path.join(os.getcwd(), "saved", "_udemy.json")).read())
if args.info:
course_info(_udemy)
else:
parse_new(_udemy, quality, skip_lectures, dl_assets, dl_captions,
caption_locale, keep_vtt, access_token)
else:
@ -1620,5 +1709,8 @@ if __name__ == "__main__":
f.close()
print("Saved parsed data to json")
if args.info:
course_info(_udemy)
else:
parse_new(_udemy, quality, skip_lectures, dl_assets, dl_captions,
caption_locale, keep_vtt, access_token)