diff --git a/README.md b/README.md index 0c0955e..e6cb566 100644 --- a/README.md +++ b/README.md @@ -28,7 +28,7 @@ Windows is the primary development OS, but I've made an effort to support Linux # Requirements -1. You would need to download `ffmpeg`, `aria2c`, `mp4decrypt` (from Bento4 SDK) and ``yt-dlp`` (``pip install yt-dlp``). Ensure they are in the system path (typing their name in cmd should invoke them). +1. You would need to download `ffmpeg`, `aria2c`, `mp4decrypt` (from Bento4 SDK) and `yt-dlp` (`pip install yt-dlp`). Ensure they are in the system path (typing their name in cmd should invoke them). # Usage @@ -67,8 +67,9 @@ You can now run the program, see the examples below. The course will download to # Advanced Usage ``` -usage: main.py [-h] -c COURSE_URL [-b BEARER_TOKEN] [-q QUALITY] [-l LANG] [-cd CONCURRENT_DOWNLOADS] [--skip-lectures] [--download-assets] - [--download-captions] [--keep-vtt] [--skip-hls] [--info] +usage: main.py [-h] -c COURSE_URL [-b BEARER_TOKEN] [-q QUALITY] [-l LANG] [-cc CONCURRENT_CONNECTIONS] + [--skip-lectures] [--download-assets] [--download-captions] [--keep-vtt] [--skip-hls] [--use_mkv] + [--info] Udemy Downloader @@ -79,20 +80,30 @@ optional arguments: -b BEARER_TOKEN, --bearer BEARER_TOKEN The Bearer token to use -q QUALITY, --quality QUALITY - Download specific video quality. If the requested quality isn't available, the closest quality will be used. If not - specified, the best quality will be downloaded for each lecture - -l LANG, --lang LANG The language to download for captions, specify 'all' to download all captions (Default is 'en') - -cd CONCURRENT_DOWNLOADS, --concurrent-downloads CONCURRENT_DOWNLOADS - The number of maximum concurrent downloads for segments (HLS and DASH, must be a number 1-50) + Download specific video quality. If the requested quality isn't available, the closest quality + will be used. 
If not specified, the best quality will be downloaded for each lecture + -l LANG, --lang LANG The language to download for captions, specify 'all' to download all captions (Default is + 'en') + -cc CONCURRENT_CONNECTIONS, --concurrent-connections CONCURRENT_CONNECTIONS + The number of maximum concurrent connections for segments (HLS and DASH, must be a number + 1-30) --skip-lectures If specified, lectures won't be downloaded --download-assets If specified, lecture assets will be downloaded --download-captions If specified, captions will be downloaded --keep-vtt If specified, .vtt files won't be removed - --skip-hls If specified, hls streams will be skipped (faster fetching) (hls streams usually contain 1080p quality for non-drm - lectures) + --skip-hls If specified, HLS streams will be skipped (faster fetching, HLS streams usually contain 1080p + quality for non-DRM lectures) + --use_mkv If specified, MKV container will be used instead of MP4, subtitles will be muxed (if subtitles + are requested) --info If specified, only course information will be printed, nothing will be downloaded ``` +
+ +### NOTE: Loading the bearer token from a .env file or the system environment is deprecated and may break in the future; pass the token as a command-line argument instead! + +## Command Examples: + - Passing a Bearer Token and Course ID as an argument - `python main.py -c <Course URL> -b <Bearer Token>` - `python main.py -c https://www.udemy.com/courses/myawesomecourse -b <Bearer Token>` @@ -121,7 +132,7 @@ optional arguments: - Print course information only: - `python main.py -c <Course URL> --info` - Specify max number of concurrent downloads: - - `python main.py -c <Course URL> --concurrent-downloads 20` - - `python main.py -c <Course URL> -cd 20` + - `python main.py -c <Course URL> --concurrent-connections 20` + - `python main.py -c <Course URL> -cc 20` # Credits diff --git a/main.py b/main.py index cfbab22..a086ef9 100644 --- a/main.py +++ b/main.py @@ -23,8 +23,23 @@ from _version import __version__ home_dir = os.getcwd() download_dir = os.path.join(os.getcwd(), "out_dir") keyfile_path = os.path.join(os.getcwd(), "keyfile.json") +keys = None retry = 3 downloader = None +dl_assets = False +skip_lectures = False +dl_captions = False +caption_locale = "en" +quality = None +bearer_token = None +portal_name = None +course_name = None +keep_vtt = False +skip_hls = False +use_mkv = False +concurrent_connections = 10 +access_token = None + HEADERS = { "Origin": "www.udemy.com", "User-Agent": @@ -779,11 +794,6 @@ class UdemyAuth(object): if not os.path.exists(download_dir): os.makedirs(download_dir) -# Get the keys -with open(keyfile_path, 'r') as keyfile: - keyfile = keyfile.read() -keyfile = json.loads(keyfile) - def durationtoseconds(period): """ @@ -843,7 +853,7 @@ def decrypt(kid, in_filepath, out_filepath): """ print("> Decrypting, this might take a minute...") try: - key = keyfile[kid.lower()] + key = keys[kid.lower()] if (os.name == "nt"): os.system(f"mp4decrypt --key 1:%s \"%s\" \"%s\"" % (key, in_filepath, out_filepath)) @@ -984,7 +994,7 @@ def download_aria(url, file_dir, filename): print("Return code: " + str(ret_code)) -def process_caption(caption, lecture_title, lecture_dir, keep_vtt, tries=0): +def
process_caption(caption, lecture_title, lecture_dir, tries=0): filename = f"%s_%s.%s" % (sanitize(lecture_title), caption.get("language"), caption.get("extension")) filename_no_ext = f"%s_%s" % (sanitize(lecture_title), @@ -1020,8 +1030,7 @@ def process_caption(caption, lecture_title, lecture_dir, keep_vtt, tries=0): print(f" > Error converting caption: {e}") -def process_lecture(lecture, lecture_path, lecture_file_name, quality, access_token, - concurrent_connections, chapter_dir): +def process_lecture(lecture, lecture_path, lecture_file_name, chapter_dir): lecture_title = lecture.get("lecture_title") is_encrypted = lecture.get("is_encrypted") lecture_sources = lecture.get("video_sources") @@ -1037,8 +1046,7 @@ def process_lecture(lecture, lecture_path, lecture_file_name, quality, access_to lecture_title) handle_segments(source.get("download_url"), source.get( - "format_id"), lecture_title, lecture_path, lecture_file_name, - concurrent_connections, chapter_dir) + "format_id"), lecture_title, lecture_path, lecture_file_name, chapter_dir) else: print(f" > Lecture '%s' is missing media links" % lecture_title) @@ -1087,8 +1095,7 @@ def process_lecture(lecture, lecture_path, lecture_file_name, quality, access_to print(" > Missing sources for lecture", lecture) -def parse_new(_udemy, quality, skip_lectures, dl_assets, dl_captions, - caption_locale, keep_vtt, access_token, concurrent_connections): +def parse_new(_udemy): total_chapters = _udemy.get("total_chapters") total_lectures = _udemy.get("total_lectures") print(f"Chapter(s) ({total_chapters})") @@ -1147,9 +1154,8 @@ def parse_new(_udemy, quality, skip_lectures, dl_assets, dl_captions, print(" > Failed to write html file: ", e) continue else: - process_lecture(lecture, lecture_path, lecture_file_name, - quality, access_token, - concurrent_connections, chapter_dir) + process_lecture(lecture, lecture_path, + lecture_file_name, chapter_dir) if dl_assets: assets = lecture.get("assets") @@ -1218,8 +1224,7 @@ def 
parse_new(_udemy, quality, skip_lectures, dl_assets, dl_captions, for subtitle in subtitles: lang = subtitle.get("language") if lang == caption_locale or caption_locale == "all": - process_caption(subtitle, lecture_title, chapter_dir, - keep_vtt) + process_caption(subtitle, lecture_title, chapter_dir) def _print_course_info(course_data): @@ -1331,11 +1336,11 @@ if __name__ == "__main__": help="The language to download for captions, specify 'all' to download all captions (Default is 'en')", ) parser.add_argument( - "-cd", - "--concurrent-downloads", - dest="concurrent_downloads", + "-cc", + "--concurrent-connections", + dest="concurrent_connections", type=int, - help="The number of maximum concurrent downloads for segments (HLS and DASH, must be a number 1-30)", + help="The number of maximum concurrent connections for segments (HLS and DASH, must be a number 1-30)", ) parser.add_argument( "--skip-lectures", @@ -1365,7 +1370,13 @@ if __name__ == "__main__": "--skip-hls", dest="skip_hls", action="store_true", - help="If specified, hls streams will be skipped (faster fetching) (hls streams usually contain 1080p quality for non-drm lectures)", + help="If specified, HLS streams will be skipped (faster fetching, HLS streams usually contain 1080p quality for non-DRM lectures)", + ) + parser.add_argument( + "--use_mkv", + dest="use_mkv", + action="store_true", + help="If specified, MKV container will be used instead of MP4, subtitles will be muxed (if subtitles are requested)", ) parser.add_argument( "--info", @@ -1389,18 +1400,6 @@ if __name__ == "__main__": parser.add_argument("-v", "--version", action="version", version='You are running version {version}'.format(version=get_version_string())) - dl_assets = False - skip_lectures = False - dl_captions = False - caption_locale = "en" - quality = None - bearer_token = None - portal_name = None - course_name = None - keep_vtt = False - skip_hls = False - concurrent_downloads = 10 - args = parser.parse_args() if 
args.download_assets: dl_assets = True @@ -1416,15 +1415,17 @@ if __name__ == "__main__": keep_vtt = args.keep_vtt if args.skip_hls: skip_hls = args.skip_hls - if args.concurrent_downloads: - concurrent_downloads = args.concurrent_downloads + if args.use_mkv: + use_mkv = args.use_mkv + if args.concurrent_connections: + concurrent_connections = args.concurrent_connections - if concurrent_downloads <= 0: + if concurrent_connections <= 0: # if the user gave a number that is less than or equal to 0, set cc to default of 10 - concurrent_downloads = 10 - elif concurrent_downloads > 30: + concurrent_connections = 10 + elif concurrent_connections > 30: # if the user gave a number thats greater than 30, set cc to the max of 30 - concurrent_downloads = 30 + concurrent_connections = 30 aria_ret_val = check_for_aria() if not aria_ret_val: @@ -1452,11 +1453,15 @@ if __name__ == "__main__": "> 'save_to_file' was specified, data will be saved to json files") if not os.path.isfile(keyfile_path): - print("> Keyfile not found! Did you rename the file correctly?") + print("❗ Keyfile not found! Did you rename the file correctly? ❗") sys.exit(1) + # Read keys into the module-level `keys` mapping that decrypt() looks up by KID + with open(keyfile_path, 'r') as keyfile: + keyfile = keyfile.read() + keys = json.loads(keyfile) + load_dotenv() - access_token = None if args.bearer_token: access_token = args.bearer_token else: @@ -1500,9 +1505,7 @@ if __name__ == "__main__": if args.info: _print_course_info(_udemy) else: - parse_new(_udemy, quality, skip_lectures, dl_assets, dl_captions, - caption_locale, keep_vtt, access_token, - concurrent_downloads) + parse_new(_udemy) else: _udemy = {} _udemy["access_token"] = access_token @@ -1737,4 +1740,2 @@ if __name__ == "__main__": else: - parse_new(_udemy, quality, skip_lectures, dl_assets, dl_captions, - caption_locale, keep_vtt, access_token, - concurrent_downloads) + parse_new(_udemy)