Merge branch 'feat-mkv'

This commit is contained in:
Puyodead1 2021-08-08 14:21:56 -04:00
commit e835ab6eb1
2 changed files with 72 additions and 58 deletions

View File

@ -28,7 +28,7 @@ Windows is the primary development OS, but I've made an effort to support Linux
# Requirements # Requirements
1. You would need to download `ffmpeg`, `aria2c`, `mp4decrypt` (from Bento4 SDK) and ``yt-dlp`` (``pip install yt-dlp``). Ensure they are in the system path (typing their name in cmd should invoke them). 1. You would need to download `ffmpeg`, `aria2c`, `mp4decrypt` (from Bento4 SDK) and `yt-dlp` (`pip install yt-dlp`). Ensure they are in the system path (typing their name in cmd should invoke them).
# Usage # Usage
@ -67,8 +67,9 @@ You can now run the program, see the examples below. The course will download to
# Advanced Usage # Advanced Usage
``` ```
usage: main.py [-h] -c COURSE_URL [-b BEARER_TOKEN] [-q QUALITY] [-l LANG] [-cd CONCURRENT_DOWNLOADS] [--skip-lectures] [--download-assets] usage: main.py [-h] -c COURSE_URL [-b BEARER_TOKEN] [-q QUALITY] [-l LANG] [-cc CONCURRENT_CONNECTIONS]
[--download-captions] [--keep-vtt] [--skip-hls] [--info] [--skip-lectures] [--download-assets] [--download-captions] [--keep-vtt] [--skip-hls] [--use_mkv]
[--info]
Udemy Downloader Udemy Downloader
@ -79,20 +80,30 @@ optional arguments:
-b BEARER_TOKEN, --bearer BEARER_TOKEN -b BEARER_TOKEN, --bearer BEARER_TOKEN
The Bearer token to use The Bearer token to use
-q QUALITY, --quality QUALITY -q QUALITY, --quality QUALITY
Download specific video quality. If the requested quality isn't available, the closest quality will be used. If not Download specific video quality. If the requested quality isn't available, the closest quality
specified, the best quality will be downloaded for each lecture will be used. If not specified, the best quality will be downloaded for each lecture
-l LANG, --lang LANG The language to download for captions, specify 'all' to download all captions (Default is 'en') -l LANG, --lang LANG The language to download for captions, specify 'all' to download all captions (Default is
-cd CONCURRENT_DOWNLOADS, --concurrent-downloads CONCURRENT_DOWNLOADS 'en')
The number of maximum concurrent downloads for segments (HLS and DASH, must be a number 1-50) -cc CONCURRENT_CONNECTIONS, --concurrent-connections CONCURRENT_CONNECTIONS
The number of maximum concurrent connections for segments (HLS and DASH, must be a number
1-30)
--skip-lectures If specified, lectures won't be downloaded --skip-lectures If specified, lectures won't be downloaded
--download-assets If specified, lecture assets will be downloaded --download-assets If specified, lecture assets will be downloaded
--download-captions If specified, captions will be downloaded --download-captions If specified, captions will be downloaded
--keep-vtt If specified, .vtt files won't be removed --keep-vtt If specified, .vtt files won't be removed
--skip-hls If specified, hls streams will be skipped (faster fetching) (hls streams usually contain 1080p quality for non-drm --skip-hls If specified, HLS streams will be skipped (faster fetching, HLS streams usually contain 1080p
lectures) quality for non-DRM lectures)
--use_mkv If specified, MKV container will be used instead of MP4, subtitles will be muxed (if subtitles
are requested)
--info If specified, only course information will be printed, nothing will be downloaded --info If specified, only course information will be printed, nothing will be downloaded
``` ```
<br>
### NOTE: Loading the bearer token from a .env file or the system environment is deprecated and may break in the future; pass the token as a command-line argument instead!
## Command Examples:
- Passing a Bearer Token and Course ID as an argument - Passing a Bearer Token and Course ID as an argument
- `python main.py -c <Course URL> -b <Bearer Token>` - `python main.py -c <Course URL> -b <Bearer Token>`
- `python main.py -c https://www.udemy.com/courses/myawesomecourse -b <Bearer Token>` - `python main.py -c https://www.udemy.com/courses/myawesomecourse -b <Bearer Token>`
@ -121,7 +132,7 @@ optional arguments:
- Print course information only: - Print course information only:
- `python main.py -c <Course URL> --info` - `python main.py -c <Course URL> --info`
- Specify max number of concurrent downloads: - Specify max number of concurrent downloads:
- `python main.py -c <Course URL> --concurrent-downloads 20` - `python main.py -c <Course URL> --concurrent-connections 20`
- `python main.py -c <Course URL> -cd 20` - `python main.py -c <Course URL> -cc 20`
# Credits # Credits

97
main.py
View File

@ -23,8 +23,23 @@ from _version import __version__
home_dir = os.getcwd() home_dir = os.getcwd()
download_dir = os.path.join(os.getcwd(), "out_dir") download_dir = os.path.join(os.getcwd(), "out_dir")
keyfile_path = os.path.join(os.getcwd(), "keyfile.json") keyfile_path = os.path.join(os.getcwd(), "keyfile.json")
keys = None
retry = 3 retry = 3
downloader = None downloader = None
dl_assets = False
skip_lectures = False
dl_captions = False
caption_locale = "en"
quality = None
bearer_token = None
portal_name = None
course_name = None
keep_vtt = False
skip_hls = False
use_mkv = False
concurrent_connections = 10
access_token = None
HEADERS = { HEADERS = {
"Origin": "www.udemy.com", "Origin": "www.udemy.com",
"User-Agent": "User-Agent":
@ -779,11 +794,6 @@ class UdemyAuth(object):
if not os.path.exists(download_dir): if not os.path.exists(download_dir):
os.makedirs(download_dir) os.makedirs(download_dir)
# Get the keys
with open(keyfile_path, 'r') as keyfile:
keyfile = keyfile.read()
keyfile = json.loads(keyfile)
def durationtoseconds(period): def durationtoseconds(period):
""" """
@ -843,7 +853,7 @@ def decrypt(kid, in_filepath, out_filepath):
""" """
print("> Decrypting, this might take a minute...") print("> Decrypting, this might take a minute...")
try: try:
key = keyfile[kid.lower()] key = keys[kid.lower()]
if (os.name == "nt"): if (os.name == "nt"):
os.system(f"mp4decrypt --key 1:%s \"%s\" \"%s\"" % os.system(f"mp4decrypt --key 1:%s \"%s\" \"%s\"" %
(key, in_filepath, out_filepath)) (key, in_filepath, out_filepath))
@ -984,7 +994,7 @@ def download_aria(url, file_dir, filename):
print("Return code: " + str(ret_code)) print("Return code: " + str(ret_code))
def process_caption(caption, lecture_title, lecture_dir, keep_vtt, tries=0): def process_caption(caption, lecture_title, lecture_dir, tries=0):
filename = f"%s_%s.%s" % (sanitize(lecture_title), caption.get("language"), filename = f"%s_%s.%s" % (sanitize(lecture_title), caption.get("language"),
caption.get("extension")) caption.get("extension"))
filename_no_ext = f"%s_%s" % (sanitize(lecture_title), filename_no_ext = f"%s_%s" % (sanitize(lecture_title),
@ -1020,8 +1030,7 @@ def process_caption(caption, lecture_title, lecture_dir, keep_vtt, tries=0):
print(f" > Error converting caption: {e}") print(f" > Error converting caption: {e}")
def process_lecture(lecture, lecture_path, lecture_file_name, quality, access_token, def process_lecture(lecture, lecture_path, lecture_file_name, chapter_dir):
concurrent_connections, chapter_dir):
lecture_title = lecture.get("lecture_title") lecture_title = lecture.get("lecture_title")
is_encrypted = lecture.get("is_encrypted") is_encrypted = lecture.get("is_encrypted")
lecture_sources = lecture.get("video_sources") lecture_sources = lecture.get("video_sources")
@ -1037,8 +1046,7 @@ def process_lecture(lecture, lecture_path, lecture_file_name, quality, access_to
lecture_title) lecture_title)
handle_segments(source.get("download_url"), handle_segments(source.get("download_url"),
source.get( source.get(
"format_id"), lecture_title, lecture_path, lecture_file_name, "format_id"), lecture_title, lecture_path, lecture_file_name, chapter_dir)
concurrent_connections, chapter_dir)
else: else:
print(f" > Lecture '%s' is missing media links" % print(f" > Lecture '%s' is missing media links" %
lecture_title) lecture_title)
@ -1087,8 +1095,7 @@ def process_lecture(lecture, lecture_path, lecture_file_name, quality, access_to
print(" > Missing sources for lecture", lecture) print(" > Missing sources for lecture", lecture)
def parse_new(_udemy, quality, skip_lectures, dl_assets, dl_captions, def parse_new(_udemy):
caption_locale, keep_vtt, access_token, concurrent_connections):
total_chapters = _udemy.get("total_chapters") total_chapters = _udemy.get("total_chapters")
total_lectures = _udemy.get("total_lectures") total_lectures = _udemy.get("total_lectures")
print(f"Chapter(s) ({total_chapters})") print(f"Chapter(s) ({total_chapters})")
@ -1147,9 +1154,8 @@ def parse_new(_udemy, quality, skip_lectures, dl_assets, dl_captions,
print(" > Failed to write html file: ", e) print(" > Failed to write html file: ", e)
continue continue
else: else:
process_lecture(lecture, lecture_path, lecture_file_name, process_lecture(lecture, lecture_path,
quality, access_token, lecture_file_name, chapter_dir)
concurrent_connections, chapter_dir)
if dl_assets: if dl_assets:
assets = lecture.get("assets") assets = lecture.get("assets")
@ -1218,8 +1224,7 @@ def parse_new(_udemy, quality, skip_lectures, dl_assets, dl_captions,
for subtitle in subtitles: for subtitle in subtitles:
lang = subtitle.get("language") lang = subtitle.get("language")
if lang == caption_locale or caption_locale == "all": if lang == caption_locale or caption_locale == "all":
process_caption(subtitle, lecture_title, chapter_dir, process_caption(subtitle, lecture_title, chapter_dir)
keep_vtt)
def _print_course_info(course_data): def _print_course_info(course_data):
@ -1331,11 +1336,11 @@ if __name__ == "__main__":
help="The language to download for captions, specify 'all' to download all captions (Default is 'en')", help="The language to download for captions, specify 'all' to download all captions (Default is 'en')",
) )
parser.add_argument( parser.add_argument(
"-cd", "-cc",
"--concurrent-downloads", "--concurrent-connections",
dest="concurrent_downloads", dest="concurrent_connections",
type=int, type=int,
help="The number of maximum concurrent downloads for segments (HLS and DASH, must be a number 1-30)", help="The number of maximum concurrent connections for segments (HLS and DASH, must be a number 1-30)",
) )
parser.add_argument( parser.add_argument(
"--skip-lectures", "--skip-lectures",
@ -1365,7 +1370,13 @@ if __name__ == "__main__":
"--skip-hls", "--skip-hls",
dest="skip_hls", dest="skip_hls",
action="store_true", action="store_true",
help="If specified, hls streams will be skipped (faster fetching) (hls streams usually contain 1080p quality for non-drm lectures)", help="If specified, HLS streams will be skipped (faster fetching, HLS streams usually contain 1080p quality for non-DRM lectures)",
)
parser.add_argument(
"--use_mkv",
dest="use_mkv",
action="store_true",
help="If specified, MKV container will be used instead of MP4, subtitles will be muxed (if subtitles are requested)",
) )
parser.add_argument( parser.add_argument(
"--info", "--info",
@ -1389,18 +1400,6 @@ if __name__ == "__main__":
parser.add_argument("-v", "--version", action="version", parser.add_argument("-v", "--version", action="version",
version='You are running version {version}'.format(version=get_version_string())) version='You are running version {version}'.format(version=get_version_string()))
dl_assets = False
skip_lectures = False
dl_captions = False
caption_locale = "en"
quality = None
bearer_token = None
portal_name = None
course_name = None
keep_vtt = False
skip_hls = False
concurrent_downloads = 10
args = parser.parse_args() args = parser.parse_args()
if args.download_assets: if args.download_assets:
dl_assets = True dl_assets = True
@ -1416,15 +1415,17 @@ if __name__ == "__main__":
keep_vtt = args.keep_vtt keep_vtt = args.keep_vtt
if args.skip_hls: if args.skip_hls:
skip_hls = args.skip_hls skip_hls = args.skip_hls
if args.concurrent_downloads: if args.use_mkv:
concurrent_downloads = args.concurrent_downloads use_mkv = args.use_mkv
if args.concurrent_connections:
concurrent_connections = args.concurrent_connections
if concurrent_downloads <= 0: if concurrent_connections <= 0:
# if the user gave a number that is less than or equal to 0, set cc to default of 10 # if the user gave a number that is less than or equal to 0, set cc to default of 10
concurrent_downloads = 10 concurrent_connections = 10
elif concurrent_downloads > 30: elif concurrent_connections > 30:
# if the user gave a number that's greater than 30, set cc to the max of 30 # if the user gave a number that's greater than 30, set cc to the max of 30
concurrent_downloads = 30 concurrent_connections = 30
aria_ret_val = check_for_aria() aria_ret_val = check_for_aria()
if not aria_ret_val: if not aria_ret_val:
@ -1452,11 +1453,15 @@ if __name__ == "__main__":
"> 'save_to_file' was specified, data will be saved to json files") "> 'save_to_file' was specified, data will be saved to json files")
if not os.path.isfile(keyfile_path): if not os.path.isfile(keyfile_path):
print("> Keyfile not found! Did you rename the file correctly?") print(" Keyfile not found! Did you rename the file correctly?")
sys.exit(1) sys.exit(1)
# Read keys
with open(keyfile_path, 'r') as keyfile:
keyfile = keyfile.read()
keyfile = json.loads(keyfile)
load_dotenv() load_dotenv()
access_token = None
if args.bearer_token: if args.bearer_token:
access_token = args.bearer_token access_token = args.bearer_token
else: else:
@ -1500,9 +1505,7 @@ if __name__ == "__main__":
if args.info: if args.info:
_print_course_info(_udemy) _print_course_info(_udemy)
else: else:
parse_new(_udemy, quality, skip_lectures, dl_assets, dl_captions, parse_new(_udemy)
caption_locale, keep_vtt, access_token,
concurrent_downloads)
else: else:
_udemy = {} _udemy = {}
_udemy["access_token"] = access_token _udemy["access_token"] = access_token
@ -1737,4 +1740,4 @@ if __name__ == "__main__":
else: else:
parse_new(_udemy, quality, skip_lectures, dl_assets, dl_captions, parse_new(_udemy, quality, skip_lectures, dl_assets, dl_captions,
caption_locale, keep_vtt, access_token, caption_locale, keep_vtt, access_token,
concurrent_downloads) concurrent_connections)