some refactoring

This commit is contained in:
Puyodead1 2023-10-27 09:57:07 -04:00 committed by Puyodead1
parent 7f522ebebb
commit ef9d2a6be3
No known key found for this signature in database
GPG Key ID: A4FA4FEC0DD353FC
3 changed files with 26 additions and 225 deletions

View File

@ -86,7 +86,7 @@ You can now run the program, see the examples below. The course will download to
# Advanced Usage # Advanced Usage
``` ```
usage: main.py [-h] -c COURSE_URL [-b BEARER_TOKEN] [-q QUALITY] [-l LANG] [-cd CONCURRENT_DOWNLOADS] [--disable-ipv6] [--skip-lectures] [--download-assets] usage: main.py [-h] -c COURSE_URL [-b BEARER_TOKEN] [-q QUALITY] [-l LANG] [-cd CONCURRENT_DOWNLOADS] [--skip-lectures] [--download-assets]
[--download-captions] [--download-quizzes] [--keep-vtt] [--skip-hls] [--info] [--id-as-course-name] [-sc] [--save-to-file] [--load-from-file] [--download-captions] [--download-quizzes] [--keep-vtt] [--skip-hls] [--info] [--id-as-course-name] [-sc] [--save-to-file] [--load-from-file]
[--log-level LOG_LEVEL] [--browser {chrome,firefox,opera,edge,brave,chromium,vivaldi,safari}] [--use-h265] [--h265-crf H265_CRF] [--h265-preset H265_PRESET] [--log-level LOG_LEVEL] [--browser {chrome,firefox,opera,edge,brave,chromium,vivaldi,safari}] [--use-h265] [--h265-crf H265_CRF] [--h265-preset H265_PRESET]
[--use-nvenc] [--out OUT] [--continue-lecture-numbers] [--use-nvenc] [--out OUT] [--continue-lecture-numbers]
@ -105,7 +105,6 @@ options:
-l LANG, --lang LANG The language to download for captions, specify 'all' to download all captions (Default is 'en') -l LANG, --lang LANG The language to download for captions, specify 'all' to download all captions (Default is 'en')
-cd CONCURRENT_DOWNLOADS, --concurrent-downloads CONCURRENT_DOWNLOADS -cd CONCURRENT_DOWNLOADS, --concurrent-downloads CONCURRENT_DOWNLOADS
The number of maximum concurrent downloads for segments (HLS and DASH, must be a number 1-30) The number of maximum concurrent downloads for segments (HLS and DASH, must be a number 1-30)
--disable-ipv6 If specified, ipv6 will be disabled in aria2
--skip-lectures If specified, lectures won't be downloaded --skip-lectures If specified, lectures won't be downloaded
--download-assets If specified, lecture assets will be downloaded --download-assets If specified, lecture assets will be downloaded
--download-captions If specified, captions will be downloaded --download-captions If specified, captions will be downloaded
@ -187,16 +186,6 @@ options:
- `python main.py -c <Course URL> --continue-lecture-numbers` - `python main.py -c <Course URL> --continue-lecture-numbers`
- `python main.py -c <Course URL> -n` - `python main.py -c <Course URL> -n`
If you encounter errors while downloading such as
`errorCode=1 Network problem has occurred. cause:Unknown socket error 10051 (0x2743)`
or
`errorCode=1 Network problem has occurred. cause:A socket operation was attempted to an unreachable network.`
Then try disabling ipv6 in aria2 using the `--disable-ipv6` option
# Support # Support
if you want help using the program, join my [Discord](https://discord.gg/tMzrSxQ) server or use [GitHub Issues](https://github.com/Puyodead1/udemy-downloader/issues) if you want help using the program, join my [Discord](https://discord.gg/tMzrSxQ) server or use [GitHub Issues](https://github.com/Puyodead1/udemy-downloader/issues)

View File

@ -1,3 +1,3 @@
{ {
"KeyID": "key" "the key id goes here": "the key goes here"
} }

236
main.py
View File

@ -45,7 +45,6 @@ course_name = None
keep_vtt = False keep_vtt = False
skip_hls = False skip_hls = False
concurrent_downloads = 10 concurrent_downloads = 10
disable_ipv6 = False
save_to_file = None save_to_file = None
load_from_file = None load_from_file = None
course_url = None course_url = None
@ -72,7 +71,7 @@ def log_subprocess_output(prefix: str, pipe: IO[bytes]):
# this is the first function that is called, we parse the arguments, setup the logger, and ensure that required directories exist # this is the first function that is called, we parse the arguments, setup the logger, and ensure that required directories exist
def pre_run(): def pre_run():
global dl_assets, dl_captions, dl_quizzes, skip_lectures, caption_locale, quality, bearer_token, course_name, keep_vtt, skip_hls, concurrent_downloads, disable_ipv6, load_from_file, save_to_file, bearer_token, course_url, info, logger, keys, id_as_course_name, LOG_LEVEL, use_h265, h265_crf, h265_preset, use_nvenc, browser, is_subscription_course, DOWNLOAD_DIR, use_continuous_lecture_numbers global dl_assets, dl_captions, dl_quizzes, skip_lectures, caption_locale, quality, bearer_token, course_name, keep_vtt, skip_hls, concurrent_downloads, load_from_file, save_to_file, bearer_token, course_url, info, logger, keys, id_as_course_name, LOG_LEVEL, use_h265, h265_crf, h265_preset, use_nvenc, browser, is_subscription_course, DOWNLOAD_DIR, use_continuous_lecture_numbers
# make sure the logs directory exists # make sure the logs directory exists
if not os.path.exists(LOG_DIR_PATH): if not os.path.exists(LOG_DIR_PATH):
@ -108,12 +107,6 @@ def pre_run():
type=int, type=int,
help="The number of maximum concurrent downloads for segments (HLS and DASH, must be a number 1-30)", help="The number of maximum concurrent downloads for segments (HLS and DASH, must be a number 1-30)",
) )
parser.add_argument(
"--disable-ipv6",
dest="disable_ipv6",
action="store_true",
help="If specified, ipv6 will be disabled in aria2",
)
parser.add_argument( parser.add_argument(
"--skip-lectures", "--skip-lectures",
dest="skip_lectures", dest="skip_lectures",
@ -259,8 +252,6 @@ def pre_run():
elif concurrent_downloads > 30: elif concurrent_downloads > 30:
# if the user gave a number thats greater than 30, set cc to the max of 30 # if the user gave a number thats greater than 30, set cc to the max of 30
concurrent_downloads = 30 concurrent_downloads = 30
if args.disable_ipv6:
disable_ipv6 = args.disable_ipv6
if args.load_from_file: if args.load_from_file:
load_from_file = args.load_from_file load_from_file = args.load_from_file
if args.save_to_file: if args.save_to_file:
@ -665,10 +656,6 @@ class Udemy:
format_id = results.get("format_id") format_id = results.get("format_id")
best_audio_format_id = format_id.split("+")[1] best_audio_format_id = format_id.split("+")[1]
# I forget what this was for
# best_audio = next((x for x in formats
# if x.get("format_id") == best_audio_format_id),
# None)
for f in formats: for f in formats:
if "video" in f.get("format_note"): if "video" in f.get("format_note"):
# is a video stream # is a video stream
@ -1122,92 +1109,6 @@ class Session(object):
return return
# Thanks to a great open source utility youtube-dl ..
class HTMLAttributeParser(compat_HTMLParser): # pylint: disable=W
"""Trivial HTML parser to gather the attributes for a single element"""
def __init__(self):
self.attrs = {}
compat_HTMLParser.__init__(self)
def handle_starttag(self, tag, attrs):
self.attrs = dict(attrs)
def extract_attributes(html_element):
"""Given a string for an HTML element such as
<el
a="foo" B="bar" c="&98;az" d=boz
empty= noval entity="&amp;"
sq='"' dq="'"
>
Decode and return a dictionary of attributes.
{
'a': 'foo', 'b': 'bar', c: 'baz', d: 'boz',
'empty': '', 'noval': None, 'entity': '&',
'sq': '"', 'dq': '\''
}.
NB HTMLParser is stricter in Python 2.6 & 3.2 than in later versions,
but the cases in the unit test will work for all of 2.6, 2.7, 3.2-3.5.
"""
parser = HTMLAttributeParser()
try:
parser.feed(html_element)
parser.close()
except Exception: # pylint: disable=W
pass
return parser.attrs
def hidden_inputs(html):
html = re.sub(r"<!--(?:(?!<!--).)*-->", "", html)
hidden_inputs = {} # pylint: disable=W
for entry in re.findall(r"(?i)(<input[^>]+>)", html):
attrs = extract_attributes(entry)
if not entry:
continue
if attrs.get("type") not in ("hidden", "submit"):
continue
name = attrs.get("name") or attrs.get("id")
value = attrs.get("value")
if name and value is not None:
hidden_inputs[name] = value
return hidden_inputs
def search_regex(pattern, string, name, default=object(), fatal=True, flags=0, group=None):
"""
Perform a regex search on the given string, using a single or a list of
patterns returning the first matching group.
In case of failure return a default value or raise a WARNING or a
RegexNotFoundError, depending on fatal, specifying the field name.
"""
if isinstance(pattern, str):
mobj = re.search(pattern, string, flags)
else:
for p in pattern:
mobj = re.search(p, string, flags)
if mobj:
break
_name = name
if mobj:
if group is None:
# return the first matching group
return next(g for g in mobj.groups() if g is not None)
else:
return mobj.group(group)
elif default is not object():
return default
elif fatal:
logger.fatal("[-] Unable to extract %s" % _name)
exit(0)
else:
logger.fatal("[-] unable to extract %s" % _name)
exit(0)
class UdemyAuth(object): class UdemyAuth(object):
def __init__(self, username="", password="", cache_session=False): def __init__(self, username="", password="", cache_session=False):
self.username = username self.username = username
@ -1245,19 +1146,6 @@ def durationtoseconds(period):
return None return None
def cleanup(path):
"""
@author Jayapraveen
"""
leftover_files = glob.glob(path + "/*.mp4", recursive=True)
for file_list in leftover_files:
try:
os.remove(file_list)
except OSError:
logger.exception(f"Error deleting file: {file_list}")
os.removedirs(path)
def mux_process(video_title, video_filepath, audio_filepath, output_path): def mux_process(video_title, video_filepath, audio_filepath, output_path):
""" """
@author Jayapraveen @author Jayapraveen
@ -1312,90 +1200,15 @@ def decrypt(kid, in_filepath, out_filepath):
return ret_code return ret_code
def handle_segments(url, format_id, video_title, output_path, lecture_file_name, chapter_dir): def handle_segments(url, format_id, lecture_id, video_title, output_path, chapter_dir):
os.chdir(os.path.join(chapter_dir)) os.chdir(os.path.join(chapter_dir))
# for french language among others, this characters cause problems with shaka-packager resulting in decryption failure
# https://github.com/Puyodead1/udemy-downloader/issues/137
# Thank to cutecat !
lecture_file_name = (
lecture_file_name.replace("é", "e")
.replace("è", "e")
.replace("à", "a")
.replace("À", "A")
.replace("à", "a")
.replace("Á", "A")
.replace("á", "a")
.replace("Â", "a")
.replace("â", "a")
.replace("Ã", "A")
.replace("ã", "a")
.replace("Ä", "A")
.replace("ä", "a")
.replace("Å", "A")
.replace("å", "a")
.replace("Æ", "AE")
.replace("æ", "ae")
.replace("Ç", "C")
.replace("ç", "c")
.replace("Ð", "D")
.replace("ð", "o")
.replace("È", "E")
.replace("è", "e")
.replace("É", "e")
.replace("Ê", "e")
.replace("ê", "e")
.replace("Ë", "E")
.replace("ë", "e")
.replace("Ì", "I")
.replace("ì", "i")
.replace("Í", "I")
.replace("í", "I")
.replace("Î", "I")
.replace("î", "i")
.replace("Ï", "I")
.replace("ï", "i")
.replace("Ñ", "N")
.replace("ñ", "n")
.replace("Ò", "O")
.replace("ò", "o")
.replace("Ó", "O")
.replace("ó", "o")
.replace("Ô", "O")
.replace("ô", "o")
.replace("Õ", "O")
.replace("õ", "o")
.replace("Ö", "o")
.replace("ö", "o")
.replace("œ", "oe")
.replace("Œ", "OE")
.replace("Ø", "O")
.replace("ø", "o")
.replace("ß", "B")
.replace("Ù", "U")
.replace("ù", "u")
.replace("Ú", "U")
.replace("ú", "u")
.replace("Û", "U")
.replace("û", "u")
.replace("Ü", "U")
.replace("ü", "u")
.replace("Ý", "Y")
.replace("ý", "y")
.replace("Þ", "P")
.replace("þ", "P")
.replace("Ÿ", "Y")
.replace("ÿ", "y")
.replace("%", "")
# commas cause problems with shaka-packager resulting in decryption failure
.replace(",", "")
.replace("", "-")
.replace(".mp4", "")
)
video_filepath_enc = lecture_file_name + ".encrypted.mp4" video_filepath_enc = lecture_id + ".encrypted.mp4"
audio_filepath_enc = lecture_file_name + ".encrypted.m4a" audio_filepath_enc = lecture_id + ".encrypted.m4a"
video_filepath_dec = lecture_file_name + ".decrypted.mp4" video_filepath_dec = lecture_id + ".decrypted.mp4"
audio_filepath_dec = lecture_file_name + ".decrypted.m4a" audio_filepath_dec = lecture_id + ".decrypted.m4a"
temp_output_path = os.path.join(chapter_dir, lecture_id + ".mp4")
logger.info("> Downloading Lecture Tracks...") logger.info("> Downloading Lecture Tracks...")
args = [ args = [
"yt-dlp", "yt-dlp",
@ -1406,18 +1219,17 @@ def handle_segments(url, format_id, video_title, output_path, lecture_file_name,
f"{concurrent_downloads}", f"{concurrent_downloads}",
"--downloader", "--downloader",
"aria2c", "aria2c",
"--downloader-args",
'aria2c:"--disable-ipv6"',
"--fixup", "--fixup",
"never", "never",
"-k", "-k",
"-o", "-o",
f"{lecture_file_name}.encrypted.%(ext)s", f"{lecture_id}.encrypted.%(ext)s",
"-f", "-f",
format_id, format_id,
f"{url}", f"{url}",
] ]
if disable_ipv6:
args.append("--downloader-args")
args.append('aria2c:"--disable-ipv6"')
process = subprocess.Popen(args) process = subprocess.Popen(args)
log_subprocess_output("YTDLP-STDOUT", process.stdout) log_subprocess_output("YTDLP-STDOUT", process.stdout)
log_subprocess_output("YTDLP-STDERR", process.stderr) log_subprocess_output("YTDLP-STDERR", process.stderr)
@ -1456,11 +1268,13 @@ def handle_segments(url, format_id, video_title, output_path, lecture_file_name,
return return
logger.info("> Decryption complete") logger.info("> Decryption complete")
logger.info("> Merging video and audio, this might take a minute...") logger.info("> Merging video and audio, this might take a minute...")
mux_process(video_title, video_filepath_dec, audio_filepath_dec, output_path) mux_process(video_title, video_filepath_dec, audio_filepath_dec, temp_output_path)
if ret_code != 0: if ret_code != 0:
logger.error("> Return code from ffmpeg was non-0 (error), skipping!") logger.error("> Return code from ffmpeg was non-0 (error), skipping!")
return return
logger.info("> Merging complete, removing temporary files...") logger.info("> Merging complete, renaming final file...")
os.rename(temp_output_path, output_path)
logger.info("> Cleaning up temporary files...")
os.remove(video_filepath_enc) os.remove(video_filepath_enc)
os.remove(audio_filepath_enc) os.remove(audio_filepath_enc)
os.remove(video_filepath_dec) os.remove(video_filepath_dec)
@ -1538,9 +1352,7 @@ def download_aria(url, file_dir, filename):
""" """
@author Puyodead1 @author Puyodead1
""" """
args = ["aria2c", url, "-o", filename, "-d", file_dir, "-j16", "-s20", "-x16", "-c", "--auto-file-renaming=false", "--summary-interval=0"] args = ["aria2c", url, "-o", filename, "-d", file_dir, "-j16", "-s20", "-x16", "-c", "--auto-file-renaming=false", "--summary-interval=0", "--disable-ipv6"]
if disable_ipv6:
args.append("--disable-ipv6")
process = subprocess.Popen(args) process = subprocess.Popen(args)
log_subprocess_output("ARIA2-STDOUT", process.stdout) log_subprocess_output("ARIA2-STDOUT", process.stdout)
log_subprocess_output("ARIA2-STDERR", process.stderr) log_subprocess_output("ARIA2-STDERR", process.stderr)
@ -1580,7 +1392,8 @@ def process_caption(caption, lecture_title, lecture_dir, tries=0):
logger.exception(f" > Error converting caption") logger.exception(f" > Error converting caption")
def process_lecture(lecture, lecture_path, lecture_file_name, chapter_dir): def process_lecture(lecture, lecture_path, chapter_dir):
lecture_id = lecture.get("id")
lecture_title = lecture.get("lecture_title") lecture_title = lecture.get("lecture_title")
is_encrypted = lecture.get("is_encrypted") is_encrypted = lecture.get("is_encrypted")
lecture_sources = lecture.get("video_sources") lecture_sources = lecture.get("video_sources")
@ -1590,10 +1403,10 @@ def process_lecture(lecture, lecture_path, lecture_file_name, chapter_dir):
source = lecture_sources[-1] # last index is the best quality source = lecture_sources[-1] # last index is the best quality
if isinstance(quality, int): if isinstance(quality, int):
source = min(lecture_sources, key=lambda x: abs(int(x.get("height")) - quality)) source = min(lecture_sources, key=lambda x: abs(int(x.get("height")) - quality))
logger.info(f" > Lecture '%s' has DRM, attempting to download" % lecture_title) logger.info(f" > Lecture '{lecture_title}' has DRM, attempting to download")
handle_segments(source.get("download_url"), source.get("format_id"), lecture_title, lecture_path, lecture_file_name, chapter_dir) handle_segments(source.get("download_url"), source.get("format_id"), str(lecture_id), lecture_title, lecture_path, chapter_dir)
else: else:
logger.info(f" > Lecture '%s' is missing media links" % lecture_title) logger.info(f" > Lecture '{lecture_title}' is missing media links")
logger.debug(f"Lecture source count: {len(lecture_sources)}") logger.debug(f"Lecture source count: {len(lecture_sources)}")
else: else:
sources = lecture.get("sources") sources = lecture.get("sources")
@ -1618,13 +1431,12 @@ def process_lecture(lecture, lecture_path, lecture_file_name, chapter_dir):
f"{concurrent_downloads}", f"{concurrent_downloads}",
"--downloader", "--downloader",
"aria2c", "aria2c",
"--downloader-args",
'aria2c:"--disable-ipv6"',
"-o", "-o",
f"{temp_filepath}", f"{temp_filepath}",
f"{url}", f"{url}",
] ]
if disable_ipv6:
cmd.append("--downloader-args")
cmd.append('aria2c:"--disable-ipv6"')
process = subprocess.Popen(cmd) process = subprocess.Popen(cmd)
log_subprocess_output("YTDLP-STDOUT", process.stdout) log_subprocess_output("YTDLP-STDOUT", process.stdout)
log_subprocess_output("YTDLP-STDERR", process.stderr) log_subprocess_output("YTDLP-STDERR", process.stderr)
@ -1771,7 +1583,7 @@ def parse_new(udemy: Udemy, udemy_object: dict):
except Exception: except Exception:
logger.exception(" > Failed to write html file") logger.exception(" > Failed to write html file")
else: else:
process_lecture(parsed_lecture, lecture_path, lecture_file_name, chapter_dir) process_lecture(parsed_lecture, lecture_path, chapter_dir)
# download subtitles for this lecture # download subtitles for this lecture
subtitles = parsed_lecture.get("subtitles") subtitles = parsed_lecture.get("subtitles")