From ef9d2a6be37773e4ca4a42a368988a6287f444d4 Mon Sep 17 00:00:00 2001 From: Puyodead1 Date: Fri, 27 Oct 2023 09:57:07 -0400 Subject: [PATCH] some refactoring --- README.md | 13 +-- keyfile.example.json | 2 +- main.py | 236 +++++-------------------------------------- 3 files changed, 26 insertions(+), 225 deletions(-) diff --git a/README.md b/README.md index 569f2e3..19bd284 100644 --- a/README.md +++ b/README.md @@ -86,7 +86,7 @@ You can now run the program, see the examples below. The course will download to # Advanced Usage ``` -usage: main.py [-h] -c COURSE_URL [-b BEARER_TOKEN] [-q QUALITY] [-l LANG] [-cd CONCURRENT_DOWNLOADS] [--disable-ipv6] [--skip-lectures] [--download-assets] +usage: main.py [-h] -c COURSE_URL [-b BEARER_TOKEN] [-q QUALITY] [-l LANG] [-cd CONCURRENT_DOWNLOADS] [--skip-lectures] [--download-assets] [--download-captions] [--download-quizzes] [--keep-vtt] [--skip-hls] [--info] [--id-as-course-name] [-sc] [--save-to-file] [--load-from-file] [--log-level LOG_LEVEL] [--browser {chrome,firefox,opera,edge,brave,chromium,vivaldi,safari}] [--use-h265] [--h265-crf H265_CRF] [--h265-preset H265_PRESET] [--use-nvenc] [--out OUT] [--continue-lecture-numbers] @@ -105,7 +105,6 @@ options: -l LANG, --lang LANG The language to download for captions, specify 'all' to download all captions (Default is 'en') -cd CONCURRENT_DOWNLOADS, --concurrent-downloads CONCURRENT_DOWNLOADS The number of maximum concurrent downloads for segments (HLS and DASH, must be a number 1-30) - --disable-ipv6 If specified, ipv6 will be disabled in aria2 --skip-lectures If specified, lectures won't be downloaded --download-assets If specified, lecture assets will be downloaded --download-captions If specified, captions will be downloaded @@ -187,16 +186,6 @@ options: - `python main.py -c --continue-lecture-numbers` - `python main.py -c -n` -If you encounter errors while downloading such as - -`errorCode=1 Network problem has occurred. cause:Unknown socket error 10051 (0x2743)` - -or - -`errorCode=1 Network problem has occurred. cause:A socket operation was attempted to an unreachable network.` - -Then try disabling ipv6 in aria2 using the `--disable-ipv6` option - # Support if you want help using the program, join my [Discord](https://discord.gg/tMzrSxQ) server or use [GitHub Issues](https://github.com/Puyodead1/udemy-downloader/issues) diff --git a/keyfile.example.json b/keyfile.example.json index 6500cf9..8a7a18f 100644 --- a/keyfile.example.json +++ b/keyfile.example.json @@ -1,3 +1,3 @@ { - "KeyID": "key" + "the key id goes here": "the key goes here" } diff --git a/main.py b/main.py index 7c24c93..284a85f 100644 --- a/main.py +++ b/main.py @@ -45,7 +45,6 @@ course_name = None keep_vtt = False skip_hls = False concurrent_downloads = 10 -disable_ipv6 = False save_to_file = None load_from_file = None course_url = None @@ -72,7 +71,7 @@ def log_subprocess_output(prefix: str, pipe: IO[bytes]): # this is the first function that is called, we parse the arguments, setup the logger, and ensure that required directories exist def pre_run(): - global dl_assets, dl_captions, dl_quizzes, skip_lectures, caption_locale, quality, bearer_token, course_name, keep_vtt, skip_hls, concurrent_downloads, disable_ipv6, load_from_file, save_to_file, bearer_token, course_url, info, logger, keys, id_as_course_name, LOG_LEVEL, use_h265, h265_crf, h265_preset, use_nvenc, browser, is_subscription_course, DOWNLOAD_DIR, use_continuous_lecture_numbers + global dl_assets, dl_captions, dl_quizzes, skip_lectures, caption_locale, quality, bearer_token, course_name, keep_vtt, skip_hls, concurrent_downloads, load_from_file, save_to_file, bearer_token, course_url, info, logger, keys, id_as_course_name, LOG_LEVEL, use_h265, h265_crf, h265_preset, use_nvenc, browser, is_subscription_course, DOWNLOAD_DIR, use_continuous_lecture_numbers # make sure the logs directory exists if not os.path.exists(LOG_DIR_PATH): @@ -108,12 +107,6 @@ def pre_run(): type=int, help="The number of maximum concurrent downloads for segments (HLS and DASH, must be a number 1-30)", ) - parser.add_argument( - "--disable-ipv6", - dest="disable_ipv6", - action="store_true", - help="If specified, ipv6 will be disabled in aria2", - ) parser.add_argument( "--skip-lectures", dest="skip_lectures", @@ -259,8 +252,6 @@ def pre_run(): elif concurrent_downloads > 30: # if the user gave a number thats greater than 30, set cc to the max of 30 concurrent_downloads = 30 - if args.disable_ipv6: - disable_ipv6 = args.disable_ipv6 if args.load_from_file: load_from_file = args.load_from_file if args.save_to_file: @@ -665,10 +656,6 @@ class Udemy: format_id = results.get("format_id") best_audio_format_id = format_id.split("+")[1] - # I forget what this was for - # best_audio = next((x for x in formats - # if x.get("format_id") == best_audio_format_id), - # None) for f in formats: if "video" in f.get("format_note"): # is a video stream @@ -1122,92 +1109,6 @@ class Session(object): return -# Thanks to a great open source utility youtube-dl .. -class HTMLAttributeParser(compat_HTMLParser): # pylint: disable=W - """Trivial HTML parser to gather the attributes for a single element""" - - def __init__(self): - self.attrs = {} - compat_HTMLParser.__init__(self) - - def handle_starttag(self, tag, attrs): - self.attrs = dict(attrs) - - -def extract_attributes(html_element): - """Given a string for an HTML element such as - - Decode and return a dictionary of attributes. - { - 'a': 'foo', 'b': 'bar', c: 'baz', d: 'boz', - 'empty': '', 'noval': None, 'entity': '&', - 'sq': '"', 'dq': '\'' - }. - NB HTMLParser is stricter in Python 2.6 & 3.2 than in later versions, - but the cases in the unit test will work for all of 2.6, 2.7, 3.2-3.5. - """ - parser = HTMLAttributeParser() - try: - parser.feed(html_element) - parser.close() - except Exception: # pylint: disable=W - pass - return parser.attrs - - -def hidden_inputs(html): - html = re.sub(r"", "", html) - hidden_inputs = {} # pylint: disable=W - for entry in re.findall(r"(?i)(]+>)", html): - attrs = extract_attributes(entry) - if not entry: - continue - if attrs.get("type") not in ("hidden", "submit"): - continue - name = attrs.get("name") or attrs.get("id") - value = attrs.get("value") - if name and value is not None: - hidden_inputs[name] = value - return hidden_inputs - - -def search_regex(pattern, string, name, default=object(), fatal=True, flags=0, group=None): - """ - Perform a regex search on the given string, using a single or a list of - patterns returning the first matching group. - In case of failure return a default value or raise a WARNING or a - RegexNotFoundError, depending on fatal, specifying the field name. - """ - if isinstance(pattern, str): - mobj = re.search(pattern, string, flags) - else: - for p in pattern: - mobj = re.search(p, string, flags) - if mobj: - break - - _name = name - - if mobj: - if group is None: - # return the first matching group - return next(g for g in mobj.groups() if g is not None) - else: - return mobj.group(group) - elif default is not object(): - return default - elif fatal: - logger.fatal("[-] Unable to extract %s" % _name) - exit(0) - else: - logger.fatal("[-] unable to extract %s" % _name) - exit(0) - - class UdemyAuth(object): def __init__(self, username="", password="", cache_session=False): self.username = username @@ -1245,19 +1146,6 @@ def durationtoseconds(period): return None -def cleanup(path): - """ - @author Jayapraveen - """ - leftover_files = glob.glob(path + "/*.mp4", recursive=True) - for file_list in leftover_files: - try: - os.remove(file_list) - except OSError: - logger.exception(f"Error deleting file: {file_list}") - os.removedirs(path) - - def mux_process(video_title, video_filepath, audio_filepath, output_path): """ @author Jayapraveen @@ -1312,90 +1200,15 @@ def decrypt(kid, in_filepath, out_filepath): return ret_code -def handle_segments(url, format_id, video_title, output_path, lecture_file_name, chapter_dir): +def handle_segments(url, format_id, lecture_id, video_title, output_path, chapter_dir): os.chdir(os.path.join(chapter_dir)) - # for french language among others, this characters cause problems with shaka-packager resulting in decryption failure - # https://github.com/Puyodead1/udemy-downloader/issues/137 - # Thank to cutecat ! - lecture_file_name = ( - lecture_file_name.replace("é", "e") - .replace("è", "e") - .replace("à", "a") - .replace("À", "A") - .replace("à", "a") - .replace("Á", "A") - .replace("á", "a") - .replace("Â", "a") - .replace("â", "a") - .replace("Ã", "A") - .replace("ã", "a") - .replace("Ä", "A") - .replace("ä", "a") - .replace("Å", "A") - .replace("å", "a") - .replace("Æ", "AE") - .replace("æ", "ae") - .replace("Ç", "C") - .replace("ç", "c") - .replace("Ð", "D") - .replace("ð", "o") - .replace("È", "E") - .replace("è", "e") - .replace("É", "e") - .replace("Ê", "e") - .replace("ê", "e") - .replace("Ë", "E") - .replace("ë", "e") - .replace("Ì", "I") - .replace("ì", "i") - .replace("Í", "I") - .replace("í", "I") - .replace("Î", "I") - .replace("î", "i") - .replace("Ï", "I") - .replace("ï", "i") - .replace("Ñ", "N") - .replace("ñ", "n") - .replace("Ò", "O") - .replace("ò", "o") - .replace("Ó", "O") - .replace("ó", "o") - .replace("Ô", "O") - .replace("ô", "o") - .replace("Õ", "O") - .replace("õ", "o") - .replace("Ö", "o") - .replace("ö", "o") - .replace("œ", "oe") - .replace("Œ", "OE") - .replace("Ø", "O") - .replace("ø", "o") - .replace("ß", "B") - .replace("Ù", "U") - .replace("ù", "u") - .replace("Ú", "U") - .replace("ú", "u") - .replace("Û", "U") - .replace("û", "u") - .replace("Ü", "U") - .replace("ü", "u") - .replace("Ý", "Y") - .replace("ý", "y") - .replace("Þ", "P") - .replace("þ", "P") - .replace("Ÿ", "Y") - .replace("ÿ", "y") - .replace("%", "") - # commas cause problems with shaka-packager resulting in decryption failure - .replace(",", "") - .replace("–", "-") - .replace(".mp4", "") - ) + + video_filepath_enc = lecture_id + ".encrypted.mp4" + audio_filepath_enc = lecture_id + ".encrypted.m4a" + video_filepath_dec = lecture_id + ".decrypted.mp4" + audio_filepath_dec = lecture_id + ".decrypted.m4a" + temp_output_path = os.path.join(chapter_dir, lecture_id + ".mp4") - video_filepath_enc = lecture_file_name + ".encrypted.mp4" - audio_filepath_enc = lecture_file_name + ".encrypted.m4a" - video_filepath_dec = lecture_file_name + ".decrypted.mp4" - audio_filepath_dec = lecture_file_name + ".decrypted.m4a" logger.info("> Downloading Lecture Tracks...") args = [ "yt-dlp", @@ -1406,18 +1219,17 @@ def handle_segments(url, format_id, video_title, output_path, lecture_file_name, f"{concurrent_downloads}", "--downloader", "aria2c", + "--downloader-args", + 'aria2c:"--disable-ipv6"', "--fixup", "never", "-k", "-o", - f"{lecture_file_name}.encrypted.%(ext)s", + f"{lecture_id}.encrypted.%(ext)s", "-f", format_id, f"{url}", ] - if disable_ipv6: - args.append("--downloader-args") - args.append('aria2c:"--disable-ipv6"') process = subprocess.Popen(args) log_subprocess_output("YTDLP-STDOUT", process.stdout) log_subprocess_output("YTDLP-STDERR", process.stderr) @@ -1456,11 +1268,13 @@ def handle_segments(url, format_id, video_title, output_path, lecture_file_name, return logger.info("> Decryption complete") logger.info("> Merging video and audio, this might take a minute...") - mux_process(video_title, video_filepath_dec, audio_filepath_dec, output_path) + mux_process(video_title, video_filepath_dec, audio_filepath_dec, temp_output_path) if ret_code != 0: logger.error("> Return code from ffmpeg was non-0 (error), skipping!") return - logger.info("> Merging complete, removing temporary files...") + logger.info("> Merging complete, renaming final file...") + os.rename(temp_output_path, output_path) + logger.info("> Cleaning up temporary files...") os.remove(video_filepath_enc) os.remove(audio_filepath_enc) os.remove(video_filepath_dec) @@ -1538,9 +1352,7 @@ def download_aria(url, file_dir, filename): """ @author Puyodead1 """ - args = ["aria2c", url, "-o", filename, "-d", file_dir, "-j16", "-s20", "-x16", "-c", "--auto-file-renaming=false", "--summary-interval=0"] - if disable_ipv6: - args.append("--disable-ipv6") + args = ["aria2c", url, "-o", filename, "-d", file_dir, "-j16", "-s20", "-x16", "-c", "--auto-file-renaming=false", "--summary-interval=0", "--disable-ipv6"] process = subprocess.Popen(args) log_subprocess_output("ARIA2-STDOUT", process.stdout) log_subprocess_output("ARIA2-STDERR", process.stderr) @@ -1580,7 +1392,8 @@ def process_caption(caption, lecture_title, lecture_dir, tries=0): logger.exception(f" > Error converting caption") -def process_lecture(lecture, lecture_path, lecture_file_name, chapter_dir): +def process_lecture(lecture, lecture_path, chapter_dir): + lecture_id = lecture.get("id") lecture_title = lecture.get("lecture_title") is_encrypted = lecture.get("is_encrypted") lecture_sources = lecture.get("video_sources") @@ -1590,10 +1403,10 @@ def process_lecture(lecture, lecture_path, lecture_file_name, chapter_dir): source = lecture_sources[-1] # last index is the best quality if isinstance(quality, int): source = min(lecture_sources, key=lambda x: abs(int(x.get("height")) - quality)) - logger.info(f" > Lecture '%s' has DRM, attempting to download" % lecture_title) - handle_segments(source.get("download_url"), source.get("format_id"), lecture_title, lecture_path, lecture_file_name, chapter_dir) + logger.info(f" > Lecture '{lecture_title}' has DRM, attempting to download") + handle_segments(source.get("download_url"), source.get("format_id"), str(lecture_id), lecture_title, lecture_path, chapter_dir) else: - logger.info(f" > Lecture '%s' is missing media links" % lecture_title) + logger.info(f" > Lecture '{lecture_title}' is missing media links") logger.debug(f"Lecture source count: {len(lecture_sources)}") else: sources = lecture.get("sources") @@ -1618,13 +1431,12 @@ def process_lecture(lecture, lecture_path, lecture_file_name, chapter_dir): f"{concurrent_downloads}", "--downloader", "aria2c", + "--downloader-args", + 'aria2c:"--disable-ipv6"', "-o", f"{temp_filepath}", f"{url}", ] - if disable_ipv6: - cmd.append("--downloader-args") - cmd.append('aria2c:"--disable-ipv6"') process = subprocess.Popen(cmd) log_subprocess_output("YTDLP-STDOUT", process.stdout) log_subprocess_output("YTDLP-STDERR", process.stderr) @@ -1771,7 +1583,7 @@ def parse_new(udemy: Udemy, udemy_object: dict): except Exception: logger.exception(" > Failed to write html file") else: - process_lecture(parsed_lecture, lecture_path, lecture_file_name, chapter_dir) + process_lecture(parsed_lecture, lecture_path, chapter_dir) # download subtitles for this lecture subtitles = parsed_lecture.get("subtitles")