From e5450b6f85df12de393d8b452fa3658a2070c72e Mon Sep 17 00:00:00 2001 From: Puyodead1 Date: Sun, 2 Jul 2023 17:49:04 -0400 Subject: [PATCH 1/5] Cookie extraction - Removed cloudscraper - Added cookie extraction from browser --- README.md | 177 +++++++++++++++++++++++++---------------------- _version.py | 2 +- main.py | 139 ++++++++++++++++++------------------- requirements.txt | 1 + 4 files changed, 165 insertions(+), 154 deletions(-) diff --git a/README.md b/README.md index 90a11d4..085f411 100644 --- a/README.md +++ b/README.md @@ -10,9 +10,9 @@ # NOTE -- **This tool will not work without decryption keys. Do not bother installing unless you already have keys or can obtain them!** -- **Downloading courses is against Udemy's Terms of Service, I am NOT held responsible for your account getting suspended as a result from the use of this program!** -- This program is WIP, the code is provided as-is and I am not held resposible for any legal issues resulting from the use of this program. +- **This tool will not work without decryption keys. Do not bother installing unless you already have keys or can obtain them!** +- **Downloading courses is against Udemy's Terms of Service, I am NOT held responsible for your account getting suspended as a result from the use of this program!** +- This program is WIP, the code is provided as-is and I am not held resposible for any legal issues resulting from the use of this program. # Description @@ -25,10 +25,10 @@ The following are a list of required third-party tools, you will need to ensure _**Note**:_ _These are seperate requirements that are not installed with the pip command! You will need to download and install these manually!_ -- [ffmpeg](https://www.ffmpeg.org/) - This tool is also available in Linux package repositories -- [aria2/aria2c](https://github.com/aria2/aria2/) - This tool is also available in Linux package repositories -- [shaka-packager](https://github.com/shaka-project/shaka-packager/releases/latest) -- [yt-dlp](https://github.com/yt-dlp/yt-dlp/) - This tool is also available in Linux package repositories, but can also be installed using pip if desired (`pip install yt-dlp`) +- [ffmpeg](https://www.ffmpeg.org/) - This tool is also available in Linux package repositories +- [aria2/aria2c](https://github.com/aria2/aria2/) - This tool is also available in Linux package repositories +- [shaka-packager](https://github.com/shaka-project/shaka-packager/releases/latest) +- [yt-dlp](https://github.com/yt-dlp/yt-dlp/) - This tool is also available in Linux package repositories, but can also be installed using pip if desired (`pip install yt-dlp`) # Usage @@ -36,44 +36,56 @@ _quick and dirty how-to_ You will need to get a few things before you can use this program: -- Decryption Key ID -- Decryption Key -- Udemy Course URL -- Udemy Bearer Token (aka acccess token for udemy-dl users) -- Udemy cookies (only required for subscription plans - see [Udemy Subscription Plans](#udemy-subscription-plans)) +- Decryption Key ID +- Decryption Key +- Udemy Course URL +- Udemy Bearer Token (aka acccess token for udemy-dl users) +- Udemy cookies (only required for subscription plans - see [Udemy Subscription Plans](#udemy-subscription-plans)) ## Setting up -- rename `.env.sample` to `.env` _(you only need to do this if you plan to use the .env file to store your bearer token)_ -- rename `keyfile.example.json` to `keyfile.json` +- rename `.env.sample` to `.env` _(you only need to do this if you plan to use the .env file to store your bearer token)_ +- rename `keyfile.example.json` to `keyfile.json` ## Acquire Bearer Token -- Firefox: [Udemy-DL Guide](https://github.com/r0oth3x49/udemy-dl/issues/389#issuecomment-491903900) -- Chrome: [Udemy-DL Guide](https://github.com/r0oth3x49/udemy-dl/issues/389#issuecomment-492569372) -- If you want to use the .env file to store your Bearer Token, edit the .env and add your token. +- Firefox: [Udemy-DL Guide](https://github.com/r0oth3x49/udemy-dl/issues/389#issuecomment-491903900) +- Chrome: [Udemy-DL Guide](https://github.com/r0oth3x49/udemy-dl/issues/389#issuecomment-492569372) +- If you want to use the .env file to store your Bearer Token, edit the .env and add your token. ## Key ID and Key It is up to you to acquire the key and key ID. Please **DO NOT** ask me for help acquiring these, decrypting DRM protected content can be considered piracy. The tool required for this has already been discused in a GitHub issue. -- Enter the key and key id in the `keyfile.json` -- ![keyfile example](https://i.imgur.com/e5aU0ng.png) -- ![example key and kid from console](https://i.imgur.com/awgndZA.png) +- Enter the key and key id in the `keyfile.json` +- ![keyfile example](https://i.imgur.com/e5aU0ng.png) +- ![example key and kid from console](https://i.imgur.com/awgndZA.png) -## Start Downloading +## Cookies + +To download a course included in a subscription plan that you did not purchase individually, you will need to use cookies. You can also use cookies as an alternative to Bearer Tokens. + +The program can automatically extract them from your browser. You can specify what browser to extract cookies from with the `--browser` argument. Supported browsers are: + +- chrome +- firefox +- opera +- edge +- brave +- chromium +- vivaldi +- safari + +## Ready to go You can now run the program, see the examples below. The course will download to `out_dir`. -# Udemy Subscription Plans - -You will need to use a different branch of the program, please see [feat/cookies](https://github.com/Puyodead1/udemy-downloader/tree/feat/cookies). - # Advanced Usage ``` -usage: main.py [-h] -c COURSE_URL [-b BEARER_TOKEN] [-q QUALITY] [-l LANG] [-cd CONCURRENT_DOWNLOADS] [--disable-ipv6] [--skip-lectures] [--download-assets] [--download-captions] [--keep-vtt] [--skip-hls] - [--info] [--id-as-course-name] [-sc] [--save-to-file] [--load-from-file] [--log-level LOG_LEVEL] [--use-h265] [--h265-crf H265_CRF] [--h265-preset H265_PRESET] [--use-nvenc] [-v] +usage: main.py [-h] -c COURSE_URL [-b BEARER_TOKEN] [-q QUALITY] [-l LANG] [-cd CONCURRENT_DOWNLOADS] [--disable-ipv6] [--skip-lectures] [--download-assets] [--download-captions] [--download-quizzes] + [--keep-vtt] [--skip-hls] [--info] [--id-as-course-name] [-sc] [--save-to-file] [--load-from-file] [--log-level LOG_LEVEL] [--browser {chrome,firefox,opera,edge,brave,chromium,vivaldi,safari}] + [--use-h265] [--h265-crf H265_CRF] [--h265-preset H265_PRESET] [--use-nvenc] [-v] Udemy Downloader @@ -92,6 +104,7 @@ options: --skip-lectures If specified, lectures won't be downloaded --download-assets If specified, lecture assets will be downloaded --download-captions If specified, captions will be downloaded + --download-quizzes If specified, quizzes will be downloaded --keep-vtt If specified, .vtt files won't be removed --skip-hls If specified, hls streams will be skipped (faster fetching) (hls streams usually contain 1080p quality for non-drm lectures) --info If specified, only course information will be printed, nothing will be downloaded @@ -104,6 +117,8 @@ options: time) --log-level LOG_LEVEL Logging level: one of DEBUG, INFO, ERROR, WARNING, CRITICAL (Default is INFO) + --browser {chrome,firefox,opera,edge,brave,chromium,vivaldi,safari} + The browser to extract cookies from --use-h265 If specified, videos will be encoded with the H.265 codec --h265-crf H265_CRF Set a custom CRF value for H.265 encoding. FFMPEG default is 28 --h265-preset H265_PRESET @@ -112,55 +127,55 @@ options: -v, --version show program's version number and exit ``` -- Passing a Bearer Token and Course ID as an argument - - `python main.py -c -b ` - - `python main.py -c https://www.udemy.com/courses/myawesomecourse -b ` -- Download a specific quality - - `python main.py -c -q 720` -- Download assets along with lectures - - `python main.py -c --download-assets` -- Download assets and specify a quality - - `python main.py -c -q 360 --download-assets` -- Download captions (Defaults to English) - - `python main.py -c --download-captions` -- Download captions with specific language - - `python main.py -c --download-captions -l en` - English subtitles - - `python main.py -c --download-captions -l es` - Spanish subtitles - - `python main.py -c --download-captions -l it` - Italian subtitles - - `python main.py -c --download-captions -l pl` - Polish Subtitles - - `python main.py -c --download-captions -l all` - Downloads all subtitles - - etc -- Skip downloading lecture videos - - `python main.py -c --skip-lectures --download-captions` - Downloads only captions - - `python main.py -c --skip-lectures --download-assets` - Downloads only assets -- Keep .VTT caption files: - - `python main.py -c --download-captions --keep-vtt` -- Skip parsing HLS Streams (HLS streams usually contain 1080p quality for Non-DRM lectures): - - `python main.py -c --skip-hls` -- Print course information only: - - `python main.py -c --info` -- Specify max number of concurrent downloads: - - `python main.py -c --concurrent-downloads 20` - - `python main.py -c -cd 20` -- Cache course information: - - `python main.py -c --save-to-file` -- Load course cache: - - `python main.py -c --load-from-file` -- Change logging level: - - `python main.py -c --log-level DEBUG` - - `python main.py -c --log-level WARNING` - - `python main.py -c --log-level INFO` - - `python main.py -c --log-level CRITICAL` -- Use course ID as the course name: - - `python main.py -c --id-as-course-name` -- Encode in H.265: - - `python main.py -c --use-h265` -- Encode in H.265 with custom CRF: - - `python main.py -c --use-h265 -h265-crf 20` -- Encode in H.265 with custom preset: - - `python main.py -c --use-h265 --h265-preset faster` -- Encode in H.265 using NVIDIA hardware transcoding: - - `python main.py -c --use-h265 --use-nvenc` +- Passing a Bearer Token and Course ID as an argument + - `python main.py -c -b ` + - `python main.py -c https://www.udemy.com/courses/myawesomecourse -b ` +- Download a specific quality + - `python main.py -c -q 720` +- Download assets along with lectures + - `python main.py -c --download-assets` +- Download assets and specify a quality + - `python main.py -c -q 360 --download-assets` +- Download captions (Defaults to English) + - `python main.py -c --download-captions` +- Download captions with specific language + - `python main.py -c --download-captions -l en` - English subtitles + - `python main.py -c --download-captions -l es` - Spanish subtitles + - `python main.py -c --download-captions -l it` - Italian subtitles + - `python main.py -c --download-captions -l pl` - Polish Subtitles + - `python main.py -c --download-captions -l all` - Downloads all subtitles + - etc +- Skip downloading lecture videos + - `python main.py -c --skip-lectures --download-captions` - Downloads only captions + - `python main.py -c --skip-lectures --download-assets` - Downloads only assets +- Keep .VTT caption files: + - `python main.py -c --download-captions --keep-vtt` +- Skip parsing HLS Streams (HLS streams usually contain 1080p quality for Non-DRM lectures): + - `python main.py -c --skip-hls` +- Print course information only: + - `python main.py -c --info` +- Specify max number of concurrent downloads: + - `python main.py -c --concurrent-downloads 20` + - `python main.py -c -cd 20` +- Cache course information: + - `python main.py -c --save-to-file` +- Load course cache: + - `python main.py -c --load-from-file` +- Change logging level: + - `python main.py -c --log-level DEBUG` + - `python main.py -c --log-level WARNING` + - `python main.py -c --log-level INFO` + - `python main.py -c --log-level CRITICAL` +- Use course ID as the course name: + - `python main.py -c --id-as-course-name` +- Encode in H.265: + - `python main.py -c --use-h265` +- Encode in H.265 with custom CRF: + - `python main.py -c --use-h265 -h265-crf 20` +- Encode in H.265 with custom preset: + - `python main.py -c --use-h265 --h265-preset faster` +- Encode in H.265 using NVIDIA hardware transcoding: + - `python main.py -c --use-h265 --use-nvenc` If you encounter errors while downloading such as @@ -178,11 +193,11 @@ if you want help using the program, join my [Discord](https://discord.gg/tMzrSxQ # Credits -- https://github.com/Jayapraveen/Drm-Dash-stream-downloader - For the original code which this is based on -- https://github.com/alastairmccormack/pywvpssh - For code related to PSSH extraction -- https://github.com/alastairmccormack/pymp4parse - For code related to mp4 box parsing (used by pywvpssh) -- https://github.com/lbrayner/vtt-to-srt - For code related to converting subtitles from vtt to srt format -- https://github.com/r0oth3x49/udemy-dl - For some of the informaton related to using the udemy api +- https://github.com/Jayapraveen/Drm-Dash-stream-downloader - For the original code which this is based on +- https://github.com/alastairmccormack/pywvpssh - For code related to PSSH extraction +- https://github.com/alastairmccormack/pymp4parse - For code related to mp4 box parsing (used by pywvpssh) +- https://github.com/lbrayner/vtt-to-srt - For code related to converting subtitles from vtt to srt format +- https://github.com/r0oth3x49/udemy-dl - For some of the informaton related to using the udemy api ## License diff --git a/_version.py b/_version.py index bff3210..dda6764 100644 --- a/_version.py +++ b/_version.py @@ -1 +1 @@ -__version__ = "1.2.10" +__version__ = "1.2.10-cookies" diff --git a/main.py b/main.py index 50784c8..50060ef 100644 --- a/main.py +++ b/main.py @@ -12,6 +12,7 @@ from html.parser import HTMLParser as compat_HTMLParser from pathlib import Path from typing import IO +import browser_cookie3 import m3u8 import requests import yt_dlp @@ -29,7 +30,6 @@ from utils import extract_kid from vtt_to_srt import convert retry = 3 -cookies = "" downloader = None logger: logging.Logger = None dl_assets = False @@ -51,11 +51,12 @@ course_url = None info = None keys = {} id_as_course_name = False -is_subscription_course = False use_h265 = False h265_crf = 28 h265_preset = "medium" use_nvenc = False +browser = None +cj = None # from https://stackoverflow.com/a/21978778/9785713 @@ -68,7 +69,7 @@ def log_subprocess_output(prefix: str, pipe: IO[bytes]): # this is the first function that is called, we parse the arguments, setup the logger, and ensure that required directories exist def pre_run(): - global cookies, dl_assets, dl_captions, dl_quizzes, skip_lectures, caption_locale, quality, bearer_token, course_name, keep_vtt, skip_hls, concurrent_downloads, disable_ipv6, load_from_file, save_to_file, bearer_token, course_url, info, logger, keys, id_as_course_name, is_subscription_course, LOG_LEVEL, use_h265, h265_crf, h265_preset, use_nvenc + global dl_assets, dl_captions, dl_quizzes, skip_lectures, caption_locale, quality, bearer_token, course_name, keep_vtt, skip_hls, concurrent_downloads, disable_ipv6, load_from_file, save_to_file, bearer_token, course_url, info, logger, keys, id_as_course_name, LOG_LEVEL, use_h265, h265_crf, h265_preset, use_nvenc, browser # make sure the directory exists if not os.path.exists(DOWNLOAD_DIR): @@ -162,13 +163,6 @@ def pre_run(): action="store_true", help="If specified, the course id will be used in place of the course name for the output directory. This is a 'hack' to reduce the path length", ) - parser.add_argument( - "-sc", - "--subscription-course", - dest="is_subscription_course", - action="store_true", - help="Mark the course as a subscription based course, use this if you are having problems with the program auto detecting it", - ) parser.add_argument( "--save-to-file", dest="save_to_file", @@ -187,6 +181,12 @@ def pre_run(): type=str, help="Logging level: one of DEBUG, INFO, ERROR, WARNING, CRITICAL (Default is INFO)", ) + parser.add_argument( + "--browser", + dest="browser", + help="The browser to extract cookies from", + choices=["chrome", "firefox", "opera", "edge", "brave", "chromium", "vivaldi", "safari"], + ) parser.add_argument( "--use-h265", dest="use_h265", @@ -302,8 +302,8 @@ def pre_run(): if args.id_as_course_name: id_as_course_name = args.id_as_course_name - if args.is_subscription_course: - is_subscription_course = args.is_subscription_course + if args.browser: + browser = args.browser Path(DOWNLOAD_DIR).mkdir(parents=True, exist_ok=True) Path(SAVED_DIR).mkdir(parents=True, exist_ok=True) @@ -315,32 +315,40 @@ def pre_run(): else: logger.warning("> Keyfile not found! You won't be able to decrypt videos!") - # Read cookies from file - if os.path.exists(COOKIE_FILE_PATH): - with open(COOKIE_FILE_PATH, encoding="utf8", mode="r") as cookiefile: - cookies = cookiefile.read() - cookies = cookies.rstrip() - else: - logger.warning( - "No cookies.txt file was found, you won't be able to download subscription courses! You can ignore ignore this if you don't plan to download a course included in a subscription plan." - ) - class Udemy: def __init__(self, bearer_token): + global cj + self.session = None self.bearer_token = None self.auth = UdemyAuth(cache_session=False) if not self.session: - self.session, self.bearer_token = self.auth.authenticate(bearer_token=bearer_token) + self.session = self.auth.authenticate(bearer_token=bearer_token) - if self.session and self.bearer_token: - self.session._headers.update({"Authorization": "Bearer {}".format(self.bearer_token)}) - self.session._headers.update({"X-Udemy-Authorization": "Bearer {}".format(self.bearer_token)}) - logger.info("Login Success") - else: - logger.fatal("Login Failure! You are probably missing an access token!") - sys.exit(1) + if not self.session: + if browser == None: + logger.error("No bearer token was provided, and no browser for cookie extraction was specified.") + sys.exit(1) + + logger.warning("No bearer token was provided, attempting to use browser cookies.") + + self.session = self.auth._session + + if browser == "chrome": + cj = browser_cookie3.chrome() + elif browser == "firefox": + cj = browser_cookie3.firefox() + elif browser == "opera": + cj = browser_cookie3.opera() + elif browser == "edge": + cj = browser_cookie3.edge() + elif browser == "brave": + cj = browser_cookie3.brave() + elif browser == "chromium": + cj = browser_cookie3.chromium() + elif browser == "vivaldi": + cj = browser_cookie3.vivaldi() def _get_quiz(self, quiz_id): print(portal_name) @@ -540,14 +548,15 @@ class Udemy: for pl in playlists: resolution = pl.stream_info.resolution codecs = pl.stream_info.codecs - + if not resolution: continue if not codecs: continue width, height = resolution - - if height in seen: continue + + if height in seen: + continue # we need to save the individual playlists to disk also playlist_path = Path(temp_path, f"index_{asset_id}_{width}x{height}.m3u8") @@ -868,27 +877,8 @@ class Udemy: def _extract_course_info(self, url): global portal_name - portal_name, course_name = self.extract_course_name(url) - course = { - "portal_name": portal_name - } - - if not is_subscription_course: - results = self._subscribed_courses(portal_name=portal_name, course_name=course_name) - course = self._extract_course(response=results, course_name=course_name) - if not course: - results = self._my_courses(portal_name=portal_name) - course = self._extract_course(response=results, course_name=course_name) - if not course: - results = self._subscribed_collection_courses(portal_name=portal_name) - course = self._extract_course(response=results, course_name=course_name) - if not course: - results = self._archived_courses(portal_name=portal_name) - course = self._extract_course(response=results, course_name=course_name) - - if not course or is_subscription_course: - course_id = self._extract_subscription_course_info(url) - course = self._extract_course_info_json(url, course_id) + course_id, portal_name = self._extract_subscription_course_info(url) + course = self._extract_course_info_json(url, course_id, portal_name) if course: return course.get("id"), course @@ -898,11 +888,11 @@ class Udemy: "It seems either you are not enrolled or you have to visit the course atleast once while you are logged in.", ) logger.info( - "Trying to logout now...", + "Terminating Session...", ) self.session.terminate() logger.info( - "Logged out successfully.", + "Session terminated.", ) sys.exit(1) @@ -1009,6 +999,7 @@ class Udemy: return lecture + class Session(object): def __init__(self): self._headers = HEADERS @@ -1023,11 +1014,10 @@ class Session(object): def _set_auth_headers(self, bearer_token=""): self._headers["Authorization"] = "Bearer {}".format(bearer_token) self._headers["X-Udemy-Authorization"] = "Bearer {}".format(bearer_token) - self._headers["Cookie"] = cookies def _get(self, url): for i in range(10): - session = self._session.get(url, headers=self._headers) + session = self._session.get(url, headers=self._headers, cookies=cj) if session.ok or session.status_code in [502, 503]: return session if not session.ok: @@ -1036,7 +1026,7 @@ class Session(object): time.sleep(0.8) def _post(self, url, data, redirect=True): - session = self._session.post(url, data, headers=self._headers, allow_redirects=redirect) + session = self._session.post(url, data, headers=self._headers, allow_redirects=redirect, cookies=cj) if session.ok: return session if not session.ok: @@ -1140,14 +1130,12 @@ class UdemyAuth(object): self._cache = cache_session self._session = Session() - def authenticate(self, bearer_token=""): + def authenticate(self, bearer_token=None): if bearer_token: self._session._set_auth_headers(bearer_token=bearer_token) - self._session._session.cookies.update({"bearer_token": bearer_token}) - return self._session, bearer_token + return self._session else: - self._session._set_auth_headers() - return None, None + return None def durationtoseconds(period): @@ -1197,9 +1185,7 @@ def mux_process(video_title, video_filepath, audio_filepath, output_path): transcode, video_filepath, audio_filepath, codec, h265_crf, h265_preset, video_title, output_path ) else: - command = 'ffmpeg -y -i "{}" -i "{}" -c:v copy -c:a copy -fflags +bitexact -map_metadata -1 -metadata title="{}" "{}"'.format( - video_filepath, audio_filepath, video_title, output_path - ) + command = 'ffmpeg -y -i "{}" -i "{}" -c:v copy -c:a copy -fflags +bitexact -map_metadata -1 -metadata title="{}" "{}"'.format(video_filepath, audio_filepath, video_title, output_path) else: if use_h265: command = 'nice -n 7 ffmpeg {} -y -i "{}" -i "{}" -c:v libx265 -vtag hvc1 -crf {} -preset {} -c:a copy -fflags +bitexact -map_metadata -1 -metadata title="{}" "{}"'.format( @@ -1538,7 +1524,18 @@ def process_lecture(lecture, lecture_path, lecture_file_name, chapter_dir): source_type = source.get("type") if source_type == "hls": temp_filepath = lecture_path.replace(".mp4", ".%(ext)s") - cmd = ["yt-dlp", "--enable-file-urls", "--force-generic-extractor", "--concurrent-fragments", f"{concurrent_downloads}", "--downloader", "aria2c", "-o", f"{temp_filepath}", f"{url}"] + cmd = [ + "yt-dlp", + "--enable-file-urls", + "--force-generic-extractor", + "--concurrent-fragments", + f"{concurrent_downloads}", + "--downloader", + "aria2c", + "-o", + f"{temp_filepath}", + f"{url}", + ] if disable_ipv6: cmd.append("--downloader-args") cmd.append('aria2c:"--disable-ipv6"') @@ -1574,7 +1571,6 @@ def process_lecture(lecture, lecture_path, lecture_file_name, chapter_dir): logger.error(" > Missing sources for lecture", lecture) - def process_quiz(udemy: Udemy, lecture, chapter_dir): lecture_title = lecture.get("lecture_title") lecture_index = lecture.get("lecture_index") @@ -1594,7 +1590,6 @@ def process_quiz(udemy: Udemy, lecture, chapter_dir): f.write(html) - def parse_new(udemy: Udemy, udemy_object: dict): total_chapters = udemy_object.get("total_chapters") total_lectures = udemy_object.get("total_lectures") @@ -1851,9 +1846,9 @@ def main(): counter = -1 if resource: - logger.info("> Trying to logout") + logger.info("> Terminating Session...") udemy.session.terminate() - logger.info("> Logged out.") + logger.info("> Session Terminated.") if course: logger.info("> Processing course data, this may take a minute. ") diff --git a/requirements.txt b/requirements.txt index 4ed8200..fe47813 100644 --- a/requirements.txt +++ b/requirements.txt @@ -15,3 +15,4 @@ lxml six pathvalidate coloredlogs +browser_cookie3 \ No newline at end of file From fdf8cde414d3e1b98f024840e5a447f339571499 Mon Sep 17 00:00:00 2001 From: Puyodead1 Date: Mon, 7 Aug 2023 00:00:05 -0400 Subject: [PATCH 2/5] bug fix --- main.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/main.py b/main.py index 50060ef..5532c3b 100644 --- a/main.py +++ b/main.py @@ -877,8 +877,11 @@ class Udemy: def _extract_course_info(self, url): global portal_name - course_id, portal_name = self._extract_subscription_course_info(url) - course = self._extract_course_info_json(url, course_id, portal_name) + portal_name, course_name = self.extract_course_name(url) + course = {"portal_name": portal_name} + + course_id = self._extract_subscription_course_info(url) + course = self._extract_course_info_json(url, course_id) if course: return course.get("id"), course From b5741b2373b7ddac3a68b77ba084ed3d4e6a66ae Mon Sep 17 00:00:00 2001 From: Puyodead1 Date: Wed, 9 Aug 2023 00:44:35 -0400 Subject: [PATCH 3/5] bug fix? --- main.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/main.py b/main.py index 5532c3b..cdf7ed6 100644 --- a/main.py +++ b/main.py @@ -1861,7 +1861,6 @@ def main(): if clazz == "chapter": lecture_counter = 0 - lectures = [] chapter_index = entry.get("object_index") chapter_title = "{0:02d} - ".format(chapter_index) + sanitize_filename(entry.get("title")) @@ -1870,6 +1869,7 @@ def main(): counter += 1 elif clazz == "lecture": lecture_counter += 1 + lectures = [] lecture_id = entry.get("id") if len(udemy_object["chapters"]) == 0: # dummy chapters to handle lectures without chapters @@ -1891,6 +1891,7 @@ def main(): udemy_object["chapters"][counter]["lecture_count"] = len(lectures) elif clazz == "quiz": lecture_counter += 1 + lectures = [] lecture_id = entry.get("id") if len(udemy_object["chapters"]) == 0: # dummy chapters to handle lectures without chapters From f9634168d4068c83572798d74c8f43f1755a1eb2 Mon Sep 17 00:00:00 2001 From: Puyodead1 Date: Wed, 9 Aug 2023 00:49:53 -0400 Subject: [PATCH 4/5] Revert "bug fix?" This reverts commit 0ab68cb95f97f05c5ef62309c5775ab4280efd58. --- main.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/main.py b/main.py index cdf7ed6..5532c3b 100644 --- a/main.py +++ b/main.py @@ -1861,6 +1861,7 @@ def main(): if clazz == "chapter": lecture_counter = 0 + lectures = [] chapter_index = entry.get("object_index") chapter_title = "{0:02d} - ".format(chapter_index) + sanitize_filename(entry.get("title")) @@ -1869,7 +1870,6 @@ def main(): counter += 1 elif clazz == "lecture": lecture_counter += 1 - lectures = [] lecture_id = entry.get("id") if len(udemy_object["chapters"]) == 0: # dummy chapters to handle lectures without chapters @@ -1891,7 +1891,6 @@ def main(): udemy_object["chapters"][counter]["lecture_count"] = len(lectures) elif clazz == "quiz": lecture_counter += 1 - lectures = [] lecture_id = entry.get("id") if len(udemy_object["chapters"]) == 0: # dummy chapters to handle lectures without chapters From 43f6085e917ac21f32728a49f89369965cb3f00b Mon Sep 17 00:00:00 2001 From: Puyodead1 Date: Wed, 9 Aug 2023 15:58:13 -0400 Subject: [PATCH 5/5] some bug fixes --- main.py | 50 +++++++++++++++++++++++++++++++++++++++++--------- 1 file changed, 41 insertions(+), 9 deletions(-) diff --git a/main.py b/main.py index 5532c3b..46ed6cf 100644 --- a/main.py +++ b/main.py @@ -51,6 +51,7 @@ course_url = None info = None keys = {} id_as_course_name = False +is_subscription_course = False use_h265 = False h265_crf = 28 h265_preset = "medium" @@ -163,6 +164,13 @@ def pre_run(): action="store_true", help="If specified, the course id will be used in place of the course name for the output directory. This is a 'hack' to reduce the path length", ) + parser.add_argument( + "-sc", + "--subscription-course", + dest="is_subscription_course", + action="store_true", + help="Mark the course as a subscription based course, use this if you are having problems with the program auto detecting it", + ) parser.add_argument( "--save-to-file", dest="save_to_file", @@ -302,6 +310,8 @@ def pre_run(): if args.id_as_course_name: id_as_course_name = args.id_as_course_name + if args.is_subscription_course: + is_subscription_course = args.is_subscription_course if args.browser: browser = args.browser @@ -880,8 +890,22 @@ class Udemy: portal_name, course_name = self.extract_course_name(url) course = {"portal_name": portal_name} - course_id = self._extract_subscription_course_info(url) - course = self._extract_course_info_json(url, course_id) + if not is_subscription_course: + results = self._subscribed_courses(portal_name=portal_name, course_name=course_name) + course = self._extract_course(response=results, course_name=course_name) + if not course: + results = self._my_courses(portal_name=portal_name) + course = self._extract_course(response=results, course_name=course_name) + if not course: + results = self._subscribed_collection_courses(portal_name=portal_name) + course = self._extract_course(response=results, course_name=course_name) + if not course: + results = self._archived_courses(portal_name=portal_name) + course = self._extract_course(response=results, course_name=course_name) + + if not course or is_subscription_course: + course_id = self._extract_subscription_course_info(url) + course = self._extract_course_info_json(url, course_id) if course: return course.get("id"), course @@ -1846,7 +1870,7 @@ def main(): udemy_object["title"] = title udemy_object["course_title"] = course_title udemy_object["chapters"] = [] - counter = -1 + chapter_index_counter = -1 if resource: logger.info("> Terminating Session...") @@ -1856,18 +1880,27 @@ def main(): if course: logger.info("> Processing course data, this may take a minute. ") lecture_counter = 0 + lectures = [] + for entry in course: clazz = entry.get("_class") if clazz == "chapter": + # add all lectures for the previous chapter + if len(lectures) > 0: + udemy_object["chapters"][chapter_index_counter]["lectures"] = lectures + udemy_object["chapters"][chapter_index_counter]["lecture_count"] = len(lectures) + + # reset lecture tracking lecture_counter = 0 lectures = [] + chapter_index = entry.get("object_index") chapter_title = "{0:02d} - ".format(chapter_index) + sanitize_filename(entry.get("title")) if chapter_title not in udemy_object["chapters"]: udemy_object["chapters"].append({"chapter_title": chapter_title, "chapter_id": entry.get("id"), "chapter_index": chapter_index, "lectures": []}) - counter += 1 + chapter_index_counter += 1 elif clazz == "lecture": lecture_counter += 1 lecture_id = entry.get("id") @@ -1887,8 +1920,8 @@ def main(): lecture_title = "{0:03d} ".format(lecture_counter) + sanitize_filename(entry.get("title")) lectures.append({"index": lecture_counter, "lecture_index": lecture_index, "lecture_title": lecture_title, "_class": entry.get("_class"), "id": lecture_id, "data": entry}) - udemy_object["chapters"][counter]["lectures"] = lectures - udemy_object["chapters"][counter]["lecture_count"] = len(lectures) + else: + logger.debug("Lecture: ID is None, skipping") elif clazz == "quiz": lecture_counter += 1 lecture_id = entry.get("id") @@ -1908,9 +1941,8 @@ def main(): lecture_title = "{0:03d} ".format(lecture_counter) + sanitize_filename(entry.get("title")) lectures.append({"index": lecture_counter, "lecture_index": lecture_index, "lecture_title": lecture_title, "_class": entry.get("_class"), "id": lecture_id, "data": entry}) - - udemy_object["chapters"][counter]["lectures"] = lectures - udemy_object["chapters"][counter]["lectures_count"] = len(lectures) + else: + logger.debug("Quiz: ID is None, skipping") udemy_object["total_chapters"] = len(udemy_object["chapters"]) udemy_object["total_lectures"] = sum([entry.get("lecture_count", 0) for entry in udemy_object["chapters"] if entry])