Cookie extraction

- Removed cloudscraper
- Added cookie extraction from browser
2025-05-04 01:34:25 +02:00 · 2023-07-02 17:49:04 -04:00 · 2023-07-02 17:49:04 -04:00 · e5450b6f85
commit e5450b6f85
parent e9b9d8a6a4
4 changed files with 165 additions and 154 deletions
--- a/README.md
+++ b/README.md
@@ -61,19 +61,31 @@ It is up to you to acquire the key and key ID. Please **DO NOT** ask me for help
 -   ![keyfile example](https://i.imgur.com/e5aU0ng.png)
 -   ![example key and kid from console](https://i.imgur.com/awgndZA.png)
-## Start Downloading
+## Cookies
 To download a course included in a subscription plan that you did not purchase individually, you will need to use cookies. You can also use cookies as an alternative to Bearer Tokens.
 The program can automatically extract them from your browser. You can specify what browser to extract cookies from with the `--browser` argument. Supported browsers are:
 -   chrome
 -   firefox
 -   opera
 -   edge
 -   brave
 -   chromium
 -   vivaldi
 -   safari
 ## Ready to go
 You can now run the program, see the examples below. The course will download to `out_dir`.
 # Udemy Subscription Plans
 You will need to use a different branch of the program, please see [feat/cookies](https://github.com/Puyodead1/udemy-downloader/tree/feat/cookies).
 # Advanced Usage
 ```
-usage: main.py [-h] -c COURSE_URL [-b BEARER_TOKEN] [-q QUALITY] [-l LANG] [-cd CONCURRENT_DOWNLOADS] [--disable-ipv6] [--skip-lectures] [--download-assets] [--download-captions] [--keep-vtt] [--skip-hls]
+usage: main.py [-h] -c COURSE_URL [-b BEARER_TOKEN] [-q QUALITY] [-l LANG] [-cd CONCURRENT_DOWNLOADS] [--disable-ipv6] [--skip-lectures] [--download-assets] [--download-captions] [--download-quizzes]
-               [--info] [--id-as-course-name] [-sc] [--save-to-file] [--load-from-file] [--log-level LOG_LEVEL] [--use-h265] [--h265-crf H265_CRF] [--h265-preset H265_PRESET] [--use-nvenc] [-v]
+               [--keep-vtt] [--skip-hls] [--info] [--id-as-course-name] [-sc] [--save-to-file] [--load-from-file] [--log-level LOG_LEVEL] [--browser {chrome,firefox,opera,edge,brave,chromium,vivaldi,safari}]
               [--use-h265] [--h265-crf H265_CRF] [--h265-preset H265_PRESET] [--use-nvenc] [-v]
 Udemy Downloader
@@ -92,6 +104,7 @@ options:
  --skip-lectures       If specified, lectures won't be downloaded
  --download-assets     If specified, lecture assets will be downloaded
  --download-captions   If specified, captions will be downloaded
  --download-quizzes    If specified, quizzes will be downloaded
  --keep-vtt            If specified, .vtt files won't be removed
  --skip-hls            If specified, hls streams will be skipped (faster fetching) (hls streams usually contain 1080p quality for non-drm lectures)
  --info                If specified, only course information will be printed, nothing will be downloaded
@@ -104,6 +117,8 @@ options:
                        time)
  --log-level LOG_LEVEL
                        Logging level: one of DEBUG, INFO, ERROR, WARNING, CRITICAL (Default is INFO)
  --browser {chrome,firefox,opera,edge,brave,chromium,vivaldi,safari}
                        The browser to extract cookies from
  --use-h265            If specified, videos will be encoded with the H.265 codec
  --h265-crf H265_CRF   Set a custom CRF value for H.265 encoding. FFMPEG default is 28
  --h265-preset H265_PRESET
--- a/_version.py
+++ b/_version.py
@@ -1 +1 @@
-__version__ = "1.2.10"
+__version__ = "1.2.10-cookies"
--- a/main.py
+++ b/main.py
@@ -12,6 +12,7 @@ from html.parser import HTMLParser as compat_HTMLParser
 from pathlib import Path
 from typing import IO
 import browser_cookie3
 import m3u8
 import requests
 import yt_dlp
@@ -29,7 +30,6 @@ from utils import extract_kid
 from vtt_to_srt import convert
 retry = 3
 cookies = ""
 downloader = None
 logger: logging.Logger = None
 dl_assets = False
@@ -51,11 +51,12 @@ course_url = None
 info = None
 keys = {}
 id_as_course_name = False
 is_subscription_course = False
 use_h265 = False
 h265_crf = 28
 h265_preset = "medium"
 use_nvenc = False
 browser = None
 cj = None
 # from https://stackoverflow.com/a/21978778/9785713
@@ -68,7 +69,7 @@ def log_subprocess_output(prefix: str, pipe: IO[bytes]):
 # this is the first function that is called, we parse the arguments, setup the logger, and ensure that required directories exist
 def pre_run():
-    global cookies, dl_assets, dl_captions, dl_quizzes, skip_lectures, caption_locale, quality, bearer_token, course_name, keep_vtt, skip_hls, concurrent_downloads, disable_ipv6, load_from_file, save_to_file, bearer_token, course_url, info, logger, keys, id_as_course_name, is_subscription_course, LOG_LEVEL, use_h265, h265_crf, h265_preset, use_nvenc
+    global dl_assets, dl_captions, dl_quizzes, skip_lectures, caption_locale, quality, bearer_token, course_name, keep_vtt, skip_hls, concurrent_downloads, disable_ipv6, load_from_file, save_to_file, bearer_token, course_url, info, logger, keys, id_as_course_name, LOG_LEVEL, use_h265, h265_crf, h265_preset, use_nvenc, browser
    # make sure the directory exists
    if not os.path.exists(DOWNLOAD_DIR):
@@ -162,13 +163,6 @@ def pre_run():
        action="store_true",
        help="If specified, the course id will be used in place of the course name for the output directory. This is a 'hack' to reduce the path length",
    )
    parser.add_argument(
        "-sc",
        "--subscription-course",
        dest="is_subscription_course",
        action="store_true",
        help="Mark the course as a subscription based course, use this if you are having problems with the program auto detecting it",
    )
    parser.add_argument(
        "--save-to-file",
        dest="save_to_file",
@@ -187,6 +181,12 @@ def pre_run():
        type=str,
        help="Logging level: one of DEBUG, INFO, ERROR, WARNING, CRITICAL (Default is INFO)",
    )
    parser.add_argument(
        "--browser",
        dest="browser",
        help="The browser to extract cookies from",
        choices=["chrome", "firefox", "opera", "edge", "brave", "chromium", "vivaldi", "safari"],
    )
    parser.add_argument(
        "--use-h265",
        dest="use_h265",
@@ -302,8 +302,8 @@ def pre_run():
    if args.id_as_course_name:
        id_as_course_name = args.id_as_course_name
-    if args.is_subscription_course:
+    if args.browser:
-        is_subscription_course = args.is_subscription_course
+        browser = args.browser
    Path(DOWNLOAD_DIR).mkdir(parents=True, exist_ok=True)
    Path(SAVED_DIR).mkdir(parents=True, exist_ok=True)
@@ -315,33 +315,41 @@ def pre_run():
    else:
        logger.warning("> Keyfile not found! You won't be able to decrypt videos!")
    # Read cookies from file
    if os.path.exists(COOKIE_FILE_PATH):
        with open(COOKIE_FILE_PATH, encoding="utf8", mode="r") as cookiefile:
            cookies = cookiefile.read()
            cookies = cookies.rstrip()
    else:
        logger.warning(
            "No cookies.txt file was found, you won't be able to download subscription courses! You can ignore ignore this if you don't plan to download a course included in a subscription plan."
        )
 class Udemy:
    def __init__(self, bearer_token):
        global cj
        self.session = None
        self.bearer_token = None
        self.auth = UdemyAuth(cache_session=False)
        if not self.session:
-            self.session, self.bearer_token = self.auth.authenticate(bearer_token=bearer_token)
+            self.session = self.auth.authenticate(bearer_token=bearer_token)
-        if self.session and self.bearer_token:
+        if not self.session:
-            self.session._headers.update({"Authorization": "Bearer {}".format(self.bearer_token)})
+            if browser == None:
-            self.session._headers.update({"X-Udemy-Authorization": "Bearer {}".format(self.bearer_token)})
+                logger.error("No bearer token was provided, and no browser for cookie extraction was specified.")
            logger.info("Login Success")
        else:
            logger.fatal("Login Failure! You are probably missing an access token!")
                sys.exit(1)
            logger.warning("No bearer token was provided, attempting to use browser cookies.")
            self.session = self.auth._session
            if browser == "chrome":
                cj = browser_cookie3.chrome()
            elif browser == "firefox":
                cj = browser_cookie3.firefox()
            elif browser == "opera":
                cj = browser_cookie3.opera()
            elif browser == "edge":
                cj = browser_cookie3.edge()
            elif browser == "brave":
                cj = browser_cookie3.brave()
            elif browser == "chromium":
                cj = browser_cookie3.chromium()
            elif browser == "vivaldi":
                cj = browser_cookie3.vivaldi()
    def _get_quiz(self, quiz_id):
        print(portal_name)
        self.session._headers.update(
@@ -547,7 +555,8 @@ class Udemy:
                    continue
                width, height = resolution
-                if height in seen: continue
+                if height in seen:
                    continue
                # we need to save the individual playlists to disk also
                playlist_path = Path(temp_path, f"index_{asset_id}_{width}x{height}.m3u8")
@@ -868,27 +877,8 @@ class Udemy:
    def _extract_course_info(self, url):
        global portal_name
-        portal_name, course_name = self.extract_course_name(url)
+        course_id, portal_name = self._extract_subscription_course_info(url)
-        course = {
+        course = self._extract_course_info_json(url, course_id, portal_name)
            "portal_name": portal_name
        }
        if not is_subscription_course:
            results = self._subscribed_courses(portal_name=portal_name, course_name=course_name)
            course = self._extract_course(response=results, course_name=course_name)
            if not course:
                results = self._my_courses(portal_name=portal_name)
                course = self._extract_course(response=results, course_name=course_name)
            if not course:
                results = self._subscribed_collection_courses(portal_name=portal_name)
                course = self._extract_course(response=results, course_name=course_name)
            if not course:
                results = self._archived_courses(portal_name=portal_name)
                course = self._extract_course(response=results, course_name=course_name)
        if not course or is_subscription_course:
            course_id = self._extract_subscription_course_info(url)
            course = self._extract_course_info_json(url, course_id)
        if course:
            return course.get("id"), course
@@ -898,11 +888,11 @@ class Udemy:
                "It seems either you are not enrolled or you have to visit the course atleast once while you are logged in.",
            )
            logger.info(
-                "Trying to logout now...",
+                "Terminating Session...",
            )
            self.session.terminate()
            logger.info(
-                "Logged out successfully.",
+                "Session terminated.",
            )
            sys.exit(1)
@@ -1009,6 +999,7 @@ class Udemy:
        return lecture
 class Session(object):
    def __init__(self):
        self._headers = HEADERS
@@ -1023,11 +1014,10 @@ class Session(object):
    def _set_auth_headers(self, bearer_token=""):
        self._headers["Authorization"] = "Bearer {}".format(bearer_token)
        self._headers["X-Udemy-Authorization"] = "Bearer {}".format(bearer_token)
        self._headers["Cookie"] = cookies
    def _get(self, url):
        for i in range(10):
-            session = self._session.get(url, headers=self._headers)
+            session = self._session.get(url, headers=self._headers, cookies=cj)
            if session.ok or session.status_code in [502, 503]:
                return session
            if not session.ok:
@@ -1036,7 +1026,7 @@ class Session(object):
                time.sleep(0.8)
    def _post(self, url, data, redirect=True):
-        session = self._session.post(url, data, headers=self._headers, allow_redirects=redirect)
+        session = self._session.post(url, data, headers=self._headers, allow_redirects=redirect, cookies=cj)
        if session.ok:
            return session
        if not session.ok:
@@ -1140,14 +1130,12 @@ class UdemyAuth(object):
        self._cache = cache_session
        self._session = Session()
-    def authenticate(self, bearer_token=""):
+    def authenticate(self, bearer_token=None):
        if bearer_token:
            self._session._set_auth_headers(bearer_token=bearer_token)
-            self._session._session.cookies.update({"bearer_token": bearer_token})
+            return self._session
            return self._session, bearer_token
        else:
-            self._session._set_auth_headers()
+            return None
            return None, None
 def durationtoseconds(period):
@@ -1197,9 +1185,7 @@ def mux_process(video_title, video_filepath, audio_filepath, output_path):
                transcode, video_filepath, audio_filepath, codec, h265_crf, h265_preset, video_title, output_path
            )
        else:
-            command = 'ffmpeg -y -i "{}" -i "{}" -c:v copy -c:a copy -fflags +bitexact -map_metadata -1 -metadata title="{}" "{}"'.format(
+            command = 'ffmpeg -y -i "{}" -i "{}" -c:v copy -c:a copy -fflags +bitexact -map_metadata -1 -metadata title="{}" "{}"'.format(video_filepath, audio_filepath, video_title, output_path)
                video_filepath, audio_filepath, video_title, output_path
            )
    else:
        if use_h265:
            command = 'nice -n 7 ffmpeg {} -y -i "{}" -i "{}" -c:v libx265 -vtag hvc1 -crf {} -preset {} -c:a copy -fflags +bitexact -map_metadata -1 -metadata title="{}" "{}"'.format(
@@ -1538,7 +1524,18 @@ def process_lecture(lecture, lecture_path, lecture_file_name, chapter_dir):
                    source_type = source.get("type")
                    if source_type == "hls":
                        temp_filepath = lecture_path.replace(".mp4", ".%(ext)s")
-                        cmd = ["yt-dlp",  "--enable-file-urls", "--force-generic-extractor", "--concurrent-fragments", f"{concurrent_downloads}", "--downloader", "aria2c", "-o", f"{temp_filepath}", f"{url}"]
+                        cmd = [
                            "yt-dlp",
                            "--enable-file-urls",
                            "--force-generic-extractor",
                            "--concurrent-fragments",
                            f"{concurrent_downloads}",
                            "--downloader",
                            "aria2c",
                            "-o",
                            f"{temp_filepath}",
                            f"{url}",
                        ]
                        if disable_ipv6:
                            cmd.append("--downloader-args")
                            cmd.append('aria2c:"--disable-ipv6"')
@@ -1574,7 +1571,6 @@ def process_lecture(lecture, lecture_path, lecture_file_name, chapter_dir):
            logger.error("      > Missing sources for lecture", lecture)
 def process_quiz(udemy: Udemy, lecture, chapter_dir):
    lecture_title = lecture.get("lecture_title")
    lecture_index = lecture.get("lecture_index")
@@ -1594,7 +1590,6 @@ def process_quiz(udemy: Udemy, lecture, chapter_dir):
            f.write(html)
 def parse_new(udemy: Udemy, udemy_object: dict):
    total_chapters = udemy_object.get("total_chapters")
    total_lectures = udemy_object.get("total_lectures")
@@ -1851,9 +1846,9 @@ def main():
        counter = -1
        if resource:
-            logger.info("> Trying to logout")
+            logger.info("> Terminating Session...")
            udemy.session.terminate()
-            logger.info("> Logged out.")
+            logger.info("> Session Terminated.")
        if course:
            logger.info("> Processing course data, this may take a minute. ")
--- a/requirements.txt
+++ b/requirements.txt
@@ -15,3 +15,4 @@ lxml
 six
 pathvalidate
 coloredlogs
 browser_cookie3
`@@ -1 +1 @@`
	`__version__ = "1.2.10"`	`__version__ = "1.2.10-cookies"`