Merge pull request #177 from Puyodead1/feat/cookies

Merge feat/cookies
2025-04-30 00:34:24 +02:00 · 2023-08-12 23:56:15 -04:00 · 2023-08-12 23:56:15 -04:00 · 84eb17b793
commit 84eb17b793
parent e9b9d8a6a4 43f6085e91
4 changed files with 179 additions and 133 deletions
--- a/README.md
+++ b/README.md
@ -61,19 +61,31 @@ It is up to you to acquire the key and key ID. Please **DO NOT** ask me for help
 -   ![keyfile example](https://i.imgur.com/e5aU0ng.png)
 -   ![example key and kid from console](https://i.imgur.com/awgndZA.png)

-## Start Downloading
+## Cookies
+
+To download a course that is included in a subscription plan but that you did not purchase individually, you will need to use cookies. You can also use cookies as an alternative to Bearer Tokens.
+
+The program can automatically extract cookies from your browser. Use the `--browser` argument to specify which browser to extract them from. Supported browsers are:
+
+-   chrome
+-   firefox
+-   opera
+-   edge
+-   brave
+-   chromium
+-   vivaldi
+-   safari
+
+## Ready to go

 You can now run the program, see the examples below. The course will download to `out_dir`.

-# Udemy Subscription Plans
-
-You will need to use a different branch of the program, please see [feat/cookies](https://github.com/Puyodead1/udemy-downloader/tree/feat/cookies).
-
 # Advanced Usage

 ```
-usage: main.py [-h] -c COURSE_URL [-b BEARER_TOKEN] [-q QUALITY] [-l LANG] [-cd CONCURRENT_DOWNLOADS] [--disable-ipv6] [--skip-lectures] [--download-assets] [--download-captions] [--keep-vtt] [--skip-hls]
-               [--info] [--id-as-course-name] [-sc] [--save-to-file] [--load-from-file] [--log-level LOG_LEVEL] [--use-h265] [--h265-crf H265_CRF] [--h265-preset H265_PRESET] [--use-nvenc] [-v]
+usage: main.py [-h] -c COURSE_URL [-b BEARER_TOKEN] [-q QUALITY] [-l LANG] [-cd CONCURRENT_DOWNLOADS] [--disable-ipv6] [--skip-lectures] [--download-assets] [--download-captions] [--download-quizzes]
+               [--keep-vtt] [--skip-hls] [--info] [--id-as-course-name] [-sc] [--save-to-file] [--load-from-file] [--log-level LOG_LEVEL] [--browser {chrome,firefox,opera,edge,brave,chromium,vivaldi,safari}]
+               [--use-h265] [--h265-crf H265_CRF] [--h265-preset H265_PRESET] [--use-nvenc] [-v]

 Udemy Downloader

@ -92,6 +104,7 @@ options:
  --skip-lectures       If specified, lectures won't be downloaded
  --download-assets     If specified, lecture assets will be downloaded
  --download-captions   If specified, captions will be downloaded
+  --download-quizzes    If specified, quizzes will be downloaded
  --keep-vtt            If specified, .vtt files won't be removed
  --skip-hls            If specified, hls streams will be skipped (faster fetching) (hls streams usually contain 1080p quality for non-drm lectures)
  --info                If specified, only course information will be printed, nothing will be downloaded
@ -104,6 +117,8 @@ options:
                        time)
  --log-level LOG_LEVEL
                        Logging level: one of DEBUG, INFO, ERROR, WARNING, CRITICAL (Default is INFO)
+  --browser {chrome,firefox,opera,edge,brave,chromium,vivaldi,safari}
+                        The browser to extract cookies from
  --use-h265            If specified, videos will be encoded with the H.265 codec
  --h265-crf H265_CRF   Set a custom CRF value for H.265 encoding. FFMPEG default is 28
  --h265-preset H265_PRESET
--- a/_version.py
+++ b/_version.py
@ -1 +1 @@
-__version__ = "1.2.10"
+__version__ = "1.2.10-cookies"
--- a/main.py
+++ b/main.py
@ -12,6 +12,7 @@ from html.parser import HTMLParser as compat_HTMLParser
 from pathlib import Path
 from typing import IO

+import browser_cookie3
 import m3u8
 import requests
 import yt_dlp
@ -29,7 +30,6 @@ from utils import extract_kid
 from vtt_to_srt import convert

 retry = 3
-cookies = ""
 downloader = None
 logger: logging.Logger = None
 dl_assets = False
@ -56,6 +56,8 @@ use_h265 = False
 h265_crf = 28
 h265_preset = "medium"
 use_nvenc = False
+browser = None
+cj = None


 # from https://stackoverflow.com/a/21978778/9785713
@ -68,7 +70,7 @@ def log_subprocess_output(prefix: str, pipe: IO[bytes]):

 # this is the first function that is called, we parse the arguments, setup the logger, and ensure that required directories exist
 def pre_run():
-    global cookies, dl_assets, dl_captions, dl_quizzes, skip_lectures, caption_locale, quality, bearer_token, course_name, keep_vtt, skip_hls, concurrent_downloads, disable_ipv6, load_from_file, save_to_file, bearer_token, course_url, info, logger, keys, id_as_course_name, is_subscription_course, LOG_LEVEL, use_h265, h265_crf, h265_preset, use_nvenc
+    global dl_assets, dl_captions, dl_quizzes, skip_lectures, caption_locale, quality, bearer_token, course_name, keep_vtt, skip_hls, concurrent_downloads, disable_ipv6, load_from_file, save_to_file, bearer_token, course_url, info, logger, keys, id_as_course_name, LOG_LEVEL, use_h265, h265_crf, h265_preset, use_nvenc, browser

    # make sure the directory exists
    if not os.path.exists(DOWNLOAD_DIR):
@ -187,6 +189,12 @@ def pre_run():
        type=str,
        help="Logging level: one of DEBUG, INFO, ERROR, WARNING, CRITICAL (Default is INFO)",
    )
+    parser.add_argument(
+        "--browser",
+        dest="browser",
+        help="The browser to extract cookies from",
+        choices=["chrome", "firefox", "opera", "edge", "brave", "chromium", "vivaldi", "safari"],
+    )
    parser.add_argument(
        "--use-h265",
        dest="use_h265",
@ -304,6 +312,8 @@ def pre_run():
        id_as_course_name = args.id_as_course_name
    if args.is_subscription_course:
        is_subscription_course = args.is_subscription_course
+    if args.browser:
+        browser = args.browser

    Path(DOWNLOAD_DIR).mkdir(parents=True, exist_ok=True)
    Path(SAVED_DIR).mkdir(parents=True, exist_ok=True)
@ -315,33 +325,41 @@ def pre_run():
    else:
        logger.warning("> Keyfile not found! You won't be able to decrypt videos!")

-    # Read cookies from file
-    if os.path.exists(COOKIE_FILE_PATH):
-        with open(COOKIE_FILE_PATH, encoding="utf8", mode="r") as cookiefile:
-            cookies = cookiefile.read()
-            cookies = cookies.rstrip()
-    else:
-        logger.warning(
-            "No cookies.txt file was found, you won't be able to download subscription courses! You can ignore ignore this if you don't plan to download a course included in a subscription plan."
-        )
-

 class Udemy:
    def __init__(self, bearer_token):
+        global cj
+
        self.session = None
        self.bearer_token = None
        self.auth = UdemyAuth(cache_session=False)
        if not self.session:
-            self.session, self.bearer_token = self.auth.authenticate(bearer_token=bearer_token)
+            self.session = self.auth.authenticate(bearer_token=bearer_token)

-        if self.session and self.bearer_token:
-            self.session._headers.update({"Authorization": "Bearer {}".format(self.bearer_token)})
-            self.session._headers.update({"X-Udemy-Authorization": "Bearer {}".format(self.bearer_token)})
-            logger.info("Login Success")
-        else:
-            logger.fatal("Login Failure! You are probably missing an access token!")
+        if not self.session:
+            if browser == None:
+                logger.error("No bearer token was provided, and no browser for cookie extraction was specified.")
                sys.exit(1)

+            logger.warning("No bearer token was provided, attempting to use browser cookies.")
+
+            self.session = self.auth._session
+
+            if browser == "chrome":
+                cj = browser_cookie3.chrome()
+            elif browser == "firefox":
+                cj = browser_cookie3.firefox()
+            elif browser == "opera":
+                cj = browser_cookie3.opera()
+            elif browser == "edge":
+                cj = browser_cookie3.edge()
+            elif browser == "brave":
+                cj = browser_cookie3.brave()
+            elif browser == "chromium":
+                cj = browser_cookie3.chromium()
+            elif browser == "vivaldi":
+                cj = browser_cookie3.vivaldi()
+
    def _get_quiz(self, quiz_id):
        print(portal_name)
        self.session._headers.update(
@ -547,7 +565,8 @@ class Udemy:
                    continue
                width, height = resolution

-                if height in seen: continue
+                if height in seen:
+                    continue

                # we need to save the individual playlists to disk also
                playlist_path = Path(temp_path, f"index_{asset_id}_{width}x{height}.m3u8")
@ -869,9 +888,7 @@ class Udemy:
    def _extract_course_info(self, url):
        global portal_name
        portal_name, course_name = self.extract_course_name(url)
-        course = {
-            "portal_name": portal_name
-        }
+        course = {"portal_name": portal_name}

        if not is_subscription_course:
            results = self._subscribed_courses(portal_name=portal_name, course_name=course_name)
@ -898,11 +915,11 @@ class Udemy:
                "It seems either you are not enrolled or you have to visit the course atleast once while you are logged in.",
            )
            logger.info(
-                "Trying to logout now...",
+                "Terminating Session...",
            )
            self.session.terminate()
            logger.info(
-                "Logged out successfully.",
+                "Session terminated.",
            )
            sys.exit(1)

@ -1009,6 +1026,7 @@ class Udemy:

        return lecture

+
 class Session(object):
    def __init__(self):
        self._headers = HEADERS
@ -1023,11 +1041,10 @@ class Session(object):
    def _set_auth_headers(self, bearer_token=""):
        self._headers["Authorization"] = "Bearer {}".format(bearer_token)
        self._headers["X-Udemy-Authorization"] = "Bearer {}".format(bearer_token)
-        self._headers["Cookie"] = cookies

    def _get(self, url):
        for i in range(10):
-            session = self._session.get(url, headers=self._headers)
+            session = self._session.get(url, headers=self._headers, cookies=cj)
            if session.ok or session.status_code in [502, 503]:
                return session
            if not session.ok:
@ -1036,7 +1053,7 @@ class Session(object):
                time.sleep(0.8)

    def _post(self, url, data, redirect=True):
-        session = self._session.post(url, data, headers=self._headers, allow_redirects=redirect)
+        session = self._session.post(url, data, headers=self._headers, allow_redirects=redirect, cookies=cj)
        if session.ok:
            return session
        if not session.ok:
@ -1140,14 +1157,12 @@ class UdemyAuth(object):
        self._cache = cache_session
        self._session = Session()

-    def authenticate(self, bearer_token=""):
+    def authenticate(self, bearer_token=None):
        if bearer_token:
            self._session._set_auth_headers(bearer_token=bearer_token)
-            self._session._session.cookies.update({"bearer_token": bearer_token})
-            return self._session, bearer_token
+            return self._session
        else:
-            self._session._set_auth_headers()
-            return None, None
+            return None


 def durationtoseconds(period):
@ -1197,9 +1212,7 @@ def mux_process(video_title, video_filepath, audio_filepath, output_path):
                transcode, video_filepath, audio_filepath, codec, h265_crf, h265_preset, video_title, output_path
            )
        else:
-            command = 'ffmpeg -y -i "{}" -i "{}" -c:v copy -c:a copy -fflags +bitexact -map_metadata -1 -metadata title="{}" "{}"'.format(
-                video_filepath, audio_filepath, video_title, output_path
-            )
+            command = 'ffmpeg -y -i "{}" -i "{}" -c:v copy -c:a copy -fflags +bitexact -map_metadata -1 -metadata title="{}" "{}"'.format(video_filepath, audio_filepath, video_title, output_path)
    else:
        if use_h265:
            command = 'nice -n 7 ffmpeg {} -y -i "{}" -i "{}" -c:v libx265 -vtag hvc1 -crf {} -preset {} -c:a copy -fflags +bitexact -map_metadata -1 -metadata title="{}" "{}"'.format(
@ -1538,7 +1551,18 @@ def process_lecture(lecture, lecture_path, lecture_file_name, chapter_dir):
                    source_type = source.get("type")
                    if source_type == "hls":
                        temp_filepath = lecture_path.replace(".mp4", ".%(ext)s")
-                        cmd = ["yt-dlp",  "--enable-file-urls", "--force-generic-extractor", "--concurrent-fragments", f"{concurrent_downloads}", "--downloader", "aria2c", "-o", f"{temp_filepath}", f"{url}"]
+                        cmd = [
+                            "yt-dlp",
+                            "--enable-file-urls",
+                            "--force-generic-extractor",
+                            "--concurrent-fragments",
+                            f"{concurrent_downloads}",
+                            "--downloader",
+                            "aria2c",
+                            "-o",
+                            f"{temp_filepath}",
+                            f"{url}",
+                        ]
                        if disable_ipv6:
                            cmd.append("--downloader-args")
                            cmd.append('aria2c:"--disable-ipv6"')
@ -1574,7 +1598,6 @@ def process_lecture(lecture, lecture_path, lecture_file_name, chapter_dir):
            logger.error("      > Missing sources for lecture", lecture)


-
 def process_quiz(udemy: Udemy, lecture, chapter_dir):
    lecture_title = lecture.get("lecture_title")
    lecture_index = lecture.get("lecture_index")
@ -1594,7 +1617,6 @@ def process_quiz(udemy: Udemy, lecture, chapter_dir):
            f.write(html)


-
 def parse_new(udemy: Udemy, udemy_object: dict):
    total_chapters = udemy_object.get("total_chapters")
    total_lectures = udemy_object.get("total_lectures")
@ -1848,28 +1870,37 @@ def main():
        udemy_object["title"] = title
        udemy_object["course_title"] = course_title
        udemy_object["chapters"] = []
-        counter = -1
+        chapter_index_counter = -1

        if resource:
-            logger.info("> Trying to logout")
+            logger.info("> Terminating Session...")
            udemy.session.terminate()
-            logger.info("> Logged out.")
+            logger.info("> Session Terminated.")

        if course:
            logger.info("> Processing course data, this may take a minute. ")
            lecture_counter = 0
+            lectures = []
+            
            for entry in course:
                clazz = entry.get("_class")

                if clazz == "chapter":
+                    # add all lectures for the previous chapter
+                    if len(lectures) > 0:
+                        udemy_object["chapters"][chapter_index_counter]["lectures"] = lectures
+                        udemy_object["chapters"][chapter_index_counter]["lecture_count"] = len(lectures)
+
+                    # reset lecture tracking
                    lecture_counter = 0
                    lectures = []
+
                    chapter_index = entry.get("object_index")
                    chapter_title = "{0:02d} - ".format(chapter_index) + sanitize_filename(entry.get("title"))

                    if chapter_title not in udemy_object["chapters"]:
                        udemy_object["chapters"].append({"chapter_title": chapter_title, "chapter_id": entry.get("id"), "chapter_index": chapter_index, "lectures": []})
-                        counter += 1
+                        chapter_index_counter += 1
                elif clazz == "lecture":
                    lecture_counter += 1
                    lecture_id = entry.get("id")
@ -1889,8 +1920,8 @@ def main():
                        lecture_title = "{0:03d} ".format(lecture_counter) + sanitize_filename(entry.get("title"))

                        lectures.append({"index": lecture_counter, "lecture_index": lecture_index, "lecture_title": lecture_title, "_class": entry.get("_class"), "id": lecture_id, "data": entry})
-                    udemy_object["chapters"][counter]["lectures"] = lectures
-                    udemy_object["chapters"][counter]["lecture_count"] = len(lectures)
+                    else:
+                        logger.debug("Lecture: ID is None, skipping")
                elif clazz == "quiz":
                    lecture_counter += 1
                    lecture_id = entry.get("id")
@ -1910,9 +1941,8 @@ def main():
                        lecture_title = "{0:03d} ".format(lecture_counter) + sanitize_filename(entry.get("title"))

                        lectures.append({"index": lecture_counter, "lecture_index": lecture_index, "lecture_title": lecture_title, "_class": entry.get("_class"), "id": lecture_id, "data": entry})
-
-                    udemy_object["chapters"][counter]["lectures"] = lectures
-                    udemy_object["chapters"][counter]["lectures_count"] = len(lectures)
+                    else:
+                        logger.debug("Quiz: ID is None, skipping")

            udemy_object["total_chapters"] = len(udemy_object["chapters"])
            udemy_object["total_lectures"] = sum([entry.get("lecture_count", 0) for entry in udemy_object["chapters"] if entry])
--- a/requirements.txt
+++ b/requirements.txt
@ -15,3 +15,4 @@ lxml
 six
 pathvalidate
 coloredlogs
+browser_cookie3