fix large courses

2025-04-29 19:34:25 +02:00 · 2024-02-21 23:10:49 -05:00 · 2024-02-21 23:10:49 -05:00 · 1b1b5d81bd
commit 1b1b5d81bd
parent db7b0490e6
3 changed files with 68 additions and 56 deletions
--- a/README.md
+++ b/README.md
@ -20,11 +20,24 @@
 Utility script to download Udemy courses, has support for DRM videos but requires the user to acquire the decryption key (for legal reasons).<br>
 Windows is the primary development OS, but I've made an effort to support Linux also (Mac untested).

+> [!CAUTION]
+> The ability to download captions automatically is currently broken due to changes in Udemy's API!
+
+> [!IMPORTANT]  
+> This tool will not work on encrypted courses without decryption keys being provided!
+>
+> Downloading courses is against Udemy's Terms of Service; I am NOT held responsible for your account getting suspended as a result of the use of this program!
+>
+> This program is WIP, the code is provided as-is and I am not held responsible for any legal issues resulting from the use of this program.
+
 # Requirements

 The following are a list of required third-party tools, you will need to ensure they are in your systems path and that typing their name in a terminal invokes them.

-_**Note**:_ _These are seperate requirements that are not installed with the pip command! You will need to download and install these manually!_
+> [!NOTE]  
+> These are separate requirements that are not installed with the pip command!
+>
+> You will need to download and install these manually!

 -   [Python 3](https://python.org/)
 -   [ffmpeg](https://www.ffmpeg.org/) - This tool is also available in Linux package repositories.
@ -35,8 +48,6 @@ _**Note**:_ _These are seperate requirements that are not installed with the pip

 # Usage

-_quick and dirty how-to_
-
 You will need to get a few things before you can use this program:

 -   Decryption Key ID
@ -58,7 +69,10 @@ You will need to get a few things before you can use this program:

 ## Key ID and Key

-It is up to you to acquire the key and key ID. Please **DO NOT** ask me for help acquiring these, decrypting DRM protected content can be considered piracy. The tool required for this has already been discused in a GitHub issue.
+> [!IMPORTANT]  
+> For courses that are encrypted, it is up to you to acquire the decryption keys.
+>
+> Please **DO NOT** ask me for help acquiring these!

 -   Enter the key and key id in the `keyfile.json`
 -   ![keyfile example](https://i.imgur.com/e5aU0ng.png)
@ -66,18 +80,21 @@ It is up to you to acquire the key and key ID. Please **DO NOT** ask me for help

 ## Cookies

+> [!TIP]
+> Cookies are not required for individually purchased courses.
+
 To download a course included in a subscription plan that you did not purchase individually, you will need to use cookies. You can also use cookies as an alternative to Bearer Tokens.

 The program can automatically extract them from your browser. You can specify what browser to extract cookies from with the `--browser` argument. Supported browsers are:

-   chrome
-   firefox
-   opera
-   edge
-   brave
-   chromium
-   vivaldi
-   safari
+-   `chrome`
+-   `firefox`
+-   `opera`
+-   `edge`
+-   `brave`
+-   `chromium`
+-   `vivaldi`
+-   `safari`

 ## Ready to go

--- a/constants.py
+++ b/constants.py
@ -11,7 +11,8 @@ HEADERS = {
 }
 LOGIN_URL = "https://www.udemy.com/join/login-popup/?ref=&display_type=popup&loc"
 LOGOUT_URL = "https://www.udemy.com/user/logout"
-COURSE_URL = "https://{portal_name}.udemy.com/api-2.0/courses/{course_id}/cached-subscriber-curriculum-items?fields[asset]=results,title,external_url,time_estimation,download_urls,slide_urls,filename,asset_type,captions,media_license_token,course_is_drmed,media_sources,stream_urls,body&fields[chapter]=object_index,title,sort_order&fields[lecture]=id,title,object_index,asset,supplementary_assets,view_html&page_size=10000"
+# COURSE_URL = "https://{portal_name}.udemy.com/api-2.0/courses/{course_id}/cached-subscriber-curriculum-items?fields[asset]=results,title,external_url,time_estimation,download_urls,slide_urls,filename,asset_type,captions,media_license_token,course_is_drmed,media_sources,stream_urls,body&fields[chapter]=object_index,title,sort_order&fields[lecture]=id,title,object_index,asset,supplementary_assets,view_html&page_size=10000"
+COURSE_URL = "https://{portal_name}.udemy.com/api-2.0/courses/{course_id}/subscriber-curriculum-items/?page_size=100&fields[asset]=asset_type,length,media_license_token,course_is_drmed,media_sources,thumbnail_sprite,slides,slide_urls,filename,download_urls,external_url&fields[chapter]=object_index,title&fields[lecture]=id,title,object_index,asset,supplementary_assets,view_html&caching_intent=True"
 COURSE_INFO_URL = "https://{portal_name}.udemy.com/api-2.0/courses/{course_id}/"
 COURSE_SEARCH = "https://{portal_name}.udemy.com/api-2.0/users/me/subscribed-courses?fields[course]=id,url,title,published_title&page=1&page_size=500&search={course_name}"
 SUBSCRIBED_COURSES = "https://{portal_name}.udemy.com/api-2.0/users/me/subscribed-courses/?ordering=-last_accessed&fields[course]=id,title,url&page=1&page_size=12"
@ -24,8 +25,7 @@ SAVED_DIR = os.path.join(os.getcwd(), "saved")
 KEY_FILE_PATH = os.path.join(os.getcwd(), "keyfile.json")
 COOKIE_FILE_PATH = os.path.join(os.getcwd(), "cookies.txt")
 LOG_DIR_PATH = os.path.join(os.getcwd(), "logs")
-LOG_FILE_PATH = os.path.join(
-    os.getcwd(), "logs", f"{time.strftime('%Y-%m-%d-%I-%M-%S')}.log")
-LOG_FORMAT = '[%(asctime)s] [%(name)s] [%(funcName)s:%(lineno)d] %(levelname)s: %(message)s'
-LOG_DATE_FORMAT = '%I:%M:%S'
+LOG_FILE_PATH = os.path.join(os.getcwd(), "logs", f"{time.strftime('%Y-%m-%d-%I-%M-%S')}.log")
+LOG_FORMAT = "[%(asctime)s] [%(name)s] [%(funcName)s:%(lineno)d] %(levelname)s: %(message)s"
+LOG_DATE_FORMAT = "%I:%M:%S"
 LOG_LEVEL = logging.INFO
--- a/main.py
+++ b/main.py
@ -3,6 +3,7 @@ import argparse
 import glob
 import json
 import logging
+import math
 import os
 import re
 import subprocess
@ -337,7 +338,7 @@ def pre_run():
        with open(KEY_FILE_PATH, encoding="utf8", mode="r") as keyfile:
            keys = json.loads(keyfile.read())
    else:
-        logger.warning("> Keyfile not found! You won't be able to decrypt videos!")
+        logger.warning("> Keyfile not found! You won't be able to decrypt any encrypted videos!")


 class Udemy:
@ -387,7 +388,7 @@ class Udemy:
        try:
            resp = self.session._get(url).json()
        except conn_error as error:
-            logger.fatal(f"[-] Udemy Says: Connection error, {error}")
+            logger.fatal(f"[-] Connection error: {error}")
            time.sleep(0.8)
            sys.exit(1)
        else:
@ -794,11 +795,11 @@ class Udemy:
            webpage = webpage.decode("utf8", "ignore")
            webpage = json.loads(webpage)
        except conn_error as error:
-            logger.fatal(f"Udemy Says: Connection error, {error}")
+            logger.fatal(f"Connection error: {error}")
            time.sleep(0.8)
            sys.exit(1)
        except (ValueError, Exception) as error:
-            logger.fatal(f"Udemy Says: {error} on {url}")
+            logger.fatal(f"{error} on {url}")
            time.sleep(0.8)
            sys.exit(1)
        else:
@ -811,7 +812,7 @@ class Udemy:
        try:
            resp = self.session._get(url).json()
        except conn_error as error:
-            logger.fatal(f"Udemy Says: Connection error, {error}")
+            logger.fatal(f"Connection error: {error}")
            time.sleep(0.8)
            sys.exit(1)
        else:
@ -820,39 +821,23 @@ class Udemy:
    def _extract_course_json(self, url, course_id, portal_name):
        self.session._headers.update({"Referer": url})
        url = COURSE_URL.format(portal_name=portal_name, course_id=course_id)
-        try:
-            resp = self.session._get(url)
-            if resp.status_code in [502, 503, 504]:
-                logger.info("> The course content is large, using large content extractor...")
-                resp = self._extract_large_course_content(url=url)
-            else:
-                resp = resp.json()
-        except conn_error as error:
-            logger.fatal(f"Udemy Says: Connection error, {error}")
-            time.sleep(0.8)
-            sys.exit(1)
-        except (ValueError, Exception):
-            resp = self._extract_large_course_content(url=url)
-            return resp
-        else:
-            return resp
-
-    def _extract_large_course_content(self, url):
-        url = url.replace("10000", "50") if url.endswith("10000") else url
+        page = 1
        try:
            data = self.session._get(url).json()
        except conn_error as error:
-            logger.fatal(f"Udemy Says: Connection error, {error}")
+            logger.fatal(f"Connection error: {error}")
            time.sleep(0.8)
            sys.exit(1)
        else:
            _next = data.get("next")
+            _count = data.get("count")
+            est_page_count = math.ceil(_count / 100)  # 100 is the max results per page
            while _next:
-                logger.info("> Downloading course information.. ")
+                logger.info(f"> Downloading course information.. (Page {page + 1}/{est_page_count})")
                try:
                    resp = self.session._get(_next).json()
                except conn_error as error:
-                    logger.fatal(f"Udemy Says: Connection error, {error}")
+                    logger.fatal(f"Connection error: {error}")
                    time.sleep(0.8)
                    sys.exit(1)
                else:
@ -861,6 +846,7 @@ class Udemy:
                    if results and isinstance(results, list):
                        for d in resp["results"]:
                            data["results"].append(d)
+                        page = page + 1
            return data

    def _extract_course(self, response, course_name):
@ -880,11 +866,11 @@ class Udemy:
            url = MY_COURSES_URL.format(portal_name=portal_name)
            webpage = self.session._get(url).json()
        except conn_error as error:
-            logger.fatal(f"Udemy Says: Connection error, {error}")
+            logger.fatal(f"Connection error: {error}")
            time.sleep(0.8)
            sys.exit(1)
        except (ValueError, Exception) as error:
-            logger.fatal(f"Udemy Says: {error}")
+            logger.fatal(f"{error}")
            time.sleep(0.8)
            sys.exit(1)
        else:
@ -897,11 +883,11 @@ class Udemy:
        try:
            webpage = self.session._get(url).json()
        except conn_error as error:
-            logger.fatal(f"Udemy Says: Connection error, {error}")
+            logger.fatal(f"Connection error: {error}")
            time.sleep(0.8)
            sys.exit(1)
        except (ValueError, Exception) as error:
-            logger.fatal(f"Udemy Says: {error}")
+            logger.fatal(f"{error}")
            time.sleep(0.8)
            sys.exit(1)
        else:
@ -917,11 +903,11 @@ class Udemy:
            url = f"{url}&is_archived=true"
            webpage = self.session._get(url).json()
        except conn_error as error:
-            logger.fatal(f"Udemy Says: Connection error, {error}")
+            logger.fatal(f"Connection error: {error}")
            time.sleep(0.8)
            sys.exit(1)
        except (ValueError, Exception) as error:
-            logger.fatal(f"Udemy Says: {error}")
+            logger.fatal(f"{error}")
            time.sleep(0.8)
            sys.exit(1)
        else:
@ -934,11 +920,11 @@ class Udemy:
            url = MY_COURSES_URL.format(portal_name=portal_name)
            webpage = self.session._get(url).json()
        except conn_error as error:
-            logger.fatal(f"Udemy Says: Connection error, {error}")
+            logger.fatal(f"Connection error: {error}")
            time.sleep(0.8)
            sys.exit(1)
        except (ValueError, Exception) as error:
-            logger.fatal(f"Udemy Says: {error}")
+            logger.fatal(f"{error}")
            time.sleep(0.8)
            sys.exit(1)
        else:
@ -951,11 +937,11 @@ class Udemy:
        try:
            webpage = self.session._get(url).json()
        except conn_error as error:
-            logger.fatal(f"Udemy Says: Connection error, {error}")
+            logger.fatal(f"Connection error: {error}")
            time.sleep(0.8)
            sys.exit(1)
        except (ValueError, Exception) as error:
-            logger.fatal(f"Udemy Says: {error}")
+            logger.fatal(f"{error}")
            time.sleep(0.8)
            sys.exit(1)
        else:
@ -971,11 +957,11 @@ class Udemy:
            url = f"{url}&is_archived=true"
            webpage = self.session._get(url).json()
        except conn_error as error:
-            logger.fatal(f"Udemy Says: Connection error, {error}")
+            logger.fatal(f"Connection error: {error}")
            time.sleep(0.8)
            sys.exit(1)
        except (ValueError, Exception) as error:
-            logger.fatal(f"Udemy Says: {error}")
+            logger.fatal(f"{error}")
            time.sleep(0.8)
            sys.exit(1)
        else:
@ -1792,6 +1778,15 @@ def _print_course_info(udemy: Udemy, udemy_object: dict):
    chapter_count = udemy_object.get("total_chapters")
    lecture_count = udemy_object.get("total_lectures")

+    if lecture_count > 100:
+        logger.warning(
+            "This course has a lot of lectures! Fetching all the information can take a long time as well as spams Udemy's servers. It is NOT recommended to continue! Are you sure you want to do this?"
+        )
+        yn = input("(y/n): ")
+        if yn.lower() != "y":
+            logger.info("Probably wise. Please remove the --info argument and try again.")
+            sys.exit(0)
+
    logger.info("> Course: {}".format(course_title))
    logger.info("> Total Chapters: {}".format(chapter_count))
    logger.info("> Total Lectures: {}".format(lecture_count))