From 1b1b5d81bd8147e693d147402d2aecd04c85b191 Mon Sep 17 00:00:00 2001
From: Puyodead1 <puyodead@proton.me>
Date: Wed, 21 Feb 2024 23:10:49 -0500
Subject: [PATCH] fix large courses

---
 README.md    | 41 ++++++++++++++++++++---------
 constants.py | 10 +++----
 main.py      | 73 ++++++++++++++++++++++++----------------------------
 3 files changed, 68 insertions(+), 56 deletions(-)
diff --git a/README.md b/README.md
index 19bd284..7990c36 100644
--- a/README.md
+++ b/README.md
@@ -20,11 +20,24 @@
 Utility script to download Udemy courses, has support for DRM videos but requires the user to acquire the decryption key (for legal reasons).<br>
 Windows is the primary development OS, but I've made an effort to support Linux also (Mac untested).
 
+> [!CAUTION]
+> The ability to download captions automatically is currently broken due to changes in Udemy's API!
+
+> [!IMPORTANT]  
+> This tool will not work on encrypted courses without decryption keys being provided!
+>
+> Downloading courses is against Udemy's Terms of Service. I am NOT responsible for your account getting suspended as a result of using this program!
+>
+> This program is a work in progress; the code is provided as-is, and I am not responsible for any legal issues resulting from the use of this program.
+
 # Requirements
 
 The following are a list of required third-party tools, you will need to ensure they are in your systems path and that typing their name in a terminal invokes them.
 
-_**Note**:_ _These are seperate requirements that are not installed with the pip command! You will need to download and install these manually!_
+> [!NOTE]  
+> These are separate requirements that are not installed with the pip command!
+>
+> You will need to download and install these manually!
 
 -   [Python 3](https://python.org/)
 -   [ffmpeg](https://www.ffmpeg.org/) - This tool is also available in Linux package repositories.
@@ -35,8 +48,6 @@ _**Note**:_ _These are seperate requirements that are not installed with the pip
 
 # Usage
 
-_quick and dirty how-to_
-
 You will need to get a few things before you can use this program:
 
 -   Decryption Key ID
@@ -58,7 +69,10 @@ You will need to get a few things before you can use this program:
 
 ## Key ID and Key
 
-It is up to you to acquire the key and key ID. Please **DO NOT** ask me for help acquiring these, decrypting DRM protected content can be considered piracy. The tool required for this has already been discused in a GitHub issue.
+> [!IMPORTANT]  
+> For courses that are encrypted, it is up to you to acquire the decryption keys.
+>
+> Please **DO NOT** ask me for help acquiring these!
 
 -   Enter the key and key id in the `keyfile.json`
 -   ![keyfile example](https://i.imgur.com/e5aU0ng.png)
@@ -66,18 +80,21 @@ It is up to you to acquire the key and key ID. Please **DO NOT** ask me for help
 
 ## Cookies
 
+> [!TIP]
+> Cookies are not required for individually purchased courses.
+
 To download a course included in a subscription plan that you did not purchase individually, you will need to use cookies. You can also use cookies as an alternative to Bearer Tokens.
 
 The program can automatically extract them from your browser. You can specify what browser to extract cookies from with the `--browser` argument. Supported browsers are:
 
--   chrome
--   firefox
--   opera
--   edge
--   brave
--   chromium
--   vivaldi
--   safari
+-   `chrome`
+-   `firefox`
+-   `opera`
+-   `edge`
+-   `brave`
+-   `chromium`
+-   `vivaldi`
+-   `safari`
 
 ## Ready to go
 
diff --git a/constants.py b/constants.py
index 7898381..23a05bb 100644
--- a/constants.py
+++ b/constants.py
@@ -11,7 +11,8 @@ HEADERS = {
 }
 LOGIN_URL = "https://www.udemy.com/join/login-popup/?ref=&display_type=popup&loc"
 LOGOUT_URL = "https://www.udemy.com/user/logout"
-COURSE_URL = "https://{portal_name}.udemy.com/api-2.0/courses/{course_id}/cached-subscriber-curriculum-items?fields[asset]=results,title,external_url,time_estimation,download_urls,slide_urls,filename,asset_type,captions,media_license_token,course_is_drmed,media_sources,stream_urls,body&fields[chapter]=object_index,title,sort_order&fields[lecture]=id,title,object_index,asset,supplementary_assets,view_html&page_size=10000"
+# COURSE_URL = "https://{portal_name}.udemy.com/api-2.0/courses/{course_id}/cached-subscriber-curriculum-items?fields[asset]=results,title,external_url,time_estimation,download_urls,slide_urls,filename,asset_type,captions,media_license_token,course_is_drmed,media_sources,stream_urls,body&fields[chapter]=object_index,title,sort_order&fields[lecture]=id,title,object_index,asset,supplementary_assets,view_html&page_size=10000"
+COURSE_URL = "https://{portal_name}.udemy.com/api-2.0/courses/{course_id}/subscriber-curriculum-items/?page_size=100&fields[asset]=asset_type,length,media_license_token,course_is_drmed,media_sources,thumbnail_sprite,slides,slide_urls,filename,download_urls,external_url&fields[chapter]=object_index,title&fields[lecture]=id,title,object_index,asset,supplementary_assets,view_html&caching_intent=True"
 COURSE_INFO_URL = "https://{portal_name}.udemy.com/api-2.0/courses/{course_id}/"
 COURSE_SEARCH = "https://{portal_name}.udemy.com/api-2.0/users/me/subscribed-courses?fields[course]=id,url,title,published_title&page=1&page_size=500&search={course_name}"
 SUBSCRIBED_COURSES = "https://{portal_name}.udemy.com/api-2.0/users/me/subscribed-courses/?ordering=-last_accessed&fields[course]=id,title,url&page=1&page_size=12"
@@ -24,8 +25,7 @@ SAVED_DIR = os.path.join(os.getcwd(), "saved")
 KEY_FILE_PATH = os.path.join(os.getcwd(), "keyfile.json")
 COOKIE_FILE_PATH = os.path.join(os.getcwd(), "cookies.txt")
 LOG_DIR_PATH = os.path.join(os.getcwd(), "logs")
-LOG_FILE_PATH = os.path.join(
-    os.getcwd(), "logs", f"{time.strftime('%Y-%m-%d-%I-%M-%S')}.log")
-LOG_FORMAT = '[%(asctime)s] [%(name)s] [%(funcName)s:%(lineno)d] %(levelname)s: %(message)s'
-LOG_DATE_FORMAT = '%I:%M:%S'
+LOG_FILE_PATH = os.path.join(os.getcwd(), "logs", f"{time.strftime('%Y-%m-%d-%I-%M-%S')}.log")
+LOG_FORMAT = "[%(asctime)s] [%(name)s] [%(funcName)s:%(lineno)d] %(levelname)s: %(message)s"
+LOG_DATE_FORMAT = "%I:%M:%S"
 LOG_LEVEL = logging.INFO
diff --git a/main.py b/main.py
index 8e0ed6b..2c0760f 100644
--- a/main.py
+++ b/main.py
@@ -3,6 +3,7 @@ import argparse
 import glob
 import json
 import logging
+import math
 import os
 import re
 import subprocess
@@ -337,7 +338,7 @@ def pre_run():
         with open(KEY_FILE_PATH, encoding="utf8", mode="r") as keyfile:
             keys = json.loads(keyfile.read())
     else:
-        logger.warning("> Keyfile not found! You won't be able to decrypt videos!")
+        logger.warning("> Keyfile not found! You won't be able to decrypt any encrypted videos!")
 
 
 class Udemy:
@@ -387,7 +388,7 @@ class Udemy:
         try:
             resp = self.session._get(url).json()
         except conn_error as error:
-            logger.fatal(f"[-] Udemy Says: Connection error, {error}")
+            logger.fatal(f"[-] Connection error: {error}")
             time.sleep(0.8)
             sys.exit(1)
         else:
@@ -794,11 +795,11 @@ class Udemy:
             webpage = webpage.decode("utf8", "ignore")
             webpage = json.loads(webpage)
         except conn_error as error:
-            logger.fatal(f"Udemy Says: Connection error, {error}")
+            logger.fatal(f"Connection error: {error}")
             time.sleep(0.8)
             sys.exit(1)
         except (ValueError, Exception) as error:
-            logger.fatal(f"Udemy Says: {error} on {url}")
+            logger.fatal(f"{error} on {url}")
             time.sleep(0.8)
             sys.exit(1)
         else:
@@ -811,7 +812,7 @@ class Udemy:
         try:
             resp = self.session._get(url).json()
         except conn_error as error:
-            logger.fatal(f"Udemy Says: Connection error, {error}")
+            logger.fatal(f"Connection error: {error}")
             time.sleep(0.8)
             sys.exit(1)
         else:
@@ -820,39 +821,23 @@ class Udemy:
     def _extract_course_json(self, url, course_id, portal_name):
         self.session._headers.update({"Referer": url})
         url = COURSE_URL.format(portal_name=portal_name, course_id=course_id)
-        try:
-            resp = self.session._get(url)
-            if resp.status_code in [502, 503, 504]:
-                logger.info("> The course content is large, using large content extractor...")
-                resp = self._extract_large_course_content(url=url)
-            else:
-                resp = resp.json()
-        except conn_error as error:
-            logger.fatal(f"Udemy Says: Connection error, {error}")
-            time.sleep(0.8)
-            sys.exit(1)
-        except (ValueError, Exception):
-            resp = self._extract_large_course_content(url=url)
-            return resp
-        else:
-            return resp
-
-    def _extract_large_course_content(self, url):
-        url = url.replace("10000", "50") if url.endswith("10000") else url
+        page = 1
         try:
             data = self.session._get(url).json()
         except conn_error as error:
-            logger.fatal(f"Udemy Says: Connection error, {error}")
+            logger.fatal(f"Connection error: {error}")
             time.sleep(0.8)
             sys.exit(1)
         else:
             _next = data.get("next")
+            _count = data.get("count")
+            est_page_count = math.ceil(_count / 100)  # 100 is the max results per page
             while _next:
-                logger.info("> Downloading course information.. ")
+                logger.info(f"> Downloading course information.. (Page {page + 1}/{est_page_count})")
                 try:
                     resp = self.session._get(_next).json()
                 except conn_error as error:
-                    logger.fatal(f"Udemy Says: Connection error, {error}")
+                    logger.fatal(f"Connection error: {error}")
                     time.sleep(0.8)
                     sys.exit(1)
                 else:
@@ -861,6 +846,7 @@ class Udemy:
                     if results and isinstance(results, list):
                         for d in resp["results"]:
                             data["results"].append(d)
+                        page = page + 1
             return data
 
     def _extract_course(self, response, course_name):
@@ -880,11 +866,11 @@ class Udemy:
             url = MY_COURSES_URL.format(portal_name=portal_name)
             webpage = self.session._get(url).json()
         except conn_error as error:
-            logger.fatal(f"Udemy Says: Connection error, {error}")
+            logger.fatal(f"Connection error: {error}")
             time.sleep(0.8)
             sys.exit(1)
         except (ValueError, Exception) as error:
-            logger.fatal(f"Udemy Says: {error}")
+            logger.fatal(f"{error}")
             time.sleep(0.8)
             sys.exit(1)
         else:
@@ -897,11 +883,11 @@ class Udemy:
         try:
             webpage = self.session._get(url).json()
         except conn_error as error:
-            logger.fatal(f"Udemy Says: Connection error, {error}")
+            logger.fatal(f"Connection error: {error}")
             time.sleep(0.8)
             sys.exit(1)
         except (ValueError, Exception) as error:
-            logger.fatal(f"Udemy Says: {error}")
+            logger.fatal(f"{error}")
             time.sleep(0.8)
             sys.exit(1)
         else:
@@ -917,11 +903,11 @@ class Udemy:
             url = f"{url}&is_archived=true"
             webpage = self.session._get(url).json()
         except conn_error as error:
-            logger.fatal(f"Udemy Says: Connection error, {error}")
+            logger.fatal(f"Connection error: {error}")
             time.sleep(0.8)
             sys.exit(1)
         except (ValueError, Exception) as error:
-            logger.fatal(f"Udemy Says: {error}")
+            logger.fatal(f"{error}")
             time.sleep(0.8)
             sys.exit(1)
         else:
@@ -934,11 +920,11 @@ class Udemy:
             url = MY_COURSES_URL.format(portal_name=portal_name)
             webpage = self.session._get(url).json()
         except conn_error as error:
-            logger.fatal(f"Udemy Says: Connection error, {error}")
+            logger.fatal(f"Connection error: {error}")
             time.sleep(0.8)
             sys.exit(1)
         except (ValueError, Exception) as error:
-            logger.fatal(f"Udemy Says: {error}")
+            logger.fatal(f"{error}")
             time.sleep(0.8)
             sys.exit(1)
         else:
@@ -951,11 +937,11 @@ class Udemy:
         try:
             webpage = self.session._get(url).json()
         except conn_error as error:
-            logger.fatal(f"Udemy Says: Connection error, {error}")
+            logger.fatal(f"Connection error: {error}")
             time.sleep(0.8)
             sys.exit(1)
         except (ValueError, Exception) as error:
-            logger.fatal(f"Udemy Says: {error}")
+            logger.fatal(f"{error}")
             time.sleep(0.8)
             sys.exit(1)
         else:
@@ -971,11 +957,11 @@ class Udemy:
             url = f"{url}&is_archived=true"
             webpage = self.session._get(url).json()
         except conn_error as error:
-            logger.fatal(f"Udemy Says: Connection error, {error}")
+            logger.fatal(f"Connection error: {error}")
             time.sleep(0.8)
             sys.exit(1)
         except (ValueError, Exception) as error:
-            logger.fatal(f"Udemy Says: {error}")
+            logger.fatal(f"{error}")
             time.sleep(0.8)
             sys.exit(1)
         else:
@@ -1792,6 +1778,15 @@ def _print_course_info(udemy: Udemy, udemy_object: dict):
     chapter_count = udemy_object.get("total_chapters")
     lecture_count = udemy_object.get("total_lectures")
 
+    if lecture_count > 100:
+        logger.warning(
+            "This course has a lot of lectures! Fetching all the information can take a long time as well as spams Udemy's servers. It is NOT recommended to continue! Are you sure you want to do this?"
+        )
+        yn = input("(y/n): ")
+        if yn.lower() != "y":
+            logger.info("Probably wise. Please remove the --info argument and try again.")
+            sys.exit(0)
+
     logger.info("> Course: {}".format(course_title))
     logger.info("> Total Chapters: {}".format(chapter_count))
     logger.info("> Total Lectures: {}".format(lecture_count))