fix large courses

This commit is contained in:
Puyodead1 2024-02-21 23:10:49 -05:00
parent db7b0490e6
commit 1b1b5d81bd
No known key found for this signature in database
GPG Key ID: A4FA4FEC0DD353FC
3 changed files with 68 additions and 56 deletions

View File

@ -20,11 +20,24 @@
Utility script to download Udemy courses, has support for DRM videos but requires the user to acquire the decryption key (for legal reasons).<br>
Windows is the primary development OS, but I've made an effort to support Linux also (Mac untested).
> [!CAUTION]
> The ability to download captions automatically is currently broken due to changes in Udemy's API!
> [!IMPORTANT]
> This tool will not work on encrypted courses without decryption keys being provided!
>
> Downloading courses is against Udemy's Terms of Service; I am NOT held responsible for your account getting suspended as a result of the use of this program!
>
> This program is WIP; the code is provided as-is and I am not held responsible for any legal issues resulting from the use of this program.
# Requirements
The following is a list of required third-party tools; you will need to ensure they are in your system's path and that typing their name in a terminal invokes them.
_**Note**:_ _These are separate requirements that are not installed with the pip command! You will need to download and install these manually!_
> [!NOTE]
> These are separate requirements that are not installed with the pip command!
>
> You will need to download and install these manually!
- [Python 3](https://python.org/)
- [ffmpeg](https://www.ffmpeg.org/) - This tool is also available in Linux package repositories.
@ -35,8 +48,6 @@ _**Note**:_ _These are seperate requirements that are not installed with the pip
# Usage
_quick and dirty how-to_
You will need to get a few things before you can use this program:
- Decryption Key ID
@ -58,7 +69,10 @@ You will need to get a few things before you can use this program:
## Key ID and Key
It is up to you to acquire the key and key ID. Please **DO NOT** ask me for help acquiring these, decrypting DRM protected content can be considered piracy. The tool required for this has already been discussed in a GitHub issue.
> [!IMPORTANT]
> For courses that are encrypted, it is up to you to acquire the decryption keys.
>
> Please **DO NOT** ask me for help acquiring these!
- Enter the key and key id in the `keyfile.json`
- ![keyfile example](https://i.imgur.com/e5aU0ng.png)
@ -66,18 +80,21 @@ It is up to you to acquire the key and key ID. Please **DO NOT** ask me for help
## Cookies
> [!TIP]
> Cookies are not required for individually purchased courses.
To download a course included in a subscription plan that you did not purchase individually, you will need to use cookies. You can also use cookies as an alternative to Bearer Tokens.
The program can automatically extract them from your browser. You can specify what browser to extract cookies from with the `--browser` argument. Supported browsers are:
- chrome
- firefox
- opera
- edge
- brave
- chromium
- vivaldi
- safari
- `chrome`
- `firefox`
- `opera`
- `edge`
- `brave`
- `chromium`
- `vivaldi`
- `safari`
## Ready to go

View File

@ -11,7 +11,8 @@ HEADERS = {
}
LOGIN_URL = "https://www.udemy.com/join/login-popup/?ref=&display_type=popup&loc"
LOGOUT_URL = "https://www.udemy.com/user/logout"
COURSE_URL = "https://{portal_name}.udemy.com/api-2.0/courses/{course_id}/cached-subscriber-curriculum-items?fields[asset]=results,title,external_url,time_estimation,download_urls,slide_urls,filename,asset_type,captions,media_license_token,course_is_drmed,media_sources,stream_urls,body&fields[chapter]=object_index,title,sort_order&fields[lecture]=id,title,object_index,asset,supplementary_assets,view_html&page_size=10000"
# COURSE_URL = "https://{portal_name}.udemy.com/api-2.0/courses/{course_id}/cached-subscriber-curriculum-items?fields[asset]=results,title,external_url,time_estimation,download_urls,slide_urls,filename,asset_type,captions,media_license_token,course_is_drmed,media_sources,stream_urls,body&fields[chapter]=object_index,title,sort_order&fields[lecture]=id,title,object_index,asset,supplementary_assets,view_html&page_size=10000"
COURSE_URL = "https://{portal_name}.udemy.com/api-2.0/courses/{course_id}/subscriber-curriculum-items/?page_size=100&fields[asset]=asset_type,length,media_license_token,course_is_drmed,media_sources,thumbnail_sprite,slides,slide_urls,filename,download_urls,external_url&fields[chapter]=object_index,title&fields[lecture]=id,title,object_index,asset,supplementary_assets,view_html&caching_intent=True"
COURSE_INFO_URL = "https://{portal_name}.udemy.com/api-2.0/courses/{course_id}/"
COURSE_SEARCH = "https://{portal_name}.udemy.com/api-2.0/users/me/subscribed-courses?fields[course]=id,url,title,published_title&page=1&page_size=500&search={course_name}"
SUBSCRIBED_COURSES = "https://{portal_name}.udemy.com/api-2.0/users/me/subscribed-courses/?ordering=-last_accessed&fields[course]=id,title,url&page=1&page_size=12"
@ -24,8 +25,7 @@ SAVED_DIR = os.path.join(os.getcwd(), "saved")
KEY_FILE_PATH = os.path.join(os.getcwd(), "keyfile.json")
COOKIE_FILE_PATH = os.path.join(os.getcwd(), "cookies.txt")
LOG_DIR_PATH = os.path.join(os.getcwd(), "logs")
LOG_FILE_PATH = os.path.join(
os.getcwd(), "logs", f"{time.strftime('%Y-%m-%d-%I-%M-%S')}.log")
LOG_FORMAT = '[%(asctime)s] [%(name)s] [%(funcName)s:%(lineno)d] %(levelname)s: %(message)s'
LOG_DATE_FORMAT = '%I:%M:%S'
LOG_FILE_PATH = os.path.join(os.getcwd(), "logs", f"{time.strftime('%Y-%m-%d-%I-%M-%S')}.log")
LOG_FORMAT = "[%(asctime)s] [%(name)s] [%(funcName)s:%(lineno)d] %(levelname)s: %(message)s"
LOG_DATE_FORMAT = "%I:%M:%S"
LOG_LEVEL = logging.INFO

73
main.py
View File

@ -3,6 +3,7 @@ import argparse
import glob
import json
import logging
import math
import os
import re
import subprocess
@ -337,7 +338,7 @@ def pre_run():
with open(KEY_FILE_PATH, encoding="utf8", mode="r") as keyfile:
keys = json.loads(keyfile.read())
else:
logger.warning("> Keyfile not found! You won't be able to decrypt videos!")
logger.warning("> Keyfile not found! You won't be able to decrypt any encrypted videos!")
class Udemy:
@ -387,7 +388,7 @@ class Udemy:
try:
resp = self.session._get(url).json()
except conn_error as error:
logger.fatal(f"[-] Udemy Says: Connection error, {error}")
logger.fatal(f"[-] Connection error: {error}")
time.sleep(0.8)
sys.exit(1)
else:
@ -794,11 +795,11 @@ class Udemy:
webpage = webpage.decode("utf8", "ignore")
webpage = json.loads(webpage)
except conn_error as error:
logger.fatal(f"Udemy Says: Connection error, {error}")
logger.fatal(f"Connection error: {error}")
time.sleep(0.8)
sys.exit(1)
except (ValueError, Exception) as error:
logger.fatal(f"Udemy Says: {error} on {url}")
logger.fatal(f"{error} on {url}")
time.sleep(0.8)
sys.exit(1)
else:
@ -811,7 +812,7 @@ class Udemy:
try:
resp = self.session._get(url).json()
except conn_error as error:
logger.fatal(f"Udemy Says: Connection error, {error}")
logger.fatal(f"Connection error: {error}")
time.sleep(0.8)
sys.exit(1)
else:
@ -820,39 +821,23 @@ class Udemy:
def _extract_course_json(self, url, course_id, portal_name):
self.session._headers.update({"Referer": url})
url = COURSE_URL.format(portal_name=portal_name, course_id=course_id)
try:
resp = self.session._get(url)
if resp.status_code in [502, 503, 504]:
logger.info("> The course content is large, using large content extractor...")
resp = self._extract_large_course_content(url=url)
else:
resp = resp.json()
except conn_error as error:
logger.fatal(f"Udemy Says: Connection error, {error}")
time.sleep(0.8)
sys.exit(1)
except (ValueError, Exception):
resp = self._extract_large_course_content(url=url)
return resp
else:
return resp
def _extract_large_course_content(self, url):
url = url.replace("10000", "50") if url.endswith("10000") else url
page = 1
try:
data = self.session._get(url).json()
except conn_error as error:
logger.fatal(f"Udemy Says: Connection error, {error}")
logger.fatal(f"Connection error: {error}")
time.sleep(0.8)
sys.exit(1)
else:
_next = data.get("next")
_count = data.get("count")
est_page_count = math.ceil(_count / 100) # 100 is the max results per page
while _next:
logger.info("> Downloading course information.. ")
logger.info(f"> Downloading course information.. (Page {page + 1}/{est_page_count})")
try:
resp = self.session._get(_next).json()
except conn_error as error:
logger.fatal(f"Udemy Says: Connection error, {error}")
logger.fatal(f"Connection error: {error}")
time.sleep(0.8)
sys.exit(1)
else:
@ -861,6 +846,7 @@ class Udemy:
if results and isinstance(results, list):
for d in resp["results"]:
data["results"].append(d)
page = page + 1
return data
def _extract_course(self, response, course_name):
@ -880,11 +866,11 @@ class Udemy:
url = MY_COURSES_URL.format(portal_name=portal_name)
webpage = self.session._get(url).json()
except conn_error as error:
logger.fatal(f"Udemy Says: Connection error, {error}")
logger.fatal(f"Connection error: {error}")
time.sleep(0.8)
sys.exit(1)
except (ValueError, Exception) as error:
logger.fatal(f"Udemy Says: {error}")
logger.fatal(f"{error}")
time.sleep(0.8)
sys.exit(1)
else:
@ -897,11 +883,11 @@ class Udemy:
try:
webpage = self.session._get(url).json()
except conn_error as error:
logger.fatal(f"Udemy Says: Connection error, {error}")
logger.fatal(f"Connection error: {error}")
time.sleep(0.8)
sys.exit(1)
except (ValueError, Exception) as error:
logger.fatal(f"Udemy Says: {error}")
logger.fatal(f"{error}")
time.sleep(0.8)
sys.exit(1)
else:
@ -917,11 +903,11 @@ class Udemy:
url = f"{url}&is_archived=true"
webpage = self.session._get(url).json()
except conn_error as error:
logger.fatal(f"Udemy Says: Connection error, {error}")
logger.fatal(f"Connection error: {error}")
time.sleep(0.8)
sys.exit(1)
except (ValueError, Exception) as error:
logger.fatal(f"Udemy Says: {error}")
logger.fatal(f"{error}")
time.sleep(0.8)
sys.exit(1)
else:
@ -934,11 +920,11 @@ class Udemy:
url = MY_COURSES_URL.format(portal_name=portal_name)
webpage = self.session._get(url).json()
except conn_error as error:
logger.fatal(f"Udemy Says: Connection error, {error}")
logger.fatal(f"Connection error: {error}")
time.sleep(0.8)
sys.exit(1)
except (ValueError, Exception) as error:
logger.fatal(f"Udemy Says: {error}")
logger.fatal(f"{error}")
time.sleep(0.8)
sys.exit(1)
else:
@ -951,11 +937,11 @@ class Udemy:
try:
webpage = self.session._get(url).json()
except conn_error as error:
logger.fatal(f"Udemy Says: Connection error, {error}")
logger.fatal(f"Connection error: {error}")
time.sleep(0.8)
sys.exit(1)
except (ValueError, Exception) as error:
logger.fatal(f"Udemy Says: {error}")
logger.fatal(f"{error}")
time.sleep(0.8)
sys.exit(1)
else:
@ -971,11 +957,11 @@ class Udemy:
url = f"{url}&is_archived=true"
webpage = self.session._get(url).json()
except conn_error as error:
logger.fatal(f"Udemy Says: Connection error, {error}")
logger.fatal(f"Connection error: {error}")
time.sleep(0.8)
sys.exit(1)
except (ValueError, Exception) as error:
logger.fatal(f"Udemy Says: {error}")
logger.fatal(f"{error}")
time.sleep(0.8)
sys.exit(1)
else:
@ -1792,6 +1778,15 @@ def _print_course_info(udemy: Udemy, udemy_object: dict):
chapter_count = udemy_object.get("total_chapters")
lecture_count = udemy_object.get("total_lectures")
if lecture_count > 100:
logger.warning(
"This course has a lot of lectures! Fetching all the information can take a long time as well as spams Udemy's servers. It is NOT recommended to continue! Are you sure you want to do this?"
)
yn = input("(y/n): ")
if yn.lower() != "y":
logger.info("Probably wise. Please remove the --info argument and try again.")
sys.exit(0)
logger.info("> Course: {}".format(course_title))
logger.info("> Total Chapters: {}".format(chapter_count))
logger.info("> Total Lectures: {}".format(lecture_count))