mirror of
https://cdm-project.com/Download-Tools/udemy-downloader.git
synced 2025-04-29 19:34:25 +02:00
fix large courses
This commit is contained in:
parent
db7b0490e6
commit
1b1b5d81bd
41
README.md
41
README.md
@ -20,11 +20,24 @@
|
||||
Utility script to download Udemy courses, has support for DRM videos but requires the user to acquire the decryption key (for legal reasons).<br>
|
||||
Windows is the primary development OS, but I've made an effort to support Linux also (Mac untested).
|
||||
|
||||
> [!CAUTION]
|
||||
> The ability to download captions automatically is currently broken due to changes in Udemy's API!
|
||||
|
||||
> [!IMPORTANT]
|
||||
> This tool will not work on encrypted courses without decryption keys being provided!
|
||||
>
|
||||
> Downloading courses is against Udemy's Terms of Service; I am NOT held responsible for your account getting suspended as a result of the use of this program!
|
||||
>
|
||||
> This program is WIP, the code is provided as-is and I am not held responsible for any legal issues resulting from the use of this program.
|
||||
|
||||
# Requirements
|
||||
|
||||
The following is a list of required third-party tools. You will need to ensure they are in your system's PATH and that typing their name in a terminal invokes them.
|
||||
|
||||
_**Note**:_ _These are seperate requirements that are not installed with the pip command! You will need to download and install these manually!_
|
||||
> [!NOTE]
|
||||
> These are separate requirements that are not installed with the pip command!
|
||||
>
|
||||
> You will need to download and install these manually!
|
||||
|
||||
- [Python 3](https://python.org/)
|
||||
- [ffmpeg](https://www.ffmpeg.org/) - This tool is also available in Linux package repositories.
|
||||
@ -35,8 +48,6 @@ _**Note**:_ _These are seperate requirements that are not installed with the pip
|
||||
|
||||
# Usage
|
||||
|
||||
_quick and dirty how-to_
|
||||
|
||||
You will need to get a few things before you can use this program:
|
||||
|
||||
- Decryption Key ID
|
||||
@ -58,7 +69,10 @@ You will need to get a few things before you can use this program:
|
||||
|
||||
## Key ID and Key
|
||||
|
||||
It is up to you to acquire the key and key ID. Please **DO NOT** ask me for help acquiring these, decrypting DRM protected content can be considered piracy. The tool required for this has already been discused in a GitHub issue.
|
||||
> [!IMPORTANT]
|
||||
> For courses that are encrypted, it is up to you to acquire the decryption keys.
|
||||
>
|
||||
> Please **DO NOT** ask me for help acquiring these!
|
||||
|
||||
- Enter the key and key id in the `keyfile.json`
|
||||
- 
|
||||
@ -66,18 +80,21 @@ It is up to you to acquire the key and key ID. Please **DO NOT** ask me for help
|
||||
|
||||
## Cookies
|
||||
|
||||
> [!TIP]
|
||||
> Cookies are not required for individually purchased courses.
|
||||
|
||||
To download a course included in a subscription plan that you did not purchase individually, you will need to use cookies. You can also use cookies as an alternative to Bearer Tokens.
|
||||
|
||||
The program can automatically extract them from your browser. You can specify what browser to extract cookies from with the `--browser` argument. Supported browsers are:
|
||||
|
||||
- chrome
|
||||
- firefox
|
||||
- opera
|
||||
- edge
|
||||
- brave
|
||||
- chromium
|
||||
- vivaldi
|
||||
- safari
|
||||
- `chrome`
|
||||
- `firefox`
|
||||
- `opera`
|
||||
- `edge`
|
||||
- `brave`
|
||||
- `chromium`
|
||||
- `vivaldi`
|
||||
- `safari`
|
||||
|
||||
## Ready to go
|
||||
|
||||
|
10
constants.py
10
constants.py
@ -11,7 +11,8 @@ HEADERS = {
|
||||
}
|
||||
LOGIN_URL = "https://www.udemy.com/join/login-popup/?ref=&display_type=popup&loc"
|
||||
LOGOUT_URL = "https://www.udemy.com/user/logout"
|
||||
COURSE_URL = "https://{portal_name}.udemy.com/api-2.0/courses/{course_id}/cached-subscriber-curriculum-items?fields[asset]=results,title,external_url,time_estimation,download_urls,slide_urls,filename,asset_type,captions,media_license_token,course_is_drmed,media_sources,stream_urls,body&fields[chapter]=object_index,title,sort_order&fields[lecture]=id,title,object_index,asset,supplementary_assets,view_html&page_size=10000"
|
||||
# COURSE_URL = "https://{portal_name}.udemy.com/api-2.0/courses/{course_id}/cached-subscriber-curriculum-items?fields[asset]=results,title,external_url,time_estimation,download_urls,slide_urls,filename,asset_type,captions,media_license_token,course_is_drmed,media_sources,stream_urls,body&fields[chapter]=object_index,title,sort_order&fields[lecture]=id,title,object_index,asset,supplementary_assets,view_html&page_size=10000"
|
||||
COURSE_URL = "https://{portal_name}.udemy.com/api-2.0/courses/{course_id}/subscriber-curriculum-items/?page_size=100&fields[asset]=asset_type,length,media_license_token,course_is_drmed,media_sources,thumbnail_sprite,slides,slide_urls,filename,download_urls,external_url&fields[chapter]=object_index,title&fields[lecture]=id,title,object_index,asset,supplementary_assets,view_html&caching_intent=True"
|
||||
COURSE_INFO_URL = "https://{portal_name}.udemy.com/api-2.0/courses/{course_id}/"
|
||||
COURSE_SEARCH = "https://{portal_name}.udemy.com/api-2.0/users/me/subscribed-courses?fields[course]=id,url,title,published_title&page=1&page_size=500&search={course_name}"
|
||||
SUBSCRIBED_COURSES = "https://{portal_name}.udemy.com/api-2.0/users/me/subscribed-courses/?ordering=-last_accessed&fields[course]=id,title,url&page=1&page_size=12"
|
||||
@ -24,8 +25,7 @@ SAVED_DIR = os.path.join(os.getcwd(), "saved")
|
||||
KEY_FILE_PATH = os.path.join(os.getcwd(), "keyfile.json")
|
||||
COOKIE_FILE_PATH = os.path.join(os.getcwd(), "cookies.txt")
|
||||
LOG_DIR_PATH = os.path.join(os.getcwd(), "logs")
|
||||
LOG_FILE_PATH = os.path.join(
|
||||
os.getcwd(), "logs", f"{time.strftime('%Y-%m-%d-%I-%M-%S')}.log")
|
||||
LOG_FORMAT = '[%(asctime)s] [%(name)s] [%(funcName)s:%(lineno)d] %(levelname)s: %(message)s'
|
||||
LOG_DATE_FORMAT = '%I:%M:%S'
|
||||
LOG_FILE_PATH = os.path.join(os.getcwd(), "logs", f"{time.strftime('%Y-%m-%d-%I-%M-%S')}.log")
|
||||
LOG_FORMAT = "[%(asctime)s] [%(name)s] [%(funcName)s:%(lineno)d] %(levelname)s: %(message)s"
|
||||
LOG_DATE_FORMAT = "%I:%M:%S"
|
||||
LOG_LEVEL = logging.INFO
|
||||
|
73
main.py
73
main.py
@ -3,6 +3,7 @@ import argparse
|
||||
import glob
|
||||
import json
|
||||
import logging
|
||||
import math
|
||||
import os
|
||||
import re
|
||||
import subprocess
|
||||
@ -337,7 +338,7 @@ def pre_run():
|
||||
with open(KEY_FILE_PATH, encoding="utf8", mode="r") as keyfile:
|
||||
keys = json.loads(keyfile.read())
|
||||
else:
|
||||
logger.warning("> Keyfile not found! You won't be able to decrypt videos!")
|
||||
logger.warning("> Keyfile not found! You won't be able to decrypt any encrypted videos!")
|
||||
|
||||
|
||||
class Udemy:
|
||||
@ -387,7 +388,7 @@ class Udemy:
|
||||
try:
|
||||
resp = self.session._get(url).json()
|
||||
except conn_error as error:
|
||||
logger.fatal(f"[-] Udemy Says: Connection error, {error}")
|
||||
logger.fatal(f"[-] Connection error: {error}")
|
||||
time.sleep(0.8)
|
||||
sys.exit(1)
|
||||
else:
|
||||
@ -794,11 +795,11 @@ class Udemy:
|
||||
webpage = webpage.decode("utf8", "ignore")
|
||||
webpage = json.loads(webpage)
|
||||
except conn_error as error:
|
||||
logger.fatal(f"Udemy Says: Connection error, {error}")
|
||||
logger.fatal(f"Connection error: {error}")
|
||||
time.sleep(0.8)
|
||||
sys.exit(1)
|
||||
except (ValueError, Exception) as error:
|
||||
logger.fatal(f"Udemy Says: {error} on {url}")
|
||||
logger.fatal(f"{error} on {url}")
|
||||
time.sleep(0.8)
|
||||
sys.exit(1)
|
||||
else:
|
||||
@ -811,7 +812,7 @@ class Udemy:
|
||||
try:
|
||||
resp = self.session._get(url).json()
|
||||
except conn_error as error:
|
||||
logger.fatal(f"Udemy Says: Connection error, {error}")
|
||||
logger.fatal(f"Connection error: {error}")
|
||||
time.sleep(0.8)
|
||||
sys.exit(1)
|
||||
else:
|
||||
@ -820,39 +821,23 @@ class Udemy:
|
||||
def _extract_course_json(self, url, course_id, portal_name):
|
||||
self.session._headers.update({"Referer": url})
|
||||
url = COURSE_URL.format(portal_name=portal_name, course_id=course_id)
|
||||
try:
|
||||
resp = self.session._get(url)
|
||||
if resp.status_code in [502, 503, 504]:
|
||||
logger.info("> The course content is large, using large content extractor...")
|
||||
resp = self._extract_large_course_content(url=url)
|
||||
else:
|
||||
resp = resp.json()
|
||||
except conn_error as error:
|
||||
logger.fatal(f"Udemy Says: Connection error, {error}")
|
||||
time.sleep(0.8)
|
||||
sys.exit(1)
|
||||
except (ValueError, Exception):
|
||||
resp = self._extract_large_course_content(url=url)
|
||||
return resp
|
||||
else:
|
||||
return resp
|
||||
|
||||
def _extract_large_course_content(self, url):
|
||||
url = url.replace("10000", "50") if url.endswith("10000") else url
|
||||
page = 1
|
||||
try:
|
||||
data = self.session._get(url).json()
|
||||
except conn_error as error:
|
||||
logger.fatal(f"Udemy Says: Connection error, {error}")
|
||||
logger.fatal(f"Connection error: {error}")
|
||||
time.sleep(0.8)
|
||||
sys.exit(1)
|
||||
else:
|
||||
_next = data.get("next")
|
||||
_count = data.get("count")
|
||||
est_page_count = math.ceil(_count / 100) # 100 is the max results per page
|
||||
while _next:
|
||||
logger.info("> Downloading course information.. ")
|
||||
logger.info(f"> Downloading course information.. (Page {page + 1}/{est_page_count})")
|
||||
try:
|
||||
resp = self.session._get(_next).json()
|
||||
except conn_error as error:
|
||||
logger.fatal(f"Udemy Says: Connection error, {error}")
|
||||
logger.fatal(f"Connection error: {error}")
|
||||
time.sleep(0.8)
|
||||
sys.exit(1)
|
||||
else:
|
||||
@ -861,6 +846,7 @@ class Udemy:
|
||||
if results and isinstance(results, list):
|
||||
for d in resp["results"]:
|
||||
data["results"].append(d)
|
||||
page = page + 1
|
||||
return data
|
||||
|
||||
def _extract_course(self, response, course_name):
|
||||
@ -880,11 +866,11 @@ class Udemy:
|
||||
url = MY_COURSES_URL.format(portal_name=portal_name)
|
||||
webpage = self.session._get(url).json()
|
||||
except conn_error as error:
|
||||
logger.fatal(f"Udemy Says: Connection error, {error}")
|
||||
logger.fatal(f"Connection error: {error}")
|
||||
time.sleep(0.8)
|
||||
sys.exit(1)
|
||||
except (ValueError, Exception) as error:
|
||||
logger.fatal(f"Udemy Says: {error}")
|
||||
logger.fatal(f"{error}")
|
||||
time.sleep(0.8)
|
||||
sys.exit(1)
|
||||
else:
|
||||
@ -897,11 +883,11 @@ class Udemy:
|
||||
try:
|
||||
webpage = self.session._get(url).json()
|
||||
except conn_error as error:
|
||||
logger.fatal(f"Udemy Says: Connection error, {error}")
|
||||
logger.fatal(f"Connection error: {error}")
|
||||
time.sleep(0.8)
|
||||
sys.exit(1)
|
||||
except (ValueError, Exception) as error:
|
||||
logger.fatal(f"Udemy Says: {error}")
|
||||
logger.fatal(f"{error}")
|
||||
time.sleep(0.8)
|
||||
sys.exit(1)
|
||||
else:
|
||||
@ -917,11 +903,11 @@ class Udemy:
|
||||
url = f"{url}&is_archived=true"
|
||||
webpage = self.session._get(url).json()
|
||||
except conn_error as error:
|
||||
logger.fatal(f"Udemy Says: Connection error, {error}")
|
||||
logger.fatal(f"Connection error: {error}")
|
||||
time.sleep(0.8)
|
||||
sys.exit(1)
|
||||
except (ValueError, Exception) as error:
|
||||
logger.fatal(f"Udemy Says: {error}")
|
||||
logger.fatal(f"{error}")
|
||||
time.sleep(0.8)
|
||||
sys.exit(1)
|
||||
else:
|
||||
@ -934,11 +920,11 @@ class Udemy:
|
||||
url = MY_COURSES_URL.format(portal_name=portal_name)
|
||||
webpage = self.session._get(url).json()
|
||||
except conn_error as error:
|
||||
logger.fatal(f"Udemy Says: Connection error, {error}")
|
||||
logger.fatal(f"Connection error: {error}")
|
||||
time.sleep(0.8)
|
||||
sys.exit(1)
|
||||
except (ValueError, Exception) as error:
|
||||
logger.fatal(f"Udemy Says: {error}")
|
||||
logger.fatal(f"{error}")
|
||||
time.sleep(0.8)
|
||||
sys.exit(1)
|
||||
else:
|
||||
@ -951,11 +937,11 @@ class Udemy:
|
||||
try:
|
||||
webpage = self.session._get(url).json()
|
||||
except conn_error as error:
|
||||
logger.fatal(f"Udemy Says: Connection error, {error}")
|
||||
logger.fatal(f"Connection error: {error}")
|
||||
time.sleep(0.8)
|
||||
sys.exit(1)
|
||||
except (ValueError, Exception) as error:
|
||||
logger.fatal(f"Udemy Says: {error}")
|
||||
logger.fatal(f"{error}")
|
||||
time.sleep(0.8)
|
||||
sys.exit(1)
|
||||
else:
|
||||
@ -971,11 +957,11 @@ class Udemy:
|
||||
url = f"{url}&is_archived=true"
|
||||
webpage = self.session._get(url).json()
|
||||
except conn_error as error:
|
||||
logger.fatal(f"Udemy Says: Connection error, {error}")
|
||||
logger.fatal(f"Connection error: {error}")
|
||||
time.sleep(0.8)
|
||||
sys.exit(1)
|
||||
except (ValueError, Exception) as error:
|
||||
logger.fatal(f"Udemy Says: {error}")
|
||||
logger.fatal(f"{error}")
|
||||
time.sleep(0.8)
|
||||
sys.exit(1)
|
||||
else:
|
||||
@ -1792,6 +1778,15 @@ def _print_course_info(udemy: Udemy, udemy_object: dict):
|
||||
chapter_count = udemy_object.get("total_chapters")
|
||||
lecture_count = udemy_object.get("total_lectures")
|
||||
|
||||
if lecture_count > 100:
|
||||
logger.warning(
|
||||
"This course has a lot of lectures! Fetching all the information can take a long time as well as spams Udemy's servers. It is NOT recommended to continue! Are you sure you want to do this?"
|
||||
)
|
||||
yn = input("(y/n): ")
|
||||
if yn.lower() != "y":
|
||||
logger.info("Probably wise. Please remove the --info argument and try again.")
|
||||
sys.exit(0)
|
||||
|
||||
logger.info("> Course: {}".format(course_title))
|
||||
logger.info("> Total Chapters: {}".format(chapter_count))
|
||||
logger.info("> Total Lectures: {}".format(lecture_count))
|
||||
|
Loading…
x
Reference in New Issue
Block a user