mirror of
https://cdm-project.com/Download-Tools/udemy-downloader.git
synced 2025-05-09 09:04:28 +02:00
update to allow downloading if using udemy subscription
+ New requirements: `beautifulsoup4` and `lxml` + Added support for downloading courses included in subscription plans + Updated README to reflect changes
This commit is contained in:
parent
ecc46deb6b
commit
eb3257f374
3
.gitignore
vendored
3
.gitignore
vendored
@ -122,4 +122,5 @@ manifest.mpd
|
|||||||
saved
|
saved
|
||||||
*.aria2
|
*.aria2
|
||||||
info.py
|
info.py
|
||||||
.idea/
|
.idea/
|
||||||
|
cookies.txt
|
22
README.md
22
README.md
@ -28,7 +28,7 @@ Windows is the primary development OS, but I've made an effort to support Linux
|
|||||||
|
|
||||||
# Requirements
|
# Requirements
|
||||||
|
|
||||||
1. You will need to download `ffmpeg`, `aria2c`, `mp4decrypt` (from Bento4 SDK) and ``yt-dlp`` (``pip install yt-dlp``). Ensure they are in the system path (typing their name in cmd should invoke them).
|
1. You will need to download `ffmpeg`, `aria2c`, `mp4decrypt` (from Bento4 SDK) and `yt-dlp` (`pip install yt-dlp`). Ensure they are in the system path (typing their name in cmd should invoke them).
|
||||||
|
|
||||||
# Usage
|
# Usage
|
||||||
|
|
||||||
@ -54,7 +54,7 @@ You will need to get a few things before you can use this program:
|
|||||||
|
|
||||||
### Key ID and Key
|
### Key ID and Key
|
||||||
|
|
||||||
It is up to you to aquire the key and key ID. Please don't ask me for help acquiring these, decrypting DRM protected content can be considered piracy.
|
It is up to you to acquire the key and key ID. Please **DO NOT** ask me for help acquiring these; decrypting DRM-protected content can be considered piracy.
|
||||||
|
|
||||||
- Enter the key and key id in the `keyfile.json`
|
- Enter the key and key id in the `keyfile.json`
|
||||||
- 
|
- 
|
||||||
@ -64,6 +64,24 @@ It is up to you to aquire the key and key ID. Please don't ask me for help acqui
|
|||||||
|
|
||||||
You can now run the program, see the examples below. The course will download to `out_dir`.
|
You can now run the program, see the examples below. The course will download to `out_dir`.
|
||||||
|
|
||||||
|
# Udemy Subscription Plans
|
||||||
|
|
||||||
|
To download a course that is included in a subscription plan but that you did not purchase individually, you will need to follow a few more steps to get set up.
|
||||||
|
|
||||||
|
## Getting your cookies
|
||||||
|
|
||||||
|
- Go to the page of the course you want to download
|
||||||
|
- Press `control` + `shift` + `i` to open the developer tools (the shortcut may differ depending on your OS and browser; search for how to open developer tools if it does not work)
|
||||||
|
- click the `Console` tab
|
||||||
|
- copy and paste `document.cookie` and press enter
|
||||||
|
- copy the text between the quotes
|
||||||
|
|
||||||
|
## Set up the cookie file
|
||||||
|
|
||||||
|
- Create a file called `cookies.txt` in the same folder as `main.py`
|
||||||
|
- Paste the cookie into the file
|
||||||
|
- save and close the file
|
||||||
|
|
||||||
# Advanced Usage
|
# Advanced Usage
|
||||||
|
|
||||||
```
|
```
|
||||||
|
95
main.py
95
main.py
@ -19,12 +19,15 @@ from sanitize import sanitize, slugify, SLUG_OK
|
|||||||
from utils import extract_kid
|
from utils import extract_kid
|
||||||
from vtt_to_srt import convert
|
from vtt_to_srt import convert
|
||||||
from _version import __version__
|
from _version import __version__
|
||||||
|
from bs4 import BeautifulSoup
|
||||||
|
|
||||||
home_dir = os.getcwd()
|
home_dir = os.getcwd()
|
||||||
download_dir = os.path.join(os.getcwd(), "out_dir")
|
download_dir = os.path.join(os.getcwd(), "out_dir")
|
||||||
saved_dir = os.path.join(os.getcwd(), "saved")
|
saved_dir = os.path.join(os.getcwd(), "saved")
|
||||||
keyfile_path = os.path.join(os.getcwd(), "keyfile.json")
|
keyfile_path = os.path.join(os.getcwd(), "keyfile.json")
|
||||||
|
cookiefile_path = os.path.join(os.getcwd(), "cookies.txt")
|
||||||
retry = 3
|
retry = 3
|
||||||
|
cookies = {}
|
||||||
downloader = None
|
downloader = None
|
||||||
HEADERS = {
|
HEADERS = {
|
||||||
"Origin": "www.udemy.com",
|
"Origin": "www.udemy.com",
|
||||||
@ -36,6 +39,7 @@ HEADERS = {
|
|||||||
LOGIN_URL = "https://www.udemy.com/join/login-popup/?ref=&display_type=popup&loc"
|
LOGIN_URL = "https://www.udemy.com/join/login-popup/?ref=&display_type=popup&loc"
|
||||||
LOGOUT_URL = "https://www.udemy.com/user/logout"
|
LOGOUT_URL = "https://www.udemy.com/user/logout"
|
||||||
COURSE_URL = "https://{portal_name}.udemy.com/api-2.0/courses/{course_id}/cached-subscriber-curriculum-items?fields[asset]=results,title,external_url,time_estimation,download_urls,slide_urls,filename,asset_type,captions,media_license_token,course_is_drmed,media_sources,stream_urls,body&fields[chapter]=object_index,title,sort_order&fields[lecture]=id,title,object_index,asset,supplementary_assets,view_html&page_size=10000"
|
COURSE_URL = "https://{portal_name}.udemy.com/api-2.0/courses/{course_id}/cached-subscriber-curriculum-items?fields[asset]=results,title,external_url,time_estimation,download_urls,slide_urls,filename,asset_type,captions,media_license_token,course_is_drmed,media_sources,stream_urls,body&fields[chapter]=object_index,title,sort_order&fields[lecture]=id,title,object_index,asset,supplementary_assets,view_html&page_size=10000"
|
||||||
|
COURSE_INFO_URL = "https://{portal_name}.udemy.com/api-2.0/courses/{course_id}/"
|
||||||
COURSE_SEARCH = "https://{portal_name}.udemy.com/api-2.0/users/me/subscribed-courses?fields[course]=id,url,title,published_title&page=1&page_size=500&search={course_name}"
|
COURSE_SEARCH = "https://{portal_name}.udemy.com/api-2.0/users/me/subscribed-courses?fields[course]=id,url,title,published_title&page=1&page_size=500&search={course_name}"
|
||||||
SUBSCRIBED_COURSES = "https://{portal_name}.udemy.com/api-2.0/users/me/subscribed-courses/?ordering=-last_accessed&fields[course]=id,title,url&page=1&page_size=12"
|
SUBSCRIBED_COURSES = "https://{portal_name}.udemy.com/api-2.0/users/me/subscribed-courses/?ordering=-last_accessed&fields[course]=id,title,url&page=1&page_size=12"
|
||||||
MY_COURSES_URL = "https://{portal_name}.udemy.com/api-2.0/users/me/subscribed-courses?fields[course]=id,url,title,published_title&ordering=-last_accessed,-access_time&page=1&page_size=10000"
|
MY_COURSES_URL = "https://{portal_name}.udemy.com/api-2.0/users/me/subscribed-courses?fields[course]=id,url,title,published_title&ordering=-last_accessed,-access_time&page=1&page_size=10000"
|
||||||
@ -44,6 +48,18 @@ COLLECTION_URL = "https://{portal_name}.udemy.com/api-2.0/users/me/subscribed-co
|
|||||||
Path(download_dir).mkdir(parents=True, exist_ok=True)
|
Path(download_dir).mkdir(parents=True, exist_ok=True)
|
||||||
Path(saved_dir).mkdir(parents=True, exist_ok=True)
|
Path(saved_dir).mkdir(parents=True, exist_ok=True)
|
||||||
|
|
||||||
|
# Get the keys: parse the JSON document stored at keyfile_path into `keyfile`.
with open(keyfile_path, 'r') as keyfile_handle:
    keyfile = json.load(keyfile_handle)
|
||||||
|
|
||||||
|
# Read cookies from file. The text read here is later assigned verbatim to the
# "Cookie" request header, so it has to be a single-line string.
if os.path.exists(cookiefile_path):
    with open(cookiefile_path, 'r') as cookiefile:
        # Editors commonly append a trailing newline when saving; a newline
        # inside a header value is rejected by the HTTP client, so strip
        # surrounding whitespace.
        cookies = cookiefile.read().strip()
else:
    # Keep `cookies` a string (not the dict default) so it remains a valid
    # header value when no cookie file is present.
    # NOTE(review): assumes `cookies` is only consumed as the Cookie header
    # value - confirm no other code relies on it being a dict.
    cookies = ""
    print("No cookies.txt file was found, you won't be able to download subscription courses! You can ignore this if you don't plan to download a course included in a subscription plan.")
|
||||||
|
|
||||||
|
|
||||||
def _clean(text):
|
def _clean(text):
|
||||||
ok = re.compile(r'[^\\/:*?!"<>|]')
|
ok = re.compile(r'[^\\/:*?!"<>|]')
|
||||||
@ -363,6 +379,11 @@ class Udemy:
|
|||||||
if obj:
|
if obj:
|
||||||
return obj.group("portal_name"), obj.group("name_or_id")
|
return obj.group("portal_name"), obj.group("name_or_id")
|
||||||
|
|
||||||
|
def extract_portal_name(self, url):
    """Return the portal (subdomain) part of a ``<portal>.udemy.com`` URL.

    Args:
        url: URL string to inspect, e.g. ``https://www.udemy.com/course/x/``.

    Returns:
        The text between ``//`` and ``.udemy.com`` (``"www"`` for the example
        above), or ``None`` when the URL does not contain such a host.
    """
    # The dots are escaped so only a literal ".udemy.com" matches, and the
    # portal group may not contain "/" so the match cannot start at an
    # earlier "//" and swallow part of another host or path.
    match = re.search(r"(?i)//(?P<portal_name>[^/]+?)\.udemy\.com", url)
    if match:
        return match.group("portal_name")
    return None
|
||||||
|
|
||||||
def _subscribed_courses(self, portal_name, course_name):
|
def _subscribed_courses(self, portal_name, course_name):
|
||||||
results = []
|
results = []
|
||||||
self.session._headers.update({
|
self.session._headers.update({
|
||||||
@ -388,6 +409,19 @@ class Udemy:
|
|||||||
results = webpage.get("results", [])
|
results = webpage.get("results", [])
|
||||||
return results
|
return results
|
||||||
|
|
||||||
|
def _extract_course_info_json(self, url, course_id, portal_name):
    """Fetch the course-info JSON for ``course_id`` on ``portal_name``.

    The session's ``Referer`` header is set to ``url`` before the request.
    On a connection error the error is printed and the process exits after
    a short pause; otherwise the decoded JSON response is returned.
    """
    self.session._headers.update({"Referer": url})
    info_url = COURSE_INFO_URL.format(course_id=course_id,
                                      portal_name=portal_name)
    try:
        return self.session._get(info_url).json()
    except conn_error as error:
        print(f"Udemy Says: Connection error, {error}")
        time.sleep(0.8)
        sys.exit(0)
|
||||||
|
|
||||||
def _extract_course_json(self, url, course_id, portal_name):
|
def _extract_course_json(self, url, course_id, portal_name):
|
||||||
self.session._headers.update({"Referer": url})
|
self.session._headers.update({"Referer": url})
|
||||||
url = COURSE_URL.format(portal_name=portal_name, course_id=course_id)
|
url = COURSE_URL.format(portal_name=portal_name, course_id=course_id)
|
||||||
@ -436,7 +470,7 @@ class Udemy:
|
|||||||
data["results"].append(d)
|
data["results"].append(d)
|
||||||
return data
|
return data
|
||||||
|
|
||||||
def __extract_course(self, response, course_name):
|
def _extract_course(self, response, course_name):
|
||||||
_temp = {}
|
_temp = {}
|
||||||
if response:
|
if response:
|
||||||
for entry in response:
|
for entry in response:
|
||||||
@ -566,21 +600,32 @@ class Udemy:
|
|||||||
course = {}
|
course = {}
|
||||||
results = self._subscribed_courses(portal_name=portal_name,
|
results = self._subscribed_courses(portal_name=portal_name,
|
||||||
course_name=course_name)
|
course_name=course_name)
|
||||||
course = self.__extract_course(response=results,
|
course = self._extract_course(response=results,
|
||||||
course_name=course_name)
|
course_name=course_name)
|
||||||
if not course:
|
if not course:
|
||||||
results = self._my_courses(portal_name=portal_name)
|
results = self._my_courses(portal_name=portal_name)
|
||||||
course = self.__extract_course(response=results,
|
course = self._extract_course(response=results,
|
||||||
course_name=course_name)
|
course_name=course_name)
|
||||||
if not course:
|
if not course:
|
||||||
results = self._subscribed_collection_courses(
|
results = self._subscribed_collection_courses(
|
||||||
portal_name=portal_name)
|
portal_name=portal_name)
|
||||||
course = self.__extract_course(response=results,
|
course = self._extract_course(response=results,
|
||||||
course_name=course_name)
|
course_name=course_name)
|
||||||
if not course:
|
if not course:
|
||||||
results = self._archived_courses(portal_name=portal_name)
|
results = self._archived_courses(portal_name=portal_name)
|
||||||
course = self.__extract_course(response=results,
|
course = self._extract_course(response=results,
|
||||||
course_name=course_name)
|
course_name=course_name)
|
||||||
|
|
||||||
|
if not course:
|
||||||
|
course_html = self.session._get(url).text
|
||||||
|
soup = BeautifulSoup(course_html, "lxml")
|
||||||
|
data_args = soup.find(
|
||||||
|
"div", {"class": "ud-component--course-taking--app"}).attrs["data-module-args"]
|
||||||
|
data_json = json.loads(data_args)
|
||||||
|
course_id = data_json.get("courseId", None)
|
||||||
|
portal_name = self.extract_portal_name(url)
|
||||||
|
course = self._extract_course_info_json(
|
||||||
|
url, course_id, portal_name)
|
||||||
|
|
||||||
if course:
|
if course:
|
||||||
course.update({"portal_name": portal_name})
|
course.update({"portal_name": portal_name})
|
||||||
@ -601,10 +646,12 @@ class Session(object):
|
|||||||
self._headers = HEADERS
|
self._headers = HEADERS
|
||||||
self._session = requests.sessions.Session()
|
self._session = requests.sessions.Session()
|
||||||
|
|
||||||
def _set_auth_headers(self, access_token="", client_id=""):
|
def _set_auth_headers(self, access_token=""):
|
||||||
self._headers["Authorization"] = "Bearer {}".format(access_token)
|
self._headers["Authorization"] = "Bearer {}".format(access_token)
|
||||||
self._headers["X-Udemy-Authorization"] = "Bearer {}".format(
|
self._headers["X-Udemy-Authorization"] = "Bearer {}".format(
|
||||||
access_token)
|
access_token)
|
||||||
|
self._headers[
|
||||||
|
"Cookie"] = cookies
|
||||||
|
|
||||||
def _get(self, url):
|
def _get(self, url):
|
||||||
for i in range(10):
|
for i in range(10):
|
||||||
@ -754,28 +801,9 @@ class UdemyAuth(object):
|
|||||||
})
|
})
|
||||||
return login_form
|
return login_form
|
||||||
|
|
||||||
def authenticate(self, access_token="", client_id=""):
|
def authenticate(self, access_token=""):
|
||||||
if not access_token and not client_id:
|
|
||||||
data = self._form_hidden_input(form_id="login-form")
|
|
||||||
self._cloudsc.headers.update({"Referer": LOGIN_URL})
|
|
||||||
auth_response = self._cloudsc.post(LOGIN_URL,
|
|
||||||
data=data,
|
|
||||||
allow_redirects=False)
|
|
||||||
auth_cookies = auth_response.cookies
|
|
||||||
|
|
||||||
access_token = auth_cookies.get("access_token", "")
|
|
||||||
client_id = auth_cookies.get("client_id", "")
|
|
||||||
|
|
||||||
if access_token:
|
if access_token:
|
||||||
# dump cookies to configs
|
self._session._set_auth_headers(access_token=access_token)
|
||||||
# if self._cache:
|
|
||||||
# _ = to_configs(
|
|
||||||
# username=self.username,
|
|
||||||
# password=self.password,
|
|
||||||
# cookies=f"access_token={access_token}",
|
|
||||||
# )
|
|
||||||
self._session._set_auth_headers(access_token=access_token,
|
|
||||||
client_id=client_id)
|
|
||||||
self._session._session.cookies.update(
|
self._session._session.cookies.update(
|
||||||
{"access_token": access_token})
|
{"access_token": access_token})
|
||||||
return self._session, access_token
|
return self._session, access_token
|
||||||
@ -787,11 +815,6 @@ class UdemyAuth(object):
|
|||||||
if not os.path.exists(download_dir):
|
if not os.path.exists(download_dir):
|
||||||
os.makedirs(download_dir)
|
os.makedirs(download_dir)
|
||||||
|
|
||||||
# Get the keys
|
|
||||||
with open(keyfile_path, 'r') as keyfile:
|
|
||||||
keyfile = keyfile.read()
|
|
||||||
keyfile = json.loads(keyfile)
|
|
||||||
|
|
||||||
|
|
||||||
def durationtoseconds(period):
|
def durationtoseconds(period):
|
||||||
"""
|
"""
|
||||||
|
@ -12,3 +12,5 @@ yt-dlp
|
|||||||
bitstring
|
bitstring
|
||||||
cloudscraper
|
cloudscraper
|
||||||
unidecode
|
unidecode
|
||||||
|
beautifulsoup4
|
||||||
|
lxml
|
Loading…
x
Reference in New Issue
Block a user