mirror of
https://cdm-project.com/Download-Tools/udemy-downloader.git
synced 2025-05-02 21:54:25 +02:00
Cookie extraction
- Removed cloudscraper - Added cookie extraction from browser
This commit is contained in:
parent
e9b9d8a6a4
commit
e5450b6f85
29
README.md
29
README.md
@ -61,19 +61,31 @@ It is up to you to acquire the key and key ID. Please **DO NOT** ask me for help
|
||||
- 
|
||||
- 
|
||||
|
||||
## Start Downloading
|
||||
## Cookies
|
||||
|
||||
To download a course included in a subscription plan that you did not purchase individually, you will need to use cookies. You can also use cookies as an alternative to Bearer Tokens.
|
||||
|
||||
The program can automatically extract them from your browser. You can specify which browser to extract cookies from with the `--browser` argument. Supported browsers are:
|
||||
|
||||
- chrome
|
||||
- firefox
|
||||
- opera
|
||||
- edge
|
||||
- brave
|
||||
- chromium
|
||||
- vivaldi
|
||||
- safari
|
||||
|
||||
## Ready to go
|
||||
|
||||
You can now run the program; see the examples below. The course will download to `out_dir`.
|
||||
|
||||
# Udemy Subscription Plans
|
||||
|
||||
You will need to use a different branch of the program; please see [feat/cookies](https://github.com/Puyodead1/udemy-downloader/tree/feat/cookies).
|
||||
|
||||
# Advanced Usage
|
||||
|
||||
```
|
||||
usage: main.py [-h] -c COURSE_URL [-b BEARER_TOKEN] [-q QUALITY] [-l LANG] [-cd CONCURRENT_DOWNLOADS] [--disable-ipv6] [--skip-lectures] [--download-assets] [--download-captions] [--keep-vtt] [--skip-hls]
|
||||
[--info] [--id-as-course-name] [-sc] [--save-to-file] [--load-from-file] [--log-level LOG_LEVEL] [--use-h265] [--h265-crf H265_CRF] [--h265-preset H265_PRESET] [--use-nvenc] [-v]
|
||||
usage: main.py [-h] -c COURSE_URL [-b BEARER_TOKEN] [-q QUALITY] [-l LANG] [-cd CONCURRENT_DOWNLOADS] [--disable-ipv6] [--skip-lectures] [--download-assets] [--download-captions] [--download-quizzes]
|
||||
[--keep-vtt] [--skip-hls] [--info] [--id-as-course-name] [-sc] [--save-to-file] [--load-from-file] [--log-level LOG_LEVEL] [--browser {chrome,firefox,opera,edge,brave,chromium,vivaldi,safari}]
|
||||
[--use-h265] [--h265-crf H265_CRF] [--h265-preset H265_PRESET] [--use-nvenc] [-v]
|
||||
|
||||
Udemy Downloader
|
||||
|
||||
@ -92,6 +104,7 @@ options:
|
||||
--skip-lectures If specified, lectures won't be downloaded
|
||||
--download-assets If specified, lecture assets will be downloaded
|
||||
--download-captions If specified, captions will be downloaded
|
||||
--download-quizzes If specified, quizzes will be downloaded
|
||||
--keep-vtt If specified, .vtt files won't be removed
|
||||
--skip-hls If specified, hls streams will be skipped (faster fetching) (hls streams usually contain 1080p quality for non-drm lectures)
|
||||
--info If specified, only course information will be printed, nothing will be downloaded
|
||||
@ -104,6 +117,8 @@ options:
|
||||
time)
|
||||
--log-level LOG_LEVEL
|
||||
Logging level: one of DEBUG, INFO, ERROR, WARNING, CRITICAL (Default is INFO)
|
||||
--browser {chrome,firefox,opera,edge,brave,chromium,vivaldi,safari}
|
||||
The browser to extract cookies from
|
||||
--use-h265 If specified, videos will be encoded with the H.265 codec
|
||||
--h265-crf H265_CRF Set a custom CRF value for H.265 encoding. FFMPEG default is 28
|
||||
--h265-preset H265_PRESET
|
||||
|
@ -1 +1 @@
|
||||
__version__ = "1.2.10"
|
||||
__version__ = "1.2.10-cookies"
|
||||
|
133
main.py
133
main.py
@ -12,6 +12,7 @@ from html.parser import HTMLParser as compat_HTMLParser
|
||||
from pathlib import Path
|
||||
from typing import IO
|
||||
|
||||
import browser_cookie3
|
||||
import m3u8
|
||||
import requests
|
||||
import yt_dlp
|
||||
@ -29,7 +30,6 @@ from utils import extract_kid
|
||||
from vtt_to_srt import convert
|
||||
|
||||
retry = 3
|
||||
cookies = ""
|
||||
downloader = None
|
||||
logger: logging.Logger = None
|
||||
dl_assets = False
|
||||
@ -51,11 +51,12 @@ course_url = None
|
||||
info = None
|
||||
keys = {}
|
||||
id_as_course_name = False
|
||||
is_subscription_course = False
|
||||
use_h265 = False
|
||||
h265_crf = 28
|
||||
h265_preset = "medium"
|
||||
use_nvenc = False
|
||||
browser = None
|
||||
cj = None
|
||||
|
||||
|
||||
# from https://stackoverflow.com/a/21978778/9785713
|
||||
@ -68,7 +69,7 @@ def log_subprocess_output(prefix: str, pipe: IO[bytes]):
|
||||
|
||||
# this is the first function that is called, we parse the arguments, setup the logger, and ensure that required directories exist
|
||||
def pre_run():
|
||||
global cookies, dl_assets, dl_captions, dl_quizzes, skip_lectures, caption_locale, quality, bearer_token, course_name, keep_vtt, skip_hls, concurrent_downloads, disable_ipv6, load_from_file, save_to_file, bearer_token, course_url, info, logger, keys, id_as_course_name, is_subscription_course, LOG_LEVEL, use_h265, h265_crf, h265_preset, use_nvenc
|
||||
global dl_assets, dl_captions, dl_quizzes, skip_lectures, caption_locale, quality, bearer_token, course_name, keep_vtt, skip_hls, concurrent_downloads, disable_ipv6, load_from_file, save_to_file, bearer_token, course_url, info, logger, keys, id_as_course_name, LOG_LEVEL, use_h265, h265_crf, h265_preset, use_nvenc, browser
|
||||
|
||||
# make sure the directory exists
|
||||
if not os.path.exists(DOWNLOAD_DIR):
|
||||
@ -162,13 +163,6 @@ def pre_run():
|
||||
action="store_true",
|
||||
help="If specified, the course id will be used in place of the course name for the output directory. This is a 'hack' to reduce the path length",
|
||||
)
|
||||
parser.add_argument(
|
||||
"-sc",
|
||||
"--subscription-course",
|
||||
dest="is_subscription_course",
|
||||
action="store_true",
|
||||
help="Mark the course as a subscription based course, use this if you are having problems with the program auto detecting it",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--save-to-file",
|
||||
dest="save_to_file",
|
||||
@ -187,6 +181,12 @@ def pre_run():
|
||||
type=str,
|
||||
help="Logging level: one of DEBUG, INFO, ERROR, WARNING, CRITICAL (Default is INFO)",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--browser",
|
||||
dest="browser",
|
||||
help="The browser to extract cookies from",
|
||||
choices=["chrome", "firefox", "opera", "edge", "brave", "chromium", "vivaldi", "safari"],
|
||||
)
|
||||
parser.add_argument(
|
||||
"--use-h265",
|
||||
dest="use_h265",
|
||||
@ -302,8 +302,8 @@ def pre_run():
|
||||
|
||||
if args.id_as_course_name:
|
||||
id_as_course_name = args.id_as_course_name
|
||||
if args.is_subscription_course:
|
||||
is_subscription_course = args.is_subscription_course
|
||||
if args.browser:
|
||||
browser = args.browser
|
||||
|
||||
Path(DOWNLOAD_DIR).mkdir(parents=True, exist_ok=True)
|
||||
Path(SAVED_DIR).mkdir(parents=True, exist_ok=True)
|
||||
@ -315,33 +315,41 @@ def pre_run():
|
||||
else:
|
||||
logger.warning("> Keyfile not found! You won't be able to decrypt videos!")
|
||||
|
||||
# Read cookies from file
|
||||
if os.path.exists(COOKIE_FILE_PATH):
|
||||
with open(COOKIE_FILE_PATH, encoding="utf8", mode="r") as cookiefile:
|
||||
cookies = cookiefile.read()
|
||||
cookies = cookies.rstrip()
|
||||
else:
|
||||
logger.warning(
|
||||
"No cookies.txt file was found, you won't be able to download subscription courses! You can ignore ignore this if you don't plan to download a course included in a subscription plan."
|
||||
)
|
||||
|
||||
|
||||
class Udemy:
|
||||
def __init__(self, bearer_token):
|
||||
global cj
|
||||
|
||||
self.session = None
|
||||
self.bearer_token = None
|
||||
self.auth = UdemyAuth(cache_session=False)
|
||||
if not self.session:
|
||||
self.session, self.bearer_token = self.auth.authenticate(bearer_token=bearer_token)
|
||||
self.session = self.auth.authenticate(bearer_token=bearer_token)
|
||||
|
||||
if self.session and self.bearer_token:
|
||||
self.session._headers.update({"Authorization": "Bearer {}".format(self.bearer_token)})
|
||||
self.session._headers.update({"X-Udemy-Authorization": "Bearer {}".format(self.bearer_token)})
|
||||
logger.info("Login Success")
|
||||
else:
|
||||
logger.fatal("Login Failure! You are probably missing an access token!")
|
||||
if not self.session:
|
||||
if browser == None:
|
||||
logger.error("No bearer token was provided, and no browser for cookie extraction was specified.")
|
||||
sys.exit(1)
|
||||
|
||||
logger.warning("No bearer token was provided, attempting to use browser cookies.")
|
||||
|
||||
self.session = self.auth._session
|
||||
|
||||
if browser == "chrome":
|
||||
cj = browser_cookie3.chrome()
|
||||
elif browser == "firefox":
|
||||
cj = browser_cookie3.firefox()
|
||||
elif browser == "opera":
|
||||
cj = browser_cookie3.opera()
|
||||
elif browser == "edge":
|
||||
cj = browser_cookie3.edge()
|
||||
elif browser == "brave":
|
||||
cj = browser_cookie3.brave()
|
||||
elif browser == "chromium":
|
||||
cj = browser_cookie3.chromium()
|
||||
elif browser == "vivaldi":
|
||||
cj = browser_cookie3.vivaldi()
|
||||
|
||||
def _get_quiz(self, quiz_id):
|
||||
print(portal_name)
|
||||
self.session._headers.update(
|
||||
@ -547,7 +555,8 @@ class Udemy:
|
||||
continue
|
||||
width, height = resolution
|
||||
|
||||
if height in seen: continue
|
||||
if height in seen:
|
||||
continue
|
||||
|
||||
# we need to save the individual playlists to disk also
|
||||
playlist_path = Path(temp_path, f"index_{asset_id}_{width}x{height}.m3u8")
|
||||
@ -868,27 +877,8 @@ class Udemy:
|
||||
|
||||
def _extract_course_info(self, url):
|
||||
global portal_name
|
||||
portal_name, course_name = self.extract_course_name(url)
|
||||
course = {
|
||||
"portal_name": portal_name
|
||||
}
|
||||
|
||||
if not is_subscription_course:
|
||||
results = self._subscribed_courses(portal_name=portal_name, course_name=course_name)
|
||||
course = self._extract_course(response=results, course_name=course_name)
|
||||
if not course:
|
||||
results = self._my_courses(portal_name=portal_name)
|
||||
course = self._extract_course(response=results, course_name=course_name)
|
||||
if not course:
|
||||
results = self._subscribed_collection_courses(portal_name=portal_name)
|
||||
course = self._extract_course(response=results, course_name=course_name)
|
||||
if not course:
|
||||
results = self._archived_courses(portal_name=portal_name)
|
||||
course = self._extract_course(response=results, course_name=course_name)
|
||||
|
||||
if not course or is_subscription_course:
|
||||
course_id = self._extract_subscription_course_info(url)
|
||||
course = self._extract_course_info_json(url, course_id)
|
||||
course_id, portal_name = self._extract_subscription_course_info(url)
|
||||
course = self._extract_course_info_json(url, course_id, portal_name)
|
||||
|
||||
if course:
|
||||
return course.get("id"), course
|
||||
@ -898,11 +888,11 @@ class Udemy:
|
||||
"It seems either you are not enrolled or you have to visit the course atleast once while you are logged in.",
|
||||
)
|
||||
logger.info(
|
||||
"Trying to logout now...",
|
||||
"Terminating Session...",
|
||||
)
|
||||
self.session.terminate()
|
||||
logger.info(
|
||||
"Logged out successfully.",
|
||||
"Session terminated.",
|
||||
)
|
||||
sys.exit(1)
|
||||
|
||||
@ -1009,6 +999,7 @@ class Udemy:
|
||||
|
||||
return lecture
|
||||
|
||||
|
||||
class Session(object):
|
||||
def __init__(self):
|
||||
self._headers = HEADERS
|
||||
@ -1023,11 +1014,10 @@ class Session(object):
|
||||
def _set_auth_headers(self, bearer_token=""):
|
||||
self._headers["Authorization"] = "Bearer {}".format(bearer_token)
|
||||
self._headers["X-Udemy-Authorization"] = "Bearer {}".format(bearer_token)
|
||||
self._headers["Cookie"] = cookies
|
||||
|
||||
def _get(self, url):
|
||||
for i in range(10):
|
||||
session = self._session.get(url, headers=self._headers)
|
||||
session = self._session.get(url, headers=self._headers, cookies=cj)
|
||||
if session.ok or session.status_code in [502, 503]:
|
||||
return session
|
||||
if not session.ok:
|
||||
@ -1036,7 +1026,7 @@ class Session(object):
|
||||
time.sleep(0.8)
|
||||
|
||||
def _post(self, url, data, redirect=True):
|
||||
session = self._session.post(url, data, headers=self._headers, allow_redirects=redirect)
|
||||
session = self._session.post(url, data, headers=self._headers, allow_redirects=redirect, cookies=cj)
|
||||
if session.ok:
|
||||
return session
|
||||
if not session.ok:
|
||||
@ -1140,14 +1130,12 @@ class UdemyAuth(object):
|
||||
self._cache = cache_session
|
||||
self._session = Session()
|
||||
|
||||
def authenticate(self, bearer_token=""):
|
||||
def authenticate(self, bearer_token=None):
|
||||
if bearer_token:
|
||||
self._session._set_auth_headers(bearer_token=bearer_token)
|
||||
self._session._session.cookies.update({"bearer_token": bearer_token})
|
||||
return self._session, bearer_token
|
||||
return self._session
|
||||
else:
|
||||
self._session._set_auth_headers()
|
||||
return None, None
|
||||
return None
|
||||
|
||||
|
||||
def durationtoseconds(period):
|
||||
@ -1197,9 +1185,7 @@ def mux_process(video_title, video_filepath, audio_filepath, output_path):
|
||||
transcode, video_filepath, audio_filepath, codec, h265_crf, h265_preset, video_title, output_path
|
||||
)
|
||||
else:
|
||||
command = 'ffmpeg -y -i "{}" -i "{}" -c:v copy -c:a copy -fflags +bitexact -map_metadata -1 -metadata title="{}" "{}"'.format(
|
||||
video_filepath, audio_filepath, video_title, output_path
|
||||
)
|
||||
command = 'ffmpeg -y -i "{}" -i "{}" -c:v copy -c:a copy -fflags +bitexact -map_metadata -1 -metadata title="{}" "{}"'.format(video_filepath, audio_filepath, video_title, output_path)
|
||||
else:
|
||||
if use_h265:
|
||||
command = 'nice -n 7 ffmpeg {} -y -i "{}" -i "{}" -c:v libx265 -vtag hvc1 -crf {} -preset {} -c:a copy -fflags +bitexact -map_metadata -1 -metadata title="{}" "{}"'.format(
|
||||
@ -1538,7 +1524,18 @@ def process_lecture(lecture, lecture_path, lecture_file_name, chapter_dir):
|
||||
source_type = source.get("type")
|
||||
if source_type == "hls":
|
||||
temp_filepath = lecture_path.replace(".mp4", ".%(ext)s")
|
||||
cmd = ["yt-dlp", "--enable-file-urls", "--force-generic-extractor", "--concurrent-fragments", f"{concurrent_downloads}", "--downloader", "aria2c", "-o", f"{temp_filepath}", f"{url}"]
|
||||
cmd = [
|
||||
"yt-dlp",
|
||||
"--enable-file-urls",
|
||||
"--force-generic-extractor",
|
||||
"--concurrent-fragments",
|
||||
f"{concurrent_downloads}",
|
||||
"--downloader",
|
||||
"aria2c",
|
||||
"-o",
|
||||
f"{temp_filepath}",
|
||||
f"{url}",
|
||||
]
|
||||
if disable_ipv6:
|
||||
cmd.append("--downloader-args")
|
||||
cmd.append('aria2c:"--disable-ipv6"')
|
||||
@ -1574,7 +1571,6 @@ def process_lecture(lecture, lecture_path, lecture_file_name, chapter_dir):
|
||||
logger.error(" > Missing sources for lecture", lecture)
|
||||
|
||||
|
||||
|
||||
def process_quiz(udemy: Udemy, lecture, chapter_dir):
|
||||
lecture_title = lecture.get("lecture_title")
|
||||
lecture_index = lecture.get("lecture_index")
|
||||
@ -1594,7 +1590,6 @@ def process_quiz(udemy: Udemy, lecture, chapter_dir):
|
||||
f.write(html)
|
||||
|
||||
|
||||
|
||||
def parse_new(udemy: Udemy, udemy_object: dict):
|
||||
total_chapters = udemy_object.get("total_chapters")
|
||||
total_lectures = udemy_object.get("total_lectures")
|
||||
@ -1851,9 +1846,9 @@ def main():
|
||||
counter = -1
|
||||
|
||||
if resource:
|
||||
logger.info("> Trying to logout")
|
||||
logger.info("> Terminating Session...")
|
||||
udemy.session.terminate()
|
||||
logger.info("> Logged out.")
|
||||
logger.info("> Session Terminated.")
|
||||
|
||||
if course:
|
||||
logger.info("> Processing course data, this may take a minute. ")
|
||||
|
@ -15,3 +15,4 @@ lxml
|
||||
six
|
||||
pathvalidate
|
||||
coloredlogs
|
||||
browser_cookie3
|
Loading…
x
Reference in New Issue
Block a user