Cookie extraction

- Removed cloudscraper
- Added cookie extraction from browser
This commit is contained in:
Puyodead1 2023-07-02 17:49:04 -04:00
parent e9b9d8a6a4
commit e5450b6f85
No known key found for this signature in database
GPG Key ID: A4FA4FEC0DD353FC
4 changed files with 165 additions and 154 deletions

View File

@ -61,19 +61,31 @@ It is up to you to acquire the key and key ID. Please **DO NOT** ask me for help
- ![keyfile example](https://i.imgur.com/e5aU0ng.png) - ![keyfile example](https://i.imgur.com/e5aU0ng.png)
- ![example key and kid from console](https://i.imgur.com/awgndZA.png) - ![example key and kid from console](https://i.imgur.com/awgndZA.png)
## Start Downloading ## Cookies
To download a course included in a subscription plan that you did not purchase individually, you will need to use cookies. You can also use cookies as an alternative to Bearer Tokens.
The program can automatically extract them from your browser. You can specify which browser to extract cookies from with the `--browser` argument. Supported browsers are:
- chrome
- firefox
- opera
- edge
- brave
- chromium
- vivaldi
- safari
## Ready to go
You can now run the program, see the examples below. The course will download to `out_dir`. You can now run the program, see the examples below. The course will download to `out_dir`.
# Udemy Subscription Plans
You will need to use a different branch of the program; please see [feat/cookies](https://github.com/Puyodead1/udemy-downloader/tree/feat/cookies).
# Advanced Usage # Advanced Usage
``` ```
usage: main.py [-h] -c COURSE_URL [-b BEARER_TOKEN] [-q QUALITY] [-l LANG] [-cd CONCURRENT_DOWNLOADS] [--disable-ipv6] [--skip-lectures] [--download-assets] [--download-captions] [--keep-vtt] [--skip-hls] usage: main.py [-h] -c COURSE_URL [-b BEARER_TOKEN] [-q QUALITY] [-l LANG] [-cd CONCURRENT_DOWNLOADS] [--disable-ipv6] [--skip-lectures] [--download-assets] [--download-captions] [--download-quizzes]
[--info] [--id-as-course-name] [-sc] [--save-to-file] [--load-from-file] [--log-level LOG_LEVEL] [--use-h265] [--h265-crf H265_CRF] [--h265-preset H265_PRESET] [--use-nvenc] [-v] [--keep-vtt] [--skip-hls] [--info] [--id-as-course-name] [-sc] [--save-to-file] [--load-from-file] [--log-level LOG_LEVEL] [--browser {chrome,firefox,opera,edge,brave,chromium,vivaldi,safari}]
[--use-h265] [--h265-crf H265_CRF] [--h265-preset H265_PRESET] [--use-nvenc] [-v]
Udemy Downloader Udemy Downloader
@ -92,6 +104,7 @@ options:
--skip-lectures If specified, lectures won't be downloaded --skip-lectures If specified, lectures won't be downloaded
--download-assets If specified, lecture assets will be downloaded --download-assets If specified, lecture assets will be downloaded
--download-captions If specified, captions will be downloaded --download-captions If specified, captions will be downloaded
--download-quizzes If specified, quizzes will be downloaded
--keep-vtt If specified, .vtt files won't be removed --keep-vtt If specified, .vtt files won't be removed
--skip-hls If specified, hls streams will be skipped (faster fetching) (hls streams usually contain 1080p quality for non-drm lectures) --skip-hls If specified, hls streams will be skipped (faster fetching) (hls streams usually contain 1080p quality for non-drm lectures)
--info If specified, only course information will be printed, nothing will be downloaded --info If specified, only course information will be printed, nothing will be downloaded
@ -104,6 +117,8 @@ options:
time) time)
--log-level LOG_LEVEL --log-level LOG_LEVEL
Logging level: one of DEBUG, INFO, ERROR, WARNING, CRITICAL (Default is INFO) Logging level: one of DEBUG, INFO, ERROR, WARNING, CRITICAL (Default is INFO)
--browser {chrome,firefox,opera,edge,brave,chromium,vivaldi,safari}
The browser to extract cookies from
--use-h265 If specified, videos will be encoded with the H.265 codec --use-h265 If specified, videos will be encoded with the H.265 codec
--h265-crf H265_CRF Set a custom CRF value for H.265 encoding. FFMPEG default is 28 --h265-crf H265_CRF Set a custom CRF value for H.265 encoding. FFMPEG default is 28
--h265-preset H265_PRESET --h265-preset H265_PRESET

View File

@ -1 +1 @@
__version__ = "1.2.10" __version__ = "1.2.10-cookies"

133
main.py
View File

@ -12,6 +12,7 @@ from html.parser import HTMLParser as compat_HTMLParser
from pathlib import Path from pathlib import Path
from typing import IO from typing import IO
import browser_cookie3
import m3u8 import m3u8
import requests import requests
import yt_dlp import yt_dlp
@ -29,7 +30,6 @@ from utils import extract_kid
from vtt_to_srt import convert from vtt_to_srt import convert
retry = 3 retry = 3
cookies = ""
downloader = None downloader = None
logger: logging.Logger = None logger: logging.Logger = None
dl_assets = False dl_assets = False
@ -51,11 +51,12 @@ course_url = None
info = None info = None
keys = {} keys = {}
id_as_course_name = False id_as_course_name = False
is_subscription_course = False
use_h265 = False use_h265 = False
h265_crf = 28 h265_crf = 28
h265_preset = "medium" h265_preset = "medium"
use_nvenc = False use_nvenc = False
browser = None
cj = None
# from https://stackoverflow.com/a/21978778/9785713 # from https://stackoverflow.com/a/21978778/9785713
@ -68,7 +69,7 @@ def log_subprocess_output(prefix: str, pipe: IO[bytes]):
# this is the first function that is called, we parse the arguments, setup the logger, and ensure that required directories exist # this is the first function that is called, we parse the arguments, setup the logger, and ensure that required directories exist
def pre_run(): def pre_run():
global cookies, dl_assets, dl_captions, dl_quizzes, skip_lectures, caption_locale, quality, bearer_token, course_name, keep_vtt, skip_hls, concurrent_downloads, disable_ipv6, load_from_file, save_to_file, bearer_token, course_url, info, logger, keys, id_as_course_name, is_subscription_course, LOG_LEVEL, use_h265, h265_crf, h265_preset, use_nvenc global dl_assets, dl_captions, dl_quizzes, skip_lectures, caption_locale, quality, bearer_token, course_name, keep_vtt, skip_hls, concurrent_downloads, disable_ipv6, load_from_file, save_to_file, bearer_token, course_url, info, logger, keys, id_as_course_name, LOG_LEVEL, use_h265, h265_crf, h265_preset, use_nvenc, browser
# make sure the directory exists # make sure the directory exists
if not os.path.exists(DOWNLOAD_DIR): if not os.path.exists(DOWNLOAD_DIR):
@ -162,13 +163,6 @@ def pre_run():
action="store_true", action="store_true",
help="If specified, the course id will be used in place of the course name for the output directory. This is a 'hack' to reduce the path length", help="If specified, the course id will be used in place of the course name for the output directory. This is a 'hack' to reduce the path length",
) )
parser.add_argument(
"-sc",
"--subscription-course",
dest="is_subscription_course",
action="store_true",
help="Mark the course as a subscription based course, use this if you are having problems with the program auto detecting it",
)
parser.add_argument( parser.add_argument(
"--save-to-file", "--save-to-file",
dest="save_to_file", dest="save_to_file",
@ -187,6 +181,12 @@ def pre_run():
type=str, type=str,
help="Logging level: one of DEBUG, INFO, ERROR, WARNING, CRITICAL (Default is INFO)", help="Logging level: one of DEBUG, INFO, ERROR, WARNING, CRITICAL (Default is INFO)",
) )
parser.add_argument(
"--browser",
dest="browser",
help="The browser to extract cookies from",
choices=["chrome", "firefox", "opera", "edge", "brave", "chromium", "vivaldi", "safari"],
)
parser.add_argument( parser.add_argument(
"--use-h265", "--use-h265",
dest="use_h265", dest="use_h265",
@ -302,8 +302,8 @@ def pre_run():
if args.id_as_course_name: if args.id_as_course_name:
id_as_course_name = args.id_as_course_name id_as_course_name = args.id_as_course_name
if args.is_subscription_course: if args.browser:
is_subscription_course = args.is_subscription_course browser = args.browser
Path(DOWNLOAD_DIR).mkdir(parents=True, exist_ok=True) Path(DOWNLOAD_DIR).mkdir(parents=True, exist_ok=True)
Path(SAVED_DIR).mkdir(parents=True, exist_ok=True) Path(SAVED_DIR).mkdir(parents=True, exist_ok=True)
@ -315,33 +315,41 @@ def pre_run():
else: else:
logger.warning("> Keyfile not found! You won't be able to decrypt videos!") logger.warning("> Keyfile not found! You won't be able to decrypt videos!")
# Read cookies from file
if os.path.exists(COOKIE_FILE_PATH):
with open(COOKIE_FILE_PATH, encoding="utf8", mode="r") as cookiefile:
cookies = cookiefile.read()
cookies = cookies.rstrip()
else:
logger.warning(
"No cookies.txt file was found, you won't be able to download subscription courses! You can ignore ignore this if you don't plan to download a course included in a subscription plan."
)
class Udemy: class Udemy:
def __init__(self, bearer_token): def __init__(self, bearer_token):
global cj
self.session = None self.session = None
self.bearer_token = None self.bearer_token = None
self.auth = UdemyAuth(cache_session=False) self.auth = UdemyAuth(cache_session=False)
if not self.session: if not self.session:
self.session, self.bearer_token = self.auth.authenticate(bearer_token=bearer_token) self.session = self.auth.authenticate(bearer_token=bearer_token)
if self.session and self.bearer_token: if not self.session:
self.session._headers.update({"Authorization": "Bearer {}".format(self.bearer_token)}) if browser == None:
self.session._headers.update({"X-Udemy-Authorization": "Bearer {}".format(self.bearer_token)}) logger.error("No bearer token was provided, and no browser for cookie extraction was specified.")
logger.info("Login Success")
else:
logger.fatal("Login Failure! You are probably missing an access token!")
sys.exit(1) sys.exit(1)
logger.warning("No bearer token was provided, attempting to use browser cookies.")
self.session = self.auth._session
if browser == "chrome":
cj = browser_cookie3.chrome()
elif browser == "firefox":
cj = browser_cookie3.firefox()
elif browser == "opera":
cj = browser_cookie3.opera()
elif browser == "edge":
cj = browser_cookie3.edge()
elif browser == "brave":
cj = browser_cookie3.brave()
elif browser == "chromium":
cj = browser_cookie3.chromium()
elif browser == "vivaldi":
cj = browser_cookie3.vivaldi()
def _get_quiz(self, quiz_id): def _get_quiz(self, quiz_id):
print(portal_name) print(portal_name)
self.session._headers.update( self.session._headers.update(
@ -547,7 +555,8 @@ class Udemy:
continue continue
width, height = resolution width, height = resolution
if height in seen: continue if height in seen:
continue
# we need to save the individual playlists to disk also # we need to save the individual playlists to disk also
playlist_path = Path(temp_path, f"index_{asset_id}_{width}x{height}.m3u8") playlist_path = Path(temp_path, f"index_{asset_id}_{width}x{height}.m3u8")
@ -868,27 +877,8 @@ class Udemy:
def _extract_course_info(self, url): def _extract_course_info(self, url):
global portal_name global portal_name
portal_name, course_name = self.extract_course_name(url) course_id, portal_name = self._extract_subscription_course_info(url)
course = { course = self._extract_course_info_json(url, course_id, portal_name)
"portal_name": portal_name
}
if not is_subscription_course:
results = self._subscribed_courses(portal_name=portal_name, course_name=course_name)
course = self._extract_course(response=results, course_name=course_name)
if not course:
results = self._my_courses(portal_name=portal_name)
course = self._extract_course(response=results, course_name=course_name)
if not course:
results = self._subscribed_collection_courses(portal_name=portal_name)
course = self._extract_course(response=results, course_name=course_name)
if not course:
results = self._archived_courses(portal_name=portal_name)
course = self._extract_course(response=results, course_name=course_name)
if not course or is_subscription_course:
course_id = self._extract_subscription_course_info(url)
course = self._extract_course_info_json(url, course_id)
if course: if course:
return course.get("id"), course return course.get("id"), course
@ -898,11 +888,11 @@ class Udemy:
"It seems either you are not enrolled or you have to visit the course atleast once while you are logged in.", "It seems either you are not enrolled or you have to visit the course atleast once while you are logged in.",
) )
logger.info( logger.info(
"Trying to logout now...", "Terminating Session...",
) )
self.session.terminate() self.session.terminate()
logger.info( logger.info(
"Logged out successfully.", "Session terminated.",
) )
sys.exit(1) sys.exit(1)
@ -1009,6 +999,7 @@ class Udemy:
return lecture return lecture
class Session(object): class Session(object):
def __init__(self): def __init__(self):
self._headers = HEADERS self._headers = HEADERS
@ -1023,11 +1014,10 @@ class Session(object):
def _set_auth_headers(self, bearer_token=""): def _set_auth_headers(self, bearer_token=""):
self._headers["Authorization"] = "Bearer {}".format(bearer_token) self._headers["Authorization"] = "Bearer {}".format(bearer_token)
self._headers["X-Udemy-Authorization"] = "Bearer {}".format(bearer_token) self._headers["X-Udemy-Authorization"] = "Bearer {}".format(bearer_token)
self._headers["Cookie"] = cookies
def _get(self, url): def _get(self, url):
for i in range(10): for i in range(10):
session = self._session.get(url, headers=self._headers) session = self._session.get(url, headers=self._headers, cookies=cj)
if session.ok or session.status_code in [502, 503]: if session.ok or session.status_code in [502, 503]:
return session return session
if not session.ok: if not session.ok:
@ -1036,7 +1026,7 @@ class Session(object):
time.sleep(0.8) time.sleep(0.8)
def _post(self, url, data, redirect=True): def _post(self, url, data, redirect=True):
session = self._session.post(url, data, headers=self._headers, allow_redirects=redirect) session = self._session.post(url, data, headers=self._headers, allow_redirects=redirect, cookies=cj)
if session.ok: if session.ok:
return session return session
if not session.ok: if not session.ok:
@ -1140,14 +1130,12 @@ class UdemyAuth(object):
self._cache = cache_session self._cache = cache_session
self._session = Session() self._session = Session()
def authenticate(self, bearer_token=""): def authenticate(self, bearer_token=None):
if bearer_token: if bearer_token:
self._session._set_auth_headers(bearer_token=bearer_token) self._session._set_auth_headers(bearer_token=bearer_token)
self._session._session.cookies.update({"bearer_token": bearer_token}) return self._session
return self._session, bearer_token
else: else:
self._session._set_auth_headers() return None
return None, None
def durationtoseconds(period): def durationtoseconds(period):
@ -1197,9 +1185,7 @@ def mux_process(video_title, video_filepath, audio_filepath, output_path):
transcode, video_filepath, audio_filepath, codec, h265_crf, h265_preset, video_title, output_path transcode, video_filepath, audio_filepath, codec, h265_crf, h265_preset, video_title, output_path
) )
else: else:
command = 'ffmpeg -y -i "{}" -i "{}" -c:v copy -c:a copy -fflags +bitexact -map_metadata -1 -metadata title="{}" "{}"'.format( command = 'ffmpeg -y -i "{}" -i "{}" -c:v copy -c:a copy -fflags +bitexact -map_metadata -1 -metadata title="{}" "{}"'.format(video_filepath, audio_filepath, video_title, output_path)
video_filepath, audio_filepath, video_title, output_path
)
else: else:
if use_h265: if use_h265:
command = 'nice -n 7 ffmpeg {} -y -i "{}" -i "{}" -c:v libx265 -vtag hvc1 -crf {} -preset {} -c:a copy -fflags +bitexact -map_metadata -1 -metadata title="{}" "{}"'.format( command = 'nice -n 7 ffmpeg {} -y -i "{}" -i "{}" -c:v libx265 -vtag hvc1 -crf {} -preset {} -c:a copy -fflags +bitexact -map_metadata -1 -metadata title="{}" "{}"'.format(
@ -1538,7 +1524,18 @@ def process_lecture(lecture, lecture_path, lecture_file_name, chapter_dir):
source_type = source.get("type") source_type = source.get("type")
if source_type == "hls": if source_type == "hls":
temp_filepath = lecture_path.replace(".mp4", ".%(ext)s") temp_filepath = lecture_path.replace(".mp4", ".%(ext)s")
cmd = ["yt-dlp", "--enable-file-urls", "--force-generic-extractor", "--concurrent-fragments", f"{concurrent_downloads}", "--downloader", "aria2c", "-o", f"{temp_filepath}", f"{url}"] cmd = [
"yt-dlp",
"--enable-file-urls",
"--force-generic-extractor",
"--concurrent-fragments",
f"{concurrent_downloads}",
"--downloader",
"aria2c",
"-o",
f"{temp_filepath}",
f"{url}",
]
if disable_ipv6: if disable_ipv6:
cmd.append("--downloader-args") cmd.append("--downloader-args")
cmd.append('aria2c:"--disable-ipv6"') cmd.append('aria2c:"--disable-ipv6"')
@ -1574,7 +1571,6 @@ def process_lecture(lecture, lecture_path, lecture_file_name, chapter_dir):
logger.error(" > Missing sources for lecture", lecture) logger.error(" > Missing sources for lecture", lecture)
def process_quiz(udemy: Udemy, lecture, chapter_dir): def process_quiz(udemy: Udemy, lecture, chapter_dir):
lecture_title = lecture.get("lecture_title") lecture_title = lecture.get("lecture_title")
lecture_index = lecture.get("lecture_index") lecture_index = lecture.get("lecture_index")
@ -1594,7 +1590,6 @@ def process_quiz(udemy: Udemy, lecture, chapter_dir):
f.write(html) f.write(html)
def parse_new(udemy: Udemy, udemy_object: dict): def parse_new(udemy: Udemy, udemy_object: dict):
total_chapters = udemy_object.get("total_chapters") total_chapters = udemy_object.get("total_chapters")
total_lectures = udemy_object.get("total_lectures") total_lectures = udemy_object.get("total_lectures")
@ -1851,9 +1846,9 @@ def main():
counter = -1 counter = -1
if resource: if resource:
logger.info("> Trying to logout") logger.info("> Terminating Session...")
udemy.session.terminate() udemy.session.terminate()
logger.info("> Logged out.") logger.info("> Session Terminated.")
if course: if course:
logger.info("> Processing course data, this may take a minute. ") logger.info("> Processing course data, this may take a minute. ")

View File

@ -15,3 +15,4 @@ lxml
six six
pathvalidate pathvalidate
coloredlogs coloredlogs
browser_cookie3