Merge pull request #177 from Puyodead1/feat/cookies

Merge feat/cookies
This commit is contained in:
Puyodead1 2023-08-12 23:56:15 -04:00 committed by GitHub
commit 84eb17b793
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 179 additions and 133 deletions

View File

@ -61,19 +61,31 @@ It is up to you to acquire the key and key ID. Please **DO NOT** ask me for help
- ![keyfile example](https://i.imgur.com/e5aU0ng.png)
- ![example key and kid from console](https://i.imgur.com/awgndZA.png)
## Start Downloading
## Cookies
To download a course included in a subscription plan that you did not purchase individually, you will need to use cookies. You can also use cookies as an alternative to Bearer Tokens.
The program can automatically extract cookies from your browser. Use the `--browser` argument to specify which browser to extract them from. Supported browsers are:
- chrome
- firefox
- opera
- edge
- brave
- chromium
- vivaldi
- safari
## Ready to go
You can now run the program; see the examples below. The course will download to `out_dir`.
# Udemy Subscription Plans
You will need to use a different branch of the program; please see [feat/cookies](https://github.com/Puyodead1/udemy-downloader/tree/feat/cookies).
# Advanced Usage
```
usage: main.py [-h] -c COURSE_URL [-b BEARER_TOKEN] [-q QUALITY] [-l LANG] [-cd CONCURRENT_DOWNLOADS] [--disable-ipv6] [--skip-lectures] [--download-assets] [--download-captions] [--keep-vtt] [--skip-hls]
[--info] [--id-as-course-name] [-sc] [--save-to-file] [--load-from-file] [--log-level LOG_LEVEL] [--use-h265] [--h265-crf H265_CRF] [--h265-preset H265_PRESET] [--use-nvenc] [-v]
usage: main.py [-h] -c COURSE_URL [-b BEARER_TOKEN] [-q QUALITY] [-l LANG] [-cd CONCURRENT_DOWNLOADS] [--disable-ipv6] [--skip-lectures] [--download-assets] [--download-captions] [--download-quizzes]
[--keep-vtt] [--skip-hls] [--info] [--id-as-course-name] [-sc] [--save-to-file] [--load-from-file] [--log-level LOG_LEVEL] [--browser {chrome,firefox,opera,edge,brave,chromium,vivaldi,safari}]
[--use-h265] [--h265-crf H265_CRF] [--h265-preset H265_PRESET] [--use-nvenc] [-v]
Udemy Downloader
@ -92,6 +104,7 @@ options:
--skip-lectures If specified, lectures won't be downloaded
--download-assets If specified, lecture assets will be downloaded
--download-captions If specified, captions will be downloaded
--download-quizzes If specified, quizzes will be downloaded
--keep-vtt If specified, .vtt files won't be removed
--skip-hls If specified, hls streams will be skipped (faster fetching) (hls streams usually contain 1080p quality for non-drm lectures)
--info If specified, only course information will be printed, nothing will be downloaded
@ -104,6 +117,8 @@ options:
time)
--log-level LOG_LEVEL
Logging level: one of DEBUG, INFO, ERROR, WARNING, CRITICAL (Default is INFO)
--browser {chrome,firefox,opera,edge,brave,chromium,vivaldi,safari}
The browser to extract cookies from
--use-h265 If specified, videos will be encoded with the H.265 codec
--h265-crf H265_CRF Set a custom CRF value for H.265 encoding. FFMPEG default is 28
--h265-preset H265_PRESET

View File

@ -1 +1 @@
__version__ = "1.2.10"
__version__ = "1.2.10-cookies"

126
main.py
View File

@ -12,6 +12,7 @@ from html.parser import HTMLParser as compat_HTMLParser
from pathlib import Path
from typing import IO
import browser_cookie3
import m3u8
import requests
import yt_dlp
@ -29,7 +30,6 @@ from utils import extract_kid
from vtt_to_srt import convert
retry = 3
cookies = ""
downloader = None
logger: logging.Logger = None
dl_assets = False
@ -56,6 +56,8 @@ use_h265 = False
h265_crf = 28
h265_preset = "medium"
use_nvenc = False
browser = None
cj = None
# from https://stackoverflow.com/a/21978778/9785713
@ -68,7 +70,7 @@ def log_subprocess_output(prefix: str, pipe: IO[bytes]):
# This is the first function that is called: it parses the arguments, sets up the logger, and ensures that the required directories exist.
def pre_run():
global cookies, dl_assets, dl_captions, dl_quizzes, skip_lectures, caption_locale, quality, bearer_token, course_name, keep_vtt, skip_hls, concurrent_downloads, disable_ipv6, load_from_file, save_to_file, bearer_token, course_url, info, logger, keys, id_as_course_name, is_subscription_course, LOG_LEVEL, use_h265, h265_crf, h265_preset, use_nvenc
global dl_assets, dl_captions, dl_quizzes, skip_lectures, caption_locale, quality, bearer_token, course_name, keep_vtt, skip_hls, concurrent_downloads, disable_ipv6, load_from_file, save_to_file, bearer_token, course_url, info, logger, keys, id_as_course_name, LOG_LEVEL, use_h265, h265_crf, h265_preset, use_nvenc, browser
# make sure the directory exists
if not os.path.exists(DOWNLOAD_DIR):
@ -187,6 +189,12 @@ def pre_run():
type=str,
help="Logging level: one of DEBUG, INFO, ERROR, WARNING, CRITICAL (Default is INFO)",
)
parser.add_argument(
"--browser",
dest="browser",
help="The browser to extract cookies from",
choices=["chrome", "firefox", "opera", "edge", "brave", "chromium", "vivaldi", "safari"],
)
parser.add_argument(
"--use-h265",
dest="use_h265",
@ -304,6 +312,8 @@ def pre_run():
id_as_course_name = args.id_as_course_name
if args.is_subscription_course:
is_subscription_course = args.is_subscription_course
if args.browser:
browser = args.browser
Path(DOWNLOAD_DIR).mkdir(parents=True, exist_ok=True)
Path(SAVED_DIR).mkdir(parents=True, exist_ok=True)
@ -315,33 +325,41 @@ def pre_run():
else:
logger.warning("> Keyfile not found! You won't be able to decrypt videos!")
# Read cookies from file
if os.path.exists(COOKIE_FILE_PATH):
with open(COOKIE_FILE_PATH, encoding="utf8", mode="r") as cookiefile:
cookies = cookiefile.read()
cookies = cookies.rstrip()
else:
logger.warning(
"No cookies.txt file was found, you won't be able to download subscription courses! You can ignore ignore this if you don't plan to download a course included in a subscription plan."
)
class Udemy:
def __init__(self, bearer_token):
global cj
self.session = None
self.bearer_token = None
self.auth = UdemyAuth(cache_session=False)
if not self.session:
self.session, self.bearer_token = self.auth.authenticate(bearer_token=bearer_token)
self.session = self.auth.authenticate(bearer_token=bearer_token)
if self.session and self.bearer_token:
self.session._headers.update({"Authorization": "Bearer {}".format(self.bearer_token)})
self.session._headers.update({"X-Udemy-Authorization": "Bearer {}".format(self.bearer_token)})
logger.info("Login Success")
else:
logger.fatal("Login Failure! You are probably missing an access token!")
if not self.session:
if browser == None:
logger.error("No bearer token was provided, and no browser for cookie extraction was specified.")
sys.exit(1)
logger.warning("No bearer token was provided, attempting to use browser cookies.")
self.session = self.auth._session
if browser == "chrome":
cj = browser_cookie3.chrome()
elif browser == "firefox":
cj = browser_cookie3.firefox()
elif browser == "opera":
cj = browser_cookie3.opera()
elif browser == "edge":
cj = browser_cookie3.edge()
elif browser == "brave":
cj = browser_cookie3.brave()
elif browser == "chromium":
cj = browser_cookie3.chromium()
elif browser == "vivaldi":
cj = browser_cookie3.vivaldi()
def _get_quiz(self, quiz_id):
print(portal_name)
self.session._headers.update(
@ -547,7 +565,8 @@ class Udemy:
continue
width, height = resolution
if height in seen: continue
if height in seen:
continue
# we need to save the individual playlists to disk also
playlist_path = Path(temp_path, f"index_{asset_id}_{width}x{height}.m3u8")
@ -869,9 +888,7 @@ class Udemy:
def _extract_course_info(self, url):
global portal_name
portal_name, course_name = self.extract_course_name(url)
course = {
"portal_name": portal_name
}
course = {"portal_name": portal_name}
if not is_subscription_course:
results = self._subscribed_courses(portal_name=portal_name, course_name=course_name)
@ -898,11 +915,11 @@ class Udemy:
"It seems either you are not enrolled or you have to visit the course atleast once while you are logged in.",
)
logger.info(
"Trying to logout now...",
"Terminating Session...",
)
self.session.terminate()
logger.info(
"Logged out successfully.",
"Session terminated.",
)
sys.exit(1)
@ -1009,6 +1026,7 @@ class Udemy:
return lecture
class Session(object):
def __init__(self):
self._headers = HEADERS
@ -1023,11 +1041,10 @@ class Session(object):
def _set_auth_headers(self, bearer_token=""):
self._headers["Authorization"] = "Bearer {}".format(bearer_token)
self._headers["X-Udemy-Authorization"] = "Bearer {}".format(bearer_token)
self._headers["Cookie"] = cookies
def _get(self, url):
for i in range(10):
session = self._session.get(url, headers=self._headers)
session = self._session.get(url, headers=self._headers, cookies=cj)
if session.ok or session.status_code in [502, 503]:
return session
if not session.ok:
@ -1036,7 +1053,7 @@ class Session(object):
time.sleep(0.8)
def _post(self, url, data, redirect=True):
session = self._session.post(url, data, headers=self._headers, allow_redirects=redirect)
session = self._session.post(url, data, headers=self._headers, allow_redirects=redirect, cookies=cj)
if session.ok:
return session
if not session.ok:
@ -1140,14 +1157,12 @@ class UdemyAuth(object):
self._cache = cache_session
self._session = Session()
def authenticate(self, bearer_token=""):
def authenticate(self, bearer_token=None):
if bearer_token:
self._session._set_auth_headers(bearer_token=bearer_token)
self._session._session.cookies.update({"bearer_token": bearer_token})
return self._session, bearer_token
return self._session
else:
self._session._set_auth_headers()
return None, None
return None
def durationtoseconds(period):
@ -1197,9 +1212,7 @@ def mux_process(video_title, video_filepath, audio_filepath, output_path):
transcode, video_filepath, audio_filepath, codec, h265_crf, h265_preset, video_title, output_path
)
else:
command = 'ffmpeg -y -i "{}" -i "{}" -c:v copy -c:a copy -fflags +bitexact -map_metadata -1 -metadata title="{}" "{}"'.format(
video_filepath, audio_filepath, video_title, output_path
)
command = 'ffmpeg -y -i "{}" -i "{}" -c:v copy -c:a copy -fflags +bitexact -map_metadata -1 -metadata title="{}" "{}"'.format(video_filepath, audio_filepath, video_title, output_path)
else:
if use_h265:
command = 'nice -n 7 ffmpeg {} -y -i "{}" -i "{}" -c:v libx265 -vtag hvc1 -crf {} -preset {} -c:a copy -fflags +bitexact -map_metadata -1 -metadata title="{}" "{}"'.format(
@ -1538,7 +1551,18 @@ def process_lecture(lecture, lecture_path, lecture_file_name, chapter_dir):
source_type = source.get("type")
if source_type == "hls":
temp_filepath = lecture_path.replace(".mp4", ".%(ext)s")
cmd = ["yt-dlp", "--enable-file-urls", "--force-generic-extractor", "--concurrent-fragments", f"{concurrent_downloads}", "--downloader", "aria2c", "-o", f"{temp_filepath}", f"{url}"]
cmd = [
"yt-dlp",
"--enable-file-urls",
"--force-generic-extractor",
"--concurrent-fragments",
f"{concurrent_downloads}",
"--downloader",
"aria2c",
"-o",
f"{temp_filepath}",
f"{url}",
]
if disable_ipv6:
cmd.append("--downloader-args")
cmd.append('aria2c:"--disable-ipv6"')
@ -1574,7 +1598,6 @@ def process_lecture(lecture, lecture_path, lecture_file_name, chapter_dir):
logger.error(" > Missing sources for lecture", lecture)
def process_quiz(udemy: Udemy, lecture, chapter_dir):
lecture_title = lecture.get("lecture_title")
lecture_index = lecture.get("lecture_index")
@ -1594,7 +1617,6 @@ def process_quiz(udemy: Udemy, lecture, chapter_dir):
f.write(html)
def parse_new(udemy: Udemy, udemy_object: dict):
total_chapters = udemy_object.get("total_chapters")
total_lectures = udemy_object.get("total_lectures")
@ -1848,28 +1870,37 @@ def main():
udemy_object["title"] = title
udemy_object["course_title"] = course_title
udemy_object["chapters"] = []
counter = -1
chapter_index_counter = -1
if resource:
logger.info("> Trying to logout")
logger.info("> Terminating Session...")
udemy.session.terminate()
logger.info("> Logged out.")
logger.info("> Session Terminated.")
if course:
logger.info("> Processing course data, this may take a minute. ")
lecture_counter = 0
lectures = []
for entry in course:
clazz = entry.get("_class")
if clazz == "chapter":
# add all lectures for the previous chapter
if len(lectures) > 0:
udemy_object["chapters"][chapter_index_counter]["lectures"] = lectures
udemy_object["chapters"][chapter_index_counter]["lecture_count"] = len(lectures)
# reset lecture tracking
lecture_counter = 0
lectures = []
chapter_index = entry.get("object_index")
chapter_title = "{0:02d} - ".format(chapter_index) + sanitize_filename(entry.get("title"))
if chapter_title not in udemy_object["chapters"]:
udemy_object["chapters"].append({"chapter_title": chapter_title, "chapter_id": entry.get("id"), "chapter_index": chapter_index, "lectures": []})
counter += 1
chapter_index_counter += 1
elif clazz == "lecture":
lecture_counter += 1
lecture_id = entry.get("id")
@ -1889,8 +1920,8 @@ def main():
lecture_title = "{0:03d} ".format(lecture_counter) + sanitize_filename(entry.get("title"))
lectures.append({"index": lecture_counter, "lecture_index": lecture_index, "lecture_title": lecture_title, "_class": entry.get("_class"), "id": lecture_id, "data": entry})
udemy_object["chapters"][counter]["lectures"] = lectures
udemy_object["chapters"][counter]["lecture_count"] = len(lectures)
else:
logger.debug("Lecture: ID is None, skipping")
elif clazz == "quiz":
lecture_counter += 1
lecture_id = entry.get("id")
@ -1910,9 +1941,8 @@ def main():
lecture_title = "{0:03d} ".format(lecture_counter) + sanitize_filename(entry.get("title"))
lectures.append({"index": lecture_counter, "lecture_index": lecture_index, "lecture_title": lecture_title, "_class": entry.get("_class"), "id": lecture_id, "data": entry})
udemy_object["chapters"][counter]["lectures"] = lectures
udemy_object["chapters"][counter]["lectures_count"] = len(lectures)
else:
logger.debug("Quiz: ID is None, skipping")
udemy_object["total_chapters"] = len(udemy_object["chapters"])
udemy_object["total_lectures"] = sum([entry.get("lecture_count", 0) for entry in udemy_object["chapters"] if entry])

View File

@ -15,3 +15,4 @@ lxml
six
pathvalidate
coloredlogs
browser_cookie3