mirror of https://cdm-project.com/Download-Tools/udemy-downloader.git
synced 2025-04-30 05:34:25 +02:00

- Removed a few unused imports and reworked the way paths are formed so they shouldn't be a problem on other operating systems. NOTE: the new dependencies in requirements.txt are NOT required at this time
700 lines
27 KiB
Python
import os, requests, json, glob, argparse, sys, re

from sanitize_filename import sanitize
from tqdm import tqdm
from dotenv import load_dotenv
from mpegdash.parser import MPEGDASHParser

from utils import extract_kid
from vtt_to_srt import convert

download_dir = os.path.join(os.getcwd(), "out_dir")
working_dir = os.path.join(os.getcwd(), "working_dir")
retry = 3
home_dir = os.getcwd()
keyfile_path = os.path.join(os.getcwd(), "keyfile.json")
valid_qualities = [144, 360, 480, 720, 1080]

if not os.path.exists(working_dir):
    os.makedirs(working_dir)

if not os.path.exists(download_dir):
    os.makedirs(download_dir)

# Load the decryption keys
with open(keyfile_path, 'r') as keyfile:
    keyfile = keyfile.read()
keyfile = json.loads(keyfile)

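# The keyfile layout isn't documented in this script; based on the lookup in
# decrypt() below (keyfile[kid.lower()]), keyfile.json is assumed to be a flat
# JSON object mapping lowercase KIDs to their decryption keys, e.g.
# {"0123456789abcdef0123456789abcdef": "00112233445566778899aabbccddeeff"}
# (both values here are illustrative placeholders).

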
def extract_course_name(url):
    """
    @author r0oth3x49
    """
    obj = re.search(
        r"(?i)(?://(?P<portal_name>.+?).udemy.com/(?:course(/draft)*/)?(?P<name_or_id>[a-zA-Z0-9_-]+))",
        url,
    )
    if obj:
        return obj.group("portal_name"), obj.group("name_or_id")
    # no match: return an empty pair so the caller's tuple unpacking still works
    return None, None

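# For example, a URL like "https://www.udemy.com/course/some-course/" should
# yield ("www", "some-course"): the subdomain is the portal name and the slug
# identifies the course. Anything the regex can't match yields (None, None).

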
def durationtoseconds(period):
    """
    @author Jayapraveen
    """
    # Duration format is PTxDxHxMxS
    if (period[:2] == "PT"):
        period = period[2:]
        day = int(period.split("D")[0] if 'D' in period else 0)
        hour = int(period.split("H")[0].split("D")[-1] if 'H' in period else 0)
        minute = int(
            period.split("M")[0].split("H")[-1] if 'M' in period else 0)
        second = period.split("S")[0].split("M")[-1]
        print("Total time: " + str(day) + " days " + str(hour) + " hours " +
              str(minute) + " minutes and " + str(second) + " seconds")
        # float(second) handles both integral ("30") and fractional ("30.5")
        # second values
        return (day * 24 * 60 * 60) + (hour * 60 * 60) + (minute * 60) + float(second)
    else:
        print("Duration Format Error")
        return None

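# A quick worked example: durationtoseconds("PT1H2M3.5S") parses 0 days,
# 1 hour, 2 minutes and 3.5 seconds, returning 1 * 3600 + 2 * 60 + 3.5 = 3723.5.

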
def download_media(filename, url, lecture_working_dir, epoch=0):
    # check and write the same path: segments live in the per-lecture
    # working directory
    filepath = os.path.join(lecture_working_dir, filename)
    if os.path.isfile(filepath):
        print("Segment already downloaded.. skipping..")
        return False
    media = requests.get(url, stream=True)
    if media.status_code == 200:
        media_length = int(media.headers.get("content-length"))
        try:
            pbar = tqdm(total=media_length,
                        initial=0,
                        unit='B',
                        unit_scale=True,
                        desc=filename)
            with open(filepath, 'wb') as video_file:
                for chunk in media.iter_content(chunk_size=1024):
                    if chunk:
                        video_file.write(chunk)
                        pbar.update(1024)
            pbar.close()
        except Exception:
            print("Connection error: Reattempting download of segment..")
            return download_media(filename, url, lecture_working_dir,
                                  epoch + 1)
        # verify the write actually completed
        if os.path.getsize(filepath) < media_length:
            print("Segment is faulty.. Redownloading...")
            return download_media(filename, url, lecture_working_dir,
                                  epoch + 1)
        print("Segment downloaded: " + filename)
        return False  # successfully downloaded the file
    elif (media.status_code == 404):
        print("Probably end hit!\n", url)
        return True  # probably reached the end of the segment list
    else:
        if (epoch > retry):
            exit("Error fetching segment, exceeded retry times.")
        print("Error fetching segment file.. Redownloading...")
        return download_media(filename, url, lecture_working_dir, epoch + 1)

def cleanup(path):
    """
    @author Jayapraveen
    """
    leftover_files = glob.glob(path + '/*.mp4', recursive=True)
    mpd_files = glob.glob(path + '/*.mpd', recursive=True)
    leftover_files = leftover_files + mpd_files
    for file_path in leftover_files:
        try:
            os.remove(file_path)
        except OSError:
            print(f"Error deleting file: {file_path}")

def mux_process(video_title, lecture_working_dir, outfile):
    """
    @author Jayapraveen
    """
    command = "ffmpeg -y -i \"{}\" -i \"{}\" -acodec copy -vcodec copy -fflags +bitexact -map_metadata -1 -metadata title=\"{}\" \"{}\"".format(
        os.path.join(lecture_working_dir, "decrypted_audio.mp4"),
        os.path.join(lecture_working_dir, "decrypted_video.mp4"), video_title,
        outfile)
    if os.name != "nt":
        # run at lower CPU priority on POSIX systems
        command = "nice -n 7 " + command
    os.system(command)

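# With the arguments above, mux_process() runs a command of roughly this shape
# (paths are illustrative):
#   ffmpeg -y -i "<lecture_working_dir>/decrypted_audio.mp4" \
#          -i "<lecture_working_dir>/decrypted_video.mp4" \
#          -acodec copy -vcodec copy -fflags +bitexact -map_metadata -1 \
#          -metadata title="<video_title>" "<outfile>"

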
def decrypt(kid, filename, lecture_working_dir):
    """
    @author Jayapraveen
    """
    try:
        key = keyfile[kid.lower()]
    except KeyError:
        exit("Key not found")
    # output names must match the decrypted_{audio,video}.mp4 files that
    # mux_process() expects
    command = "mp4decrypt --key 1:{} \"{}\" \"{}\"".format(
        key,
        os.path.join(lecture_working_dir, "encrypted_{}.mp4".format(filename)),
        os.path.join(lecture_working_dir, "decrypted_{}.mp4".format(filename)))
    if os.name != "nt":
        command = "nice -n 7 " + command
    os.system(command)

def handle_irregular_segments(media_info, video_title, lecture_working_dir,
                              output_path):
    """
    @author Jayapraveen
    """
    # media_info is the video fields followed by the audio fields; both carry
    # the same approximate segment count, so only one copy is kept
    no_segment, video_url, video_init, video_extension, _, audio_url, audio_init, audio_extension = media_info
    download_media("video_0.seg.mp4", video_init, lecture_working_dir)
    video_kid = extract_kid(
        os.path.join(lecture_working_dir, "video_0.seg.mp4"))
    print("KID for video file is: " + video_kid)
    download_media("audio_0.seg.mp4", audio_init, lecture_working_dir)
    audio_kid = extract_kid(
        os.path.join(lecture_working_dir, "audio_0.seg.mp4"))
    print("KID for audio file is: " + audio_kid)
    for count in range(1, no_segment):
        video_segment_url = video_url.replace("$Number$", str(count))
        audio_segment_url = audio_url.replace("$Number$", str(count))
        video_status = download_media(
            f"video_{count}.seg.{video_extension}", video_segment_url,
            lecture_working_dir)
        audio_status = download_media(
            f"audio_{count}.seg.{audio_extension}", audio_segment_url,
            lecture_working_dir)
        os.chdir(lecture_working_dir)
        if (video_status):
            # a 404 means we ran past the last segment: concatenate what we
            # have, decrypt both streams, mux them, and stop
            if os.name == "nt":
                video_concat_command = "copy /b " + "+".join([
                    f"video_{i}.seg.{video_extension}" for i in range(0, count)
                ]) + " encrypted_video.mp4"
                audio_concat_command = "copy /b " + "+".join([
                    f"audio_{i}.seg.{audio_extension}" for i in range(0, count)
                ]) + " encrypted_audio.mp4"
            else:
                video_concat_command = "cat " + " ".join([
                    f"video_{i}.seg.{video_extension}" for i in range(0, count)
                ]) + " > encrypted_video.mp4"
                audio_concat_command = "cat " + " ".join([
                    f"audio_{i}.seg.{audio_extension}" for i in range(0, count)
                ]) + " > encrypted_audio.mp4"
            os.system(video_concat_command)
            os.system(audio_concat_command)
            decrypt(video_kid, "video", lecture_working_dir)
            decrypt(audio_kid, "audio", lecture_working_dir)
            os.chdir(home_dir)
            mux_process(video_title, lecture_working_dir, output_path)
            break

def manifest_parser(mpd_url):
    """
    @author Jayapraveen
    """
    video = []
    audio = []
    manifest = requests.get(mpd_url).text
    with open("manifest.mpd", 'w') as manifest_handler:
        manifest_handler.write(manifest)
    mpd = MPEGDASHParser.parse("./manifest.mpd")
    running_time = durationtoseconds(mpd.media_presentation_duration)
    for period in mpd.periods:
        for adapt_set in period.adaptation_sets:
            print("Processing " + adapt_set.mime_type)
            content_type = adapt_set.mime_type
            if quality and content_type == "video/mp4":
                representation = next((x for x in adapt_set.representations
                                       if x.height == quality), None)
                if not representation:
                    # no exact match: fall back to the representation whose
                    # height is closest to the requested quality
                    representation = min(adapt_set.representations,
                                         key=lambda x: abs(x.height - quality))
                    print(
                        "> Could not find video with requested quality, falling back to closest!"
                    )
                    print("> Using quality of %s" % representation.height)
                else:
                    print("> Found MPD representation with quality %s" %
                          representation.height)
            else:
                representation = adapt_set.representations[-1]  # max quality
                print("> Using max quality of %s" % representation.height)
            for segment in representation.segment_templates:
                if (segment.duration):
                    print("Media segments are of equal timeframe")
                    segment_time = segment.duration / segment.timescale
                    total_segments = running_time / segment_time
                else:
                    print("Media segments are of inequal timeframe")

                approx_no_segments = round(
                    running_time / 6) + 10  # approximate 6 sec per segment
                print("Expected No of segments:", approx_no_segments)
                segment_extension = segment.media.split(".")[-1]
                if (content_type == "audio/mp4"):
                    audio.append(approx_no_segments)
                    audio.append(segment.media)
                    audio.append(segment.initialization)
                    audio.append(segment_extension)
                elif (content_type == "video/mp4"):
                    video.append(approx_no_segments)
                    video.append(segment.media)
                    video.append(segment.initialization)
                    video.append(segment_extension)
    return video + audio

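# manifest_parser() returns a flat list of
# [segment_count, media_url_template, init_url, extension] for video followed
# by the same four fields for audio, which handle_irregular_segments() above
# unpacks as its media_info argument.

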
def download(url, path, filename):
    """
    @author Puyodead1
    """
    file_size = int(requests.head(url).headers["Content-Length"])
    if os.path.exists(path):
        first_byte = os.path.getsize(path)
    else:
        first_byte = 0
    if first_byte >= file_size:
        return file_size
    # resume from first_byte; HTTP ranges are inclusive, so the last byte
    # is file_size - 1
    header = {"Range": "bytes=%s-%s" % (first_byte, file_size - 1)}
    pbar = tqdm(total=file_size,
                initial=first_byte,
                unit='B',
                unit_scale=True,
                desc=filename)
    res = requests.get(url, headers=header, stream=True)
    res.raise_for_status()
    with open(path, 'ab') as f:
        for chunk in res.iter_content(chunk_size=1024):
            if chunk:
                f.write(chunk)
                pbar.update(1024)
    pbar.close()
    return file_size

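# Note that download() resumes partial files: it issues a ranged request
# starting at the current on-disk size and appends, so re-running the script
# skips bytes that were already fetched.

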
def process_caption(caption,
                    lecture_index,
                    lecture_title,
                    lecture_dir,
                    tries=0):
    filename = "%s. %s_%s.%s" % (lecture_index, sanitize(lecture_title),
                                 caption.get("locale_id"), caption.get("ext"))
    filename_no_ext = "%s. %s_%s" % (lecture_index, sanitize(lecture_title),
                                     caption.get("locale_id"))
    filepath = os.path.join(lecture_dir, filename)

    if os.path.isfile(filepath):
        print("> Captions '%s' already downloaded." % filename)
    else:
        print("> Downloading captions: '%s'" % filename)
        try:
            download(caption.get("url"), filepath, filename)
        except Exception as e:
            if tries >= 3:
                print(
                    f"> Error downloading captions: {e}. Exceeded retries, skipping."
                )
                return
            else:
                print(
                    f"> Error downloading captions: {e}. Will retry {3 - tries} more times."
                )
                # the retry call handles the conversion itself, so stop here
                return process_caption(caption, lecture_index, lecture_title,
                                       lecture_dir, tries + 1)

    if caption.get("ext") == "vtt":
        try:
            print("> Converting captions to SRT format...")
            convert(lecture_dir, filename_no_ext)
            print("> Caption conversion complete.")
            os.remove(filepath)
        except Exception as e:
            print(f"> Error converting captions: {e}")

def process_lecture(lecture, lecture_index, lecture_path, lecture_dir, quality,
                    skip_lectures, dl_assets, dl_captions, caption_locale):
    lecture_title = lecture["title"]
    lecture_asset = lecture["asset"]
    if not skip_lectures:
        if lecture_asset["media_license_token"] is None:
            # not encrypted
            media_sources = lecture_asset["media_sources"]
            if quality:
                # find the requested quality, else fall back to the best
                # available (the first index)
                lecture_url = next(
                    (x["src"]
                     for x in media_sources if x["label"] == str(quality)),
                    media_sources[0]["src"])
            else:
                # best quality is the first index
                lecture_url = media_sources[0]["src"]

            if not os.path.isfile(lecture_path):
                try:
                    download(lecture_url, lecture_path, lecture_title)
                except Exception as e:
                    # a retry could be added here
                    print(f"> Error downloading lecture: {e}. Skipping...")
            else:
                print("> Lecture '%s' is already downloaded, skipping..." %
                      lecture_title)
        else:
            # encrypted
            print("> Lecture '%s' has DRM, attempting to download" %
                  lecture_title)
            # per-lecture folder for ephemeral working files
            lecture_working_dir = os.path.join(working_dir,
                                               str(lecture_asset["id"]))
            media_sources = lecture_asset["media_sources"]
            if not os.path.exists(lecture_working_dir):
                os.mkdir(lecture_working_dir)
            if not os.path.isfile(lecture_path):
                mpd_url = next((x["src"] for x in media_sources
                                if x["type"] == "application/dash+xml"), None)
                if not mpd_url:
                    print(
                        "> Couldn't find dash url for lecture '%s', skipping..."
                        % lecture_title)
                    return
                media_info = manifest_parser(mpd_url)
                handle_irregular_segments(media_info, lecture_title,
                                          lecture_working_dir, lecture_path)
                cleanup(lecture_working_dir)
            else:
                print("> Lecture '%s' is already downloaded, skipping..." %
                      lecture_title)

    # process assets
    if dl_assets:
        assets = []
        external_urls = ""
        all_assets = lecture["supplementary_assets"]
        for asset in all_assets:
            if asset["asset_type"] == "File":
                assets.append(asset)
                asset_filename = asset["filename"]
                download_url = next((x["file"]
                                     for x in asset["download_urls"]["File"]
                                     if x["label"] == "download"), None)
                if download_url:
                    try:
                        download(download_url,
                                 os.path.join(lecture_dir, asset_filename),
                                 asset_filename)
                    except Exception as e:
                        print(
                            f"> Error downloading lecture asset: {e}. Skipping"
                        )
                        continue
            elif asset["asset_type"] == "Article":
                assets.append(asset)
                asset_path = os.path.join(lecture_dir, sanitize(lecture_title))
                with open(asset_path, 'w') as f:
                    f.write(asset["body"])
            elif asset["asset_type"] == "ExternalLink":
                assets.append(asset)
                external_urls += "{}: {}\n".format(asset["title"],
                                                   asset["external_url"])

        if external_urls:
            external_urls_path = os.path.join(
                lecture_dir, "{}. External URLs.txt".format(lecture_index))
            with open(external_urls_path, 'w') as f:
                f.write(external_urls)

        print("> Found %s assets for lecture '%s'" %
              (len(assets), lecture_title))

    # process captions
    if dl_captions:
        captions = []
        for caption in lecture_asset.get("captions") or []:
            if not isinstance(caption, dict):
                continue
            if caption.get("_class") != "caption":
                continue
            download_url = caption.get("url")
            if not download_url or not isinstance(download_url, str):
                continue
            lang = (caption.get("language") or caption.get("srclang")
                    or caption.get("label")
                    or (caption.get("locale_id") or "").split("_")[0])
            ext = "vtt" if "vtt" in download_url.rsplit(".", 1)[-1] else "srt"
            if caption_locale == "all" or caption_locale == lang:
                captions.append({
                    "language": lang,
                    "locale_id": caption.get("locale_id"),
                    "ext": ext,
                    "url": download_url
                })

        for caption in captions:
            process_caption(caption, lecture_index, lecture_title, lecture_dir)

def parse(data, course_id, course_name, skip_lectures, dl_assets, dl_captions,
          quality, caption_locale):
    course_dir = os.path.join(download_dir, course_name)
    if not os.path.exists(course_dir):
        os.mkdir(course_dir)
    chapters = []
    lectures = []

    for obj in data:
        if obj["_class"] == "chapter":
            obj["lectures"] = []
            chapters.append(obj)
        elif obj["_class"] == "lecture" and obj["asset"][
                "asset_type"] == "Video":
            try:
                chapters[-1]["lectures"].append(obj)
            except IndexError:
                # there was no starting chapter; treat it as a top-level
                # lecture in the course directory
                lectures.append(obj)
                lecture_index = lectures.index(obj) + 1
                lecture_path = os.path.join(
                    course_dir, "{}. {}.mp4".format(lecture_index,
                                                    sanitize(obj["title"])))
                process_lecture(obj, lecture_index, lecture_path, course_dir,
                                quality, skip_lectures, dl_assets, dl_captions,
                                caption_locale)

    for chapter in chapters:
        chapter_dir = os.path.join(
            course_dir, "{}. {}".format(
                chapters.index(chapter) + 1, sanitize(chapter["title"])))
        if not os.path.exists(chapter_dir):
            os.mkdir(chapter_dir)

        for lecture in chapter["lectures"]:
            lecture_index = chapter["lectures"].index(lecture) + 1
            lecture_path = os.path.join(
                chapter_dir, "{}. {}.mp4".format(lecture_index,
                                                 sanitize(lecture["title"])))
            process_lecture(lecture, lecture_index, lecture_path, chapter_dir,
                            quality, skip_lectures, dl_assets, dl_captions,
                            caption_locale)

    print("\n\n\n\n\n\n\n\n=====================")
    print("All downloads completed for course!")
    print("=====================")

def fetch_subscribed_courses_json(bearer_token, portal_name):
    # course_name is a module-level global set in __main__
    res = requests.get(
        "https://{portal_name}.udemy.com/api-2.0/users/me/subscribed-courses?fields[course]=id,url,title,published_title&ordering=-last_accessed,-access_time&page=1&page_size=10000"
        .format(portal_name=portal_name),
        headers={
            "Authorization": bearer_token,
            "x-udemy-authorization": bearer_token,
            "Host": "{portal_name}.udemy.com".format(portal_name=portal_name),
            "Referer":
            "https://{portal_name}.udemy.com/home/my-courses/search/?q={course_name}"
            .format(portal_name=portal_name, course_name=course_name)
        })
    res.raise_for_status()
    return res.json()

def fetch_course_json(course_id, bearer_token, portal_name, course_name):
    res = requests.get(
        "https://{portal_name}.udemy.com/api-2.0/courses/{course_id}/cached-subscriber-curriculum-items?fields[asset]=results,title,external_url,time_estimation,download_urls,slide_urls,filename,asset_type,captions,media_license_token,course_is_drmed,media_sources,stream_urls,body&fields[chapter]=object_index,title,sort_order&fields[lecture]=id,title,object_index,asset,supplementary_assets,view_html&page_size=10000"
        .format(portal_name=portal_name, course_id=course_id),
        headers={
            "Authorization": bearer_token,
            "x-udemy-authorization": bearer_token,
            "Host": "{portal_name}.udemy.com".format(portal_name=portal_name),
            "Referer": "https://{portal_name}.udemy.com/"
        })
    res.raise_for_status()
    return res.json()

if __name__ == "__main__":
    parser = argparse.ArgumentParser(description='Udemy Downloader')
    parser.add_argument("-c",
                        "--course-url",
                        dest="course_url",
                        type=str,
                        help="The URL of the course to download",
                        required=True)
    parser.add_argument(
        "-b",
        "--bearer",
        dest="bearer_token",
        type=str,
        help="The Bearer token to use",
    )
    parser.add_argument(
        "-d",
        "--debug",
        dest="debug",
        action="store_true",
        help="Use test_data.json rather than fetch from the udemy api.",
    )
    parser.add_argument(
        "-q",
        "--quality",
        dest="quality",
        type=int,
        help="Download specific video quality. (144, 360, 480, 720, 1080)",
    )
    parser.add_argument(
        "-l",
        "--lang",
        dest="lang",
        type=str,
        help="The language to download for captions (default is en)",
    )
    parser.add_argument(
        "--skip-lectures",
        dest="skip_lectures",
        action="store_true",
        help="If specified, lectures won't be downloaded.",
    )
    parser.add_argument(
        "--download-assets",
        dest="download_assets",
        action="store_true",
        help="If specified, lecture assets will be downloaded.",
    )
    parser.add_argument(
        "--download-captions",
        dest="download_captions",
        action="store_true",
        help="If specified, captions will be downloaded.",
    )

    dl_assets = False
    skip_lectures = False
    dl_captions = False
    caption_locale = "en"
    quality = None
    bearer_token = None
    portal_name = None
    course_name = None

    args = parser.parse_args()
    if args.download_assets:
        dl_assets = True
    if args.lang:
        caption_locale = args.lang
    if args.download_captions:
        dl_captions = True
    if args.skip_lectures:
        skip_lectures = True
    if args.quality:
        if args.quality not in valid_qualities:
            print("Invalid quality specified! %s" % args.quality)
            sys.exit(1)
        else:
            quality = args.quality

    load_dotenv()
    # only build the header if a token was actually supplied, so the missing-
    # token check below can fire
    udemy_bearer = args.bearer_token or os.getenv("UDEMY_BEARER")
    if udemy_bearer:
        bearer_token = "Bearer %s" % udemy_bearer

    if args.course_url:
        portal_name, course_name = extract_course_name(args.course_url)

    if not course_name:
        print("> Unable to extract course name from URL!")
        sys.exit(1)
    if not portal_name:
        print("> Unable to extract portal name from URL!")
        sys.exit(1)
    if not bearer_token:
        print("> Missing Bearer Token!")
        sys.exit(1)

    print("> Fetching subscribed course data...")
    try:
        subscribed_courses = fetch_subscribed_courses_json(
            bearer_token, portal_name)
    except Exception as e:
        print("> Failed to fetch subscribed course information: %s" % e)
        sys.exit(1)

    course = next((x for x in subscribed_courses["results"]
                   if x["published_title"] == course_name), None)
    if not course:
        print("> Failed to find course in course list!")
        sys.exit(1)

    course_id = course["id"]
    course_title = course["title"]

    print("> Fetching information for course '%s', this might take a minute..."
          % course_name)
    try:
        course_data = fetch_course_json(course_id, bearer_token, portal_name,
                                        course_name)
    except Exception as e:
        print("> Failed to fetch course information: %s" % e)
        sys.exit(1)

    if not course_data:
        print("> Failed to fetch course data!")
        sys.exit(1)

    print("> Course information retrieved!")

    if args.debug:
        # development aid: read the course data from a local file instead of
        # the API response, so repeated test runs don't hammer the API
        with open("test_data.json", encoding="utf8") as f:
            course_data = json.loads(f.read())

    parse(course_data["results"], course_id, course_name, skip_lectures,
          dl_assets, dl_captions, quality, caption_locale)