This commit is contained in:
Puyodead1 2023-12-28 14:57:22 -05:00
parent b50dbd1ee2
commit db7b0490e6
No known key found for this signature in database
GPG Key ID: A4FA4FEC0DD353FC

281
main.py
View File

@ -82,7 +82,9 @@ def pre_run():
os.makedirs(LOG_DIR_PATH, exist_ok=True) os.makedirs(LOG_DIR_PATH, exist_ok=True)
parser = argparse.ArgumentParser(description="Udemy Downloader") parser = argparse.ArgumentParser(description="Udemy Downloader")
parser.add_argument("-c", "--course-url", dest="course_url", type=str, help="The URL of the course to download", required=True) parser.add_argument(
"-c", "--course-url", dest="course_url", type=str, help="The URL of the course to download", required=True
)
parser.add_argument( parser.add_argument(
"-b", "-b",
"--bearer", "--bearer",
@ -217,13 +219,15 @@ def pre_run():
help="Whether to use the NVIDIA hardware transcoding for H.265. Only works if you have a supported NVIDIA GPU and ffmpeg with nvenc support", help="Whether to use the NVIDIA hardware transcoding for H.265. Only works if you have a supported NVIDIA GPU and ffmpeg with nvenc support",
) )
parser.add_argument( parser.add_argument(
"--out", "-o", "--out",
"-o",
dest="out", dest="out",
type=str, type=str,
help="Set the path to the output directory", help="Set the path to the output directory",
) )
parser.add_argument( parser.add_argument(
"--continue-lecture-numbers", "-n", "--continue-lecture-numbers",
"-n",
dest="use_continuous_lecture_numbers", dest="use_continuous_lecture_numbers",
action="store_true", action="store_true",
help="Use continuous lecture numbering instead of per-chapter", help="Use continuous lecture numbering instead of per-chapter",
@ -374,7 +378,9 @@ class Udemy:
self.session._headers.update( self.session._headers.update(
{ {
"Host": "{portal_name}.udemy.com".format(portal_name=portal_name), "Host": "{portal_name}.udemy.com".format(portal_name=portal_name),
"Referer": "https://{portal_name}.udemy.com/course/{course_name}/learn/quiz/{quiz_id}".format(portal_name=portal_name, course_name=course_name, quiz_id=quiz_id), "Referer": "https://{portal_name}.udemy.com/course/{course_name}/learn/quiz/{quiz_id}".format(
portal_name=portal_name, course_name=course_name, quiz_id=quiz_id
),
} }
) )
url = QUIZ_URL.format(portal_name=portal_name, quiz_id=quiz_id) url = QUIZ_URL.format(portal_name=portal_name, quiz_id=quiz_id)
@ -386,12 +392,12 @@ class Udemy:
sys.exit(1) sys.exit(1)
else: else:
return resp.get("results") return resp.get("results")
def _get_elem_value_or_none(self, elem, key): def _get_elem_value_or_none(self, elem, key):
return elem[key] if elem and key in elem else "(None)" return elem[key] if elem and key in elem else "(None)"
def _get_quiz_with_info(self, quiz_id): def _get_quiz_with_info(self, quiz_id):
resp = { "_class": None, "_type": None, "contents": None } resp = {"_class": None, "_type": None, "contents": None}
quiz_json = self._get_quiz(quiz_id) quiz_json = self._get_quiz(quiz_id)
is_only_one = len(quiz_json) == 1 and quiz_json[0]["_class"] == "assessment" is_only_one = len(quiz_json) == 1 and quiz_json[0]["_class"] == "assessment"
is_coding_assignment = quiz_json[0]["assessment_type"] == "coding-problem" is_coding_assignment = quiz_json[0]["assessment_type"] == "coding-problem"
@ -401,9 +407,9 @@ class Udemy:
if is_only_one and is_coding_assignment: if is_only_one and is_coding_assignment:
assignment = quiz_json[0] assignment = quiz_json[0]
prompt = assignment["prompt"] prompt = assignment["prompt"]
resp["_type"] = assignment["assessment_type"] resp["_type"] = assignment["assessment_type"]
resp["contents"] = { resp["contents"] = {
"instructions": self._get_elem_value_or_none(prompt, "instructions"), "instructions": self._get_elem_value_or_none(prompt, "instructions"),
"tests": self._get_elem_value_or_none(prompt, "test_files"), "tests": self._get_elem_value_or_none(prompt, "test_files"),
@ -413,10 +419,10 @@ class Udemy:
resp["hasInstructions"] = False if resp["contents"]["instructions"] == "(None)" else True resp["hasInstructions"] = False if resp["contents"]["instructions"] == "(None)" else True
resp["hasTests"] = False if isinstance(resp["contents"]["tests"], str) else True resp["hasTests"] = False if isinstance(resp["contents"]["tests"], str) else True
resp["hasSolutions"] = False if isinstance(resp["contents"]["solutions"], str) else True resp["hasSolutions"] = False if isinstance(resp["contents"]["solutions"], str) else True
else: # Normal quiz else: # Normal quiz
resp["_type"] = "normal-quiz" resp["_type"] = "normal-quiz"
resp["contents"] = quiz_json resp["contents"] = quiz_json
return resp return resp
def _extract_supplementary_assets(self, supp_assets, lecture_counter): def _extract_supplementary_assets(self, supp_assets, lecture_counter):
@ -432,14 +438,41 @@ class Udemy:
if download_urls and isinstance(download_urls, dict): if download_urls and isinstance(download_urls, dict):
extension = filename.rsplit(".", 1)[-1] if "." in filename else "" extension = filename.rsplit(".", 1)[-1] if "." in filename else ""
download_url = download_urls.get("File", [])[0].get("file") download_url = download_urls.get("File", [])[0].get("file")
_temp.append({"type": "file", "title": title, "filename": "{0:03d} ".format(lecture_counter) + filename, "extension": extension, "download_url": download_url, "id": id}) _temp.append(
{
"type": "file",
"title": title,
"filename": "{0:03d} ".format(lecture_counter) + filename,
"extension": extension,
"download_url": download_url,
"id": id,
}
)
elif asset_type == "sourcecode": elif asset_type == "sourcecode":
if download_urls and isinstance(download_urls, dict): if download_urls and isinstance(download_urls, dict):
extension = filename.rsplit(".", 1)[-1] if "." in filename else "" extension = filename.rsplit(".", 1)[-1] if "." in filename else ""
download_url = download_urls.get("SourceCode", [])[0].get("file") download_url = download_urls.get("SourceCode", [])[0].get("file")
_temp.append({"type": "source_code", "title": title, "filename": "{0:03d} ".format(lecture_counter) + filename, "extension": extension, "download_url": download_url, "id": id}) _temp.append(
{
"type": "source_code",
"title": title,
"filename": "{0:03d} ".format(lecture_counter) + filename,
"extension": extension,
"download_url": download_url,
"id": id,
}
)
elif asset_type == "externallink": elif asset_type == "externallink":
_temp.append({"type": "external_link", "title": title, "filename": "{0:03d} ".format(lecture_counter) + filename, "extension": "txt", "download_url": external_url, "id": id}) _temp.append(
{
"type": "external_link",
"title": title,
"filename": "{0:03d} ".format(lecture_counter) + filename,
"extension": "txt",
"download_url": external_url,
"id": id,
}
)
return _temp return _temp
def _extract_ppt(self, asset, lecture_counter): def _extract_ppt(self, asset, lecture_counter):
@ -450,7 +483,15 @@ class Udemy:
if download_urls and isinstance(download_urls, dict): if download_urls and isinstance(download_urls, dict):
extension = filename.rsplit(".", 1)[-1] if "." in filename else "" extension = filename.rsplit(".", 1)[-1] if "." in filename else ""
download_url = download_urls.get("Presentation", [])[0].get("file") download_url = download_urls.get("Presentation", [])[0].get("file")
_temp.append({"type": "presentation", "filename": "{0:03d} ".format(lecture_counter) + filename, "extension": extension, "download_url": download_url, "id": id}) _temp.append(
{
"type": "presentation",
"filename": "{0:03d} ".format(lecture_counter) + filename,
"extension": extension,
"download_url": download_url,
"id": id,
}
)
return _temp return _temp
def _extract_file(self, asset, lecture_counter): def _extract_file(self, asset, lecture_counter):
@ -461,7 +502,15 @@ class Udemy:
if download_urls and isinstance(download_urls, dict): if download_urls and isinstance(download_urls, dict):
extension = filename.rsplit(".", 1)[-1] if "." in filename else "" extension = filename.rsplit(".", 1)[-1] if "." in filename else ""
download_url = download_urls.get("File", [])[0].get("file") download_url = download_urls.get("File", [])[0].get("file")
_temp.append({"type": "file", "filename": "{0:03d} ".format(lecture_counter) + filename, "extension": extension, "download_url": download_url, "id": id}) _temp.append(
{
"type": "file",
"filename": "{0:03d} ".format(lecture_counter) + filename,
"extension": extension,
"download_url": download_url,
"id": id,
}
)
return _temp return _temp
def _extract_ebook(self, asset, lecture_counter): def _extract_ebook(self, asset, lecture_counter):
@ -472,7 +521,15 @@ class Udemy:
if download_urls and isinstance(download_urls, dict): if download_urls and isinstance(download_urls, dict):
extension = filename.rsplit(".", 1)[-1] if "." in filename else "" extension = filename.rsplit(".", 1)[-1] if "." in filename else ""
download_url = download_urls.get("E-Book", [])[0].get("file") download_url = download_urls.get("E-Book", [])[0].get("file")
_temp.append({"type": "ebook", "filename": "{0:03d} ".format(lecture_counter) + filename, "extension": extension, "download_url": download_url, "id": id}) _temp.append(
{
"type": "ebook",
"filename": "{0:03d} ".format(lecture_counter) + filename,
"extension": extension,
"download_url": download_url,
"id": id,
}
)
return _temp return _temp
def _extract_audio(self, asset, lecture_counter): def _extract_audio(self, asset, lecture_counter):
@ -483,7 +540,15 @@ class Udemy:
if download_urls and isinstance(download_urls, dict): if download_urls and isinstance(download_urls, dict):
extension = filename.rsplit(".", 1)[-1] if "." in filename else "" extension = filename.rsplit(".", 1)[-1] if "." in filename else ""
download_url = download_urls.get("Audio", [])[0].get("file") download_url = download_urls.get("Audio", [])[0].get("file")
_temp.append({"type": "audio", "filename": "{0:03d} ".format(lecture_counter) + filename, "extension": extension, "download_url": download_url, "id": id}) _temp.append(
{
"type": "audio",
"filename": "{0:03d} ".format(lecture_counter) + filename,
"extension": extension,
"download_url": download_url,
"id": id,
}
)
return _temp return _temp
def _extract_sources(self, sources, skip_hls): def _extract_sources(self, sources, skip_hls):
@ -555,7 +620,12 @@ class Udemy:
download_url = track.get("url") download_url = track.get("url")
if not download_url or not isinstance(download_url, str): if not download_url or not isinstance(download_url, str):
continue continue
lang = track.get("language") or track.get("srclang") or track.get("label") or track["locale_id"].split("_")[0] lang = (
track.get("language")
or track.get("srclang")
or track.get("label")
or track["locale_id"].split("_")[0]
)
ext = "vtt" if "vtt" in download_url.rsplit(".", 1)[-1] else "srt" ext = "vtt" if "vtt" in download_url.rsplit(".", 1)[-1] else "srt"
_temp.append( _temp.append(
{ {
@ -653,7 +723,9 @@ class Udemy:
r.raise_for_status() r.raise_for_status()
f.write(r.content) f.write(r.content)
ytdl = yt_dlp.YoutubeDL({"quiet": True, "no_warnings": True, "allow_unplayable_formats": True, "enable_file_urls": True}) ytdl = yt_dlp.YoutubeDL(
{"quiet": True, "no_warnings": True, "allow_unplayable_formats": True, "enable_file_urls": True}
)
results = ytdl.extract_info(mpd_path.as_uri(), download=False, force_generic_extractor=True) results = ytdl.extract_info(mpd_path.as_uri(), download=False, force_generic_extractor=True)
seen = set() seen = set()
formats = results.get("formats") formats = results.get("formats")
@ -711,7 +783,9 @@ class Udemy:
self.session._headers.update( self.session._headers.update(
{ {
"Host": "{portal_name}.udemy.com".format(portal_name=portal_name), "Host": "{portal_name}.udemy.com".format(portal_name=portal_name),
"Referer": "https://{portal_name}.udemy.com/home/my-courses/search/?q={course_name}".format(portal_name=portal_name, course_name=course_name), "Referer": "https://{portal_name}.udemy.com/home/my-courses/search/?q={course_name}".format(
portal_name=portal_name, course_name=course_name
),
} }
) )
url = COURSE_SEARCH.format(portal_name=portal_name, course_name=course_name) url = COURSE_SEARCH.format(portal_name=portal_name, course_name=course_name)
@ -1009,7 +1083,7 @@ class Udemy:
"sources_count": sources_count, "sources_count": sources_count,
"is_encrypted": False, "is_encrypted": False,
"asset_id": asset.get("id"), "asset_id": asset.get("id"),
"type": asset.get("asset_type") "type": asset.get("asset_type"),
} }
else: else:
lecture.pop("data") # remove the raw data object after processing lecture.pop("data") # remove the raw data object after processing
@ -1023,7 +1097,7 @@ class Udemy:
"sources_count": 0, "sources_count": 0,
"is_encrypted": False, "is_encrypted": False,
"asset_id": asset.get("id"), "asset_id": asset.get("id"),
"type": asset.get("asset_type") "type": asset.get("asset_type"),
} }
else: else:
# encrypted # encrypted
@ -1047,7 +1121,7 @@ class Udemy:
"sources_count": sources_count, "sources_count": sources_count,
"is_encrypted": True, "is_encrypted": True,
"asset_id": asset.get("id"), "asset_id": asset.get("id"),
"type": asset.get("asset_type") "type": asset.get("asset_type"),
} }
else: else:
@ -1062,7 +1136,7 @@ class Udemy:
"sources_count": 0, "sources_count": 0,
"is_encrypted": False, "is_encrypted": False,
"asset_id": asset.get("id"), "asset_id": asset.get("id"),
"type": asset.get("asset_type") "type": asset.get("asset_type"),
} }
else: else:
lecture = { lecture = {
@ -1070,7 +1144,7 @@ class Udemy:
"assets": retVal, "assets": retVal,
"assets_count": len(retVal), "assets_count": len(retVal),
"asset_id": lecture_data.get("id"), "asset_id": lecture_data.get("id"),
"type": lecture_data.get("type") "type": lecture_data.get("type"),
} }
return lecture return lecture
@ -1142,7 +1216,11 @@ def durationtoseconds(period):
second = period.split("S")[0].split("M")[-1] second = period.split("S")[0].split("M")[-1]
# logger.debug("Total time: " + str(day) + " days " + str(hour) + " hours " + # logger.debug("Total time: " + str(day) + " days " + str(hour) + " hours " +
# str(minute) + " minutes and " + str(second) + " seconds") # str(minute) + " minutes and " + str(second) + " seconds")
total_time = float(str((day * 24 * 60 * 60) + (hour * 60 * 60) + (minute * 60) + (int(second.split(".")[0]))) + "." + str(int(second.split(".")[-1]))) total_time = float(
str((day * 24 * 60 * 60) + (hour * 60 * 60) + (minute * 60) + (int(second.split(".")[0])))
+ "."
+ str(int(second.split(".")[-1]))
)
return total_time return total_time
else: else:
@ -1162,7 +1240,9 @@ def mux_process(video_title, video_filepath, audio_filepath, output_path):
transcode, video_filepath, audio_filepath, codec, h265_crf, h265_preset, video_title, output_path transcode, video_filepath, audio_filepath, codec, h265_crf, h265_preset, video_title, output_path
) )
else: else:
command = 'ffmpeg -y -i "{}" -i "{}" -c:v copy -c:a copy -fflags +bitexact -map_metadata -1 -metadata title="{}" "{}"'.format(video_filepath, audio_filepath, video_title, output_path) command = 'ffmpeg -y -i "{}" -i "{}" -c:v copy -c:a copy -fflags +bitexact -map_metadata -1 -metadata title="{}" "{}"'.format(
video_filepath, audio_filepath, video_title, output_path
)
else: else:
if use_h265: if use_h265:
command = 'nice -n 7 ffmpeg {} -y -i "{}" -i "{}" -c:v libx265 -vtag hvc1 -crf {} -preset {} -c:a copy -fflags +bitexact -map_metadata -1 -metadata title="{}" "{}"'.format( command = 'nice -n 7 ffmpeg {} -y -i "{}" -i "{}" -c:v libx265 -vtag hvc1 -crf {} -preset {} -c:a copy -fflags +bitexact -map_metadata -1 -metadata title="{}" "{}"'.format(
@ -1206,7 +1286,7 @@ def decrypt(kid, in_filepath, out_filepath):
def handle_segments(url, format_id, lecture_id, video_title, output_path, chapter_dir): def handle_segments(url, format_id, lecture_id, video_title, output_path, chapter_dir):
os.chdir(os.path.join(chapter_dir)) os.chdir(os.path.join(chapter_dir))
video_filepath_enc = lecture_id + ".encrypted.mp4" video_filepath_enc = lecture_id + ".encrypted.mp4"
audio_filepath_enc = lecture_id + ".encrypted.m4a" audio_filepath_enc = lecture_id + ".encrypted.m4a"
video_filepath_dec = lecture_id + ".decrypted.mp4" video_filepath_dec = lecture_id + ".decrypted.mp4"
@ -1302,7 +1382,9 @@ def check_for_aria():
except FileNotFoundError: except FileNotFoundError:
return False return False
except Exception: except Exception:
logger.exception("> Unexpected exception while checking for Aria2c, please tell the program author about this! ") logger.exception(
"> Unexpected exception while checking for Aria2c, please tell the program author about this! "
)
return True return True
@ -1313,7 +1395,9 @@ def check_for_ffmpeg():
except FileNotFoundError: except FileNotFoundError:
return False return False
except Exception: except Exception:
logger.exception("> Unexpected exception while checking for FFMPEG, please tell the program author about this! ") logger.exception(
"> Unexpected exception while checking for FFMPEG, please tell the program author about this! "
)
return True return True
@ -1324,7 +1408,9 @@ def check_for_shaka():
except FileNotFoundError: except FileNotFoundError:
return False return False
except Exception: except Exception:
logger.exception("> Unexpected exception while checking for shaka-packager, please tell the program author about this! ") logger.exception(
"> Unexpected exception while checking for shaka-packager, please tell the program author about this! "
)
return True return True
@ -1343,7 +1429,7 @@ def download(url, path, filename):
pbar = tqdm(total=file_size, initial=first_byte, unit="B", unit_scale=True, desc=filename) pbar = tqdm(total=file_size, initial=first_byte, unit="B", unit_scale=True, desc=filename)
res = requests.get(url, headers=header, stream=True) res = requests.get(url, headers=header, stream=True)
res.raise_for_status() res.raise_for_status()
with (open(path, encoding="utf8", mode="ab")) as f: with open(path, encoding="utf8", mode="ab") as f:
for chunk in res.iter_content(chunk_size=1024): for chunk in res.iter_content(chunk_size=1024):
if chunk: if chunk:
f.write(chunk) f.write(chunk)
@ -1356,7 +1442,22 @@ def download_aria(url, file_dir, filename):
""" """
@author Puyodead1 @author Puyodead1
""" """
args = ["aria2c", url, "-o", filename, "-d", file_dir, "-j16", "-s20", "-x16", "-c", "--auto-file-renaming=false", "--summary-interval=0", "--disable-ipv6", "--follow-torrent=false"] args = [
"aria2c",
url,
"-o",
filename,
"-d",
file_dir,
"-j16",
"-s20",
"-x16",
"-c",
"--auto-file-renaming=false",
"--summary-interval=0",
"--disable-ipv6",
"--follow-torrent=false",
]
process = subprocess.Popen(args) process = subprocess.Popen(args)
log_subprocess_output("ARIA2-STDOUT", process.stdout) log_subprocess_output("ARIA2-STDOUT", process.stdout)
log_subprocess_output("ARIA2-STDERR", process.stderr) log_subprocess_output("ARIA2-STDERR", process.stderr)
@ -1408,7 +1509,14 @@ def process_lecture(lecture, lecture_path, chapter_dir):
if isinstance(quality, int): if isinstance(quality, int):
source = min(lecture_sources, key=lambda x: abs(int(x.get("height")) - quality)) source = min(lecture_sources, key=lambda x: abs(int(x.get("height")) - quality))
logger.info(f" > Lecture '{lecture_title}' has DRM, attempting to download") logger.info(f" > Lecture '{lecture_title}' has DRM, attempting to download")
handle_segments(source.get("download_url"), source.get("format_id"), str(lecture_id), lecture_title, lecture_path, chapter_dir) handle_segments(
source.get("download_url"),
source.get("format_id"),
str(lecture_id),
lecture_title,
lecture_path,
chapter_dir,
)
else: else:
logger.info(f" > Lecture '{lecture_title}' is missing media links") logger.info(f" > Lecture '{lecture_title}' is missing media links")
logger.debug(f"Lecture source count: {len(lecture_sources)}") logger.debug(f"Lecture source count: {len(lecture_sources)}")
@ -1451,7 +1559,20 @@ def process_lecture(lecture, lecture_path, chapter_dir):
if use_h265: if use_h265:
codec = "hevc_nvenc" if use_nvenc else "libx265" codec = "hevc_nvenc" if use_nvenc else "libx265"
transcode = "-hwaccel cuda -hwaccel_output_format cuda".split(" ") if use_nvenc else [] transcode = "-hwaccel cuda -hwaccel_output_format cuda".split(" ") if use_nvenc else []
cmd = ["ffmpeg", *transcode, "-y", "-i", lecture_path, "-c:v", codec, "-c:a", "copy", "-f", "mp4", tmp_file_path] cmd = [
"ffmpeg",
*transcode,
"-y",
"-i",
lecture_path,
"-c:v",
codec,
"-c:a",
"copy",
"-f",
"mp4",
tmp_file_path,
]
process = subprocess.Popen(cmd) process = subprocess.Popen(cmd)
log_subprocess_output("FFMPEG-STDOUT", process.stdout) log_subprocess_output("FFMPEG-STDOUT", process.stdout)
log_subprocess_output("FFMPEG-STDERR", process.stderr) log_subprocess_output("FFMPEG-STDERR", process.stderr)
@ -1477,7 +1598,7 @@ def process_quiz(udemy: Udemy, lecture, chapter_dir):
quiz = udemy._get_quiz_with_info(lecture.get("id")) quiz = udemy._get_quiz_with_info(lecture.get("id"))
if quiz["_type"] == "coding-problem": if quiz["_type"] == "coding-problem":
process_coding_assignment(quiz, lecture, chapter_dir) process_coding_assignment(quiz, lecture, chapter_dir)
else: # Normal quiz else: # Normal quiz
process_normal_quiz(quiz, lecture, chapter_dir) process_normal_quiz(quiz, lecture, chapter_dir)
@ -1507,7 +1628,7 @@ def process_coding_assignment(quiz, lecture, chapter_dir):
lecture_path = os.path.join(chapter_dir, lecture_file_name) lecture_path = os.path.join(chapter_dir, lecture_file_name)
logger.info(f" > Processing quiz {lecture_index} (coding assignment)") logger.info(f" > Processing quiz {lecture_index} (coding assignment)")
with open("coding_assignment_template.html", "r") as f: with open("coding_assignment_template.html", "r") as f:
html = f.read() html = f.read()
quiz_data = { quiz_data = {
@ -1628,7 +1749,14 @@ def parse_new(udemy: Udemy, udemy_object: dict):
"If you're seeing this message, that means that you reached a secret area that I haven't finished! jk I haven't implemented handling for this asset type, please report this at https://github.com/Puyodead1/udemy-downloader/issues so I can add it. When reporting, please provide the following information: " "If you're seeing this message, that means that you reached a secret area that I haven't finished! jk I haven't implemented handling for this asset type, please report this at https://github.com/Puyodead1/udemy-downloader/issues so I can add it. When reporting, please provide the following information: "
) )
logger.warning("AssetType: Video; AssetData: ", asset) logger.warning("AssetType: Video; AssetData: ", asset)
elif asset_type == "audio" or asset_type == "e-book" or asset_type == "file" or asset_type == "presentation" or asset_type == "ebook" or asset_type == "source_code": elif (
asset_type == "audio"
or asset_type == "e-book"
or asset_type == "file"
or asset_type == "presentation"
or asset_type == "ebook"
or asset_type == "source_code"
):
try: try:
ret_code = download_aria(download_url, chapter_dir, filename) ret_code = download_aria(download_url, chapter_dir, filename)
logger.debug(f" > Download return code: {ret_code}") logger.debug(f" > Download return code: {ret_code}")
@ -1648,7 +1776,9 @@ def parse_new(udemy: Udemy, udemy_object: dict):
filename = os.path.join(savedirs, filename) filename = os.path.join(savedirs, filename)
file_data = [] file_data = []
if os.path.isfile(filename): if os.path.isfile(filename):
file_data = [i.strip().lower() for i in open(filename, encoding="utf-8", errors="ignore") if i] file_data = [
i.strip().lower() for i in open(filename, encoding="utf-8", errors="ignore") if i
]
content = "\n{}\n{}\n".format(name, download_url) content = "\n{}\n{}\n".format(name, download_url)
if name.lower() not in file_data: if name.lower() not in file_data:
@ -1697,9 +1827,13 @@ def _print_course_info(udemy: Udemy, udemy_object: dict):
lecture_video_sources = sorted(lecture_video_sources, key=lambda x: int(x.get("height")), reverse=True) lecture_video_sources = sorted(lecture_video_sources, key=lambda x: int(x.get("height")), reverse=True)
if lecture_is_encrypted and lecture_video_sources != None: if lecture_is_encrypted and lecture_video_sources != None:
lecture_qualities = ["{}@{}x{}".format(x.get("type"), x.get("width"), x.get("height")) for x in lecture_video_sources] lecture_qualities = [
"{}@{}x{}".format(x.get("type"), x.get("width"), x.get("height")) for x in lecture_video_sources
]
elif lecture_is_encrypted == False and lecture_sources != None: elif lecture_is_encrypted == False and lecture_sources != None:
lecture_qualities = ["{}@{}x{}".format(x.get("type"), x.get("height"), x.get("width")) for x in lecture_sources] lecture_qualities = [
"{}@{}x{}".format(x.get("type"), x.get("height"), x.get("width")) for x in lecture_sources
]
if lecture_extension: if lecture_extension:
continue continue
@ -1759,7 +1893,9 @@ def main():
logger.info("> Fetching course content, this may take a minute...") logger.info("> Fetching course content, this may take a minute...")
if load_from_file: if load_from_file:
course_json = json.loads(open(os.path.join(os.getcwd(), "saved", "course_content.json"), encoding="utf8", mode="r").read()) course_json = json.loads(
open(os.path.join(os.getcwd(), "saved", "course_content.json"), encoding="utf8", mode="r").read()
)
title = course_json.get("title") title = course_json.get("title")
course_title = course_json.get("published_title") course_title = course_json.get("published_title")
portal_name = course_json.get("portal_name") portal_name = course_json.get("portal_name")
@ -1777,7 +1913,9 @@ def main():
resource = course_json.get("detail") resource = course_json.get("detail")
if load_from_file: if load_from_file:
udemy_object = json.loads(open(os.path.join(os.getcwd(), "saved", "_udemy.json"), encoding="utf8", mode="r").read()) udemy_object = json.loads(
open(os.path.join(os.getcwd(), "saved", "_udemy.json"), encoding="utf8", mode="r").read()
)
if info: if info:
_print_course_info(udemy, udemy_object) _print_course_info(udemy, udemy_object)
else: else:
@ -1800,7 +1938,7 @@ def main():
logger.info("> Processing course data, this may take a minute. ") logger.info("> Processing course data, this may take a minute. ")
lecture_counter = 0 lecture_counter = 0
lectures = [] lectures = []
for entry in course: for entry in course:
clazz = entry.get("_class") clazz = entry.get("_class")
@ -1814,7 +1952,14 @@ def main():
chapter_title = "{0:02d} - ".format(chapter_index) + sanitize_filename(entry.get("title")) chapter_title = "{0:02d} - ".format(chapter_index) + sanitize_filename(entry.get("title"))
if chapter_title not in udemy_object["chapters"]: if chapter_title not in udemy_object["chapters"]:
udemy_object["chapters"].append({"chapter_title": chapter_title, "chapter_id": entry.get("id"), "chapter_index": chapter_index, "lectures": []}) udemy_object["chapters"].append(
{
"chapter_title": chapter_title,
"chapter_id": entry.get("id"),
"chapter_index": chapter_index,
"lectures": [],
}
)
chapter_index_counter += 1 chapter_index_counter += 1
elif clazz == "lecture": elif clazz == "lecture":
lecture_counter += 1 lecture_counter += 1
@ -1824,7 +1969,14 @@ def main():
chapter_index = entry.get("object_index") chapter_index = entry.get("object_index")
chapter_title = "{0:02d} - ".format(chapter_index) + sanitize_filename(entry.get("title")) chapter_title = "{0:02d} - ".format(chapter_index) + sanitize_filename(entry.get("title"))
if chapter_title not in udemy_object["chapters"]: if chapter_title not in udemy_object["chapters"]:
udemy_object["chapters"].append({"chapter_title": chapter_title, "chapter_id": lecture_id, "chapter_index": chapter_index, "lectures": []}) udemy_object["chapters"].append(
{
"chapter_title": chapter_title,
"chapter_id": lecture_id,
"chapter_index": chapter_index,
"lectures": [],
}
)
chapter_index_counter += 1 chapter_index_counter += 1
if lecture_id: if lecture_id:
logger.info(f"Processing {course.index(entry) + 1} of {len(course)}") logger.info(f"Processing {course.index(entry) + 1} of {len(course)}")
@ -1832,7 +1984,16 @@ def main():
lecture_index = entry.get("object_index") lecture_index = entry.get("object_index")
lecture_title = "{0:03d} ".format(lecture_counter) + sanitize_filename(entry.get("title")) lecture_title = "{0:03d} ".format(lecture_counter) + sanitize_filename(entry.get("title"))
lectures.append({"index": lecture_counter, "lecture_index": lecture_index, "lecture_title": lecture_title, "_class": entry.get("_class"), "id": lecture_id, "data": entry}) lectures.append(
{
"index": lecture_counter,
"lecture_index": lecture_index,
"lecture_title": lecture_title,
"_class": entry.get("_class"),
"id": lecture_id,
"data": entry,
}
)
else: else:
logger.debug("Lecture: ID is None, skipping") logger.debug("Lecture: ID is None, skipping")
elif clazz == "quiz": elif clazz == "quiz":
@ -1843,7 +2004,14 @@ def main():
chapter_index = entry.get("object_index") chapter_index = entry.get("object_index")
chapter_title = "{0:02d} - ".format(chapter_index) + sanitize_filename(entry.get("title")) chapter_title = "{0:02d} - ".format(chapter_index) + sanitize_filename(entry.get("title"))
if chapter_title not in udemy_object["chapters"]: if chapter_title not in udemy_object["chapters"]:
udemy_object["chapters"].append({"chapter_title": chapter_title, "chapter_id": lecture_id, "chapter_index": chapter_index, "lectures": []}) udemy_object["chapters"].append(
{
"chapter_title": chapter_title,
"chapter_id": lecture_id,
"chapter_index": chapter_index,
"lectures": [],
}
)
chapter_index_counter += 1 chapter_index_counter += 1
if lecture_id: if lecture_id:
@ -1852,15 +2020,26 @@ def main():
lecture_index = entry.get("object_index") lecture_index = entry.get("object_index")
lecture_title = "{0:03d} ".format(lecture_counter) + sanitize_filename(entry.get("title")) lecture_title = "{0:03d} ".format(lecture_counter) + sanitize_filename(entry.get("title"))
lectures.append({"index": lecture_counter, "lecture_index": lecture_index, "lecture_title": lecture_title, "_class": entry.get("_class"), "id": lecture_id, "data": entry}) lectures.append(
{
"index": lecture_counter,
"lecture_index": lecture_index,
"lecture_title": lecture_title,
"_class": entry.get("_class"),
"id": lecture_id,
"data": entry,
}
)
else: else:
logger.debug("Quiz: ID is None, skipping") logger.debug("Quiz: ID is None, skipping")
udemy_object["chapters"][chapter_index_counter]["lectures"] = lectures udemy_object["chapters"][chapter_index_counter]["lectures"] = lectures
udemy_object["chapters"][chapter_index_counter]["lecture_count"] = len(lectures) udemy_object["chapters"][chapter_index_counter]["lecture_count"] = len(lectures)
udemy_object["total_chapters"] = len(udemy_object["chapters"]) udemy_object["total_chapters"] = len(udemy_object["chapters"])
udemy_object["total_lectures"] = sum([entry.get("lecture_count", 0) for entry in udemy_object["chapters"] if entry]) udemy_object["total_lectures"] = sum(
[entry.get("lecture_count", 0) for entry in udemy_object["chapters"] if entry]
)
if save_to_file: if save_to_file:
with open(os.path.join(os.getcwd(), "saved", "_udemy.json"), encoding="utf8", mode="w") as f: with open(os.path.join(os.getcwd(), "saved", "_udemy.json"), encoding="utf8", mode="w") as f: