This commit is contained in:
Puyodead1 2023-12-28 14:57:22 -05:00
parent b50dbd1ee2
commit db7b0490e6
No known key found for this signature in database
GPG Key ID: A4FA4FEC0DD353FC

259
main.py
View File

@ -82,7 +82,9 @@ def pre_run():
os.makedirs(LOG_DIR_PATH, exist_ok=True)
parser = argparse.ArgumentParser(description="Udemy Downloader")
parser.add_argument("-c", "--course-url", dest="course_url", type=str, help="The URL of the course to download", required=True)
parser.add_argument(
"-c", "--course-url", dest="course_url", type=str, help="The URL of the course to download", required=True
)
parser.add_argument(
"-b",
"--bearer",
@ -217,13 +219,15 @@ def pre_run():
help="Whether to use the NVIDIA hardware transcoding for H.265. Only works if you have a supported NVIDIA GPU and ffmpeg with nvenc support",
)
parser.add_argument(
"--out", "-o",
"--out",
"-o",
dest="out",
type=str,
help="Set the path to the output directory",
)
parser.add_argument(
"--continue-lecture-numbers", "-n",
"--continue-lecture-numbers",
"-n",
dest="use_continuous_lecture_numbers",
action="store_true",
help="Use continuous lecture numbering instead of per-chapter",
@ -374,7 +378,9 @@ class Udemy:
self.session._headers.update(
{
"Host": "{portal_name}.udemy.com".format(portal_name=portal_name),
"Referer": "https://{portal_name}.udemy.com/course/{course_name}/learn/quiz/{quiz_id}".format(portal_name=portal_name, course_name=course_name, quiz_id=quiz_id),
"Referer": "https://{portal_name}.udemy.com/course/{course_name}/learn/quiz/{quiz_id}".format(
portal_name=portal_name, course_name=course_name, quiz_id=quiz_id
),
}
)
url = QUIZ_URL.format(portal_name=portal_name, quiz_id=quiz_id)
@ -432,14 +438,41 @@ class Udemy:
if download_urls and isinstance(download_urls, dict):
extension = filename.rsplit(".", 1)[-1] if "." in filename else ""
download_url = download_urls.get("File", [])[0].get("file")
_temp.append({"type": "file", "title": title, "filename": "{0:03d} ".format(lecture_counter) + filename, "extension": extension, "download_url": download_url, "id": id})
_temp.append(
{
"type": "file",
"title": title,
"filename": "{0:03d} ".format(lecture_counter) + filename,
"extension": extension,
"download_url": download_url,
"id": id,
}
)
elif asset_type == "sourcecode":
if download_urls and isinstance(download_urls, dict):
extension = filename.rsplit(".", 1)[-1] if "." in filename else ""
download_url = download_urls.get("SourceCode", [])[0].get("file")
_temp.append({"type": "source_code", "title": title, "filename": "{0:03d} ".format(lecture_counter) + filename, "extension": extension, "download_url": download_url, "id": id})
_temp.append(
{
"type": "source_code",
"title": title,
"filename": "{0:03d} ".format(lecture_counter) + filename,
"extension": extension,
"download_url": download_url,
"id": id,
}
)
elif asset_type == "externallink":
_temp.append({"type": "external_link", "title": title, "filename": "{0:03d} ".format(lecture_counter) + filename, "extension": "txt", "download_url": external_url, "id": id})
_temp.append(
{
"type": "external_link",
"title": title,
"filename": "{0:03d} ".format(lecture_counter) + filename,
"extension": "txt",
"download_url": external_url,
"id": id,
}
)
return _temp
def _extract_ppt(self, asset, lecture_counter):
@ -450,7 +483,15 @@ class Udemy:
if download_urls and isinstance(download_urls, dict):
extension = filename.rsplit(".", 1)[-1] if "." in filename else ""
download_url = download_urls.get("Presentation", [])[0].get("file")
_temp.append({"type": "presentation", "filename": "{0:03d} ".format(lecture_counter) + filename, "extension": extension, "download_url": download_url, "id": id})
_temp.append(
{
"type": "presentation",
"filename": "{0:03d} ".format(lecture_counter) + filename,
"extension": extension,
"download_url": download_url,
"id": id,
}
)
return _temp
def _extract_file(self, asset, lecture_counter):
@ -461,7 +502,15 @@ class Udemy:
if download_urls and isinstance(download_urls, dict):
extension = filename.rsplit(".", 1)[-1] if "." in filename else ""
download_url = download_urls.get("File", [])[0].get("file")
_temp.append({"type": "file", "filename": "{0:03d} ".format(lecture_counter) + filename, "extension": extension, "download_url": download_url, "id": id})
_temp.append(
{
"type": "file",
"filename": "{0:03d} ".format(lecture_counter) + filename,
"extension": extension,
"download_url": download_url,
"id": id,
}
)
return _temp
def _extract_ebook(self, asset, lecture_counter):
@ -472,7 +521,15 @@ class Udemy:
if download_urls and isinstance(download_urls, dict):
extension = filename.rsplit(".", 1)[-1] if "." in filename else ""
download_url = download_urls.get("E-Book", [])[0].get("file")
_temp.append({"type": "ebook", "filename": "{0:03d} ".format(lecture_counter) + filename, "extension": extension, "download_url": download_url, "id": id})
_temp.append(
{
"type": "ebook",
"filename": "{0:03d} ".format(lecture_counter) + filename,
"extension": extension,
"download_url": download_url,
"id": id,
}
)
return _temp
def _extract_audio(self, asset, lecture_counter):
@ -483,7 +540,15 @@ class Udemy:
if download_urls and isinstance(download_urls, dict):
extension = filename.rsplit(".", 1)[-1] if "." in filename else ""
download_url = download_urls.get("Audio", [])[0].get("file")
_temp.append({"type": "audio", "filename": "{0:03d} ".format(lecture_counter) + filename, "extension": extension, "download_url": download_url, "id": id})
_temp.append(
{
"type": "audio",
"filename": "{0:03d} ".format(lecture_counter) + filename,
"extension": extension,
"download_url": download_url,
"id": id,
}
)
return _temp
def _extract_sources(self, sources, skip_hls):
@ -555,7 +620,12 @@ class Udemy:
download_url = track.get("url")
if not download_url or not isinstance(download_url, str):
continue
lang = track.get("language") or track.get("srclang") or track.get("label") or track["locale_id"].split("_")[0]
lang = (
track.get("language")
or track.get("srclang")
or track.get("label")
or track["locale_id"].split("_")[0]
)
ext = "vtt" if "vtt" in download_url.rsplit(".", 1)[-1] else "srt"
_temp.append(
{
@ -653,7 +723,9 @@ class Udemy:
r.raise_for_status()
f.write(r.content)
ytdl = yt_dlp.YoutubeDL({"quiet": True, "no_warnings": True, "allow_unplayable_formats": True, "enable_file_urls": True})
ytdl = yt_dlp.YoutubeDL(
{"quiet": True, "no_warnings": True, "allow_unplayable_formats": True, "enable_file_urls": True}
)
results = ytdl.extract_info(mpd_path.as_uri(), download=False, force_generic_extractor=True)
seen = set()
formats = results.get("formats")
@ -711,7 +783,9 @@ class Udemy:
self.session._headers.update(
{
"Host": "{portal_name}.udemy.com".format(portal_name=portal_name),
"Referer": "https://{portal_name}.udemy.com/home/my-courses/search/?q={course_name}".format(portal_name=portal_name, course_name=course_name),
"Referer": "https://{portal_name}.udemy.com/home/my-courses/search/?q={course_name}".format(
portal_name=portal_name, course_name=course_name
),
}
)
url = COURSE_SEARCH.format(portal_name=portal_name, course_name=course_name)
@ -1009,7 +1083,7 @@ class Udemy:
"sources_count": sources_count,
"is_encrypted": False,
"asset_id": asset.get("id"),
"type": asset.get("asset_type")
"type": asset.get("asset_type"),
}
else:
lecture.pop("data") # remove the raw data object after processing
@ -1023,7 +1097,7 @@ class Udemy:
"sources_count": 0,
"is_encrypted": False,
"asset_id": asset.get("id"),
"type": asset.get("asset_type")
"type": asset.get("asset_type"),
}
else:
# encrypted
@ -1047,7 +1121,7 @@ class Udemy:
"sources_count": sources_count,
"is_encrypted": True,
"asset_id": asset.get("id"),
"type": asset.get("asset_type")
"type": asset.get("asset_type"),
}
else:
@ -1062,7 +1136,7 @@ class Udemy:
"sources_count": 0,
"is_encrypted": False,
"asset_id": asset.get("id"),
"type": asset.get("asset_type")
"type": asset.get("asset_type"),
}
else:
lecture = {
@ -1070,7 +1144,7 @@ class Udemy:
"assets": retVal,
"assets_count": len(retVal),
"asset_id": lecture_data.get("id"),
"type": lecture_data.get("type")
"type": lecture_data.get("type"),
}
return lecture
@ -1142,7 +1216,11 @@ def durationtoseconds(period):
second = period.split("S")[0].split("M")[-1]
# logger.debug("Total time: " + str(day) + " days " + str(hour) + " hours " +
# str(minute) + " minutes and " + str(second) + " seconds")
total_time = float(str((day * 24 * 60 * 60) + (hour * 60 * 60) + (minute * 60) + (int(second.split(".")[0]))) + "." + str(int(second.split(".")[-1])))
total_time = float(
str((day * 24 * 60 * 60) + (hour * 60 * 60) + (minute * 60) + (int(second.split(".")[0])))
+ "."
+ str(int(second.split(".")[-1]))
)
return total_time
else:
@ -1162,7 +1240,9 @@ def mux_process(video_title, video_filepath, audio_filepath, output_path):
transcode, video_filepath, audio_filepath, codec, h265_crf, h265_preset, video_title, output_path
)
else:
command = 'ffmpeg -y -i "{}" -i "{}" -c:v copy -c:a copy -fflags +bitexact -map_metadata -1 -metadata title="{}" "{}"'.format(video_filepath, audio_filepath, video_title, output_path)
command = 'ffmpeg -y -i "{}" -i "{}" -c:v copy -c:a copy -fflags +bitexact -map_metadata -1 -metadata title="{}" "{}"'.format(
video_filepath, audio_filepath, video_title, output_path
)
else:
if use_h265:
command = 'nice -n 7 ffmpeg {} -y -i "{}" -i "{}" -c:v libx265 -vtag hvc1 -crf {} -preset {} -c:a copy -fflags +bitexact -map_metadata -1 -metadata title="{}" "{}"'.format(
@ -1302,7 +1382,9 @@ def check_for_aria():
except FileNotFoundError:
return False
except Exception:
logger.exception("> Unexpected exception while checking for Aria2c, please tell the program author about this! ")
logger.exception(
"> Unexpected exception while checking for Aria2c, please tell the program author about this! "
)
return True
@ -1313,7 +1395,9 @@ def check_for_ffmpeg():
except FileNotFoundError:
return False
except Exception:
logger.exception("> Unexpected exception while checking for FFMPEG, please tell the program author about this! ")
logger.exception(
"> Unexpected exception while checking for FFMPEG, please tell the program author about this! "
)
return True
@ -1324,7 +1408,9 @@ def check_for_shaka():
except FileNotFoundError:
return False
except Exception:
logger.exception("> Unexpected exception while checking for shaka-packager, please tell the program author about this! ")
logger.exception(
"> Unexpected exception while checking for shaka-packager, please tell the program author about this! "
)
return True
@ -1343,7 +1429,7 @@ def download(url, path, filename):
pbar = tqdm(total=file_size, initial=first_byte, unit="B", unit_scale=True, desc=filename)
res = requests.get(url, headers=header, stream=True)
res.raise_for_status()
with (open(path, encoding="utf8", mode="ab")) as f:
with open(path, encoding="utf8", mode="ab") as f:
for chunk in res.iter_content(chunk_size=1024):
if chunk:
f.write(chunk)
@ -1356,7 +1442,22 @@ def download_aria(url, file_dir, filename):
"""
@author Puyodead1
"""
args = ["aria2c", url, "-o", filename, "-d", file_dir, "-j16", "-s20", "-x16", "-c", "--auto-file-renaming=false", "--summary-interval=0", "--disable-ipv6", "--follow-torrent=false"]
args = [
"aria2c",
url,
"-o",
filename,
"-d",
file_dir,
"-j16",
"-s20",
"-x16",
"-c",
"--auto-file-renaming=false",
"--summary-interval=0",
"--disable-ipv6",
"--follow-torrent=false",
]
process = subprocess.Popen(args)
log_subprocess_output("ARIA2-STDOUT", process.stdout)
log_subprocess_output("ARIA2-STDERR", process.stderr)
@ -1408,7 +1509,14 @@ def process_lecture(lecture, lecture_path, chapter_dir):
if isinstance(quality, int):
source = min(lecture_sources, key=lambda x: abs(int(x.get("height")) - quality))
logger.info(f" > Lecture '{lecture_title}' has DRM, attempting to download")
handle_segments(source.get("download_url"), source.get("format_id"), str(lecture_id), lecture_title, lecture_path, chapter_dir)
handle_segments(
source.get("download_url"),
source.get("format_id"),
str(lecture_id),
lecture_title,
lecture_path,
chapter_dir,
)
else:
logger.info(f" > Lecture '{lecture_title}' is missing media links")
logger.debug(f"Lecture source count: {len(lecture_sources)}")
@ -1451,7 +1559,20 @@ def process_lecture(lecture, lecture_path, chapter_dir):
if use_h265:
codec = "hevc_nvenc" if use_nvenc else "libx265"
transcode = "-hwaccel cuda -hwaccel_output_format cuda".split(" ") if use_nvenc else []
cmd = ["ffmpeg", *transcode, "-y", "-i", lecture_path, "-c:v", codec, "-c:a", "copy", "-f", "mp4", tmp_file_path]
cmd = [
"ffmpeg",
*transcode,
"-y",
"-i",
lecture_path,
"-c:v",
codec,
"-c:a",
"copy",
"-f",
"mp4",
tmp_file_path,
]
process = subprocess.Popen(cmd)
log_subprocess_output("FFMPEG-STDOUT", process.stdout)
log_subprocess_output("FFMPEG-STDERR", process.stderr)
@ -1628,7 +1749,14 @@ def parse_new(udemy: Udemy, udemy_object: dict):
"If you're seeing this message, that means that you reached a secret area that I haven't finished! jk I haven't implemented handling for this asset type, please report this at https://github.com/Puyodead1/udemy-downloader/issues so I can add it. When reporting, please provide the following information: "
)
logger.warning("AssetType: Video; AssetData: ", asset)
elif asset_type == "audio" or asset_type == "e-book" or asset_type == "file" or asset_type == "presentation" or asset_type == "ebook" or asset_type == "source_code":
elif (
asset_type == "audio"
or asset_type == "e-book"
or asset_type == "file"
or asset_type == "presentation"
or asset_type == "ebook"
or asset_type == "source_code"
):
try:
ret_code = download_aria(download_url, chapter_dir, filename)
logger.debug(f" > Download return code: {ret_code}")
@ -1648,7 +1776,9 @@ def parse_new(udemy: Udemy, udemy_object: dict):
filename = os.path.join(savedirs, filename)
file_data = []
if os.path.isfile(filename):
file_data = [i.strip().lower() for i in open(filename, encoding="utf-8", errors="ignore") if i]
file_data = [
i.strip().lower() for i in open(filename, encoding="utf-8", errors="ignore") if i
]
content = "\n{}\n{}\n".format(name, download_url)
if name.lower() not in file_data:
@ -1697,9 +1827,13 @@ def _print_course_info(udemy: Udemy, udemy_object: dict):
lecture_video_sources = sorted(lecture_video_sources, key=lambda x: int(x.get("height")), reverse=True)
if lecture_is_encrypted and lecture_video_sources != None:
lecture_qualities = ["{}@{}x{}".format(x.get("type"), x.get("width"), x.get("height")) for x in lecture_video_sources]
lecture_qualities = [
"{}@{}x{}".format(x.get("type"), x.get("width"), x.get("height")) for x in lecture_video_sources
]
elif lecture_is_encrypted == False and lecture_sources != None:
lecture_qualities = ["{}@{}x{}".format(x.get("type"), x.get("height"), x.get("width")) for x in lecture_sources]
lecture_qualities = [
"{}@{}x{}".format(x.get("type"), x.get("height"), x.get("width")) for x in lecture_sources
]
if lecture_extension:
continue
@ -1759,7 +1893,9 @@ def main():
logger.info("> Fetching course content, this may take a minute...")
if load_from_file:
course_json = json.loads(open(os.path.join(os.getcwd(), "saved", "course_content.json"), encoding="utf8", mode="r").read())
course_json = json.loads(
open(os.path.join(os.getcwd(), "saved", "course_content.json"), encoding="utf8", mode="r").read()
)
title = course_json.get("title")
course_title = course_json.get("published_title")
portal_name = course_json.get("portal_name")
@ -1777,7 +1913,9 @@ def main():
resource = course_json.get("detail")
if load_from_file:
udemy_object = json.loads(open(os.path.join(os.getcwd(), "saved", "_udemy.json"), encoding="utf8", mode="r").read())
udemy_object = json.loads(
open(os.path.join(os.getcwd(), "saved", "_udemy.json"), encoding="utf8", mode="r").read()
)
if info:
_print_course_info(udemy, udemy_object)
else:
@ -1814,7 +1952,14 @@ def main():
chapter_title = "{0:02d} - ".format(chapter_index) + sanitize_filename(entry.get("title"))
if chapter_title not in udemy_object["chapters"]:
udemy_object["chapters"].append({"chapter_title": chapter_title, "chapter_id": entry.get("id"), "chapter_index": chapter_index, "lectures": []})
udemy_object["chapters"].append(
{
"chapter_title": chapter_title,
"chapter_id": entry.get("id"),
"chapter_index": chapter_index,
"lectures": [],
}
)
chapter_index_counter += 1
elif clazz == "lecture":
lecture_counter += 1
@ -1824,7 +1969,14 @@ def main():
chapter_index = entry.get("object_index")
chapter_title = "{0:02d} - ".format(chapter_index) + sanitize_filename(entry.get("title"))
if chapter_title not in udemy_object["chapters"]:
udemy_object["chapters"].append({"chapter_title": chapter_title, "chapter_id": lecture_id, "chapter_index": chapter_index, "lectures": []})
udemy_object["chapters"].append(
{
"chapter_title": chapter_title,
"chapter_id": lecture_id,
"chapter_index": chapter_index,
"lectures": [],
}
)
chapter_index_counter += 1
if lecture_id:
logger.info(f"Processing {course.index(entry) + 1} of {len(course)}")
@ -1832,7 +1984,16 @@ def main():
lecture_index = entry.get("object_index")
lecture_title = "{0:03d} ".format(lecture_counter) + sanitize_filename(entry.get("title"))
lectures.append({"index": lecture_counter, "lecture_index": lecture_index, "lecture_title": lecture_title, "_class": entry.get("_class"), "id": lecture_id, "data": entry})
lectures.append(
{
"index": lecture_counter,
"lecture_index": lecture_index,
"lecture_title": lecture_title,
"_class": entry.get("_class"),
"id": lecture_id,
"data": entry,
}
)
else:
logger.debug("Lecture: ID is None, skipping")
elif clazz == "quiz":
@ -1843,7 +2004,14 @@ def main():
chapter_index = entry.get("object_index")
chapter_title = "{0:02d} - ".format(chapter_index) + sanitize_filename(entry.get("title"))
if chapter_title not in udemy_object["chapters"]:
udemy_object["chapters"].append({"chapter_title": chapter_title, "chapter_id": lecture_id, "chapter_index": chapter_index, "lectures": []})
udemy_object["chapters"].append(
{
"chapter_title": chapter_title,
"chapter_id": lecture_id,
"chapter_index": chapter_index,
"lectures": [],
}
)
chapter_index_counter += 1
if lecture_id:
@ -1852,7 +2020,16 @@ def main():
lecture_index = entry.get("object_index")
lecture_title = "{0:03d} ".format(lecture_counter) + sanitize_filename(entry.get("title"))
lectures.append({"index": lecture_counter, "lecture_index": lecture_index, "lecture_title": lecture_title, "_class": entry.get("_class"), "id": lecture_id, "data": entry})
lectures.append(
{
"index": lecture_counter,
"lecture_index": lecture_index,
"lecture_title": lecture_title,
"_class": entry.get("_class"),
"id": lecture_id,
"data": entry,
}
)
else:
logger.debug("Quiz: ID is None, skipping")
@ -1860,7 +2037,9 @@ def main():
udemy_object["chapters"][chapter_index_counter]["lecture_count"] = len(lectures)
udemy_object["total_chapters"] = len(udemy_object["chapters"])
udemy_object["total_lectures"] = sum([entry.get("lecture_count", 0) for entry in udemy_object["chapters"] if entry])
udemy_object["total_lectures"] = sum(
[entry.get("lecture_count", 0) for entry in udemy_object["chapters"] if entry]
)
if save_to_file:
with open(os.path.join(os.getcwd(), "saved", "_udemy.json"), encoding="utf8", mode="w") as f: