mirror of
https://cdm-project.com/Download-Tools/udemy-downloader.git
synced 2025-05-01 00:14:25 +02:00
Updates
+ Update sanitization regex to also clean exclamation points + Update _sanitize method to use the _clean method + Move the lecture file existence check to the parse method; this now also checks whether HTML files already exist + Update chapter folder titles to use " - " to separate the number from the name
This commit is contained in:
parent
6137e44d76
commit
ca40ff2b6d
36
main.py
36
main.py
@ -42,15 +42,15 @@ COLLECTION_URL = "https://{portal_name}.udemy.com/api-2.0/users/me/subscribed-co
|
|||||||
|
|
||||||
|
|
||||||
def _clean(text):
|
def _clean(text):
|
||||||
ok = re.compile(r'[^\\/:*?"<>|]')
|
ok = re.compile(r'[^\\/:*?!"<>|]')
|
||||||
text = "".join(x if ok.match(x) else "_" for x in text)
|
text = "".join(x if ok.match(x) else "_" for x in text)
|
||||||
text = re.sub(r"\.+$", "", text.strip())
|
text = re.sub(r"\.+$", "", text.strip())
|
||||||
return text
|
return text
|
||||||
|
|
||||||
|
|
||||||
def _sanitize(self, unsafetext):
|
def _sanitize(self, unsafetext):
|
||||||
text = sanitize(
|
text = _clean(sanitize(
|
||||||
slugify(unsafetext, lower=False, spaces=True, ok=SLUG_OK + "().[]"))
|
slugify(unsafetext, lower=False, spaces=True, ok=SLUG_OK + "().[]")))
|
||||||
return text
|
return text
|
||||||
|
|
||||||
|
|
||||||
@ -1028,7 +1028,6 @@ def process_lecture(lecture, lecture_path, lecture_file_name, quality, access_to
|
|||||||
|
|
||||||
if is_encrypted:
|
if is_encrypted:
|
||||||
if len(lecture_sources) > 0:
|
if len(lecture_sources) > 0:
|
||||||
if not os.path.isfile(lecture_path):
|
|
||||||
source = lecture_sources[-1] # last index is the best quality
|
source = lecture_sources[-1] # last index is the best quality
|
||||||
if isinstance(quality, int):
|
if isinstance(quality, int):
|
||||||
source = min(
|
source = min(
|
||||||
@ -1040,10 +1039,6 @@ def process_lecture(lecture, lecture_path, lecture_file_name, quality, access_to
|
|||||||
source.get(
|
source.get(
|
||||||
"format_id"), lecture_title, lecture_path, lecture_file_name,
|
"format_id"), lecture_title, lecture_path, lecture_file_name,
|
||||||
concurrent_connections, chapter_dir)
|
concurrent_connections, chapter_dir)
|
||||||
else:
|
|
||||||
print(
|
|
||||||
" > Lecture '%s' is already downloaded, skipping..." %
|
|
||||||
lecture_title)
|
|
||||||
else:
|
else:
|
||||||
print(f" > Lecture '%s' is missing media links" %
|
print(f" > Lecture '%s' is missing media links" %
|
||||||
lecture_title)
|
lecture_title)
|
||||||
@ -1117,11 +1112,28 @@ def parse_new(_udemy, quality, skip_lectures, dl_assets, dl_captions,
|
|||||||
for lecture in chapter.get("lectures"):
|
for lecture in chapter.get("lectures"):
|
||||||
lecture_title = lecture.get("lecture_title")
|
lecture_title = lecture.get("lecture_title")
|
||||||
lecture_index = lecture.get("lecture_index")
|
lecture_index = lecture.get("lecture_index")
|
||||||
|
lecture_extension = lecture.get("extension")
|
||||||
|
extension = "mp4" # video lectures dont have an extension property, so we assume its mp4
|
||||||
|
if lecture_extension != None:
|
||||||
|
# if the lecture extension property isnt none, set the extension to the lecture extension
|
||||||
|
extension = lecture_extension
|
||||||
|
lecture_file_name = sanitize(lecture_title + "." + extension)
|
||||||
|
lecture_path = os.path.join(
|
||||||
|
chapter_dir,
|
||||||
|
lecture_file_name)
|
||||||
|
|
||||||
extension = lecture.get("extension")
|
|
||||||
print(
|
print(
|
||||||
f" > Processing lecture {lecture_index} of {total_lectures}")
|
f" > Processing lecture {lecture_index} of {total_lectures}")
|
||||||
if not skip_lectures:
|
if not skip_lectures:
|
||||||
|
print(lecture_file_name)
|
||||||
|
# Check if the lecture is already downloaded
|
||||||
|
if os.path.isfile(lecture_path):
|
||||||
|
print(
|
||||||
|
" > Lecture '%s' is already downloaded, skipping..." %
|
||||||
|
lecture_title)
|
||||||
|
continue
|
||||||
|
else:
|
||||||
|
# Check if the file is an html file
|
||||||
if extension == "html":
|
if extension == "html":
|
||||||
html_content = lecture.get("html_content").encode(
|
html_content = lecture.get("html_content").encode(
|
||||||
"ascii", "ignore").decode("utf8")
|
"ascii", "ignore").decode("utf8")
|
||||||
@ -1135,10 +1147,6 @@ def parse_new(_udemy, quality, skip_lectures, dl_assets, dl_captions,
|
|||||||
print(" > Failed to write html file: ", e)
|
print(" > Failed to write html file: ", e)
|
||||||
continue
|
continue
|
||||||
else:
|
else:
|
||||||
lecture_file_name = sanitize(lecture_title + ".mp4")
|
|
||||||
lecture_path = os.path.join(
|
|
||||||
chapter_dir,
|
|
||||||
lecture_file_name)
|
|
||||||
process_lecture(lecture, lecture_path, lecture_file_name,
|
process_lecture(lecture, lecture_path, lecture_file_name,
|
||||||
quality, access_token,
|
quality, access_token,
|
||||||
concurrent_connections, chapter_dir)
|
concurrent_connections, chapter_dir)
|
||||||
@ -1521,7 +1529,7 @@ if __name__ == "__main__":
|
|||||||
lecture_counter = 0
|
lecture_counter = 0
|
||||||
lectures = []
|
lectures = []
|
||||||
chapter_index = entry.get("object_index")
|
chapter_index = entry.get("object_index")
|
||||||
chapter_title = "{0:02d} ".format(chapter_index) + _clean(
|
chapter_title = "{0:02d} - ".format(chapter_index) + _clean(
|
||||||
entry.get("title"))
|
entry.get("title"))
|
||||||
|
|
||||||
if chapter_title not in _udemy["chapters"]:
|
if chapter_title not in _udemy["chapters"]:
|
||||||
|
Loading…
x
Reference in New Issue
Block a user