diff --git a/coding_assignment_template.html b/coding_assignment_template.html deleted file mode 100644 index 54016cf..0000000 --- a/coding_assignment_template.html +++ /dev/null @@ -1,118 +0,0 @@ - - - - - - - Coding Assignment - - - - - -

-
-

Instructions

-
-
-
-

Test(s)

-
-
-
-

Solution(s)

-
-
- - - - diff --git a/constants.py b/constants.py index 2b9892b..4a89ee2 100644 --- a/constants.py +++ b/constants.py @@ -12,24 +12,32 @@ HEADERS = { LOGIN_URL = "https://www.udemy.com/join/login-popup/?ref=&display_type=popup&loc" LOGOUT_URL = "https://www.udemy.com/user/logout" # COURSE_URL = "https://{portal_name}.udemy.com/api-2.0/courses/{course_id}/cached-subscriber-curriculum-items?fields[asset]=results,title,external_url,time_estimation,download_urls,slide_urls,filename,asset_type,captions,media_license_token,course_is_drmed,media_sources,stream_urls,body&fields[chapter]=object_index,title,sort_order&fields[lecture]=id,title,object_index,asset,supplementary_assets,view_html&page_size=10000" -COURSE_URL = "https://{portal_name}.udemy.com/api-2.0/courses/{course_id}/subscriber-curriculum-items/?page_size=100&fields[asset]=asset_type,length,media_license_token,course_is_drmed,media_sources,thumbnail_sprite,slides,slide_urls,filename,download_urls,external_url,stream_urls&fields[chapter]=object_index,title&fields[lecture]=id,title,object_index,asset,supplementary_assets,view_html&caching_intent=True" -COURSE_INFO_URL = "https://{portal_name}.udemy.com/api-2.0/courses/{course_id}/" +CURRICULUM_ITEMS_URL = "https://{portal_name}.udemy.com/api-2.0/courses/{course_id}/subscriber-curriculum-items/" +COURSE_URL = "https://{portal_name}.udemy.com/api-2.0/courses/{course_id}/" COURSE_SEARCH = "https://{portal_name}.udemy.com/api-2.0/users/me/subscribed-courses?fields[course]=id,url,title,published_title&page=1&page_size=500&search={course_name}" SUBSCRIBED_COURSES = "https://{portal_name}.udemy.com/api-2.0/users/me/subscribed-courses/?ordering=-last_accessed&fields[course]=id,title,url&page=1&page_size=12" MY_COURSES_URL = "https://{portal_name}.udemy.com/api-2.0/users/me/subscribed-courses?fields[course]=id,url,title,published_title&ordering=-last_accessed,-access_time&page=1&page_size=10000" COLLECTION_URL = "https://{portal_name}.udemy.com/api-2.0/users/me/subscribed-courses-collections/?collection_has_courses=True&course_limit=20&fields[course]=last_accessed_time,title,published_title&fields[user_has_subscribed_courses_collection]=@all&page=1&page_size=1000" QUIZ_URL = "https://{portal_name}.udemy.com/api-2.0/quizzes/{quiz_id}/assessments/?version=1&page_size=250&fields[assessment]=id,assessment_type,prompt,correct_response,section,question_plain,related_lectures" +CURRICULUM_ITEMS_PARAMS = { + "fields[lecture]": "title,object_index,created,asset,supplementary_assets,description,download_url", + "fields[quiz]": "title,object_index,type", + "fields[practice]": "title,object_index", + "fields[chapter]": "title,object_index", + "fields[asset]": "title,filename,asset_type,status,is_external,media_license_token,course_is_drmed,media_sources,captions,slides,slide_urls,download_urls,external_url,@min,status,delayed_asset_message,processing_errors,body", + "caching_intent": True, + "page_size": "200", +} + +COURSE_URL_PARAMS = {"fields[course]": "title", "use_remote_version": True, "caching_intent": True} + HOME_DIR = os.getcwd() SAVED_DIR = os.path.join(os.getcwd(), "saved") KEY_FILE_PATH = os.path.join(os.getcwd(), "keyfile.json") COOKIE_FILE_PATH = os.path.join(os.getcwd(), "cookies.txt") LOG_DIR_PATH = os.path.join(os.getcwd(), "logs") -LOG_FILE_PATH = os.path.join( - os.getcwd(), "logs", f"{time.strftime('%Y-%m-%d-%I-%M-%S')}.log" -) -LOG_FORMAT = ( - "[%(asctime)s] [%(name)s] [%(funcName)s:%(lineno)d] %(levelname)s: %(message)s" -) +LOG_FILE_PATH = os.path.join(os.getcwd(), "logs", f"{time.strftime('%Y-%m-%d-%I-%M-%S')}.log") +LOG_FORMAT = "[%(asctime)s] [%(name)s] [%(funcName)s:%(lineno)d] %(levelname)s: %(message)s" LOG_DATE_FORMAT = "%I:%M:%S" LOG_LEVEL = logging.INFO diff --git a/main.py b/main.py index 4056501..64a0333 100644 --- a/main.py +++ b/main.py @@ -1,6 +1,5 @@ # -*- coding: utf-8 -*- import argparse -import glob import json import logging import math @@ -481,6 +480,14 @@ class Udemy: ) return _temp + def _extract_article(self, asset, id): + return [{ + "type": "article", + "body": asset.get("body"), + "extension": "html", + "id": id, + }] + def _extract_ppt(self, asset, lecture_counter): _temp = [] download_urls = asset.get("download_urls") @@ -813,7 +820,7 @@ class Udemy: def _extract_course_info_json(self, url, course_id): self.session._headers.update({"Referer": url}) - url = COURSE_INFO_URL.format(portal_name=portal_name, course_id=course_id) + url = COURSE_URL.format(portal_name=portal_name, course_id=course_id) try: resp = self.session._get(url).json() except conn_error as error: @@ -823,12 +830,12 @@ class Udemy: else: return resp - def _extract_course_json(self, url, course_id, portal_name): + def _extract_course_curriculum(self, url, course_id, portal_name): self.session._headers.update({"Referer": url}) - url = COURSE_URL.format(portal_name=portal_name, course_id=course_id) + url = CURRICULUM_ITEMS_URL.format(portal_name=portal_name, course_id=course_id) page = 1 try: - data = self.session._get(url).json() + data = self.session._get(url, CURRICULUM_ITEMS_PARAMS).json() except conn_error as error: logger.fatal(f"Connection error: {error}") time.sleep(0.8) @@ -838,7 +845,7 @@ class Udemy: _count = data.get("count") est_page_count = math.ceil(_count / 100) # 100 is the max results per page while _next: - logger.info(f"> Downloading course information.. (Page {page + 1}/{est_page_count})") + logger.info(f"> Downloading course curriculum.. (Page {page + 1}/{est_page_count})") try: resp = self.session._get(_next) if not resp.ok: @@ -987,19 +994,18 @@ class Udemy: if isinstance(asset, dict): asset_type = asset.get("asset_type").lower() or asset.get("assetType").lower() if asset_type == "article": - if isinstance(supp_assets, list) and len(supp_assets) > 0: - retVal = self._extract_supplementary_assets(supp_assets, index) + retVal.extend(self._extract_article(asset, index)) elif asset_type == "video": if isinstance(supp_assets, list) and len(supp_assets) > 0: - retVal = self._extract_supplementary_assets(supp_assets, index) + retVal.extend(self._extract_supplementary_assets(supp_assets, index)) elif asset_type == "e-book": - retVal = self._extract_ebook(asset, index) + retVal.extend(self._extract_ebook(asset, index)) elif asset_type == "file": - retVal = self._extract_file(asset, index) + retVal.extend(self._extract_file(asset, index)) elif asset_type == "presentation": - retVal = self._extract_ppt(asset, index) + retVal.extend(self._extract_ppt(asset, index)) elif asset_type == "audio": - retVal = self._extract_audio(asset, index) + retVal.extend(self._extract_audio(asset, index)) else: logger.warning(f"Unknown asset type: {asset_type}") @@ -1108,9 +1114,9 @@ class Session(object): self._headers["Authorization"] = "Bearer {}".format(bearer_token) self._headers["X-Udemy-Authorization"] = "Bearer {}".format(bearer_token) - def _get(self, url): + def _get(self, url, params = None): for i in range(10): - session = self._session.get(url, headers=self._headers, cookies=cj) + session = self._session.get(url, headers=self._headers, cookies=cj, params=params) if session.ok or session.status_code in [502, 503]: return session if not session.ok: @@ -1550,7 +1556,7 @@ def process_normal_quiz(quiz, lecture, chapter_dir): lecture_path = os.path.join(chapter_dir, lecture_file_name) logger.info(f" > Processing quiz {lecture_index}") - with open("quiz_template.html", "r") as f: + with open("./templates/quiz_template.html", "r") as f: html = f.read() quiz_data = { "quiz_id": lecture["data"].get("id"), @@ -1572,7 +1578,7 @@ def process_coding_assignment(quiz, lecture, chapter_dir): logger.info(f" > Processing quiz {lecture_index} (coding assignment)") - with open("coding_assignment_template.html", "r") as f: + with open("./templates/coding_assignment_template.html", "r") as f: html = f.read() quiz_data = { "title": lecture_title, @@ -1648,7 +1654,6 @@ def parse_new(udemy: Udemy, udemy_object: dict): try: with open(lecture_path, encoding="utf8", mode="w") as f: f.write(html_content) - f.close() except Exception: logger.exception(" > Failed to write html file") else: @@ -1673,20 +1678,19 @@ def parse_new(udemy: Udemy, udemy_object: dict): download_url = asset.get("download_url") if asset_type == "article": - logger.warning( - "If you're seeing this message, that means that you reached a secret area that I haven't finished! jk I haven't implemented handling for this asset type, please report this at https://github.com/Puyodead1/udemy-downloader/issues so I can add it. When reporting, please provide the following information: " - ) - logger.warning("AssetType: Article; AssetData: ", asset) - # html_content = lecture.get("html_content") - # lecture_path = os.path.join( - # chapter_dir, "{}.html".format(sanitize(lecture_title))) - # try: - # with open(lecture_path, 'w') as f: - # f.write(html_content) - # f.close() - # except Exception as e: - # print("Failed to write html file: ", e) - # continue + body = asset.get("body") + lecture_path = os.path.join( + chapter_dir, "{}.html".format(sanitize_filename(lecture_title))) + try: + with open("./templates/article_template.html", "r") as f: + content = f.read() + content = content.replace("__title_placeholder__", lecture_title) + content = content.replace("__data_placeholder__", body) + with open(lecture_path, encoding="utf8", mode="w") as f: + f.write(content) + except Exception as e: + print("Failed to write html file: ", e) + continue elif asset_type == "video": logger.warning( "If you're seeing this message, that means that you reached a secret area that I haven't finished! jk I haven't implemented handling for this asset type, please report this at https://github.com/Puyodead1/udemy-downloader/issues so I can add it. When reporting, please provide the following information: " @@ -1727,7 +1731,6 @@ def parse_new(udemy: Udemy, udemy_object: dict): if name.lower() not in file_data: with open(filename, "a", encoding="utf-8", errors="ignore") as f: f.write(content) - f.close() def _print_course_info(udemy: Udemy, udemy_object: dict): @@ -1843,7 +1846,7 @@ def main(): title = sanitize_filename(course_info.get("title")) course_title = course_info.get("published_title") - logger.info("> Fetching course content, this may take a minute...") + logger.info("> Fetching course curriculum, this may take a minute...") if load_from_file: course_json = json.loads( open(os.path.join(os.getcwd(), "saved", "course_content.json"), encoding="utf8", mode="r").read() @@ -1852,15 +1855,14 @@ def main(): course_title = course_json.get("published_title") portal_name = course_json.get("portal_name") else: - course_json = udemy._extract_course_json(course_url, course_id, portal_name) + course_json = udemy._extract_course_curriculum(course_url, course_id, portal_name) course_json["portal_name"] = portal_name if save_to_file: with open(os.path.join(os.getcwd(), "saved", "course_content.json"), encoding="utf8", mode="w") as f: f.write(json.dumps(course_json)) - f.close() - logger.info("> Course content retrieved!") + logger.info("> Course curriculum retrieved!") course = course_json.get("results") resource = course_json.get("detail") @@ -1999,7 +2001,6 @@ def main(): udemy_object.pop("bearer_token") udemy_object["portal_name"] = portal_name f.write(json.dumps(udemy_object)) - f.close() logger.info("> Saved parsed data to json") if info: diff --git a/quiz_template.html b/quiz_template.html deleted file mode 100644 index f8ebae6..0000000 --- a/quiz_template.html +++ /dev/null @@ -1,515 +0,0 @@ - - - - - - - - Quiz - - - - -
-
-

-

-
-
-
- Score: 999 of - 999% -
-
Correct: 999
-
Incorrect: 999
-
- -
- - - - -
- - - - - \ No newline at end of file diff --git a/templates/article_template.html b/templates/article_template.html new file mode 100644 index 0000000..6b02d43 --- /dev/null +++ b/templates/article_template.html @@ -0,0 +1,66 @@ + + + + + + __title_placeholder__ + + + + +
+
+
__title_placeholder__
+
__data_placeholder__
+
+
+ + diff --git a/templates/coding_assignment_template.html b/templates/coding_assignment_template.html new file mode 100644 index 0000000..cdcbac4 --- /dev/null +++ b/templates/coding_assignment_template.html @@ -0,0 +1,122 @@ + + + + + + + Coding Assignment + + + + + +

+
+

Instructions

+
+
+
+

Test(s)

+
+
+
+

Solution(s)

+
+
+ + + + diff --git a/templates/quiz_template.html b/templates/quiz_template.html new file mode 100644 index 0000000..501f742 --- /dev/null +++ b/templates/quiz_template.html @@ -0,0 +1,479 @@ + + + + + + + Quiz + + + + +
+
+

+

+
+
+
+ Score: 999 of + 999% +
+
Correct: 999
+
Incorrect: 999
+
+ +
+ + + + +
+ + + +