diff --git a/coding_assignment_template.html b/coding_assignment_template.html
deleted file mode 100644
index 54016cf..0000000
--- a/coding_assignment_template.html
+++ /dev/null
@@ -1,118 +0,0 @@
-
-
-
-
-
-
- Coding Assignment
-
-
-
-
-
-
-
-
-
-
-
-
-
diff --git a/constants.py b/constants.py
index 2b9892b..4a89ee2 100644
--- a/constants.py
+++ b/constants.py
@@ -12,24 +12,32 @@ HEADERS = {
LOGIN_URL = "https://www.udemy.com/join/login-popup/?ref=&display_type=popup&loc"
LOGOUT_URL = "https://www.udemy.com/user/logout"
# COURSE_URL = "https://{portal_name}.udemy.com/api-2.0/courses/{course_id}/cached-subscriber-curriculum-items?fields[asset]=results,title,external_url,time_estimation,download_urls,slide_urls,filename,asset_type,captions,media_license_token,course_is_drmed,media_sources,stream_urls,body&fields[chapter]=object_index,title,sort_order&fields[lecture]=id,title,object_index,asset,supplementary_assets,view_html&page_size=10000"
-COURSE_URL = "https://{portal_name}.udemy.com/api-2.0/courses/{course_id}/subscriber-curriculum-items/?page_size=100&fields[asset]=asset_type,length,media_license_token,course_is_drmed,media_sources,thumbnail_sprite,slides,slide_urls,filename,download_urls,external_url,stream_urls&fields[chapter]=object_index,title&fields[lecture]=id,title,object_index,asset,supplementary_assets,view_html&caching_intent=True"
-COURSE_INFO_URL = "https://{portal_name}.udemy.com/api-2.0/courses/{course_id}/"
+CURRICULUM_ITEMS_URL = "https://{portal_name}.udemy.com/api-2.0/courses/{course_id}/subscriber-curriculum-items/"
+COURSE_URL = "https://{portal_name}.udemy.com/api-2.0/courses/{course_id}/"
COURSE_SEARCH = "https://{portal_name}.udemy.com/api-2.0/users/me/subscribed-courses?fields[course]=id,url,title,published_title&page=1&page_size=500&search={course_name}"
SUBSCRIBED_COURSES = "https://{portal_name}.udemy.com/api-2.0/users/me/subscribed-courses/?ordering=-last_accessed&fields[course]=id,title,url&page=1&page_size=12"
MY_COURSES_URL = "https://{portal_name}.udemy.com/api-2.0/users/me/subscribed-courses?fields[course]=id,url,title,published_title&ordering=-last_accessed,-access_time&page=1&page_size=10000"
COLLECTION_URL = "https://{portal_name}.udemy.com/api-2.0/users/me/subscribed-courses-collections/?collection_has_courses=True&course_limit=20&fields[course]=last_accessed_time,title,published_title&fields[user_has_subscribed_courses_collection]=@all&page=1&page_size=1000"
QUIZ_URL = "https://{portal_name}.udemy.com/api-2.0/quizzes/{quiz_id}/assessments/?version=1&page_size=250&fields[assessment]=id,assessment_type,prompt,correct_response,section,question_plain,related_lectures"
+# Query parameters for the first CURRICULUM_ITEMS_URL request: main.py passes
+# this dict as `params` to Session._get. Follow-up pages are fetched from the
+# `next` URL of each response without these params being passed again.
+CURRICULUM_ITEMS_PARAMS = {
+ "fields[lecture]": "title,object_index,created,asset,supplementary_assets,description,download_url",
+ "fields[quiz]": "title,object_index,type",
+ "fields[practice]": "title,object_index",
+ "fields[chapter]": "title,object_index",
+ # NOTE(review): "status" appears twice in this field list - presumably harmless, confirm and dedupe.
+ "fields[asset]": "title,filename,asset_type,status,is_external,media_license_token,course_is_drmed,media_sources,captions,slides,slide_urls,download_urls,external_url,@min,status,delayed_asset_message,processing_errors,body",
+ "caching_intent": True,
+ # NOTE(review): main.py estimates the page count with `_count / 100`; confirm that still matches this page size.
+ "page_size": "200",
+}
+
+# Presumably the query parameters meant to accompany COURSE_URL.
+# NOTE(review): the COURSE_URL request visible in this change calls Session._get(url) without params - confirm intended use.
+COURSE_URL_PARAMS = {"fields[course]": "title", "use_remote_version": True, "caching_intent": True}
+
HOME_DIR = os.getcwd()
SAVED_DIR = os.path.join(os.getcwd(), "saved")
KEY_FILE_PATH = os.path.join(os.getcwd(), "keyfile.json")
COOKIE_FILE_PATH = os.path.join(os.getcwd(), "cookies.txt")
LOG_DIR_PATH = os.path.join(os.getcwd(), "logs")
-LOG_FILE_PATH = os.path.join(
- os.getcwd(), "logs", f"{time.strftime('%Y-%m-%d-%I-%M-%S')}.log"
-)
-LOG_FORMAT = (
- "[%(asctime)s] [%(name)s] [%(funcName)s:%(lineno)d] %(levelname)s: %(message)s"
-)
+LOG_FILE_PATH = os.path.join(os.getcwd(), "logs", f"{time.strftime('%Y-%m-%d-%I-%M-%S')}.log")
+LOG_FORMAT = "[%(asctime)s] [%(name)s] [%(funcName)s:%(lineno)d] %(levelname)s: %(message)s"
LOG_DATE_FORMAT = "%I:%M:%S"
LOG_LEVEL = logging.INFO
diff --git a/main.py b/main.py
index 4056501..64a0333 100644
--- a/main.py
+++ b/main.py
@@ -1,6 +1,5 @@
# -*- coding: utf-8 -*-
import argparse
-import glob
import json
import logging
import math
@@ -481,6 +480,14 @@ class Udemy:
)
return _temp
+ def _extract_article(self, asset, id):
+ """Describe an article asset as a single-entry list.
+
+ Returns a one-element list containing a dict with the keys "type"
+ (always "article"), "body" (asset.get("body"), so None when the key is
+ missing), "extension" (always "html") and "id" (the `id` argument,
+ passed through unchanged). The list form lets the caller pass the
+ result to retVal.extend().
+ """
+ return [{
+ "type": "article",
+ "body": asset.get("body"),
+ "extension": "html",
+ "id": id,
+ }]
+
def _extract_ppt(self, asset, lecture_counter):
_temp = []
download_urls = asset.get("download_urls")
@@ -813,7 +820,7 @@ class Udemy:
def _extract_course_info_json(self, url, course_id):
self.session._headers.update({"Referer": url})
- url = COURSE_INFO_URL.format(portal_name=portal_name, course_id=course_id)
+ url = COURSE_URL.format(portal_name=portal_name, course_id=course_id)
try:
resp = self.session._get(url).json()
except conn_error as error:
@@ -823,12 +830,12 @@ class Udemy:
else:
return resp
- def _extract_course_json(self, url, course_id, portal_name):
+ def _extract_course_curriculum(self, url, course_id, portal_name):
self.session._headers.update({"Referer": url})
- url = COURSE_URL.format(portal_name=portal_name, course_id=course_id)
+ url = CURRICULUM_ITEMS_URL.format(portal_name=portal_name, course_id=course_id)
page = 1
try:
- data = self.session._get(url).json()
+ data = self.session._get(url, CURRICULUM_ITEMS_PARAMS).json()
except conn_error as error:
logger.fatal(f"Connection error: {error}")
time.sleep(0.8)
@@ -838,7 +845,7 @@ class Udemy:
_count = data.get("count")
est_page_count = math.ceil(_count / 100) # 100 is the max results per page
while _next:
- logger.info(f"> Downloading course information.. (Page {page + 1}/{est_page_count})")
+ logger.info(f"> Downloading course curriculum.. (Page {page + 1}/{est_page_count})")
try:
resp = self.session._get(_next)
if not resp.ok:
@@ -987,19 +994,18 @@ class Udemy:
if isinstance(asset, dict):
asset_type = asset.get("asset_type").lower() or asset.get("assetType").lower()
if asset_type == "article":
- if isinstance(supp_assets, list) and len(supp_assets) > 0:
- retVal = self._extract_supplementary_assets(supp_assets, index)
+ retVal.extend(self._extract_article(asset, index))
elif asset_type == "video":
if isinstance(supp_assets, list) and len(supp_assets) > 0:
- retVal = self._extract_supplementary_assets(supp_assets, index)
+ retVal.extend(self._extract_supplementary_assets(supp_assets, index))
elif asset_type == "e-book":
- retVal = self._extract_ebook(asset, index)
+ retVal.extend(self._extract_ebook(asset, index))
elif asset_type == "file":
- retVal = self._extract_file(asset, index)
+ retVal.extend(self._extract_file(asset, index))
elif asset_type == "presentation":
- retVal = self._extract_ppt(asset, index)
+ retVal.extend(self._extract_ppt(asset, index))
elif asset_type == "audio":
- retVal = self._extract_audio(asset, index)
+ retVal.extend(self._extract_audio(asset, index))
else:
logger.warning(f"Unknown asset type: {asset_type}")
@@ -1108,9 +1114,9 @@ class Session(object):
self._headers["Authorization"] = "Bearer {}".format(bearer_token)
self._headers["X-Udemy-Authorization"] = "Bearer {}".format(bearer_token)
- def _get(self, url):
+ def _get(self, url, params = None):
for i in range(10):
- session = self._session.get(url, headers=self._headers, cookies=cj)
+ session = self._session.get(url, headers=self._headers, cookies=cj, params=params)
if session.ok or session.status_code in [502, 503]:
return session
if not session.ok:
@@ -1550,7 +1556,7 @@ def process_normal_quiz(quiz, lecture, chapter_dir):
lecture_path = os.path.join(chapter_dir, lecture_file_name)
logger.info(f" > Processing quiz {lecture_index}")
- with open("quiz_template.html", "r") as f:
+ with open("./templates/quiz_template.html", "r") as f:
html = f.read()
quiz_data = {
"quiz_id": lecture["data"].get("id"),
@@ -1572,7 +1578,7 @@ def process_coding_assignment(quiz, lecture, chapter_dir):
logger.info(f" > Processing quiz {lecture_index} (coding assignment)")
- with open("coding_assignment_template.html", "r") as f:
+ with open("./templates/coding_assignment_template.html", "r") as f:
html = f.read()
quiz_data = {
"title": lecture_title,
@@ -1648,7 +1654,6 @@ def parse_new(udemy: Udemy, udemy_object: dict):
try:
with open(lecture_path, encoding="utf8", mode="w") as f:
f.write(html_content)
- f.close()
except Exception:
logger.exception(" > Failed to write html file")
else:
@@ -1673,20 +1678,19 @@ def parse_new(udemy: Udemy, udemy_object: dict):
download_url = asset.get("download_url")
if asset_type == "article":
- logger.warning(
- "If you're seeing this message, that means that you reached a secret area that I haven't finished! jk I haven't implemented handling for this asset type, please report this at https://github.com/Puyodead1/udemy-downloader/issues so I can add it. When reporting, please provide the following information: "
- )
- logger.warning("AssetType: Article; AssetData: ", asset)
- # html_content = lecture.get("html_content")
- # lecture_path = os.path.join(
- # chapter_dir, "{}.html".format(sanitize(lecture_title)))
- # try:
- # with open(lecture_path, 'w') as f:
- # f.write(html_content)
- # f.close()
- # except Exception as e:
- # print("Failed to write html file: ", e)
- # continue
+ body = asset.get("body")
+ lecture_path = os.path.join(
+ chapter_dir, "{}.html".format(sanitize_filename(lecture_title)))
+ try:
+ with open("./templates/article_template.html", "r") as f:
+ content = f.read()
+ content = content.replace("__title_placeholder__", lecture_title)
+ content = content.replace("__data_placeholder__", body)
+ with open(lecture_path, encoding="utf8", mode="w") as f:
+ f.write(content)
+ except Exception as e:
+ print("Failed to write html file: ", e)
+ continue
elif asset_type == "video":
logger.warning(
"If you're seeing this message, that means that you reached a secret area that I haven't finished! jk I haven't implemented handling for this asset type, please report this at https://github.com/Puyodead1/udemy-downloader/issues so I can add it. When reporting, please provide the following information: "
@@ -1727,7 +1731,6 @@ def parse_new(udemy: Udemy, udemy_object: dict):
if name.lower() not in file_data:
with open(filename, "a", encoding="utf-8", errors="ignore") as f:
f.write(content)
- f.close()
def _print_course_info(udemy: Udemy, udemy_object: dict):
@@ -1843,7 +1846,7 @@ def main():
title = sanitize_filename(course_info.get("title"))
course_title = course_info.get("published_title")
- logger.info("> Fetching course content, this may take a minute...")
+ logger.info("> Fetching course curriculum, this may take a minute...")
if load_from_file:
course_json = json.loads(
open(os.path.join(os.getcwd(), "saved", "course_content.json"), encoding="utf8", mode="r").read()
@@ -1852,15 +1855,14 @@ def main():
course_title = course_json.get("published_title")
portal_name = course_json.get("portal_name")
else:
- course_json = udemy._extract_course_json(course_url, course_id, portal_name)
+ course_json = udemy._extract_course_curriculum(course_url, course_id, portal_name)
course_json["portal_name"] = portal_name
if save_to_file:
with open(os.path.join(os.getcwd(), "saved", "course_content.json"), encoding="utf8", mode="w") as f:
f.write(json.dumps(course_json))
- f.close()
- logger.info("> Course content retrieved!")
+ logger.info("> Course curriculum retrieved!")
course = course_json.get("results")
resource = course_json.get("detail")
@@ -1999,7 +2001,6 @@ def main():
udemy_object.pop("bearer_token")
udemy_object["portal_name"] = portal_name
f.write(json.dumps(udemy_object))
- f.close()
logger.info("> Saved parsed data to json")
if info:
diff --git a/quiz_template.html b/quiz_template.html
deleted file mode 100644
index f8ebae6..0000000
--- a/quiz_template.html
+++ /dev/null
@@ -1,515 +0,0 @@
-
-
-
-
-
-
-
- Quiz
-
-
-
-
-
-
-
-
- Score: 999 of
- 999%
-
- Correct: 999
- Incorrect: 999
-
-
-
-
-
-
-
-
-
-
-
\ No newline at end of file
diff --git a/templates/article_template.html b/templates/article_template.html
new file mode 100644
index 0000000..6b02d43
--- /dev/null
+++ b/templates/article_template.html
@@ -0,0 +1,66 @@
+
+
+
+
+
+ __title_placeholder__
+
+
+
+
+
+
+
__title_placeholder__
+
__data_placeholder__
+
+
+
+
diff --git a/templates/coding_assignment_template.html b/templates/coding_assignment_template.html
new file mode 100644
index 0000000..cdcbac4
--- /dev/null
+++ b/templates/coding_assignment_template.html
@@ -0,0 +1,122 @@
+
+
+
+
+
+
+ Coding Assignment
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/templates/quiz_template.html b/templates/quiz_template.html
new file mode 100644
index 0000000..501f742
--- /dev/null
+++ b/templates/quiz_template.html
@@ -0,0 +1,479 @@
+
+
+
+
+
+
+ Quiz
+
+
+
+
+
+
+
+
+ Score: 999 of
+ 999%
+
+ Correct: 999
+ Incorrect: 999
+
+
+
+
+
+
+
+
+
+