fix quizzes, add cmd line switch

This commit is contained in:
Puyodead1 2023-07-20 11:57:59 -04:00
parent 88edbdf538
commit bc9ff0ba18
No known key found for this signature in database
GPG Key ID: A4FA4FEC0DD353FC
2 changed files with 100 additions and 49 deletions

114
main.py
View File

@ -33,8 +33,9 @@ cookies = ""
downloader = None downloader = None
logger: logging.Logger = None logger: logging.Logger = None
dl_assets = False dl_assets = False
skip_lectures = False
dl_captions = False dl_captions = False
dl_quizzes = False
skip_lectures = False
caption_locale = "en" caption_locale = "en"
quality = None quality = None
bearer_token = None bearer_token = None
@ -67,7 +68,7 @@ def log_subprocess_output(prefix: str, pipe: IO[bytes]):
# this is the first function that is called, we parse the arguments, setup the logger, and ensure that required directories exist # this is the first function that is called, we parse the arguments, setup the logger, and ensure that required directories exist
def pre_run(): def pre_run():
global cookies, dl_assets, skip_lectures, dl_captions, caption_locale, quality, bearer_token, portal_name, course_name, keep_vtt, skip_hls, concurrent_downloads, disable_ipv6, load_from_file, save_to_file, bearer_token, course_url, info, logger, keys, id_as_course_name, is_subscription_course, LOG_LEVEL, use_h265, h265_crf, h265_preset, use_nvenc global cookies, dl_assets, dl_captions, dl_quizzes, skip_lectures, caption_locale, quality, bearer_token, course_name, keep_vtt, skip_hls, concurrent_downloads, disable_ipv6, load_from_file, save_to_file, bearer_token, course_url, info, logger, keys, id_as_course_name, is_subscription_course, LOG_LEVEL, use_h265, h265_crf, h265_preset, use_nvenc
# make sure the directory exists # make sure the directory exists
if not os.path.exists(DOWNLOAD_DIR): if not os.path.exists(DOWNLOAD_DIR):
@ -131,6 +132,12 @@ def pre_run():
action="store_true", action="store_true",
help="If specified, captions will be downloaded", help="If specified, captions will be downloaded",
) )
parser.add_argument(
"--download-quizzes",
dest="download_quizzes",
action="store_true",
help="If specified, quizzes will be downloaded",
)
parser.add_argument( parser.add_argument(
"--keep-vtt", "--keep-vtt",
dest="keep_vtt", dest="keep_vtt",
@ -215,6 +222,8 @@ def pre_run():
caption_locale = args.lang caption_locale = args.lang
if args.download_captions: if args.download_captions:
dl_captions = True dl_captions = True
if args.download_quizzes:
dl_quizzes = True
if args.skip_lectures: if args.skip_lectures:
skip_lectures = True skip_lectures = True
if args.quality: if args.quality:
@ -334,6 +343,7 @@ class Udemy:
sys.exit(1) sys.exit(1)
def _get_quiz(self, quiz_id): def _get_quiz(self, quiz_id):
print(portal_name)
self.session._headers.update( self.session._headers.update(
{ {
"Host": "{portal_name}.udemy.com".format(portal_name=portal_name), "Host": "{portal_name}.udemy.com".format(portal_name=portal_name),
@ -349,7 +359,7 @@ class Udemy:
time.sleep(0.8) time.sleep(0.8)
sys.exit(1) sys.exit(1)
else: else:
return resp return resp.get("results")
def _extract_supplementary_assets(self, supp_assets, lecture_counter): def _extract_supplementary_assets(self, supp_assets, lecture_counter):
_temp = [] _temp = []
@ -666,7 +676,7 @@ class Udemy:
results = webpage.get("results", []) results = webpage.get("results", [])
return results return results
def _extract_course_info_json(self, url, course_id, portal_name): def _extract_course_info_json(self, url, course_id):
self.session._headers.update({"Referer": url}) self.session._headers.update({"Referer": url})
url = COURSE_INFO_URL.format(portal_name=portal_name, course_id=course_id) url = COURSE_INFO_URL.format(portal_name=portal_name, course_id=course_id)
try: try:
@ -854,12 +864,14 @@ class Udemy:
data_args = data.attrs["data-module-args"] data_args = data.attrs["data-module-args"]
data_json = json.loads(data_args) data_json = json.loads(data_args)
course_id = data_json.get("courseId", None) course_id = data_json.get("courseId", None)
portal_name = self.extract_portal_name(url) return course_id
return course_id, portal_name
def _extract_course_info(self, url): def _extract_course_info(self, url):
global portal_name
portal_name, course_name = self.extract_course_name(url) portal_name, course_name = self.extract_course_name(url)
course = {} course = {
"portal_name": portal_name
}
if not is_subscription_course: if not is_subscription_course:
results = self._subscribed_courses(portal_name=portal_name, course_name=course_name) results = self._subscribed_courses(portal_name=portal_name, course_name=course_name)
@ -875,11 +887,10 @@ class Udemy:
course = self._extract_course(response=results, course_name=course_name) course = self._extract_course(response=results, course_name=course_name)
if not course or is_subscription_course: if not course or is_subscription_course:
course_id, portal_name = self._extract_subscription_course_info(url) course_id = self._extract_subscription_course_info(url)
course = self._extract_course_info_json(url, course_id, portal_name) course = self._extract_course_info_json(url, course_id)
if course: if course:
course.update({"portal_name": portal_name})
return course.get("id"), course return course.get("id"), course
if not course: if not course:
logger.fatal("Downloading course information, course id not found .. ") logger.fatal("Downloading course information, course id not found .. ")
@ -1564,9 +1575,9 @@ def process_lecture(lecture, lecture_path, lecture_file_name, chapter_dir):
def process_quiz(udemy: Udemy, lecture, lecture_path, chapter_dir): def process_quiz(udemy: Udemy, lecture, chapter_dir):
lecture_title = lecture.get("lecture_title") lecture_title = lecture.get("lecture_title")
lecture_index = lecture.get("quiz_index") lecture_index = lecture.get("lecture_index")
lecture_file_name = sanitize_filename(lecture_title + ".html") lecture_file_name = sanitize_filename(lecture_title + ".html")
lecture_path = os.path.join(chapter_dir, lecture_file_name) lecture_path = os.path.join(chapter_dir, lecture_file_name)
@ -1574,7 +1585,11 @@ def process_quiz(udemy: Udemy, lecture, lecture_path, chapter_dir):
questions = udemy._get_quiz(lecture.get("id")) questions = udemy._get_quiz(lecture.get("id"))
with open("quiz_template.html", "r") as f: with open("quiz_template.html", "r") as f:
html = f.read() html = f.read()
html.replace("__questions_placeholder__", json.dumps(questions)) quiz_data = {
"pass_percent": lecture.get("data").get("pass_percent"),
"questions": questions,
}
html = html.replace("__data_placeholder__", json.dumps(quiz_data))
with open(lecture_path, "w") as f: with open(lecture_path, "w") as f:
f.write(html) f.write(html)
@ -1601,14 +1616,15 @@ def parse_new(udemy: Udemy, udemy_object: dict):
for lecture in chapter.get("lectures"): for lecture in chapter.get("lectures"):
clazz = lecture.get("_class") clazz = lecture.get("_class")
if clazz == "quiz":
process_quiz(udemy, lecture, lecture_path, chapter_dir) if clazz == "quiz" and dl_quizzes:
process_quiz(udemy, lecture, chapter_dir)
continue continue
index = lecture.get("index") # this is lecture_counter index = lecture.get("index") # this is lecture_counter
lecture_index = lecture.get("lecture_index") # this is the raw object index from udemy # lecture_index = lecture.get("lecture_index") # this is the raw object index from udemy
lecture_title = lecture.get("lecture_title")
lecture_title = lecture.get("lecture_title")
parsed_lecture = udemy._parse_lecture(lecture) parsed_lecture = udemy._parse_lecture(lecture)
lecture_extension = parsed_lecture.get("extension") lecture_extension = parsed_lecture.get("extension")
@ -1619,8 +1635,9 @@ def parse_new(udemy: Udemy, udemy_object: dict):
lecture_file_name = sanitize_filename(lecture_title + "." + extension) lecture_file_name = sanitize_filename(lecture_title + "." + extension)
lecture_path = os.path.join(chapter_dir, lecture_file_name) lecture_path = os.path.join(chapter_dir, lecture_file_name)
logger.info(f" > Processing lecture {lecture_index} of {total_lectures}")
if not skip_lectures: if not skip_lectures:
logger.info(f" > Processing lecture {index} of {total_lectures}")
# Check if the lecture is already downloaded # Check if the lecture is already downloaded
if os.path.isfile(lecture_path): if os.path.isfile(lecture_path):
logger.info(" > Lecture '%s' is already downloaded, skipping..." % lecture_title) logger.info(" > Lecture '%s' is already downloaded, skipping..." % lecture_title)
@ -1762,7 +1779,7 @@ def _print_course_info(udemy: Udemy, udemy_object: dict):
def main(): def main():
global bearer_token global bearer_token, portal_name
aria_ret_val = check_for_aria() aria_ret_val = check_for_aria()
if not aria_ret_val: if not aria_ret_val:
logger.fatal("> Aria2c is missing from your system or path!") logger.fatal("> Aria2c is missing from your system or path!")
@ -1798,7 +1815,6 @@ def main():
if course_info and isinstance(course_info, dict): if course_info and isinstance(course_info, dict):
title = sanitize_filename(course_info.get("title")) title = sanitize_filename(course_info.get("title"))
course_title = course_info.get("published_title") course_title = course_info.get("published_title")
portal_name = course_info.get("portal_name")
logger.info("> Fetching course content, this may take a minute...") logger.info("> Fetching course content, this may take a minute...")
if load_from_file: if load_from_file:
@ -1808,6 +1824,8 @@ def main():
portal_name = course_json.get("portal_name") portal_name = course_json.get("portal_name")
else: else:
course_json = udemy._extract_course_json(course_url, course_id, portal_name) course_json = udemy._extract_course_json(course_url, course_id, portal_name)
course_json["portal_name"] = portal_name
if save_to_file: if save_to_file:
with open(os.path.join(os.getcwd(), "saved", "course_content.json"), encoding="utf8", mode="w") as f: with open(os.path.join(os.getcwd(), "saved", "course_content.json"), encoding="utf8", mode="w") as f:
f.write(json.dumps(course_json)) f.write(json.dumps(course_json))
@ -1855,13 +1873,13 @@ def main():
elif clazz == "lecture": elif clazz == "lecture":
lecture_counter += 1 lecture_counter += 1
lecture_id = entry.get("id") lecture_id = entry.get("id")
if len(udemy_object["chapters"]) == 0: # if len(udemy_object["chapters"]) == 0:
lectures = [] # lectures = []
chapter_index = entry.get("object_index") # chapter_index = entry.get("object_index")
chapter_title = "{0:02d} - ".format(chapter_index) + sanitize_filename(entry.get("title")) # chapter_title = "{0:02d} - ".format(chapter_index) + sanitize_filename(entry.get("title"))
if chapter_title not in udemy_object["chapters"]: # if chapter_title not in udemy_object["chapters"]:
udemy_object["chapters"].append({"chapter_title": chapter_title, "chapter_id": lecture_id, "chapter_index": chapter_index, "lectures": []}) # udemy_object["chapters"].append({"chapter_title": chapter_title, "chapter_id": lecture_id, "chapter_index": chapter_index, "lectures": []})
counter += 1 # counter += 1
if lecture_id: if lecture_id:
logger.info(f"Processing {course.index(entry)} of {len(course)}") logger.info(f"Processing {course.index(entry)} of {len(course)}")
@ -1869,26 +1887,35 @@ def main():
lecture_index = entry.get("object_index") lecture_index = entry.get("object_index")
lecture_title = "{0:03d} ".format(lecture_counter) + sanitize_filename(entry.get("title")) lecture_title = "{0:03d} ".format(lecture_counter) + sanitize_filename(entry.get("title"))
lectures.append({"index": lecture_counter, "lecture_index": lecture_index, "lecture_title": lecture_title, "data": entry}) lectures.append({"index": lecture_counter, "lecture_index": lecture_index, "lecture_title": lecture_title, "_class": entry.get("_class"), "id": lecture_id, "data": entry})
udemy_object["chapters"][counter]["lectures"] = lectures udemy_object["chapters"][counter]["lectures"] = lectures
udemy_object["chapters"][counter]["lecture_count"] = len(lectures) udemy_object["chapters"][counter]["lecture_count"] = len(lectures)
elif clazz == "quiz": elif clazz == "quiz":
lecture_counter += 1
lecture_id = entry.get("id") lecture_id = entry.get("id")
if len(udemy_object["chapters"]) == 0: # if len(udemy_object["chapters"]) == 0:
lectures = [] # lectures = []
chapter_index = entry.get("object_index") # chapter_index = entry.get("object_index")
chapter_title = "{0:02d} - ".format(chapter_index) + sanitize_filename(entry.get("title")) # chapter_title = "{0:02d} - ".format(chapter_index) + sanitize_filename(entry.get("title"))
if chapter_title not in udemy_object["chapters"]: # if chapter_title not in udemy_object["chapters"]:
lecture_counter = 0 # lecture_counter = 0
udemy_object["chapters"].append( # udemy_object["chapters"].append(
{ # {
"chapter_title": chapter_title, # "chapter_title": chapter_title,
"chapter_id": lecture_id, # "chapter_id": lecture_id,
"chapter_index": chapter_index, # "chapter_index": chapter_index,
"lectures": [], # "lectures": [],
} # }
) # )
counter += 1 # counter += 1
if lecture_id:
logger.info(f"Processing {course.index(entry)} of {len(course)}")
lecture_index = entry.get("object_index")
lecture_title = "{0:03d} ".format(lecture_counter) + sanitize_filename(entry.get("title"))
lectures.append({"index": lecture_counter, "lecture_index": lecture_index, "lecture_title": lecture_title, "_class": entry.get("_class"), "id": lecture_id, "data": entry})
udemy_object["chapters"][counter]["lectures"] = lectures udemy_object["chapters"][counter]["lectures"] = lectures
udemy_object["chapters"][counter]["lectures_count"] = len(lectures) udemy_object["chapters"][counter]["lectures_count"] = len(lectures)
@ -1900,6 +1927,7 @@ def main():
with open(os.path.join(os.getcwd(), "saved", "_udemy.json"), encoding="utf8", mode="w") as f: with open(os.path.join(os.getcwd(), "saved", "_udemy.json"), encoding="utf8", mode="w") as f:
# remove "bearer_token" from the object before writing # remove "bearer_token" from the object before writing
udemy_object.pop("bearer_token") udemy_object.pop("bearer_token")
udemy_object["portal_name"] = portal_name
f.write(json.dumps(udemy_object)) f.write(json.dumps(udemy_object))
f.close() f.close()
logger.info("> Saved parsed data to json") logger.info("> Saved parsed data to json")

View File

@ -102,18 +102,31 @@
background: #1c1d1f; background: #1c1d1f;
box-shadow: 0 0 0 0.4rem #fff inset; box-shadow: 0 0 0 0.4rem #fff inset;
} }
.score {
position: fixed;
}
</style> </style>
</head> </head>
<body onload="main()"> <body onload="main()">
<div id="score" class="score">
<span>Score: N/A of N/A</span>
</div>
<div id="quiz-container" class="quiz-content"></div> <div id="quiz-container" class="quiz-content"></div>
<script> <script>
const quizData = __questions_placeholder__; const quizData = __data_placeholder__;
var correct = 0;
var total = 0;
const questionData = quizData.questions
const passPercent = quizData.pass_percent
function main() { function main() {
total = questionData.length;
var questions = []; var questions = [];
for (var i = 0; i < quizData.length; i++) { for (var i = 0; i < questionData.length; i++) {
var question = quizData[i]; var question = questionData[i];
var questionText = question.prompt.question; var questionText = question.prompt.question;
var answers = question.prompt.answers; var answers = question.prompt.answers;
var correctAnswer = question.correct_response[0]; var correctAnswer = question.correct_response[0];
@ -127,6 +140,8 @@
questions.push(questionObj); questions.push(questionObj);
} }
updateScore();
// display the questions // display the questions
var questionsContainer = document.getElementById("quiz-container"); var questionsContainer = document.getElementById("quiz-container");
for (var i = 0; i < questions.length; i++) { for (var i = 0; i < questions.length; i++) {
@ -204,10 +219,12 @@
if (answerText == question.correctAnswer) { if (answerText == question.correctAnswer) {
answer.parentElement.parentElement.style.borderColor = answer.parentElement.parentElement.style.borderColor =
"limegreen"; "limegreen";
alert("Correct!"); // alert("Correct!");
correct++;
updateScore();
} else { } else {
answer.parentElement.parentElement.style.borderColor = "red"; answer.parentElement.parentElement.style.borderColor = "red";
alert("Incorrect!"); // alert("Incorrect!");
} }
} else { } else {
alert("Please select an answer."); alert("Please select an answer.");
@ -235,6 +252,12 @@
span.classList.add("selected"); span.classList.add("selected");
input.checked = true; input.checked = true;
} }
/**
 * Refresh the on-page score readout.
 *
 * Reads the script-level `correct`, `total` and `passPercent` values and
 * writes "Score: <percent>/<passPercent>%" into the element with id "score".
 */
function updateScore() {
  var scoreElem = document.getElementById("score");
  // A quiz with no questions would otherwise compute 0 / 0 and display "NaN".
  var percent = total > 0 ? (correct / total) * 100 : 0;
  // Keep at most one decimal place so thirds do not render as 33.33333333333333.
  var rounded = Math.round(percent * 10) / 10;
  // The readout is plain text built from the embedded quiz data, so assign it
  // as text rather than having the browser parse it as HTML.
  scoreElem.textContent = "Score: " + rounded + "/" + passPercent + "%";
}
</script> </script>
</body> </body>
</html> </html>