import os
import requests
import shutil
import json
import glob
import urllib.request
import argparse
import sys
import datetime
import subprocess
from sanitize_filename import sanitize
from tqdm import tqdm
from dotenv import load_dotenv
from mpegdash.parser import MPEGDASHParser
from utils import extract_kid
from vtt_to_srt import convert
from requests.exceptions import ConnectionError as conn_error
from html.parser import HTMLParser as compat_HTMLParser
from sanitize import sanitize, slugify, SLUG_OK
from pyffmpeg import FFMPeg as FFMPEG

course_id = None
header_bearer = None
download_dir = os.path.join(os.getcwd(), "out_dir")
working_dir = os.path.join(os.getcwd(), "working_dir")  # set the folder to download segments for DRM videos
retry = 3
home_dir = os.getcwd()
keyfile_path = os.path.join(os.getcwd(), "keyfile.json")
dl_assets = False
dl_captions = False
skip_lectures = False
caption_locale = "en"
quality = None  # None will download the best possible
valid_qualities = [144, 360, 480, 720, 1080]

if not os.path.exists(working_dir):
    os.makedirs(working_dir)
if not os.path.exists(download_dir):
    os.makedirs(download_dir)

# Get the keys
with open(keyfile_path, 'r') as keyfile:
    keyfile = json.loads(keyfile.read())


def durationtoseconds(period):
    """
    @author Jayapraveen
    """
    # Duration format is PTxDxHxMxS
    if period[:2] == "PT":
        period = period[2:]
        day = int(period.split("D")[0] if 'D' in period else 0)
        hour = int(period.split("H")[0].split("D")[-1] if 'H' in period else 0)
        minute = int(
            period.split("M")[0].split("H")[-1] if 'M' in period else 0)
        second = period.split("S")[0].split("M")[-1]
        print("Total time: " + str(day) + " days " + str(hour) + " hours " +
              str(minute) + " minutes and " + str(second) + " seconds")
        total_time = float(
            str((day * 24 * 60 * 60) + (hour * 60 * 60) + (minute * 60) +
                (int(second.split('.')[0]))) + '.' +
            str(int(second.split('.')[-1])))
        return total_time
    else:
        print("Duration Format Error")
        return None
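
# Quick illustrative check of durationtoseconds() (example value only):
#
#   durationtoseconds("PT0H3M30.5S")  # prints the breakdown, returns 210.5
#
# i.e. 3 * 60 + 30.5 seconds. Strings that do not start with "PT" return None.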


def download_media(filename, url, lecture_working_dir, epoch=0):
    # segments are written to (and checked in) the lecture's working directory
    filepath = os.path.join(lecture_working_dir, filename)
    if os.path.isfile(filepath):
        print("Segment already downloaded.. skipping..")
    else:
        media = requests.get(url, stream=True)
        media_length = int(media.headers.get("content-length"))
        if media.status_code == 200:
            if (os.path.isfile(filepath)
                    and os.path.getsize(filepath) >= media_length):
                print("Segment already downloaded.. skipping write to disk..")
            else:
                try:
                    pbar = tqdm(total=media_length,
                                initial=0,
                                unit='B',
                                unit_scale=True,
                                desc=filename)
                    with open(filepath, 'wb') as video_file:
                        for chunk in media.iter_content(chunk_size=1024):
                            if chunk:
                                video_file.write(chunk)
                                pbar.update(1024)
                    pbar.close()
                    print("Segment downloaded: " + filename)
                    return False  # Successfully downloaded the file
                except conn_error:
                    print(
                        "Connection error: Reattempting download of segment..")
                    download_media(filename, url, lecture_working_dir,
                                   epoch + 1)

            if os.path.getsize(filepath) >= media_length:
                pass
            else:
                print("Segment is faulty.. Redownloading...")
                download_media(filename, url, lecture_working_dir, epoch + 1)
        elif media.status_code == 404:
            print("Probably end hit!\n", url)
            return True  # Probably hit the last of the file
        else:
            if epoch > retry:
                exit("Error fetching segment, exceeded retry times.")
            print("Error fetching segment file.. Redownloading...")
            download_media(filename, url, lecture_working_dir, epoch + 1)


def cleanup(path):
    """
    @author Jayapraveen
    """
    leftover_files = glob.glob(path + '/*.mp4', recursive=True)
    for file_list in leftover_files:
        try:
            os.remove(file_list)
        except OSError:
            print(f"Error deleting file: {file_list}")
    os.removedirs(path)


def mux_process(video_title, lecture_working_dir, outfile):
    """
    @author Jayapraveen
    """
    time_stamp = datetime.datetime.now().isoformat() + 'Z'
    if os.name == "nt":
        command = f"ffmpeg -y -i \"{lecture_working_dir}\\decrypted_audio.mp4\" -i \"{lecture_working_dir}\\decrypted_video.mp4\" -acodec copy -vcodec copy -fflags +bitexact -map_metadata -1 -metadata title=\"{video_title}\" -metadata creation_time=\"{time_stamp}\" \"{outfile}\""
    else:
        command = f"nice -n 7 ffmpeg -y -i \"{lecture_working_dir}//decrypted_audio.mp4\" -i \"{lecture_working_dir}//decrypted_video.mp4\" -acodec copy -vcodec copy -fflags +bitexact -map_metadata -1 -metadata title=\"{video_title}\" -metadata creation_time=\"{time_stamp}\" \"{outfile}\""
    os.system(command)


def decrypt(kid, filename, lecture_working_dir):
    """
    @author Jayapraveen
    """
    print("> Decrypting, this might take a minute...")
    try:
        key = keyfile[kid.lower()]
    except KeyError:
        exit("Key not found")
    if os.name == "nt":
        os.system(
            f"mp4decrypt --key 1:{key} \"{lecture_working_dir}\\encrypted_{filename}.mp4\" \"{lecture_working_dir}\\decrypted_{filename}.mp4\""
        )
    else:
        os.system(
            f"nice -n 7 mp4decrypt --key 1:{key} \"{lecture_working_dir}//encrypted_{filename}.mp4\" \"{lecture_working_dir}//decrypted_{filename}.mp4\""
        )


# NOTE: the signature below is reconstructed from the call site in
# process_lecture(); the part of the function that builds `list_path`,
# `audio_urls` and `video_urls` from the DASH manifest is not present in this
# file, so this body is incomplete as-is.
def handle_segments(video_source, audio_source, video_title,
                    lecture_working_dir, output_path):
    with open(list_path, 'w') as f:
        f.write("{}\n{}".format(audio_urls, video_urls))
    print("> Downloading Lecture Segments...")
    ret_code = subprocess.Popen([
        "aria2c", "-i", list_path, "-j16", "-s20", "-x16", "-c",
        "--auto-file-renaming=false", "--summary-interval=0"
    ]).wait()
    print("> Lecture Segments Downloaded")
    print("Return code: " + str(ret_code))


def handle_irregular_segments(media_info, video_title, lecture_working_dir,
                              output_path):
    no_vid_segments, video_url, video_init, video_extension, no_aud_segments, audio_url, audio_init, audio_extension = media_info
    download_media("video_0.seg.mp4", video_init, lecture_working_dir)
    video_kid = extract_kid(
        os.path.join(lecture_working_dir, "video_0.seg.mp4"))
    print("KID for video file is: " + video_kid)
    download_media("audio_0.seg.mp4", audio_init, lecture_working_dir)
    audio_kid = extract_kid(
        os.path.join(lecture_working_dir, "audio_0.seg.mp4"))
    print("KID for audio file is: " + audio_kid)
    os.chdir(lecture_working_dir)
    if os.name == "nt":
        video_concat_command = "copy /b " + "+".join([
            f"video_{i}.{video_extension}" for i in range(0, no_vid_segments)
        ]) + " encrypted_video.mp4"
        audio_concat_command = "copy /b " + "+".join([
            f"audio_{i}.{audio_extension}" for i in range(0, no_aud_segments)
        ]) + " encrypted_audio.mp4"
    else:
        video_concat_command = "cat " + " ".join([
            f"video_{i}.{video_extension}" for i in range(0, no_vid_segments)
        ]) + " > encrypted_video.mp4"
        audio_concat_command = "cat " + " ".join([
            f"audio_{i}.{audio_extension}" for i in range(0, no_aud_segments)
        ]) + " > encrypted_audio.mp4"
    os.system(video_concat_command)
    os.system(audio_concat_command)
    os.chdir(home_dir)
    try:
        decrypt(video_kid, "video", lecture_working_dir)
        decrypt(audio_kid, "audio", lecture_working_dir)
        os.chdir(home_dir)
        mux_process(video_title, lecture_working_dir, output_path)
        cleanup(lecture_working_dir)
    except Exception as e:
        print("Error: ", e)
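
# decrypt() looks keys up by the lowercased KID extracted from the init
# segment, so keyfile.json is assumed to map KID -> key as hex strings, for
# example (illustrative values only):
#
#   {
#       "0123456789abcdef0123456789abcdef": "00112233445566778899aabbccddeeff"
#   }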


def check_for_aria():
    try:
        subprocess.Popen(["aria2c", "-v"],
                         stdout=subprocess.DEVNULL,
                         stdin=subprocess.DEVNULL).wait()
        return True
    except FileNotFoundError:
        return False
    except Exception as e:
        print(
            "> Unexpected exception while checking for Aria2c, please tell the program author about this! ",
            e)
        return True


def check_for_ffmpeg():
    try:
        subprocess.Popen(["ffmpeg"],
                         stdout=subprocess.DEVNULL,
                         stdin=subprocess.DEVNULL).wait()
        return True
    except FileNotFoundError:
        return False
    except Exception as e:
        print(
            "> Unexpected exception while checking for FFMPEG, please tell the program author about this! ",
            e)
        return True


def check_for_mp4decrypt():
    try:
        subprocess.Popen(["mp4decrypt"],
                         stdout=subprocess.DEVNULL,
                         stdin=subprocess.DEVNULL).wait()
        return True
    except FileNotFoundError:
        return False
    except Exception as e:
        print(
            "> Unexpected exception while checking for MP4Decrypt, please tell the program author about this! ",
            e)
        return True


def download(url, path, filename):
    """
    @author Puyodead1
    """
    file_size = int(requests.head(url).headers["Content-Length"])
    if os.path.exists(path):
        first_byte = os.path.getsize(path)
    else:
        first_byte = 0
    if first_byte >= file_size:
        return file_size
    header = {"Range": "bytes=%s-%s" % (first_byte, file_size)}
    pbar = tqdm(total=file_size,
                initial=first_byte,
                unit='B',
                unit_scale=True,
                desc=filename)
    res = requests.get(url, headers=header, stream=True)
    res.raise_for_status()
    with open(path, 'ab') as f:
        for chunk in res.iter_content(chunk_size=1024):
            if chunk:
                f.write(chunk)
                pbar.update(1024)
    pbar.close()
    return file_size
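
# download() resumes partial files: it requests only the missing byte range,
# so re-running it on an interrupted file continues where it stopped.
# Hypothetical usage (URL and filenames are placeholders):
#
#   download("https://example.com/notes.pdf",
#            os.path.join(download_dir, "notes.pdf"),
#            "notes.pdf")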


def process_caption(caption,
                    lecture_index,
                    lecture_title,
                    lecture_dir,
                    keep_vtt,
                    tries=0):
    filename = "%s. %s_%s.%s" % (lecture_index, sanitize(lecture_title),
                                 caption.get("locale_id"), caption.get("ext"))
    filename_no_ext = "%s. %s_%s" % (lecture_index, sanitize(lecture_title),
                                     caption.get("locale_id"))
    filepath = os.path.join(lecture_dir, filename)

    if os.path.isfile(filepath):
        print(" > Caption '%s' already downloaded." % filename)
    else:
        print(" > Downloading caption: '%s'" % filename)
        try:
            download_aria(caption.get("download_url"), lecture_dir, filename)
        except Exception as e:
            if tries >= 3:
                print(
                    f" > Error downloading caption: {e}. Exceeded retries, skipping."
                )
                return
            else:
                print(
                    f" > Error downloading caption: {e}. Will retry {3 - tries} more times."
                )
                process_caption(caption, lecture_index, lecture_title,
                                lecture_dir, keep_vtt, tries + 1)

    if caption.get("ext") == "vtt":
        try:
            print(" > Converting caption to SRT format...")
            convert(lecture_dir, filename_no_ext)
            print(" > Caption conversion complete.")
            if not keep_vtt:
                os.remove(filepath)
        except Exception as e:
            print(f" > Error converting caption: {e}")


def process_lecture(lecture, lecture_path, lecture_dir, quality, access_token):
    lecture_title = lecture.get("lecture_title")
    is_encrypted = lecture.get("is_encrypted")
    lecture_video_sources = lecture.get("video_sources")
    lecture_audio_sources = lecture.get("audio_sources")

    if is_encrypted:
        if len(lecture_audio_sources) > 0 and len(lecture_video_sources) > 0:
            lecture_working_dir = os.path.join(working_dir,
                                               str(lecture.get("asset_id")))
            if not os.path.isfile(lecture_path):
                # last index is the best quality
                video_source = lecture_video_sources[-1]
                audio_source = lecture_audio_sources[-1]
                if isinstance(quality, int):
                    video_source = min(
                        lecture_video_sources,
                        key=lambda x: abs(int(x.get("height")) - quality))
                if not os.path.exists(lecture_working_dir):
                    os.mkdir(lecture_working_dir)
                print(" > Lecture '%s' has DRM, attempting to download" %
                      lecture_title)
                handle_segments(video_source, audio_source, lecture_title,
                                lecture_working_dir, lecture_path)
            else:
                print(" > Lecture '%s' is already downloaded, skipping..." %
                      lecture_title)
        else:
            print(" > Lecture '%s' is missing media links" % lecture_title)
    else:
        # set the folder to download ephemeral files
        lecture_working_dir = os.path.join(working_dir,
                                           str(lecture.get("asset_id")))
        sources = lecture.get("sources")
        if not os.path.exists(lecture_working_dir):
            os.mkdir(lecture_working_dir)
        if sources:
            if not os.path.isfile(lecture_path):
                print(" > Lecture doesn't have DRM, attempting to download...")
                source = sources[0]  # first index is the best quality
                if isinstance(quality, int):
                    source = min(
                        sources,
                        key=lambda x: abs(int(x.get("height")) - quality))
                try:
                    print(" ====== Selected quality: ", source.get("type"),
                          source.get("height"))
                    url = source.get("download_url")
                    source_type = source.get("type")
                    if source_type == "hls":
                        temp_filepath = lecture_path.replace(".mp4", "")
                        temp_filepath = temp_filepath + ".hls-part.mp4"
                        retVal = FFMPEG(None, url, access_token,
                                        temp_filepath).download()
                        if retVal:
                            os.rename(temp_filepath, lecture_path)
                            print(" > HLS Download success")
                    else:
                        download_aria(url, lecture_dir,
                                      lecture_title + ".mp4")
                except Exception as e:
                    print(" > Error downloading lecture: ", e)
            else:
                print(" > Lecture '%s' is already downloaded, skipping..." %
                      lecture_title)
        else:
            print(" > Missing sources for lecture", lecture)
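
# The quality selection above picks the source whose height is numerically
# closest to the requested -q value. For example (heights assumed): with
# quality=480 and available heights [360, 720, 1080], the distances are
# [120, 240, 600], so the 360p source is chosen.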


def parse_new(_udemy, quality, skip_lectures, dl_assets, dl_captions,
              caption_locale, keep_vtt, access_token):
    total_chapters = _udemy.get("total_chapters")
    total_lectures = _udemy.get("total_lectures")
    print(f"Chapter(s) ({total_chapters})")
    print(f"Lecture(s) ({total_lectures})")

    course_name = _udemy.get("course_title")
    course_dir = os.path.join(download_dir, course_name)
    if not os.path.exists(course_dir):
        os.mkdir(course_dir)

    for chapter in _udemy.get("chapters"):
        chapter_title = chapter.get("chapter_title")
        chapter_index = chapter.get("chapter_index")
        chapter_dir = os.path.join(course_dir, chapter_title)
        if not os.path.exists(chapter_dir):
            os.mkdir(chapter_dir)
        print(
            f"======= Processing chapter {chapter_index} of {total_chapters} ======="
        )

        for lecture in chapter.get("lectures"):
            lecture_title = lecture.get("lecture_title")
            lecture_index = lecture.get("lecture_index")
            extension = lecture.get("extension")
            print(f" > Processing lecture {lecture_index} of {total_lectures}")

            if not skip_lectures:
                if extension == "html":
                    # html lectures are written straight to disk
                    html_content = lecture.get("html_content").encode(
                        "ascii", "ignore").decode("utf8")
                    lecture_path = os.path.join(
                        chapter_dir, "{}.html".format(sanitize(lecture_title)))
                    try:
                        with open(lecture_path, 'w') as f:
                            f.write(html_content)
                    except Exception as e:
                        print(" > Failed to write html file: ", e)
                        continue
                else:
                    lecture_path = os.path.join(
                        chapter_dir, "{}.mp4".format(sanitize(lecture_title)))
                    process_lecture(lecture, lecture_path, chapter_dir,
                                    quality, access_token)

            if dl_assets:
                assets = lecture.get("assets")
                print(" > Processing {} asset(s) for lecture...".format(
                    len(assets)))
                for asset in assets:
                    asset_type = asset.get("type")
                    filename = asset.get("filename")
                    download_url = asset.get("download_url")
                    if asset_type == "article":
                        print(
                            "If you're seeing this message, that means that you reached a secret area that I haven't finished! jk I haven't implemented handling for this asset type, please report this at https://github.com/Puyodead1/udemy-downloader/issues so I can add it. When reporting, please provide the following information: "
                        )
                        continue
                    elif asset.get("asset_type") == "Article":
                        asset_path = os.path.join(chapter_dir,
                                                  sanitize(lecture_title))
                        with open(asset_path, 'w') as f:
                            f.write(asset["body"])
                    elif asset.get("asset_type") == "ExternalLink":
                        asset_path = os.path.join(
                            chapter_dir, f"{lecture_index}. External URLs.txt")
                        with open(asset_path, 'a') as f:
                            f.write("%s : %s\n" %
                                    (asset["title"], asset["external_url"]))
                print("> Found %s assets for lecture '%s'" %
                      (len(assets), lecture_title))

            # process captions
            subtitles = lecture.get("subtitles")
            if dl_captions and subtitles:
                print("Processing {} caption(s)...".format(len(subtitles)))
                for subtitle in subtitles:
                    lang = subtitle.get("language")
                    if lang == caption_locale or caption_locale == "all":
                        process_caption(subtitle, lecture_index, lecture_title,
                                        chapter_dir, keep_vtt)
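
# parse_new() lays the downloads out roughly like this (names illustrative;
# chapter/lecture titles get zero-padded index prefixes when the data is built
# in the __main__ block below):
#
#   out_dir/
#       <course_title>/
#           01 <chapter title>/
#               001 <lecture title>.mp4
#               <lecture_index>. External URLs.txt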


def parse(data):
    # Legacy parser/downloader for the flat course data format; it is not
    # invoked from the __main__ block below.
    course_dir = os.path.join(download_dir, course_id)
    if not os.path.exists(course_dir):
        os.mkdir(course_dir)
    chapters = []
    lectures = []

    for obj in data:
        if obj["_class"] == "chapter":
            obj["lectures"] = []
            chapters.append(obj)
        elif obj["_class"] == "lecture" and obj["asset"][
                "asset_type"] == "Video":
            try:
                chapters[-1]["lectures"].append(obj)
            except IndexError:
                # This is caused by there not being a starting chapter
                lectures.append(obj)
                lecture_index = lectures.index(obj) + 1
                lecture_path = os.path.join(
                    course_dir,
                    f'{lecture_index}. {sanitize(obj["title"])}.mp4')
                process_lecture(obj, lecture_index, lecture_path, download_dir)

    for chapter in chapters:
        chapter_dir = os.path.join(
            course_dir,
            f'{chapters.index(chapter) + 1}. {sanitize(chapter["title"])}')
        if not os.path.exists(chapter_dir):
            os.mkdir(chapter_dir)

        for lecture in chapter["lectures"]:
            lecture_index = chapter["lectures"].index(lecture) + 1
            lecture_path = os.path.join(
                chapter_dir,
                f'{lecture_index}. {sanitize(lecture["title"])}.mp4')
            process_lecture(lecture, lecture_index, lecture_path, chapter_dir)

    print("\n\n\n\n\n\n\n\n=====================")
    print("All downloads completed for course!")
    print("=====================")


def course_info(course_data):
    print("\n\n\n\n")
    course_title = course_data.get("title")
    chapter_count = course_data.get("total_chapters")
    lecture_count = course_data.get("total_lectures")

    print("> Course: {}".format(course_title))
    print("> Total Chapters: {}".format(chapter_count))
    print("> Total Lectures: {}".format(lecture_count))
    print("\n")
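
# Typical invocation (script name and token are placeholders):
#
#   python main.py -c https://www.udemy.com/course/some-course/ -b <bearer token> -q 720 --download-captions --download-assets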


if __name__ == "__main__":
    parser = argparse.ArgumentParser(description='Udemy Downloader')
    parser.add_argument("-c",
                        "--course-url",
                        dest="course_url",
                        type=str,
                        help="The URL of the course to download",
                        required=True)
    parser.add_argument(
        "-b",
        "--bearer",
        dest="bearer_token",
        type=str,
        help="The Bearer token to use",
    )
    parser.add_argument(
        "-q",
        "--quality",
        dest="quality",
        type=int,
        help=
        "Download specific video quality. If the requested quality isn't available, the closest quality will be used. If not specified, the best quality will be downloaded for each lecture",
    )
    parser.add_argument(
        "-l",
        "--lang",
        dest="lang",
        type=str,
        help=
        "The language to download for captions, specify 'all' to download all captions (Default is 'en')",
    )
    parser.add_argument(
        "--skip-lectures",
        dest="skip_lectures",
        action="store_true",
        help="If specified, lectures won't be downloaded",
    )
    parser.add_argument(
        "--download-assets",
        dest="download_assets",
        action="store_true",
        help="If specified, lecture assets will be downloaded",
    )
    parser.add_argument(
        "--download-captions",
        dest="download_captions",
        action="store_true",
        help="If specified, captions will be downloaded",
    )
    parser.add_argument(
        "--keep-vtt",
        dest="keep_vtt",
        action="store_true",
        help="If specified, .vtt files won't be removed",
    )
    parser.add_argument(
        "--skip-hls",
        dest="skip_hls",
        action="store_true",
        help=
        "If specified, hls streams will be skipped (faster fetching) (hls streams usually contain 1080p quality for non-drm lectures)",
    )
    parser.add_argument(
        "--info",
        dest="info",
        action="store_true",
        help=
        "If specified, only course information will be printed, nothing will be downloaded",
    )
    parser.add_argument(
        "--save-to-file",
        dest="save_to_file",
        action="store_true",
        help=argparse.SUPPRESS,
    )
    parser.add_argument(
        "--load-from-file",
        dest="load_from_file",
        action="store_true",
        help=argparse.SUPPRESS,
    )

    dl_assets = False
    skip_lectures = False
    dl_captions = False
    caption_locale = "en"
    quality = None
    bearer_token = None
    portal_name = None
    course_name = None
    keep_vtt = False
    skip_hls = False

    args = parser.parse_args()
    if args.download_assets:
        dl_assets = True
    if args.lang:
        caption_locale = args.lang
    if args.download_captions:
        dl_captions = True
    if args.skip_lectures:
        skip_lectures = True
    if args.quality:
        quality = args.quality
    if args.keep_vtt:
        keep_vtt = args.keep_vtt
    if args.skip_hls:
        skip_hls = args.skip_hls

    aria_ret_val = check_for_aria()
    if not aria_ret_val:
        print("> Aria2c is missing from your system or path!")
        sys.exit(1)

    ffmpeg_ret_val = check_for_ffmpeg()
    if not ffmpeg_ret_val:
        print("> FFMPEG is missing from your system or path!")
        sys.exit(1)

    mp4decrypt_ret_val = check_for_mp4decrypt()
    if not mp4decrypt_ret_val:
        print(
            "> MP4Decrypt is missing from your system or path! (This is part of Bento4 tools)"
        )
        sys.exit(1)

    if args.load_from_file:
        print(
            "> 'load_from_file' was specified, data will be loaded from json files instead of fetched"
        )
    if args.save_to_file:
        print(
            "> 'save_to_file' was specified, data will be saved to json files")

    if not os.path.isfile(keyfile_path):
        print("> Keyfile not found! Did you rename the file correctly?")
        sys.exit(1)
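
    # If -b/--bearer is not given, the token is read from the UDEMY_BEARER
    # environment variable; load_dotenv() below also picks it up from a local
    # .env file, e.g. (placeholder value):
    #
    #   UDEMY_BEARER=xxxxxxxxxxxxxxxxxxxx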

    load_dotenv()
    access_token = None
    if args.bearer_token:
        access_token = args.bearer_token
    else:
        access_token = os.getenv("UDEMY_BEARER")

    udemy = Udemy(access_token)

    print("> Fetching course information, this may take a minute...")
    if not args.load_from_file:
        # course info payload (kept distinct from the course_info() helper above)
        course_id, course_info_data = udemy._extract_course_info(
            args.course_url)
        print("> Course information retrieved!")
        if course_info_data and isinstance(course_info_data, dict):
            title = _clean(course_info_data.get("title"))
            course_title = course_info_data.get("published_title")
            portal_name = course_info_data.get("portal_name")

    print("> Fetching course content, this may take a minute...")
    if args.load_from_file:
        course_json = json.loads(
            open(os.path.join(os.getcwd(), "saved", "course_content.json"),
                 'r').read())
        title = course_json.get("title")
        course_title = course_json.get("published_title")
        portal_name = course_json.get("portal_name")
    else:
        course_json = udemy._extract_course_json(args.course_url, course_id,
                                                 portal_name)
        if args.save_to_file:
            with open(
                    os.path.join(os.getcwd(), "saved", "course_content.json"),
                    'w') as f:
                f.write(json.dumps(course_json))

    print("> Course content retrieved!")
    course = course_json.get("results")
    resource = course_json.get("detail")

    if args.load_from_file:
        _udemy = json.loads(
            open(os.path.join(os.getcwd(), "saved", "_udemy.json")).read())
        if args.info:
            course_info(_udemy)
        else:
            parse_new(_udemy, quality, skip_lectures, dl_assets, dl_captions,
                      caption_locale, keep_vtt, access_token)
    else:
        _udemy = {}
        _udemy["access_token"] = access_token
        _udemy["course_id"] = course_id
        _udemy["title"] = title
        _udemy["course_title"] = course_title
        _udemy["chapters"] = []
        counter = -1

        if resource:
            print("> Trying to logout")
            udemy.session.terminate()
            print("> Logged out.")

        if course:
            print("> Processing course data, this may take a minute.")
            lecture_counter = 0
            for entry in course:
                clazz = entry.get("_class")
                asset = entry.get("asset")
                supp_assets = entry.get("supplementary_assets")

                if clazz == "chapter":
                    lecture_counter = 0
                    lectures = []
                    chapter_index = entry.get("object_index")
                    chapter_title = "{0:02d} ".format(chapter_index) + _clean(
                        entry.get("title"))
                    if chapter_title not in _udemy["chapters"]:
                        _udemy["chapters"].append({
                            "chapter_title": chapter_title,
                            "chapter_id": entry.get("id"),
                            "chapter_index": chapter_index,
                            "lectures": []
                        })
                        counter += 1
                elif clazz == "lecture":
                    lecture_counter += 1
                    lecture_id = entry.get("id")
                    if len(_udemy["chapters"]) == 0:
                        # course started with a lecture before any chapter
                        lectures = []
                        chapter_index = entry.get("object_index")
                        chapter_title = "{0:02d} ".format(
                            chapter_index) + _clean(entry.get("title"))
                        if chapter_title not in _udemy["chapters"]:
                            _udemy["chapters"].append({
                                "chapter_title": chapter_title,
                                "chapter_id": lecture_id,
                                "chapter_index": chapter_index,
                                "lectures": []
                            })
                            counter += 1

                    if lecture_id:
                        retVal = []

                        if isinstance(asset, dict):
                            asset_type = (asset.get("asset_type").lower()
                                          or asset.get("assetType").lower())
                            if asset_type == "article":
                                if isinstance(supp_assets,
                                              list) and len(supp_assets) > 0:
                                    retVal = udemy._extract_supplementary_assets(
                                        supp_assets)
                            elif asset_type == "video":
                                if isinstance(supp_assets,
                                              list) and len(supp_assets) > 0:
                                    retVal = udemy._extract_supplementary_assets(
                                        supp_assets)
                            elif asset_type == "e-book":
                                retVal = udemy._extract_ebook(asset)
                            elif asset_type == "file":
                                retVal = udemy._extract_file(asset)
                            elif asset_type == "presentation":
                                retVal = udemy._extract_ppt(asset)
                            elif asset_type == "audio":
                                retVal = udemy._extract_audio(asset)

                        lecture_index = entry.get("object_index")
                        lecture_title = "{0:03d} ".format(
                            lecture_counter) + _clean(entry.get("title"))

                        if asset.get("stream_urls") is not None:
                            # not encrypted
                            data = asset.get("stream_urls")
                            if data and isinstance(data, dict):
                                sources = data.get("Video")
                                tracks = asset.get("captions")
                                # duration = asset.get("time_estimation")
                                sources = udemy._extract_sources(
                                    sources, skip_hls)
                                subtitles = udemy._extract_subtitles(tracks)
                                sources_count = len(sources)
                                subtitle_count = len(subtitles)
                                lectures.append({
                                    "index": lecture_counter,
                                    "lecture_index": lecture_index,
                                    "lecture_id": lecture_id,
                                    "lecture_title": lecture_title,
                                    # "duration": duration,
                                    "assets": retVal,
                                    "assets_count": len(retVal),
                                    "sources": sources,
                                    "subtitles": subtitles,
                                    "subtitle_count": subtitle_count,
                                    "sources_count": sources_count,
                                    "is_encrypted": False,
                                    "asset_id": asset.get("id")
                                })
                            else:
                                lectures.append({
                                    "index": lecture_counter,
                                    "lecture_index": lecture_index,
                                    "lecture_id": lecture_id,
                                    "lecture_title": lecture_title,
                                    "html_content": asset.get("body"),
                                    "extension": "html",
                                    "assets": retVal,
                                    "assets_count": len(retVal),
                                    "subtitle_count": 0,
                                    "sources_count": 0,
                                    "is_encrypted": False,
                                    "asset_id": asset.get("id")
                                })
                        else:
                            # encrypted
                            data = asset.get("media_sources")
                            if data and isinstance(data, list):
                                video_media_sources, audio_media_sources = udemy._extract_media_sources(
                                    data)
                                tracks = asset.get("captions")
                                # duration = asset.get("time_estimation")
                                subtitles = udemy._extract_subtitles(tracks)
                                sources_count = len(video_media_sources)
                                subtitle_count = len(subtitles)
                                lectures.append({
                                    "index": lecture_counter,
                                    "lecture_index": lecture_index,
                                    "lecture_id": lecture_id,
                                    "lecture_title": lecture_title,
                                    # "duration": duration,
                                    "assets": retVal,
                                    "assets_count": len(retVal),
                                    "video_sources": video_media_sources,
                                    "audio_sources": audio_media_sources,
                                    "subtitles": subtitles,
                                    "subtitle_count": subtitle_count,
                                    "sources_count": sources_count,
                                    "is_encrypted": True,
                                    "asset_id": asset.get("id")
                                })
                            else:
                                lectures.append({
                                    "index": lecture_counter,
                                    "lecture_index": lecture_index,
                                    "lecture_id": lecture_id,
                                    "lecture_title": lecture_title,
                                    "html_content": asset.get("body"),
                                    "extension": "html",
                                    "assets": retVal,
                                    "assets_count": len(retVal),
                                    "subtitle_count": 0,
                                    "sources_count": 0,
                                    "is_encrypted": False,
                                    "asset_id": asset.get("id")
                                })

                        _udemy["chapters"][counter]["lectures"] = lectures
                        _udemy["chapters"][counter]["lecture_count"] = len(
                            lectures)
                elif clazz == "quiz":
                    lecture_id = entry.get("id")
                    if len(_udemy["chapters"]) == 0:
                        lectures = []
                        chapter_index = entry.get("object_index")
                        chapter_title = "{0:02d} ".format(
                            chapter_index) + _clean(entry.get("title"))
                        if chapter_title not in _udemy["chapters"]:
                            lecture_counter = 0
                            _udemy["chapters"].append({
                                "chapter_title": chapter_title,
                                "chapter_id": lecture_id,
                                "chapter_index": chapter_index,
                                "lectures": [],
                            })
                            counter += 1

                    _udemy["chapters"][counter]["lectures"] = lectures
                    _udemy["chapters"][counter]["lecture_count"] = len(
                        lectures)

            _udemy["total_chapters"] = len(_udemy["chapters"])
            _udemy["total_lectures"] = sum([
                entry.get("lecture_count", 0) for entry in _udemy["chapters"]
                if entry
            ])

            if args.save_to_file:
                with open(os.path.join(os.getcwd(), "saved", "_udemy.json"),
                          'w') as f:
                    f.write(json.dumps(_udemy))
                print("Saved parsed data to json")

            if args.info:
                course_info(_udemy)
            else:
                parse_new(_udemy, quality, skip_lectures, dl_assets,
                          dl_captions, caption_locale, keep_vtt, access_token)