diff --git a/.env.sample b/.env.sample index c0cd4b4..93971fb 100644 --- a/.env.sample +++ b/.env.sample @@ -1 +1,2 @@ -UDEMY_BEARER=enter bearer token without the Bearer prefix \ No newline at end of file +UDEMY_BEARER=enter bearer token without the Bearer prefix +UDEMY_COURSE_ID=course id goes here \ No newline at end of file diff --git a/.gitignore b/.gitignore index 7177fd0..9246984 100644 --- a/.gitignore +++ b/.gitignore @@ -115,4 +115,6 @@ dmypy.json *.mp4 keyfile.json .env -test_data.json \ No newline at end of file +test_data.json +out_dir +working_dir \ No newline at end of file diff --git a/README.md b/README.md index b7c99e3..1f69396 100644 --- a/README.md +++ b/README.md @@ -1,37 +1,75 @@ # Udemy Downloader with DRM support -### NOTE +# NOTE + This program is WIP, the code is provided as-is and i am not held resposible for any legal repercussions resulting from the use of this program. -## Support +# Support + if you want help using the program, join [my discord server](https://discord.gg/5B3XVb4RRX) or use [github issues](https://github.com/Puyodead1/udemy-downloader/issues) -## License +# License + All code is licensed under the MIT license -## Description +# Description + Simple and hacky program to download a udemy course, has support for DRM videos but requires the user to aquire the decryption key (for legal reasons). -## Requirements +# Requirements + 1. You would need to download ffmpeg and mp4decrypter from Bento4 SDK and ensure they are in path(typing their name in cmd invokes them). -## Usage -*quick and dirty how-to* -1. you need to open the network tab, and find the index.mpd file url -![index mpd](https://i.imgur.com/MW78CAu.png) -2. open the `dashdownloader_multisegment.py` file and replace ``mpd url`` with the url -![mpd url](https://i.imgur.com/YfGSPKd.png) -3. Change the video title and output path to whatever you want the video to be called -![title](https://i.imgur.com/lymSmag.png) -- ``175. 
Inverse Transforming Vectors`` is what your would replace -4. rename ``keyfile.example.json`` to ``keyfile.json`` -5. open ``keyfile.json`` and enter the key id and decryption key for the video -![keyfile example](https://i.imgur.com/naABWva.png) -![example key and kid from console](https://i.imgur.com/awgndZA.png) -6. run ``python dashdownloader_multisegment.py`` in the terminal to start the download. -- make sure you have ffmpeg and mp4decrypt installed in your path +# Usage + +_quick and dirty how-to_ + +You will need to get a few things before you can use this program: + +- Decryption Key ID +- Decryption Key +- Udemy Course ID +- Udemy Bearer Token + +### Setting up + +- rename `.env.sample` to `.env` +- rename `keyfile.example.json` to `keyfile.json` + +### Acquire bearer token + +- open dev tools +- go to network tab +- in the search field, enter `api-2.0/courses` + ![Valid udemy api requests](https://i.imgur.com/Or371l7.png) +- click a random request +- locate the `Request Headers` section +- copy the text after `Authorization`, it should look like `Bearer xxxxxxxxxxx` + ![bearer token example](https://i.imgur.com/FhQdwgD.png) +- enter this in the `.env` file after `UDEMY_BEARER=` + +### Acquire Course ID + +- Follow above before following this +- locate the request url field + ![request url](https://i.imgur.com/EUIV3bk.png) +- copy the number after `/api-2.0/courses/` as seen highlighted in the above picture +- enter this in the `.env` file after `UDEMY_COURSE_ID=` + +### Key ID and Key + +It is up to you to acquire the key and key id. + +- Enter the key and key id in the `keyfile.json` + ![keyfile example](https://i.imgur.com/wLPsqOR.png) + ![example key and kid from console](https://i.imgur.com/awgndZA.png) + +### Start Downloading + +You can now run `python main.py` to start downloading. The course will download to `out_dir`, chapters are separated into folders. 
# Credits + https://github.com/Jayapraveen/Drm-Dash-stream-downloader - for the original code which this is based on https://github.com/alastairmccormack/pywvpssh - For code related to PSSH extraction https://github.com/alastairmccormack/pymp4parse/ - For code related to mp4 box parsing (used by pywvpssh) diff --git a/main.py b/main.py new file mode 100644 index 0000000..040d5e6 --- /dev/null +++ b/main.py @@ -0,0 +1,279 @@ +import os,requests,shutil,json,glob,urllib.request +from sanitize_filename import sanitize +import urllib.request +from tqdm import tqdm +from dotenv import load_dotenv +from mpegdash.parser import MPEGDASHParser +from mpegdash.nodes import Descriptor +from mpegdash.utils import ( + parse_attr_value, parse_child_nodes, parse_node_value, + write_attr_value, write_child_node, write_node_value +) +from utils import extract_kid + +load_dotenv() + +course_id = os.getenv("UDEMY_COURSE_ID") # the course id to download +bearer_token = os.getenv("UDEMY_BEARER") # you can find this in the network tab, its a request header under Authorization/x-udemy-authorization +header_bearer = "Bearer " + bearer_token +download_dir = "%s\out_dir" % os.getcwd() +working_dir = "%s\working_dir" % os.getcwd() # set the folder to download segments for DRM videos +retry = 3 +home_dir = os.getcwd(); +keyfile_path = "%s\keyfile.json" % os.getcwd() + +if not os.path.exists(working_dir): + os.makedirs(working_dir) + +if not os.path.exists(download_dir): + os.makedirs(download_dir) + +#Get the keys +with open(keyfile_path,'r') as keyfile: + keyfile = keyfile.read() +keyfile = json.loads(keyfile) + +""" +@author Jayapraveen +""" +def durationtoseconds(period): + #Duration format in PTxDxHxMxS + if(period[:2] == "PT"): + period = period[2:] + day = int(period.split("D")[0] if 'D' in period else 0) + hour = int(period.split("H")[0].split("D")[-1] if 'H' in period else 0) + minute = int(period.split("M")[0].split("H")[-1] if 'M' in period else 0) + second = 
period.split("S")[0].split("M")[-1] + print("Total time: " + str(day) + " days " + str(hour) + " hours " + str(minute) + " minutes and " + str(second) + " seconds") + total_time = float(str((day * 24 * 60 * 60) + (hour * 60 * 60) + (minute * 60) + (int(second.split('.')[0]))) + '.' + str(int(second.split('.')[-1]))) + return total_time + + else: + print("Duration Format Error") + return None + +def download_media(filename,url,lecture_working_dir,epoch = 0): + if(os.path.isfile(filename)): + print("Segment already downloaded.. skipping..") + else: + media = requests.get(url, stream=True) + media_length = int(media.headers.get("content-length")) + if media.status_code == 200: + if(os.path.isfile(filename) and os.path.getsize(filename) >= media_length): + print("Segment already downloaded.. skipping write to disk..") + else: + try: + pbar = tqdm(total=media_length, initial=0,unit='MB', unit_scale=True, desc=filename) + with open(f"{lecture_working_dir}\\{filename}", 'wb') as video_file: + for chunk in media.iter_content(chunk_size=1024): + if chunk: + video_file.write(chunk) + pbar.update(1024) + pbar.close() + print("Segment downloaded: " + filename) + return False #Successfully downloaded the file + except: + print("Connection error: Reattempting download of segment..") + download_media(filename,url, lecture_working_dir,epoch + 1) + + if os.path.getsize(filename) >= media_length: + pass + else: + print("Segment is faulty.. Redownloading...") + download_media(filename,url, lecture_working_dir,epoch + 1) + elif(media.status_code == 404): + print("Probably end hit!\n",url) + return True #Probably hit the last of the file + else: + if (epoch > retry): + exit("Error fetching segment, exceeded retry times.") + print("Error fetching segment file.. 
Redownloading...") + download_media(filename,url, lecture_working_dir,epoch + 1) + +""" +@author Jayapraveen +""" +def cleanup(path): + leftover_files = glob.glob(path + '/*.mp4', recursive=True) + mpd_files = glob.glob(path + '/*.mpd', recursive=True) + leftover_files = leftover_files + mpd_files + for file_list in leftover_files: + try: + os.remove(file_list) + except OSError: + print(f"Error deleting file: {file_list}") + +""" +@author Jayapraveen +""" +def mux_process(video_title,lecture_working_dir,outfile): + if os.name == "nt": + command = f"ffmpeg -y -i \"{lecture_working_dir}\\decrypted_audio.mp4\" -i \"{lecture_working_dir}\\decrypted_video.mp4\" -acodec copy -vcodec copy -fflags +bitexact -map_metadata -1 -metadata title=\"{video_title}\" -metadata creation_time=2020-00-00T70:05:30.000000Z \"{outfile}.mp4\"" + else: + command = f"nice -n 7 ffmpeg -y -i \"{lecture_working_dir}\\decrypted_audio.mp4\" -i \"{lecture_working_dir}\\decrypted_video.mp4\" -acodec copy -vcodec copy -fflags +bitexact -map_metadata -1 -metadata title=\"{video_title}\" -metadata creation_time=2020-00-00T70:05:30.000000Z \"{outfile}.mp4\"" + os.system(command) + +""" +@author Jayapraveen +""" +def decrypt(kid,filename,lecture_working_dir): + try: + key = keyfile[kid.lower()] + except KeyError as error: + exit("Key not found") + if(os.name == "nt"): + os.system(f"mp4decrypt --key 1:{key} \"{lecture_working_dir}\\encrypted_{filename}.mp4\" \"{lecture_working_dir}\\decrypted_{filename}.mp4\"") + else: + os.system(f"nice -n 7 mp4decrypt --key 1:{key} \"{lecture_working_dir}\\encrypted_{filename}.mp4\" \"{lecture_working_dir}\\decrypted_{filename}.mp4\"") + +""" +@author Jayapraveen +""" +def handle_irregular_segments(media_info,video_title,lecture_working_dir,output_path): + no_segment,video_url,video_init,video_extension,no_segment,audio_url,audio_init,audio_extension = media_info + download_media("video_0.seg.mp4",video_init,lecture_working_dir) + video_kid = 
extract_kid(f"{lecture_working_dir}\\video_0.seg.mp4") + print("KID for video file is: " + video_kid) + download_media("audio_0.seg.mp4",audio_init,lecture_working_dir) + audio_kid = extract_kid(f"{lecture_working_dir}\\audio_0.seg.mp4") + print("KID for audio file is: " + audio_kid) + for count in range(1,no_segment): + video_segment_url = video_url.replace("$Number$",str(count)) + audio_segment_url = audio_url.replace("$Number$",str(count)) + video_status = download_media(f"video_{str(count)}.seg.{video_extension}",video_segment_url,lecture_working_dir) + audio_status = download_media(f"audio_{str(count)}.seg.{audio_extension}",audio_segment_url,lecture_working_dir) + os.chdir(lecture_working_dir) + if(video_status): + if os.name == "nt": + video_concat_command = "copy /b " + "+".join([f"video_{i}.seg.{video_extension}" for i in range(0,count)]) + " encrypted_video.mp4" + audio_concat_command = "copy /b " + "+".join([f"audio_{i}.seg.{audio_extension}" for i in range(0,count)]) + " encrypted_audio.mp4" + else: + video_concat_command = "cat " + " ".join([f"video_{i}.seg.{video_extension}" for i in range(0,count)]) + " > encrypted_video.mp4" + audio_concat_command = "cat " + " ".join([f"audio_{i}.seg.{audio_extension}" for i in range(0,count)]) + " > encrypted_audio.mp4" + os.system(video_concat_command) + os.system(audio_concat_command) + decrypt(video_kid,"video",lecture_working_dir) + decrypt(audio_kid,"audio",lecture_working_dir) + os.chdir(home_dir) + mux_process(video_title,lecture_working_dir,output_path) + break + +""" +@author Jayapraveen +""" +def manifest_parser(mpd_url): + video = [] + audio = [] + manifest = requests.get(mpd_url).text + with open(f"{working_dir}\\manifest.mpd",'w') as manifest_handler: + manifest_handler.write(manifest) + mpd = MPEGDASHParser.parse(f"{working_dir}\\manifest.mpd") + running_time = durationtoseconds(mpd.media_presentation_duration) + for period in mpd.periods: + for adapt_set in period.adaptation_sets: + print("Processing 
" + adapt_set.mime_type) + content_type = adapt_set.mime_type + repr = adapt_set.representations[-1] # Max Quality + for segment in repr.segment_templates: + if(segment.duration): + print("Media segments are of equal timeframe") + segment_time = segment.duration / segment.timescale + total_segments = running_time / segment_time + else: + print("Media segments are of inequal timeframe") + + approx_no_segments = round(running_time / 6) + 10 # aproximate of 6 sec per segment + print("Expected No of segments:",approx_no_segments) + if(content_type == "audio/mp4"): + segment_extension = segment.media.split(".")[-1] + audio.append(approx_no_segments) + audio.append(segment.media) + audio.append(segment.initialization) + audio.append(segment_extension) + elif(content_type == "video/mp4"): + segment_extension = segment.media.split(".")[-1] + video.append(approx_no_segments) + video.append(segment.media) + video.append(segment.initialization) + video.append(segment_extension) + return video + audio + + + +""" +@author Puyodead1 +""" +def download(url, path, filename): + """ + @param: url to download file + @param: path place to put the file + @oaram: filename used for progress bar + """ + file_size = int(requests.head(url).headers["Content-Length"]) + if os.path.exists(path): + print("file exists") + first_byte = os.path.getsize(path) + else: + first_byte = 0 + if first_byte >= file_size: + return file_size + header = {"Range": "bytes=%s-%s" % (first_byte, file_size)} + pbar = tqdm( + total=file_size, initial=first_byte, + unit='MB', unit_scale=True, desc=filename) + req = requests.get(url, headers=header, stream=True) + with(open(path, 'ab')) as f: + for chunk in req.iter_content(chunk_size=1024): + if chunk: + f.write(chunk) + pbar.update(1024) + pbar.close() + return file_size + +def parse(data): + chapters = [] + + for obj in data: + if obj["_class"] == "chapter": + obj["lectures"] = [] + chapters.append(obj) + elif obj["_class"] == "lecture" and 
obj["asset"]["asset_type"] == "Video": + chapters[-1]["lectures"].append(obj) + + for chapter in chapters: + chapter_dir = f"%s\\%s. %s" % (download_dir,chapters.index(chapter) + 1,chapter["title"]) + if not os.path.exists(chapter_dir): + os.mkdir(chapter_dir) + + for lecture in chapter["lectures"]: + lecture_title = lecture["title"] + lecture_path = f"%s\\%s. %s.mp4" % (chapter_dir, chapter["lectures"].index(lecture) + 1,sanitize(lecture_title)) + lecture_asset = lecture["asset"] + if lecture_asset["media_license_token"] == None: + # not encrypted + lecture_url = lecture_asset["media_sources"][0]["src"] # best quality is the first index + download(lecture_url, lecture_path, lecture_title) + else: + # encrypted + print(f"Lecture %s has DRM, attempting to download" % lecture_title) + lecture_working_dir = "%s\%s" % (working_dir, lecture_asset["id"]) # set the folder to download ephemeral files + if not os.path.exists(lecture_working_dir): + os.mkdir(lecture_working_dir) + mpd_url = lecture_asset["media_sources"][1]["src"] # index 1 is the dash + base_url = mpd_url.split("index.mpd")[0] + media_info = manifest_parser(mpd_url) + handle_irregular_segments(media_info,lecture_title,lecture_working_dir,lecture_path) + cleanup(lecture_working_dir) + +r = requests.get(f"https://udemy.com/api-2.0/courses/{course_id}/cached-subscriber-curriculum-items?fields[asset]=results,title,external_url,time_estimation,download_urls,slide_urls,filename,asset_type,captions,media_license_token,course_is_drmed,media_sources,stream_urls,body&fields[chapter]=object_index,title,sort_order&fields[lecture]=id,title,object_index,asset,supplementary_assets,view_html&page_size=10000".format(course_id), headers={"Authorization": header_bearer, "x-udemy-authorization": header_bearer}) +if r.status_code == 200: + # loop + data = r.json() + parse(data["results"]) +else: + print("An error occurred while trying to fetch coure data!") + print(r.text) + +# with open("test_data.json", encoding="utf8") as f: 
+# data = json.loads(f.read())["results"] +# parse(data) \ No newline at end of file diff --git a/test_fetch.py b/test_fetch.py deleted file mode 100644 index 677e035..0000000 --- a/test_fetch.py +++ /dev/null @@ -1,82 +0,0 @@ -import requests -import json -import os -from sanitize_filename import sanitize -import urllib.request -from tqdm import tqdm -from dotenv import load_dotenv - -load_dotenv() - - -course_id = "657932" # the course id to download -bearer_token = os.getenv("UDEMY_BEARER") # you can find this in the network tab, its a request header under Authorization/x-udemy-authorization -header_bearer = "Bearer " + bearer_token -#r = requests.get(f"https://udemy.com/api-2.0/courses/{course_id}/cached-subscriber-curriculum-items?fields[asset]=results,title,external_url,time_estimation,download_urls,slide_urls,filename,asset_type,captions,media_license_token,course_is_drmed,media_sources,stream_urls,body&fields[chapter]=object_index,title,sort_order&fields[lecture]=id,title,object_index,asset,supplementary_assets,view_html&page_size=10000".format(course_id), headers={"Authorization": header_bearer, "x-udemy-authorization": header_bearer}) -# if r.status_code == 200: -# # loop -# data = r.json() -# for result in data: -# print(result) -# else: -# print("An error occurred while trying to fetch coure data!") -# print(r.text) - -download_dir = os.getcwd() + "\\out_dir" - -def download(url, path, filename): - """ - @param: url to download file - @param: path place to put the file - @oaram: filename used for progress bar - """ - file_size = int(requests.head(url).headers["Content-Length"]) - if os.path.exists(path): - print("file exists") - first_byte = os.path.getsize(path) - else: - first_byte = 0 - if first_byte >= file_size: - return file_size - header = {"Range": "bytes=%s-%s" % (first_byte, file_size)} - pbar = tqdm( - total=file_size, initial=first_byte, - unit='MB', unit_scale=True, desc=filename) - req = requests.get(url, headers=header, stream=True) - 
with(open(path, 'ab')) as f: - for chunk in req.iter_content(chunk_size=1024): - if chunk: - f.write(chunk) - pbar.update(1024) - pbar.close() - return file_size - -with open("test_data.json", encoding="utf8") as f: - data = json.loads(f.read())["results"] - - chapters = [] - - for obj in data: - if obj["_class"] == "chapter": - obj["lectures"] = [] - chapters.append(obj) - elif obj["_class"] == "lecture" and obj["asset"]["asset_type"] == "Video": - chapters[-1]["lectures"].append(obj) - - for chapter in chapters: - chapter_dir = f"%s\\%s. %s" % (download_dir,chapters.index(chapter) + 1,chapter["title"]) - if not os.path.isdir(chapter_dir): - os.mkdir(chapter_dir) - - for lecture in chapter["lectures"]: - lecture_title = lecture["title"] - lecture_path = f"%s\\%s. %s.mp4" % (chapter_dir, chapter["lectures"].index(lecture) + 1,sanitize(lecture_title)) - lecture_asset = lecture["asset"] - if lecture_asset["media_license_token"] == None: - # not encrypted - lecture_url = lecture_asset["media_sources"][0]["src"] # best quality is the first index - download(lecture_url, lecture_path, lecture_title) - else: - # encrypted - print("drm") - pass \ No newline at end of file