download drm videos and non-drm videos, also added progress bar

2025-04-30 02:34:25 +02:00 · 2021-05-18 20:31:39 -04:00 · 2021-05-18 20:31:39 -04:00 · 1d57644cdf
commit 1d57644cdf
parent ecb5a78498
5 changed files with 342 additions and 104 deletions
--- a/.env.sample
+++ b/.env.sample
@ -1 +1,2 @@
-UDEMY_BEARER=enter bearer token without the Bearer prefix
+UDEMY_BEARER=enter bearer token without the Bearer prefix
+UDEMY_COURSE_ID=course id goes here
--- a/.gitignore
+++ b/.gitignore
@ -115,4 +115,6 @@ dmypy.json
 *.mp4
 keyfile.json
 .env
-test_data.json
+test_data.json
+out_dir
+working_dir
--- a/README.md
+++ b/README.md
@ -1,37 +1,75 @@
 # Udemy Downloader with DRM support

-### NOTE
+# NOTE
+
 This program is WIP, the code is provided as-is and i am not held resposible for any legal repercussions resulting from the use of this program.

-## Support
+# Support
+
 if you want help using the program, join [my discord server](https://discord.gg/5B3XVb4RRX) or use [github issues](https://github.com/Puyodead1/udemy-downloader/issues)

-## License
+# License
+
 All code is licensed under the MIT license

-## Description
+# Description
+
 Simple and hacky program to download a udemy course, has support for DRM videos but requires the user to aquire the decryption key (for legal reasons).

-## Requirements
+# Requirements
+
 1. You would need to download ffmpeg and mp4decrypter from Bento4 SDK and ensure they are in path(typing their name in cmd invokes them).

-## Usage
-*quick and dirty how-to*
-1. you need to open the network tab, and find the index.mpd file url
-![index mpd](https://i.imgur.com/MW78CAu.png)
-2. open the `dashdownloader_multisegment.py` file and replace ``mpd url`` with the url
-![mpd url](https://i.imgur.com/YfGSPKd.png)
-3. Change the video title and output path to whatever you want the video to be called
-![title](https://i.imgur.com/lymSmag.png)
- ``175. Inverse Transforming Vectors`` is what your would replace
-4. rename ``keyfile.example.json`` to ``keyfile.json``
-5. open ``keyfile.json`` and enter the key id and decryption key for the video
-![keyfile example](https://i.imgur.com/naABWva.png)
-![example key and kid from console](https://i.imgur.com/awgndZA.png)
-6. run ``python dashdownloader_multisegment.py`` in the terminal to start the download.
- make sure you have ffmpeg and mp4decrypt installed in your path
+# Usage
+
+_quick and dirty how-to_
+
+You will need to get a few things before you can use this program:
+
+- Decryption Key ID
+- Decryption Key
+- Udemy Course ID
+- Udemy Bearer Token
+
+### Setting up
+
+- rename `.env.sample` to `.env`
+- rename `keyfile.example.json` to `keyfile.json`
+
+### Acquire bearer token
+
+- open dev tools
+- go to network tab
+- in the search field, enter `api-2.0/courses`
+  ![Valid udemy api requests](https://i.imgur.com/Or371l7.png)
+- click a random request
+- locate the `Request Headers` section
+- copy the text after `Authorization`, it should look like `Bearer xxxxxxxxxxx`
+  ![bearer token example](https://i.imgur.com/FhQdwgD.png)
+- enter this in the `.env` file after `UDEMY_BEARER=`
+
+### Acquire Course ID
+
+- Follow above before following this
+- locate the request url field
+  ![request url](https://i.imgur.com/EUIV3bk.png)
+- copy the number after `/api-2.0/courses/` as seen highlighted in the above picture
+- enter this in the `.env` file after `UDEMY_COURSE_ID=`
+
+### Key ID and Key
+
+It is up to you to acquire the key and key id.
+
+- Enter the key and key id in the `keyfile.json`
+  ![keyfile example](https://i.imgur.com/wLPsqOR.png)
+  ![example key and kid from console](https://i.imgur.com/awgndZA.png)
+
+### Start Downloading
+
+You can now run `python main.py` to start downloading. The course will download to `out_dir`, chapters are separated into folders.

 # Credits
+
 https://github.com/Jayapraveen/Drm-Dash-stream-downloader - for the original code which this is based on
 https://github.com/alastairmccormack/pywvpssh - For code related to PSSH extraction
 https://github.com/alastairmccormack/pymp4parse/ - For code related to mp4 box parsing (used by pywvpssh)
--- a/main.py
+++ b/main.py
@ -0,0 +1,279 @@
+import os,requests,shutil,json,glob,urllib.request
+from sanitize_filename import sanitize
+import urllib.request
+from tqdm import tqdm
+from dotenv import load_dotenv
+from mpegdash.parser import MPEGDASHParser
+from mpegdash.nodes import Descriptor
+from mpegdash.utils import (
+    parse_attr_value, parse_child_nodes, parse_node_value,
+    write_attr_value, write_child_node, write_node_value
+)
+from utils import extract_kid
+
+load_dotenv()
+
+# Settings come from the environment (populated from .env by load_dotenv above).
+course_id = os.getenv("UDEMY_COURSE_ID") # the course id to download
+bearer_token = os.getenv("UDEMY_BEARER") # you can find this in the network tab, its a request header under Authorization/x-udemy-authorization
+if not course_id or not bearer_token:
+    # Fail early with a readable message; concatenating None below would
+    # otherwise raise an opaque TypeError.
+    raise SystemExit("UDEMY_COURSE_ID and UDEMY_BEARER must both be set in the .env file")
+header_bearer = "Bearer " + bearer_token
+home_dir = os.getcwd()
+# os.path.join replaces the "%s\out_dir"-style strings, whose "\o", "\w" and
+# "\k" are invalid escape sequences, and uses the platform's path separator.
+download_dir = os.path.join(home_dir, "out_dir")
+working_dir = os.path.join(home_dir, "working_dir") # set the folder to download segments for DRM videos
+retry = 3
+keyfile_path = os.path.join(home_dir, "keyfile.json")
+
+os.makedirs(working_dir, exist_ok=True)
+os.makedirs(download_dir, exist_ok=True)
+
+#Get the keys
+# keyfile maps a lower-case key id to its decryption key (see decrypt()).
+with open(keyfile_path,'r') as keyfile_handle:
+    keyfile = json.load(keyfile_handle)
+
+"""
+@author Jayapraveen
+"""
+def durationtoseconds(period):
+    """Convert a ``PT[nD][nH][nM][n[.n]S]`` duration string to seconds.
+
+    Every component is optional, so values such as ``PT1H`` or ``PT45S`` are
+    accepted, and the seconds component may carry a fractional part.
+
+    @param period: duration string, e.g. ``PT1H2M3.5S``
+    @return: the total duration in seconds as a float, or None (after printing
+             "Duration Format Error") when the string does not match the format
+    """
+    import re  # imported locally: the module-level imports do not include re
+
+    match = re.fullmatch(
+        r"PT(?:(\d+)D)?(?:(\d+)H)?(?:(\d+)M)?(?:(\d+(?:\.\d+)?)S)?",
+        period or "")
+    if match is None:
+        print("Duration Format Error")
+        return None
+
+    day, hour, minute = (int(part) if part else 0 for part in match.groups()[:3])
+    second = match.group(4) or "0"
+    print("Total time: " + str(day) + " days " + str(hour) + " hours " + str(minute) + " minutes and " + str(second) + " seconds")
+    # Parse the seconds text as a single float. Rebuilding the fraction from
+    # int() pieces turned "5" into 5.5 and "5.05" into 5.5.
+    return float((day * 24 * 60 * 60) + (hour * 60 * 60) + (minute * 60)) + float(second)
+
+def download_media(filename,url,lecture_working_dir,epoch = 0):
+    """Download one media segment into ``lecture_working_dir``.
+
+    The segment is streamed to a ``.part`` file and renamed only once it is
+    complete, so a file that exists under its final name is a finished
+    download and is skipped.
+
+    @param filename: name of the segment file inside ``lecture_working_dir``
+    @param url: segment url
+    @param lecture_working_dir: directory the segment is written to
+    @param epoch: number of attempts already made; attempts stop after ``retry``
+    @return: True when the server answered 404 (treated as "past the last
+             segment"), False when the segment is available on disk
+    @raise SystemExit: when the segment could not be fetched within the
+                       allowed number of attempts
+    """
+    # Check the path that is actually written, not "filename" relative to
+    # the current directory.
+    target = os.path.join(lecture_working_dir, filename)
+    if os.path.isfile(target):
+        print("Segment already downloaded.. skipping..")
+        return False
+
+    partial = target + ".part"
+    # A bounded loop replaces the recursive retries, which had no limit on
+    # the connection-error and faulty-segment paths.
+    for _attempt in range(epoch, retry + 1):
+        try:
+            with requests.get(url, stream=True) as media:
+                if media.status_code == 404:
+                    print("Probably end hit!\n",url)
+                    return True #Probably hit the last of the file
+                if media.status_code != 200:
+                    print("Error fetching segment file.. Redownloading...")
+                    continue
+                # Read the length only after the status check; an error
+                # response may not carry a content-length header at all.
+                length_header = media.headers.get("content-length")
+                media_length = int(length_header) if length_header else None
+                with tqdm(total=media_length, initial=0, unit='B', unit_scale=True, desc=filename) as pbar, \
+                        open(partial, 'wb') as video_file:
+                    for chunk in media.iter_content(chunk_size=1024):
+                        if chunk:
+                            video_file.write(chunk)
+                            pbar.update(len(chunk)) # the last chunk is usually shorter than 1024
+        except (requests.RequestException, OSError):
+            print("Connection error: Reattempting download of segment..")
+            continue
+
+        if media_length is not None and os.path.getsize(partial) < media_length:
+            print("Segment is faulty.. Redownloading...")
+            continue
+
+        os.replace(partial, target)
+        print("Segment downloaded: " + filename)
+        return False #Successfully downloaded the file
+
+    raise SystemExit("Error fetching segment, exceeded retry times.")
+
+"""
+@author Jayapraveen
+"""
+def cleanup(path):
+    """Delete the ``*.mp4`` and ``*.mpd`` files directly inside ``path``.
+
+    Files that cannot be removed are reported and skipped.
+
+    @param path: directory holding the leftover working files
+    """
+    # Escape the directory part so glob metacharacters in it (for example
+    # "[" or "*") are matched literally instead of being read as a pattern.
+    directory = glob.escape(path)
+    leftover_files = glob.glob(os.path.join(directory, '*.mp4')) + glob.glob(os.path.join(directory, '*.mpd'))
+    for leftover in leftover_files:
+        try:
+            os.remove(leftover)
+        except OSError:
+            print(f"Error deleting file: {leftover}")
+
+"""
+@author Jayapraveen
+"""
+def mux_process(video_title,lecture_working_dir,outfile):
+    """Mux the decrypted audio and video of a lecture into one mp4 via ffmpeg.
+
+    Reads ``decrypted_audio.mp4`` and ``decrypted_video.mp4`` from
+    ``lecture_working_dir`` and stream-copies both into the output file.
+
+    @param video_title: value written to the output's ``title`` metadata
+    @param lecture_working_dir: directory holding the decrypted tracks
+    @param outfile: output path; ``.mp4`` is appended only when missing
+    """
+    import subprocess  # imported locally: not part of the module-level imports
+
+    # parse() already passes a path ending in ".mp4"; appending the extension
+    # unconditionally produced "<name>.mp4.mp4".
+    output_path = outfile if outfile.lower().endswith(".mp4") else outfile + ".mp4"
+    command = [
+        "ffmpeg", "-y",
+        "-i", os.path.join(lecture_working_dir, "decrypted_audio.mp4"),
+        "-i", os.path.join(lecture_working_dir, "decrypted_video.mp4"),
+        "-acodec", "copy", "-vcodec", "copy",
+        "-fflags", "+bitexact", "-map_metadata", "-1",
+        "-metadata", f"title={video_title}",
+        # NOTE(review): month 00 / hour 70 is not a valid timestamp; the value
+        # is kept as it was - confirm whether a real date is wanted here.
+        "-metadata", "creation_time=2020-00-00T70:05:30.000000Z",
+        output_path,
+    ]
+    if os.name != "nt":
+        command = ["nice", "-n", "7"] + command
+    # An argument list (no shell) keeps quotes or shell metacharacters in the
+    # lecture title from breaking, or injecting into, the command.
+    try:
+        result = subprocess.run(command)
+    except FileNotFoundError:
+        print("Error muxing: ffmpeg was not found in PATH")
+        return
+    if result.returncode != 0:
+        print(f"Error muxing: ffmpeg exited with code {result.returncode}")
+
+"""
+@author Jayapraveen
+"""
+def decrypt(kid,filename,lecture_working_dir):
+    """Decrypt ``encrypted_<filename>.mp4`` to ``decrypted_<filename>.mp4`` with mp4decrypt.
+
+    The key is looked up in the module-level ``keyfile`` mapping by the
+    lower-cased key id.
+
+    @param kid: key id of the track
+    @param filename: track name, used in the input and output file names
+    @param lecture_working_dir: directory holding the encrypted file
+    @raise SystemExit: when ``keyfile`` has no entry for the key id
+    """
+    import subprocess  # imported locally: not part of the module-level imports
+
+    try:
+        key = keyfile[kid.lower()]
+    except KeyError:
+        raise SystemExit("Key not found")
+
+    command = [
+        "mp4decrypt", "--key", f"1:{key}",
+        os.path.join(lecture_working_dir, f"encrypted_{filename}.mp4"),
+        os.path.join(lecture_working_dir, f"decrypted_{filename}.mp4"),
+    ]
+    if os.name != "nt":
+        command = ["nice", "-n", "7"] + command
+    # An argument list (no shell) means the paths need no manual quoting.
+    try:
+        result = subprocess.run(command)
+    except FileNotFoundError:
+        print("Error decrypting: mp4decrypt was not found in PATH")
+        return
+    if result.returncode != 0:
+        print(f"Error decrypting: mp4decrypt exited with code {result.returncode}")
+
+"""
+@author Jayapraveen
+"""
+def handle_irregular_segments(media_info,video_title,lecture_working_dir,output_path):
+    """Download, concatenate, decrypt and mux one DRM-protected lecture.
+
+    Segments are requested one by one until download_media reports the end
+    of a track (a truthy return) or the expected segment count is reached.
+    The downloaded pieces are then joined, decrypted and muxed.
+
+    @param media_info: eight-item list from manifest_parser: segment count,
+                       media url template, init url and extension for the
+                       video track, followed by the same four for audio
+    @param video_title: title handed to mux_process
+    @param lecture_working_dir: directory for the segment and working files
+    @param output_path: output path handed to mux_process
+    """
+    if len(media_info) != 8:
+        print("Manifest did not yield one video and one audio track.. skipping lecture")
+        return
+    _,video_url,video_init,video_extension,no_segment,audio_url,audio_init,audio_extension = media_info
+
+    # Track the exact files that were downloaded. The init segments are always
+    # stored as "*.seg.mp4", whatever the media extension is.
+    video_parts = [os.path.join(lecture_working_dir,"video_0.seg.mp4")]
+    audio_parts = [os.path.join(lecture_working_dir,"audio_0.seg.mp4")]
+    download_media("video_0.seg.mp4",video_init,lecture_working_dir)
+    video_kid = extract_kid(video_parts[0])
+    print("KID for video file is: " + video_kid)
+    download_media("audio_0.seg.mp4",audio_init,lecture_working_dir)
+    audio_kid = extract_kid(audio_parts[0])
+    print("KID for audio file is: " + audio_kid)
+
+    video_done = audio_done = False
+    for count in range(1,no_segment):
+        if not video_done:
+            video_name = f"video_{count}.seg.{video_extension}"
+            video_done = bool(download_media(video_name,video_url.replace("$Number$",str(count)),lecture_working_dir))
+            if not video_done:
+                video_parts.append(os.path.join(lecture_working_dir,video_name))
+        if not audio_done:
+            audio_name = f"audio_{count}.seg.{audio_extension}"
+            audio_done = bool(download_media(audio_name,audio_url.replace("$Number$",str(count)),lecture_working_dir))
+            if not audio_done:
+                audio_parts.append(os.path.join(lecture_working_dir,audio_name))
+        if video_done and audio_done:
+            break
+
+    # Finish the lecture even when the loop ran out without a 404; before,
+    # that case skipped the concat/decrypt/mux steps entirely.
+    _concat_segments(video_parts,os.path.join(lecture_working_dir,"encrypted_video.mp4"))
+    _concat_segments(audio_parts,os.path.join(lecture_working_dir,"encrypted_audio.mp4"))
+    decrypt(video_kid,"video",lecture_working_dir)
+    decrypt(audio_kid,"audio",lecture_working_dir)
+    mux_process(video_title,lecture_working_dir,output_path)
+
+
+def _concat_segments(parts,destination):
+    """Append the files in ``parts``, in order, into the new file ``destination``."""
+    # Joining in Python avoids the command-line length limit that a
+    # "copy /b a+b+..." command hits with many segments, and needs no chdir.
+    with open(destination,'wb') as merged:
+        for part in parts:
+            with open(part,'rb') as segment:
+                shutil.copyfileobj(segment,merged)
+    
+"""
+@author Jayapraveen
+"""
+def manifest_parser(mpd_url):
+    """Fetch a DASH manifest and describe its video and audio segment templates.
+
+    The manifest is saved as ``manifest.mpd`` in ``working_dir`` and parsed
+    with MPEGDASHParser. For the first ``video/mp4`` and the first
+    ``audio/mp4`` adaptation set, the last representation's first segment
+    template is recorded.
+
+    @param mpd_url: url of the manifest
+    @return: ``video + audio``, where each part holds the expected segment
+             count, the media url template, the initialization url and the
+             media file extension (a part is empty when no such track was found)
+    @raise SystemExit: when the manifest's duration cannot be parsed
+    """
+    video = []
+    audio = []
+    response = requests.get(mpd_url)
+    response.raise_for_status() # do not try to parse an error page as a manifest
+    manifest_path = os.path.join(working_dir, "manifest.mpd")
+    with open(manifest_path,'w',encoding="utf-8") as manifest_handler:
+        manifest_handler.write(response.text)
+    mpd = MPEGDASHParser.parse(manifest_path)
+
+    duration = mpd.media_presentation_duration
+    running_time = durationtoseconds(duration) if duration else None
+    if running_time is None:
+        raise SystemExit("Unable to read the media duration from the manifest")
+
+    for period in mpd.periods:
+        for adapt_set in period.adaptation_sets:
+            content_type = adapt_set.mime_type
+            print(f"Processing {content_type}")
+            if content_type == "audio/mp4":
+                track = audio
+            elif content_type == "video/mp4":
+                track = video
+            else:
+                continue
+            if track:
+                # Keep one entry per track type; the caller unpacks exactly
+                # four values for video and four for audio.
+                continue
+            # NOTE(review): assumes the last representation is the best quality - confirm.
+            representation = adapt_set.representations[-1] # Max Quality
+            for segment in representation.segment_templates or []:
+                if(segment.duration):
+                    print("Media segments are of equal timeframe")
+                    segment_time = segment.duration / (segment.timescale or 1)
+                    # Same safety margin as below: the caller stops at the first 404.
+                    expected_segments = round(running_time / segment_time) + 10
+                else:
+                    print("Media segments are of inequal timeframe")
+                    expected_segments = round(running_time / 6) + 10 # approximate of 6 sec per segment
+                print("Expected No of segments:",expected_segments)
+                track.extend([
+                    expected_segments,
+                    segment.media,
+                    segment.initialization,
+                    segment.media.split(".")[-1],
+                ])
+                break
+    return video + audio
+
+
+
+"""
+@author Puyodead1
+"""
+def download(url, path, filename):
+    """Download ``url`` to ``path``, resuming a partial file when possible.
+
+    @param url: url to download the file from
+    @param path: place to put the file
+    @param filename: label used for the progress bar
+    @return: size of the file in bytes
+    @raise requests.HTTPError: when the download request is answered with an
+                               error status
+    """
+    # requests.head does not follow redirects unless asked to.
+    size_header = requests.head(url, allow_redirects=True).headers.get("Content-Length")
+    file_size = int(size_header) if size_header else None
+    if os.path.exists(path):
+        print("file exists")
+        first_byte = os.path.getsize(path)
+    else:
+        first_byte = 0
+    if file_size is None:
+        # Without a known size a partial file cannot be validated: start over.
+        first_byte = 0
+    elif first_byte >= file_size:
+        return file_size
+
+    # An open-ended range asks for "everything from first_byte on".
+    header = {"Range": "bytes=%s-" % first_byte} if first_byte else {}
+    with requests.get(url, headers=header, stream=True) as req:
+        req.raise_for_status()
+        if req.status_code != 206:
+            # The server sent the whole file rather than the requested range;
+            # appending it to the partial file would corrupt it.
+            first_byte = 0
+        with tqdm(total=file_size, initial=first_byte,
+                  unit='B', unit_scale=True, desc=filename) as pbar, \
+                open(path, 'ab' if first_byte else 'wb') as f:
+            for chunk in req.iter_content(chunk_size=1024):
+                if chunk:
+                    f.write(chunk)
+                    pbar.update(len(chunk)) # the last chunk is usually shorter than 1024
+    return file_size if file_size is not None else os.path.getsize(path)
+
+def parse(data):
+    """Group the curriculum into chapters and download every video lecture.
+
+    Each chapter gets a numbered folder inside ``download_dir``. Lectures
+    whose asset has no ``media_license_token`` are downloaded directly; the
+    others go through the DASH download, decrypt and mux path.
+
+    @param data: list of curriculum objects; ``_class`` is "chapter" or
+                 "lecture", and lectures carry an ``asset`` dict
+    """
+    chapters = [] # (chapter object, list of its video lectures) pairs
+
+    for obj in data:
+        if obj["_class"] == "chapter":
+            chapters.append((obj, []))
+        elif obj["_class"] == "lecture" and obj["asset"]["asset_type"] == "Video":
+            if not chapters:
+                # A lecture listed before any chapter used to raise IndexError.
+                chapters.append(({"title": "Uncategorized"}, []))
+            chapters[-1][1].append(obj)
+
+    for chapter_number, (chapter, lectures) in enumerate(chapters, start=1):
+        # Sanitize the chapter title as well; it becomes a directory name.
+        chapter_dir = os.path.join(download_dir, f"{chapter_number}. {sanitize(chapter['title'])}")
+        os.makedirs(chapter_dir, exist_ok=True)
+
+        for lecture_number, lecture in enumerate(lectures, start=1):
+            lecture_title = lecture["title"]
+            lecture_path = os.path.join(chapter_dir, f"{lecture_number}. {sanitize(lecture_title)}.mp4")
+            lecture_asset = lecture["asset"]
+            if lecture_asset["media_license_token"] is None:
+                # not encrypted
+                lecture_url = lecture_asset["media_sources"][0]["src"] # best quality is the first index
+                download(lecture_url, lecture_path, lecture_title)
+            else:
+                # encrypted
+                print(f"Lecture {lecture_title} has DRM, attempting to download")
+                lecture_working_dir = os.path.join(working_dir, str(lecture_asset["id"])) # set the folder to download ephemeral files
+                os.makedirs(lecture_working_dir, exist_ok=True)
+                # NOTE(review): assumes the DASH source is always at index 1 - confirm.
+                mpd_url = lecture_asset["media_sources"][1]["src"] # index 1 is the dash
+                media_info = manifest_parser(mpd_url)
+                handle_irregular_segments(media_info,lecture_title,lecture_working_dir,lecture_path)
+                cleanup(lecture_working_dir)
+
+# Fetch the whole curriculum in one request (page_size=10000) and hand the
+# result list to parse(). The f-string already interpolates course_id, so the
+# redundant .format() call was dropped.
+r = requests.get(f"https://udemy.com/api-2.0/courses/{course_id}/cached-subscriber-curriculum-items?fields[asset]=results,title,external_url,time_estimation,download_urls,slide_urls,filename,asset_type,captions,media_license_token,course_is_drmed,media_sources,stream_urls,body&fields[chapter]=object_index,title,sort_order&fields[lecture]=id,title,object_index,asset,supplementary_assets,view_html&page_size=10000", headers={"Authorization": header_bearer, "x-udemy-authorization": header_bearer})
+if r.status_code == 200:
+    # loop
+    data = r.json()
+    parse(data["results"])
+else:
+    print("An error occurred while trying to fetch course data!")
+    print(r.text)
+
+# Offline alternative for development: parse a saved API response instead.
+# with open("test_data.json", encoding="utf8") as f:
+#     data = json.loads(f.read())["results"]
+#     parse(data)
--- a/test_fetch.py
+++ b/test_fetch.py
@ -1,82 +0,0 @@
-import requests
-import json
-import os
-from sanitize_filename import sanitize
-import urllib.request
-from tqdm import tqdm
-from dotenv import load_dotenv
-
-load_dotenv()
-
-
-course_id = "657932" # the course id to download
-bearer_token = os.getenv("UDEMY_BEARER") # you can find this in the network tab, its a request header under Authorization/x-udemy-authorization
-header_bearer = "Bearer " + bearer_token
-#r = requests.get(f"https://udemy.com/api-2.0/courses/{course_id}/cached-subscriber-curriculum-items?fields[asset]=results,title,external_url,time_estimation,download_urls,slide_urls,filename,asset_type,captions,media_license_token,course_is_drmed,media_sources,stream_urls,body&fields[chapter]=object_index,title,sort_order&fields[lecture]=id,title,object_index,asset,supplementary_assets,view_html&page_size=10000".format(course_id), headers={"Authorization": header_bearer, "x-udemy-authorization": header_bearer})
-# if r.status_code == 200:
-#     # loop
-#     data = r.json()
-#     for result in data:
-#         print(result)
-# else:
-#     print("An error occurred while trying to fetch coure data!")
-#     print(r.text)
-
-download_dir = os.getcwd() + "\\out_dir"
-
-def download(url, path, filename):
-    """
-    @param: url to download file
-    @param: path place to put the file
-    @oaram: filename used for progress bar
-    """
-    file_size = int(requests.head(url).headers["Content-Length"])
-    if os.path.exists(path):
-        print("file exists")
-        first_byte = os.path.getsize(path)
-    else:
-        first_byte = 0
-    if first_byte >= file_size:
-        return file_size
-    header = {"Range": "bytes=%s-%s" % (first_byte, file_size)}
-    pbar = tqdm(
-        total=file_size, initial=first_byte,
-        unit='MB', unit_scale=True, desc=filename)
-    req = requests.get(url, headers=header, stream=True)
-    with(open(path, 'ab')) as f:
-        for chunk in req.iter_content(chunk_size=1024):
-            if chunk:
-                f.write(chunk)
-                pbar.update(1024)
-    pbar.close()
-    return file_size
-
-with open("test_data.json", encoding="utf8") as f:
-    data = json.loads(f.read())["results"]
-
-    chapters = []
-
-    for obj in data:
-        if obj["_class"] == "chapter":
-            obj["lectures"] = []
-            chapters.append(obj)
-        elif obj["_class"] == "lecture" and obj["asset"]["asset_type"] == "Video":
-            chapters[-1]["lectures"].append(obj)
-    
-    for chapter in chapters:
-        chapter_dir = f"%s\\%s. %s" % (download_dir,chapters.index(chapter) + 1,chapter["title"])
-        if not os.path.isdir(chapter_dir):
-            os.mkdir(chapter_dir)
-
-        for lecture in chapter["lectures"]:
-            lecture_title = lecture["title"]
-            lecture_path = f"%s\\%s. %s.mp4" % (chapter_dir, chapter["lectures"].index(lecture) + 1,sanitize(lecture_title))
-            lecture_asset = lecture["asset"]
-            if lecture_asset["media_license_token"] == None:
-                # not encrypted
-                lecture_url = lecture_asset["media_sources"][0]["src"] # best quality is the first index
-                download(lecture_url, lecture_path, lecture_title)
-            else:
-                # encrypted
-                print("drm")
-                pass