download drm videos and non-drm videos, also added progress bar

This commit is contained in:
Puyodead1 2021-05-18 20:31:39 -04:00
parent ecb5a78498
commit 1d57644cdf
5 changed files with 342 additions and 104 deletions

View File

@ -1 +1,2 @@
UDEMY_BEARER=enter bearer token without the Bearer prefix UDEMY_BEARER=enter bearer token without the Bearer prefix
UDEMY_COURSE_ID=course id goes here

2
.gitignore vendored
View File

@ -116,3 +116,5 @@ dmypy.json
keyfile.json keyfile.json
.env .env
test_data.json test_data.json
out_dir
working_dir

View File

@ -1,37 +1,75 @@
# Udemy Downloader with DRM support # Udemy Downloader with DRM support
### NOTE # NOTE
This program is WIP, the code is provided as-is and i am not held resposible for any legal repercussions resulting from the use of this program. This program is WIP, the code is provided as-is and i am not held resposible for any legal repercussions resulting from the use of this program.
## Support # Support
if you want help using the program, join [my discord server](https://discord.gg/5B3XVb4RRX) or use [github issues](https://github.com/Puyodead1/udemy-downloader/issues) if you want help using the program, join [my discord server](https://discord.gg/5B3XVb4RRX) or use [github issues](https://github.com/Puyodead1/udemy-downloader/issues)
## License # License
All code is licensed under the MIT license All code is licensed under the MIT license
## Description # Description
Simple and hacky program to download a udemy course, has support for DRM videos but requires the user to aquire the decryption key (for legal reasons). Simple and hacky program to download a udemy course, has support for DRM videos but requires the user to aquire the decryption key (for legal reasons).
## Requirements # Requirements
1. You would need to download ffmpeg and mp4decrypter from Bento4 SDK and ensure they are in path(typing their name in cmd invokes them). 1. You would need to download ffmpeg and mp4decrypter from Bento4 SDK and ensure they are in path(typing their name in cmd invokes them).
## Usage # Usage
*quick and dirty how-to*
1. you need to open the network tab, and find the index.mpd file url _quick and dirty how-to_
![index mpd](https://i.imgur.com/MW78CAu.png)
2. open the `dashdownloader_multisegment.py` file and replace ``mpd url`` with the url You will need to get a few things before you can use this program:
![mpd url](https://i.imgur.com/YfGSPKd.png)
3. Change the video title and output path to whatever you want the video to be called - Decryption Key ID
![title](https://i.imgur.com/lymSmag.png) - Decryption Key
- ``175. Inverse Transforming Vectors`` is what your would replace - Udemy Course ID
4. rename ``keyfile.example.json`` to ``keyfile.json`` - Udemy Bearer Token
5. open ``keyfile.json`` and enter the key id and decryption key for the video
![keyfile example](https://i.imgur.com/naABWva.png) ### Setting up
- rename `.env.sample` to `.env`
- rename `keyfile.example.json` to `keyfile.json`
### Aquire bearer token
- open dev tools
- go to network tab
- in the search field, enter `api-2.0/courses`
![Valid udemy api requests](https://i.imgur.com/Or371l7.png)
- click a random request
- locate the `Request Headers` section
- copy the the text after `Authorization`, it should look like `Bearer xxxxxxxxxxx`
![bearer token example](https://i.imgur.com/FhQdwgD.png)
- enter this in the `.env` file after `UDEMY_BEARER=`
### Aquire Course ID
- Follow above before following this
- locate the request url field
![request url](https://i.imgur.com/EUIV3bk.png)
- copy the number after `/api-2.0/courses/` as seen highlighed in the above picture
- enter this in the `.env` file after `UDEMY_COURSE_ID=`
### Key ID and Key
It is up to you to aquire the key and key id.
- Enter the key and key id in the `keyfile.json`
![keyfile example](https://i.imgur.com/wLPsqOR.png)
![example key and kid from console](https://i.imgur.com/awgndZA.png) ![example key and kid from console](https://i.imgur.com/awgndZA.png)
6. run ``python dashdownloader_multisegment.py`` in the terminal to start the download.
- make sure you have ffmpeg and mp4decrypt installed in your path ### Start Downloading
You can now run `python main.py` to start downloading. The course will download to `out_dir`, chapters are seperated into folders.
# Credits # Credits
https://github.com/Jayapraveen/Drm-Dash-stream-downloader - for the original code which this is based on https://github.com/Jayapraveen/Drm-Dash-stream-downloader - for the original code which this is based on
https://github.com/alastairmccormack/pywvpssh - For code related to PSSH extraction https://github.com/alastairmccormack/pywvpssh - For code related to PSSH extraction
https://github.com/alastairmccormack/pymp4parse/ - For code related to mp4 box parsing (used by pywvpssh) https://github.com/alastairmccormack/pymp4parse/ - For code related to mp4 box parsing (used by pywvpssh)

279
main.py Normal file
View File

@ -0,0 +1,279 @@
import os,requests,shutil,json,glob,urllib.request
from sanitize_filename import sanitize
import urllib.request
from tqdm import tqdm
from dotenv import load_dotenv
from mpegdash.parser import MPEGDASHParser
from mpegdash.nodes import Descriptor
from mpegdash.utils import (
parse_attr_value, parse_child_nodes, parse_node_value,
write_attr_value, write_child_node, write_node_value
)
from utils import extract_kid
load_dotenv()
course_id = os.getenv("UDEMY_COURSE_ID") # the course id to download
bearer_token = os.getenv("UDEMY_BEARER") # you can find this in the network tab, its a request header under Authorization/x-udemy-authorization
header_bearer = "Bearer " + bearer_token
download_dir = "%s\out_dir" % os.getcwd()
working_dir = "%s\working_dir" % os.getcwd() # set the folder to download segments for DRM videos
retry = 3
home_dir = os.getcwd();
keyfile_path = "%s\keyfile.json" % os.getcwd()
if not os.path.exists(working_dir):
os.makedirs(working_dir)
if not os.path.exists(download_dir):
os.makedirs(download_dir)
#Get the keys
with open(keyfile_path,'r') as keyfile:
keyfile = keyfile.read()
keyfile = json.loads(keyfile)
"""
@author Jayapraveen
"""
def durationtoseconds(period):
#Duration format in PTxDxHxMxS
if(period[:2] == "PT"):
period = period[2:]
day = int(period.split("D")[0] if 'D' in period else 0)
hour = int(period.split("H")[0].split("D")[-1] if 'H' in period else 0)
minute = int(period.split("M")[0].split("H")[-1] if 'M' in period else 0)
second = period.split("S")[0].split("M")[-1]
print("Total time: " + str(day) + " days " + str(hour) + " hours " + str(minute) + " minutes and " + str(second) + " seconds")
total_time = float(str((day * 24 * 60 * 60) + (hour * 60 * 60) + (minute * 60) + (int(second.split('.')[0]))) + '.' + str(int(second.split('.')[-1])))
return total_time
else:
print("Duration Format Error")
return None
def download_media(filename,url,lecture_working_dir,epoch = 0):
if(os.path.isfile(filename)):
print("Segment already downloaded.. skipping..")
else:
media = requests.get(url, stream=True)
media_length = int(media.headers.get("content-length"))
if media.status_code == 200:
if(os.path.isfile(filename) and os.path.getsize(filename) >= media_length):
print("Segment already downloaded.. skipping write to disk..")
else:
try:
pbar = tqdm(total=media_length, initial=0,unit='MB', unit_scale=True, desc=filename)
with open(f"{lecture_working_dir}\\{filename}", 'wb') as video_file:
for chunk in media.iter_content(chunk_size=1024):
if chunk:
video_file.write(chunk)
pbar.update(1024)
pbar.close()
print("Segment downloaded: " + filename)
return False #Successfully downloaded the file
except:
print("Connection error: Reattempting download of segment..")
download_media(filename,url, lecture_working_dir,epoch + 1)
if os.path.getsize(filename) >= media_length:
pass
else:
print("Segment is faulty.. Redownloading...")
download_media(filename,url, lecture_working_dir,epoch + 1)
elif(media.status_code == 404):
print("Probably end hit!\n",url)
return True #Probably hit the last of the file
else:
if (epoch > retry):
exit("Error fetching segment, exceeded retry times.")
print("Error fetching segment file.. Redownloading...")
download_media(filename,url, lecture_working_dir,epoch + 1)
"""
@author Jayapraveen
"""
def cleanup(path):
leftover_files = glob.glob(path + '/*.mp4', recursive=True)
mpd_files = glob.glob(path + '/*.mpd', recursive=True)
leftover_files = leftover_files + mpd_files
for file_list in leftover_files:
try:
os.remove(file_list)
except OSError:
print(f"Error deleting file: {file_list}")
"""
@author Jayapraveen
"""
def mux_process(video_title,lecture_working_dir,outfile):
if os.name == "nt":
command = f"ffmpeg -y -i \"{lecture_working_dir}\\decrypted_audio.mp4\" -i \"{lecture_working_dir}\\decrypted_video.mp4\" -acodec copy -vcodec copy -fflags +bitexact -map_metadata -1 -metadata title=\"{video_title}\" -metadata creation_time=2020-00-00T70:05:30.000000Z \"{outfile}.mp4\""
else:
command = f"nice -n 7 ffmpeg -y -i \"{lecture_working_dir}\\decrypted_audio.mp4\" -i \"{lecture_working_dir}\\decrypted_video.mp4\" -acodec copy -vcodec copy -fflags +bitexact -map_metadata -1 -metadata title=\"{video_title}\" -metadata creation_time=2020-00-00T70:05:30.000000Z \"{outfile}.mp4\""
os.system(command)
"""
@author Jayapraveen
"""
def decrypt(kid,filename,lecture_working_dir):
try:
key = keyfile[kid.lower()]
except KeyError as error:
exit("Key not found")
if(os.name == "nt"):
os.system(f"mp4decrypt --key 1:{key} \"{lecture_working_dir}\\encrypted_{filename}.mp4\" \"{lecture_working_dir}\\decrypted_{filename}.mp4\"")
else:
os.system(f"nice -n 7 mp4decrypt --key 1:{key} \"{lecture_working_dir}\\encrypted_{filename}.mp4\" \"{lecture_working_dir}\\decrypted_{filename}.mp4\"")
"""
@author Jayapraveen
"""
def handle_irregular_segments(media_info,video_title,lecture_working_dir,output_path):
no_segment,video_url,video_init,video_extension,no_segment,audio_url,audio_init,audio_extension = media_info
download_media("video_0.seg.mp4",video_init,lecture_working_dir)
video_kid = extract_kid(f"{lecture_working_dir}\\video_0.seg.mp4")
print("KID for video file is: " + video_kid)
download_media("audio_0.seg.mp4",audio_init,lecture_working_dir)
audio_kid = extract_kid(f"{lecture_working_dir}\\audio_0.seg.mp4")
print("KID for audio file is: " + audio_kid)
for count in range(1,no_segment):
video_segment_url = video_url.replace("$Number$",str(count))
audio_segment_url = audio_url.replace("$Number$",str(count))
video_status = download_media(f"video_{str(count)}.seg.{video_extension}",video_segment_url,lecture_working_dir)
audio_status = download_media(f"audio_{str(count)}.seg.{audio_extension}",audio_segment_url,lecture_working_dir)
os.chdir(lecture_working_dir)
if(video_status):
if os.name == "nt":
video_concat_command = "copy /b " + "+".join([f"video_{i}.seg.{video_extension}" for i in range(0,count)]) + " encrypted_video.mp4"
audio_concat_command = "copy /b " + "+".join([f"audio_{i}.seg.{audio_extension}" for i in range(0,count)]) + " encrypted_audio.mp4"
else:
video_concat_command = "cat " + " ".join([f"video_{i}.seg.{video_extension}" for i in range(0,count)]) + " > encrypted_video.mp4"
audio_concat_command = "cat " + " ".join([f"audio_{i}.seg.{audio_extension}" for i in range(0,count)]) + " > encrypted_audio.mp4"
os.system(video_concat_command)
os.system(audio_concat_command)
decrypt(video_kid,"video",lecture_working_dir)
decrypt(audio_kid,"audio",lecture_working_dir)
os.chdir(home_dir)
mux_process(video_title,lecture_working_dir,output_path)
break
"""
@author Jayapraveen
"""
def manifest_parser(mpd_url):
video = []
audio = []
manifest = requests.get(mpd_url).text
with open(f"{working_dir}\\manifest.mpd",'w') as manifest_handler:
manifest_handler.write(manifest)
mpd = MPEGDASHParser.parse(f"{working_dir}\\manifest.mpd")
running_time = durationtoseconds(mpd.media_presentation_duration)
for period in mpd.periods:
for adapt_set in period.adaptation_sets:
print("Processing " + adapt_set.mime_type)
content_type = adapt_set.mime_type
repr = adapt_set.representations[-1] # Max Quality
for segment in repr.segment_templates:
if(segment.duration):
print("Media segments are of equal timeframe")
segment_time = segment.duration / segment.timescale
total_segments = running_time / segment_time
else:
print("Media segments are of inequal timeframe")
approx_no_segments = round(running_time / 6) + 10 # aproximate of 6 sec per segment
print("Expected No of segments:",approx_no_segments)
if(content_type == "audio/mp4"):
segment_extension = segment.media.split(".")[-1]
audio.append(approx_no_segments)
audio.append(segment.media)
audio.append(segment.initialization)
audio.append(segment_extension)
elif(content_type == "video/mp4"):
segment_extension = segment.media.split(".")[-1]
video.append(approx_no_segments)
video.append(segment.media)
video.append(segment.initialization)
video.append(segment_extension)
return video + audio
"""
@author Puyodead1
"""
def download(url, path, filename):
"""
@param: url to download file
@param: path place to put the file
@oaram: filename used for progress bar
"""
file_size = int(requests.head(url).headers["Content-Length"])
if os.path.exists(path):
print("file exists")
first_byte = os.path.getsize(path)
else:
first_byte = 0
if first_byte >= file_size:
return file_size
header = {"Range": "bytes=%s-%s" % (first_byte, file_size)}
pbar = tqdm(
total=file_size, initial=first_byte,
unit='MB', unit_scale=True, desc=filename)
req = requests.get(url, headers=header, stream=True)
with(open(path, 'ab')) as f:
for chunk in req.iter_content(chunk_size=1024):
if chunk:
f.write(chunk)
pbar.update(1024)
pbar.close()
return file_size
def parse(data):
chapters = []
for obj in data:
if obj["_class"] == "chapter":
obj["lectures"] = []
chapters.append(obj)
elif obj["_class"] == "lecture" and obj["asset"]["asset_type"] == "Video":
chapters[-1]["lectures"].append(obj)
for chapter in chapters:
chapter_dir = f"%s\\%s. %s" % (download_dir,chapters.index(chapter) + 1,chapter["title"])
if not os.path.exists(chapter_dir):
os.mkdir(chapter_dir)
for lecture in chapter["lectures"]:
lecture_title = lecture["title"]
lecture_path = f"%s\\%s. %s.mp4" % (chapter_dir, chapter["lectures"].index(lecture) + 1,sanitize(lecture_title))
lecture_asset = lecture["asset"]
if lecture_asset["media_license_token"] == None:
# not encrypted
lecture_url = lecture_asset["media_sources"][0]["src"] # best quality is the first index
download(lecture_url, lecture_path, lecture_title)
else:
# encrypted
print(f"Lecture %s has DRM, attempting to download" % lecture_title)
lecture_working_dir = "%s\%s" % (working_dir, lecture_asset["id"]) # set the folder to download ephemeral files
if not os.path.exists(lecture_working_dir):
os.mkdir(lecture_working_dir)
mpd_url = lecture_asset["media_sources"][1]["src"] # index 1 is the dash
base_url = mpd_url.split("index.mpd")[0]
media_info = manifest_parser(mpd_url)
handle_irregular_segments(media_info,lecture_title,lecture_working_dir,lecture_path)
cleanup(lecture_working_dir)
r = requests.get(f"https://udemy.com/api-2.0/courses/{course_id}/cached-subscriber-curriculum-items?fields[asset]=results,title,external_url,time_estimation,download_urls,slide_urls,filename,asset_type,captions,media_license_token,course_is_drmed,media_sources,stream_urls,body&fields[chapter]=object_index,title,sort_order&fields[lecture]=id,title,object_index,asset,supplementary_assets,view_html&page_size=10000".format(course_id), headers={"Authorization": header_bearer, "x-udemy-authorization": header_bearer})
if r.status_code == 200:
# loop
data = r.json()
parse(data["results"])
else:
print("An error occurred while trying to fetch coure data!")
print(r.text)
# with open("test_data.json", encoding="utf8") as f:
# data = json.loads(f.read())["results"]
# parse(data)

View File

@ -1,82 +0,0 @@
import requests
import json
import os
from sanitize_filename import sanitize
import urllib.request
from tqdm import tqdm
from dotenv import load_dotenv
load_dotenv()
course_id = "657932" # the course id to download
bearer_token = os.getenv("UDEMY_BEARER") # you can find this in the network tab, its a request header under Authorization/x-udemy-authorization
header_bearer = "Bearer " + bearer_token
#r = requests.get(f"https://udemy.com/api-2.0/courses/{course_id}/cached-subscriber-curriculum-items?fields[asset]=results,title,external_url,time_estimation,download_urls,slide_urls,filename,asset_type,captions,media_license_token,course_is_drmed,media_sources,stream_urls,body&fields[chapter]=object_index,title,sort_order&fields[lecture]=id,title,object_index,asset,supplementary_assets,view_html&page_size=10000".format(course_id), headers={"Authorization": header_bearer, "x-udemy-authorization": header_bearer})
# if r.status_code == 200:
# # loop
# data = r.json()
# for result in data:
# print(result)
# else:
# print("An error occurred while trying to fetch coure data!")
# print(r.text)
download_dir = os.getcwd() + "\\out_dir"
def download(url, path, filename):
"""
@param: url to download file
@param: path place to put the file
@oaram: filename used for progress bar
"""
file_size = int(requests.head(url).headers["Content-Length"])
if os.path.exists(path):
print("file exists")
first_byte = os.path.getsize(path)
else:
first_byte = 0
if first_byte >= file_size:
return file_size
header = {"Range": "bytes=%s-%s" % (first_byte, file_size)}
pbar = tqdm(
total=file_size, initial=first_byte,
unit='MB', unit_scale=True, desc=filename)
req = requests.get(url, headers=header, stream=True)
with(open(path, 'ab')) as f:
for chunk in req.iter_content(chunk_size=1024):
if chunk:
f.write(chunk)
pbar.update(1024)
pbar.close()
return file_size
with open("test_data.json", encoding="utf8") as f:
data = json.loads(f.read())["results"]
chapters = []
for obj in data:
if obj["_class"] == "chapter":
obj["lectures"] = []
chapters.append(obj)
elif obj["_class"] == "lecture" and obj["asset"]["asset_type"] == "Video":
chapters[-1]["lectures"].append(obj)
for chapter in chapters:
chapter_dir = f"%s\\%s. %s" % (download_dir,chapters.index(chapter) + 1,chapter["title"])
if not os.path.isdir(chapter_dir):
os.mkdir(chapter_dir)
for lecture in chapter["lectures"]:
lecture_title = lecture["title"]
lecture_path = f"%s\\%s. %s.mp4" % (chapter_dir, chapter["lectures"].index(lecture) + 1,sanitize(lecture_title))
lecture_asset = lecture["asset"]
if lecture_asset["media_license_token"] == None:
# not encrypted
lecture_url = lecture_asset["media_sources"][0]["src"] # best quality is the first index
download(lecture_url, lecture_path, lecture_title)
else:
# encrypted
print("drm")
pass