mirror of
https://cdm-project.com/Download-Tools/udemy-downloader.git
synced 2025-04-30 02:34:25 +02:00
download drm videos and non-drm videos, also added progress bar
This commit is contained in:
parent
ecb5a78498
commit
1d57644cdf
@ -1 +1,2 @@
|
||||
UDEMY_BEARER=enter bearer token without the Bearer prefix
|
||||
UDEMY_BEARER=enter bearer token without the Bearer prefix
|
||||
UDEMY_COURSE_ID=course id goes here
|
4
.gitignore
vendored
4
.gitignore
vendored
@ -115,4 +115,6 @@ dmypy.json
|
||||
*.mp4
|
||||
keyfile.json
|
||||
.env
|
||||
test_data.json
|
||||
test_data.json
|
||||
out_dir
|
||||
working_dir
|
78
README.md
78
README.md
@ -1,37 +1,75 @@
|
||||
# Udemy Downloader with DRM support
|
||||
|
||||
### NOTE
|
||||
# NOTE
|
||||
|
||||
This program is WIP. The code is provided as-is, and I am not responsible for any legal repercussions resulting from the use of this program.
|
||||
|
||||
## Support
|
||||
# Support
|
||||
|
||||
if you want help using the program, join [my discord server](https://discord.gg/5B3XVb4RRX) or use [github issues](https://github.com/Puyodead1/udemy-downloader/issues)
|
||||
|
||||
## License
|
||||
# License
|
||||
|
||||
All code is licensed under the MIT license
|
||||
|
||||
## Description
|
||||
# Description
|
||||
|
||||
Simple and hacky program to download a Udemy course. It supports DRM videos but requires the user to acquire the decryption key (for legal reasons).
|
||||
|
||||
## Requirements
|
||||
# Requirements
|
||||
|
||||
1. You would need to download ffmpeg and mp4decrypter from Bento4 SDK and ensure they are in path(typing their name in cmd invokes them).
|
||||
|
||||
## Usage
|
||||
*quick and dirty how-to*
|
||||
1. you need to open the network tab, and find the index.mpd file url
|
||||

|
||||
2. open the `dashdownloader_multisegment.py` file and replace ``mpd url`` with the url
|
||||

|
||||
3. Change the video title and output path to whatever you want the video to be called
|
||||

|
||||
- ``175. Inverse Transforming Vectors`` is what you would replace
|
||||
4. rename ``keyfile.example.json`` to ``keyfile.json``
|
||||
5. open ``keyfile.json`` and enter the key id and decryption key for the video
|
||||

|
||||

|
||||
6. run ``python dashdownloader_multisegment.py`` in the terminal to start the download.
|
||||
- make sure you have ffmpeg and mp4decrypt installed in your path
|
||||
# Usage
|
||||
|
||||
_quick and dirty how-to_
|
||||
|
||||
You will need to get a few things before you can use this program:
|
||||
|
||||
- Decryption Key ID
|
||||
- Decryption Key
|
||||
- Udemy Course ID
|
||||
- Udemy Bearer Token
|
||||
|
||||
### Setting up
|
||||
|
||||
- rename `.env.sample` to `.env`
|
||||
- rename `keyfile.example.json` to `keyfile.json`
|
||||
|
||||
### Acquire bearer token
|
||||
|
||||
- open dev tools
|
||||
- go to network tab
|
||||
- in the search field, enter `api-2.0/courses`
|
||||

|
||||
- click a random request
|
||||
- locate the `Request Headers` section
|
||||
- copy the text after `Authorization`, it should look like `Bearer xxxxxxxxxxx`
|
||||

|
||||
- enter this in the `.env` file after `UDEMY_BEARER=`, without the leading `Bearer ` prefix (the program adds it itself)
|
||||
|
||||
### Acquire Course ID
|
||||
|
||||
- Follow above before following this
|
||||
- locate the request url field
|
||||

|
||||
- copy the number after `/api-2.0/courses/` as seen highlighted in the above picture
|
||||
- enter this in the `.env` file after `UDEMY_COURSE_ID=`
|
||||
|
||||
### Key ID and Key
|
||||
|
||||
It is up to you to acquire the key and key id.
|
||||
|
||||
- Enter the key and key id in the `keyfile.json`
|
||||

|
||||

|
||||
|
||||
### Start Downloading
|
||||
|
||||
You can now run `python main.py` to start downloading. The course will download to `out_dir`; chapters are separated into folders.
|
||||
|
||||
# Credits
|
||||
|
||||
https://github.com/Jayapraveen/Drm-Dash-stream-downloader - for the original code which this is based on
|
||||
https://github.com/alastairmccormack/pywvpssh - For code related to PSSH extraction
|
||||
https://github.com/alastairmccormack/pymp4parse/ - For code related to mp4 box parsing (used by pywvpssh)
|
||||
|
279
main.py
Normal file
279
main.py
Normal file
@ -0,0 +1,279 @@
|
||||
import os,requests,shutil,json,glob,urllib.request
|
||||
from sanitize_filename import sanitize
|
||||
import urllib.request
|
||||
from tqdm import tqdm
|
||||
from dotenv import load_dotenv
|
||||
from mpegdash.parser import MPEGDASHParser
|
||||
from mpegdash.nodes import Descriptor
|
||||
from mpegdash.utils import (
|
||||
parse_attr_value, parse_child_nodes, parse_node_value,
|
||||
write_attr_value, write_child_node, write_node_value
|
||||
)
|
||||
from utils import extract_kid
|
||||
|
||||
# Load UDEMY_* settings from a local .env file into the process environment.
load_dotenv()

course_id = os.getenv("UDEMY_COURSE_ID")  # the course id to download
# you can find this in the network tab, its a request header under
# Authorization/x-udemy-authorization
bearer_token = os.getenv("UDEMY_BEARER")
if not course_id or not bearer_token:
    # Fail early with a readable message instead of a TypeError on the
    # string concatenation below.
    raise SystemExit("UDEMY_COURSE_ID and UDEMY_BEARER must both be set in the .env file")
header_bearer = "Bearer " + bearer_token

home_dir = os.getcwd()
# os.path.join yields the same "<cwd>\out_dir" style paths on Windows as the
# previous backslash literals, without relying on invalid escape sequences.
download_dir = os.path.join(home_dir, "out_dir")
# folder that receives the downloaded segments for DRM videos
working_dir = os.path.join(home_dir, "working_dir")
retry = 3  # how many times a failed segment download is retried
keyfile_path = os.path.join(home_dir, "keyfile.json")

os.makedirs(working_dir, exist_ok=True)
os.makedirs(download_dir, exist_ok=True)

# Get the keys: `keyfile` ends up as the parsed JSON object, which decrypt()
# indexes by lower-cased key id.
with open(keyfile_path, "r") as keyfile_handle:
    keyfile = json.load(keyfile_handle)
|
||||
|
||||
"""
|
||||
@author Jayapraveen
|
||||
"""
|
||||
def durationtoseconds(period):
    """Convert a duration string such as ``PT1H2M3.5S`` into seconds.

    Accepted layout is ``P[nD]T[nD][nH][nM][n[.n]S]``: the standard ISO 8601
    day position (before ``T``) as well as the ``PTxDxHxMxS`` layout this
    function historically accepted. Every component is optional, but at least
    one must be present.

    :param period: duration string taken from the manifest.
    :return: total number of seconds as a float, or ``None`` (after printing
        "Duration Format Error") when *period* is not a supported duration.
    """
    import re

    match = None
    if isinstance(period, str):
        match = re.fullmatch(
            r"P(?:(\d+)D)?"
            r"(?:T(?:(\d+)D)?(?:(\d+)H)?(?:(\d+)M)?(?:(\d+(?:\.\d+)?)S)?)?",
            period,
        )
    if match is None or not any(match.groups()):
        print("Duration Format Error")
        return None

    day_before_t, day_after_t, hour, minute, second = match.groups()
    day = int(day_before_t or day_after_t or 0)
    hour = int(hour or 0)
    minute = int(minute or 0)
    second = second or "0"
    print("Total time: " + str(day) + " days " + str(hour) + " hours " + str(minute) + " minutes and " + str(second) + " seconds")
    # Parse the seconds component as a real number. Re-assembling it from its
    # integer and fractional halves loses leading zeros ("5.05" -> 5.5) and
    # turns a plain "30" into 30.30.
    return float(day * 24 * 60 * 60 + hour * 60 * 60 + minute * 60) + float(second)
|
||||
|
||||
def download_media(filename, url, lecture_working_dir, epoch=0):
    """Download one media segment into *lecture_working_dir*.

    :param filename: name of the segment file (also used as progress label).
    :param url: URL of the segment.
    :param lecture_working_dir: directory the segment is written to.
    :param epoch: number of attempts already spent; the download is retried
        until this counter reaches the module-level ``retry`` limit.
    :return: ``True`` when the server answers 404 (taken to mean that the last
        segment was passed), ``False`` when the segment is present on disk
        (freshly downloaded or already there).
    :raises SystemExit: when the retry limit is exceeded.
    """
    # Check and write the same location; the skip test previously looked in
    # the current directory while the data went to lecture_working_dir.
    target = os.path.join(lecture_working_dir, filename)
    if os.path.isfile(target):
        print("Segment already downloaded.. skipping..")
        return False

    # Data is streamed into a temporary file and renamed on success so an
    # interrupted transfer is never mistaken for a finished segment.
    partial = target + ".part"

    while True:
        try:
            media = requests.get(url, stream=True)
        except requests.RequestException:
            failure = "Connection error: Reattempting download of segment.."
        else:
            if media.status_code == 404:
                print("Probably end hit!\n", url)
                return True  # Probably hit the last of the file
            if media.status_code == 200:
                # The header may be absent (e.g. chunked transfer encoding).
                length_header = media.headers.get("content-length")
                media_length = int(length_header) if length_header is not None else None
                written = 0
                try:
                    with tqdm(total=media_length, initial=0, unit='B', unit_scale=True, desc=filename) as pbar:
                        with open(partial, 'wb') as video_file:
                            for chunk in media.iter_content(chunk_size=1024):
                                if chunk:
                                    video_file.write(chunk)
                                    written += len(chunk)
                                    # the final chunk is usually shorter than 1024
                                    pbar.update(len(chunk))
                except (requests.RequestException, OSError):
                    failure = "Connection error: Reattempting download of segment.."
                else:
                    if media_length is None or written >= media_length:
                        os.replace(partial, target)
                        print("Segment downloaded: " + filename)
                        return False  # Successfully downloaded the file
                    failure = "Segment is faulty.. Redownloading..."
            else:
                failure = "Error fetching segment file.. Redownloading..."

        if epoch >= retry:
            raise SystemExit("Error fetching segment, exceeded retry times.")
        print(failure)
        epoch += 1
|
||||
|
||||
"""
|
||||
@author Jayapraveen
|
||||
"""
|
||||
def cleanup(path):
    """Remove the ``.mp4`` and ``.mpd`` files located directly in *path*.

    Files that cannot be removed are reported on stdout and skipped.

    :param path: directory whose leftover media/manifest files are deleted.
    """
    stale_files = []
    # mp4 files are collected (and therefore deleted) before mpd files.
    for suffix in ("mp4", "mpd"):
        stale_files.extend(glob.glob(path + '/*.' + suffix, recursive=True))

    for stale_file in stale_files:
        try:
            os.remove(stale_file)
        except OSError:
            print(f"Error deleting file: {stale_file}")
|
||||
|
||||
"""
|
||||
@author Jayapraveen
|
||||
"""
|
||||
def mux_process(video_title, lecture_working_dir, outfile):
    """Mux the decrypted audio and video tracks into one mp4 with ffmpeg.

    Reads ``decrypted_audio.mp4`` and ``decrypted_video.mp4`` from
    *lecture_working_dir* and copies both streams without re-encoding.

    :param video_title: value written to the ``title`` metadata field.
    :param lecture_working_dir: directory holding the decrypted tracks.
    :param outfile: output path; ``.mp4`` is appended unless the path already
        ends with it, so a caller passing a full ``*.mp4`` path no longer
        produces ``*.mp4.mp4``.
    """
    import subprocess

    output_file = outfile if outfile.lower().endswith(".mp4") else f"{outfile}.mp4"
    # An argument list (no shell) keeps quotes or other shell metacharacters
    # in the title or the paths from breaking or altering the command.
    command = [
        "ffmpeg", "-y",
        "-i", os.path.join(lecture_working_dir, "decrypted_audio.mp4"),
        "-i", os.path.join(lecture_working_dir, "decrypted_video.mp4"),
        "-acodec", "copy",
        "-vcodec", "copy",
        "-fflags", "+bitexact",
        "-map_metadata", "-1",
        "-metadata", f"title={video_title}",
        "-metadata", "creation_time=2020-00-00T70:05:30.000000Z",
        output_file,
    ]
    if os.name != "nt":
        # run with lowered priority where `nice` is available
        command = ["nice", "-n", "7"] + command
    result = subprocess.run(command)
    if result.returncode != 0:
        print(f"ffmpeg exited with status {result.returncode} for: {output_file}")
|
||||
|
||||
"""
|
||||
@author Jayapraveen
|
||||
"""
|
||||
def decrypt(kid, filename, lecture_working_dir):
    """Decrypt ``encrypted_<filename>.mp4`` into ``decrypted_<filename>.mp4``.

    The key is looked up in the module-level ``keyfile`` mapping by the
    lower-cased key id and handed to ``mp4decrypt`` for track 1.

    :param kid: key id of the track.
    :param filename: track name, e.g. ``"video"`` or ``"audio"``.
    :param lecture_working_dir: directory containing the encrypted file; the
        decrypted file is written next to it.
    :raises SystemExit: when no key is stored for *kid*.
    """
    import subprocess

    try:
        key = keyfile[kid.lower()]
    except KeyError:
        raise SystemExit("Key not found") from None

    # Argument list instead of a shell string: no quoting problems with paths.
    command = [
        "mp4decrypt",
        "--key", f"1:{key}",
        os.path.join(lecture_working_dir, f"encrypted_{filename}.mp4"),
        os.path.join(lecture_working_dir, f"decrypted_{filename}.mp4"),
    ]
    if os.name != "nt":
        # run with lowered priority where `nice` is available
        command = ["nice", "-n", "7"] + command
    result = subprocess.run(command)
    if result.returncode != 0:
        print(f"mp4decrypt exited with status {result.returncode} for: {filename}")
|
||||
|
||||
"""
|
||||
@author Jayapraveen
|
||||
"""
|
||||
def _concat_segments(directory, segment_names, output_name):
    """Append the named files from *directory*, in order, into *output_name*."""
    with open(os.path.join(directory, output_name), "wb") as merged:
        for segment_name in segment_names:
            with open(os.path.join(directory, segment_name), "rb") as part:
                shutil.copyfileobj(part, merged)


def handle_irregular_segments(media_info, video_title, lecture_working_dir, output_path):
    """Download, join, decrypt and mux all segments of one DRM lecture.

    Segments are fetched one by one until the server answers 404 for a video
    segment or the expected segment count is exhausted. The collected video
    and audio segments are then concatenated into ``encrypted_video.mp4`` /
    ``encrypted_audio.mp4``, decrypted and muxed.

    :param media_info: eight-item sequence as returned by manifest_parser():
        segment count, media URL template, init URL and extension for video,
        followed by the same four values for audio.
    :param video_title: title handed to mux_process().
    :param lecture_working_dir: directory used for all intermediate files.
    :param output_path: output path handed to mux_process().
    """
    # Both counts hold the same estimate; the audio one was the value that
    # ended up in use before, so it is kept as the loop bound.
    (_video_segment_count, video_url, video_init, video_extension,
     no_segment, audio_url, audio_init, audio_extension) = media_info

    video_init_name = "video_0.seg.mp4"
    audio_init_name = "audio_0.seg.mp4"

    download_media(video_init_name, video_init, lecture_working_dir)
    video_kid = extract_kid(os.path.join(lecture_working_dir, video_init_name))
    print("KID for video file is: " + video_kid)
    download_media(audio_init_name, audio_init, lecture_working_dir)
    audio_kid = extract_kid(os.path.join(lecture_working_dir, audio_init_name))
    print("KID for audio file is: " + audio_kid)

    # The init segments are always stored under their fixed *.mp4 names, so
    # they are listed explicitly instead of being derived from the extension.
    video_parts = [video_init_name]
    audio_parts = [audio_init_name]
    end_reached = False
    for count in range(1, no_segment):
        video_name = f"video_{count}.seg.{video_extension}"
        audio_name = f"audio_{count}.seg.{audio_extension}"
        video_status = download_media(video_name, video_url.replace("$Number$", str(count)), lecture_working_dir)
        audio_status = download_media(audio_name, audio_url.replace("$Number$", str(count)), lecture_working_dir)
        if video_status:
            # 404 on the video track: every earlier segment is on disk.
            end_reached = True
            break
        video_parts.append(video_name)
        if not audio_status:
            audio_parts.append(audio_name)

    if not end_reached:
        # Previously nothing at all was produced in this situation.
        print("Expected segment count exhausted before the end of the stream; output may be incomplete")

    # Joining in Python avoids changing the working directory and the
    # platform specific `copy /b` / `cat` shell commands.
    _concat_segments(lecture_working_dir, video_parts, "encrypted_video.mp4")
    _concat_segments(lecture_working_dir, audio_parts, "encrypted_audio.mp4")
    decrypt(video_kid, "video", lecture_working_dir)
    decrypt(audio_kid, "audio", lecture_working_dir)
    mux_process(video_title, lecture_working_dir, output_path)
|
||||
|
||||
"""
|
||||
@author Jayapraveen
|
||||
"""
|
||||
def manifest_parser(mpd_url):
    """Fetch a DASH manifest and collect the segment info of its tracks.

    The manifest is saved as ``manifest.mpd`` in the module-level
    ``working_dir`` and parsed from there. For every adaptation set the last
    representation is used (treated as the best quality by this program).

    :param mpd_url: URL of the ``.mpd`` manifest.
    :return: flat list ``video + audio`` where each part holds, per segment
        template: estimated segment count, media URL template, init segment
        URL and the media file extension.
    :raises ValueError: when the presentation duration cannot be parsed.
    """
    video = []
    audio = []

    response = requests.get(mpd_url)
    # do not try to parse an HTTP error page as a manifest
    response.raise_for_status()
    manifest_path = os.path.join(working_dir, "manifest.mpd")
    with open(manifest_path, 'w', encoding="utf-8") as manifest_handler:
        manifest_handler.write(response.text)
    mpd = MPEGDASHParser.parse(manifest_path)

    running_time = durationtoseconds(mpd.media_presentation_duration)
    if running_time is None:
        raise ValueError(f"Unsupported media presentation duration: {mpd.media_presentation_duration!r}")
    # Deliberate over-estimate (6 seconds per segment plus a margin): the
    # downloader stops at the first 404 rather than at this count.
    approx_no_segments = round(running_time / 6) + 10  # approximate of 6 sec per segment

    for period in mpd.periods:
        for adapt_set in period.adaptation_sets:
            content_type = adapt_set.mime_type
            print("Processing " + content_type)
            best_representation = adapt_set.representations[-1]  # Max Quality
            for segment in best_representation.segment_templates or []:
                if segment.duration:
                    print("Media segments are of equal timeframe")
                else:
                    print("Media segments are of inequal timeframe")
                # The same estimate is recorded for both kinds of template so
                # the caller always receives a complete description.
                print("Expected No of segments:", approx_no_segments)
                segment_extension = segment.media.split(".")[-1]
                track_info = [approx_no_segments, segment.media, segment.initialization, segment_extension]
                if content_type == "audio/mp4":
                    audio.extend(track_info)
                elif content_type == "video/mp4":
                    video.extend(track_info)
    return video + audio
|
||||
|
||||
|
||||
|
||||
"""
|
||||
@author Puyodead1
|
||||
"""
|
||||
def download(url, path, filename):
    """Download *url* to *path*, resuming a partial file when possible.

    @param: url to download file
    @param: path place to put the file
    @param: filename used for progress bar
    @return: size of the remote file in bytes, as reported by the server
    """
    # requests.head() does not follow redirects unless asked to; without this
    # the Content-Length of a redirect response would be read (or be missing).
    head = requests.head(url, allow_redirects=True)
    file_size = int(head.headers["Content-Length"])

    if os.path.exists(path):
        print("file exists")
        first_byte = os.path.getsize(path)
    else:
        first_byte = 0
    if first_byte >= file_size:
        return file_size

    # Byte positions are zero based and inclusive: the last byte is size - 1.
    header = {"Range": "bytes=%s-%s" % (first_byte, file_size - 1)}
    req = requests.get(url, headers=header, stream=True)
    req.raise_for_status()
    if req.status_code == 206:
        mode = 'ab'
    else:
        # The server ignored the Range header and sends the whole file, so
        # appending would duplicate the bytes already on disk.
        mode = 'wb'
        first_byte = 0

    pbar = tqdm(
        total=file_size, initial=first_byte,
        unit='B', unit_scale=True, desc=filename)
    try:
        with open(path, mode) as f:
            for chunk in req.iter_content(chunk_size=1024):
                if chunk:
                    f.write(chunk)
                    # the final chunk is usually shorter than 1024 bytes
                    pbar.update(len(chunk))
    finally:
        pbar.close()
    return file_size
|
||||
|
||||
def parse(data):
    """Group curriculum items into chapters and download every video lecture.

    Items of class ``chapter`` open a new chapter; ``lecture`` items whose
    asset type is ``Video`` are added to the most recent chapter. Each chapter
    gets a numbered folder below ``download_dir`` and each lecture a numbered
    ``.mp4`` inside it. Lectures without a media license token are downloaded
    directly; the others go through the DASH/DRM pipeline.

    :param data: list of curriculum item dicts (the ``results`` array of the
        curriculum API response).
    """
    chapters = []
    for obj in data:
        if obj["_class"] == "chapter":
            obj["lectures"] = []
            chapters.append(obj)
        elif obj["_class"] == "lecture" and obj["asset"]["asset_type"] == "Video":
            if not chapters:
                # A lecture listed before any chapter used to raise an
                # IndexError; collect such lectures in a placeholder chapter.
                chapters.append({"_class": "chapter", "title": "Uncategorized", "lectures": []})
            chapters[-1]["lectures"].append(obj)

    for chapter_number, chapter in enumerate(chapters, start=1):
        # The chapter title is sanitized like the lecture title below, since
        # it becomes part of a directory name.
        chapter_dir = os.path.join(download_dir, f"{chapter_number}. {sanitize(chapter['title'])}")
        os.makedirs(chapter_dir, exist_ok=True)

        for lecture_number, lecture in enumerate(chapter["lectures"], start=1):
            lecture_title = lecture["title"]
            lecture_stem = os.path.join(chapter_dir, f"{lecture_number}. {sanitize(lecture_title)}")
            lecture_path = lecture_stem + ".mp4"
            lecture_asset = lecture["asset"]
            if lecture_asset["media_license_token"] is None:
                # not encrypted
                lecture_url = lecture_asset["media_sources"][0]["src"]  # best quality is the first index
                download(lecture_url, lecture_path, lecture_title)
            else:
                # encrypted
                print(f"Lecture %s has DRM, attempting to download" % lecture_title)
                # folder for the ephemeral segment files of this lecture
                lecture_working_dir = os.path.join(working_dir, str(lecture_asset["id"]))
                os.makedirs(lecture_working_dir, exist_ok=True)
                mpd_url = lecture_asset["media_sources"][1]["src"]  # index 1 is the dash
                media_info = manifest_parser(mpd_url)
                # The muxer appends ".mp4" itself, so it receives the path
                # without the extension to end up at lecture_path.
                handle_irregular_segments(media_info, lecture_title, lecture_working_dir, lecture_stem)
                cleanup(lecture_working_dir)
|
||||
|
||||
# Fetch the complete curriculum (chapters, lectures and their assets) of the
# configured course in a single request, then download everything in it.
# The f-string already interpolates course_id; the former extra .format()
# call was redundant and would fail on a value containing braces.
curriculum_url = f"https://udemy.com/api-2.0/courses/{course_id}/cached-subscriber-curriculum-items?fields[asset]=results,title,external_url,time_estimation,download_urls,slide_urls,filename,asset_type,captions,media_license_token,course_is_drmed,media_sources,stream_urls,body&fields[chapter]=object_index,title,sort_order&fields[lecture]=id,title,object_index,asset,supplementary_assets,view_html&page_size=10000"
# The token is sent in both headers named in the bearer token comment above.
r = requests.get(curriculum_url, headers={"Authorization": header_bearer, "x-udemy-authorization": header_bearer})
if r.status_code == 200:
    data = r.json()
    parse(data["results"])
else:
    print("An error occurred while trying to fetch course data!")
    print(r.text)
|
||||
|
||||
# with open("test_data.json", encoding="utf8") as f:
|
||||
# data = json.loads(f.read())["results"]
|
||||
# parse(data)
|
@ -1,82 +0,0 @@
|
||||
import requests
|
||||
import json
|
||||
import os
|
||||
from sanitize_filename import sanitize
|
||||
import urllib.request
|
||||
from tqdm import tqdm
|
||||
from dotenv import load_dotenv
|
||||
|
||||
# Load UDEMY_* settings from a local .env file into the process environment.
load_dotenv()

course_id = "657932"  # the course id to download
# you can find this in the network tab, its a request header under
# Authorization/x-udemy-authorization
bearer_token = os.getenv("UDEMY_BEARER")
if not bearer_token:
    # Fail early with a readable message instead of a TypeError on the
    # string concatenation below.
    raise SystemExit("UDEMY_BEARER must be set in the .env file")
header_bearer = "Bearer " + bearer_token

# Live API request, currently disabled in favour of the local test_data.json:
#r = requests.get(f"https://udemy.com/api-2.0/courses/{course_id}/cached-subscriber-curriculum-items?fields[asset]=results,title,external_url,time_estimation,download_urls,slide_urls,filename,asset_type,captions,media_license_token,course_is_drmed,media_sources,stream_urls,body&fields[chapter]=object_index,title,sort_order&fields[lecture]=id,title,object_index,asset,supplementary_assets,view_html&page_size=10000".format(course_id), headers={"Authorization": header_bearer, "x-udemy-authorization": header_bearer})
# if r.status_code == 200:
#     # loop
#     data = r.json()
#     for result in data:
#         print(result)
# else:
#     print("An error occurred while trying to fetch coure data!")
#     print(r.text)

# Same value as os.getcwd() + "\\out_dir" on Windows.
download_dir = os.path.join(os.getcwd(), "out_dir")
|
||||
|
||||
def download(url, path, filename):
    """Download *url* to *path*, resuming a partial file when possible.

    @param: url to download file
    @param: path place to put the file
    @param: filename used for progress bar
    @return: size of the remote file in bytes, as reported by the server
    """
    # requests.head() does not follow redirects unless asked to; without this
    # the Content-Length of a redirect response would be read (or be missing).
    head = requests.head(url, allow_redirects=True)
    file_size = int(head.headers["Content-Length"])

    if os.path.exists(path):
        print("file exists")
        first_byte = os.path.getsize(path)
    else:
        first_byte = 0
    if first_byte >= file_size:
        return file_size

    # Byte positions are zero based and inclusive: the last byte is size - 1.
    header = {"Range": "bytes=%s-%s" % (first_byte, file_size - 1)}
    req = requests.get(url, headers=header, stream=True)
    req.raise_for_status()
    if req.status_code == 206:
        mode = 'ab'
    else:
        # The server ignored the Range header and sends the whole file, so
        # appending would duplicate the bytes already on disk.
        mode = 'wb'
        first_byte = 0

    pbar = tqdm(
        total=file_size, initial=first_byte,
        unit='B', unit_scale=True, desc=filename)
    try:
        with open(path, mode) as f:
            for chunk in req.iter_content(chunk_size=1024):
                if chunk:
                    f.write(chunk)
                    # the final chunk is usually shorter than 1024 bytes
                    pbar.update(len(chunk))
    finally:
        pbar.close()
    return file_size
|
||||
|
||||
# Read the curriculum items from a locally saved API response.
with open("test_data.json", encoding="utf8") as f:
    data = json.load(f)["results"]

# Group the flat item list: a "chapter" item opens a new chapter and the
# video lectures that follow are attached to the most recent one.
chapters = []
for obj in data:
    if obj["_class"] == "chapter":
        obj["lectures"] = []
        chapters.append(obj)
    elif obj["_class"] == "lecture" and obj["asset"]["asset_type"] == "Video":
        if not chapters:
            # A lecture listed before any chapter used to raise an IndexError;
            # collect such lectures in a placeholder chapter.
            chapters.append({"_class": "chapter", "title": "Uncategorized", "lectures": []})
        chapters[-1]["lectures"].append(obj)

for chapter_number, chapter in enumerate(chapters, start=1):
    # The chapter title is sanitized like the lecture title below, since it
    # becomes part of a directory name.
    chapter_dir = os.path.join(download_dir, f"{chapter_number}. {sanitize(chapter['title'])}")
    # makedirs also creates out_dir itself when it does not exist yet
    os.makedirs(chapter_dir, exist_ok=True)

    for lecture_number, lecture in enumerate(chapter["lectures"], start=1):
        lecture_title = lecture["title"]
        lecture_path = os.path.join(chapter_dir, f"{lecture_number}. {sanitize(lecture_title)}.mp4")
        lecture_asset = lecture["asset"]
        if lecture_asset["media_license_token"] is None:
            # not encrypted
            lecture_url = lecture_asset["media_sources"][0]["src"]  # best quality is the first index
            download(lecture_url, lecture_path, lecture_title)
        else:
            # encrypted: not handled by this script, only reported
            print("drm")
|
Loading…
x
Reference in New Issue
Block a user