mirror of
https://cdm-project.com/Download-Tools/udemy-downloader.git
synced 2025-05-03 09:54:26 +02:00
Merge branch 'Puyodead1:master' into master
This commit is contained in:
commit
a205ec91bf
@ -1,2 +1 @@
|
|||||||
UDEMY_BEARER=enter bearer token without the Bearer prefix
|
UDEMY_BEARER=Your bearer token here
|
||||||
UDEMY_COURSE_ID=course id goes here
|
|
5
.gitignore
vendored
5
.gitignore
vendored
@ -119,4 +119,7 @@ test_data.json
|
|||||||
out_dir
|
out_dir
|
||||||
working_dir
|
working_dir
|
||||||
manifest.mpd
|
manifest.mpd
|
||||||
.vscode
|
.vscode
|
||||||
|
saved
|
||||||
|
*.aria2
|
||||||
|
info.py
|
101
README.md
101
README.md
@ -1,4 +1,5 @@
|
|||||||
# Udemy Downloader with DRM support
|
# Udemy Downloader with DRM support
|
||||||
|
|
||||||
[](https://forthebadge.com)
|
[](https://forthebadge.com)
|
||||||
[](https://forthebadge.com)
|
[](https://forthebadge.com)
|
||||||
[](https://forthebadge.com)
|
[](https://forthebadge.com)
|
||||||
@ -6,6 +7,7 @@
|
|||||||

|

|
||||||

|

|
||||||

|

|
||||||
|
|
||||||
# NOTE
|
# NOTE
|
||||||
|
|
||||||
This program is WIP, the code is provided as-is and I am not held responsible for any legal issues resulting from the use of this program.
|
This program is WIP, the code is provided as-is and I am not held responsible for any legal issues resulting from the use of this program.
|
||||||
@ -21,11 +23,11 @@ All code is licensed under the MIT license
|
|||||||
# Description
|
# Description
|
||||||
|
|
||||||
Simple program to download a Udemy course, has support for DRM videos but requires the user to acquire the decryption key (for legal reasons).<br>
|
Simple program to download a Udemy course, has support for DRM videos but requires the user to acquire the decryption key (for legal reasons).<br>
|
||||||
Current only Windows is supported but with some small modifications it should work on linux also (and maybe mac)
|
Windows is the primary development OS, but I've made an effort to support linux also.
|
||||||
|
|
||||||
# Requirements
|
# Requirements
|
||||||
|
|
||||||
1. You would need to download `ffmpeg` and `mp4decrypter`from Bento4 SDK and ensure they are in path (typing their name in cmd invokes them).
|
1. You would need to download `ffmpeg`, `aria2c` and `mp4decrypter` (from Bento4 SDK) and ensure they are in path (typing their name in cmd should invoke them).
|
||||||
|
|
||||||
# Usage
|
# Usage
|
||||||
|
|
||||||
@ -35,96 +37,91 @@ You will need to get a few things before you can use this program:
|
|||||||
|
|
||||||
- Decryption Key ID
|
- Decryption Key ID
|
||||||
- Decryption Key
|
- Decryption Key
|
||||||
- Udemy Course ID
|
- Udemy Course URL
|
||||||
- Udemy Bearer Token
|
- Udemy Bearer Token (aka access token for udemy-dl users)
|
||||||
|
|
||||||
### Setting up
|
### Setting up
|
||||||
|
|
||||||
- rename `.env.sample` to `.env`
|
- rename `.env.sample` to `.env` _(you only need to do this if you plan to use the .env file to store your bearer token)_
|
||||||
- rename `keyfile.example.json` to `keyfile.json`
|
- rename `keyfile.example.json` to `keyfile.json`
|
||||||
|
|
||||||
### Aquire bearer token
|
### Acquire Bearer Token
|
||||||
|
|
||||||
- open dev tools
|
- Firefox: [Udemy-DL Guide](https://github.com/r0oth3x49/udemy-dl/issues/389#issuecomment-491903900)
|
||||||
- go to network tab
|
- Chrome: [Udemy-DL Guide](https://github.com/r0oth3x49/udemy-dl/issues/389#issuecomment-492569372)
|
||||||
- in the search field, enter `api-2.0/courses`
|
- If you want to use the .env file to store your Bearer Token, edit the .env and add your token.
|
||||||
- 
|
|
||||||
- click a random request
|
|
||||||
- locate the `Request Headers` section
|
|
||||||
- copy the the text after `Authorization`, it should look like `Bearer xxxxxxxxxxx`
|
|
||||||
- 
|
|
||||||
- enter this in the `.env` file after `UDEMY_BEARER=` (you can also pass this as an argument, see advanced usage for more information)
|
|
||||||
|
|
||||||
### Aquire Course ID
|
|
||||||
|
|
||||||
- Follow above before following this
|
|
||||||
- locate the request url field
|
|
||||||
- 
|
|
||||||
- copy the number after `/api-2.0/courses/` as seen highlighed in the above picture
|
|
||||||
- enter this in the `.env` file after `UDEMY_COURSE_ID=` (you can also pass this as an argument, see advanced usage for more information)
|
|
||||||
|
|
||||||
### Key ID and Key
|
### Key ID and Key
|
||||||
|
|
||||||
It is up to you to aquire the key and key id.
|
It is up to you to acquire the key and key id. Please don't ask me for help acquiring these, decrypting DRM protected content can be considered piracy.
|
||||||
|
|
||||||
- Enter the key and key id in the `keyfile.json`
|
- Enter the key and key id in the `keyfile.json`
|
||||||
- 
|
- 
|
||||||
- 
|
- 
|
||||||
|
|
||||||
### Start Downloading
|
### Start Downloading
|
||||||
|
|
||||||
You can now run `python main.py` to start downloading. The course will download to `out_dir`, chapters are seperated into folders.
|
You can now run the program, see the examples below. The course will download to `out_dir`.
|
||||||
|
|
||||||
# Advanced Usage
|
# Advanced Usage
|
||||||
|
|
||||||
```
|
```
|
||||||
usage: main.py [-h] [-d] [-b BEARER_TOKEN] [-c COURSE_ID] [-q QUALITY] [-l LANG] [--skip-lectures] [--download-assets] [--download-captions]
|
usage: main.py [-h] -c COURSE_URL [-b BEARER_TOKEN] [-q QUALITY] [-l LANG] [--skip-lectures] [--download-assets] [--download-captions]
|
||||||
|
[--keep-vtt] [--skip-hls] [--info]
|
||||||
|
|
||||||
Udemy Downloader
|
Udemy Downloader
|
||||||
|
|
||||||
optional arguments:
|
optional arguments:
|
||||||
-h, --help show this help message and exit
|
-h, --help show this help message and exit
|
||||||
-d, --debug Use test_data.json rather than fetch from the udemy api.
|
-c COURSE_URL, --course-url COURSE_URL
|
||||||
|
The URL of the course to download
|
||||||
-b BEARER_TOKEN, --bearer BEARER_TOKEN
|
-b BEARER_TOKEN, --bearer BEARER_TOKEN
|
||||||
The Bearer token to use
|
The Bearer token to use
|
||||||
-c COURSE_ID, --course-id COURSE_ID
|
|
||||||
The ID of the course to download
|
|
||||||
-q QUALITY, --quality QUALITY
|
-q QUALITY, --quality QUALITY
|
||||||
Download specific video quality. (144, 360, 480, 720, 1080)
|
Download specific video quality. If the requested quality isn't available, the closest quality will be used. If not
|
||||||
-l LANG, --lang LANG The language to download for captions (Default is en)
|
specified, the best quality will be downloaded for each lecture
|
||||||
--skip-lectures If specified, lectures won't be downloaded.
|
-l LANG, --lang LANG The language to download for captions, specify 'all' to download all captions (Default is 'en')
|
||||||
--download-assets If specified, lecture assets will be downloaded.
|
--skip-lectures If specified, lectures won't be downloaded
|
||||||
--download-captions If specified, captions will be downloaded.
|
--download-assets If specified, lecture assets will be downloaded
|
||||||
|
--download-captions If specified, captions will be downloaded
|
||||||
|
--keep-vtt If specified, .vtt files won't be removed
|
||||||
|
--skip-hls If specified, hls streams will be skipped (faster fetching) (hls streams usually contain 1080p quality for non-drm
|
||||||
|
lectures)
|
||||||
|
--info If specified, only course information will be printed, nothing will be downloaded
|
||||||
```
|
```
|
||||||
|
|
||||||
- Passing a Bearer Token and Course ID as an argument
|
- Passing a Bearer Token and Course URL as arguments
|
||||||
- `python main.py -b <Bearer Token> -c <Course ID>`
|
- `python main.py -c <Course URL> -b <Bearer Token>`
|
||||||
|
- `python main.py -c https://www.udemy.com/courses/myawesomecourse -b <Bearer Token>`
|
||||||
- Download a specific quality
|
- Download a specific quality
|
||||||
- `python main.py -q 720`
|
- `python main.py -c <Course URL> -q 720`
|
||||||
- Download assets along with lectures
|
- Download assets along with lectures
|
||||||
- `python main.py --download-assets`
|
- `python main.py -c <Course URL> --download-assets`
|
||||||
- Download assets and specify a quality
|
- Download assets and specify a quality
|
||||||
- `python main.py -q 360 --download-assets`
|
- `python main.py -c <Course URL> -q 360 --download-assets`
|
||||||
- Download captions (Defaults to English)
|
- Download captions (Defaults to English)
|
||||||
- `python main.py --download-captions`
|
- `python main.py -c <Course URL> --download-captions`
|
||||||
- Download captions with specific language
|
- Download captions with specific language
|
||||||
- `python main.py --download-captions -l en` - English subtitles
|
- `python main.py -c <Course URL> --download-captions -l en` - English subtitles
|
||||||
- `python main.py --download-captions -l es` - Spanish subtitles
|
- `python main.py -c <Course URL> --download-captions -l es` - Spanish subtitles
|
||||||
- `python main.py --download-captions -l it` - Italian subtitles
|
- `python main.py -c <Course URL> --download-captions -l it` - Italian subtitles
|
||||||
- `python main.py --download-captions -l pl` - Polish Subtitles
|
- `python main.py -c <Course URL> --download-captions -l pl` - Polish Subtitles
|
||||||
- `python main.py --download-captions -l all` - Downloads all subtitles
|
- `python main.py -c <Course URL> --download-captions -l all` - Downloads all subtitles
|
||||||
- etc
|
- etc
|
||||||
- Skip downloading lecture videos
|
- Skip downloading lecture videos
|
||||||
- `python main.py --skip-lectures --download-captions` - Downloads only captions
|
- `python main.py -c <Course URL> --skip-lectures --download-captions` - Downloads only captions
|
||||||
- `python main.py --skip-lectures --download-assets` - Downloads only assets
|
- `python main.py -c <Course URL> --skip-lectures --download-assets` - Downloads only assets
|
||||||
|
- Keep .VTT caption files:
|
||||||
# Getting an error about "Accepting the latest terms of service"?
|
- `python main.py -c <Course URL> --download-captions --keep-vtt`
|
||||||
|
- Skip parsing HLS Streams (HLS streams usually contain 1080p quality for Non-DRM lectures):
|
||||||
- If you are using Udemy business, you must edit `main.py` and change `udemy.com` to `<portal name>.udemy.com`
|
- `python main.py -c <Course URL> --skip-hls`
|
||||||
|
- Print course information only:
|
||||||
|
- `python main.py -c <Course URL> --info`
|
||||||
|
|
||||||
# Credits
|
# Credits
|
||||||
|
|
||||||
- https://github.com/Jayapraveen/Drm-Dash-stream-downloader - For the original code which this is based on
|
- https://github.com/Jayapraveen/Drm-Dash-stream-downloader - For the original code which this is based on
|
||||||
- https://github.com/alastairmccormack/pywvpssh - For code related to PSSH extraction
|
- https://github.com/alastairmccormack/pywvpssh - For code related to PSSH extraction
|
||||||
- https://github.com/alastairmccormack/pymp4parse/ - For code related to mp4 box parsing (used by pywvpssh)
|
- https://github.com/alastairmccormack/pymp4parse - For code related to mp4 box parsing (used by pywvpssh)
|
||||||
- https://github.com/lbrayner/vtt-to-srt - For code related to converting subtitles from vtt to srt format
|
- https://github.com/lbrayner/vtt-to-srt - For code related to converting subtitles from vtt to srt format
|
||||||
|
- https://github.com/r0oth3x49/udemy-dl - For some of the information related to using the udemy api
|
||||||
|
@ -1,203 +0,0 @@
|
|||||||
#dashdrmmultisegmentdownloader
|
|
||||||
import os,requests,shutil,json,glob
|
|
||||||
from mpegdash.parser import MPEGDASHParser
|
|
||||||
from mpegdash.nodes import Descriptor
|
|
||||||
from mpegdash.utils import (
|
|
||||||
parse_attr_value, parse_child_nodes, parse_node_value,
|
|
||||||
write_attr_value, write_child_node, write_node_value
|
|
||||||
)
|
|
||||||
from utils import extract_kid
|
|
||||||
|
|
||||||
#global ids
|
|
||||||
# Global settings shared by the functions below.
retry = 3  # retry budget compared against the attempt counter in download_media
_base_dir = os.getcwd()
download_dir = os.path.join(_base_dir, 'out_dir')  # set the folder to output
working_dir = os.path.join(_base_dir, "working_dir")  # set the folder to download ephemeral files
keyfile_path = os.path.join(_base_dir, "keyfile.json")

# The working directory is created on import so later chdir/download calls can rely on it.
if not os.path.exists(working_dir):
    os.makedirs(working_dir)

# Get the keys: decrypt() looks entries up by lower-cased key ID.
with open(keyfile_path, 'r') as keyfile_handle:
    keyfile = json.loads(keyfile_handle.read())
|
|
||||||
|
|
||||||
|
|
||||||
#Patching the Mpegdash lib for keyID
|
|
||||||
# Patching the Mpegdash lib for keyID: the three functions below replace the
# corresponding Descriptor methods so the ``cenc:default_KID`` attribute is
# carried through parsing and writing.

# (attribute name on the Descriptor, attribute name in the XML node)
_DESCRIPTOR_ATTRS = (
    ("scheme_id_uri", "schemeIdUri"),
    ("value", "value"),
    ("id", "id"),
    ("key_id", "cenc:default_KID"),
)


def __init__(self):
    """Start with an empty scheme URI and every optional attribute unset."""
    self.scheme_id_uri = ''  # xs:anyURI (required)
    self.value = self.id = self.key_id = None  # xs:string each


def parse(self, xmlnode):
    """Read each known attribute from *xmlnode* as a string."""
    for attr_name, xml_name in _DESCRIPTOR_ATTRS:
        setattr(self, attr_name, parse_attr_value(xmlnode, xml_name, str))


def write(self, xmlnode):
    """Write each known attribute back onto *xmlnode*."""
    for attr_name, xml_name in _DESCRIPTOR_ATTRS:
        write_attr_value(xmlnode, xml_name, getattr(self, attr_name))


Descriptor.__init__ = __init__
Descriptor.parse = parse
Descriptor.write = write
|
|
||||||
|
|
||||||
def durationtoseconds(period):
    """Convert a ``PTxDxHxMxS`` duration string into seconds.

    Every component is optional; the seconds component may carry a decimal
    fraction, which is preserved digit-for-digit (``3.05`` stays ``3.05``).

    :param period: duration text such as ``"PT1H2M3.5S"``.
    :return: total seconds as a float, or ``None`` (after printing
        "Duration Format Error") when *period* is not in the expected format.
    """
    import re  # local import: the file's import block does not provide re

    match = re.fullmatch(
        r"PT(?:(\d+)D)?(?:(\d+)H)?(?:(\d+)M)?(?:(\d+(?:\.\d+)?)S)?", period)
    if match is None:
        print("Duration Format Error")
        return None

    day, hour, minute = (int(part) if part else 0 for part in match.groups()[:3])
    second = match.group(4) or "0"
    print("Total time: " + str(day) + " days " + str(hour) + " hours " + str(minute) + " minutes and " + str(second) + " seconds")

    whole, _, fraction = second.partition(".")
    whole_seconds = (day * 24 * 60 * 60) + (hour * 60 * 60) + (minute * 60) + int(whole)
    # Build the float from text so the fraction keeps its leading zeros and
    # is not disturbed by float addition.
    return float(f"{whole_seconds}.{fraction or '0'}")
|
|
||||||
|
|
||||||
def _fetch_segment(filename, url):
    """Make one download attempt for *url* into *filename*.

    :return: ``True`` on HTTP 404, ``False`` when the segment was saved and
        passed the size check, ``None`` when the attempt should be retried.
    """
    try:
        with requests.get(url, stream=True) as media:
            if media.status_code == 404:
                print("Probably end hit!\n", url)
                return True  # Probably hit the last of the file
            if media.status_code != 200:
                print("Error fetching segment file.. Redownloading...")
                return None
            # The header may be absent; only verify the size when it is given.
            expected_length = media.headers.get("content-length")
            with open(filename, 'wb') as video_file:
                shutil.copyfileobj(media.raw, video_file)
    except Exception as error:
        # Reading media.raw can raise errors that are not requests exceptions,
        # so the catch is broad; the caller bounds the number of retries.
        print(f"Connection error: Reattempting download of segment.. ({error})")
        return None

    if expected_length is not None and os.path.getsize(filename) < int(expected_length):
        print("Segment is faulty.. Redownloading...")
        return None
    print("Segment downloaded: " + filename)
    return False  # Successfully downloaded the file


def download_media(filename, url, epoch=0):
    """Download one media segment, retrying failed attempts.

    :param filename: destination path; an existing file is treated as already
        downloaded and left untouched.
    :param url: segment URL.
    :param epoch: number of attempts already used (callers normally omit it).
    :return: ``True`` when the server answered 404 (taken as end of stream),
        otherwise a falsy value once the segment is on disk.
    :raises SystemExit: when the module-level ``retry`` budget is exhausted.
    """
    if os.path.isfile(filename):
        print("Segment already downloaded.. skipping..")
        return False

    while True:
        outcome = _fetch_segment(filename, url)
        if outcome is not None:
            return outcome
        # Remove the partial file, otherwise the next attempt (or the next
        # run) would mistake it for a finished segment and skip it.
        if os.path.isfile(filename):
            os.remove(filename)
        if epoch > retry:
            raise SystemExit("Error fetching segment, exceeded retry times.")
        epoch += 1
|
|
||||||
|
|
||||||
def cleanup(path):
    """Delete the ``.mp4`` and ``.mpd`` files found directly inside *path*.

    A file that cannot be removed is reported on stdout and skipped.
    """
    doomed = [
        match
        for extension in ("mp4", "mpd")
        for match in glob.glob(path + '/*.' + extension, recursive=True)
    ]
    for doomed_path in doomed:
        try:
            os.remove(doomed_path)
        except OSError:
            print(f"Error deleting file: {doomed_path}")
|
|
||||||
|
|
||||||
def mux_process(video_title, outfile):
    """Mux ``decrypted_audio.mp4`` and ``decrypted_video.mp4`` with ffmpeg.

    The command is passed as an argument list rather than a shell string, so
    titles and output paths containing spaces or quotes are handled the same
    way on every platform.

    :param video_title: value written to the ``title`` metadata field.
    :param outfile: output path without the ``.mp4`` suffix.
    """
    import subprocess  # local import: the file's import block does not provide subprocess

    command = [
        "ffmpeg", "-y",
        "-i", "decrypted_audio.mp4",
        "-i", "decrypted_video.mp4",
        "-acodec", "copy",
        "-vcodec", "copy",
        "-fflags", "+bitexact",
        "-map_metadata", "-1",
        "-metadata", f"title={video_title}",
        "-metadata", "creation_time=2020-00-00T70:05:30.000000Z",
        f"{outfile}.mp4",
    ]
    if os.name != "nt":
        # Outside Windows the job is run at reduced priority.
        command = ["nice", "-n", "7"] + command
    # As before, ffmpeg's exit status is not inspected.
    subprocess.run(command)
|
|
||||||
|
|
||||||
def decrypt(kid, filename):
    """Run mp4decrypt on ``encrypted_<filename>.mp4``.

    The key is looked up in the module-level ``keyfile`` mapping under the
    lower-cased *kid*; the process exits with "Key not found" when absent.
    The result is written to ``decrypted_<filename>.mp4``.
    """
    try:
        key = keyfile[kid.lower()]
    except KeyError:
        exit("Key not found")
    # Outside Windows the tool is run at reduced priority.
    launcher = "" if os.name == "nt" else "nice -n 7 "
    os.system(f"{launcher}mp4decrypt --key 1:{key} encrypted_{filename}.mp4 decrypted_{filename}.mp4")
|
|
||||||
|
|
||||||
|
|
||||||
def _concat_segments(prefix, extension, count, destination):
    """Join ``<prefix>_0`` .. ``<prefix>_<count-1>`` segment files into *destination*."""
    with open(destination, "wb") as merged:
        for index in range(count):
            with open(f"{prefix}_{index}.seg.{extension}", "rb") as part:
                shutil.copyfileobj(part, merged)


def handle_irregular_segments(media_info, video_title, output_path):
    """Download, join, decrypt and mux the segments described by *media_info*.

    Numbered segments are fetched until ``download_media`` reports the end of
    the video stream; the segments fetched so far are then concatenated,
    decrypted and muxed.

    :param media_info: eight values as returned by ``manifest_parser``:
        segment count, media URL template, init URL and extension for video,
        then the same four for audio.
    :param video_title: title passed on to ``mux_process``.
    :param output_path: output path (without ``.mp4``) passed to ``mux_process``.
    """
    # The original unpacked the count twice, so the audio entry is the one used.
    _, video_url, video_init, video_extension, no_segment, audio_url, audio_init, audio_extension = media_info

    # Init segments use the same naming scheme as the numbered segments so
    # that the concatenation step finds them whatever the extension is.
    video_init_file = f"video_0.seg.{video_extension}"
    audio_init_file = f"audio_0.seg.{audio_extension}"
    download_media(video_init_file, video_init)
    video_kid = extract_kid(video_init_file)
    print("KID for video file is: " + video_kid)
    download_media(audio_init_file, audio_init)
    audio_kid = extract_kid(audio_init_file)
    print("KID for audio file is: " + audio_kid)

    for count in range(1, no_segment):
        video_segment_url = video_url.replace("$Number$", str(count))
        audio_segment_url = audio_url.replace("$Number$", str(count))
        video_status = download_media(f"video_{count}.seg.{video_extension}", video_segment_url)
        download_media(f"audio_{count}.seg.{audio_extension}", audio_segment_url)
        if video_status:
            # Joined in Python rather than via `copy /b` or `cat`: no shell,
            # no per-OS branch and no command-line length limit.
            print(f"Concatenating {count} video segments into encrypted_video.mp4")
            _concat_segments("video", video_extension, count, "encrypted_video.mp4")
            print(f"Concatenating {count} audio segments into encrypted_audio.mp4")
            _concat_segments("audio", audio_extension, count, "encrypted_audio.mp4")
            decrypt(video_kid, "video")
            decrypt(audio_kid, "audio")
            mux_process(video_title, output_path)
            break
    else:
        # Previously this case ended silently with no output file.
        print("End of stream was never reached; nothing was muxed.")
|
|
||||||
|
|
||||||
|
|
||||||
def manifest_parser(mpd_url):
    """Fetch an MPD manifest and collect the segment details per media type.

    The manifest is saved as ``manifest.mpd`` in the current directory and
    parsed from there.

    :param mpd_url: URL of the manifest.
    :return: ``video + audio``, where each part holds, per segment template,
        the estimated segment count, the media URL template, the
        initialization URL and the media file extension.
    :raises ValueError: when the manifest duration cannot be parsed.
    """
    video = []
    audio = []
    manifest = requests.get(mpd_url).text
    with open("manifest.mpd", 'w') as manifest_handler:
        manifest_handler.write(manifest)
    mpd = MPEGDASHParser.parse("./manifest.mpd")

    running_time = durationtoseconds(mpd.media_presentation_duration)
    if running_time is None:
        raise ValueError("Manifest duration is not in the expected PT... format")
    # Estimate: 6 seconds per segment plus 20 spare. The downloader stops at
    # the first 404, so the figure is meant to overshoot.
    approx_no_segments = round(running_time / 6) + 20

    collectors = {"audio/mp4": audio, "video/mp4": video}
    for period in mpd.periods:
        for adapt_set in period.adaptation_sets:
            content_type = adapt_set.mime_type
            print("Processing " + content_type)
            # Last representation; labelled "Max Quality" by the original author.
            representation = adapt_set.representations[-1]
            for segment in representation.segment_templates:
                if segment.duration:
                    print("Media segments are of equal timeframe")
                else:
                    print("Media segments are of inequal timeframe")
                # NOTE(review): the original indentation is lost, so it is
                # unclear whether the steps below ran only for the unequal
                # case; they now run for both so callers always get eight values.
                print("Expected No of segments:", approx_no_segments)
                collector = collectors.get(content_type)
                if collector is not None:
                    collector.extend([
                        approx_no_segments,
                        segment.media,
                        segment.initialization,
                        segment.media.split(".")[-1],
                    ])
    return video + audio
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
    # Placeholder inputs: edit the manifest URL and the title before running.
    mpd = "mpd url"
    base_url = mpd.partition("index.mpd")[0]
    video_title = "175. Inverse Transforming Vectors"  # the video title that gets embedded into the mp4 file metadata
    output_path = os.path.join(download_dir, video_title)  # video title used in the filename, don't append .mp4

    # All intermediate files are created relative to the working directory.
    os.chdir(working_dir)
    media_info = manifest_parser(mpd)
    handle_irregular_segments(media_info, video_title, output_path)
    cleanup(working_dir)
|
|
277
pyffmpeg.py
Normal file
277
pyffmpeg.py
Normal file
@ -0,0 +1,277 @@
|
|||||||
|
#!/usr/bin/python3
|
||||||
|
# pylint: disable=R,C,W,E
|
||||||
|
"""
|
||||||
|
Author : Nasir Khan (r0ot h3x49)
|
||||||
|
Github : https://github.com/r0oth3x49
|
||||||
|
License : MIT
|
||||||
|
Copyright (c) 2018-2025 Nasir Khan (r0ot h3x49)
|
||||||
|
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the
|
||||||
|
Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software,
|
||||||
|
and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
|
||||||
|
The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
|
||||||
|
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||||
|
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR
|
||||||
|
ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH
|
||||||
|
THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||||
|
"""
|
||||||
|
import re
|
||||||
|
import time
|
||||||
|
import subprocess
|
||||||
|
import sys
|
||||||
|
from colorama import Fore, Style
|
||||||
|
|
||||||
|
|
||||||
|
class FFMPeg:
    """Download a stream with ffmpeg while drawing a console progress bar.

    ffmpeg is launched with ``-progress pipe:2`` so that its ``key=value``
    progress records arrive on stderr, where :meth:`download` reads them.
    """

    _PROGRESS_PATTERN = re.compile(
        r"(frame|fps|total_size|out_time|bitrate|speed|progress)\s*\=\s*(\S+)")
    _DURATION_PATTERN = re.compile(
        r"Duration: (\d{2}):(\d{2}):(\d{2})\.\d{2}")

    def __init__(self,
                 duration,
                 url,
                 token,
                 filepath,
                 quiet=False,
                 callback=lambda *x: None):
        """Store the download parameters.

        :param duration: fallback total duration in seconds, used when a
            line carries no ``Duration:`` field.
        :param url: input URL handed to ffmpeg.
        :param token: bearer token sent in the ``Authorization`` header.
        :param filepath: output file path.
        :param quiet: stored only; not consulted by this class.
        :param callback: stored only; not consulted by this class.
        """
        self.url = url
        self.filepath = filepath
        self.quiet = quiet
        self.duration = duration
        self.callback = callback
        self.token = token

    def _command(self):
        """
        ffmpeg.exe -headers "Authorization: Bearer {token}" -i "" -c copy -bsf:a aac_adtstoasc out.mp4
        """
        command = [
            "ffmpeg",
            "-headers",
            f"Authorization: Bearer {self.token}",
            "-i",
            f"{self.url}",
            "-c",
            "copy",
            "-bsf:a",
            "aac_adtstoasc",
            f"{self.filepath}",
            "-y",
            "-progress",
            "pipe:2",
        ]
        return command

    def _fetch_total_duration(self, line):
        """Return the ``Duration: HH:MM:SS.xx`` value in *line* as seconds.

        Falls back to ``self.duration`` when the line has no such field.
        """
        mobj = self._DURATION_PATTERN.search(line)
        if mobj is None:
            return self.duration
        hours, mins, secs = (int(part) for part in mobj.groups())
        return hours * 3600 + mins * 60 + secs

    def _fetch_current_duration_done(self, time_str):
        """Convert an ``HH:MM:SS[.frac]`` string to whole seconds.

        Returns 0 for a missing or unparseable value such as ``N/A``.
        """
        try:
            hours, mins, secs = time_str.split(":")
            return int(hours) * 3600 + int(mins) * 60 + int(secs.split(".")[0])
        except (AttributeError, ValueError):
            return 0

    def _prepare_time_str(self, secs):
        """Format *secs* as ``MM:SSs`` or ``HH:MM:SSs``; ``--:--:--`` beyond 99 hours."""
        (mins, secs) = divmod(secs, 60)
        (hours, mins) = divmod(mins, 60)
        if hours > 99:
            return "--:--:--"
        if hours == 0:
            return "%02d:%02ds" % (mins, secs)
        return "%02d:%02d:%02ds" % (hours, mins, secs)

    @staticmethod
    def _to_float(value, unit):
        """Parse a progress value with *unit* stripped; 0 when absent or not numeric."""
        text = (value or "").lower().replace(unit, "")
        try:
            return float(text)
        except ValueError:
            return 0

    def _progress(self,
                  iterations,
                  total,
                  bytesdone,
                  speed,
                  elapsed,
                  bar_length=30,
                  fps=None):
        """Compute the display fields for one update and draw them.

        :param iterations: seconds of media processed so far.
        :param total: total seconds of media; may be 0 or ``None`` if unknown.
        :param bytesdone: output size reported so far.
        :param speed: bitrate figure shown next to the bar.
        :param elapsed: wall-clock seconds since the download started.
        :param bar_length: width of the bar in characters.
        :param fps: optional frames-per-second figure appended to the rate.
        """
        total = total or 0
        # An unknown total gives an empty bar instead of a division by zero;
        # the fraction is clamped so the bar never over- or under-fills.
        fraction = min(max(iterations / float(total), 0.0), 1.0) if total else 0.0
        filled_length = int(round(bar_length * fraction))
        percents = format(100.00 * fraction, ".2f")

        if bytesdone <= 1048576:
            _receiving = round(float(bytesdone) / 1024.00, 2)
            small_unit, large_unit = "KB", "MB"
        else:
            _receiving = round(float(bytesdone) / 1048576, 2)
            small_unit, large_unit = "MB", "GB"
        if _receiving < 1024.00:
            _received = format(_receiving, ".2f")
            suffix_recvd = small_unit
        else:
            _received = format(_receiving / 1024.00, ".2f")
            suffix_recvd = large_unit

        suffix_rate = "Kb/s" if speed < 1024.00 else "Mb/s"
        if fps:
            suffix_rate += f" {fps}/fps"
        rate = format(speed if speed < 1024.00 else speed / 1024.00, ".2f")

        # Remaining media time scaled by the processing pace so far; zero
        # until some media has been processed.
        if elapsed and iterations:
            eta_secs = max(total - iterations, 0) * elapsed / iterations
        else:
            eta_secs = 0
        (mins, secs) = divmod(eta_secs, 60)
        (hours, mins) = divmod(mins, 60)
        if total and iterations >= total:
            eta = "\n"  # finished: end the progress line
        elif hours > 99:
            eta = "--:--:--"
        elif hours == 0:
            eta = "eta %02d:%02ds" % (mins, secs)
        else:
            eta = "eta %02d:%02d:%02ds" % (hours, mins, secs)

        total_time = self._prepare_time_str(total)
        done_time = self._prepare_time_str(iterations)
        downloaded = f"{total_time}/{done_time}"

        received_bytes = str(_received) + str(suffix_recvd)
        percents = f"{received_bytes} {percents}"

        self.hls_progress(
            downloaded=downloaded,
            percents=percents,
            filled_length=filled_length,
            rate=str(rate) + str(suffix_rate),
            suffix=eta,
            bar_length=bar_length,
        )

    def _safe_progress(self, *args, **kwargs):
        """Draw progress, ignoring rendering errors."""
        try:
            self._progress(*args, **kwargs)
        except Exception:
            # The bar is cosmetic; a rendering problem must not abort the
            # download. KeyboardInterrupt is not caught here and propagates.
            pass

    def hls_progress(self,
                     downloaded,
                     percents,
                     filled_length,
                     rate,
                     suffix,
                     bar_length=30):
        """Write one coloured progress line to stdout, replacing the current line."""
        bar = (Fore.CYAN + Style.DIM + "#" * filled_length + Fore.WHITE +
               Style.DIM + "-" * (bar_length - filled_length))
        sys.stdout.write(
            "\033[2K\033[1G\r\r{}{}[{}{}*{}{}] : {}{}{} {}% |{}{}{}| {} {}".
            format(
                Fore.CYAN,
                Style.DIM,
                Fore.MAGENTA,
                Style.BRIGHT,
                Fore.CYAN,
                Style.DIM,
                Fore.GREEN,
                Style.BRIGHT,
                downloaded,
                percents,
                bar,
                Fore.GREEN,
                Style.BRIGHT,
                rate,
                suffix,
            ))
        sys.stdout.flush()

    def _parse_progress(self, line):
        """Return the recognised ``key=value`` progress fields in *line* as a dict."""
        return dict(self._PROGRESS_PATTERN.findall(line))

    def download(self):
        """Run ffmpeg and show progress until it reports ``progress=end``.

        :return: ``{"status": "True", "msg": "download"}`` on completion, or
            a dict with ``"status": "False"`` and an error message when
            ffmpeg's output ends before completion was reported.
        :raises KeyboardInterrupt: propagated when the user interrupts.
        """
        total_time = None
        t0 = time.time()
        progress_lines = []
        retVal = {}
        bytes_done = 0
        download_speed = 0
        # stdout is never read here, so it is discarded rather than piped.
        with subprocess.Popen(self._command(),
                              stdout=subprocess.DEVNULL,
                              stderr=subprocess.PIPE) as proc:
            while True:
                elapsed = time.time() - t0
                raw = proc.stderr.readline()
                if not raw:
                    # End of output without "progress=end": ffmpeg stopped
                    # early. Without this check the loop would never end.
                    proc.wait()
                    retVal = {
                        "status": "False",
                        "msg": f"Error: ffmpeg exited with code {proc.returncode} before finishing",
                    }
                    break
                line = raw.decode("utf-8", errors="replace").strip()
                if not total_time:
                    total_time = self._fetch_total_duration(line)
                if "progress=end" in line:
                    self._safe_progress(total_time, total_time, bytes_done,
                                        download_speed, elapsed)
                    retVal = {"status": "True", "msg": "download"}
                    break
                if "progress" not in line:
                    progress_lines.append(line)
                    continue
                # A "progress=" line closes one record: parse what was gathered.
                items = self._parse_progress("\n".join(progress_lines))
                progress_lines = []
                if items:
                    secs = self._fetch_current_duration_done(
                        items.get("out_time"))
                    bytes_done = self._to_float(items.get("total_size"), "kb")
                    download_speed = self._to_float(items.get("bitrate"),
                                                    "kbits/s")
                    self._safe_progress(secs, total_time, bytes_done,
                                        download_speed, elapsed,
                                        fps=items.get("fps"))
        return retVal
|
@ -5,4 +5,6 @@ requests
|
|||||||
python-dotenv
|
python-dotenv
|
||||||
protobuf
|
protobuf
|
||||||
webvtt-py
|
webvtt-py
|
||||||
pysrt
|
pysrt
|
||||||
|
m3u8
|
||||||
|
colorama
|
136
sanitize.py
Normal file
136
sanitize.py
Normal file
@ -0,0 +1,136 @@
|
|||||||
|
# This file is from https://github.com/r0oth3x49/udemy-dl/blob/master/udemy/sanitize.py
|
||||||
|
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
import re
|
||||||
|
import six
|
||||||
|
import unicodedata
|
||||||
|
from unidecode import unidecode
|
||||||
|
|
||||||
|
|
||||||
|
def smart_text(s, encoding="utf-8", errors="strict"):
    """
    Coerce ``s`` to a text string (``six.text_type``).

    :param s: Value to convert. Text is returned unchanged.
    :param encoding: Codec used when a bytes value has to be decoded.
    :param errors: Error handling scheme passed along with ``encoding``.
    :return: ``s`` as ``six.text_type``
    """
    to_text = six.text_type

    # Already text: hand it back untouched.
    if isinstance(s, to_text):
        return s

    # A string type that is not text (only possible on Python 2).
    if isinstance(s, six.string_types):
        return to_text(s)

    if six.PY3:
        # Only bytes are decoded; any other object goes through str().
        if isinstance(s, bytes):
            return to_text(s, encoding, errors)
        return to_text(s)

    # Python 2: prefer the object's own unicode conversion when it has one.
    if hasattr(s, "__unicode__"):
        return to_text(s)
    return to_text(bytes(s), encoding, errors)
|
||||||
|
|
||||||
|
|
||||||
|
# Extra characters outside of alphanumerics that we'll allow.
|
||||||
|
SLUG_OK = "-_~"
|
||||||
|
|
||||||
|
|
||||||
|
def slugify(s,
            ok=SLUG_OK,
            lower=True,
            spaces=False,
            only_ascii=False,
            space_replacement="-"):
    """
    Creates a unicode slug for given string with several options.

    Characters are kept when their Unicode general category starts with
    L (letter) or N (number), or when they are listed in ``ok``. Separator
    characters (category Z) become a plain space; everything else is dropped.
    http://www.unicode.org/reports/tr44/tr44-4.html#GC_Values_Table

    :param s: Your unicode string.
    :param ok: Extra characters outside of alphanumerics to be allowed.
               Default is '-_~'
    :param lower: Lower the output string.
                  Default is True
    :param spaces: True allows spaces, False replaces a space with the
                   "space_replacement" param
    :param only_ascii: True to replace non-ASCII unicode characters with
                       their ASCII representations.
    :param space_replacement: Char used to replace spaces if "spaces" is False.
                              Default is dash ("-") or first char in ok if
                              dash not allowed
    :type s: String
    :type ok: String
    :type lower: Bool
    :type spaces: Bool
    :type only_ascii: Bool
    :type space_replacement: String
    :return: Slugified unicode string
    :raises ValueError: if ``only_ascii`` is True and a custom ``ok`` that
                        supports ``decode`` is not pure ASCII.
    """

    if only_ascii and ok != SLUG_OK and hasattr(ok, "decode"):
        try:
            ok.decode("ascii")
        except UnicodeError:
            # UnicodeError covers both UnicodeEncodeError and
            # UnicodeDecodeError; decoding non-ASCII bytes raises the latter,
            # which the narrower except clause used to let escape.
            raise ValueError(
                ('You can not use "only_ascii=True" with '
                 'a non ascii available chars in "ok" ("%s" given)') % ok)

    rv = []
    for c in unicodedata.normalize("NFKC", smart_text(s)):
        cat = unicodedata.category(c)[0]
        if cat in "LN" or c in ok:
            rv.append(c)
        elif cat == "Z":  # space
            rv.append(" ")
    new = "".join(rv).strip()

    if only_ascii:
        new = unidecode(new)
    if not spaces:
        if space_replacement and space_replacement not in ok:
            space_replacement = ok[0] if ok else ""
        # Escape the replacement before embedding it in the character class:
        # unescaped, a value such as "^" or "]" changes the meaning of the
        # pattern (e.g. "[^\s]+" would swallow every word).
        pattern = "[%s\\s]+" % re.escape(space_replacement)
        # A callable replacement inserts the text literally, so a backslash
        # in space_replacement is not parsed as a template escape.
        new = re.sub(pattern, lambda _match: space_replacement, new)
    if lower:
        new = new.lower()

    return new
|
||||||
|
|
||||||
|
|
||||||
|
def sanitize(title):
    """
    Make ``title`` safe to use as a file or directory name.

    Known accented characters are replaced with the ASCII equivalents listed
    in the table below. Any other character whose code point is above 128 is
    written out as its decimal code point. Finally each of the characters
    ``\\ / : * ? " < >`` is replaced with an underscore.

    Characters are looked up one at a time, so digits that are already part
    of the title (for example "Lecture 194") are left alone instead of being
    mistaken for an encoded code point.

    :param title: Title to clean up.
    :type title: String
    :return: Sanitized title
    """
    # Code point -> ASCII replacement.
    _locale = {
        194: "A",
        199: "C",
        286: "G",
        304: "I",
        206: "I",
        214: "O",
        350: "S",
        219: "U",
        226: "a",
        231: "c",
        287: "g",
        305: "i",
        238: "i",
        246: "o",
        351: "s",
        251: "u",
        191: "",
        225: "a",
        233: "e",
        237: "i",
        243: "o",
        250: "u",
        252: "u",
        241: "n",
        193: "A",
        201: "E",
        205: "I",
        211: "O",
        218: "U",
        220: "U",
        209: "N",
        223: "ss",
    }

    # A standalone diaeresis (code point 168) directly followed by "u"/"U"
    # collapses to the bare letter.
    title = title.replace("\u00a8u", "u").replace("\u00a8U", "U")

    pieces = []
    for char in title:
        code = ord(char)
        if code > 128:
            # Unmapped characters fall back to their decimal code point.
            pieces.append(_locale.get(code, str(code)))
        else:
            pieces.append(char)
    _temp = "".join(pieces)

    # Replace the characters that are not allowed in file names.
    return re.sub(r'[\\/:*?"<>]', "_", _temp)
|
@ -6,8 +6,8 @@ from pysrt.srttime import SubRipTime
|
|||||||
|
|
||||||
def convert(directory, filename):
|
def convert(directory, filename):
|
||||||
index = 0
|
index = 0
|
||||||
vtt_filepath = os.path.join(directory, f"{filename}.vtt")
|
vtt_filepath = os.path.join(directory, filename + ".vtt")
|
||||||
srt_filepath = os.path.join(directory, f"{filename}.srt")
|
srt_filepath = os.path.join(directory, filename + ".srt")
|
||||||
srt = open(srt_filepath, "w")
|
srt = open(srt_filepath, "w")
|
||||||
|
|
||||||
for caption in WebVTT().read(vtt_filepath):
|
for caption in WebVTT().read(vtt_filepath):
|
||||||
|
Loading…
x
Reference in New Issue
Block a user