mirror of
https://cdm-project.com/Download-Tools/udemy-downloader.git
synced 2025-04-30 02:14:25 +02:00
Merge branch 'Puyodead1:master' into master
This commit is contained in:
commit
a205ec91bf
@ -1,2 +1 @@
|
||||
UDEMY_BEARER=enter bearer token without the Bearer prefix
|
||||
UDEMY_COURSE_ID=course id goes here
|
||||
UDEMY_BEARER=Your bearer token here
|
5
.gitignore
vendored
5
.gitignore
vendored
@ -119,4 +119,7 @@ test_data.json
|
||||
out_dir
|
||||
working_dir
|
||||
manifest.mpd
|
||||
.vscode
|
||||
.vscode
|
||||
saved
|
||||
*.aria2
|
||||
info.py
|
101
README.md
101
README.md
@ -1,4 +1,5 @@
|
||||
# Udemy Downloader with DRM support
|
||||
|
||||
[](https://forthebadge.com)
|
||||
[](https://forthebadge.com)
|
||||
[](https://forthebadge.com)
|
||||
@ -6,6 +7,7 @@
|
||||

|
||||

|
||||

|
||||
|
||||
# NOTE
|
||||
|
||||
This program is WIP, the code is provided as-is and I am not held responsible for any legal issues resulting from the use of this program.
|
||||
@ -21,11 +23,11 @@ All code is licensed under the MIT license
|
||||
# Description
|
||||
|
||||
Simple program to download a Udemy course, has support for DRM videos but requires the user to acquire the decryption key (for legal reasons).<br>
|
||||
Current only Windows is supported but with some small modifications it should work on linux also (and maybe mac)
|
||||
Windows is the primary development OS, but I've made an effort to support linux also.
|
||||
|
||||
# Requirements
|
||||
|
||||
1. You would need to download `ffmpeg` and `mp4decrypter`from Bento4 SDK and ensure they are in path (typing their name in cmd invokes them).
|
||||
1. You would need to download `ffmpeg`, `aria2c` and `mp4decrypter` (from Bento4 SDK) and ensure they are in path (typing their name in cmd should invoke them).
|
||||
|
||||
# Usage
|
||||
|
||||
@ -35,96 +37,91 @@ You will need to get a few things before you can use this program:
|
||||
|
||||
- Decryption Key ID
|
||||
- Decryption Key
|
||||
- Udemy Course ID
|
||||
- Udemy Bearer Token
|
||||
- Udemy Course URL
|
||||
- Udemy Bearer Token (aka access token for udemy-dl users)
|
||||
|
||||
### Setting up
|
||||
|
||||
- rename `.env.sample` to `.env`
|
||||
- rename `.env.sample` to `.env` _(you only need to do this if you plan to use the .env file to store your bearer token)_
|
||||
- rename `keyfile.example.json` to `keyfile.json`
|
||||
|
||||
### Aquire bearer token
|
||||
### Acquire Bearer Token
|
||||
|
||||
- open dev tools
|
||||
- go to network tab
|
||||
- in the search field, enter `api-2.0/courses`
|
||||
- 
|
||||
- click a random request
|
||||
- locate the `Request Headers` section
|
||||
- copy the the text after `Authorization`, it should look like `Bearer xxxxxxxxxxx`
|
||||
- 
|
||||
- enter this in the `.env` file after `UDEMY_BEARER=` (you can also pass this as an argument, see advanced usage for more information)
|
||||
|
||||
### Aquire Course ID
|
||||
|
||||
- Follow above before following this
|
||||
- locate the request url field
|
||||
- 
|
||||
- copy the number after `/api-2.0/courses/` as seen highlighed in the above picture
|
||||
- enter this in the `.env` file after `UDEMY_COURSE_ID=` (you can also pass this as an argument, see advanced usage for more information)
|
||||
- Firefox: [Udemy-DL Guide](https://github.com/r0oth3x49/udemy-dl/issues/389#issuecomment-491903900)
|
||||
- Chrome: [Udemy-DL Guide](https://github.com/r0oth3x49/udemy-dl/issues/389#issuecomment-492569372)
|
||||
- If you want to use the .env file to store your Bearer Token, edit the .env and add your token.
|
||||
|
||||
### Key ID and Key
|
||||
|
||||
It is up to you to aquire the key and key id.
|
||||
It is up to you to acquire the key and key id. Please don't ask me for help acquiring these, decrypting DRM protected content can be considered piracy.
|
||||
|
||||
- Enter the key and key id in the `keyfile.json`
|
||||
- 
|
||||
- 
|
||||
- 
|
||||
|
||||
### Start Downloading
|
||||
|
||||
You can now run `python main.py` to start downloading. The course will download to `out_dir`, chapters are seperated into folders.
|
||||
You can now run the program, see the examples below. The course will download to `out_dir`.
|
||||
|
||||
# Advanced Usage
|
||||
|
||||
```
|
||||
usage: main.py [-h] [-d] [-b BEARER_TOKEN] [-c COURSE_ID] [-q QUALITY] [-l LANG] [--skip-lectures] [--download-assets] [--download-captions]
|
||||
usage: main.py [-h] -c COURSE_URL [-b BEARER_TOKEN] [-q QUALITY] [-l LANG] [--skip-lectures] [--download-assets] [--download-captions]
|
||||
[--keep-vtt] [--skip-hls] [--info]
|
||||
|
||||
Udemy Downloader
|
||||
|
||||
optional arguments:
|
||||
-h, --help show this help message and exit
|
||||
-d, --debug Use test_data.json rather than fetch from the udemy api.
|
||||
-c COURSE_URL, --course-url COURSE_URL
|
||||
The URL of the course to download
|
||||
-b BEARER_TOKEN, --bearer BEARER_TOKEN
|
||||
The Bearer token to use
|
||||
-c COURSE_ID, --course-id COURSE_ID
|
||||
The ID of the course to download
|
||||
-q QUALITY, --quality QUALITY
|
||||
Download specific video quality. (144, 360, 480, 720, 1080)
|
||||
-l LANG, --lang LANG The language to download for captions (Default is en)
|
||||
--skip-lectures If specified, lectures won't be downloaded.
|
||||
--download-assets If specified, lecture assets will be downloaded.
|
||||
--download-captions If specified, captions will be downloaded.
|
||||
Download specific video quality. If the requested quality isn't available, the closest quality will be used. If not
|
||||
specified, the best quality will be downloaded for each lecture
|
||||
-l LANG, --lang LANG The language to download for captions, specify 'all' to download all captions (Default is 'en')
|
||||
--skip-lectures If specified, lectures won't be downloaded
|
||||
--download-assets If specified, lecture assets will be downloaded
|
||||
--download-captions If specified, captions will be downloaded
|
||||
--keep-vtt If specified, .vtt files won't be removed
|
||||
--skip-hls If specified, hls streams will be skipped (faster fetching) (hls streams usually contain 1080p quality for non-drm
|
||||
lectures)
|
||||
--info If specified, only course information will be printed, nothing will be downloaded
|
||||
```
|
||||
|
||||
- Passing a Bearer Token and Course ID as an argument
|
||||
- `python main.py -b <Bearer Token> -c <Course ID>`
|
||||
- `python main.py -c <Course URL> -b <Bearer Token>`
|
||||
- `python main.py -c https://www.udemy.com/courses/myawesomecourse -b <Bearer Token>`
|
||||
- Download a specific quality
|
||||
- `python main.py -q 720`
|
||||
- `python main.py -c <Course URL> -q 720`
|
||||
- Download assets along with lectures
|
||||
- `python main.py --download-assets`
|
||||
- `python main.py -c <Course URL> --download-assets`
|
||||
- Download assets and specify a quality
|
||||
- `python main.py -q 360 --download-assets`
|
||||
- `python main.py -c <Course URL> -q 360 --download-assets`
|
||||
- Download captions (Defaults to English)
|
||||
- `python main.py --download-captions`
|
||||
- `python main.py -c <Course URL> --download-captions`
|
||||
- Download captions with specific language
|
||||
- `python main.py --download-captions -l en` - English subtitles
|
||||
- `python main.py --download-captions -l es` - Spanish subtitles
|
||||
- `python main.py --download-captions -l it` - Italian subtitles
|
||||
- `python main.py --download-captions -l pl` - Polish Subtitles
|
||||
- `python main.py --download-captions -l all` - Downloads all subtitles
|
||||
- `python main.py -c <Course URL> --download-captions -l en` - English subtitles
|
||||
- `python main.py -c <Course URL> --download-captions -l es` - Spanish subtitles
|
||||
- `python main.py -c <Course URL> --download-captions -l it` - Italian subtitles
|
||||
- `python main.py -c <Course URL> --download-captions -l pl` - Polish Subtitles
|
||||
- `python main.py -c <Course URL> --download-captions -l all` - Downloads all subtitles
|
||||
- etc
|
||||
- Skip downloading lecture videos
|
||||
- `python main.py --skip-lectures --download-captions` - Downloads only captions
|
||||
- `python main.py --skip-lectures --download-assets` - Downloads only assets
|
||||
|
||||
# Getting an error about "Accepting the latest terms of service"?
|
||||
|
||||
- If you are using Udemy business, you must edit `main.py` and change `udemy.com` to `<portal name>.udemy.com`
|
||||
- `python main.py -c <Course URL> --skip-lectures --download-captions` - Downloads only captions
|
||||
- `python main.py -c <Course URL> --skip-lectures --download-assets` - Downloads only assets
|
||||
- Keep .VTT caption files:
|
||||
- `python main.py -c <Course URL> --download-captions --keep-vtt`
|
||||
- Skip parsing HLS Streams (HLS streams usually contain 1080p quality for Non-DRM lectures):
|
||||
- `python main.py -c <Course URL> --skip-hls`
|
||||
- Print course information only:
|
||||
- `python main.py -c <Course URL> --info`
|
||||
|
||||
# Credits
|
||||
|
||||
- https://github.com/Jayapraveen/Drm-Dash-stream-downloader - For the original code which this is based on
|
||||
- https://github.com/alastairmccormack/pywvpssh - For code related to PSSH extraction
|
||||
- https://github.com/alastairmccormack/pymp4parse/ - For code related to mp4 box parsing (used by pywvpssh)
|
||||
- https://github.com/alastairmccormack/pymp4parse - For code related to mp4 box parsing (used by pywvpssh)
|
||||
- https://github.com/lbrayner/vtt-to-srt - For code related to converting subtitles from vtt to srt format
|
||||
- https://github.com/r0oth3x49/udemy-dl - For some of the information related to using the udemy api
|
||||
|
@ -1,203 +0,0 @@
|
||||
#dashdrmmultisegmentdownloader
|
||||
import os,requests,shutil,json,glob
|
||||
from mpegdash.parser import MPEGDASHParser
|
||||
from mpegdash.nodes import Descriptor
|
||||
from mpegdash.utils import (
|
||||
parse_attr_value, parse_child_nodes, parse_node_value,
|
||||
write_attr_value, write_child_node, write_node_value
|
||||
)
|
||||
from utils import extract_kid
|
||||
|
||||
# Global settings
retry = 3  # threshold compared against the attempt counter in download_media
download_dir = os.path.join(os.getcwd(), 'out_dir') # set the folder to output
working_dir = os.path.join(os.getcwd(), "working_dir") # set the folder to download ephemeral files
keyfile_path = os.path.join(os.getcwd(), "keyfile.json")

# exist_ok avoids the check-then-create race of a separate os.path.exists() test.
os.makedirs(working_dir, exist_ok=True)

# Load the keyfile; decrypt() looks keys up in it by lower-cased key id.
# The file handle gets its own name so `keyfile` only ever refers to the parsed data.
with open(keyfile_path, 'r', encoding="utf-8") as keyfile_handle:
    keyfile = json.load(keyfile_handle)
|
||||
|
||||
|
||||
# Patch the mpegdash Descriptor node so it also carries the
# `cenc:default_KID` XML attribute (exposed as `key_id`).
# Each entry is (python attribute name, XML attribute name), in write order.
_DESCRIPTOR_FIELDS = (
    ('scheme_id_uri', 'schemeIdUri'),
    ('value', 'value'),
    ('id', 'id'),
    ('key_id', 'cenc:default_KID'),
)


def __init__(self):
    """Initialise every descriptor attribute to its empty default."""
    for attribute, _ in _DESCRIPTOR_FIELDS:
        setattr(self, attribute, None)  # xs:string
    self.scheme_id_uri = ''  # xs:anyURI (required)


def parse(self, xmlnode):
    """Populate the descriptor attributes from the attributes of *xmlnode*."""
    for attribute, xml_name in _DESCRIPTOR_FIELDS:
        setattr(self, attribute, parse_attr_value(xmlnode, xml_name, str))


def write(self, xmlnode):
    """Write the descriptor attributes back onto *xmlnode*."""
    for attribute, xml_name in _DESCRIPTOR_FIELDS:
        write_attr_value(xmlnode, xml_name, getattr(self, attribute))


Descriptor.__init__ = __init__
Descriptor.parse = parse
Descriptor.write = write
|
||||
|
||||
def durationtoseconds(period):
    """Convert a ``PTxDxHxMxS`` duration string into a number of seconds.

    Each of the ``D``, ``H``, ``M`` and ``S`` fields is optional; a missing
    field counts as zero. The seconds field may carry a decimal fraction,
    which is preserved digit-for-digit (``"3.05"`` stays ``3.05``).

    :param period: duration string such as ``"PT1H2M3.05S"``.
    :return: total seconds as a ``float``, or ``None`` (after printing an
        error) when *period* is not a string starting with ``"PT"``.
    :raises ValueError: if a field holds something that is not a number.
    """
    if not isinstance(period, str) or period[:2] != "PT":
        print("Duration Format Error")
        return None

    # Peel the fields off left to right so each unit letter only ever sees
    # the text that follows the previous unit.
    remainder = period[2:]
    fields = {}
    for unit in "DHMS":
        head, found, tail = remainder.partition(unit)
        if found:
            fields[unit] = head
            remainder = tail

    day = int(fields.get("D") or 0)
    hour = int(fields.get("H") or 0)
    minute = int(fields.get("M") or 0)
    second = fields.get("S") or "0"
    print("Total time: " + str(day) + " days " + str(hour) + " hours " + str(minute) + " minutes and " + str(second) + " seconds")

    # Keep the fraction as text: int() on it would drop leading zeros
    # ("05" -> 5) and turn 3.05 into 3.5.
    whole, _, fraction = second.partition(".")
    whole_seconds = (day * 24 * 60 * 60) + (hour * 60 * 60) + (minute * 60) + int(whole or 0)
    return float(f"{whole_seconds}.{fraction or '0'}")
|
||||
|
||||
def download_media(filename,url,epoch = 0):
    """Download *url* into *filename*, retrying on failure.

    :param filename: destination path; if it already exists nothing is fetched.
    :param url: URL of the segment to download.
    :param epoch: attempt counter to start from (kept for callers that pass it).
    :return: ``True`` when the server answers 404 (taken to mean the end of
        the stream was reached), ``False`` when the segment is on disk
        (freshly downloaded or already present).
    :raises SystemExit: when the attempt counter exceeds the module-level
        ``retry`` limit.
    """
    if os.path.isfile(filename):
        print("Segment already downloaded.. skipping..")
        return False

    attempt = epoch
    while True:
        try:
            # The context manager releases the streamed connection on every path.
            with requests.get(url, stream=True) as media:
                status = media.status_code
                if status == 404:
                    print("Probably end hit!\n",url)
                    return True  # Probably hit the last of the file
                if status == 200:
                    # The header is optional; without it the size check is skipped.
                    length_header = media.headers.get("content-length")
                    with open(filename, 'wb') as video_file:
                        shutil.copyfileobj(media.raw, video_file)
                    if length_header is None or os.path.getsize(filename) >= int(length_header):
                        print("Segment downloaded: " + filename)
                        return False  # Successfully downloaded the file
                    print("Segment is faulty.. Redownloading...")
                else:
                    print("Error fetching segment file.. Redownloading...")
        except Exception:
            # Retry boundary: network and disk errors alike trigger another
            # attempt. KeyboardInterrupt/SystemExit are not swallowed here.
            print("Connection error: Reattempting download of segment..")

        # Drop any partial file so a later run does not mistake it for a
        # finished segment.
        if os.path.isfile(filename):
            try:
                os.remove(filename)
            except OSError:
                pass

        if attempt > retry:
            raise SystemExit("Error fetching segment, exceeded retry times.")
        attempt += 1
|
||||
|
||||
def cleanup(path):
    """Delete the ``.mp4`` and ``.mpd`` files located directly inside *path*.

    Files that cannot be removed are reported on stdout and skipped.

    :param path: directory to clean; sub-directories are not visited.
    """
    # Escape the directory so characters such as "[" in its name are matched
    # literally instead of being read as glob syntax.
    safe_dir = glob.escape(path)
    leftover_files = []
    for pattern in ("*.mp4", "*.mpd"):
        leftover_files.extend(glob.glob(os.path.join(safe_dir, pattern)))

    for leftover in leftover_files:
        try:
            os.remove(leftover)
        except OSError:
            print(f"Error deleting file: {leftover}")
|
||||
|
||||
def mux_process(video_title,outfile):
    """Mux ``decrypted_audio.mp4`` and ``decrypted_video.mp4`` with ffmpeg.

    Both streams are copied without re-encoding and the result is written to
    ``<outfile>.mp4``. On non-Windows systems ffmpeg is run through
    ``nice -n 7``.

    :param video_title: value stored in the ``title`` metadata field.
    :param outfile: output path without the ``.mp4`` extension.
    """
    # Local import: this module's top-level imports do not include subprocess.
    import subprocess

    # An argument list (no shell) keeps titles and paths containing spaces
    # or quotes intact on every platform.
    command = [
        "ffmpeg", "-y",
        "-i", "decrypted_audio.mp4",
        "-i", "decrypted_video.mp4",
        "-acodec", "copy",
        "-vcodec", "copy",
        "-fflags", "+bitexact",
        "-map_metadata", "-1",
        "-metadata", f"title={video_title}",
        # NOTE(review): this timestamp is not a valid calendar date/time; it
        # is kept unchanged from the original command. Confirm it is intended.
        "-metadata", "creation_time=2020-00-00T70:05:30.000000Z",
        f"{outfile}.mp4",
    ]
    if os.name != "nt":
        command = ["nice", "-n", "7"] + command

    try:
        # check=False: like the previous os.system call, a failing ffmpeg
        # does not raise.
        subprocess.run(command, check=False)
    except OSError as error:
        print(f"Unable to run ffmpeg: {error}")
|
||||
|
||||
def decrypt(kid,filename):
    """Run mp4decrypt on ``encrypted_<filename>.mp4``.

    The key is looked up in the module-level ``keyfile`` mapping under the
    lower-cased *kid*; the program exits with "Key not found" when it is
    missing. The output is written to ``decrypted_<filename>.mp4``.

    :param kid: key id of the track.
    :param filename: stem shared by the encrypted input and decrypted output.
    """
    lookup = kid.lower()
    if lookup not in keyfile:
        exit("Key not found")
    key = keyfile[lookup]

    base_command = f"mp4decrypt --key 1:{key} encrypted_{filename}.mp4 decrypted_{filename}.mp4"
    # Outside Windows the tool is started at a lower scheduling priority.
    prefix = "" if os.name == "nt" else "nice -n 7 "
    os.system(prefix + base_command)
|
||||
|
||||
|
||||
def _concatenate_segments(prefix, extension, count, destination):
    """Join ``<prefix>_0`` .. ``<prefix>_<count-1>`` segment files into *destination*."""
    with open(destination, "wb") as merged:
        for index in range(count):
            part_name = f"{prefix}_{index}.seg.{extension}"
            if not os.path.isfile(part_name):
                # Best effort: report the gap and carry on with the rest.
                print(f"Missing segment, skipping: {part_name}")
                continue
            with open(part_name, "rb") as part:
                shutil.copyfileobj(part, merged)


def handle_irregular_segments(media_info,video_title,output_path):
    """Download, concatenate, decrypt and mux a segmented audio/video pair.

    Segments are fetched one by one until the video download reports the end
    of the stream (a truthy status from ``download_media``). The parts are
    then merged into ``encrypted_video.mp4`` / ``encrypted_audio.mp4``,
    decrypted and muxed.

    :param media_info: eight items, in order: video segment count, video
        media URL template, video init URL, video extension, then the same
        four for audio.
    :param video_title: title passed on to ``mux_process``.
    :param output_path: output path (without extension) passed on to
        ``mux_process``.
    """
    (video_segments, video_url, video_init, video_extension,
     audio_segments, audio_url, audio_init, audio_extension) = media_info
    # Both counts used to be unpacked into the same name; iterate far enough
    # for either stream.
    no_segment = max(video_segments, audio_segments)

    # Store the init segments under the same naming scheme as the media
    # segments so the concatenation step picks them up as segment 0.
    video_init_file = f"video_0.seg.{video_extension}"
    audio_init_file = f"audio_0.seg.{audio_extension}"

    download_media(video_init_file,video_init)
    video_kid = extract_kid(video_init_file)
    print("KID for video file is: " + video_kid)
    download_media(audio_init_file,audio_init)
    audio_kid = extract_kid(audio_init_file)
    print("KID for audio file is: " + audio_kid)

    for count in range(1,no_segment):
        video_segment_url = video_url.replace("$Number$",str(count))
        audio_segment_url = audio_url.replace("$Number$",str(count))
        video_status = download_media(f"video_{count}.seg.{video_extension}",video_segment_url)
        download_media(f"audio_{count}.seg.{audio_extension}",audio_segment_url)
        if video_status:
            # Segment `count` does not exist, so segments 0..count-1 form the
            # complete stream. Merging in Python avoids shell command-length
            # limits and differences between `copy /b` and `cat`.
            print(f"Concatenating {count} video and audio segments")
            _concatenate_segments("video", video_extension, count, "encrypted_video.mp4")
            _concatenate_segments("audio", audio_extension, count, "encrypted_audio.mp4")
            decrypt(video_kid,"video")
            decrypt(audio_kid,"audio")
            mux_process(video_title,output_path)
            break
    else:
        # The loop ran out of segment numbers without seeing the end marker,
        # so nothing was merged; say so instead of finishing silently.
        print("End of stream was not reached; no output file was produced.")
|
||||
|
||||
|
||||
def manifest_parser(mpd_url):
    """Fetch an MPEG-DASH manifest and collect segment info for audio and video.

    The manifest is saved as ``manifest.mpd`` in the current directory and
    parsed from there. For the last representation of every adaptation set,
    each segment template without a fixed ``duration`` contributes four
    items: approximate segment count, media URL template, initialization URL
    and the media file extension.

    :param mpd_url: URL of the ``.mpd`` manifest.
    :return: list holding the collected video items followed by the audio items.
    """
    video = []
    audio = []
    manifest = requests.get(mpd_url).text
    # Write UTF-8 explicitly so the platform's default encoding cannot
    # corrupt or reject non-ASCII characters in the manifest.
    with open("manifest.mpd",'w', encoding="utf-8") as manifest_handler:
        manifest_handler.write(manifest)
    mpd = MPEGDASHParser.parse("./manifest.mpd")
    running_time = durationtoseconds(mpd.media_presentation_duration)
    for period in mpd.periods:
        for adapt_set in period.adaptation_sets:
            content_type = adapt_set.mime_type
            print("Processing " + content_type)
            # The last representation is taken as the highest quality one.
            best_representation = adapt_set.representations[-1]
            for segment in best_representation.segment_templates:
                if segment.duration:
                    # NOTE(review): nesting reconstructed from a flattened
                    # source - equal-duration templates appear to only be
                    # reported, not collected. Confirm against the original.
                    print("Media segments are of equal timeframe")
                    continue

                print("Media segments are of inequal timeframe")
                approx_no_segments = round(running_time / 6) + 20 # approximate of 6 sec per segment
                print("Expected No of segments:",approx_no_segments)
                if content_type == "audio/mp4":
                    collected = audio
                elif content_type == "video/mp4":
                    collected = video
                else:
                    continue
                collected.extend([
                    approx_no_segments,
                    segment.media,
                    segment.initialization,
                    segment.media.split(".")[-1],
                ])
    return video + audio
|
||||
|
||||
|
||||
|
||||
if __name__ == "__main__":
    mpd = "mpd url"
    # ffmpeg writes the muxed file into download_dir, so it has to exist
    # before the working directory is changed.
    os.makedirs(download_dir, exist_ok=True)
    os.chdir(working_dir)
    media_info = manifest_parser(mpd)
    video_title = "175. Inverse Transforming Vectors" # the video title that gets embedded into the mp4 file metadata
    output_path = os.path.join(download_dir, "175. Inverse Transforming Vectors") # video title used in the filename, don't append .mp4
    handle_irregular_segments(media_info,video_title,output_path)
    cleanup(working_dir)
|
277
pyffmpeg.py
Normal file
277
pyffmpeg.py
Normal file
@ -0,0 +1,277 @@
|
||||
#!/usr/bin/python3
|
||||
# pylint: disable=R,C,W,E
|
||||
"""
|
||||
Author : Nasir Khan (r0ot h3x49)
|
||||
Github : https://github.com/r0oth3x49
|
||||
License : MIT
|
||||
Copyright (c) 2018-2025 Nasir Khan (r0ot h3x49)
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the
|
||||
Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software,
|
||||
and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
|
||||
The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR
|
||||
ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH
|
||||
THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
"""
|
||||
import re
|
||||
import time
|
||||
import subprocess
|
||||
import sys
|
||||
from colorama import Fore, Style
|
||||
|
||||
|
||||
class FFMPeg:
    """Run ``ffmpeg`` to copy a remote stream into a file and show progress.

    ffmpeg is started with ``-progress pipe:2`` so its ``key=value`` progress
    report arrives on stderr, where :meth:`download` reads it line by line
    and renders a progress bar on stdout.
    """

    _PROGRESS_PATTERN = re.compile(
        r"(frame|fps|total_size|out_time|bitrate|speed|progress)\s*\=\s*(\S+)")
    _DURATION_PATTERN = re.compile(
        r"Duration: (\d{2}):(\d{2}):(\d{2})\.\d{2}")

    def __init__(self,
                 duration,
                 url,
                 token,
                 filepath,
                 quiet=False,
                 callback=lambda *x: None):
        """Store the download parameters.

        :param duration: fallback total duration in seconds, used when no
            ``Duration:`` line can be parsed from ffmpeg's output.
        :param url: input URL handed to ffmpeg.
        :param token: bearer token sent in the ``Authorization`` header.
        :param filepath: output file path.
        :param quiet: stored on the instance; not read inside this class.
        :param callback: stored on the instance; not read inside this class.
        """
        self.url = url
        self.filepath = filepath
        self.quiet = quiet
        self.duration = duration
        self.callback = callback
        self.token = token

    def _command(self):
        """
        ffmpeg.exe -headers "Authorization: Bearer {token}" -i "" -c copy -bsf:a aac_adtstoasc out.mp4
        """
        command = [
            "ffmpeg",
            "-headers",
            f"Authorization: Bearer {self.token}",
            "-i",
            f"{self.url}",
            "-c",
            "copy",
            "-bsf:a",
            "aac_adtstoasc",
            f"{self.filepath}",
            "-y",
            "-progress",
            "pipe:2",
        ]
        return command

    def _fetch_total_duration(self, line):
        """Return the ``Duration: HH:MM:SS.xx`` value of *line* in seconds.

        Falls back to ``self.duration`` when *line* holds no duration.
        """
        mobj = self._DURATION_PATTERN.search(line)
        if not mobj:
            return self.duration
        hours, minutes, seconds = (int(part) for part in mobj.groups())
        # Hours are worth 3600 seconds each (they were previously counted as 60).
        return hours * 3600 + minutes * 60 + seconds

    def _fetch_current_duration_done(self, time_str):
        """Convert an ``HH:MM:SS[.fraction]`` progress value into whole seconds.

        Returns 0 when *time_str* is missing or not in that format (for
        example ``"N/A"``).
        """
        try:
            hours, minutes, seconds = (time_str or "").split(":")
            return (int(hours) * 3600 + int(minutes) * 60 +
                    int(seconds.split(".")[0]))
        except ValueError:
            return 0

    def _prepare_time_str(self, secs):
        """Format *secs* as ``MM:SSs`` or ``HH:MM:SSs``; ``--:--:--`` past 99 hours."""
        (mins, secs) = divmod(secs, 60)
        (hours, mins) = divmod(mins, 60)
        if hours > 99:
            time_str = "--:--:--"
        elif hours == 0:
            time_str = "%02d:%02ds" % (mins, secs)
        else:
            time_str = "%02d:%02d:%02ds" % (hours, mins, secs)
        return time_str

    def _progress(self,
                  iterations,
                  total,
                  bytesdone,
                  speed,
                  elapsed,
                  bar_length=30,
                  fps=None):
        """Compute the progress figures and hand them to :meth:`hls_progress`.

        :param iterations: seconds of media processed so far.
        :param total: total seconds of media (may be 0/None when unknown).
        :param bytesdone: value of ffmpeg's ``total_size`` field.
        :param speed: value of ffmpeg's ``bitrate`` field.
        :param elapsed: wall-clock seconds since the download started.
        :param bar_length: width of the bar in characters.
        :param fps: optional frames-per-second value appended to the rate.
        """
        total = float(total or 0)
        done = float(iterations or 0)
        # Clamp to [0, 1] so an unknown total or an overshoot cannot raise or
        # overflow the bar.
        fraction = min(done / total, 1.0) if total > 0 else 0.0
        filled_length = int(round(bar_length * fraction))
        percents = format(100.00 * fraction, ".2f")

        if bytesdone <= 1048576:
            _receiving = round(float(bytesdone) / 1024.00, 2)
            small_unit, large_unit = "KB", "MB"
        else:
            _receiving = round(float(bytesdone) / 1048576, 2)
            small_unit, large_unit = "MB", "GB"
        if _receiving < 1024.00:
            _received = format(_receiving, ".2f")
            suffix_recvd = small_unit
        else:
            _received = format(_receiving / 1024.00, ".2f")
            suffix_recvd = large_unit

        suffix_rate = "Kb/s" if speed < 1024.00 else "Mb/s"
        if fps:
            suffix_rate += f" {fps}/fps"
        rate = format(speed if speed < 1024.00 else speed / 1024.00, ".2f")

        if total > 0 and done >= total:
            # Finished: end the progress line instead of printing an ETA.
            eta = "\n"
        elif elapsed and done > 0 and total > 0:
            # Remaining media time scaled by the observed processing pace.
            remaining = (total - done) * elapsed / done
            (mins, secs) = divmod(remaining, 60)
            (hours, mins) = divmod(mins, 60)
            if hours > 99:
                eta = "--:--:--"
            elif hours == 0:
                eta = "eta %02d:%02ds" % (mins, secs)
            else:
                eta = "eta %02d:%02d:%02ds" % (hours, mins, secs)
        else:
            # No pace can be measured yet (nothing processed or total unknown).
            eta = "--:--:--"

        total_time = self._prepare_time_str(total)
        done_time = self._prepare_time_str(done)
        downloaded = f"{total_time}/{done_time}"

        received_bytes = str(_received) + str(suffix_recvd)
        percents = f"{received_bytes} {percents}"

        self.hls_progress(
            downloaded=downloaded,
            percents=percents,
            filled_length=filled_length,
            rate=str(rate) + str(suffix_rate),
            suffix=eta,
            bar_length=bar_length,
        )

    def hls_progress(self,
                     downloaded,
                     percents,
                     filled_length,
                     rate,
                     suffix,
                     bar_length=30):
        """Rewrite the current terminal line with the coloured progress bar."""
        bar = (Fore.CYAN + Style.DIM + "#" * filled_length + Fore.WHITE +
               Style.DIM + "-" * (bar_length - filled_length))
        sys.stdout.write(
            "\033[2K\033[1G\r\r{}{}[{}{}*{}{}] : {}{}{} {}% |{}{}{}| {} {}".
            format(
                Fore.CYAN,
                Style.DIM,
                Fore.MAGENTA,
                Style.BRIGHT,
                Fore.CYAN,
                Style.DIM,
                Fore.GREEN,
                Style.BRIGHT,
                downloaded,
                percents,
                bar,
                Fore.GREEN,
                Style.BRIGHT,
                rate,
                suffix,
            ))
        sys.stdout.flush()

    def _parse_progress(self, line):
        """Return the ``key=value`` progress fields found in *line* as a dict."""
        return dict(self._PROGRESS_PATTERN.findall(line))

    @staticmethod
    def _to_number(value, unit):
        """Strip *unit* from a progress value and return it as a float.

        Returns 0 when the value is missing or not numeric (e.g. ``n/a``).
        """
        text = (value or "").lower().replace(unit, "")
        try:
            return float(text)
        except ValueError:
            return 0

    def _render_progress(self, *args, **kwargs):
        """Draw the progress bar; a rendering failure never aborts the download."""
        try:
            self._progress(*args, **kwargs)
        except Exception:
            # The bar is purely cosmetic, so formatting/terminal errors are
            # deliberately ignored. KeyboardInterrupt still propagates.
            pass

    def download(self):
        """Run ffmpeg and report progress until it finishes.

        :return: ``{"status": "True", "msg": "download"}`` once ffmpeg
            reports ``progress=end``; a dict with ``"status": "False"`` when
            ffmpeg's output ends without that marker.
        :raises KeyboardInterrupt: propagated unchanged when the user interrupts.
        """
        total_time = None
        started_at = time.time()
        progress_lines = []
        retVal = {}
        command = self._command()
        bytes_done = 0
        download_speed = 0
        # stdout is never read, so it is discarded rather than piped; the
        # progress report is requested on stderr via "-progress pipe:2".
        with subprocess.Popen(command,
                              stdout=subprocess.DEVNULL,
                              stderr=subprocess.PIPE) as proc:
            while True:
                raw = proc.stderr.readline()
                elapsed = time.time() - started_at
                if not raw:
                    # readline() returns b"" only at end of file: ffmpeg
                    # closed stderr without "progress=end". Stop instead of
                    # spinning forever.
                    retVal = {
                        "status": "False",
                        "msg": "Error: ffmpeg exited before reporting completion",
                    }
                    break
                # errors="replace": stray non-UTF-8 bytes must not abort the loop.
                line = raw.decode("utf-8", errors="replace").strip()

                if not total_time:
                    total_time = self._fetch_total_duration(line)

                if "progress=end" in line:
                    self._render_progress(
                        total_time,
                        total_time,
                        bytes_done,
                        download_speed,
                        elapsed,
                    )
                    retVal = {"status": "True", "msg": "download"}
                    break

                if "progress" not in line:
                    # Collect the fields of the current report until its
                    # closing "progress=..." line arrives.
                    progress_lines.append(line)
                    continue

                items = self._parse_progress("\n".join(progress_lines))
                progress_lines = []
                if items:
                    secs = self._fetch_current_duration_done(
                        items.get("out_time"))
                    bytes_done = self._to_number(items.get("total_size"), "kb")
                    download_speed = self._to_number(
                        items.get("bitrate"), "kbits/s")
                    self._render_progress(
                        secs,
                        total_time,
                        bytes_done,
                        download_speed,
                        elapsed,
                        fps=items.get("fps"),
                    )
        return retVal
|
@ -5,4 +5,6 @@ requests
|
||||
python-dotenv
|
||||
protobuf
|
||||
webvtt-py
|
||||
pysrt
|
||||
pysrt
|
||||
m3u8
|
||||
colorama
|
136
sanitize.py
Normal file
136
sanitize.py
Normal file
@ -0,0 +1,136 @@
|
||||
# This file is from https://github.com/r0oth3x49/udemy-dl/blob/master/udemy/sanitize.py
|
||||
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
import six
|
||||
import unicodedata
|
||||
from unidecode import unidecode
|
||||
|
||||
|
||||
def smart_text(s, encoding="utf-8", errors="strict"):
    """Return *s* converted to the text type (``six.text_type``).

    Text is returned unchanged. Byte strings are decoded with *encoding* and
    *errors*; any other object is converted through the text constructor.
    """
    to_text = six.text_type

    if isinstance(s, to_text):
        return s
    if isinstance(s, six.string_types):
        return to_text(s)

    # From here on `s` is not a native string type.
    if six.PY3:
        if isinstance(s, bytes):
            return to_text(s, encoding, errors)
        return to_text(s)
    if hasattr(s, "__unicode__"):
        return to_text(s)
    return to_text(bytes(s), encoding, errors)
|
||||
|
||||
|
||||
# Extra characters outside of alphanumerics that we'll allow.
SLUG_OK = "-_~"


def slugify(s,
            ok=SLUG_OK,
            lower=True,
            spaces=False,
            only_ascii=False,
            space_replacement="-"):
    """
    Creates a unicode slug for given string with several options.
    L and N signify letter/number.
    http://www.unicode.org/reports/tr44/tr44-4.html#GC_Values_Table
    :param s: Your unicode string.
    :param ok: Extra characters outside of alphanumerics to be allowed.
               Default is '-_~'
    :param lower: Lower the output string.
                  Default is True
    :param spaces: True allows spaces, False replaces a space with the "space_replacement" param
    :param only_ascii: True to replace non-ASCII unicode characters with
                       their ASCII representations.
    :param space_replacement: Char used to replace spaces if "spaces" is False.
                              Default is dash ("-") or first char in ok if dash not allowed
    :type s: String
    :type ok: String
    :type lower: Bool
    :type spaces: Bool
    :type only_ascii: Bool
    :type space_replacement: String
    :return: Slugified unicode string
    :raises ValueError: if ``only_ascii`` is set and ``ok`` cannot be
                        decoded as ASCII.
    """

    if only_ascii and ok != SLUG_OK and hasattr(ok, "decode"):
        try:
            ok.decode("ascii")
        except UnicodeError:
            # UnicodeError covers both the decode and the encode variant.
            raise ValueError(
                ('You can not use "only_ascii=True" with '
                 'a non ascii available chars in "ok" ("%s" given)') % ok)

    rv = []
    for c in unicodedata.normalize("NFKC", smart_text(s)):
        cat = unicodedata.category(c)[0]
        if cat in "LN" or c in ok:
            rv.append(c)
        elif cat == "Z":  # space
            rv.append(" ")
    new = "".join(rv).strip()

    if only_ascii:
        new = unidecode(new)
    if not spaces:
        if space_replacement and space_replacement not in ok:
            space_replacement = ok[0] if ok else ""
        # Raw pattern plus re.escape: the replacement character is matched
        # literally inside the character class. The callable keeps the
        # replacement text from being read as a regex template.
        new = re.sub(r"[%s\s]+" % re.escape(space_replacement),
                     lambda _match: space_replacement, new)
    if lower:
        new = new.lower()

    return new
|
||||
|
||||
|
||||
def sanitize(title):
    """Return *title* with accented letters simplified and unsafe characters replaced.

    Characters with a code point above 128 are replaced through the lookup
    table below; ones missing from the table are written as their decimal
    code point. Afterwards each of ``\\ / : * ? " < >`` becomes ``_``.

    The lookup is done per character, so digits that are already part of the
    title (for example ``"2019"``) are left untouched.

    :param title: text to clean up.
    :return: the sanitized text.
    """
    # Keys are decimal code points; "168u"/"168U" stand for a diaeresis (168)
    # directly followed by the letter u/U.
    _locale = {
        "194": "A",
        "199": "C",
        "286": "G",
        "304": "I",
        "206": "I",
        "214": "O",
        "350": "S",
        "219": "U",
        "226": "a",
        "231": "c",
        "287": "g",
        "305": "i",
        "238": "i",
        "246": "o",
        "351": "s",
        "251": "u",
        "191": "",
        "225": "a",
        "233": "e",
        "237": "i",
        "243": "o",
        "250": "u",
        "252": "u",
        "168u": "u",
        "241": "n",
        "193": "A",
        "201": "E",
        "205": "I",
        "211": "O",
        "218": "U",
        "220": "U",
        "168U": "U",
        "209": "N",
        "223": "ss",
    }

    pieces = []
    skip_next = False
    for index, char in enumerate(title):
        if skip_next:
            # This character was consumed together with the previous one.
            skip_next = False
            continue
        if ord(char) <= 128:
            pieces.append(char)
            continue
        code = str(ord(char))
        pair = code + title[index + 1:index + 2]
        if pair in _locale:
            pieces.append(_locale[pair])
            skip_next = True
        else:
            pieces.append(_locale.get(code, code))
    _temp = "".join(pieces)

    ok = re.compile(r'[^\\/:*?"<>]')
    _title = "".join(x if ok.match(x) else "_" for x in _temp)
    return _title
|
@ -6,8 +6,8 @@ from pysrt.srttime import SubRipTime
|
||||
|
||||
def convert(directory, filename):
|
||||
index = 0
|
||||
vtt_filepath = os.path.join(directory, f"{filename}.vtt")
|
||||
srt_filepath = os.path.join(directory, f"{filename}.srt")
|
||||
vtt_filepath = os.path.join(directory, filename + ".vtt")
|
||||
srt_filepath = os.path.join(directory, filename + ".srt")
|
||||
srt = open(srt_filepath, "w")
|
||||
|
||||
for caption in WebVTT().read(vtt_filepath):
|
||||
|
Loading…
x
Reference in New Issue
Block a user