Merge pull request #27 from Puyodead1/feat-ytdlp

Feat: YT-DLP
This commit is contained in:
Puyodead1 2021-06-01 10:18:41 -04:00 committed by GitHub
commit 1d43d19a47
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 136 additions and 470 deletions

View File

@ -66,8 +66,8 @@ You can now run the program, see the examples below. The course will download to
# Advanced Usage # Advanced Usage
``` ```
usage: main.py [-h] -c COURSE_URL [-b BEARER_TOKEN] [-q QUALITY] [-l LANG] [--skip-lectures] [--download-assets] [--download-captions] usage: main.py [-h] -c COURSE_URL [-b BEARER_TOKEN] [-q QUALITY] [-l LANG] [-cd CONCURRENT_DOWNLOADS] [--skip-lectures] [--download-assets]
[--keep-vtt] [--skip-hls] [--info] [--download-captions] [--keep-vtt] [--skip-hls] [--info]
Udemy Downloader Udemy Downloader
@ -81,6 +81,8 @@ optional arguments:
Download specific video quality. If the requested quality isn't available, the closest quality will be used. If not Download specific video quality. If the requested quality isn't available, the closest quality will be used. If not
specified, the best quality will be downloaded for each lecture specified, the best quality will be downloaded for each lecture
-l LANG, --lang LANG The language to download for captions, specify 'all' to download all captions (Default is 'en') -l LANG, --lang LANG The language to download for captions, specify 'all' to download all captions (Default is 'en')
-cd CONCURRENT_DOWNLOADS, --concurrent-downloads CONCURRENT_DOWNLOADS
The number of maximum concurrent downloads for segments (HLS and DASH, must be a number 1-30)
--skip-lectures If specified, lectures won't be downloaded --skip-lectures If specified, lectures won't be downloaded
--download-assets If specified, lecture assets will be downloaded --download-assets If specified, lecture assets will be downloaded
--download-captions If specified, captions will be downloaded --download-captions If specified, captions will be downloaded
@ -117,6 +119,9 @@ optional arguments:
- `python main.py -c <Course URL> --skip-hls` - `python main.py -c <Course URL> --skip-hls`
- Print course information only: - Print course information only:
- `python main.py -c <Course URL> --info` - `python main.py -c <Course URL> --info`
- Specify max number of concurrent downloads:
- `python main.py -c <Course URL> --concurrent-downloads 20`
- `python main.py -c <Course URL> -cd 20`
# Credits # Credits

317
main.py
View File

@ -18,8 +18,8 @@ from vtt_to_srt import convert
from requests.exceptions import ConnectionError as conn_error from requests.exceptions import ConnectionError as conn_error
from html.parser import HTMLParser as compat_HTMLParser from html.parser import HTMLParser as compat_HTMLParser
from sanitize import sanitize, slugify, SLUG_OK from sanitize import sanitize, slugify, SLUG_OK
from pyffmpeg import FFMPeg as FFMPEG
import subprocess import subprocess
import yt_dlp
home_dir = os.getcwd() home_dir = os.getcwd()
download_dir = os.path.join(os.getcwd(), "out_dir") download_dir = os.path.join(os.getcwd(), "out_dir")
@ -237,19 +237,17 @@ class Udemy:
return _temp return _temp
def _extract_media_sources(self, sources): def _extract_media_sources(self, sources):
_audio = [] _temp = []
_video = []
if sources and isinstance(sources, list): if sources and isinstance(sources, list):
for source in sources: for source in sources:
_type = source.get("type") _type = source.get("type")
src = source.get("src") src = source.get("src")
if _type == "application/dash+xml": if _type == "application/dash+xml":
video, audio = self._extract_mpd(src) out = self._extract_mpd(src)
if video and audio: if out:
_video.extend(video) _temp.extend(out)
_audio.extend(audio) return _temp
return (_video, _audio)
def _extract_subtitles(self, tracks): def _extract_subtitles(self, tracks):
_temp = [] _temp = []
@ -308,83 +306,49 @@ class Udemy:
return _temp return _temp
def _extract_mpd(self, url): def _extract_mpd(self, url):
"""extract mpd streams""" """extracts mpd streams"""
_video = [] _temp = []
_audio = []
try: try:
resp = self.session._get(url) ytdl = yt_dlp.YoutubeDL({
resp.raise_for_status() 'quiet': True,
raw_data = resp.text 'no_warnings': True,
mpd_object = MPEGDASHParser.parse(raw_data) "allow_unplayable_formats": True
})
results = ytdl.extract_info(url,
download=False,
force_generic_extractor=True)
seen = set() seen = set()
for period in mpd_object.periods: formats = results.get("formats")
for adapt_set in period.adaptation_sets:
content_type = adapt_set.mime_type
if content_type == "video/mp4":
for rep in adapt_set.representations:
for segment in rep.segment_templates:
segment_count = 1
timeline = segment.segment_timelines[0]
segment_count += len(timeline.Ss)
for s in timeline.Ss:
if s.r:
segment_count += s.r
segment_extension = segment.media.split( format_id = results.get("format_id")
".")[-1] best_audio_format_id = format_id.split("+")[1]
height = rep.height best_audio = next((x for x in formats
width = rep.width if x.get("format_id") == best_audio_format_id),
None)
for f in formats:
if "video" in f.get("format_note"):
# is a video stream
format_id = f.get("format_id")
extension = f.get("ext")
height = f.get("height")
width = f.get("width")
if height not in seen: if height and height not in seen:
seen.add(height) seen.add(height)
_video.append({ _temp.append({
"type": "type": "dash",
"dash", "height": str(height),
"content_type": "width": str(width),
"video", "format_id": f"{format_id},{best_audio_format_id}",
"height": "extension": extension,
height, "download_url": f.get("manifest_url")
"width": })
width, else:
"extension": # unknown format type
segment_extension, continue
"segment_count":
segment_count,
"media":
segment.media,
"initialization":
segment.initialization
})
elif content_type == "audio/mp4":
for rep in adapt_set.representations:
for segment in rep.segment_templates:
segment_count = 1
timeline = segment.segment_timelines[0]
segment_count += len(timeline.Ss)
for s in timeline.Ss:
if s.r:
segment_count += s.r
segment_extension = segment.media.split(
".")[-1]
_audio.append({
"type":
"dash",
"content_type":
"audio",
"extension":
segment_extension,
"segment_count":
segment_count,
"media":
segment.media,
"initialization":
segment.initialization
})
except Exception as error: except Exception as error:
print(f"Udemy Says : '{error}' while fetching mpd manifest") print(f"Error fetching MPD streams: '{error}'")
return (_video, _audio) return _temp
def extract_course_name(self, url): def extract_course_name(self, url):
""" """
@ -865,24 +829,20 @@ def cleanup(path):
os.removedirs(path) os.removedirs(path)
def mux_process(video_title, lecture_working_dir, output_path): def mux_process(video_title, video_filepath, audio_filepath, output_path):
""" """
@author Jayapraveen @author Jayapraveen
""" """
if os.name == "nt": if os.name == "nt":
command = "ffmpeg -y -i \"{}\" -i \"{}\" -acodec copy -vcodec copy -fflags +bitexact -map_metadata -1 -metadata title=\"{}\" \"{}\"".format( command = "ffmpeg -y -i \"{}\" -i \"{}\" -acodec copy -vcodec copy -fflags +bitexact -map_metadata -1 -metadata title=\"{}\" \"{}\"".format(
os.path.join(lecture_working_dir, "decrypted_audio.mp4"), video_filepath, audio_filepath, video_title, output_path)
os.path.join(lecture_working_dir, "decrypted_video.mp4"),
video_title, output_path)
else: else:
command = "nice -n 7 ffmpeg -y -i \"{}\" -i \"{}\" -acodec copy -vcodec copy -fflags +bitexact -map_metadata -1 -metadata title=\"{}\" \"{}\"".format( command = "nice -n 7 ffmpeg -y -i \"{}\" -i \"{}\" -acodec copy -vcodec copy -fflags +bitexact -map_metadata -1 -metadata title=\"{}\" \"{}\"".format(
os.path.join(lecture_working_dir, "decrypted_audio.mp4"), video_filepath, audio_filepath, video_title, output_path)
os.path.join(lecture_working_dir, "decrypted_video.mp4"),
video_title, output_path)
os.system(command) os.system(command)
def decrypt(kid, filename, lecture_working_dir): def decrypt(kid, in_filepath, out_filepath):
""" """
@author Jayapraveen @author Jayapraveen
""" """
@ -891,101 +851,49 @@ def decrypt(kid, filename, lecture_working_dir):
key = keyfile[kid.lower()] key = keyfile[kid.lower()]
if (os.name == "nt"): if (os.name == "nt"):
os.system(f"mp4decrypt --key 1:%s \"%s\" \"%s\"" % os.system(f"mp4decrypt --key 1:%s \"%s\" \"%s\"" %
(key, (key, in_filepath, out_filepath))
os.path.join(lecture_working_dir,
"encrypted_{}.mp4".format(filename)),
os.path.join(lecture_working_dir,
"decrypted_{}.mp4".format(filename))))
else: else:
os.system(f"nice -n 7 mp4decrypt --key 1:%s \"%s\" \"%s\"" % os.system(f"nice -n 7 mp4decrypt --key 1:%s \"%s\" \"%s\"" %
(key, (key, in_filepath, out_filepath))
os.path.join(lecture_working_dir,
"encrypted_{}.mp4".format(filename)),
os.path.join(lecture_working_dir,
"decrypted_{}.mp4".format(filename))))
print("> Decryption complete") print("> Decryption complete")
except KeyError: except KeyError:
raise KeyError("Key not found") raise KeyError("Key not found")
def handle_segments(video_source, audio_source, video_title, def handle_segments(url, format_id, video_title, lecture_working_dir,
lecture_working_dir, output_path): output_path, concurrent_connections):
""" temp_filepath = output_path.replace(".mp4", "")
@author Jayapraveen temp_filepath = temp_filepath + ".mpd-part"
""" video_filepath_enc = temp_filepath + ".mp4"
no_vid_segments = video_source.get("segment_count") audio_filepath_enc = temp_filepath + ".m4a"
no_aud_segments = audio_source.get("segment_count") video_filepath_dec = temp_filepath + ".decrypted.mp4"
audio_filepath_dec = temp_filepath + ".decrypted.m4a"
audio_media = audio_source.get("media") print("> Downloading Lecture Tracks...")
audio_init = audio_source.get("initialization")
audio_extension = audio_source.get("extension")
video_media = video_source.get("media")
video_init = video_source.get("initialization")
video_extension = video_source.get("extension")
audio_urls = audio_init + "\n dir={}\n out=audio_0.mp4\n".format(
lecture_working_dir)
video_urls = video_init + "\n dir={}\n out=video_0.mp4\n".format(
lecture_working_dir)
list_path = os.path.join(lecture_working_dir, "list.txt")
for i in range(1, no_aud_segments):
audio_urls += audio_media.replace(
"$Number$", str(i)) + "\n dir={}\n out=audio_{}.mp4\n".format(
lecture_working_dir, i)
for i in range(1, no_vid_segments):
video_urls += video_media.replace(
"$Number$", str(i)) + "\n dir={}\n out=video_{}.mp4\n".format(
lecture_working_dir, i)
with open(list_path, 'w') as f:
f.write("{}\n{}".format(audio_urls, video_urls))
f.close()
print("> Downloading Lecture Segments...")
ret_code = subprocess.Popen([ ret_code = subprocess.Popen([
"aria2c", "-i", list_path, "-j16", "-s20", "-x16", "-c", "yt-dlp", "--force-generic-extractor", "--allow-unplayable-formats",
"--auto-file-renaming=false", "--summary-interval=0" "--concurrent-fragments", f"{concurrent_connections}", "--downloader",
"aria2c", "--fixup", "never", "-k", "-o", f"{temp_filepath}.%(ext)s",
"-f", format_id, f"{url}"
]).wait() ]).wait()
print("> Lecture Segments Downloaded") print("> Lecture Tracks Downloaded")
print("Return code: " + str(ret_code)) print("Return code: " + str(ret_code))
os.remove(list_path) video_kid = extract_kid(video_filepath_enc)
video_kid = extract_kid(os.path.join(lecture_working_dir, "video_0.mp4"))
print("KID for video file is: " + video_kid) print("KID for video file is: " + video_kid)
audio_kid = extract_kid(os.path.join(lecture_working_dir, "audio_0.mp4")) audio_kid = extract_kid(audio_filepath_enc)
print("KID for audio file is: " + audio_kid) print("KID for audio file is: " + audio_kid)
os.chdir(lecture_working_dir)
if os.name == "nt":
video_concat_command = "copy /b " + "+".join([
f"video_{i}.{video_extension}" for i in range(0, no_vid_segments)
]) + " encrypted_video.mp4"
audio_concat_command = "copy /b " + "+".join([
f"audio_{i}.{audio_extension}" for i in range(0, no_aud_segments)
]) + " encrypted_audio.mp4"
else:
video_concat_command = "cat " + " ".join([
f"video_{i}.{video_extension}" for i in range(0, no_aud_segments)
]) + " > encrypted_video.mp4"
audio_concat_command = "cat " + " ".join([
f"audio_{i}.{audio_extension}" for i in range(0, no_vid_segments)
]) + " > encrypted_audio.mp4"
os.system(video_concat_command)
os.system(audio_concat_command)
os.chdir(home_dir)
try: try:
decrypt(video_kid, "video", lecture_working_dir) decrypt(video_kid, video_filepath_enc, video_filepath_dec)
decrypt(audio_kid, "audio", lecture_working_dir) decrypt(audio_kid, audio_filepath_enc, audio_filepath_dec)
os.chdir(home_dir) mux_process(video_title, video_filepath_dec, audio_filepath_dec,
mux_process(video_title, lecture_working_dir, output_path) output_path)
cleanup(lecture_working_dir) os.remove(video_filepath_enc)
os.remove(audio_filepath_enc)
os.remove(video_filepath_dec)
os.remove(audio_filepath_dec)
except Exception as e: except Exception as e:
print(f"Error: ", e) print(f"Error: ", e)
@ -1113,31 +1021,31 @@ def process_caption(caption, lecture_title, lecture_dir, keep_vtt, tries=0):
print(f" > Error converting caption: {e}") print(f" > Error converting caption: {e}")
def process_lecture(lecture, lecture_path, lecture_dir, quality, access_token): def process_lecture(lecture, lecture_path, lecture_dir, quality, access_token,
concurrent_connections):
lecture_title = lecture.get("lecture_title") lecture_title = lecture.get("lecture_title")
is_encrypted = lecture.get("is_encrypted") is_encrypted = lecture.get("is_encrypted")
lecture_video_sources = lecture.get("video_sources") lecture_sources = lecture.get("video_sources")
lecture_audio_sources = lecture.get("audio_sources")
if is_encrypted: if is_encrypted:
if len(lecture_audio_sources) > 0 and len(lecture_video_sources) > 0: if len(lecture_sources) > 0:
lecture_working_dir = os.path.join(working_dir, lecture_working_dir = os.path.join(working_dir,
str(lecture.get("asset_id"))) str(lecture.get("asset_id")))
if not os.path.isfile(lecture_path): if not os.path.isfile(lecture_path):
video_source = lecture_video_sources[ source = lecture_sources[-1] # last index is the best quality
-1] # last index is the best quality
audio_source = lecture_audio_sources[-1]
if isinstance(quality, int): if isinstance(quality, int):
video_source = min( source = min(
lecture_video_sources, lecture_sources,
key=lambda x: abs(int(x.get("height")) - quality)) key=lambda x: abs(int(x.get("height")) - quality))
if not os.path.exists(lecture_working_dir): if not os.path.exists(lecture_working_dir):
os.mkdir(lecture_working_dir) os.mkdir(lecture_working_dir)
print(f" > Lecture '%s' has DRM, attempting to download" % print(f" > Lecture '%s' has DRM, attempting to download" %
lecture_title) lecture_title)
handle_segments(video_source, audio_source, lecture_title, handle_segments(source.get("download_url"),
lecture_working_dir, lecture_path) source.get("format_id"), lecture_title,
lecture_working_dir, lecture_path,
concurrent_connections)
else: else:
print( print(
" > Lecture '%s' is already downloaded, skipping..." % " > Lecture '%s' is already downloaded, skipping..." %
@ -1145,7 +1053,7 @@ def process_lecture(lecture, lecture_path, lecture_dir, quality, access_token):
else: else:
print(f" > Lecture '%s' is missing media links" % print(f" > Lecture '%s' is missing media links" %
lecture_title) lecture_title)
print(len(lecture_audio_sources), len(lecture_video_sources)) print(len(lecture_sources))
else: else:
sources = lecture.get("sources") sources = lecture.get("sources")
sources = sorted(sources, sources = sorted(sources,
@ -1173,8 +1081,14 @@ def process_lecture(lecture, lecture_path, lecture_dir, quality, access_token):
if source_type == "hls": if source_type == "hls":
temp_filepath = lecture_path.replace(".mp4", "") temp_filepath = lecture_path.replace(".mp4", "")
temp_filepath = temp_filepath + ".hls-part.mp4" temp_filepath = temp_filepath + ".hls-part.mp4"
retVal = FFMPEG(None, url, access_token, # retVal = FFMPEG(None, url, access_token,
temp_filepath).download() # temp_filepath).download()
ret_code = subprocess.Popen([
"yt-dlp", "--force-generic-extractor",
"--concurrent-fragments",
f"{concurrent_connections}", "--downloader",
"aria2c", "-o", f"{temp_filepath}", f"{url}"
]).wait()
if retVal: if retVal:
os.rename(temp_filepath, lecture_path) os.rename(temp_filepath, lecture_path)
print(" > HLS Download success") print(" > HLS Download success")
@ -1191,7 +1105,7 @@ def process_lecture(lecture, lecture_path, lecture_dir, quality, access_token):
def parse_new(_udemy, quality, skip_lectures, dl_assets, dl_captions, def parse_new(_udemy, quality, skip_lectures, dl_assets, dl_captions,
caption_locale, keep_vtt, access_token): caption_locale, keep_vtt, access_token, concurrent_connections):
total_chapters = _udemy.get("total_chapters") total_chapters = _udemy.get("total_chapters")
total_lectures = _udemy.get("total_lectures") total_lectures = _udemy.get("total_lectures")
print(f"Chapter(s) ({total_chapters})") print(f"Chapter(s) ({total_chapters})")
@ -1234,9 +1148,11 @@ def parse_new(_udemy, quality, skip_lectures, dl_assets, dl_captions,
continue continue
else: else:
lecture_path = os.path.join( lecture_path = os.path.join(
chapter_dir, "{}.mp4".format(sanitize(lecture_title))) chapter_dir,
sanitize(lecture_title) + ".mp4")
process_lecture(lecture, lecture_path, chapter_dir, process_lecture(lecture, lecture_path, chapter_dir,
quality, access_token) quality, access_token,
concurrent_connections)
if dl_assets: if dl_assets:
assets = lecture.get("assets") assets = lecture.get("assets")
@ -1407,6 +1323,14 @@ if __name__ == "__main__":
type=str, type=str,
help="The language to download for captions, specify 'all' to download all captions (Default is 'en')", help="The language to download for captions, specify 'all' to download all captions (Default is 'en')",
) )
parser.add_argument(
"-cd",
"--concurrent-downloads",
dest="concurrent_downloads",
type=int,
help=
"The number of maximum concurrent downloads for segments (HLS and DASH, must be a number 1-30)",
)
parser.add_argument( parser.add_argument(
"--skip-lectures", "--skip-lectures",
dest="skip_lectures", dest="skip_lectures",
@ -1467,6 +1391,7 @@ if __name__ == "__main__":
course_name = None course_name = None
keep_vtt = False keep_vtt = False
skip_hls = False skip_hls = False
concurrent_downloads = 10
args = parser.parse_args() args = parser.parse_args()
if args.download_assets: if args.download_assets:
@ -1483,6 +1408,15 @@ if __name__ == "__main__":
keep_vtt = args.keep_vtt keep_vtt = args.keep_vtt
if args.skip_hls: if args.skip_hls:
skip_hls = args.skip_hls skip_hls = args.skip_hls
if args.concurrent_downloads:
concurrent_downloads = args.concurrent_downloads
if concurrent_downloads <= 0:
# if the user gave a number that is less than or equal to 0, set cc to default of 10
concurrent_downloads = 10
elif concurrent_downloads > 30:
# if the user gave a number thats greater than 30, set cc to the max of 30
concurrent_downloads = 30
aria_ret_val = check_for_aria() aria_ret_val = check_for_aria()
if not aria_ret_val: if not aria_ret_val:
@ -1559,7 +1493,8 @@ if __name__ == "__main__":
course_info(_udemy) course_info(_udemy)
else: else:
parse_new(_udemy, quality, skip_lectures, dl_assets, dl_captions, parse_new(_udemy, quality, skip_lectures, dl_assets, dl_captions,
caption_locale, keep_vtt, access_token) caption_locale, keep_vtt, access_token,
concurrent_downloads)
else: else:
_udemy = {} _udemy = {}
_udemy["access_token"] = access_token _udemy["access_token"] = access_token
@ -1615,6 +1550,9 @@ if __name__ == "__main__":
counter += 1 counter += 1
if lecture_id: if lecture_id:
print(
f"Processing {course.index(entry)} of {len(course)}"
)
retVal = [] retVal = []
if isinstance(asset, dict): if isinstance(asset, dict):
@ -1701,12 +1639,11 @@ if __name__ == "__main__":
# encrypted # encrypted
data = asset.get("media_sources") data = asset.get("media_sources")
if data and isinstance(data, list): if data and isinstance(data, list):
video_media_sources, audio_media_sources = udemy._extract_media_sources( sources = udemy._extract_media_sources(data)
data)
tracks = asset.get("captions") tracks = asset.get("captions")
# duration = asset.get("time_estimation") # duration = asset.get("time_estimation")
subtitles = udemy._extract_subtitles(tracks) subtitles = udemy._extract_subtitles(tracks)
sources_count = len(video_media_sources) sources_count = len(sources)
subtitle_count = len(subtitles) subtitle_count = len(subtitles)
lectures.append({ lectures.append({
"index": lecture_counter, "index": lecture_counter,
@ -1716,8 +1653,7 @@ if __name__ == "__main__":
# "duration": duration, # "duration": duration,
"assets": retVal, "assets": retVal,
"assets_count": len(retVal), "assets_count": len(retVal),
"video_sources": video_media_sources, "video_sources": sources,
"audio_sources": audio_media_sources,
"subtitles": subtitles, "subtitles": subtitles,
"subtitle_count": subtitle_count, "subtitle_count": subtitle_count,
"sources_count": sources_count, "sources_count": sources_count,
@ -1792,4 +1728,5 @@ if __name__ == "__main__":
course_info(_udemy) course_info(_udemy)
else: else:
parse_new(_udemy, quality, skip_lectures, dl_assets, dl_captions, parse_new(_udemy, quality, skip_lectures, dl_assets, dl_captions,
caption_locale, keep_vtt, access_token) caption_locale, keep_vtt, access_token,
concurrent_downloads)

View File

@ -1,277 +0,0 @@
#!/usr/bin/python3
# pylint: disable=R,C,W,E
"""
Author : Nasir Khan (r0ot h3x49)
Github : https://github.com/r0oth3x49
License : MIT
Copyright (c) 2018-2025 Nasir Khan (r0ot h3x49)
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the
Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software,
and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR
ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH
THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
"""
import re
import time
import subprocess
import sys
from colorama import Fore, Style
class FFMPeg:
    """Run ffmpeg to copy a remote stream to a local file, rendering progress.

    The stream at ``url`` is fetched with a bearer-token ``Authorization``
    header and remuxed (``-c copy``) into ``filepath``. ffmpeg is asked to
    write machine-readable progress to stderr (``-progress pipe:2``), which
    :meth:`download` parses to draw a progress bar on stdout.
    """

    # key=value pairs emitted by ffmpeg's ``-progress`` output.
    _PROGRESS_PATTERN = re.compile(
        r"(frame|fps|total_size|out_time|bitrate|speed|progress)\s*\=\s*(\S+)")
    # "Duration: HH:MM:SS.cc" as printed in ffmpeg's input banner.
    _DURATION_PATTERN = re.compile(
        r"Duration: (\d{2}):(\d{2}):(\d{2})\.\d{2}")

    def __init__(self,
                 duration,
                 url,
                 token,
                 filepath,
                 quiet=False,
                 callback=lambda *x: None):
        """Store the download parameters.

        duration: fallback total duration in seconds (may be None) used when
            ffmpeg's output contains no ``Duration:`` line.
        url: input URL handed to ffmpeg's ``-i``.
        token: bearer token sent in the ``Authorization`` header.
        filepath: output file path.
        quiet, callback: stored on the instance; not used by this class.
        """
        self.url = url
        self.filepath = filepath
        self.quiet = quiet
        self.duration = duration
        self.callback = callback
        self.token = token

    def _command(self):
        """Return the ffmpeg argument list.

        Equivalent to:
        ffmpeg -headers "Authorization: Bearer {token}" -i "{url}" -c copy
               -bsf:a aac_adtstoasc "{filepath}" -y -progress pipe:2
        """
        command = [
            "ffmpeg",
            "-headers",
            f"Authorization: Bearer {self.token}",
            "-i",
            f"{self.url}",
            "-c",
            "copy",
            "-bsf:a",
            "aac_adtstoasc",
            f"{self.filepath}",
            "-y",
            "-progress",
            "pipe:2",
        ]
        return command

    def _fetch_total_duration(self, line):
        """Return the duration in seconds found in ``line``.

        Falls back to ``self.duration`` (which may be None) when the line
        has no ``Duration: HH:MM:SS.cc`` field.
        """
        mobj = self._DURATION_PATTERN.search(line)
        if not mobj:
            return self.duration
        hours, minutes, seconds = (int(part) for part in mobj.groups())
        # Hours are worth 3600 seconds (the previous code used 60).
        return hours * 3600 + minutes * 60 + seconds

    def _fetch_current_duration_done(self, time_str):
        """Convert an ``HH:MM:SS[.frac]`` string to whole seconds.

        Returns 0 for a missing or unparsable value (e.g. None or "N/A").
        """
        if not time_str:
            return 0
        parts = time_str.split(":")
        try:
            hours = int(parts[0])
            minutes = int(parts[1])
            seconds = int(parts[2].split(".")[0])
        except (IndexError, ValueError):
            return 0
        return hours * 3600 + minutes * 60 + seconds

    @staticmethod
    def _to_float(value, unit):
        """Parse a progress value such as ``"1234kbits/s"`` into a number.

        ``unit`` is the lowercase suffix to strip. Missing, "N/A" or
        otherwise non-numeric values yield 0.
        """
        if not value:
            return 0
        text = value.lower().replace(unit, "")
        if text == "n/a":
            return 0
        try:
            return float(text)
        except ValueError:
            return 0

    def _prepare_time_str(self, secs):
        """Format ``secs`` as ``MM:SSs`` / ``HH:MM:SSs`` (``--:--:--`` past 99h)."""
        (mins, secs) = divmod(secs, 60)
        (hours, mins) = divmod(mins, 60)
        if hours > 99:
            time_str = "--:--:--"
        elif hours == 0:
            time_str = "%02d:%02ds" % (mins, secs)
        else:
            time_str = "%02d:%02d:%02ds" % (hours, mins, secs)
        return time_str

    def _progress(self,
                  iterations,
                  total,
                  bytesdone,
                  speed,
                  elapsed,
                  bar_length=30,
                  fps=None):
        """Compute the progress-bar fields and draw them via hls_progress.

        iterations: seconds of media processed so far.
        total: total media duration in seconds; nothing is drawn while it
            is still unknown (None or 0).
        bytesdone: value of ffmpeg's ``total_size`` field.
        speed: value of ffmpeg's ``bitrate`` field.
        elapsed: wall-clock seconds since the download started.
        """
        if not total:
            # Without a total there is no ratio to display.
            return
        ratio = iterations / float(total)
        # Clamp so an overshoot never produces a negative "-" run.
        filled_length = max(0, min(bar_length, int(round(bar_length * ratio))))
        percents = format(100.00 * ratio, ".2f")

        if bytesdone <= 1048576:
            _receiving = round(float(bytesdone) / 1024.00, 2)
            _received = format(
                _receiving if _receiving < 1024.00 else _receiving / 1024.00,
                ".2f")
            suffix_recvd = "KB" if _receiving < 1024.00 else "MB"
        else:
            _receiving = round(float(bytesdone) / 1048576, 2)
            _received = format(
                _receiving if _receiving < 1024.00 else _receiving / 1024.00,
                ".2f")
            suffix_recvd = "MB" if _receiving < 1024.00 else "GB"

        suffix_rate = "Kb/s" if speed < 1024.00 else "Mb/s"
        if fps:
            suffix_rate += f" {fps}/fps"

        # Remaining media seconds scaled by the observed processing pace.
        if elapsed and iterations > 0:
            eta = max((total - iterations) * elapsed / float(iterations), 0)
        else:
            eta = 0
        rate = format(speed if speed < 1024.00 else speed / 1024.00, ".2f")

        (mins, secs) = divmod(eta, 60)
        (hours, mins) = divmod(mins, 60)
        if hours > 99:
            eta = "--:--:--"
        elif hours == 0:
            eta = "eta %02d:%02ds" % (mins, secs)
        else:
            eta = "eta %02d:%02d:%02ds" % (hours, mins, secs)
        if secs == 0:
            # Kept from the original: a zero seconds component replaces the
            # ETA with a newline (presumably to end the line when finished).
            eta = "\n"

        total_time = self._prepare_time_str(total)
        done_time = self._prepare_time_str(iterations)
        downloaded = f"{total_time}/{done_time}"
        received_bytes = str(_received) + str(suffix_recvd)
        percents = f"{received_bytes} {percents}"
        self.hls_progress(
            downloaded=downloaded,
            percents=percents,
            filled_length=filled_length,
            rate=str(rate) + str(suffix_rate),
            suffix=eta,
            bar_length=bar_length,
        )

    def hls_progress(self,
                     downloaded,
                     percents,
                     filled_length,
                     rate,
                     suffix,
                     bar_length=30):
        """Write one coloured progress line to stdout, replacing the current line."""
        bar = (Fore.CYAN + Style.DIM + "#" * filled_length + Fore.WHITE +
               Style.DIM + "-" * (bar_length - filled_length))
        sys.stdout.write(
            "\033[2K\033[1G\r\r{}{}[{}{}*{}{}] : {}{}{} {}% |{}{}{}| {} {}".
            format(
                Fore.CYAN,
                Style.DIM,
                Fore.MAGENTA,
                Style.BRIGHT,
                Fore.CYAN,
                Style.DIM,
                Fore.GREEN,
                Style.BRIGHT,
                downloaded,
                percents,
                bar,
                Fore.GREEN,
                Style.BRIGHT,
                rate,
                suffix,
            ))
        sys.stdout.flush()

    def _parse_progress(self, line):
        """Return the recognised ``key=value`` progress pairs in ``line`` as a dict."""
        return dict(self._PROGRESS_PATTERN.findall(line))

    def _safe_progress(self, *args, **kwargs):
        """Draw progress, ignoring rendering errors.

        The bar is purely cosmetic, so a formatting problem must never
        abort the download. KeyboardInterrupt is not an Exception subclass
        and still propagates.
        """
        try:
            self._progress(*args, **kwargs)
        except Exception:
            pass

    def download(self):
        """Run ffmpeg and block until it finishes.

        Returns ``{"status": "True", "msg": "download"}`` once ffmpeg
        reports ``progress=end``, or ``{"status": "False", "msg": ...}`` if
        ffmpeg's stderr closes first. The dict is never empty, so callers
        must inspect ``"status"`` rather than rely on truthiness.

        KeyboardInterrupt propagates to the caller.
        """
        total_time = None
        t0 = time.time()
        progress_lines = []
        retVal = {}
        bytes_done = 0
        download_speed = 0
        # stdout is discarded: all output of interest arrives on stderr and
        # an undrained pipe could block ffmpeg.
        with subprocess.Popen(self._command(),
                              stdout=subprocess.DEVNULL,
                              stderr=subprocess.PIPE) as proc:
            while True:
                raw = proc.stderr.readline()
                if not raw:
                    # EOF: ffmpeg exited without "progress=end". Stop here
                    # instead of spinning on empty reads forever.
                    break
                elapsed = time.time() - t0
                line = raw.decode("utf-8", errors="replace").strip()
                if not total_time:
                    total_time = self._fetch_total_duration(line)
                if "progress=end" in line:
                    self._safe_progress(
                        total_time,
                        total_time,
                        bytes_done,
                        download_speed,
                        elapsed,
                    )
                    retVal = {"status": "True", "msg": "download"}
                    break
                if "progress" not in line:
                    # Collect fields until the closing "progress=..." line.
                    progress_lines.append(line)
                    continue
                items = self._parse_progress("\n".join(progress_lines))
                progress_lines = []
                if not items:
                    continue
                secs = self._fetch_current_duration_done(
                    items.get("out_time"))
                bytes_done = self._to_float(items.get("total_size"), "kb")
                download_speed = self._to_float(items.get("bitrate"),
                                                "kbits/s")
                self._safe_progress(
                    secs,
                    total_time,
                    bytes_done,
                    download_speed,
                    elapsed,
                    fps=items.get("fps"),
                )
        if not retVal:
            retVal = {
                "status": "False",
                "msg": "Error: ffmpeg exited with code "
                       f"{proc.returncode} before reporting progress=end",
            }
        return retVal

View File

@ -8,3 +8,4 @@ webvtt-py
pysrt pysrt
m3u8 m3u8
colorama colorama
yt-dlp