mirror of
https://cdm-project.com/Download-Tools/udemy-downloader.git
synced 2025-05-01 13:54:27 +02:00
New Experimental Downloader, bug fixes, and small updates
+ Updated cleanup function to remove the entire temporary lecture folder instead of just leaving behind tons of empty folders + Fixed typo in mux function + Segment count is now properly calculated from segment timeline + Manifest is now parsed from the URL instead of being downloaded; this should be better for downloading multiple courses at once. + Fixed a bug where audio content_type would try to find a max quality + New Downloader: Threaded Downloader uses multiple threads to download files; this should improve download speeds greatly. By default, the threaded downloader is not used; you can enable it by passing ``--use-threaded-downloader``. By default, it only uses 10 threads; you can set a custom number of threads with the ``--threads`` option
This commit is contained in:
parent
88a411d708
commit
aab19bf66f
16
README.md
16
README.md
@ -68,8 +68,8 @@ You can now run `python main.py` to start downloading. The course will download
|
|||||||
# Advanced Usage
|
# Advanced Usage
|
||||||
|
|
||||||
```
|
```
|
||||||
usage: main.py [-h] -c COURSE_URL [-b BEARER_TOKEN] [-d] [-q QUALITY] [-l LANG] [--skip-lectures] [--download-assets]
|
usage: main.py [-h] -c COURSE_URL [-b BEARER_TOKEN] [-q QUALITY] [-t THREADS] [-l LANG] [--skip-lectures] [--download-assets]
|
||||||
[--download-captions]
|
[--download-captions] [--use-threaded-downloader] [-d]
|
||||||
|
|
||||||
Udemy Downloader
|
Udemy Downloader
|
||||||
|
|
||||||
@ -79,13 +79,15 @@ optional arguments:
|
|||||||
The URL of the course to download
|
The URL of the course to download
|
||||||
-b BEARER_TOKEN, --bearer BEARER_TOKEN
|
-b BEARER_TOKEN, --bearer BEARER_TOKEN
|
||||||
The Bearer token to use
|
The Bearer token to use
|
||||||
-d, --debug Use test_data.json rather than fetch from the udemy api.
|
|
||||||
-q QUALITY, --quality QUALITY
|
-q QUALITY, --quality QUALITY
|
||||||
Download specific video quality. (144, 360, 480, 720, 1080)
|
Download specific video quality. (144, 360, 480, 720, 1080)
|
||||||
|
-t THREADS, --threads THREADS
|
||||||
|
Max number of threads to use when using the threaded downloader (default 10)
|
||||||
-l LANG, --lang LANG The language to download for captions (Default is en)
|
-l LANG, --lang LANG The language to download for captions (Default is en)
|
||||||
--skip-lectures If specified, lectures won't be downloaded.
|
--skip-lectures If specified, lectures won't be downloaded.
|
||||||
--download-assets If specified, lecture assets will be downloaded.
|
--download-assets If specified, lecture assets will be downloaded.
|
||||||
--download-captions If specified, captions will be downloaded.
|
--download-captions If specified, captions will be downloaded.
|
||||||
|
--use-threaded-downloader If specified, the experimental threaded downloader will be used
|
||||||
```
|
```
|
||||||
|
|
||||||
- Passing a Bearer Token and Course ID as an argument
|
- Passing a Bearer Token and Course ID as an argument
|
||||||
@ -107,8 +109,12 @@ optional arguments:
|
|||||||
- `python main.py -c <Course URL> --download-captions -l all` - Downloads all subtitles
|
- `python main.py -c <Course URL> --download-captions -l all` - Downloads all subtitles
|
||||||
- etc
|
- etc
|
||||||
- Skip downloading lecture videos
|
- Skip downloading lecture videos
|
||||||
- `python main.py --skip-lectures --download-captions` - Downloads only captions
|
- `python main.py -c <Course URL> --skip-lectures --download-captions` - Downloads only captions
|
||||||
- `python main.py --skip-lectures --download-assets` - Downloads only assets
|
- `python main.py -c <Course URL> --skip-lectures --download-assets` - Downloads only assets
|
||||||
|
- Use threaded downloader
|
||||||
|
- `python main.py -c <Course URL> --use-threaded-downloader`
|
||||||
|
- Use threaded downloader with custom max threads
|
||||||
|
- `python main.py -c <Course URL> --use-threaded-downloader --threads 15`
|
||||||
|
|
||||||
# Credits
|
# Credits
|
||||||
|
|
||||||
|
120
downloader.py
Normal file
120
downloader.py
Normal file
@ -0,0 +1,120 @@
|
|||||||
|
import os, threading, requests
|
||||||
|
from tqdm import tqdm
|
||||||
|
|
||||||
|
|
||||||
|
class FileDownloader():
    """
    Download files on background threads, limited by a semaphore.

    Each call to ``get_file`` starts one thread; at most ``max_threads`` of
    them perform network/disk work at any time, the rest wait on the
    semaphore. Partially downloaded files are resumed with an HTTP Range
    request.

    @source: https://gist.github.com/stefanfortuin/9dbfe8618701507d0ef2b5515b165c5f
    """
    def __init__(self, max_threads=10):
        """
        @param max_threads: maximum number of downloads allowed to run
                            concurrently (semaphore size).
        """
        print("> Threaded downloader using {} threads.".format(
            str(max_threads)))
        self.sema = threading.Semaphore(value=max_threads)
        self.headers = {
            'user-agent':
            'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/62.0.3202.94 Safari/537.36'
        }
        # Chunk size (in bytes) used when streaming a response body to disk.
        self.block_size = 1024

    def t_getfile(self, link, filepath, filename, bar, session):
        """
        Threaded function that uses a semaphore
        to not instantiate too many threads

        @param link: URL of the file to download.
        @param filepath: destination path on disk.
        @param filename: label used for the progress bar description.
        @param bar: an existing tqdm bar to update, or None to create a
                    per-file byte bar.
        @param session: optional session object used for fresh downloads;
                        when None, ``requests.get`` is used.
        """
        # ``with`` guarantees the permit is released even if something below
        # raises; a leaked permit would eventually stall every download.
        with self.sema:
            directory = os.path.dirname(filepath)
            if directory:
                os.makedirs(directory, exist_ok=True)

            # A single HEAD request is enough to learn the file size.
            try:
                head = requests.head(link, headers=self.headers, timeout=30)
            except requests.exceptions.RequestException as e:
                print(e)
                return

            if 'content-length' not in head.headers:
                print(f"server doesn't support content-length for {link}")
                return
            total_bytes = int(head.headers['content-length'])

            exists = os.path.isfile(filepath)
            current_bytes = os.stat(filepath).st_size if exists else 0

            # Compare against None explicitly: a tqdm bar is falsy when its
            # total is 0, which must not cause it to be replaced.
            own_bar = bar is None
            if own_bar:
                bar = tqdm(total=total_bytes,
                           initial=current_bytes,
                           unit='B',
                           unit_scale=True,
                           desc=filename)

            try:
                if not exists:
                    self.download_new_file(link, filename, filepath,
                                           total_bytes, bar, session)
                elif current_bytes < total_bytes:
                    self.continue_file_download(link, filename, filepath,
                                                current_bytes, total_bytes,
                                                bar)
                elif bar.unit != "B":
                    # Already complete: count the file on per-file bars.
                    # Byte bars created here already start at the full size.
                    bar.update(1)
            finally:
                # Only close bars created here; shared bars belong to the
                # caller.
                if own_bar:
                    bar.close()

    def download_new_file(self, link, filename, filepath, total_bytes, bar,
                          session):
        """
        Download ``link`` from scratch into ``filepath``.

        Request errors (including HTTP error statuses) are printed rather
        than raised, so nothing is written to disk for a failed request.
        """
        try:
            if session is None:
                response = requests.get(link,
                                        headers=self.headers,
                                        timeout=30,
                                        stream=True)
            else:
                response = session.get(link, stream=True)
            with response:
                # Do not save an error page as if it were the file.
                response.raise_for_status()
                self.write_file(response, filepath, 'wb', bar)
        except requests.exceptions.RequestException as e:
            print(e)

    def continue_file_download(self, link, filename, filepath, current_bytes,
                               total_bytes, bar):
        """
        Resume a partial download starting at byte offset ``current_bytes``.

        Request errors are printed rather than raised.
        """
        range_header = self.headers.copy()
        # Open-ended range: the end position of a byte range is inclusive,
        # so "current-total" would ask for one byte past the end.
        range_header['Range'] = f"bytes={current_bytes}-"

        try:
            response = requests.get(link,
                                    headers=range_header,
                                    timeout=30,
                                    stream=True)
            with response:
                response.raise_for_status()
                # 206 means the server honoured the range; any other success
                # status carries the whole body, so start the file over
                # instead of appending a full copy to the partial data.
                mode = 'ab' if response.status_code == 206 else 'wb'
                self.write_file(response, filepath, mode, bar)
        except requests.exceptions.RequestException as e:
            print(e)

    def write_file(self, content, filepath, writemode, bar):
        """
        Stream the body of ``content`` into ``filepath``.

        Byte bars (``bar.unit == "B"``) advance by the number of bytes
        actually written; any other bar advances by one per finished file.
        """
        counts_bytes = bar.unit == "B"
        with open(filepath, writemode) as f:
            for chunk in content.iter_content(chunk_size=self.block_size):
                if chunk:
                    f.write(chunk)
                    if counts_bytes:
                        # The final chunk is usually shorter than block_size.
                        bar.update(len(chunk))

        if not counts_bytes:
            bar.update(1)

    def get_file(self, link, path, filename, bar=None, session=None):
        """ Downloads the file on a new thread and returns the started thread"""
        thread = threading.Thread(target=self.t_getfile,
                                  args=(link, path, filename, bar, session))
        thread.start()
        return thread
|
304
main.py
304
main.py
@ -5,6 +5,7 @@ from dotenv import load_dotenv
|
|||||||
from mpegdash.parser import MPEGDASHParser
|
from mpegdash.parser import MPEGDASHParser
|
||||||
from utils import extract_kid
|
from utils import extract_kid
|
||||||
from vtt_to_srt import convert
|
from vtt_to_srt import convert
|
||||||
|
from downloader import FileDownloader
|
||||||
|
|
||||||
download_dir = os.path.join(os.getcwd(), "out_dir")
|
download_dir = os.path.join(os.getcwd(), "out_dir")
|
||||||
working_dir = os.path.join(os.getcwd(), "working_dir")
|
working_dir = os.path.join(os.getcwd(), "working_dir")
|
||||||
@ -12,6 +13,7 @@ retry = 3
|
|||||||
home_dir = os.getcwd()
|
home_dir = os.getcwd()
|
||||||
keyfile_path = os.path.join(os.getcwd(), "keyfile.json")
|
keyfile_path = os.path.join(os.getcwd(), "keyfile.json")
|
||||||
valid_qualities = [144, 360, 480, 720, 1080]
|
valid_qualities = [144, 360, 480, 720, 1080]
|
||||||
|
downloader = None
|
||||||
|
|
||||||
if not os.path.exists(working_dir):
|
if not os.path.exists(working_dir):
|
||||||
os.makedirs(working_dir)
|
os.makedirs(working_dir)
|
||||||
@ -115,13 +117,12 @@ def cleanup(path):
|
|||||||
@author Jayapraveen
|
@author Jayapraveen
|
||||||
"""
|
"""
|
||||||
leftover_files = glob.glob(path + '/*.mp4', recursive=True)
|
leftover_files = glob.glob(path + '/*.mp4', recursive=True)
|
||||||
mpd_files = glob.glob(path + '/*.mpd', recursive=True)
|
|
||||||
leftover_files = leftover_files + mpd_files
|
|
||||||
for file_list in leftover_files:
|
for file_list in leftover_files:
|
||||||
try:
|
try:
|
||||||
os.remove(file_list)
|
os.remove(file_list)
|
||||||
except OSError:
|
except OSError:
|
||||||
print(f"Error deleting file: {file_list}")
|
print(f"Error deleting file: {file_list}")
|
||||||
|
os.removedirs(path)
|
||||||
|
|
||||||
|
|
||||||
def mux_process(video_title, lecture_working_dir, outfile):
|
def mux_process(video_title, lecture_working_dir, outfile):
|
||||||
@ -147,30 +148,30 @@ def decrypt(kid, filename, lecture_working_dir):
|
|||||||
"""
|
"""
|
||||||
try:
|
try:
|
||||||
key = keyfile[kid.lower()]
|
key = keyfile[kid.lower()]
|
||||||
except KeyError as error:
|
except KeyError:
|
||||||
exit("Key not found")
|
exit("Key not found")
|
||||||
if (os.name == "nt"):
|
if (os.name == "nt"):
|
||||||
os.system("mp4decrypt --key 1:{} \"{}\" \"{}\"".format(
|
code = os.system("mp4decrypt --key 1:{0} \"{1}\" \"{2}\"".format(
|
||||||
key,
|
key,
|
||||||
os.path.join(lecture_working_dir,
|
os.path.join(lecture_working_dir,
|
||||||
"encrypted_{}.mp4".format(filename)),
|
"encrypted_{0}.mp4".format(filename)),
|
||||||
os.path.join(lecture_working_dir,
|
os.path.join(lecture_working_dir,
|
||||||
"decrypted{}.mp4".format(filename))))
|
"decrypted_{0}.mp4".format(filename))))
|
||||||
else:
|
else:
|
||||||
os.system("nice -n 7 mp4decrypt --key 1:{} \"{}\" \"{}\"".format(
|
os.system("nice -n 7 mp4decrypt --key 1:{0} \"{1}\" \"{2}\"".format(
|
||||||
key,
|
key,
|
||||||
os.path.join(lecture_working_dir,
|
os.path.join(lecture_working_dir,
|
||||||
"encrypted_{}.mp4".format(filename)),
|
"encrypted_{0}.mp4".format(filename)),
|
||||||
os.path.join(lecture_working_dir,
|
os.path.join(lecture_working_dir,
|
||||||
"decrypted{}.mp4".format(filename))))
|
"decrypted_{0}.mp4".format(filename))))
|
||||||
|
|
||||||
|
|
||||||
def handle_irregular_segments(media_info, video_title, lecture_working_dir,
|
def handle_segments(media_info, video_title, lecture_working_dir, output_path):
|
||||||
output_path):
|
|
||||||
"""
|
"""
|
||||||
@author Jayapraveen
|
@author Jayapraveen
|
||||||
"""
|
"""
|
||||||
no_segment, video_url, video_init, video_extension, no_segment, audio_url, audio_init, audio_extension = media_info
|
no_segment, video_url, video_init, video_extension, no_segment, audio_url, audio_init, audio_extension = media_info
|
||||||
|
no_segment += 10 # because the download_media function relies on hittin a 404 to know when to finish
|
||||||
download_media("video_0.seg.mp4", video_init, lecture_working_dir)
|
download_media("video_0.seg.mp4", video_init, lecture_working_dir)
|
||||||
video_kid = extract_kid(
|
video_kid = extract_kid(
|
||||||
os.path.join(lecture_working_dir, "video_0.seg.mp4"))
|
os.path.join(lecture_working_dir, "video_0.seg.mp4"))
|
||||||
@ -179,7 +180,7 @@ def handle_irregular_segments(media_info, video_title, lecture_working_dir,
|
|||||||
audio_kid = extract_kid(
|
audio_kid = extract_kid(
|
||||||
os.path.join(lecture_working_dir, "audio_0.seg.mp4"))
|
os.path.join(lecture_working_dir, "audio_0.seg.mp4"))
|
||||||
print("KID for audio file is: " + audio_kid)
|
print("KID for audio file is: " + audio_kid)
|
||||||
for count in range(1, no_segment):
|
for count in range(1, no_segment + 4):
|
||||||
video_segment_url = video_url.replace("$Number$", str(count))
|
video_segment_url = video_url.replace("$Number$", str(count))
|
||||||
audio_segment_url = audio_url.replace("$Number$", str(count))
|
audio_segment_url = audio_url.replace("$Number$", str(count))
|
||||||
video_status = download_media(
|
video_status = download_media(
|
||||||
@ -214,73 +215,138 @@ def handle_irregular_segments(media_info, video_title, lecture_working_dir,
|
|||||||
decrypt(audio_kid, "audio", lecture_working_dir)
|
decrypt(audio_kid, "audio", lecture_working_dir)
|
||||||
os.chdir(home_dir)
|
os.chdir(home_dir)
|
||||||
mux_process(video_title, lecture_working_dir, output_path)
|
mux_process(video_title, lecture_working_dir, output_path)
|
||||||
|
cleanup(lecture_working_dir)
|
||||||
break
|
break
|
||||||
|
|
||||||
|
|
||||||
def manifest_parser(mpd_url):
|
def handle_segments_threaded(media_info, video_title, lecture_working_dir,
                             output_path):
    """
    @author Jayapraveen

    Download all DASH segments of a lecture with the threaded downloader,
    concatenate them, decrypt the result and mux it into ``output_path``.

    @param media_info: 8-item sequence as returned by manifest_parser:
        video segment count, video media URL template, video init URL,
        video extension, then the same four values for audio.
    @param video_title: lecture title, used for progress bar labels and
        passed on to mux_process.
    @param lecture_working_dir: temporary folder for the segments; it is
        handed to cleanup() once muxing is done.
    @param output_path: path passed to mux_process as the output file.
    """
    # Keep the video and audio counts separate: binding both to one name
    # silently replaces the video count with the audio count.
    (video_segments, video_url, video_init, video_extension, audio_segments,
     audio_url, audio_init, audio_extension) = media_info

    # Segment 0 is the init segment; it also carries the key id (KID).
    download_media("video_0.seg.mp4", video_init, lecture_working_dir)
    video_kid = extract_kid(
        os.path.join(lecture_working_dir, "video_0.seg.mp4"))
    print("KID for video file is: " + video_kid)
    download_media("audio_0.seg.mp4", audio_init, lecture_working_dir)
    audio_kid = extract_kid(
        os.path.join(lecture_working_dir, "audio_0.seg.mp4"))
    print("KID for audio file is: " + audio_kid)

    # initial=1 accounts for the init segment downloaded above.
    vbar = tqdm(total=video_segments,
                initial=1,
                unit='Video Segments',
                desc=video_title + " (Video)")
    abar = tqdm(total=audio_segments,
                initial=1,
                unit='Audio Segments',
                desc=video_title + " (Audio)")

    threads = []

    # NOTE(review): segments 1 .. count-1 are requested, as before. If the
    # count from manifest_parser excludes the init segment, the upper bound
    # should probably be count + 1 - confirm against a real manifest.
    for count in range(1, video_segments):
        video_filename = f"video_{str(count)}.seg.{video_extension}"
        video_path = os.path.join(lecture_working_dir, video_filename)
        video_segment_url = video_url.replace("$Number$", str(count))
        video = downloader.get_file(video_segment_url, video_path,
                                    video_filename, vbar)
        threads.append(video)

    for count in range(1, audio_segments):
        audio_filename = f"audio_{str(count)}.seg.{audio_extension}"
        audio_path = os.path.join(lecture_working_dir, audio_filename)
        audio_segment_url = audio_url.replace("$Number$", str(count))
        audio = downloader.get_file(audio_segment_url, audio_path,
                                    audio_filename, abar)
        threads.append(audio)

    for x in threads:
        x.join()

    vbar.close()
    abar.close()

    # Concatenate every file fetched above (0 .. count-1). Reusing the loop
    # variable here dropped the last downloaded segment and raised NameError
    # when there was nothing to loop over.
    video_parts = [
        f"video_{i}.seg.{video_extension}" for i in range(0, video_segments)
    ]
    audio_parts = [
        f"audio_{i}.seg.{audio_extension}" for i in range(0, audio_segments)
    ]

    os.chdir(lecture_working_dir)
    try:
        if os.name == "nt":
            video_concat_command = "copy /b " + "+".join(
                video_parts) + " encrypted_video.mp4"
            audio_concat_command = "copy /b " + "+".join(
                audio_parts) + " encrypted_audio.mp4"
        else:
            video_concat_command = "cat " + " ".join(
                video_parts) + " > encrypted_video.mp4"
            audio_concat_command = "cat " + " ".join(
                audio_parts) + " > encrypted_audio.mp4"
        os.system(video_concat_command)
        os.system(audio_concat_command)
        decrypt(video_kid, "video", lecture_working_dir)
        decrypt(audio_kid, "audio", lecture_working_dir)
    finally:
        # Always restore the working directory, even if a step above fails.
        os.chdir(home_dir)
    mux_process(video_title, lecture_working_dir, output_path)
    cleanup(lecture_working_dir)
|
||||||
|
|
||||||
|
|
||||||
|
def manifest_parser(mpd_url, quality):
|
||||||
"""
|
"""
|
||||||
@author Jayapraveen
|
@author Jayapraveen
|
||||||
"""
|
"""
|
||||||
video = []
|
video = []
|
||||||
audio = []
|
audio = []
|
||||||
manifest = requests.get(mpd_url).text
|
mpd = MPEGDASHParser.parse(mpd_url)
|
||||||
with open("manifest.mpd", 'w') as manifest_handler:
|
|
||||||
manifest_handler.write(manifest)
|
|
||||||
mpd = MPEGDASHParser.parse("./manifest.mpd")
|
|
||||||
running_time = durationtoseconds(mpd.media_presentation_duration)
|
|
||||||
for period in mpd.periods:
|
for period in mpd.periods:
|
||||||
for adapt_set in period.adaptation_sets:
|
for adapt_set in period.adaptation_sets:
|
||||||
print("Processing " + adapt_set.mime_type)
|
print("Processing " + adapt_set.mime_type)
|
||||||
content_type = adapt_set.mime_type
|
content_type = adapt_set.mime_type
|
||||||
if quality and content_type == "video/mp4":
|
if content_type == "video/mp4":
|
||||||
print(adapt_set.representations[0].height, quality)
|
if quality:
|
||||||
repr = next((x for x in adapt_set.representations
|
repr = next((x for x in adapt_set.representations
|
||||||
if x.height == quality), None)
|
if x.height == quality), None)
|
||||||
if not repr:
|
if not repr:
|
||||||
qualities = []
|
qualities = []
|
||||||
for rep in adapt_set.representations:
|
for rep in adapt_set.representations:
|
||||||
qualities.append(rep.height)
|
qualities.append(rep.height)
|
||||||
print(quality, qualities)
|
if quality < qualities[0]:
|
||||||
if quality < qualities[0]:
|
# they want a lower quality than whats available
|
||||||
# they want a lower quality than whats available
|
repr = adapt_set.representations[
|
||||||
repr = adapt_set.representations[0] # Lowest Quality
|
0] # Lowest Quality
|
||||||
elif quality > qualities[-1]:
|
elif quality > qualities[-1]:
|
||||||
# they want a higher quality than whats available
|
# they want a higher quality than whats available
|
||||||
repr = adapt_set.representations[-1] # Max Quality
|
repr = adapt_set.representations[-1] # Max Quality
|
||||||
print(
|
print(
|
||||||
"> Could not find video with requested quality, falling back to closest!"
|
"> Could not find video with requested quality, falling back to closest!"
|
||||||
)
|
)
|
||||||
print("> Using quality of %s" % repr.height)
|
print("> Using quality of %s" % repr.height)
|
||||||
|
else:
|
||||||
|
print("> Found MPD representation with quality %s" %
|
||||||
|
repr.height)
|
||||||
else:
|
else:
|
||||||
print("> Found MPD representation with quality %s" %
|
repr = adapt_set.representations[-1] # Max Quality
|
||||||
repr.height)
|
print("> Using max quality of %s" % repr.height)
|
||||||
else:
|
segment_count = 0
|
||||||
repr = adapt_set.representations[-1] # Max Quality
|
|
||||||
print("> Using max quality of %s" % repr.height)
|
segment = repr.segment_templates[0]
|
||||||
for segment in repr.segment_templates:
|
timeline = segment.segment_timelines[0]
|
||||||
if (segment.duration):
|
segment_count += len(timeline.Ss)
|
||||||
print("Media segments are of equal timeframe")
|
for s in timeline.Ss:
|
||||||
segment_time = segment.duration / segment.timescale
|
if s.r:
|
||||||
total_segments = running_time / segment_time
|
segment_count += s.r
|
||||||
else:
|
|
||||||
print("Media segments are of inequal timeframe")
|
print("Expected No of segments:", segment_count)
|
||||||
|
if (content_type == "audio/mp4"):
|
||||||
|
segment_extension = segment.media.split(".")[-1]
|
||||||
|
audio.append(segment_count)
|
||||||
|
audio.append(segment.media)
|
||||||
|
audio.append(segment.initialization)
|
||||||
|
audio.append(segment_extension)
|
||||||
|
elif (content_type == "video/mp4"):
|
||||||
|
segment_extension = segment.media.split(".")[-1]
|
||||||
|
video.append(segment_count)
|
||||||
|
video.append(segment.media)
|
||||||
|
video.append(segment.initialization)
|
||||||
|
video.append(segment_extension)
|
||||||
|
|
||||||
approx_no_segments = round(
|
|
||||||
running_time /
|
|
||||||
6) + 10 # aproximate of 6 sec per segment
|
|
||||||
print("Expected No of segments:", approx_no_segments)
|
|
||||||
if (content_type == "audio/mp4"):
|
|
||||||
segment_extension = segment.media.split(".")[-1]
|
|
||||||
audio.append(approx_no_segments)
|
|
||||||
audio.append(segment.media)
|
|
||||||
audio.append(segment.initialization)
|
|
||||||
audio.append(segment_extension)
|
|
||||||
elif (content_type == "video/mp4"):
|
|
||||||
segment_extension = segment.media.split(".")[-1]
|
|
||||||
video.append(approx_no_segments)
|
|
||||||
video.append(segment.media)
|
|
||||||
video.append(segment.initialization)
|
|
||||||
video.append(segment_extension)
|
|
||||||
return video + audio
|
return video + audio
|
||||||
|
|
||||||
|
|
||||||
@ -316,6 +382,8 @@ def process_caption(caption,
|
|||||||
lecture_index,
|
lecture_index,
|
||||||
lecture_title,
|
lecture_title,
|
||||||
lecture_dir,
|
lecture_dir,
|
||||||
|
use_threaded_downloader,
|
||||||
|
threads,
|
||||||
tries=0):
|
tries=0):
|
||||||
filename = f"%s. %s_%s.%s" % (lecture_index, sanitize(lecture_title),
|
filename = f"%s. %s_%s.%s" % (lecture_index, sanitize(lecture_title),
|
||||||
caption.get("locale_id"), caption.get("ext"))
|
caption.get("locale_id"), caption.get("ext"))
|
||||||
@ -328,7 +396,12 @@ def process_caption(caption,
|
|||||||
else:
|
else:
|
||||||
print(f"> Downloading captions: '%s'" % filename)
|
print(f"> Downloading captions: '%s'" % filename)
|
||||||
try:
|
try:
|
||||||
download(caption.get("url"), filepath, filename)
|
if use_threaded_downloader:
|
||||||
|
thread = downloader.get_file(caption.get("url"), filepath,
|
||||||
|
filename)
|
||||||
|
thread.join()
|
||||||
|
else:
|
||||||
|
download(caption.get("url"), filepath, filename)
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
if tries >= 3:
|
if tries >= 3:
|
||||||
print(
|
print(
|
||||||
@ -340,7 +413,8 @@ def process_caption(caption,
|
|||||||
f"> Error downloading captions: {e}. Will retry {3-tries} more times."
|
f"> Error downloading captions: {e}. Will retry {3-tries} more times."
|
||||||
)
|
)
|
||||||
process_caption(caption, lecture_index, lecture_title,
|
process_caption(caption, lecture_index, lecture_title,
|
||||||
lecture_dir, tries + 1)
|
lecture_dir, use_threaded_downloader, threads,
|
||||||
|
tries + 1)
|
||||||
if caption.get("ext") == "vtt":
|
if caption.get("ext") == "vtt":
|
||||||
try:
|
try:
|
||||||
print("> Converting captions to SRT format...")
|
print("> Converting captions to SRT format...")
|
||||||
@ -352,7 +426,8 @@ def process_caption(caption,
|
|||||||
|
|
||||||
|
|
||||||
def process_lecture(lecture, lecture_index, lecture_path, lecture_dir, quality,
|
def process_lecture(lecture, lecture_index, lecture_path, lecture_dir, quality,
|
||||||
skip_lectures, dl_assets, dl_captions, caption_locale):
|
skip_lectures, dl_assets, dl_captions, caption_locale,
|
||||||
|
use_threaded_downloader):
|
||||||
lecture_title = lecture["title"]
|
lecture_title = lecture["title"]
|
||||||
lecture_asset = lecture["asset"]
|
lecture_asset = lecture["asset"]
|
||||||
if not skip_lectures:
|
if not skip_lectures:
|
||||||
@ -371,7 +446,12 @@ def process_lecture(lecture, lecture_index, lecture_path, lecture_dir, quality,
|
|||||||
|
|
||||||
if not os.path.isfile(lecture_path):
|
if not os.path.isfile(lecture_path):
|
||||||
try:
|
try:
|
||||||
download(lecture_url, lecture_path, lecture_title)
|
if use_threaded_downloader:
|
||||||
|
thread = downloader.get_file(lecture_url, lecture_path,
|
||||||
|
lecture_title)
|
||||||
|
thread.join()
|
||||||
|
else:
|
||||||
|
download(lecture_url, lecture_path, lecture_title)
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
# We could add a retry here
|
# We could add a retry here
|
||||||
print(f"> Error downloading lecture: {e}. Skipping...")
|
print(f"> Error downloading lecture: {e}. Skipping...")
|
||||||
@ -396,10 +476,13 @@ def process_lecture(lecture, lecture_index, lecture_path, lecture_dir, quality,
|
|||||||
"> Couldn't find dash url for lecture '%s', skipping...",
|
"> Couldn't find dash url for lecture '%s', skipping...",
|
||||||
lecture_title)
|
lecture_title)
|
||||||
return
|
return
|
||||||
media_info = manifest_parser(mpd_url)
|
media_info = manifest_parser(mpd_url, quality)
|
||||||
handle_irregular_segments(media_info, lecture_title,
|
if use_threaded_downloader:
|
||||||
lecture_working_dir, lecture_path)
|
handle_segments_threaded(media_info, lecture_title,
|
||||||
cleanup(lecture_working_dir)
|
lecture_working_dir, lecture_path)
|
||||||
|
else:
|
||||||
|
handle_segments(media_info, lecture_title,
|
||||||
|
lecture_working_dir, lecture_path)
|
||||||
else:
|
else:
|
||||||
print("> Lecture '%s' is already downloaded, skipping..." %
|
print("> Lecture '%s' is already downloaded, skipping..." %
|
||||||
lecture_title)
|
lecture_title)
|
||||||
@ -418,9 +501,16 @@ def process_lecture(lecture, lecture_index, lecture_path, lecture_dir, quality,
|
|||||||
if x["label"] == "download"), None)
|
if x["label"] == "download"), None)
|
||||||
if download_url:
|
if download_url:
|
||||||
try:
|
try:
|
||||||
download(download_url,
|
if use_threaded_downloader:
|
||||||
os.path.join(lecture_dir, asset_filename),
|
thread = downloader.get_file(
|
||||||
asset_filename)
|
download_url,
|
||||||
|
os.path.join(lecture_dir, asset_filename),
|
||||||
|
asset_filename)
|
||||||
|
thread.join()
|
||||||
|
else:
|
||||||
|
download(download_url,
|
||||||
|
os.path.join(lecture_dir, asset_filename),
|
||||||
|
asset_filename)
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
print(
|
print(
|
||||||
f"> Error downloading lecture asset: {e}. Skipping"
|
f"> Error downloading lecture asset: {e}. Skipping"
|
||||||
@ -472,11 +562,12 @@ def process_lecture(lecture, lecture_index, lecture_path, lecture_dir, quality,
|
|||||||
})
|
})
|
||||||
|
|
||||||
for caption in captions:
|
for caption in captions:
|
||||||
process_caption(caption, lecture_index, lecture_title, lecture_dir)
|
process_caption(caption, lecture_index, lecture_title, lecture_dir,
|
||||||
|
use_threaded_downloader)
|
||||||
|
|
||||||
|
|
||||||
def parse(data, course_id, course_name, skip_lectures, dl_assets, dl_captions,
|
def parse(data, course_id, course_name, skip_lectures, dl_assets, dl_captions,
|
||||||
quality, caption_locale):
|
quality, caption_locale, use_threaded_downloader):
|
||||||
course_dir = os.path.join(download_dir, course_name)
|
course_dir = os.path.join(download_dir, course_name)
|
||||||
if not os.path.exists(course_dir):
|
if not os.path.exists(course_dir):
|
||||||
os.mkdir(course_dir)
|
os.mkdir(course_dir)
|
||||||
@ -498,9 +589,18 @@ def parse(data, course_id, course_name, skip_lectures, dl_assets, dl_captions,
|
|||||||
lecture_path = os.path.join(
|
lecture_path = os.path.join(
|
||||||
course_dir, "{}. {}.mp4".format(lecture_index,
|
course_dir, "{}. {}.mp4".format(lecture_index,
|
||||||
sanitize(obj["title"])))
|
sanitize(obj["title"])))
|
||||||
process_lecture(obj, lecture_index, lecture_path, download_dir,
|
process_lecture(
|
||||||
quality, skip_lectures, dl_assets, dl_captions,
|
obj,
|
||||||
caption_locale)
|
lecture_index,
|
||||||
|
lecture_path,
|
||||||
|
download_dir,
|
||||||
|
quality,
|
||||||
|
skip_lectures,
|
||||||
|
dl_assets,
|
||||||
|
dl_captions,
|
||||||
|
caption_locale,
|
||||||
|
use_threaded_downloader,
|
||||||
|
)
|
||||||
|
|
||||||
for chapter in chapters:
|
for chapter in chapters:
|
||||||
chapter_dir = os.path.join(
|
chapter_dir = os.path.join(
|
||||||
@ -516,7 +616,7 @@ def parse(data, course_id, course_name, skip_lectures, dl_assets, dl_captions,
|
|||||||
sanitize(lecture["title"])))
|
sanitize(lecture["title"])))
|
||||||
process_lecture(lecture, lecture_index, lecture_path, chapter_dir,
|
process_lecture(lecture, lecture_index, lecture_path, chapter_dir,
|
||||||
quality, skip_lectures, dl_assets, dl_captions,
|
quality, skip_lectures, dl_assets, dl_captions,
|
||||||
caption_locale)
|
caption_locale, use_threaded_downloader)
|
||||||
print("\n\n\n\n\n\n\n\n=====================")
|
print("\n\n\n\n\n\n\n\n=====================")
|
||||||
print("All downloads completed for course!")
|
print("All downloads completed for course!")
|
||||||
print("=====================")
|
print("=====================")
|
||||||
@ -571,13 +671,6 @@ if __name__ == "__main__":
|
|||||||
type=str,
|
type=str,
|
||||||
help="The Bearer token to use",
|
help="The Bearer token to use",
|
||||||
)
|
)
|
||||||
parser.add_argument(
|
|
||||||
"-d",
|
|
||||||
"--debug",
|
|
||||||
dest="debug",
|
|
||||||
action="store_true",
|
|
||||||
help="Use test_data.json rather than fetch from the udemy api.",
|
|
||||||
)
|
|
||||||
parser.add_argument(
|
parser.add_argument(
|
||||||
"-q",
|
"-q",
|
||||||
"--quality",
|
"--quality",
|
||||||
@ -585,6 +678,14 @@ if __name__ == "__main__":
|
|||||||
type=int,
|
type=int,
|
||||||
help="Download specific video quality. (144, 360, 480, 720, 1080)",
|
help="Download specific video quality. (144, 360, 480, 720, 1080)",
|
||||||
)
|
)
|
||||||
|
parser.add_argument(
|
||||||
|
"-t",
|
||||||
|
"--threads",
|
||||||
|
dest="threads",
|
||||||
|
type=int,
|
||||||
|
help=
|
||||||
|
"Max number of threads to use when using the threaded downloader (default 10)",
|
||||||
|
)
|
||||||
parser.add_argument(
|
parser.add_argument(
|
||||||
"-l",
|
"-l",
|
||||||
"--lang",
|
"--lang",
|
||||||
@ -610,6 +711,19 @@ if __name__ == "__main__":
|
|||||||
action="store_true",
|
action="store_true",
|
||||||
help="If specified, captions will be downloaded.",
|
help="If specified, captions will be downloaded.",
|
||||||
)
|
)
|
||||||
|
parser.add_argument(
|
||||||
|
"--use-threaded-downloader",
|
||||||
|
dest="use_threaded_downloader",
|
||||||
|
action="store_true",
|
||||||
|
help="If specified, the experimental threaded downloader will be used",
|
||||||
|
)
|
||||||
|
parser.add_argument(
|
||||||
|
"-d",
|
||||||
|
"--debug",
|
||||||
|
dest="debug",
|
||||||
|
action="store_true",
|
||||||
|
help="Use test_data.json rather than fetch from the udemy api.",
|
||||||
|
)
|
||||||
|
|
||||||
dl_assets = False
|
dl_assets = False
|
||||||
skip_lectures = False
|
skip_lectures = False
|
||||||
@ -619,6 +733,8 @@ if __name__ == "__main__":
|
|||||||
bearer_token = None
|
bearer_token = None
|
||||||
portal_name = None
|
portal_name = None
|
||||||
course_name = None
|
course_name = None
|
||||||
|
use_threaded_downloader = False
|
||||||
|
threads = 10
|
||||||
|
|
||||||
args = parser.parse_args()
|
args = parser.parse_args()
|
||||||
if args.download_assets:
|
if args.download_assets:
|
||||||
@ -635,6 +751,11 @@ if __name__ == "__main__":
|
|||||||
sys.exit(1)
|
sys.exit(1)
|
||||||
else:
|
else:
|
||||||
quality = args.quality
|
quality = args.quality
|
||||||
|
if args.use_threaded_downloader:
|
||||||
|
use_threaded_downloader = args.use_threaded_downloader
|
||||||
|
if args.threads:
|
||||||
|
threads = args.threads
|
||||||
|
downloader = FileDownloader(max_threads=threads)
|
||||||
|
|
||||||
load_dotenv()
|
load_dotenv()
|
||||||
if args.bearer_token:
|
if args.bearer_token:
|
||||||
@ -693,7 +814,8 @@ if __name__ == "__main__":
|
|||||||
course_data = json.loads(f.read())
|
course_data = json.loads(f.read())
|
||||||
parse(course_data["results"], course_id, course_name,
|
parse(course_data["results"], course_id, course_name,
|
||||||
skip_lectures, dl_assets, dl_captions, quality,
|
skip_lectures, dl_assets, dl_captions, quality,
|
||||||
caption_locale)
|
caption_locale, use_threaded_downloader)
|
||||||
else:
|
else:
|
||||||
parse(course_data["results"], course_id, course_name, skip_lectures,
|
parse(course_data["results"], course_id, course_name, skip_lectures,
|
||||||
dl_assets, dl_captions, quality, caption_locale)
|
dl_assets, dl_captions, quality, caption_locale,
|
||||||
|
use_threaded_downloader)
|
||||||
|
Loading…
x
Reference in New Issue
Block a user