mirror of
https://cdm-project.com/Download-Tools/udemy-downloader.git
synced 2025-05-02 15:24:26 +02:00

+ Updated cleanup function to remove the entire temporary lecture folder instead of just leaving behind tons of empty folders + Fixed typo in mux function + Segment count is now properly calculated from segment timeline + Manifest is now parsed from the URL instead of being downloaded, this should be better for downloading multiple courses at once. + Fixed a bug where audio content_type would try to find a max quality + New Downloader: Threaded Downloader uses multiple threads to download files, this should improve download speeds greatly. By default, the threaded downloader is not used, you can use the threaded downloader by passing ``--use-threaded-downloader``. By default, it only uses 10 threads, you can set a custom number of threads with the ``--threads`` option
121 lines
4.5 KiB
Python
121 lines
4.5 KiB
Python
import os, threading, requests
|
|
from tqdm import tqdm
|
|
|
|
|
|
class FileDownloader():
    """
    Download files over HTTP on background threads, with resume support.

    Every call to ``get_file`` starts one thread. A semaphore sized by
    ``max_threads`` limits how many of those threads are doing work at the
    same time; the others block until a slot is free.

    @source: https://gist.github.com/stefanfortuin/9dbfe8618701507d0ef2b5515b165c5f
    """

    def __init__(self, max_threads=10):
        """
        Create a downloader.

        :param max_threads: maximum number of downloads allowed to run
            concurrently (initial value of the semaphore).
        """
        print("> Threaded downloader using {} threads.".format(
            str(max_threads)))
        self.sema = threading.Semaphore(value=max_threads)
        # Headers sent with GET requests that do not go through a session.
        self.headers = {
            'user-agent':
            'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/62.0.3202.94 Safari/537.36'
        }
        # Chunk size, in bytes, used when streaming a response body to disk.
        self.block_size = 1024

    def t_getfile(self, link, filepath, filename, bar, session):
        """
        Threaded function that uses a semaphore
        to not instantiate too many threads

        Downloads ``link`` to ``filepath``. If the file already exists and is
        smaller than the size reported by the server, the download is resumed;
        if it is already complete, only the progress bar is advanced.

        :param link: URL to download.
        :param filepath: destination path of the file on disk.
        :param filename: label used for the progress bar description.
        :param bar: progress bar to update, or ``None`` to create a
            per-file byte bar.
        :param session: object with a requests-style ``get`` method used for
            the GET request, or ``None`` to use ``requests`` directly.
        """
        # `with` releases the semaphore on every exit path, including
        # exceptions, so a failed download can never leak a slot.
        with self.sema:
            own_bar = None
            try:
                # dirname is '' for a bare file name and makedirs('') raises.
                directory = os.path.dirname(filepath)
                if directory:
                    os.makedirs(directory, exist_ok=True)

                file_exists = os.path.isfile(filepath)
                current_bytes = os.stat(filepath).st_size if file_exists else 0

                # A single HEAD request supplies the expected size.
                head = requests.head(link, timeout=30).headers
                if 'content-length' not in head:
                    print(f"server doesn't support content-length for {link}")
                    return
                total_bytes = int(head['content-length'])

                # Compare against None explicitly: a tqdm bar is falsy when
                # its total is 0 and raises TypeError when its total is None.
                if bar is None:
                    own_bar = bar = tqdm(total=total_bytes,
                                         initial=current_bytes,
                                         unit='B',
                                         unit_scale=True,
                                         desc=filename)

                if not file_exists:
                    self.download_new_file(link, filename, filepath,
                                           total_bytes, bar, session)
                elif current_bytes < total_bytes:
                    self.continue_file_download(link, filename, filepath,
                                                current_bytes, total_bytes,
                                                bar, session)
                elif own_bar is None:
                    # Already complete: advance the caller's bar as before.
                    # A bar created here started at current_bytes and is
                    # therefore already full, so it is left alone.
                    # NOTE(review): one block_size step for a byte bar is kept
                    # from the original; confirm what callers expect here.
                    if bar.unit == "B":
                        bar.update(self.block_size)
                    else:
                        bar.update(1)
            except requests.exceptions.RequestException as e:
                print(e)
            finally:
                # Only close a bar this call created; a caller's bar is
                # typically shared between files and stays open.
                if own_bar is not None:
                    own_bar.close()

    def download_new_file(self, link, filename, filepath, total_bytes, bar,
                          session):
        """
        Download ``link`` from the beginning and write it to ``filepath``.

        Request errors, including HTTP error statuses, are printed and not
        raised. ``filename`` and ``total_bytes`` are accepted for interface
        compatibility and are not used.

        :param session: object with a requests-style ``get`` method, or
            ``None`` to use ``requests.get`` with this downloader's headers.
        """
        try:
            if session is None:
                response = requests.get(link,
                                        headers=self.headers,
                                        timeout=30,
                                        stream=True)
            else:
                response = session.get(link, timeout=30, stream=True)
            # Do not store an HTTP error page as if it were the file.
            response.raise_for_status()
            self.write_file(response, filepath, 'wb', bar)
        except requests.exceptions.RequestException as e:
            print(e)

    def continue_file_download(self, link, filename, filepath, current_bytes,
                               total_bytes, bar, session=None):
        """
        Resume a partial download of ``link`` into ``filepath``.

        Requests everything from byte offset ``current_bytes`` onward. If the
        server honours the range (status 206) the data is appended; otherwise
        the full body is written from scratch so the file is not corrupted.
        Request errors are printed and not raised. ``filename`` and
        ``total_bytes`` are accepted for interface compatibility and are not
        used.

        :param session: optional object with a requests-style ``get`` method;
            when ``None``, ``requests.get`` is used with this downloader's
            headers.
        """
        # Open-ended range: byte offsets are zero-based, so the previous
        # "current-total" form asked for one byte past the end.
        range_value = f"bytes={current_bytes}-"

        try:
            if session is None:
                range_header = self.headers.copy()
                range_header['Range'] = range_value
                response = requests.get(link,
                                        headers=range_header,
                                        timeout=30,
                                        stream=True)
            else:
                response = session.get(link,
                                       headers={'Range': range_value},
                                       timeout=30,
                                       stream=True)
            response.raise_for_status()
            # 206 means the body is only the missing tail; any other success
            # status means the server sent the whole file again.
            writemode = 'ab' if response.status_code == 206 else 'wb'
            self.write_file(response, filepath, writemode, bar)
        except requests.exceptions.RequestException as e:
            print(e)

    def write_file(self, content, filepath, writemode, bar):
        """
        Stream a response body to ``filepath`` and update ``bar``.

        A bar whose ``unit`` is ``"B"`` is advanced by the number of bytes
        actually written; any other bar is advanced by 1 once the whole file
        has been written.

        :param content: response object providing ``iter_content``.
        :param filepath: destination path.
        :param writemode: mode passed to ``open`` (``'wb'`` or ``'ab'``).
        :param bar: progress bar providing ``unit`` and ``update``.
        """
        counts_bytes = bar.unit == "B"
        with open(filepath, writemode) as f:
            for chunk in content.iter_content(chunk_size=self.block_size):
                if chunk:
                    f.write(chunk)
                    if counts_bytes:
                        # The final chunk is usually shorter than block_size.
                        bar.update(len(chunk))

        if not counts_bytes:
            bar.update(1)

    def get_file(self, link, path, filename, bar=None, session=None):
        """
        Start downloading ``link`` to ``path`` on a new thread.

        :returns: the started ``threading.Thread``; join it to wait for the
            download to finish.
        """
        thread = threading.Thread(target=self.t_getfile,
                                  args=(link, path, filename, bar, session))
        thread.start()
        return thread
|