udemy-downloader/downloader.py
Puyodead1 aab19bf66f New Experimental Downloader, bug fixes, and small updates
+ Updated cleanup function to remove the entire temporary lecture folder instead of just leaving behind tons of empty folders
+ Fixed typo in mux function
+ Segment count is now properly calculated from segment timeline
+ Manifest is now parsed from the URL instead of being downloaded, this should be better for downloading multiple courses at once.
+ Fixed a bug where audio content_type would try to find a max quality
+ New Downloader: Threaded Downloader uses multiple threads to download files, this should improve download speeds greatly. By default, the threaded downloader is not used, you can use the threaded downloader by passing ``--use-threaded-downloader``. By default, it only uses 10 threads, you can set a custom number of threads with the ``--threads`` option
2021-05-21 13:38:24 -04:00

121 lines
4.5 KiB
Python

import os, threading, requests
from tqdm import tqdm
class FileDownloader():
    """
    Download files over HTTP on background threads, resuming partial files.

    Every call to ``get_file`` starts one thread; a shared semaphore lets at
    most ``max_threads`` of them do work at the same time.

    @source: https://gist.github.com/stefanfortuin/9dbfe8618701507d0ef2b5515b165c5f
    """

    def __init__(self, max_threads=10):
        """
        Set up the concurrency limit and default request settings.

        :param max_threads: maximum number of downloads running at once
        """
        print("> Threaded downloader using {} threads.".format(
            str(max_threads)))
        self.sema = threading.Semaphore(value=max_threads)
        self.headers = {
            'user-agent':
            'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/62.0.3202.94 Safari/537.36'
        }
        # Chunk size (bytes) used when streaming a response body to disk.
        self.block_size = 1024

    def _remote_size(self, link):
        """
        Return the size announced by the server for ``link``, or ``None``.

        A single HEAD request is made. ``None`` is returned (after printing
        the reason) when the request fails or no content-length is sent.
        """
        try:
            # Follow redirects so the size matches what the later GET
            # (which follows redirects by default) will actually receive.
            response = requests.head(link,
                                     headers=self.headers,
                                     timeout=30,
                                     allow_redirects=True)
        except requests.exceptions.RequestException as e:
            print(e)
            return None
        if 'content-length' not in response.headers:
            print(f"server doesn't support content-length for {link}")
            return None
        return int(response.headers['content-length'])

    def _save_response(self, response, filepath, writemode, bar):
        """
        Stream ``response`` into ``filepath`` and always close the response.

        HTTP error statuses raise (via ``raise_for_status``) before anything
        is written, so an error page never ends up stored as file content.
        """
        try:
            response.raise_for_status()
            self.write_file(response, filepath, writemode, bar)
        finally:
            response.close()

    def t_getfile(self, link, filepath, filename, bar, session):
        """
        Threaded function that uses a semaphore
        to not run too many downloads at once.

        Downloads ``link`` to ``filepath``: a missing file is fetched from
        scratch, a shorter-than-remote file is resumed, and a file that is
        already at least as large as the remote one only ticks the bar.

        :param link: URL to download
        :param filepath: destination path; parent directories are created
        :param filename: label for the progress bar created when ``bar`` is None
        :param bar: progress bar to update, or None to create a byte-based one
        :param session: optional session object used for the GET requests
        """
        # The context manager releases the slot on every exit path,
        # including exceptions, so a failed download cannot starve the pool.
        with self.sema:
            directory = os.path.dirname(filepath)
            if directory:
                # os.makedirs('') raises, so skip it for bare file names.
                os.makedirs(directory, exist_ok=True)

            resuming = os.path.isfile(filepath)
            current_bytes = os.stat(filepath).st_size if resuming else 0

            total_bytes = self._remote_size(link)
            if total_bytes is None:
                return

            # Compare with None explicitly: a tqdm bar's truthiness depends
            # on its total, so ``not bar`` can misfire on a valid bar.
            if bar is None:
                bar = tqdm(total=total_bytes,
                           initial=current_bytes,
                           unit='B',
                           unit_scale=True,
                           desc=filename)

            if not resuming:
                self.download_new_file(link, filename, filepath, total_bytes,
                                       bar, session)
            elif current_bytes < total_bytes:
                self.continue_file_download(link, filename, filepath,
                                            current_bytes, total_bytes, bar,
                                            session)
            elif bar.unit == "B":
                # File already complete.
                # NOTE(review): advancing a byte bar by one block here is
                # kept from the original; confirm what callers expect.
                bar.update(self.block_size)
            else:
                bar.update(1)

    def download_new_file(self, link, filename, filepath, total_bytes, bar,
                          session):
        """
        Download ``link`` from the first byte, overwriting ``filepath``.

        Uses ``session`` for the GET when given, otherwise ``requests`` with
        the default headers. Request errors are printed, not raised.
        ``filename`` and ``total_bytes`` are accepted for interface
        compatibility and are not used.
        """
        try:
            if session is None:
                response = requests.get(link,
                                        headers=self.headers,
                                        timeout=30,
                                        stream=True)
            else:
                response = session.get(link, timeout=30, stream=True)
            self._save_response(response, filepath, 'wb', bar)
        except requests.exceptions.RequestException as e:
            print(e)

    def continue_file_download(self, link, filename, filepath, current_bytes,
                               total_bytes, bar, session=None):
        """
        Resume a partial download of ``link`` into ``filepath``.

        Requests everything from byte ``current_bytes`` onwards. The data is
        appended only when the server answers 206; any other successful
        status means the full body was sent, so the file is rewritten.
        Request errors are printed, not raised. ``filename`` and
        ``total_bytes`` are accepted for interface compatibility only.

        :param session: optional session object used for the GET request
        """
        # Open-ended range: "from current_bytes to the end of the resource".
        range_header = {'Range': f"bytes={current_bytes}-"}
        try:
            if session is None:
                response = requests.get(link,
                                        headers={**self.headers,
                                                 **range_header},
                                        timeout=30,
                                        stream=True)
            else:
                # Only add Range so the session's own headers stay in effect.
                response = session.get(link,
                                       headers=range_header,
                                       timeout=30,
                                       stream=True)
            # Appending a full (non-partial) body would corrupt the file.
            writemode = 'ab' if response.status_code == 206 else 'wb'
            self._save_response(response, filepath, writemode, bar)
        except requests.exceptions.RequestException as e:
            print(e)

    def write_file(self, content, filepath, writemode, bar):
        """
        Stream ``content`` to ``filepath`` and report progress on ``bar``.

        A bar whose unit is ``"B"`` advances by the bytes actually written;
        any other bar advances by one once the whole file is written.

        :param content: response-like object providing ``iter_content``
        :param writemode: mode passed to ``open`` (``'wb'`` or ``'ab'``)
        """
        counts_bytes = bar.unit == "B"
        with open(filepath, writemode) as f:
            for chunk in content.iter_content(chunk_size=self.block_size):
                if chunk:
                    f.write(chunk)
                    if counts_bytes:
                        # The final chunk is usually shorter than block_size.
                        bar.update(len(chunk))
        if not counts_bytes:
            bar.update(1)

    def get_file(self, link, path, filename, bar=None, session=None):
        """
        Start downloading ``link`` to ``path`` on a new thread.

        :return: the started ``threading.Thread``; join it to wait for the
            download to finish
        """
        thread = threading.Thread(target=self.t_getfile,
                                  args=(link, path, filename, bar, session))
        thread.start()
        return thread