# udemy-downloader/downloader.py

import os, threading, requests
from tqdm import tqdm


class FileDownloader():
    """
    Download files on background threads, resuming partial files when possible.

    Every call to ``get_file`` starts its own thread; the semaphore created in
    ``__init__`` limits how many of those threads are downloading at once.

    @source: https://gist.github.com/stefanfortuin/9dbfe8618701507d0ef2b5515b165c5f
    """

    # Seconds to wait on the HEAD request and on the GET requests this class
    # issues itself (not applied to a caller-supplied session).
    REQUEST_TIMEOUT = 30

    def __init__(self, max_threads=10):
        """
        Create the downloader.

        :param max_threads: maximum number of downloads allowed to run
            concurrently (initial value of the semaphore).
        """
        print(f"> Threaded downloader using {max_threads} threads.")
        self.sema = threading.Semaphore(value=max_threads)
        self.headers = {
            'user-agent':
            'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/62.0.3202.94 Safari/537.36'
        }
        # Chunk size, in bytes, requested from ``iter_content``.
        self.block_size = 1024

    def _remote_size(self, link):
        """
        Return the size in bytes reported for ``link``, or ``None``.

        ``None`` is returned (after printing the reason) when the HEAD request
        fails or the response carries no ``content-length`` header.
        """
        try:
            # requests.head() does not follow redirects unless asked to; the
            # size must come from the final resource, like the GET that
            # follows.
            response = requests.head(link,
                                     headers=self.headers,
                                     timeout=self.REQUEST_TIMEOUT,
                                     allow_redirects=True)
        except requests.exceptions.RequestException as e:
            print(e)
            return None

        if 'content-length' not in response.headers:
            print(f"server doesn't support content-length for {link}")
            return None
        return int(response.headers['content-length'])

    def t_getfile(self, link, filepath, filename, bar, session):
        """
        Thread target: download ``link`` to ``filepath``.

        Holds one semaphore slot for the duration of the work so that only a
        limited number of downloads run concurrently. A missing file is
        downloaded from scratch, a partial file is resumed, and a complete
        file is only reported to the progress bar.

        :param link: URL to download.
        :param filepath: destination path; its parent directory is created.
        :param filename: label used for a progress bar created here.
        :param bar: progress bar to update, or ``None`` to create a per-file
            byte bar.
        :param session: optional object with a ``get`` method used for a
            fresh download instead of ``requests.get``.
        """
        # The context manager releases the slot on every exit path, including
        # exceptions, so a failed download cannot starve later ones.
        with self.sema:
            directory = os.path.dirname(filepath)
            if directory:  # a bare filename has no directory to create
                os.makedirs(directory, exist_ok=True)

            total_bytes = self._remote_size(link)
            if total_bytes is None:
                return

            exists = os.path.isfile(filepath)
            current_bytes = os.stat(filepath).st_size if exists else 0

            # Compare against None explicitly: a tqdm bar defines its own
            # truthiness (based on its total), so ``not bar`` is unreliable.
            own_bar = bar is None
            if own_bar:
                bar = tqdm(total=total_bytes,
                           initial=current_bytes,
                           unit='B',
                           unit_scale=True,
                           desc=filename)

            try:
                if not exists:
                    self.download_new_file(link, filename, filepath,
                                           total_bytes, bar, session)
                elif current_bytes < total_bytes:
                    self.continue_file_download(link, filename, filepath,
                                                current_bytes, total_bytes,
                                                bar)
                elif not own_bar:
                    # File already complete. A bar created above already
                    # starts at the file's size, so only a caller-supplied
                    # bar needs advancing.
                    if bar.unit == "B":
                        # NOTE(review): advancing a shared byte bar by one
                        # block is kept from the original; confirm with the
                        # caller whether the file size is what is expected.
                        bar.update(self.block_size)
                    else:
                        bar.update(1)
            finally:
                if own_bar:
                    bar.close()

    def download_new_file(self, link, filename, filepath, total_bytes, bar,
                          session):
        """
        Download ``link`` from the beginning, overwriting ``filepath``.

        Uses ``session.get`` when a session is given, otherwise
        ``requests.get`` with this instance's headers. Request errors,
        including HTTP error statuses, are printed rather than raised.
        ``filename`` and ``total_bytes`` are accepted for interface
        compatibility and are not used here.
        """
        try:
            if session is None:
                response = requests.get(link,
                                        headers=self.headers,
                                        timeout=self.REQUEST_TIMEOUT,
                                        stream=True)
            else:
                response = session.get(link, stream=True)
            # Do not save an HTTP error page as if it were the file.
            response.raise_for_status()
            self.write_file(response, filepath, 'wb', bar)
        except requests.exceptions.RequestException as e:
            print(e)

    def continue_file_download(self, link, filename, filepath, current_bytes,
                               total_bytes, bar):
        """
        Resume a partial download of ``link`` into ``filepath``.

        Requests everything from byte ``current_bytes`` onwards. Request
        errors, including HTTP error statuses, are printed rather than
        raised. ``filename`` and ``total_bytes`` are accepted for interface
        compatibility and are not used here.
        """
        range_header = self.headers.copy()
        # Open-ended range: byte positions are inclusive, so naming the total
        # size as the end would point one past the last byte.
        range_header['Range'] = f"bytes={current_bytes}-"

        try:
            response = requests.get(link,
                                    headers=range_header,
                                    timeout=self.REQUEST_TIMEOUT,
                                    stream=True)
            response.raise_for_status()
            # Only a 206 response holds just the missing tail. Any other
            # success status means the Range header was ignored and the body
            # is the whole file, so appending it would corrupt the output.
            writemode = 'ab' if response.status_code == 206 else 'wb'
            self.write_file(response, filepath, writemode, bar)
        except requests.exceptions.RequestException as e:
            print(e)

    def write_file(self, content, filepath, writemode, bar):
        """
        Stream ``content`` to ``filepath`` and update ``bar``.

        :param content: response-like object providing ``iter_content``.
        :param filepath: file to write.
        :param writemode: mode passed to ``open`` (``'wb'`` or ``'ab'``).
        :param bar: progress bar; a bar whose unit is ``"B"`` advances by the
            number of bytes written, any other bar advances by one once the
            file is finished.
        """
        counts_bytes = bar.unit == "B"
        with open(filepath, writemode) as f:
            for chunk in content.iter_content(chunk_size=self.block_size):
                if chunk:
                    f.write(chunk)
                    if counts_bytes:
                        # The final chunk is usually shorter than block_size.
                        bar.update(len(chunk))

        if not counts_bytes:
            bar.update(1)

    def get_file(self, link, path, filename, bar=None, session=None):
        """
        Start downloading ``link`` to ``path`` on a new thread.

        :returns: the started ``threading.Thread``; join it to wait for the
            download to finish.
        """
        thread = threading.Thread(target=self.t_getfile,
                                  args=(link, path, filename, bar, session))
        thread.start()
        return thread