# npo/npo all-in-one.py

# Pre-requisites:
# * N_m3u8DL-RE and mp4decrypt in current directory
# * ffmpeg in PATH
# * pip install -r requirements.txt


import argparse
import requests
import subprocess
import os
from bs4 import BeautifulSoup
import json
import platform                         # check for windows OS
import shutil                           # check for ffmpeg in PATH
import browser_cookie3                  # cookies for premium accs
from fake_useragent import UserAgent    # sets useragent
import concurrent.futures               # concurrent downloads when using a -file
from cdm.wks import WvDecrypt, device_android_generic, PsshExtractor, KeyExtractor

# None of these headers are strictly required; they only make our requests
# look like those of a normal client.
# Idea for extra "normal behavior": store the UA chosen here in some temp file
# so the same one can be reused every time this utility is run.
_chosen_user_agent = UserAgent(platforms='pc', min_version=122.0).random

headers = {
    'User-Agent': _chosen_user_agent,
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8',
    'Accept-Language': 'en-US,en;q=0.5',
    'Cache-Control': 'no-cache',
}

# True when running on Windows; used to pick the ".exe" names of the helper tools.
windows_flag = platform.system() == "Windows"


# Command line: either a single video URL (-url) or a text file listing
# several URLs (-file). Both are optional for argparse; which one was given
# is checked further down.
parser = argparse.ArgumentParser(description='PYWKS-NPO')
parser.add_argument(
    '-url',
    dest='url',
    required=False,
    help='NPO Video URL',
)
parser.add_argument(
    '-file',
    dest='file',
    required=False,
    help='File with NPO Video URLs, one per line',
)
args = parser.parse_args()


def parse_url_file(file_path):
    """Read the URLs to download from a text file.

    Args:
        file_path: Path to a text file with one NPO video URL per line.

    Returns:
        A list of the URLs in file order, with surrounding whitespace
        removed. Blank (or whitespace-only) lines are skipped so they are not
        handed on as empty URLs.
    """
    # "utf-8-sig" reads plain ASCII/UTF-8 and additionally drops a leading
    # byte-order mark, which would otherwise end up glued to the first URL.
    with open(file_path, 'r', encoding='utf-8-sig') as file:
        stripped_lines = (line.strip() for line in file)
        return [url for url in stripped_lines if url]

# Work out which URLs to process. Exactly one of -url / -file must be given;
# anything else is a usage error.
if args.file and args.url:
    usage_error = "ERR: Please specify just one argument."
elif not (args.file or args.url):
    usage_error = "ERR: Please input your URL(s)."
else:
    usage_error = None

if usage_error is not None:
    print(usage_error)
    print("-url:     input NPO video URL")
    print("-file:    input a file with NPO video URLS, one per line")
    # Exit with a non-zero status so calling scripts can detect the failure.
    # SystemExit is raised directly because the exit() helper is installed by
    # the site module and is not guaranteed to exist in every environment.
    raise SystemExit(1)

urls = parse_url_file(args.file) if args.file else [args.url]


def find_cookies():
    """Ask whether the user is a logged-in NPO Plus subscriber and load cookies.

    Returns:
        The cookies browser_cookie3 returns for the npo.nl domain when the
        user answers "y" (case-insensitive); None for any other answer.
    """
    print("NPO Plus subscribers are able to download in 1080p instead of 540p.")
    print("Are you an NPO Plus subscriber and logged in on your browser? (y/N)")
    answer = input().lower()
    # ANSI escapes: move the cursor up a line and erase it, three times over,
    # which wipes the prompt again.
    print("\033[F\033[K\033[F\033[K\033[F\033[K")

    # Only an explicit "y" counts as yes; an empty answer means no.
    if answer != 'y':
        return None

    # The cookies are currently read from LibreWolf. browser_cookie3.load should
    # use ALL browsers' cookies instead; any other browser_cookie3.<browser>
    # function works as well (e.g. browser_cookie3.chrome).
    # See the notes at the end of this script for the possible options.
    return browser_cookie3.librewolf(domain_name='npo.nl')


def _search_slug(data, target_slug):
    """Depth-first search of parsed JSON for the dict whose "slug" equals target_slug.

    Returns a (productId, programKey) tuple read from that dict, or None when
    no such dict exists anywhere in data.
    """
    if isinstance(data, list):
        for item in data:
            result = _search_slug(item, target_slug)
            if result:
                return result
    elif isinstance(data, dict):
        for key, value in data.items():
            if key == "slug" and value == target_slug:
                return data.get("productId"), data.get("programKey")
            result = _search_slug(value, target_slug)
            if result:
                return result
    return None


def find_targetId(url):
    """Look up the productId and programKey of the episode behind an NPO URL.

    The episode slug is the 8th "/"-separated piece of the URL (index 7). The
    page's __NEXT_DATA__ script tag holds JSON, which is searched for the
    entry carrying that slug.

    Args:
        url: Episode URL, e.g.
            https://npo.nl/start/serie/wie-is-de-mol/seizoen-24/wie-is-de-mol_56/afspelen

    Returns:
        Tuple (productId, programKey) of the matching entry.

    Raises:
        SystemExit: if the URL is too short, the page has no __NEXT_DATA__
            script tag, or no entry with the slug is found.
    """
    # Validate the URL first so a malformed one is rejected without a request.
    try:
        target_slug = url.split("/")[7]
    except IndexError:
        print("URL invalid.")
        print("URL format: https://npo.nl/start/serie/wie-is-de-mol/seizoen-24/wie-is-de-mol_56/afspelen")
        print(f"Your URL: {url}")
        raise SystemExit(1) from None

    # Get full HTML and extract productId and episode number
    response_targetId = requests.get(url)
    soup = BeautifulSoup(response_targetId.content, 'html.parser')
    script_tag = soup.find('script', {'id': '__NEXT_DATA__'})

    # Stop here instead of failing later on a missing script body.
    if script_tag is None or not script_tag.contents:
        print("Script tag not found.")
        raise SystemExit(1)

    data_dict = json.loads(script_tag.contents[0])
    target_product_id = _search_slug(data_dict, target_slug)
    if target_product_id is None:
        # The caller unpacks the result, so a missing entry must not be
        # returned as None.
        print(f"No entry with slug '{target_slug}' found on the page.")
        print(f"Your URL: {url}")
        raise SystemExit(1)
    return target_product_id


def find_CSRF(targetId, plus_cookie):
    """Fetch the player token (JWT) for a product.

    The auth session endpoint is requested first (with the NPO Plus cookies,
    if any); the cookies of that response are then sent along to the
    player-token endpoint.

    Args:
        targetId: productId of the episode.
        plus_cookie: Cookies of a logged-in NPO Plus user, or None.

    Returns:
        The "jwt" field of the player-token JSON response.
    """
    session_response = requests.get(
        'https://npo.nl/start/api/auth/session',
        headers=headers,
        cookies=plus_cookie,
    )
    session_cookies = session_response.cookies.get_dict()

    token_response = requests.get(
        'https://npo.nl/start/api/domain/player-token',
        cookies=session_cookies,
        headers=headers,
        params={'productId': targetId},
    )
    return token_response.json()["jwt"]


def find_MPD(token, url, plus_cookie):
    """Request the DASH/Widevine stream data for an episode.

    Args:
        token: Player token (JWT), sent as the Authorization header.
        url: Episode URL, sent as the referrerUrl.
        plus_cookie: Cookies of a logged-in NPO Plus user, or None.

    Returns:
        The "stream" dict of the stream-link response; it holds a non-empty
        "streamURL".

    Raises:
        SystemExit: if the response contains no stream URL.
    """
    # Use per-call headers instead of writing the token into the shared
    # module-level dict: downloads run in several threads at once, and a
    # shared 'Authorization' entry could be replaced by another download's
    # token between the assignment and the request.
    request_headers = {**headers, 'Authorization': token}

    json_auth = {
        'profileName': 'dash',
        'drmType': 'widevine',
        'referrerUrl': url,
    }
    response = requests.post('https://prod.npoplayer.nl/stream-link', headers=request_headers, json=json_auth, cookies=plus_cookie)
    response_data = response.json()
    # "or {}" also covers a response in which "stream" is present but null.
    stream_data = response_data.get('stream') or {}

    if not stream_data.get('streamURL'):
        print("NO MPD URL - BAD TOKEN")
        print(response_data)
        raise SystemExit(1)
    return stream_data


def find_PSSH(mpd):
    """Download the MPD manifest and extract its PSSH value.

    Args:
        mpd: Stream dict; its 'streamURL' entry is the manifest URL.

    Returns:
        Tuple (pssh_value, mpd_url).
    """
    mpd_url = mpd.get('streamURL')
    manifest = requests.get(mpd_url, headers=headers)
    # PsshExtractor is handed the manifest text and returns the PSSH.
    return PsshExtractor(manifest.text).extract_pssh(), mpd_url


def find_key(mpd, pssh):
    """Obtain a decryption key for the stream from the licence server.

    Args:
        mpd: Stream dict; its 'drmToken' entry is sent as 'x-custom-data'.
        pssh: PSSH value of the stream.

    Returns:
        The first element of the first non-empty list among the extracted
        keys, or None when there is no such list.
    """
    license_url = "https://npo-drm-gateway.samgcloud.nepworldwide.nl/authentication"
    headers_license = {
        'x-custom-data': mpd.get('drmToken'),
        'origin': 'https://start-player.npo.nl',
        'referer': 'https://start-player.npo.nl/',
    }

    cert_b64 = None
    keys = KeyExtractor(pssh, cert_b64, license_url, headers_license).get_keys()

    # NOTE(review): a challenge is generated here but its result is never
    # used. Presumably it can be dropped; confirm against cdm.wks first.
    wvdecrypt = WvDecrypt(init_data_b64=pssh, cert_data_b64=cert_b64, device=device_android_generic)
    wvdecrypt.get_challenge()

    for entry in keys:
        if isinstance(entry, list) and entry:
            return entry[0]
    return None


def check_prereq():
    """Verify that the external tools are available; exit the program if not.

    mp4decrypt and N_m3u8DL-RE (with an ".exe" suffix on Windows) must exist
    as files relative to the current working directory, and ffmpeg must be
    found on PATH.
    """
    suffix = '.exe' if windows_flag else ''

    for tool in ('mp4decrypt' + suffix, 'N_m3u8DL-RE' + suffix):
        if os.path.isfile(tool):
            continue
        print(f"ERR: {tool} not found!")
        print("Please check your directory and try again.")
        exit()

    ffmpeg_path = shutil.which("ffmpeg")
    if ffmpeg_path is None:
        print("ffmpeg not found in PATH.")
        exit()


def create_filename(url, programKey):
    """Build the output file names (without extension) from the episode URL.

    The URL is split on "/"; piece 6 is the season and piece 7 the title.
    Anything after the first "_" in either piece is dropped:

        https://npo.nl/start/serie/wie-is-de-mol/seizoen-24/wie-is-de-mol_56/afspelen
        https://npo.nl/start/serie/de-avondshow-met-arjen-lubach/seizoen-8_1/de-avondshow-met-arjen-lubach_93/afspelen
        https://npo.nl/start/serie/taarten-van-abel/seizoen-17/joto/afspelen

    Args:
        url: Episode URL in the format shown above (the trailing /afspelen
            is optional).
        programKey: Episode identifier, placed after "_ep-".

    Returns:
        Tuple (filename_enc, filename): the name for the encrypted download
        and the plain name, "<title>_<season>_ep-<programKey>".
    """
    url_split = url.split("/")
    title = url_split[7].split("_")[0]
    season = url_split[6].split("_")[0]
    # Build the plain name first and append the marker, rather than deriving
    # it with str.replace, which would also strip any "_encrypted" found
    # inside programKey. The f-string also accepts a non-string programKey.
    filename = f"{title}_{season}_ep-{programKey}"
    return filename + "_encrypted", filename

def download(mpd_url, filename_enc, productId, filename):
    """Download the subtitles and the (still encrypted) streams of an episode.

    The subtitle file is written to <filename>.vtt. N_m3u8DL-RE is then run
    on the manifest with <filename_enc> as its save name.

    Args:
        mpd_url: URL of the MPD manifest.
        filename_enc: Save name passed to N_m3u8DL-RE.
        productId: Product id used to build the subtitle URL.
        filename: Base name of the subtitle file.
    """
    subtitle_url = f'https://cdn.npoplayer.nl/subtitles/nl/{productId}.vtt'
    response = requests.get(subtitle_url)
    with open(f"{filename}.vtt", 'wb') as subtitle_file:
        subtitle_file.write(response.content)

    exe_name = 'N_m3u8DL-RE.exe' if windows_flag else 'N_m3u8DL-RE'
    # On POSIX a bare program name is only looked up on PATH, while the tool
    # is expected in the current directory. Run the local copy through an
    # explicit relative path; fall back to the bare name if it is not there.
    local_copy = os.path.join(os.curdir, exe_name)
    downloader = local_copy if os.path.isfile(local_copy) else exe_name
    subprocess.run(
        [downloader, '--auto-select', '--no-log', '--save-name', filename_enc, mpd_url],
        stdout=subprocess.DEVNULL,
    )


def decrypt(key, filename_enc, filename):
    """Decrypt the downloaded video and audio files with mp4decrypt.

    Reads <filename_enc>.mp4 and <filename_enc>.m4a and writes
    <filename>_video.mp4 and <filename>_audio.m4a.

    Args:
        key: Value passed to mp4decrypt's --key option.
        filename_enc: Base name of the encrypted input files.
        filename: Base name of the decrypted output files.
    """
    exe_name = 'mp4decrypt.exe' if windows_flag else 'mp4decrypt'
    # On POSIX a bare program name is only looked up on PATH, while the tool
    # is expected in the current directory. Run the local copy through an
    # explicit relative path; fall back to the bare name if it is not there.
    local_copy = os.path.join(os.curdir, exe_name)
    decrypter = local_copy if os.path.isfile(local_copy) else exe_name

    # (input extension, output suffix): video first, then audio.
    for extension, out_suffix in ((".mp4", "_video.mp4"), (".m4a", "_audio.m4a")):
        subprocess.run(
            [decrypter, '--key', key, filename_enc + extension, filename + out_suffix],
            stdout=subprocess.DEVNULL,
        )


def merge(filename):
    """Combine video, audio and subtitles into <filename>.mp4 using ffmpeg.

    Inputs are <filename>_video.mp4, <filename>_audio.m4a and <filename>.vtt.
    Video and audio are copied as-is; the subtitles are stored as mov_text.

    Args:
        filename: Base name shared by the input files and the output file.
    """
    command = ['ffmpeg', '-v', 'quiet']  # alternative: '-v stats'

    # Inputs 0, 1 and 2: video, audio, subtitle file.
    for source in (filename + "_video.mp4", filename + "_audio.m4a", filename + ".vtt"):
        command.extend(['-i', source])

    command.extend([
        '-c:v', 'copy',        # keep the video codec
        '-c:a', 'copy',        # keep the audio codec
        '-c:s', 'mov_text',    # subtitle codec for MP4
        '-map', '0:v:0',       # video stream of input 0
        '-map', '1:a:0',       # audio stream of input 1
        '-map', '2:s:0',       # subtitle stream of input 2
        '-strict', 'experimental',
        filename + ".mp4",
    ])

    subprocess.run(command)


def clean(filename_enc, filename):
    """Delete the intermediate files of a download.

    Removes the encrypted streams, the decrypted audio/video parts and the
    subtitle file; <filename>.mp4 is not touched.

    Args:
        filename_enc: Base name of the encrypted files.
        filename: Base name of the decrypted parts and the subtitle file.
    """
    leftovers = (
        filename_enc + ".mp4",
        filename_enc + ".m4a",
        filename + "_audio.m4a",
        filename + "_video.mp4",
        filename + ".vtt",
    )
    for path in leftovers:
        try:
            os.remove(path)
        except FileNotFoundError:
            # A file that was never produced needs no cleanup. Carrying on
            # lets the remaining files still be removed.
            pass


def check_file(filename):
    """Check that <filename>.mp4 exists; otherwise ask whether to continue.

    When the file is missing the user is prompted, and any answer other than
    "y" (case-insensitive) exits the program.

    Args:
        filename: Base name of the expected output file.
    """
    if os.path.exists(filename + ".mp4"):
        return

    print("File not found. Continue anyway? (y/N)")
    answer = input().lower()
    # Only an explicit "y" continues; an empty answer exits as well.
    if answer != 'y':
        exit()


def execute(url, plus_cookie, process_no):
    """Run the complete download pipeline for one episode URL.

    Steps: check the external tools, resolve the product, fetch the player
    token, stream data, PSSH and key, then download, decrypt, merge, remove
    the intermediate files and check the result.

    Args:
        url: Episode URL.
        plus_cookie: Cookies of a logged-in NPO Plus user, or None.
        process_no: Index of this job.

    Returns:
        process_no, unchanged, so the caller can tell which job finished.
    """
    # Check the tools first: without them nothing can be downloaded, so there
    # is no point in making the token and licence requests beforehand.
    check_prereq()
    productId, programKey = find_targetId(url)
    token = find_CSRF(productId, plus_cookie)
    mpd = find_MPD(token, url, plus_cookie)
    pssh, mpd_url = find_PSSH(mpd)
    key = find_key(mpd, pssh)
    filename_enc, filename = create_filename(url, programKey)
    download(mpd_url, filename_enc, productId, filename)
    decrypt(key, filename_enc, filename)
    merge(filename)
    clean(filename_enc, filename)
    check_file(filename)
    return process_no  # keeps track of process index to return x/y videos completed message


plus_cookie = find_cookies()
# One worker per URL, capped at the CPU count. os.cpu_count() can return
# None, and ThreadPoolExecutor rejects max_workers=0 (an empty URL list), so
# the value is clamped to at least 1.
max_workers = max(1, min(os.cpu_count() or 1, len(urls)))

with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as executor:
    futures = [executor.submit(execute, url, plus_cookie, i + 1) for i, url in enumerate(urls)]

    completed_videos = 0
    print(f"0/{len(urls)} videos completed")
    plural = '' if len(urls) == 1 else 's'
    for future in concurrent.futures.as_completed(futures):
        # result() re-raises whatever the job raised, so a failed job stops
        # the script instead of being counted as completed.
        future.result()
        completed_videos += 1
        # ANSI escapes: move up and erase the previous progress line.
        print("\033[F\033[K\033[F\033[K")
        print(f"{completed_videos}/{len(urls)} video{plural} completed")


#########
# NOTES #
#########
# The downloader *should* work across every platform, linux/mac/win.
# It has not been extensively tested on anything but windows. DM me if you need help :D
# Discord: quinten._.       (That includes the ._.)

# Supported browsers for NPO Plus cookies:
# (https://github.com/borisbabic/browser_cookie3#testing-dates--ddmmyy)
# * Chrome
# * Firefox
# * LibreWolf
# * Opera
# * Opera GX
# * Edge
# * Chromium
# * Brave
# * Vivaldi
# * Safari