# fixed and improved!
* fixed cookies for chromium browsers by switching to rookie
* fixed productId which moved around since I coded this script up
* added support for movie-type content, e.g. documentaries, which don't have seasons or episodes and are just a single video
* general code cleanup and probably some other minor things
This commit is contained in:
parent
5ff9d7f5cf
commit
8e46e9159b
@ -10,8 +10,8 @@ The python package `browser_cookie3` doesn't seem to be functional out of the bo
|
|||||||
# Setup
|
# Setup
|
||||||
* `git clone https://gitea.quinten0508.com/quinten/npo`
|
* `git clone https://gitea.quinten0508.com/quinten/npo`
|
||||||
* `cd npo`
|
* `cd npo`
|
||||||
|
* Download [N_m3u8DL-RE](https://github.com/nilaoda/N_m3u8DL-RE) and [mp4decrypt](https://www.bento4.com/downloads/) and put `N_m3u8DL-RE.exe` and `mp4decrypt.exe` in the root project folder
|
||||||
* Download [`/cdm/wks.py`](https://github.com/SASUKE-DUCK/pywks/blob/main/cdm/wks.py) and put it in an empty `/cdm` folder within the `npo` folder
|
* Download [`/cdm/wks.py`](https://github.com/SASUKE-DUCK/pywks/blob/main/cdm/wks.py) and put it in an empty `/cdm` folder within the `npo` folder
|
||||||
* Download [N_m3u8DL-RE](https://github.com/nilaoda/N_m3u8DL-RE) and [mp4decrypt](https://www.bento4.com/downloads/)
|
|
||||||
* Add your own extracted android keys in `cdm/devices/android_generic/` (you can use [KeyDive](https://cdm-project.com/Android-Tools/KeyDive) or [possibly this guide](https://forum.videohelp.com/threads/408031-Dumping-Your-own-L3-CDM-with-Android-Studio) to extract them):
|
* Add your own extracted android keys in `cdm/devices/android_generic/` (you can use [KeyDive](https://cdm-project.com/Android-Tools/KeyDive) or [possibly this guide](https://forum.videohelp.com/threads/408031-Dumping-Your-own-L3-CDM-with-Android-Studio) to extract them):
|
||||||
* `device_client_id_blob`
|
* `device_client_id_blob`
|
||||||
* `device_private_key`
|
* `device_private_key`
|
||||||
|
@ -4,15 +4,17 @@
|
|||||||
# * pip install -r requirements.txt
|
# * pip install -r requirements.txt
|
||||||
|
|
||||||
|
|
||||||
import argparse
|
from datetime import datetime # unix timestamps from content published dates
|
||||||
import requests
|
import sys # proper process exiting if you messed up!
|
||||||
import subprocess
|
import argparse # your -url and -file options
|
||||||
import os
|
import requests # sending web requests
|
||||||
from bs4 import BeautifulSoup
|
import subprocess # multiprocessing
|
||||||
import json
|
import os # file operations
|
||||||
|
import re # regex for filename sanitizing so it'll actually save (thanks "Wie is de Mol? België 2025" - question marks are not allowed)
|
||||||
|
from unidecode import unidecode # see above
|
||||||
import platform # check for windows OS
|
import platform # check for windows OS
|
||||||
import shutil # check for ffmpeg in PATH
|
import shutil # check for ffmpeg in PATH
|
||||||
import browser_cookie3 # cookies for premium accs
|
import rookiepy # replaced browser_cookie3 with rookiepy
|
||||||
from fake_useragent import UserAgent # sets useragent
|
from fake_useragent import UserAgent # sets useragent
|
||||||
import concurrent.futures # concurrent downloads when using a -file
|
import concurrent.futures # concurrent downloads when using a -file
|
||||||
from cdm.wks import WvDecrypt, device_android_generic, PsshExtractor, KeyExtractor
|
from cdm.wks import WvDecrypt, device_android_generic, PsshExtractor, KeyExtractor
|
||||||
@ -67,80 +69,93 @@ def find_cookies():
|
|||||||
if not userinput or userinput.lower() != 'y':
|
if not userinput or userinput.lower() != 'y':
|
||||||
return
|
return
|
||||||
|
|
||||||
# browser_cookie3.load() should use ALL browsers' cookies. If this doesn't work, replace browser_cookie3.load with browser_cookie3.<browser>.
|
# Now using rookie (rookiepy) instead of browser_cookie3; rookie supports a TON of browsers and works with chromium again.
|
||||||
# See notes at the end of this script for possible options. Example: browser_cookie3.chrome or browser_cookie3.librewolf.
|
# check here for compatibility https://github.com/thewh1teagle/rookie?tab=readme-ov-file#contribute-
|
||||||
cookies = browser_cookie3.load(domain_name='npo.nl')
|
cookies = rookiepy.load(["npo.nl"])
|
||||||
|
cookies = rookiepy.to_cookiejar(cookies)
|
||||||
return cookies
|
return cookies
|
||||||
|
|
||||||
|
|
||||||
def find_targetId(url):
|
def find_content_type(url):
|
||||||
# Get full HTML and extract productId and episode number
|
content_type = url.split("/")[4] # 'video' or 'serie'
|
||||||
# "future proof" :)
|
return content_type
|
||||||
response_targetId = requests.get(url)
|
|
||||||
content = response_targetId.content
|
|
||||||
|
|
||||||
try:
|
def find_content_info(url, content_type):
|
||||||
url_split = url.split("/")
|
|
||||||
target_slug = url_split[7]
|
|
||||||
except:
|
|
||||||
print("URL invalid.")
|
|
||||||
print("URL format: https://npo.nl/start/serie/wie-is-de-mol/seizoen-24/wie-is-de-mol_56/afspelen")
|
|
||||||
print(f"Your URL: {url}")
|
|
||||||
exit()
|
|
||||||
|
|
||||||
soup = BeautifulSoup(content, 'html.parser')
|
if content_type == 'serie':
|
||||||
script_tag = soup.find('script', {'id': '__NEXT_DATA__'})
|
# url safety check - no way for me to grab the "latest" video from a series without reverse engineering about a megabyte of minified js afaik :(
|
||||||
|
if len(url.split("/")) < 8:
|
||||||
|
print("\n\nERROR: URL invalid!\n" \
|
||||||
|
"You are currently on the homepage of whatever series it is you want to download, not on the episode-specific page.\n" \
|
||||||
|
"Please click on the episode you want to download so your url becomes something like ../serie/<serie>/seizoen*/episode/...\n" \
|
||||||
|
f"Your current url is: {url}")
|
||||||
|
sys.exit(1)
|
||||||
|
|
||||||
if script_tag:
|
# grab "slug" from url - not my word this is what they call it
|
||||||
script_content = script_tag.contents[0]
|
# with the found slug we can grab the productid which we need to make our second request
|
||||||
else:
|
params = {
|
||||||
print("Script tag not found.")
|
'slug': url.split("/")[7]
|
||||||
|
}
|
||||||
|
response = requests.get('https://npo.nl/start/api/domain/program-detail', params=params)
|
||||||
|
data = response.json()
|
||||||
|
|
||||||
def search(data, target_slug):
|
content_info = {
|
||||||
if isinstance(data, list):
|
'seasonnumber': data.get('season', {}).get('seasonKey'),
|
||||||
for item in data:
|
'episodetitle': data.get("title"),
|
||||||
result = search(item, target_slug)
|
'episodenumber': data.get("programKey"),
|
||||||
if result:
|
}
|
||||||
return result
|
# some shows have this set to `None`, do better NPO!
|
||||||
elif isinstance(data, dict):
|
published_ts = data.get('publishedDateTime')
|
||||||
for key, value in data.items():
|
if published_ts is not None:
|
||||||
if key == "slug" and value == target_slug:
|
content_info['episodedate'] = datetime.fromtimestamp(published_ts).strftime("%Y-%m-%d")
|
||||||
return data.get("productId"), data.get("programKey")
|
|
||||||
else:
|
|
||||||
result = search(value, target_slug)
|
|
||||||
if result:
|
|
||||||
return result
|
|
||||||
return None
|
|
||||||
|
|
||||||
data_dict = json.loads(script_content)
|
elif content_type == 'video':
|
||||||
target_product_id = search(data_dict, target_slug)
|
params = {
|
||||||
return target_product_id
|
'slug': url.split("/")[5]
|
||||||
|
|
||||||
|
|
||||||
def find_CSRF(targetId, plus_cookie):
|
|
||||||
response_CSRF = requests.get('https://npo.nl/start/api/auth/session', headers=headers, cookies=plus_cookie)
|
|
||||||
response_cookies = response_CSRF.cookies.get_dict()
|
|
||||||
|
|
||||||
json_productId = {
|
|
||||||
'productId': targetId,
|
|
||||||
}
|
}
|
||||||
|
|
||||||
url = f'https://npo.nl/start/api/domain/player-token'
|
response = requests.get('https://npo.nl/start/api/domain/program-detail', params=params)
|
||||||
response_token = requests.get(url, cookies=response_cookies, headers=headers, params=json_productId)
|
data = response.json()
|
||||||
token = response_token.json()["jwt"]
|
|
||||||
|
content_info = {
|
||||||
|
'videotitle': data.get("title"),
|
||||||
|
}
|
||||||
|
|
||||||
|
# some videos have this set to `None`, do better NPO!
|
||||||
|
published_ts = data.get('publishedDateTime')
|
||||||
|
if published_ts is not None:
|
||||||
|
content_info['videodate'] = datetime.fromtimestamp(published_ts).strftime("%Y-%m-%d")
|
||||||
|
|
||||||
|
|
||||||
|
productid = data.get("productId")
|
||||||
|
return productid, content_info
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
def find_token(productid, plus_cookie):
|
||||||
|
params = {
|
||||||
|
'productId': productid,
|
||||||
|
}
|
||||||
|
|
||||||
|
response = requests.get('https://npo.nl/start/api/domain/player-token', params=params, cookies=plus_cookie)
|
||||||
|
token = response.json().get('jwt')
|
||||||
return token
|
return token
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
def find_MPD(token, url, plus_cookie):
|
|
||||||
headers['Authorization'] = token
|
|
||||||
|
|
||||||
json_auth = {
|
def find_MPD(token, url):
|
||||||
|
headers = {
|
||||||
|
'Authorization': token
|
||||||
|
}
|
||||||
|
|
||||||
|
json_data = {
|
||||||
'profileName': 'dash',
|
'profileName': 'dash',
|
||||||
'drmType': 'widevine',
|
'drmType': 'widevine',
|
||||||
'referrerUrl': url,
|
'referrerUrl': url
|
||||||
}
|
}
|
||||||
response = requests.post('https://prod.npoplayer.nl/stream-link', headers=headers, json=json_auth, cookies=plus_cookie)
|
response = requests.post('https://prod.npoplayer.nl/stream-link', headers=headers, json=json_data)
|
||||||
|
|
||||||
response_data = response.json()
|
response_data = response.json()
|
||||||
stream_data = response_data.get('stream', {})
|
stream_data = response_data.get('stream', {})
|
||||||
|
|
||||||
@ -149,7 +164,7 @@ def find_MPD(token, url, plus_cookie):
|
|||||||
else:
|
else:
|
||||||
print("NO MPD URL - BAD TOKEN")
|
print("NO MPD URL - BAD TOKEN")
|
||||||
print(response_data)
|
print(response_data)
|
||||||
exit()
|
print(stream_data.get('streamURL'))
|
||||||
|
|
||||||
|
|
||||||
def find_PSSH(mpd):
|
def find_PSSH(mpd):
|
||||||
@ -191,29 +206,43 @@ def check_prereq():
|
|||||||
if not os.path.isfile(file):
|
if not os.path.isfile(file):
|
||||||
print(f"ERR: {file} not found!")
|
print(f"ERR: {file} not found!")
|
||||||
print("Please check your directory and try again.")
|
print("Please check your directory and try again.")
|
||||||
exit()
|
sys.exit(1)
|
||||||
if shutil.which("ffmpeg") is None:
|
if shutil.which("ffmpeg") is None:
|
||||||
print("ffmpeg not found in PATH.")
|
print("ffmpeg not found in PATH.")
|
||||||
exit()
|
sys.exit(1)
|
||||||
|
|
||||||
|
|
||||||
def create_filename(url, programKey):
|
def create_filename(url, content_info, content_type):
|
||||||
# season title
|
if content_type == 'serie':
|
||||||
# 1 2 3 4 5 6 7 8 (optional)
|
# grab slug from url
|
||||||
# create filename based on input URL: https://npo.nl/start/serie /wie-is-de-mol /seizoen-24 /wie-is-de-mol_56 /afspelen
|
|
||||||
# https://npo.nl/start/serie /de-avondshow-met-arjen-lubach /seizoen-8_1 /de-avondshow-met-arjen-lubach_93 /afspelen
|
|
||||||
# https://npo.nl/start/serie /taarten-van-abel /seizoen-17 /joto /afspelen
|
|
||||||
url_split = url.split("/")
|
url_split = url.split("/")
|
||||||
title = url_split[7].split("_")[0]
|
seasontitle = url_split[5].split("_")[0]
|
||||||
season = url_split[6].split("_")[0]
|
|
||||||
filename_enc = title + "_" + season + "_ep-" + programKey + "_encrypted"
|
filename = f"{seasontitle}_S{content_info['seasonnumber']}E{content_info['episodenumber']}_{content_info['episodetitle']}"
|
||||||
filename = filename_enc.replace("_encrypted", "")
|
if 'episodedate' in content_info:
|
||||||
|
filename += f"_{content_info['episodedate']}"
|
||||||
|
|
||||||
|
elif content_type == 'video':
|
||||||
|
filename = f"{content_info['videotitle']}"
|
||||||
|
if 'videodate' in content_info:
|
||||||
|
filename += f"_{content_info['videodate']}"
|
||||||
|
|
||||||
|
|
||||||
|
# unidecode converts unicode to ascii (e.g. removes accents on characters)
|
||||||
|
# "takes a string object, possibly containing non-ASCII characters, and returns a string that can be safely encoded to ASCII"
|
||||||
|
filename = unidecode(filename).replace(' ', '_')
|
||||||
|
# remove everything not a-z, A-Z, 0-9, -, _
|
||||||
|
filename = re.sub(r'[^a-zA-Z0-9\-_]', '', filename)
|
||||||
|
filename_enc = f"{filename}_encrypted"
|
||||||
|
|
||||||
return filename_enc, filename
|
return filename_enc, filename
|
||||||
|
|
||||||
def download(mpd_url, filename_enc, productId, filename):
|
|
||||||
|
|
||||||
|
def download(mpd_url, filename_enc, productid, filename):
|
||||||
# output: filename.m4a (audio), filename.mp4 (video), filename.vtt (subtitles)
|
# output: filename.m4a (audio), filename.mp4 (video), filename.vtt (subtitles)
|
||||||
|
|
||||||
subtitle_url = f'https://cdn.npoplayer.nl/subtitles/nl/{productId}.vtt'
|
subtitle_url = f'https://cdn.npoplayer.nl/subtitles/nl/{productid}.vtt'
|
||||||
response = requests.get(subtitle_url)
|
response = requests.get(subtitle_url)
|
||||||
with open(f"{filename}.vtt", 'wb') as subtitle_file:
|
with open(f"{filename}.vtt", 'wb') as subtitle_file:
|
||||||
subtitle_file.write(response.content)
|
subtitle_file.write(response.content)
|
||||||
@ -237,7 +266,7 @@ def merge(filename):
|
|||||||
'ffmpeg', '-v', 'quiet', # '-v stats',
|
'ffmpeg', '-v', 'quiet', # '-v stats',
|
||||||
'-i', filename + "_video.mp4",
|
'-i', filename + "_video.mp4",
|
||||||
'-i', filename + "_audio.m4a",
|
'-i', filename + "_audio.m4a",
|
||||||
'-i', filename + ".vtt", # Subtitle file
|
'-i', filename + ".vtt", # Subtitle file (seems to be present on NPO's side even if it's empty / the content has no subs)
|
||||||
'-c:v', 'copy', # Copy video codec
|
'-c:v', 'copy', # Copy video codec
|
||||||
'-c:a', 'copy', # Copy audio codec
|
'-c:a', 'copy', # Copy audio codec
|
||||||
'-c:s', 'mov_text', # Subtitle codec for MP4
|
'-c:s', 'mov_text', # Subtitle codec for MP4
|
||||||
@ -264,18 +293,23 @@ def check_file(filename):
|
|||||||
print("File not found. Continue anyway? (y/N)")
|
print("File not found. Continue anyway? (y/N)")
|
||||||
userinput = input().lower()
|
userinput = input().lower()
|
||||||
if not userinput or userinput != 'y':
|
if not userinput or userinput != 'y':
|
||||||
exit()
|
sys.exit(1)
|
||||||
|
|
||||||
|
|
||||||
def execute(url, plus_cookie, process_no):
|
def execute(url, plus_cookie, process_no):
|
||||||
productId, programKey = find_targetId(url)
|
|
||||||
token = find_CSRF(productId,plus_cookie)
|
content_type = find_content_type(url)
|
||||||
mpd = find_MPD(token, url, plus_cookie)
|
productid, content_info = find_content_info(url, content_type)
|
||||||
|
token = find_token(productid, plus_cookie)
|
||||||
|
mpd = find_MPD(token, url)
|
||||||
pssh, mpd_url = find_PSSH(mpd)
|
pssh, mpd_url = find_PSSH(mpd)
|
||||||
key = find_key(mpd, pssh)
|
key = find_key(mpd, pssh)
|
||||||
check_prereq()
|
check_prereq()
|
||||||
filename_enc, filename = create_filename(url, programKey)
|
|
||||||
download(mpd_url, filename_enc, productId, filename)
|
|
||||||
|
|
||||||
|
filename_enc, filename = create_filename(url, content_info, content_type)
|
||||||
|
download(mpd_url, filename_enc, productid, filename)
|
||||||
decrypt(key, filename_enc, filename)
|
decrypt(key, filename_enc, filename)
|
||||||
merge(filename)
|
merge(filename)
|
||||||
clean(filename_enc, filename)
|
clean(filename_enc, filename)
|
||||||
@ -299,22 +333,17 @@ with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as executor:
|
|||||||
print(f"{completed_videos}/{len(urls)} video{'s'[:len(urls) != 1]} completed")
|
print(f"{completed_videos}/{len(urls)} video{'s'[:len(urls) != 1]} completed")
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
#########
|
#########
|
||||||
# NOTES #
|
# NOTES #
|
||||||
#########
|
#########
|
||||||
# The downloader *should* work across every platform, linux/mac/win.
|
# The downloader *should* work across every platform, linux/mac/win.
|
||||||
# It has not been extensively tested on anything but windows. DM me if you need help :D
|
# It has not been tested on anything but windows though.
|
||||||
# Discord: quinten._. (That includes the ._.)
|
# I've tried my best to comment the code, but I understand if it's messy and overwhelming.
|
||||||
|
# Most of the lines are either:
|
||||||
|
# a) getting relevant cookies/keys/urls by mimicking what your browser would do: getting an ID, using that to get a key, using that to get a URL, etc.
|
||||||
|
# b) pre- and post processing: creating nice filenames, extracting info for those filenames, downloading, decrypting, merging files, etc
|
||||||
|
|
||||||
# Supported browsers for NPO Plus cookies:
|
# However, don't spend hours rummaging through my code, just DM me if you need help :D
|
||||||
# (https://github.com/borisbabic/browser_cookie3#testing-dates--ddmmyy)
|
# Discord: wtquin
|
||||||
# * Chrome
|
|
||||||
# * Firefox
|
|
||||||
# * LibreWolf
|
|
||||||
# * Opera
|
|
||||||
# * Opera GX
|
|
||||||
# * Edge
|
|
||||||
# * Chromium
|
|
||||||
# * Brave
|
|
||||||
# * Vivaldi
|
|
||||||
# * Safari
|
|
@ -1,7 +1,8 @@
|
|||||||
protobuf
|
beautifulsoup4==4.13.4
|
||||||
bs4
|
fake_useragent==2.2.0
|
||||||
xmltodict
|
protobuf==6.30.2
|
||||||
browser_cookie3
|
pycryptodomex==3.22.0
|
||||||
requests
|
Requests==2.32.3
|
||||||
pycryptodomex
|
rookiepy==0.5.6
|
||||||
fake-useragent
|
unidecode==1.3.8
|
||||||
|
xmltodict==0.14.2
|
Loading…
x
Reference in New Issue
Block a user