mirror of
https://cdm-project.com/Download-Tools/udemy-downloader.git
synced 2025-05-04 03:44:26 +02:00
update manifest downloading
This commit is contained in:
parent
340d4c6786
commit
06e295d2b6
3
.gitignore
vendored
3
.gitignore
vendored
@ -128,4 +128,5 @@ cookies.txt
|
|||||||
selenium_test.py
|
selenium_test.py
|
||||||
selenium_data/
|
selenium_data/
|
||||||
config.dev.toml
|
config.dev.toml
|
||||||
temp/*.exe
|
temp/
|
||||||
|
*.exe
|
53
main.py
53
main.py
@ -483,24 +483,52 @@ class Udemy:
|
|||||||
|
|
||||||
def _extract_m3u8(self, url):
|
def _extract_m3u8(self, url):
|
||||||
"""extracts m3u8 streams"""
|
"""extracts m3u8 streams"""
|
||||||
|
asset_id_re = re.compile(r"assets/(?P<id>\d+)/")
|
||||||
_temp = []
|
_temp = []
|
||||||
|
|
||||||
|
# get temp folder
|
||||||
|
temp_path = Path(Path.cwd(), "temp")
|
||||||
|
|
||||||
|
# ensure the folder exists
|
||||||
|
temp_path.mkdir(parents=True, exist_ok=True)
|
||||||
|
|
||||||
|
# # extract the asset id from the url
|
||||||
|
asset_id = asset_id_re.search(url).group("id")
|
||||||
|
|
||||||
|
m3u8_path = Path(temp_path, f"index_{asset_id}.m3u8")
|
||||||
|
|
||||||
try:
|
try:
|
||||||
resp = self.session._get(url)
|
r = self.session._get(url)
|
||||||
resp.raise_for_status()
|
r.raise_for_status()
|
||||||
raw_data = resp.text
|
raw_data = r.text
|
||||||
|
|
||||||
|
# write to temp file for later
|
||||||
|
with open(m3u8_path, "w") as f:
|
||||||
|
f.write(r.text)
|
||||||
|
|
||||||
m3u8_object = m3u8.loads(raw_data)
|
m3u8_object = m3u8.loads(raw_data)
|
||||||
playlists = m3u8_object.playlists
|
playlists = m3u8_object.playlists
|
||||||
seen = set()
|
seen = set()
|
||||||
for pl in playlists:
|
for pl in playlists:
|
||||||
resolution = pl.stream_info.resolution
|
resolution = pl.stream_info.resolution
|
||||||
codecs = pl.stream_info.codecs
|
codecs = pl.stream_info.codecs
|
||||||
|
|
||||||
if not resolution:
|
if not resolution:
|
||||||
continue
|
continue
|
||||||
if not codecs:
|
if not codecs:
|
||||||
continue
|
continue
|
||||||
width, height = resolution
|
width, height = resolution
|
||||||
download_url = pl.uri
|
|
||||||
if height not in seen:
|
if height in seen: continue
|
||||||
|
|
||||||
|
# we need to save the individual playlists to disk also
|
||||||
|
playlist_path = Path(temp_path, f"index_{asset_id}_{width}x{height}.m3u8")
|
||||||
|
|
||||||
|
with open(playlist_path, "w") as f:
|
||||||
|
r = self.session._get(pl.uri)
|
||||||
|
r.raise_for_status()
|
||||||
|
f.write(r.text)
|
||||||
|
|
||||||
seen.add(height)
|
seen.add(height)
|
||||||
_temp.append(
|
_temp.append(
|
||||||
{
|
{
|
||||||
@ -508,7 +536,7 @@ class Udemy:
|
|||||||
"height": height,
|
"height": height,
|
||||||
"width": width,
|
"width": width,
|
||||||
"extension": "mp4",
|
"extension": "mp4",
|
||||||
"download_url": download_url,
|
"download_url": playlist_path.as_uri(),
|
||||||
}
|
}
|
||||||
)
|
)
|
||||||
except Exception as error:
|
except Exception as error:
|
||||||
@ -517,8 +545,7 @@ class Udemy:
|
|||||||
|
|
||||||
def _extract_mpd(self, url):
|
def _extract_mpd(self, url):
|
||||||
"""extracts mpd streams"""
|
"""extracts mpd streams"""
|
||||||
|
asset_id_re = re.compile(r"assets/(?P<id>\d+)/")
|
||||||
asset_id_re = re.compile(r"assets/(?P<id>\d+)/files")
|
|
||||||
_temp = []
|
_temp = []
|
||||||
|
|
||||||
# get temp folder
|
# get temp folder
|
||||||
@ -536,6 +563,7 @@ class Udemy:
|
|||||||
try:
|
try:
|
||||||
with open(mpd_path, "wb") as f:
|
with open(mpd_path, "wb") as f:
|
||||||
r = self.session._get(url)
|
r = self.session._get(url)
|
||||||
|
r.raise_for_status()
|
||||||
f.write(r.content)
|
f.write(r.content)
|
||||||
|
|
||||||
ytdl = yt_dlp.YoutubeDL({"quiet": True, "no_warnings": True, "allow_unplayable_formats": True, "enable_file_urls": True})
|
ytdl = yt_dlp.YoutubeDL({"quiet": True, "no_warnings": True, "allow_unplayable_formats": True, "enable_file_urls": True})
|
||||||
@ -1168,6 +1196,7 @@ def handle_segments(url, format_id, video_title, output_path, lecture_file_name,
|
|||||||
logger.info("> Downloading Lecture Tracks...")
|
logger.info("> Downloading Lecture Tracks...")
|
||||||
args = [
|
args = [
|
||||||
"yt-dlp",
|
"yt-dlp",
|
||||||
|
"--enable-file-urls",
|
||||||
"--force-generic-extractor",
|
"--force-generic-extractor",
|
||||||
"--allow-unplayable-formats",
|
"--allow-unplayable-formats",
|
||||||
"--concurrent-fragments",
|
"--concurrent-fragments",
|
||||||
@ -1237,6 +1266,12 @@ def handle_segments(url, format_id, video_title, output_path, lecture_file_name,
|
|||||||
logger.exception(f"Error: ")
|
logger.exception(f"Error: ")
|
||||||
finally:
|
finally:
|
||||||
os.chdir(HOME_DIR)
|
os.chdir(HOME_DIR)
|
||||||
|
# if the url is a file url, we need to remove the file after we're done with it
|
||||||
|
if url.startswith("file://"):
|
||||||
|
try:
|
||||||
|
os.unlink(url[7:])
|
||||||
|
except:
|
||||||
|
pass
|
||||||
|
|
||||||
|
|
||||||
def check_for_aria():
|
def check_for_aria():
|
||||||
@ -1372,7 +1407,7 @@ def process_lecture(lecture, lecture_path, lecture_file_name, chapter_dir):
|
|||||||
source_type = source.get("type")
|
source_type = source.get("type")
|
||||||
if source_type == "hls":
|
if source_type == "hls":
|
||||||
temp_filepath = lecture_path.replace(".mp4", ".%(ext)s")
|
temp_filepath = lecture_path.replace(".mp4", ".%(ext)s")
|
||||||
cmd = ["yt-dlp", "--force-generic-extractor", "--concurrent-fragments", f"{concurrent_downloads}", "--downloader", "aria2c", "-o", f"{temp_filepath}", f"{url}"]
|
cmd = ["yt-dlp", "--enable-file-urls", "--force-generic-extractor", "--concurrent-fragments", f"{concurrent_downloads}", "--downloader", "aria2c", "-o", f"{temp_filepath}", f"{url}"]
|
||||||
if disable_ipv6:
|
if disable_ipv6:
|
||||||
cmd.append("--downloader-args")
|
cmd.append("--downloader-args")
|
||||||
cmd.append('aria2c:"--disable-ipv6"')
|
cmd.append('aria2c:"--disable-ipv6"')
|
||||||
|
Loading…
x
Reference in New Issue
Block a user