Feat: Info argument

+ Added an `--info` argument that prints course information only (nothing is downloaded)
+ Updated the indentation of some log output so it reads more like a tree and is easier to follow
2025-04-30 17:34:25 +02:00 · 2021-05-28 16:59:52 -04:00 · 2021-05-28 16:59:52 -04:00 · 1ad4f1edde
commit 1ad4f1edde
parent 66aad0dc50
3 changed files with 134 additions and 38 deletions
--- a/.gitignore
+++ b/.gitignore
@ -122,3 +122,4 @@ manifest.mpd
 .vscode
 saved
 *.aria2
+info.py
--- a/README.md
+++ b/README.md
@ -69,7 +69,7 @@ You can now run `python main.py` to start downloading. The course will download

 ```
 usage: main.py [-h] -c COURSE_URL [-b BEARER_TOKEN] [-q QUALITY] [-l LANG] [--skip-lectures] [--download-assets] [--download-captions]
-               [--keep-vtt] [--skip-hls]
+               [--keep-vtt] [--skip-hls] [--info]

 Udemy Downloader

@ -89,6 +89,7 @@ optional arguments:
  --keep-vtt            If specified, .vtt files won't be removed
  --skip-hls            If specified, hls streams will be skipped (faster fetching) (hls streams usually contain 1080p quality for non-drm
                        lectures)
+  --info                If specified, only course information will be printed, nothing will be downloaded
 ```

 - Passing a Bearer Token and Course ID as an argument
@ -116,6 +117,8 @@ optional arguments:
  - `python main.py -c <Course URL> --download-captions --keep-vtt`
 - Skip parsing HLS Streams (HLS streams usually contain 1080p quality for Non-DRM lectures):
  - `python main.py -c <Course URL> --skip-hls`
+- Print course information only:
+  - `python main.py -c <Course URL> --info`

 # Credits

--- a/main.py
+++ b/main.py
@ -1040,8 +1040,6 @@ def process_lecture(lecture, lecture_path, lecture_dir, quality, access_token):

    if is_encrypted:
        if len(lecture_audio_sources) > 0 and len(lecture_video_sources) > 0:
-            print(f"> Lecture '%s' has DRM, attempting to download" %
-                  lecture_title)
            lecture_working_dir = os.path.join(working_dir,
                                               str(lecture.get("asset_id")))

@ -1055,16 +1053,19 @@ def process_lecture(lecture, lecture_path, lecture_dir, quality, access_token):
                        key=lambda x: abs(int(x.get("height")) - quality))
                if not os.path.exists(lecture_working_dir):
                    os.mkdir(lecture_working_dir)
+                print(f"      > Lecture '%s' has DRM, attempting to download" %
+                      lecture_title)
                handle_segments(video_source, audio_source, lecture_title,
                                lecture_working_dir, lecture_path)
            else:
-                print("> Lecture '%s' is already downloaded, skipping..." %
+                print(
+                    "      > Lecture '%s' is already downloaded, skipping..." %
                    lecture_title)
        else:
-            print(f"> Lecture '%s' is missing media links" % lecture_title)
+            print(f"      > Lecture '%s' is missing media links" %
+                  lecture_title)
            print(len(lecture_audio_sources), len(lecture_video_sources))
    else:
-        print("> Lecture doesn't have DRM, attempting to download...")
        sources = lecture.get("sources")
        sources = sorted(sources,
                         key=lambda x: int(x.get("height")),
@ -1075,14 +1076,17 @@ def process_lecture(lecture, lecture_path, lecture_dir, quality, access_token):
            if not os.path.exists(lecture_working_dir):
                os.mkdir(lecture_working_dir)
            if not os.path.isfile(lecture_path):
+                print(
+                    "      > Lecture doesn't have DRM, attempting to download..."
+                )
                source = sources[0]  # first index is the best quality
                if isinstance(quality, int):
                    source = min(
                        sources,
                        key=lambda x: abs(int(x.get("height")) - quality))
                try:
-                    print("====== Selected quality: ", source.get("type"),
-                          source.get("height"))
+                    print("      ====== Selected quality: ",
+                          source.get("type"), source.get("height"))
                    url = source.get("download_url")
                    source_type = source.get("type")
                    if source_type == "hls":
@ -1098,10 +1102,11 @@ def process_lecture(lecture, lecture_path, lecture_dir, quality, access_token):
                except Exception as e:
                    print(f"      > Error downloading lecture: ", e)
            else:
-                print("> Lecture '%s' is already downloaded, skipping..." %
+                print(
+                    "      > Lecture '%s' is already downloaded, skipping..." %
                    lecture_title)
        else:
-            print("Missing sources for lecture", lecture)
+            print("      > Missing sources for lecture", lecture)


 def parse_new(_udemy, quality, skip_lectures, dl_assets, dl_captions,
@ -1131,7 +1136,8 @@ def parse_new(_udemy, quality, skip_lectures, dl_assets, dl_captions,
            lecture_index = lecture.get("lecture_index")

            extension = lecture.get("extension")
-            print(f"> Processing lecture {lecture_index} of {total_lectures}")
+            print(
+                f"  > Processing lecture {lecture_index} of {total_lectures}")
            if not skip_lectures:
                if extension == "html":
                    html_content = lecture.get("html_content").encode(
@ -1143,7 +1149,7 @@ def parse_new(_udemy, quality, skip_lectures, dl_assets, dl_captions,
                            f.write(html_content)
                            f.close()
                    except Exception as e:
-                        print("Failed to write html file: ", e)
+                        print("    > Failed to write html file: ", e)
                        continue
                else:
                    lecture_path = os.path.join(
@ -1220,6 +1226,75 @@ def parse_new(_udemy, quality, skip_lectures, dl_assets, dl_captions,
                                        keep_vtt)


+def course_info(course_data):
+    """Print a tree-style summary of a parsed course; nothing is downloaded.
+
+    Args:
+        course_data: Parsed course dict. The keys read here are ``title``,
+            ``total_chapters``, ``total_lectures`` and ``chapters``. Each
+            chapter supplies ``chapter_title``, ``chapter_index``,
+            ``lecture_count`` and ``lectures``; each lecture supplies
+            ``lecture_title``, ``index``, ``assets_count``, ``is_encrypted``,
+            ``subtitles``, ``extension``, ``sources`` and ``video_sources``.
+
+    Returns:
+        None. All output goes to stdout via ``print``.
+
+    Lectures with a truthy ``extension`` are not listed. Missing or ``None``
+    ``chapters``, ``lectures``, ``subtitles`` and source lists are treated as
+    empty instead of raising.
+    """
+    # NOTE(review): the ``__main__`` block assigns the result of
+    # ``udemy._extract_course_info(...)`` to a module-level variable that is
+    # also called ``course_info``. When that branch runs it rebinds this
+    # function's name, so the later ``course_info(_udemy)`` call would not
+    # reach this function -- confirm and rename one of the two.
+
+    def height_of(source):
+        # A missing height sorts last instead of crashing on int(None).
+        return int(source.get("height") or 0)
+
+    print("\n\n\n\n")
+    course_title = course_data.get("title")
+    chapter_count = course_data.get("total_chapters")
+    lecture_count = course_data.get("total_lectures")
+
+    print("> Course: {}".format(course_title))
+    print("> Total Chapters: {}".format(chapter_count))
+    print("> Total Lectures: {}".format(lecture_count))
+    print("\n")
+
+    for chapter in course_data.get("chapters") or []:
+        chapter_title = chapter.get("chapter_title")
+        chapter_index = chapter.get("chapter_index")
+        chapter_lecture_count = chapter.get("lecture_count")
+
+        print("> Chapter: {} ({} of {})".format(chapter_title, chapter_index,
+                                                chapter_count))
+
+        for lecture in chapter.get("lectures") or []:
+            # Decide whether to skip before doing any sorting or formatting.
+            if lecture.get("extension"):
+                continue
+
+            lecture_is_encrypted = lecture.get("is_encrypted")
+
+            # DRM lectures list their renditions under "video_sources",
+            # everything else under "sources".
+            if lecture_is_encrypted:
+                available_sources = lecture.get("video_sources")
+            else:
+                available_sources = lecture.get("sources")
+
+            # Rebuilt for every lecture so a lecture without sources prints
+            # an empty list rather than the previous lecture's qualities.
+            # Both kinds use the same "type@WIDTHxHEIGHT" layout, best first.
+            lecture_qualities = [
+                "{}@{}x{}".format(x.get("type"), x.get("width"),
+                                  x.get("height"))
+                for x in sorted(available_sources or [],
+                                key=height_of,
+                                reverse=True)
+            ]
+            lecture_captions = [
+                x.get("language") for x in lecture.get("subtitles") or []
+            ]
+
+            print("  > Lecture: {} ({} of {})".format(
+                lecture.get("lecture_title"), lecture.get("index"),
+                chapter_lecture_count))
+            print("    > DRM: {}".format(lecture_is_encrypted))
+            print("    > Asset Count: {}".format(lecture.get("assets_count")))
+            print("    > Captions: {}".format(lecture_captions))
+            print("    > Qualities: {}".format(lecture_qualities))
+
+        # Blank separator between chapters, but not after the last one.
+        if chapter_index != chapter_count:
+            print("\n\n")
+
+
 if __name__ == "__main__":
    parser = argparse.ArgumentParser(description='Udemy Downloader')
    parser.add_argument("-c",
@ -1282,6 +1357,13 @@ if __name__ == "__main__":
        help=
        "If specified, hls streams will be skipped (faster fetching) (hls streams usually contain 1080p quality for non-drm lectures)",
    )
+    parser.add_argument(
+        "--info",
+        dest="info",
+        action="store_true",
+        help=
+        "If specified, only course information will be printed, nothing will be downloaded",
+    )

    parser.add_argument(
        "--save-to-file",
@ -1360,8 +1442,9 @@ if __name__ == "__main__":
        access_token = os.getenv("UDEMY_BEARER")

    udemy = Udemy(access_token)
-    print("> Fetching course information, this may take a minute...")

+    print("> Fetching course information, this may take a minute...")
+    if not args.load_from_file:
        course_id, course_info = udemy._extract_course_info(args.course_url)
        print("> Course information retrieved!")
        if course_info and isinstance(course_info, dict):
@ -1374,6 +1457,9 @@ if __name__ == "__main__":
        course_json = json.loads(
            open(os.path.join(os.getcwd(), "saved", "course_content.json"),
                 'r').read())
+        title = course_json.get("title")
+        course_title = course_json.get("published_title")
+        portal_name = course_json.get("portal_name")
    else:
        course_json = udemy._extract_course_json(args.course_url, course_id,
                                                 portal_name)
@ -1390,6 +1476,9 @@ if __name__ == "__main__":
    if args.load_from_file:
        _udemy = json.loads(
            open(os.path.join(os.getcwd(), "saved", "_udemy.json")).read())
+        if args.info:
+            course_info(_udemy)
+        else:
            parse_new(_udemy, quality, skip_lectures, dl_assets, dl_captions,
                      caption_locale, keep_vtt, access_token)
    else:
@ -1620,5 +1709,8 @@ if __name__ == "__main__":
                f.close()
            print("Saved parsed data to json")

+        if args.info:
+            course_info(_udemy)
+        else:
            parse_new(_udemy, quality, skip_lectures, dl_assets, dl_captions,
                      caption_locale, keep_vtt, access_token)