diff --git a/README.md b/README.md index 11f6c16..f3e0f04 100644 --- a/README.md +++ b/README.md @@ -46,7 +46,7 @@ You will need to get a few things before you can use this program: - locate the `Request Headers` section - copy the the text after `Authorization`, it should look like `Bearer xxxxxxxxxxx` - ![bearer token example](https://i.imgur.com/FhQdwgD.png) -- enter this in the `.env` file after `UDEMY_BEARER=` +- enter this in the `.env` file after `UDEMY_BEARER=` (you can also pass this as an argument, see advanced usage for more information) ### Aquire Course ID @@ -54,7 +54,7 @@ You will need to get a few things before you can use this program: - locate the request url field - ![request url](https://i.imgur.com/EUIV3bk.png) - copy the number after `/api-2.0/courses/` as seen highlighed in the above picture -- enter this in the `.env` file after `UDEMY_COURSE_ID=` +- enter this in the `.env` file after `UDEMY_COURSE_ID=` (you can also pass this as an argument, see advanced usage for more information) ### Key ID and Key @@ -71,19 +71,27 @@ You can now run `python main.py` to start downloading. The course will download # Advanced Usage ``` -usage: main.py [-h] [-d] [-q] [-l] [--download-assets] [--download-captions] +usage: main.py [-h] [-d] [-b BEARER_TOKEN] [-c COURSE_ID] [-q QUALITY] [-l LANG] [--skip-lectures] [--download-assets] [--download-captions] Udemy Downloader optional arguments: - -h, --help show this help message and exit - -d, --debug Use test_data.json rather than fetch from the udemy api. - -q , --quality Download specific video quality. (144, 360, 480, 720, 1080) - -l , --lang The language to download for captions (Default is en) - --download-assets If specified, lecture assets will be downloaded. - --download-captions If specified, captions will be downloaded. + -h, --help show this help message and exit + -d, --debug Use test_data.json rather than fetch from the udemy api. 
+ -b BEARER_TOKEN, --bearer BEARER_TOKEN + The Bearer token to use + -c COURSE_ID, --course-id COURSE_ID + The ID of the course to download + -q QUALITY, --quality QUALITY + Download specific video quality. (144, 360, 480, 720, 1080) + -l LANG, --lang LANG The language to download for captions (Default is en) + --skip-lectures If specified, lectures won't be downloaded. + --download-assets If specified, lecture assets will be downloaded. + --download-captions If specified, captions will be downloaded. ``` +- Passing a Bearer Token and Course ID as arguments + - `python main.py -b <BEARER_TOKEN> -c <COURSE_ID>` - Download a specific quality - `python main.py -q 720` - Download assets along with lectures @@ -99,6 +107,9 @@ optional arguments: - `python main.py --download-captions -l pl` - Polish Subtitles - `python main.py --download-captions -l all` - Downloads all subtitles - etc +- Skip downloading lecture videos + - `python main.py --skip-lectures --download-captions` - Downloads only captions + - `python main.py --skip-lectures --download-assets` - Downloads only assets # Getting an error about "Accepting the latest terms of service"? 
diff --git a/main.py b/main.py index 0a067d6..fe87f82 100644 --- a/main.py +++ b/main.py @@ -11,13 +11,8 @@ from mpegdash.utils import (parse_attr_value, parse_child_nodes, from utils import extract_kid from vtt_to_srt import convert -load_dotenv() - -course_id = os.getenv("UDEMY_COURSE_ID") # the course id to download -bearer_token = os.getenv( - "UDEMY_BEARER" -) # you can find this in the network tab, its a request header under Authorization/x-udemy-authorization -header_bearer = "Bearer " + bearer_token +course_id = None +header_bearer = None download_dir = "%s\out_dir" % os.getcwd() working_dir = "%s\working_dir" % os.getcwd( ) # set the folder to download segments for DRM videos @@ -26,6 +21,7 @@ home_dir = os.getcwd() keyfile_path = "%s\keyfile.json" % os.getcwd() dl_assets = False dl_captions = False +skip_lectures = False caption_locale = "en" quality = None # None will download the best possible valid_qualities = [144, 360, 480, 720, 1080] @@ -361,53 +357,55 @@ def process_caption(caption, def process_lecture(lecture, lecture_index, lecture_path, lecture_dir): lecture_title = lecture["title"] lecture_asset = lecture["asset"] - if lecture_asset["media_license_token"] == None: - # not encrypted - media_sources = lecture_asset["media_sources"] - if quality: # if quality is specified, try to find the requested quality - lecture_url = next( - (x["src"] - for x in media_sources if x["label"] == str(quality)), - media_sources[0]["src"] - ) # find the quality requested or return the best available - else: - lecture_url = media_sources[0][ - "src"] # best quality is the first index + if not skip_lectures: + if lecture_asset["media_license_token"] == None: + # not encrypted + media_sources = lecture_asset["media_sources"] + if quality: # if quality is specified, try to find the requested quality + lecture_url = next( + (x["src"] + for x in media_sources if x["label"] == str(quality)), + media_sources[0]["src"] + ) # find the quality requested or return the best 
available + else: + lecture_url = media_sources[0][ + "src"] # best quality is the first index - if not os.path.isfile(lecture_path): - try: - download(lecture_url, lecture_path, lecture_title) - except Exception as e: - # We could add a retry here - print(f"> Error downloading lecture: {e}. Skipping...") - else: - print(f"> Lecture '%s' is already downloaded, skipping..." % - lecture_title) - else: - # encrypted - print(f"> Lecture '%s' has DRM, attempting to download" % - lecture_title) - lecture_working_dir = "%s\%s" % ( - working_dir, lecture_asset["id"] - ) # set the folder to download ephemeral files - media_sources = lecture_asset["media_sources"] - if not os.path.exists(lecture_working_dir): - os.mkdir(lecture_working_dir) - if not os.path.isfile(lecture_path): - mpd_url = next((x["src"] for x in media_sources - if x["type"] == "application/dash+xml"), None) - if not mpd_url: - print("> Couldn't find dash url for lecture '%s', skipping...", + if not os.path.isfile(lecture_path): + try: + download(lecture_url, lecture_path, lecture_title) + except Exception as e: + # We could add a retry here + print(f"> Error downloading lecture: {e}. Skipping...") + else: + print(f"> Lecture '%s' is already downloaded, skipping..." % lecture_title) - return - base_url = mpd_url.split("index.mpd")[0] - media_info = manifest_parser(mpd_url) - handle_irregular_segments(media_info, lecture_title, - lecture_working_dir, lecture_path) - cleanup(lecture_working_dir) else: - print("> Lecture '%s' is already downloaded, skipping..." 
% + # encrypted + print(f"> Lecture '%s' has DRM, attempting to download" % lecture_title) + lecture_working_dir = "%s\%s" % ( + working_dir, lecture_asset["id"] + ) # set the folder to download ephemeral files + media_sources = lecture_asset["media_sources"] + if not os.path.exists(lecture_working_dir): + os.mkdir(lecture_working_dir) + if not os.path.isfile(lecture_path): + mpd_url = next((x["src"] for x in media_sources + if x["type"] == "application/dash+xml"), None) + if not mpd_url: + print( + "> Couldn't find dash url for lecture '%s', skipping...", + lecture_title) + return + base_url = mpd_url.split("index.mpd")[0] + media_info = manifest_parser(mpd_url) + handle_irregular_segments(media_info, lecture_title, + lecture_working_dir, lecture_path) + cleanup(lecture_working_dir) + else: + print("> Lecture '%s' is already downloaded, skipping..." % + lecture_title) # process assets if dl_assets: @@ -505,13 +503,26 @@ if __name__ == "__main__": action="store_true", help="Use test_data.json rather than fetch from the udemy api.", ) + parser.add_argument( + "-b", + "--bearer", + dest="bearer_token", + type=str, + help="The Bearer token to use", + ) + parser.add_argument( + "-c", + "--course-id", + dest="course_id", + type=str, + help="The ID of the course to download", + ) parser.add_argument( "-q", "--quality", dest="quality", type=int, help="Download specific video quality. 
(144, 360, 480, 720, 1080)", - metavar="", ) parser.add_argument( "-l", @@ -519,7 +530,12 @@ if __name__ == "__main__": dest="lang", type=str, help="The language to download for captions (Default is en)", - metavar="", + ) + parser.add_argument( + "--skip-lectures", + dest="skip_lectures", + action="store_true", + help="If specified, lectures won't be downloaded.", ) parser.add_argument( "--download-assets", @@ -541,6 +557,8 @@ if __name__ == "__main__": caption_locale = args.lang if args.download_captions: dl_captions = True + if args.skip_lectures: + skip_lectures = True if args.quality: if not args.quality in valid_qualities: print("Invalid quality specified! %s" % quality) @@ -548,6 +566,25 @@ if __name__ == "__main__": else: quality = args.quality + load_dotenv() + if args.bearer_token: + header_bearer = f"Bearer %s" % args.bearer_token + else: + header_bearer = f"Bearer %s" % os.getenv("UDEMY_BEARER") + if args.course_id: + course_id = args.course_id + else: + course_id = os.getenv("UDEMY_COURSE_ID") + + if not course_id: + print("> Missing Course ID!") + sys.exit(1) + if not header_bearer: + print("> Missing Bearer Token!") + sys.exit(1) + + print(f"> Using course ID {course_id}") + if args.debug: # this is for development purposes so we dont need to make tons of requests when testing # course data json is just stored and read from a file