commit d088d935f0186314291929dfbc6953eec421b8e8 Author: Michael Reber Date: Thu Nov 21 12:44:04 2019 +0100 Initial commit diff --git a/README.md b/README.md new file mode 100644 index 0000000..62f74e8 --- /dev/null +++ b/README.md @@ -0,0 +1,68 @@ +# Downloader for Linux Academy + +This script will download courses from [Linux Academy](https://linuxacademy.com) for offline consumption. + +## Important Notice + +**Use of this script is for personal consumption of content only.** The content this script downloads is protected by copyright and must not be shared. + +#### Good uses + +* Downloading a lesson before embarking to a destination with little or no internet access. +* Keep lessons you've completed for a personal backup. + +#### Bad uses + +* Uploading the downloaded videos to a content sharing site. +* Sending copies of the videos to your friends and family. +* Hoarding lessons for future consumption beyond your subscription period. + +Please exercise good judgement when using this script. The folks at Linux Academy work hard to make quality courses and you should support them by paying for a subscription if you can. You may also wish to speak with your employer to find out if they would be willing to pay for your subscription. + +## Installation + +Tested on a fresh install of Ubuntu 18.04 desktop. Your mileage may vary depending on your OS. + + sudo apt update + sudo apt install python3 python3-pip git unzip ffmpeg + sudo pip3 install selenium youtube-dl + +### Browser + +You will need Chrome or Firefox and its matching driver. + +#### Chrome + +Install [Google Chrome](https://www.google.com/chrome/) and download the appropriate [ChromeDriver](https://chromedriver.chromium.org/downloads) version. Make sure the `chromedriver` executable is in your PATH (e.g., `/usr/local/bin`). 
+ + sudo dpkg -i google-chrome-stable_current_amd64.deb + unzip chromedriver_linux64.zip + sudo mv chromedriver /usr/local/bin + +#### Firefox + +Install Mozilla Firefox and download [geckodriver](https://github.com/mozilla/geckodriver/releases). + + sudo apt install firefox + tar xzf geckodriver-*-linux64.tar.gz + sudo mv geckodriver /usr/local/bin + +## Usage + +This will only work with an active Linux Academy subscription. If you do not have one, please get one [here](https://linuxacademy.com/pricing/). + + Usage: + ./linuxacademy-dl.py [-u|--username] [-p|--password] [-d|--download-dir] [-c|--cookies-file] COURSE_URL + + Options may be replaced with environment variables. Command line options take precedence. + LADL_USERNAME, LADL_PASSWORD, LADL_DIR, LADL_COOKIES + + Examples: + $ ./linuxacademy-dl.py -u person@example.com -p p@ssw0rd https://linuxacademy.com/cp/modules/view/id/346 + + $ export LADL_USERNAME=person@example.com + $ export LADL_PASSWORD=p@ssw0rd + $ export LADL_DIR=/home/jdoe/linux-academy + $ ./linuxacademy-dl.py https://linuxacademy.com/cp/modules/view/id/346 + +The username/email and password fields are required. The cookie file will default to `$SCRIPT_DIR/cookies.txt` and the download directory will default to `$SCRIPT_DIR/download`, where `$SCRIPT_DIR` is the directory containing the `linuxacademy-dl.py` script. 
#!/usr/bin/env python3
"""Download the videos of a Linux Academy course for offline viewing.

The script drives a headless Chrome or Firefox through Selenium to log in and
collect the lesson links of a course, exports the browser's cookies to a
Netscape-format cookie file, and hands every lesson URL to youtube-dl.
"""

import getopt
import os
import shutil
import sys
import time

import youtube_dl
from selenium import webdriver
from selenium.common.exceptions import NoSuchElementException
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys


def usage(error=0, msg=None):
    """Print the help text and terminate the process.

    :param error: process exit status (0 for a plain help request).
    :param msg: optional message printed before the help text.
    :raises SystemExit: always.
    """
    if msg:
        print(msg)
    print("Usage:")
    print("\t{} [-u|--username] [-p|--password] [-d|--download-dir] [-c|--cookies-file] "
          "[--ignore-missing-title] COURSE_URL\n".format(sys.argv[0]))
    print("\tOptions may be replaced with environment variables. Command line options take precedence.")
    print("\t\tLADL_USERNAME, LADL_PASSWORD, LADL_DIR, LADL_COOKIES\n")
    print("Examples:")
    print("\t$ ./linuxacademy-dl.py -u person@example.com -p p@ssw0rd https://linuxacademy.com/cp/modules/view/id/346\n")
    print("\t$ export LADL_USERNAME=person@example.com")
    print("\t$ export LADL_PASSWORD=p@ssw0rd")
    print("\t$ export LADL_DIR=/home/jdoe/linux-academy")
    print("\t$ ./linuxacademy-dl.py https://linuxacademy.com/cp/modules/view/id/346")
    # sys.exit instead of the ``exit`` builtin: the latter is injected by the
    # ``site`` module and is missing when Python runs with -S or when frozen.
    sys.exit(error)


def parse_args():
    """Parse command line options, falling back to LADL_* environment variables.

    :returns: dict with the keys ``username``, ``password``, ``download_dir``,
        ``cookies_file``, ``course_url`` and ``ignore_missing_title``.
    :raises SystemExit: via :func:`usage` on ``--help``, on an unknown option,
        or when the course URL, username or password is missing.
    """
    try:
        opts, args = getopt.getopt(
            sys.argv[1:], 'hu:p:d:c:',
            ['help', 'username=', 'password=', 'download-dir=', 'cookies-file=',
             'ignore-missing-title'])
    except getopt.GetoptError as err:
        print(str(err))
        usage(1)

    username = password = download_dir = cookies_file = None
    ignore_missing_title = False

    # Options are handled before the positional check so that a bare
    # ``--help`` prints the help and exits 0 rather than "Missing course URL.".
    for opt, arg in opts:
        if opt in ('-h', '--help'):
            usage()
        elif opt in ('-u', '--username'):
            username = arg
        elif opt in ('-p', '--password'):
            password = arg
        elif opt in ('-d', '--download-dir'):
            download_dir = arg
        elif opt in ('-c', '--cookies-file'):
            cookies_file = arg
        elif opt == '--ignore-missing-title':
            ignore_missing_title = True

    if not args:
        usage(1, "Missing course URL.")

    if username is None:
        username = os.environ.get('LADL_USERNAME')
    if not username:
        usage(1, "Missing username.")

    if password is None:
        password = os.environ.get('LADL_PASSWORD')
    if not password:
        usage(1, "Missing password.")

    # Both defaults live next to the script itself, not in the working directory.
    script_dir = os.path.dirname(os.path.realpath(__file__))
    if download_dir is None:
        download_dir = os.environ.get('LADL_DIR', os.path.join(script_dir, 'download'))
    if cookies_file is None:
        cookies_file = os.environ.get('LADL_COOKIES', os.path.join(script_dir, 'cookies.txt'))

    return {
        'username': username,
        'password': password,
        'download_dir': download_dir,
        'cookies_file': cookies_file,
        'course_url': args[0],
        'ignore_missing_title': ignore_missing_title,
    }


def launch_browser(name):
    """Start a headless browser and open the Linux Academy home page.

    :param name: ``'chrome'`` selects Chrome; any other value selects Firefox.
    :returns: the Selenium WebDriver instance; the caller must ``quit()`` it.
    """
    if name == 'chrome':
        options = webdriver.ChromeOptions()
        driver_class = webdriver.Chrome
    else:
        options = webdriver.FirefoxOptions()
        driver_class = webdriver.Firefox

    options.add_argument("--headless")
    options.add_argument("--window-size=1920,1080")
    driver = driver_class(options=options)

    try:
        driver.get("https://linuxacademy.com/")
    except Exception:
        # The caller never receives the driver if the first page load fails,
        # so the browser has to be shut down here before re-raising.
        driver.quit()
        raise
    return driver


def la_login(driver, args):
    """Log in through the site's login form.

    :param driver: WebDriver positioned on the Linux Academy home page.
    :param args: parsed arguments; reads ``username`` and ``password``.
    :raises SystemExit: with status 1 when the logged-in marker element
        (``id="navigationUsername"``) is not present after submitting.
    """
    driver.find_element(By.PARTIAL_LINK_TEXT, 'Log In').click()
    time.sleep(5)  # fixed wait for the login page to load

    driver.find_element(By.NAME, 'username').send_keys(args['username'])
    password_field = driver.find_element(By.NAME, 'password')
    password_field.send_keys(args['password'])
    password_field.send_keys(Keys.RETURN)
    time.sleep(10)  # fixed wait for the post-login redirect

    try:
        driver.find_element(By.ID, 'navigationUsername')
    except NoSuchElementException:
        print("Login failed. Exiting.")
        sys.exit(1)
    print("Login success.")


def load_course(driver, args):
    """Open the course page and return the course title.

    :param driver: logged-in WebDriver.
    :param args: parsed arguments; reads ``course_url`` and
        ``ignore_missing_title``.
    :returns: text of the first ``course-title`` element, or
        ``COURSE-ID-<last URL segment>`` when no such element exists and
        ``ignore_missing_title`` is set.
    :raises SystemExit: with status 3 when the title is missing and
        ``ignore_missing_title`` is not set.
    """
    driver.get(args['course_url'])
    time.sleep(5)  # fixed wait for the course page to render

    titles = driver.find_elements(By.CLASS_NAME, 'course-title')
    if titles:
        return titles[0].text

    if not args['ignore_missing_title']:
        print("Error: Could not find course title. Try running with --ignore-missing-title.")
        sys.exit(3)

    # rstrip so that a trailing slash on the URL does not yield an empty ID.
    course_id = args['course_url'].rstrip('/').split('/')[-1]
    return "COURSE-ID-{}".format(course_id)


def fetch_video_list(driver):
    """Collect the lesson links found on the currently loaded page.

    Every anchor whose ``href`` contains ``/course/`` counts as a lesson.

    :returns: list of dicts with ``url``, ``counter`` (1-based position,
        zero-padded to three digits) and ``title`` (first line of link text).
    """
    video_list = []

    for anchor in driver.find_elements(By.TAG_NAME, 'a'):
        url = anchor.get_attribute('href')
        # Anchors without an href report None; skip them explicitly.
        if not url or '/course/' not in url:
            continue
        video_list.append({
            'url': url,
            'counter': "{:03d}".format(len(video_list) + 1),
            'title': anchor.text.split('\n')[0],
        })

    return video_list


def write_cookies(driver, file):
    """Write the browser's current cookies to a Netscape-format cookie file.

    The file is rewritten from scratch on every call, so repeated calls do not
    pile up duplicate headers and stale copies of the same cookies.

    :param driver: WebDriver whose cookies are exported.
    :param file: path of the cookie file to (over)write.
    """
    with open(file, 'w', encoding='utf-8') as cookie_file:
        cookie_file.write("# Netscape HTTP Cookie File\n\n")

        for c in driver.get_cookies():
            expiry = c.get('expiry') or 0  # no expiry reported: write 0
            any_domain_flag = str(c['domain'].startswith('.')).upper()
            cookie_file.write("{}\t{}\t{}\t{}\t{}\t{}\t{}\n".format(
                c['domain'], any_domain_flag, c['path'], str(c['secure']).upper(),
                expiry, c['name'], c['value']))


def download_video(driver, args, course_title, video):
    """Download one lesson with youtube-dl using the browser's session cookies.

    :param driver: logged-in WebDriver.
    :param args: parsed arguments; reads ``cookies_file`` and ``download_dir``.
    :param course_title: name of the sub-directory the video is stored in.
    :param video: one entry produced by :func:`fetch_video_list`.
    """
    driver.get(video['url'])
    time.sleep(5)  # fixed wait for the lesson page to load
    write_cookies(driver, args['cookies_file'])

    file_name = "{} - {}".format(video['counter'], video['title'].replace('/', '_'))
    print("Downloading: {}...".format(file_name))

    # A '/' in the course title would otherwise add an extra directory level.
    target = os.path.join(args['download_dir'], course_title.replace('/', '_'), file_name)
    ydl_opts = {
        'cookiefile': args['cookies_file'],
        'force_generic_extractor': True,
        'quiet': True,
        'no_warnings': True,
        # outtmpl is a %-style template: literal percent signs in the path
        # must be doubled or youtube-dl fails to expand it.
        'outtmpl': target.replace('%', '%%') + '.%(ext)s',
        'restrictfilenames': True,
    }
    with youtube_dl.YoutubeDL(ydl_opts) as ydl:
        ydl.download([video['url']])


def main():
    """Log in, enumerate the lessons of the requested course and download them."""
    args = parse_args()

    if shutil.which('chromedriver'):
        print("Launching Chrome...")
        browser = 'chrome'
    elif shutil.which('geckodriver'):
        print("Launching Firefox...")
        browser = 'firefox'
    else:
        print("Error: No browser driver found.")
        sys.exit(2)

    driver = launch_browser(browser)
    try:
        print("Logging in...")
        la_login(driver, args)

        print("Loading course...")
        course_title = load_course(driver, args)
        print("Course title: {}".format(course_title))

        print("Fetching video list...")
        video_list = fetch_video_list(driver)
        print("Found {} video{}.".format(len(video_list), "" if len(video_list) == 1 else "s"))

        try:
            for video in video_list:
                download_video(driver, args, course_title, video)
        finally:
            # The cookie file holds live session credentials: remove it even
            # when a download fails. It does not exist if nothing was fetched.
            if os.path.exists(args['cookies_file']):
                os.remove(args['cookies_file'])
    finally:
        # Always shut the headless browser down, including on sys.exit().
        driver.quit()

    print("Done.")


if __name__ == "__main__":
    main()