#!/usr/bin/env python3
"""Download the lesson videos of a Linux Academy course.

The script logs in through a headless Chrome or Firefox session driven by
Selenium, collects the lesson links found on the course page, and hands each
lesson URL to youtube_dl together with the browser session's cookies.

Exit codes used by this script: 1 for usage errors and failed logins,
2 when no browser driver is found, 3 when the course title is missing.
"""

import os
import sys
import time
import getopt
import shutil
import youtube_dl
from selenium import webdriver
from selenium.common.exceptions import NoSuchElementException
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from os.path import expanduser

# if running as root...kick out
if os.geteuid() == 0:
    sys.exit("\nPlease run this script as unprivileged user only.\n")

home = expanduser("~")


def usage(error=0, msg=None):
    """Print the help text and terminate the process.

    Args:
        error: Process exit status (0 for a plain help request).
        msg: Optional message printed before the help text.
    """
    if msg:
        print(msg)
    print("Usage:")
    print("\t{} [-u|--username] [-p|--password] [-d|--download-dir] [-c|--cookies-file] COURSE_URL\n"
          .format(sys.argv[0]))
    print("\tOptions may be replaced with environment variables. Command line options take precedence.")
    print("\t\tLADL_USERNAME, LADL_PASSWORD, LADL_DIR, LADL_COOKIES\n")
    print("Examples:")
    print("\t$ ./linuxacademy-dl.py -u person@exmple.com -p my_p@ssw0rd https://linuxacademy.com/cp/modules/view/id/388\n")
    print("\t$ export LADL_USERNAME=person@example.com")
    print("\t$ export LADL_PASSWORD=my_p@ssw0rd")
    print('\t$ export LADL_DIR=' + str(home) + '/linux-academy-videos')
    print("\t$ ./linuxacademy-dl.py https://linuxacademy.com/cp/modules/view/id/388")
    # sys.exit instead of the interactive-only ``exit`` helper from ``site``.
    sys.exit(error)


def parse_args():
    """Parse command line options, falling back to LADL_* environment variables.

    Returns:
        A dict with the keys 'username', 'password', 'download_dir',
        'cookies_file', 'course_url' and 'ignore_missing_title'.

    Exits through usage() on invalid options or when the course URL,
    username or password is missing.
    """
    opts = args = []
    username = password = download_dir = cookies_file = None
    ignore_missing_title = False

    try:
        opts, args = getopt.getopt(
            sys.argv[1:], 'hu:p:d:c:',
            ['help', 'username=', 'password=', 'download-dir=',
             'cookies-file=', 'ignore-missing-title'])
    except getopt.GetoptError as err:
        print(str(err))
        usage(1)

    # Options are processed before the positional check so that a bare
    # "--help" shows the help text instead of "Missing course URL.".
    for opt, arg in opts:
        if opt in ('-h', '--help'):
            usage()
        elif opt in ('-u', '--username'):
            username = arg
        elif opt in ('-p', '--password'):
            password = arg
        elif opt in ('-d', '--download-dir'):
            download_dir = arg
        elif opt in ('-c', '--cookies-file'):
            cookies_file = arg
        elif opt == '--ignore-missing-title':
            ignore_missing_title = True

    if len(args) < 1:
        usage(1, "Missing course URL.")

    if username is None:
        username = os.environ.get('LADL_USERNAME')
        if not username:
            usage(1, "Missing username.")

    if password is None:
        password = os.environ.get('LADL_PASSWORD')
        if not password:
            usage(1, "Missing password.")

    # Default locations live next to this script.
    script_dir = os.path.dirname(os.path.realpath(__file__))

    if download_dir is None:
        download_dir = os.environ.get('LADL_DIR', "{}/download".format(script_dir))

    if cookies_file is None:
        cookies_file = os.environ.get('LADL_COOKIES', "{}/cookies.txt".format(script_dir))

    course_url = args[0]

    return {
        'username': username,
        'password': password,
        'download_dir': download_dir,
        'cookies_file': cookies_file,
        'course_url': course_url,
        'ignore_missing_title': ignore_missing_title
    }


def launch_browser(name):
    """Start a headless browser and open the Linux Academy login page.

    Args:
        name: 'chrome' selects Chrome; any other value selects Firefox.

    Returns:
        The Selenium webdriver instance, positioned on the login page.
    """
    if name == 'chrome':
        options = webdriver.ChromeOptions()
        driver_factory = webdriver.Chrome
    else:
        options = webdriver.FirefoxOptions()
        driver_factory = webdriver.Firefox

    options.add_argument("--headless")
    options.add_argument("--window-size=1920,1080")
    driver = driver_factory(options=options)

    try:
        driver.get("https://linuxacademy.com/cp/ssologin")
    except Exception:
        # Do not leave a headless browser behind if the first page load fails.
        driver.quit()
        raise
    return driver


def la_login(driver, args):
    """Submit the login form and verify that the login succeeded.

    Args:
        driver: Webdriver currently showing the login page.
        args: Parsed arguments; 'username' and 'password' are read.

    Exits with status 1 when the 'navigationUsername' element is not present
    after submitting the form.
    """
    # Fixed delay to give the login form time to render.
    time.sleep(5)

    user = driver.find_element(By.NAME, 'username')
    user.send_keys(args['username'])

    password = driver.find_element(By.NAME, 'password')
    password.send_keys(args['password'])
    password.send_keys(Keys.RETURN)

    # Fixed delay to give the post-login page time to load.
    time.sleep(15)

    try:
        driver.find_element(By.ID, 'navigationUsername')
        print("Login success.")
    except NoSuchElementException:
        print("Login failed. Exiting.")
        sys.exit(1)


def load_course(driver, args):
    """Open the course page and return the course title.

    Args:
        driver: Logged-in webdriver.
        args: Parsed arguments; 'course_url' and 'ignore_missing_title'
            are read.

    Returns:
        The text of the first element with class 'course-title', or
        "COURSE-ID-<last URL segment>" when no such element exists and
        'ignore_missing_title' is set.

    Exits with status 3 when the title is missing and the fallback is
    not enabled.
    """
    course_title = None
    driver.get(args['course_url'])
    time.sleep(5)

    try:
        course_title = driver.find_elements(By.CLASS_NAME, 'course-title')[0].text
    except IndexError:
        if args['ignore_missing_title']:
            course_title = "COURSE-ID-{}".format(args['course_url'].split('/')[-1])
        else:
            print("Error: Could not find course title. Try running with --ignore-missing-title.")
            sys.exit(3)

    return course_title


def fetch_video_list(driver):
    """Collect the lesson links from the currently loaded page.

    Every anchor whose href contains '/course/' is treated as a lesson.

    Returns:
        A list of dicts with 'url', 'counter' (1-based, zero-padded to three
        digits) and 'title' (first line of the link text).
    """
    video_list = []

    for lesson in driver.find_elements(By.TAG_NAME, 'a'):
        url = lesson.get_attribute('href')
        # Anchors without an href yield None; skip them explicitly.
        if not url or '/course/' not in url:
            continue
        video_list.append({
            'url': url,
            'counter': "{:03d}".format(len(video_list) + 1),
            'title': lesson.text.split('\n')[0],
        })

    return video_list


def write_cookies(driver, file):
    """Write the browser's cookies to *file* in Netscape cookie-file format.

    The file is rewritten from scratch on every call so that the header and
    cookies are not duplicated when this is called once per video.

    Args:
        driver: Webdriver whose cookies are exported.
        file: Path of the cookie file to (over)write.
    """
    with open(file, 'w') as cookies:
        cookies.write("# Netscape HTTP Cookie File\n\n")
        for c in driver.get_cookies():
            # Cookies without an expiry are written with expiry 0.
            expiry = c.get('expiry') or 0
            any_domain_flag = str(c['domain'].startswith('.')).upper()
            cookies.write("{}\t{}\t{}\t{}\t{}\t{}\t{}\n".format(
                c['domain'], any_domain_flag, c['path'],
                str(c['secure']).upper(), expiry, c['name'], c['value']))


def download_video(driver, args, course_title, video):
    """Download a single lesson video with youtube_dl.

    The lesson page is opened in the browser first and the resulting cookies
    are exported to the cookie file that youtube_dl is configured to read.

    Args:
        driver: Logged-in webdriver.
        args: Parsed arguments; 'cookies_file' and 'download_dir' are read.
        course_title: Name of the sub-directory below the download dir.
        video: Entry produced by fetch_video_list().
    """
    driver.get(video['url'])
    time.sleep(5)
    write_cookies(driver, args['cookies_file'])

    # '/' in a title would otherwise be interpreted as a path separator.
    file_name = "{} - {}".format(video['counter'], video['title'].replace('/', '_'))
    print("Downloading: {}...".format(file_name))

    ydl_opts = {
        'cookiefile': args['cookies_file'],
        'force_generic_extractor': True,
        'quiet': True,
        'no_warnings': True,
        'outtmpl': '{}/{}/{}.%(ext)s'.format(args['download_dir'], course_title, file_name),
        'restrictfilenames': True,
    }
    with youtube_dl.YoutubeDL(ydl_opts) as ydl:
        ydl.download([video['url']])


def main():
    """Run the complete workflow: log in, list the lessons, download them."""
    args = parse_args()
    driver = None

    if shutil.which('chromedriver'):
        print("Launching Chrome...")
        driver = launch_browser('chrome')
    elif shutil.which('geckodriver'):
        print("Launching Firefox...")
        driver = launch_browser('firefox')
    else:
        print("Error: No browser driver found.")
        sys.exit(2)

    try:
        print("Logging in...")
        la_login(driver, args)

        print("Loading course...")
        course_title = load_course(driver, args)
        print("Course title: {}".format(course_title))

        print("Fetching video list...")
        video_list = fetch_video_list(driver)
        print("Found {} video{}.".format(len(video_list), "" if len(video_list) == 1 else "s"))

        for video in video_list:
            download_video(driver, args, course_title, video)
    finally:
        # Runs on sys.exit() too: shut the headless browser down and remove
        # the exported session cookies. The cookie file only exists once at
        # least one download has been attempted.
        driver.quit()
        if os.path.exists(args['cookies_file']):
            os.remove(args['cookies_file'])

    print("Done.")


if __name__ == "__main__":
    main()