You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

221 lines
7.0 KiB
Python

#!/usr/bin/env python3
import os
import sys
import time
import getopt
import shutil
import youtube_dl
from selenium import webdriver
from selenium.common.exceptions import NoSuchElementException
from selenium.webdriver.common.keys import Keys
from os.path import expanduser
# Refuse to run as root: this script only drives a browser and writes files
# for the invoking user, so elevated privileges are never needed.
# os.geteuid() exists only on Unix-like platforms; where it is missing the
# check is skipped instead of crashing with AttributeError.
if hasattr(os, "geteuid") and os.geteuid() == 0:
    sys.exit("\nPlease run this script as unprivileged user only.\n")

# Home directory of the current user.
home = expanduser("~")
def usage(error=0, msg=None):
    """Print the command-line help text and terminate the process.

    Args:
        error: Exit status handed to ``sys.exit`` (0 means success).
        msg: Optional message printed before the help text, typically the
            reason the command line was rejected.

    Raises:
        SystemExit: Always; this function never returns.
    """
    if msg:
        print(msg)
    print("Usage:")
    # The synopsis lists every option the parser accepts, including -h and
    # --ignore-missing-title.
    print("\t{} [-h|--help] [-u|--username] [-p|--password] [-d|--download-dir] [-c|--cookies-file] "
          "[--ignore-missing-title] COURSE_URL\n".format(sys.argv[0]))
    print("\tOptions may be replaced with environment variables. Command line options take precedence.")
    print("\t\tLADL_USERNAME, LADL_PASSWORD, LADL_DIR, LADL_COOKIES\n")
    print("Examples:")
    print("\t$ ./linuxacademy-dl.py -u person@example.com -p my_p@ssw0rd https://linuxacademy.com/cp/modules/view/id/388\n")
    print("\t$ export LADL_USERNAME=person@example.com")
    print("\t$ export LADL_PASSWORD=my_p@ssw0rd")
    print('\t$ export LADL_DIR=' + str(expanduser("~")) + '/linux-academy-videos')
    print("\t$ ./linuxacademy-dl.py https://linuxacademy.com/cp/modules/view/id/388")
    # sys.exit rather than the interactive-only ``exit`` helper, which is
    # injected by the ``site`` module and is not guaranteed to exist.
    sys.exit(error)
def parse_args(argv=None):
    """Build the run settings from command-line options and environment variables.

    Command-line options take precedence over the ``LADL_USERNAME``,
    ``LADL_PASSWORD``, ``LADL_DIR`` and ``LADL_COOKIES`` environment
    variables. When neither is given, the download directory and cookie file
    default to ``download`` and ``cookies.txt`` next to this script.

    Args:
        argv: Argument list to parse (without the program name). Defaults to
            ``sys.argv[1:]``.

    Returns:
        A dict with the keys ``username``, ``password``, ``download_dir``,
        ``cookies_file``, ``course_url`` and ``ignore_missing_title``.

    Exits through ``usage`` (SystemExit) on ``-h``/``--help``, on an unknown
    option, or when the course URL, username or password is missing.
    """
    if argv is None:
        argv = sys.argv[1:]

    try:
        opts, args = getopt.getopt(
            argv, 'hu:p:d:c:',
            ['help', 'username=', 'password=', 'download-dir=', 'cookies-file=',
             'ignore-missing-title'])
    except getopt.GetoptError as err:
        print(str(err))
        usage(1)

    username = password = download_dir = cookies_file = None
    ignore_missing_title = False
    for opt, arg in opts:
        if opt in ('-h', '--help'):
            usage()
        elif opt in ('-u', '--username'):
            username = arg
        elif opt in ('-p', '--password'):
            password = arg
        elif opt in ('-d', '--download-dir'):
            download_dir = arg
        elif opt in ('-c', '--cookies-file'):
            cookies_file = arg
        elif opt == '--ignore-missing-title':
            ignore_missing_title = True

    # Checked only after the options have been processed so that a bare
    # "-h" shows the help instead of failing with "Missing course URL.".
    if not args:
        usage(1, "Missing course URL.")

    if username is None:
        username = os.environ.get('LADL_USERNAME')
        if not username:
            usage(1, "Missing username.")
    if password is None:
        password = os.environ.get('LADL_PASSWORD')
        if not password:
            usage(1, "Missing password.")

    if download_dir is None or cookies_file is None:
        # Defaults live next to the script itself, not in the current directory.
        script_dir = os.path.dirname(os.path.realpath(__file__))
        if download_dir is None:
            download_dir = os.environ.get('LADL_DIR', "{}/download".format(script_dir))
        if cookies_file is None:
            cookies_file = os.environ.get('LADL_COOKIES', "{}/cookies.txt".format(script_dir))

    return {
        'username': username,
        'password': password,
        'download_dir': download_dir,
        'cookies_file': cookies_file,
        'course_url': args[0],
        'ignore_missing_title': ignore_missing_title,
    }
def launch_browser(name):
    """Start a headless browser and open the Linux Academy SSO login page.

    Args:
        name: ``'chrome'`` selects Chrome; any other value selects Firefox.

    Returns:
        The started WebDriver, already navigated to the login page.
    """
    if name == 'chrome':
        options = webdriver.ChromeOptions()
        options.add_argument("--headless")
        options.add_argument("--window-size=1920,1080")
        driver = webdriver.Chrome(options=options)
    else:
        options = webdriver.FirefoxOptions()
        options.add_argument("--headless")
        # Firefox does not understand Chrome's --window-size switch; its
        # window dimensions are set with separate width/height options.
        options.add_argument("--width=1920")
        options.add_argument("--height=1080")
        driver = webdriver.Firefox(options=options)

    try:
        driver.get("https://linuxacademy.com/cp/ssologin")
    except Exception:
        # Do not leave an orphaned headless browser behind when the very
        # first page load fails; the original error is re-raised unchanged.
        driver.quit()
        raise
    return driver
def la_login(driver, args):
    """Fill in and submit the login form, then verify the login succeeded.

    The driver is expected to be on the login page already. Success is
    detected by the presence of the element with id ``navigationUsername``
    after the form has been submitted.

    Args:
        driver: Selenium WebDriver showing the login form.
        args: Settings dict; ``username`` and ``password`` are read.

    Exits with status 1 (SystemExit) when the login marker is not found.
    """
    # Fixed wait for the login form to render before looking up its fields.
    time.sleep(5)

    # Locator strategies are passed as plain strings ("name", "id") through
    # the generic find_element(); the find_element_by_* shortcuts no longer
    # exist in current Selenium releases.
    user = driver.find_element("name", 'username')
    user.send_keys(args['username'])
    password = driver.find_element("name", 'password')
    password.send_keys(args['password'])
    password.send_keys(Keys.RETURN)

    # Fixed wait for the post-login redirect to finish.
    time.sleep(15)
    try:
        driver.find_element("id", 'navigationUsername')
    except NoSuchElementException:
        print("Login failed. Exiting.")
        sys.exit(1)
    print("Login success.")
def load_course(driver, args):
    """Open the course page and return the course title.

    Args:
        driver: Logged-in Selenium WebDriver.
        args: Settings dict; ``course_url`` and ``ignore_missing_title`` are
            read.

    Returns:
        The text of the first element with class ``course-title``. If there
        is none and ``ignore_missing_title`` is set, ``"COURSE-ID-<id>"``
        where ``<id>`` is the last path segment of the course URL.

    Exits with status 3 (SystemExit) when no title is found and
    ``ignore_missing_title`` is not set.
    """
    driver.get(args['course_url'])
    # Fixed wait for the course page to render.
    time.sleep(5)

    # Generic find_elements() with a string locator; the
    # find_elements_by_* shortcuts no longer exist in current Selenium.
    titles = driver.find_elements("class name", 'course-title')
    if titles:
        return titles[0].text

    if args['ignore_missing_title']:
        # Strip a trailing slash first so ".../id/388/" still yields "388"
        # rather than an empty id.
        course_id = args['course_url'].rstrip('/').split('/')[-1]
        return "COURSE-ID-{}".format(course_id)

    print("Error: Could not find course title. Try running with --ignore-missing-title.")
    sys.exit(3)
def fetch_video_list(driver):
    """Collect the lesson links found on the currently loaded page.

    Every ``<a>`` element whose ``href`` contains ``/course/`` counts as a
    lesson, in document order.

    Args:
        driver: Selenium WebDriver positioned on the course page.

    Returns:
        A list of dicts with the keys ``url`` (the link target), ``counter``
        (1-based position, zero-padded to three digits) and ``title`` (first
        line of the link text).
    """
    video_list = []
    # Generic find_elements() with a string locator; the
    # find_elements_by_* shortcuts no longer exist in current Selenium.
    for lesson in driver.find_elements("tag name", 'a'):
        url = lesson.get_attribute('href')
        # Anchors without an href report None; skip them explicitly instead
        # of relying on a TypeError from the substring test.
        if not url or '/course/' not in url:
            continue
        video_list.append({
            'url': url,
            'counter': "{:03d}".format(len(video_list) + 1),
            'title': lesson.text.split('\n')[0],
        })
    return video_list
def write_cookies(driver, file):
    """Write the browser's current cookies to *file* in Netscape cookie format.

    Each cookie becomes one tab-separated line: domain, subdomain flag,
    path, secure flag, expiry, name, value. The file is rewritten from
    scratch on every call.

    Args:
        driver: Selenium WebDriver whose cookies are exported.
        file: Path of the cookie file to (over)write.
    """
    lines = ["# Netscape HTTP Cookie File\n\n"]
    for c in driver.get_cookies():
        # Session cookies carry no expiry; they are written as 0. The value
        # is forced to an integer because the format has no fractional times.
        expiry = int(c.get('expiry') or 0)
        any_domain_flag = str(c['domain'].startswith('.')).upper()
        lines.append("{}\t{}\t{}\t{}\t{}\t{}\t{}\n".format(
            c['domain'], any_domain_flag, c['path'], str(c['secure']).upper(), expiry, c['name'], c['value']))

    # Mode "w", not "a+": this function runs once per downloaded video, and
    # appending would pile up repeated headers and duplicate cookie lines.
    # The context manager closes the file even if a write fails.
    with open(file, 'w', encoding='utf-8') as cookies:
        cookies.writelines(lines)
def download_video(driver, args, course_title, video):
    """Open one lesson page and download its video with youtube-dl.

    The lesson is loaded in the browser first and the browser's cookies are
    exported to the cookie file, which youtube-dl then uses for the download.
    The video is saved as
    ``<download_dir>/<course title>/<counter> - <title>.<ext>``.

    Args:
        driver: Logged-in Selenium WebDriver.
        args: Settings dict; ``cookies_file`` and ``download_dir`` are read.
        course_title: Course name, used as the sub-directory name.
        video: Lesson dict with the keys ``url``, ``counter`` and ``title``.
    """
    driver.get(video['url'])
    # Fixed wait for the lesson page to load before exporting cookies.
    time.sleep(5)
    write_cookies(driver, args['cookies_file'])

    file_name = "{} - {}".format(video['counter'], video['title'].replace('/', '_'))
    print("Downloading: {}...".format(file_name))

    # The course title is a single directory name, so a "/" inside it must
    # not introduce extra path levels (same treatment as the video title).
    course_dir = course_title.replace('/', '_')
    # Everything except the extension is literal text, so any "%" in it is
    # doubled to keep youtube-dl from reading it as a template field.
    literal_path = '{}/{}/{}'.format(args['download_dir'], course_dir, file_name)
    outtmpl = literal_path.replace('%', '%%') + '.%(ext)s'

    ydl_opts = {'cookiefile': args['cookies_file'], 'force_generic_extractor': True, 'quiet': True, 'no_warnings': True,
                'outtmpl': outtmpl,
                'restrictfilenames': True}
    with youtube_dl.YoutubeDL(ydl_opts) as ydl:
        ydl.download([video['url']])
def main():
    """Run the whole download: parse settings, log in, list and fetch videos.

    Chrome is used when ``chromedriver`` is on the PATH, otherwise Firefox
    when ``geckodriver`` is; with neither the script exits with status 2.
    """
    args = parse_args()

    if shutil.which('chromedriver'):
        print("Launching Chrome...")
        driver = launch_browser('chrome')
    elif shutil.which('geckodriver'):
        print("Launching Firefox...")
        driver = launch_browser('firefox')
    else:
        print("Error: No browser driver found.")
        sys.exit(2)

    try:
        print("Logging in...")
        la_login(driver, args)
        print("Loading course...")
        course_title = load_course(driver, args)
        print("Course title: {}".format(course_title))
        print("Fetching video list...")
        video_list = fetch_video_list(driver)
        # Singular only for exactly one video ("0 videos", "1 video", "2 videos").
        print("Found {} video{}.".format(len(video_list), "" if len(video_list) == 1 else "s"))
        for video in video_list:
            download_video(driver, args, course_title, video)
    finally:
        # Always shut the headless browser down, including when a step
        # above leaves through sys.exit() or an exception.
        driver.quit()
        # The cookie file holds the login session, so it is removed on every
        # exit path. It may not exist at all (e.g. no videos were found).
        if os.path.exists(args['cookies_file']):
            os.remove(args['cookies_file'])
    print("Done.")


if __name__ == "__main__":
    main()