Initial commit

This commit is contained in:
Michael Reber 2019-11-21 12:44:04 +01:00
commit d088d935f0
2 changed files with 283 additions and 0 deletions

68
README.md Normal file
View File

@ -0,0 +1,68 @@
# Downloader for Linux Academy
This script will download courses from [Linux Academy](https://linuxacademy.com) for offline consumption.
## Important Notice
**Use of this script is for personal consumption of content only.** The content this script downloads is protected by copyright and must not be shared.
#### Good uses
* Downloading a lesson before embarking to a destination with little or no internet access.
* Keep lessons you've completed for a personal backup.
#### Bad uses
* Uploading the downloaded videos to a content sharing site.
* Sending copies of the videos to your friends and family.
* Hoarding lessons for future consumption beyond your subscription period.
Please exercise good judgement when using this script. The folks at Linux Academy work hard to make quality courses and you should support them by paying for a subscription if you can. You may also wish to speak with your employer to find out if they would be willing to pay for your subscription.
## Installation
Tested on a fresh install of Ubuntu 18.04 desktop. Your mileage may vary depending on your OS.
sudo apt update
sudo apt install python3 python3-pip git unzip ffmpeg
sudo pip3 install selenium youtube-dl
### Browser
You will need Chrome or Firefox and its matching driver.
#### Chrome
Install [Google Chrome](https://www.google.com/chrome/) and download the appropriate [ChromeDriver](https://chromedriver.chromium.org/downloads) version. Make sure the `chromedriver` executable is in your PATH (e.g., `/usr/local/bin`).
sudo dpkg -i google-chrome-stable_current_amd64.deb
unzip chromedriver_linux64.zip
sudo mv chromedriver /usr/local/bin
#### Firefox
Install Mozilla Firefox and download [geckodriver](https://github.com/mozilla/geckodriver/releases).
sudo apt install firefox
tar xzf geckodriver-*-linux64.tar.gz
sudo mv geckodriver /usr/local/bin
## Usage
This will only work with an active Linux Academy subscription. If you do not have one, please get one [here](https://linuxacademy.com/pricing/).
Usage:
./linuxacademy-dl.py [-u|--username] [-p|--password] [-d|--download-dir] [-c|--cookies-file] [--ignore-missing-title] COURSE_URL
Options may be replaced with environment variables. Command line options take precedence.
LADL_USERNAME, LADL_PASSWORD, LADL_DIR, LADL_COOKIES
Examples:
$ ./linuxacademy-dl.py -u person@example.com -p p@ssw0rd https://linuxacademy.com/cp/modules/view/id/346
$ export LADL_USERNAME=person@example.com
$ export LADL_PASSWORD=p@ssw0rd
$ export LADL_DIR=/home/jdoe/linux-academy
$ ./linuxacademy-dl.py https://linuxacademy.com/cp/modules/view/id/346
The username/email and password are required. The cookies file defaults to `$SCRIPT_DIR/cookies.txt` and the download directory defaults to `$SCRIPT_DIR/download`, where `$SCRIPT_DIR` is the directory containing the `linuxacademy-dl.py` script.

215
linuxacademy-dl.py Normal file
View File

@ -0,0 +1,215 @@
#!/usr/bin/env python3
import getopt
import os
import shutil
import sys
import time

import youtube_dl
from selenium import webdriver
from selenium.common.exceptions import NoSuchElementException
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
def usage(error=0, msg=None):
    """Print the command-line help and terminate the process.

    Args:
        error: Exit status for the process. A non-zero value marks the call
            as an error report, in which case the text is written to stderr
            instead of stdout.
        msg: Optional message printed before the usage text.

    Raises:
        SystemExit: Always, carrying ``error`` as the exit status.
    """
    # Help the user asked for belongs on stdout; diagnostics belong on stderr
    # so they are not mixed into piped output.
    stream = sys.stderr if error else sys.stdout
    lines = [msg] if msg else []
    lines += [
        "Usage:",
        "\t{} [-u|--username] [-p|--password] [-d|--download-dir] [-c|--cookies-file] "
        "[--ignore-missing-title] COURSE_URL\n".format(sys.argv[0]),
        "\tOptions may be replaced with environment variables. Command line options take precedence.",
        "\t\tLADL_USERNAME, LADL_PASSWORD, LADL_DIR, LADL_COOKIES\n",
        "Examples:",
        "\t$ ./linuxacademy-dl.py -u person@example.com -p p@ssw0rd https://linuxacademy.com/cp/modules/view/id/346\n",
        "\t$ export LADL_USERNAME=person@example.com",
        "\t$ export LADL_PASSWORD=p@ssw0rd",
        "\t$ export LADL_DIR=/home/jdoe/linux-academy",
        "\t$ ./linuxacademy-dl.py https://linuxacademy.com/cp/modules/view/id/346",
    ]
    print("\n".join(lines), file=stream)
    # The bare exit() helper is injected by the site module and is absent
    # under `python -S` or in frozen builds; sys.exit() is always available.
    sys.exit(error)
def parse_args():
    """Collect the run configuration from the command line and environment.

    Command-line options take precedence over the ``LADL_USERNAME``,
    ``LADL_PASSWORD``, ``LADL_DIR`` and ``LADL_COOKIES`` environment
    variables. When neither is given, the download directory defaults to
    ``download`` and the cookies file to ``cookies.txt``, both next to this
    script.

    Returns:
        A dict with the keys ``username``, ``password``, ``download_dir``,
        ``cookies_file``, ``course_url`` and ``ignore_missing_title``.

    Exits through ``usage()`` when the options cannot be parsed, when help is
    requested, or when the course URL, username or password is missing.
    """
    try:
        opts, args = getopt.getopt(
            sys.argv[1:], 'hu:p:d:c:',
            ['help', 'username=', 'password=', 'download-dir=', 'cookies-file=',
             'ignore-missing-title'])
    except getopt.GetoptError as err:
        usage(1, str(err))

    username = password = download_dir = cookies_file = None
    ignore_missing_title = False
    for opt, arg in opts:
        if opt in ('-h', '--help'):
            usage()
        elif opt in ('-u', '--username'):
            username = arg
        elif opt in ('-p', '--password'):
            password = arg
        elif opt in ('-d', '--download-dir'):
            download_dir = arg
        elif opt in ('-c', '--cookies-file'):
            cookies_file = arg
        elif opt == '--ignore-missing-title':
            ignore_missing_title = True

    # Validate the positional argument only after the options were examined,
    # so that `-h` on its own prints the help instead of "Missing course URL."
    if not args:
        usage(1, "Missing course URL.")

    if username is None:
        username = os.environ.get('LADL_USERNAME')
    if not username:
        usage(1, "Missing username.")
    if password is None:
        password = os.environ.get('LADL_PASSWORD')
    if not password:
        usage(1, "Missing password.")

    script_dir = os.path.dirname(os.path.realpath(__file__))
    if download_dir is None:
        download_dir = os.environ.get('LADL_DIR', os.path.join(script_dir, 'download'))
    if cookies_file is None:
        cookies_file = os.environ.get('LADL_COOKIES', os.path.join(script_dir, 'cookies.txt'))

    return {
        'username': username,
        'password': password,
        'download_dir': download_dir,
        'cookies_file': cookies_file,
        'course_url': args[0],
        'ignore_missing_title': ignore_missing_title,
    }
def launch_browser(name):
    """Start a headless browser and open the Linux Academy landing page.

    Args:
        name: ``'chrome'`` starts Chrome; any other value starts Firefox.

    Returns:
        The Selenium WebDriver instance after it has loaded
        https://linuxacademy.com/.
    """
    if name == 'chrome':
        options = webdriver.ChromeOptions()
        options.add_argument("--headless")
        options.add_argument("--window-size=1920,1080")
        driver = webdriver.Chrome(options=options)
    else:
        options = webdriver.FirefoxOptions()
        options.add_argument("--headless")
        driver = webdriver.Firefox(options=options)
    try:
        if name != 'chrome':
            # --window-size is a Chrome switch; for Firefox the viewport is
            # set through the WebDriver API instead.
            driver.set_window_size(1920, 1080)
        driver.get("https://linuxacademy.com/")
    except Exception:
        # Do not leave an orphaned headless browser behind when start-up fails.
        driver.quit()
        raise
    return driver
def la_login(driver, args):
    """Log in to Linux Academy through the site's login form.

    Args:
        driver: Selenium WebDriver currently showing the landing page.
        args: Configuration dict; ``args['username']`` and
            ``args['password']`` are typed into the form.

    Exits with status 1 when the ``navigationUsername`` element is not
    present after the form was submitted.
    """
    # find_element(By..., ...) works on Selenium 3 and 4 alike; the
    # find_element_by_* shortcuts are no longer available in current Selenium 4.
    driver.find_element(By.PARTIAL_LINK_TEXT, 'Log In').click()
    time.sleep(5)  # fixed pause to let the login form load
    driver.find_element(By.NAME, 'username').send_keys(args['username'])
    password = driver.find_element(By.NAME, 'password')
    password.send_keys(args['password'])
    password.send_keys(Keys.RETURN)
    time.sleep(10)  # fixed pause to let the post-login page load
    try:
        driver.find_element(By.ID, 'navigationUsername')
    except NoSuchElementException:
        print("Login failed. Exiting.")
        sys.exit(1)
    print("Login success.")
def load_course(driver, args):
    """Open the course page and return the course title.

    Args:
        driver: Logged-in Selenium WebDriver.
        args: Configuration dict; reads ``course_url`` and
            ``ignore_missing_title``.

    Returns:
        The text of the first element with class ``course-title``. When no
        such element exists and ``ignore_missing_title`` is set, returns
        ``COURSE-ID-<last URL path segment>`` instead.

    Exits with status 3 when no title is found and ``ignore_missing_title``
    is not set.
    """
    driver.get(args['course_url'])
    time.sleep(5)  # fixed pause to let the course page render
    # find_elements(By..., ...) works on Selenium 3 and 4 alike.
    titles = driver.find_elements(By.CLASS_NAME, 'course-title')
    if titles:
        return titles[0].text
    if args['ignore_missing_title']:
        # Strip a trailing slash first, otherwise the last segment is empty.
        course_id = args['course_url'].rstrip('/').split('/')[-1]
        return "COURSE-ID-{}".format(course_id)
    print("Error: Could not find course title. Try running with --ignore-missing-title.")
    sys.exit(3)
def fetch_video_list(driver):
    """Collect the lesson links found on the currently loaded page.

    Every ``<a>`` element whose ``href`` contains ``/course/`` counts as a
    lesson.

    Args:
        driver: Selenium WebDriver showing the course page.

    Returns:
        A list of dicts in page order, each with ``url``, ``counter`` (a
        1-based, zero-padded three-digit string) and ``title`` (the first
        line of the link text).
    """
    video_list = []
    # find_elements(By..., ...) works on Selenium 3 and 4 alike.
    for link in driver.find_elements(By.TAG_NAME, 'a'):
        url = link.get_attribute('href')
        # Anchors without an href yield None; skip them explicitly rather
        # than relying on a swallowed TypeError.
        if not url or '/course/' not in url:
            continue
        video_list.append({
            'url': url,
            'counter': "{:03d}".format(len(video_list) + 1),
            'title': link.text.split('\n')[0],
        })
    return video_list
def write_cookies(driver, file):
    """Dump the browser's current cookies to a Netscape-format cookie file.

    The file is rewritten on every call. This function runs once per video,
    so appending would pile up repeated headers and duplicate cookies.

    Args:
        driver: Object whose ``get_cookies()`` returns a list of cookie dicts
            with ``domain``, ``path``, ``secure``, ``name``, ``value`` and an
            optional ``expiry``.
        file: Path of the cookie file to write.
    """
    with open(file, 'w', encoding='utf-8') as cookies:
        cookies.write("# Netscape HTTP Cookie File\n\n")
        for c in driver.get_cookies():
            fields = (
                c['domain'],
                # Second column: TRUE when the domain starts with a dot.
                str(c['domain'].startswith('.')).upper(),
                c['path'],
                str(c['secure']).upper(),
                # Cookies without an expiry are written with expiry 0.
                c.get('expiry') or 0,
                c['name'],
                c['value'],
            )
            cookies.write("\t".join(str(field) for field in fields) + "\n")
def download_video(driver, args, course_title, video):
    """Open one lesson page and download its video with youtube-dl.

    The browser's cookies are written to ``args['cookies_file']`` after the
    page has loaded and handed to youtube-dl. The video is stored as
    ``<download_dir>/<course_title>/<counter> - <title>.<ext>``.

    Args:
        driver: Logged-in Selenium WebDriver.
        args: Configuration dict; reads ``cookies_file`` and ``download_dir``.
        course_title: Name of the per-course sub-directory.
        video: Dict with ``url``, ``counter`` and ``title``.
    """
    driver.get(video['url'])
    time.sleep(5)  # fixed pause to let the lesson page load
    write_cookies(driver, args['cookies_file'])
    file_name = "{} - {}".format(video['counter'], video['title'].replace('/', '_'))
    print("Downloading: {}...".format(file_name))
    # A '/' in the course title would create unintended sub-directories.
    course_dir = course_title.replace('/', '_')
    # outtmpl is a %-style template: a literal '%' in the path or a title
    # has to be doubled, otherwise youtube-dl treats it as a placeholder.
    outtmpl = os.path.join(
        args['download_dir'].replace('%', '%%'),
        course_dir.replace('%', '%%'),
        file_name.replace('%', '%%') + '.%(ext)s')
    ydl_opts = {
        'cookiefile': args['cookies_file'],
        'force_generic_extractor': True,
        'quiet': True,
        'no_warnings': True,
        'outtmpl': outtmpl,
        'restrictfilenames': True,
    }
    with youtube_dl.YoutubeDL(ydl_opts) as ydl:
        ydl.download([video['url']])
def main():
    """Run the downloader: launch a browser, log in, download every lesson.

    Chrome is used when ``chromedriver`` is on the PATH, otherwise Firefox
    when ``geckodriver`` is. Exits with status 2 when neither driver is found.
    The cookie file is deleted and the browser is closed when the run ends,
    whether it succeeded or failed.
    """
    args = parse_args()
    if shutil.which('chromedriver'):
        print("Launching Chrome...")
        driver = launch_browser('chrome')
    elif shutil.which('geckodriver'):
        print("Launching Firefox...")
        driver = launch_browser('firefox')
    else:
        print("Error: No browser driver found.")
        sys.exit(2)
    try:
        print("Logging in...")
        la_login(driver, args)
        print("Loading course...")
        course_title = load_course(driver, args)
        print("Course title: {}".format(course_title))
        print("Fetching video list...")
        video_list = fetch_video_list(driver)
        print("Found {} video{}.".format(len(video_list), "" if len(video_list) == 1 else "s"))
        for video in video_list:
            download_video(driver, args, course_title, video)
    finally:
        # The cookie file holds live session cookies, so remove it on failure
        # too. It does not exist when no video was processed.
        try:
            os.remove(args['cookies_file'])
        except FileNotFoundError:
            pass
        # Always shut the headless browser down so it does not linger.
        driver.quit()
    print("Done.")
# Entry point: start the downloader only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == "__main__":
    main()