diff --git a/download b/download
index 6ade6fb..d743784 100755
--- a/download
+++ b/download
@@ -1 +1,30 @@
-#!/bin/env python3
+#!/bin/env fish
+
+if ! test -d venv
+    /bin/env python3 -m venv venv
+    . venv/bin/activate.fish
+    /bin/env python3 -m pip install -r requirements.txt
+    echo
+else
+    . venv/bin/activate.fish
+end
+
+if ! type -q curl
+    echo "curl is required to run this script"
+    exit 1
+end
+
+if ! type -q chromedriver || ! type -q chromium-browser
+    echo "both chromium-browser and chromedriver are required to run this script"
+    exit 1
+end
+
+if ! type -q ffmpeg
+    echo "ffmpeg is required to run this script"
+    exit 1
+end
+
+if ! /bin/env python3 download.py $argv
+    echo "download failed"
+    exit 1
+end
diff --git a/download.py b/download.py
new file mode 100644
index 0000000..3d3bb74
--- /dev/null
+++ b/download.py
@@ -0,0 +1,167 @@
+"""Download every video of an interactive StudOn screencast and merge them."""
+import os
+import shutil
+import sys
+import tempfile
+import subprocess
+from time import sleep
+from argparse import ArgumentParser
+from urllib.parse import urlparse
+
+from selenium import webdriver
+from selenium.webdriver.common.by import By
+from selenium.webdriver.support.ui import WebDriverWait
+from selenium.webdriver.support import expected_conditions as EC
+
+
+STUDON_LOGIN_URL = 'https://www.studon.fau.de/studon/saml.php'
+
+
+def parse_args():
+    """Parse the screencast URL and the output file name from the command line."""
+    parser = ArgumentParser()
+    parser.add_argument('url', help='Interactive StudOn screencast url')
+    parser.add_argument('outfile', help='Destination file name without extension')
+    return parser.parse_args()
+
+
+def find_video_urls(url):
+    """Collect the video source URL of every slide of a screencast.
+
+    Opens Chrome on the StudOn login page, waits until the browser is on a
+    StudOn URL, then loads ``url`` and steps through the slides inside the
+    first iframe of that page.
+
+    Returns the ``src`` attribute of each slide's video, in slide order.
+    """
+    driver = webdriver.Chrome()
+    wait = WebDriverWait(driver, 10)
+
+    try:
+        driver.get(STUDON_LOGIN_URL)
+
+        # Poll once per second until the browser is on a StudOn page.
+        # implicitly_wait() only sets the element lookup timeout and does
+        # not pause, so sleep() is used for the delay.
+        while not driver.current_url.startswith('https://www.studon.fau.de'):
+            sleep(1)
+
+        driver.get(url)
+
+        wait.until(EC.presence_of_element_located((By.TAG_NAME, 'iframe')))
+        iframe = driver.find_element(By.TAG_NAME, 'iframe')
+        driver.switch_to.frame(iframe)
+
+        video_selector = (
+            By.CSS_SELECTOR,
+            '.h5p-current video'
+        )
+
+        prev_button_selector = (
+            By.CSS_SELECTOR,
+            '.h5p-footer-previous-slide[role="button"]'
+        )
+
+        next_button_selector = (
+            By.CSS_SELECTOR,
+            '.h5p-footer-next-slide[role="button"]'
+        )
+
+        # Rewind to the first slide (gives up after 100 clicks).
+        wait.until(EC.presence_of_element_located(prev_button_selector))
+        for _ in range(100):
+            prev_button = driver.find_element(*prev_button_selector)
+            if prev_button.get_attribute('aria-disabled') == 'true':
+                break
+
+            prev_button.click()
+            sleep(0.1)
+
+        # Walk forward, recording each slide's video (at most 100 slides).
+        video_urls = []
+        for _ in range(100):
+            wait.until(EC.presence_of_element_located(video_selector))
+            video = driver.find_element(*video_selector)
+            video_urls.append(video.get_attribute('src'))
+
+            next_button = driver.find_element(*next_button_selector)
+            if next_button.get_attribute('aria-disabled') == 'true':
+                break
+
+            next_button.click()
+            sleep(0.25)
+
+        return video_urls
+
+    finally:
+        driver.quit()
+
+
+def _url_extension(url):
+    """Return the extension of the path of ``url``, including the dot.
+
+    The query string and fragment are ignored. The result is an empty
+    string when the path has no extension.
+    """
+    return os.path.splitext(urlparse(url).path)[1]
+
+
+def download_and_merge_videos(urls, outfile):
+    """Download ``urls`` with curl and concatenate them with ffmpeg.
+
+    The merged file is written to ``outfile`` plus the extension of the
+    first URL. Downloads are stored in a temporary directory that is
+    removed before returning.
+
+    Returns True when every download and the merge succeeded, else False.
+    """
+    if not urls:
+        return False
+
+    temp_dir = tempfile.mkdtemp()
+
+    try:
+        filenames = []
+
+        for idx, url in enumerate(urls):
+            ext = _url_extension(url)
+            filename = os.path.join(temp_dir, f'video_{idx}{ext}')
+            filenames.append(filename)
+
+            # --fail turns HTTP errors into a non-zero exit status instead
+            # of saving the error page; --location follows redirects.
+            status = subprocess.call(
+                ['curl', '--fail', '--location', '-o', filename, url])
+            if status != 0:
+                return False
+
+        list_path = os.path.join(temp_dir, 'list.txt')
+        with open(list_path, 'w') as f:
+            f.writelines(f"file '{name}'\n" for name in filenames)
+
+        ext = _url_extension(urls[0])
+        status = subprocess.call(['ffmpeg', '-f', 'concat', '-safe', '0',
+                                  '-i', list_path, '-c', 'copy', outfile + ext])
+
+        return status == 0
+
+    finally:
+        # Runs on every exit path, so a failed download no longer leaks the
+        # temporary directory.
+        shutil.rmtree(temp_dir, ignore_errors=True)
+
+
+def main():
+    """Run the command line tool; exit with status 1 on any failure."""
+    args = parse_args()
+
+    urls = find_video_urls(args.url)
+    if not urls:
+        sys.exit(1)
+
+    if not download_and_merge_videos(urls, args.outfile):
+        sys.exit(1)
+
+
+if __name__ == '__main__':
+    main()
diff --git a/setup.fish b/setup.fish
deleted file mode 100644
index d09b492..0000000
--- a/setup.fish
+++ /dev/null
@@ -1,7 +0,0 @@
-/bin/env python3 -m venv venv
-. venv/bin/activate.fish
-python3 -m pip install -r requirements.txt
-
-echo
-echo "Usage: ./download "
-