"""Download the videos of an interactive StudOn screencast and merge them.

Usage: pass the screencast URL and a destination file name (without
extension) on the command line.  The script opens Chrome through Selenium,
collects the ``src`` of the video on every slide, downloads each one with
``curl`` and concatenates them with ``ffmpeg`` (both must be on ``PATH``).
"""
import os
import shutil
import subprocess
import sys
import tempfile
from argparse import ArgumentParser
from time import sleep
from urllib.parse import urlparse

from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait

STUDON_ORIGIN = 'https://www.studon.fau.de'
STUDON_LOGIN_URL = 'https://www.studon.fau.de/studon/saml.php'

# Upper bound on slide navigation steps so a button that never reports
# aria-disabled="true" cannot loop forever.
MAX_SLIDES = 100

# Fallback used when a video URL's path carries no file extension.
# NOTE(review): 'mp4' is an assumption about the served format -- confirm.
DEFAULT_VIDEO_EXT = 'mp4'


def parse_args():
    """Parse the command line and return the namespace (``url``, ``outfile``)."""
    parser = ArgumentParser()
    parser.add_argument('url', help='Interactive StudOn screencast url')
    parser.add_argument('outfile',
                        help='Destination file name without extension')
    return parser.parse_args()


def find_video_urls(url):
    """Return the ``src`` attribute of the video on each slide, in order.

    Opens the StudOn login URL in Chrome, blocks until the browser is on the
    StudOn origin, then loads *url*, switches into the first iframe, rewinds
    to the first slide and walks forward collecting one video URL per slide.

    Raises whatever Selenium raises (e.g. a timeout after 10 seconds) when
    an expected element never appears.  The browser is always closed.
    """
    driver = webdriver.Chrome()
    wait = WebDriverWait(driver, 10)
    try:
        driver.get(STUDON_LOGIN_URL)
        # Poll until the browser is (back) on the StudOn origin -- presumably
        # while the user completes the login in the opened window.  A real
        # sleep is required here: implicitly_wait() only configures the
        # element-lookup timeout and would turn this into a busy loop.
        while not driver.current_url.startswith(STUDON_ORIGIN):
            sleep(1)

        driver.get(url)
        iframe = wait.until(
            EC.presence_of_element_located((By.TAG_NAME, 'iframe')))
        driver.switch_to.frame(iframe)

        video_selector = (By.CSS_SELECTOR, '.h5p-current video')
        prev_button_selector = (
            By.CSS_SELECTOR, '.h5p-footer-previous-slide[role="button"]')
        next_button_selector = (
            By.CSS_SELECTOR, '.h5p-footer-next-slide[role="button"]')

        # Rewind: click "previous" until the button reports itself disabled.
        wait.until(EC.presence_of_element_located(prev_button_selector))
        for _ in range(MAX_SLIDES):
            prev_button = driver.find_element(*prev_button_selector)
            if prev_button.get_attribute('aria-disabled') == 'true':
                break
            prev_button.click()
            sleep(0.1)

        # Walk forward, recording the current slide's video before advancing.
        video_urls = []
        for _ in range(MAX_SLIDES):
            video = wait.until(EC.presence_of_element_located(video_selector))
            video_urls.append(video.get_attribute('src'))
            next_button = wait.until(
                EC.presence_of_element_located(next_button_selector))
            if next_button.get_attribute('aria-disabled') == 'true':
                print('breaking')
                break
            next_button.click()
            # Short pause before reading the next slide's video element.
            sleep(0.25)
        return video_urls
    finally:
        driver.quit()


def _url_extension(url, default=DEFAULT_VIDEO_EXT):
    """Return the file extension (no dot) of the path component of *url*.

    Query strings and fragments are ignored; *default* is returned when the
    path has no extension.
    """
    ext = os.path.splitext(urlparse(url).path)[1]
    return ext[1:] if len(ext) > 1 else default


def download_and_merge_videos(urls, outfile):
    """Download every URL with curl and concatenate the files with ffmpeg.

    The result is written to ``outfile`` plus the extension of the first
    URL.  Returns True when all downloads and the merge succeed, False
    otherwise (including for an empty *urls*).  The temporary download
    directory is removed on every exit path.
    """
    if not urls:
        return False

    temp_dir = tempfile.mkdtemp()
    try:
        filenames = []
        for idx, url in enumerate(urls):
            filename = os.path.join(
                temp_dir, f'video_{idx}.{_url_extension(url)}')
            filenames.append(filename)
            # --fail makes curl exit non-zero on HTTP errors, so the status
            # check below also catches 4xx/5xx responses.
            status = subprocess.call(['curl', '--fail', '-o', filename, url])
            if status != 0:
                return False

        # Input list for ffmpeg's concat demuxer: one "file '<path>'" line
        # per downloaded part, in slide order.
        list_path = os.path.join(temp_dir, 'list.txt')
        with open(list_path, 'w') as f:
            f.writelines(f"file '{filename}'\n" for filename in filenames)

        target = outfile + '.' + _url_extension(urls[0])
        status = subprocess.call(['ffmpeg', '-f', 'concat', '-safe', '0',
                                  '-i', list_path, '-c', 'copy', target])
        return status == 0
    finally:
        # Best-effort cleanup, also on early return or exception.
        shutil.rmtree(temp_dir, ignore_errors=True)


def main():
    """Run the scrape/download/merge pipeline; exit with status 1 on failure."""
    args = parse_args()
    urls = find_video_urls(args.url)
    if not urls:
        sys.exit(1)
    if not download_and_merge_videos(urls, args.outfile):
        sys.exit(1)


if __name__ == '__main__':
    main()