compbau-downloader/download.py
Ludwig Lehnert d3a17f6651 finished
2025-02-15 19:47:25 +01:00

127 lines
3.3 KiB
Python

import os
import shutil
import subprocess
import tempfile
from argparse import ArgumentParser
from time import sleep
from urllib.parse import urlsplit

from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
# First page opened in the browser session, before the screencast URL is
# visited. The path suggests a SAML single-sign-on entry point (assumption
# based on the URL only; confirm against the StudOn deployment).
STUDON_LOGIN_URL = 'https://www.studon.fau.de/studon/saml.php'
def parse_args(argv=None):
    """Parse the command-line arguments of the downloader.

    Args:
        argv: Optional list of argument strings to parse instead of the
            process arguments. ``None`` (the default) makes ``argparse``
            fall back to ``sys.argv[1:]``, which is the original behaviour.

    Returns:
        An ``argparse.Namespace`` with the attributes ``url`` and ``outfile``.
    """
    parser = ArgumentParser()
    parser.add_argument('url', help='Interactive StudOn screencast url')
    parser.add_argument('outfile', help='Destination file name without extension')
    return parser.parse_args(argv)
def find_video_urls(url):
    """Collect the video source URLs of every slide of an H5P presentation.

    A Chrome session is started and pointed at ``STUDON_LOGIN_URL``. The
    function then polls until the browser's current URL is back on the StudOn
    host (presumably after the user has signed in interactively), opens
    *url*, switches into the first ``<iframe>`` on the page, rewinds the
    presentation to its first slide and steps forward slide by slide,
    recording the ``src`` attribute of the current slide's ``<video>``.

    Args:
        url: Address of the page that embeds the presentation.

    Returns:
        A list with one ``src`` attribute value per visited slide, in
        presentation order.

    Raises:
        selenium.common.exceptions.TimeoutException: if an expected element
            does not appear within the 10 second explicit wait.
    """
    # Upper bound on button clicks so a presentation whose navigation button
    # never reports ``aria-disabled`` cannot loop forever.
    max_slides = 100

    video_selector = (By.CSS_SELECTOR, '.h5p-current video')
    prev_button_selector = (
        By.CSS_SELECTOR,
        '.h5p-footer-previous-slide[role="button"]',
    )
    next_button_selector = (
        By.CSS_SELECTOR,
        '.h5p-footer-next-slide[role="button"]',
    )

    driver = webdriver.Chrome()
    wait = WebDriverWait(driver, 10)
    try:
        driver.get(STUDON_LOGIN_URL)
        # Poll once per second until the browser is on the StudOn host.
        # ``driver.implicitly_wait`` must not be used here: it only configures
        # the element lookup timeout and returns immediately, which would turn
        # this into a busy loop and interfere with the explicit waits below.
        while not driver.current_url.startswith('https://www.studon.fau.de'):
            sleep(1)

        driver.get(url)

        # The presentation is rendered inside an iframe; ``until`` hands back
        # the located element, so no second lookup is needed.
        iframe = wait.until(
            EC.presence_of_element_located((By.TAG_NAME, 'iframe'))
        )
        driver.switch_to.frame(iframe)

        # Rewind to the first slide.
        wait.until(EC.presence_of_element_located(prev_button_selector))
        for _ in range(max_slides):
            prev_button = driver.find_element(*prev_button_selector)
            if prev_button.get_attribute('aria-disabled') == 'true':
                break
            prev_button.click()
            sleep(0.1)

        # Walk forward and record the video of each slide.
        video_urls = []
        for _ in range(max_slides):
            video = wait.until(EC.presence_of_element_located(video_selector))
            video_urls.append(video.get_attribute('src'))

            next_button = driver.find_element(*next_button_selector)
            if next_button.get_attribute('aria-disabled') == 'true':
                print('breaking')
                break
            next_button.click()
            # Short pause so the next slide can become the current one.
            sleep(0.25)

        return video_urls
    finally:
        # Always close the browser, even when a wait timed out.
        driver.quit()
def _url_extension(url, default='mp4'):
    """Return the file extension of the path part of *url*, without the dot.

    Query string and fragment are ignored, so ``.../clip.mp4?token=x`` yields
    ``mp4``. *default* is returned when the path has no extension.
    """
    suffix = os.path.splitext(urlsplit(url).path)[1]
    return suffix.lstrip('.') or default


def download_and_merge_videos(urls, outfile):
    """Download every URL with ``curl`` and concatenate them with ``ffmpeg``.

    Each video is stored in a private temporary directory, an ffmpeg concat
    list is written next to them, and ffmpeg copies the streams (no
    re-encoding) into ``outfile`` plus the extension of the first URL. The
    temporary directory is removed on every exit path.

    Args:
        urls: Sequence of video URLs in the order they should be merged.
        outfile: Destination path without file extension.

    Returns:
        ``True`` when all downloads and the merge exited with status 0,
        ``False`` otherwise (including when *urls* is empty).
    """
    if not urls:
        # Nothing to merge; also avoids indexing into an empty sequence.
        return False

    temp_dir = tempfile.mkdtemp()
    try:
        filenames = []
        for idx, url in enumerate(urls):
            filename = os.path.join(
                temp_dir, f'video_{idx}.{_url_extension(url)}'
            )
            # --fail makes curl exit non-zero on HTTP errors instead of
            # saving the server's error page as if it were the video.
            status = subprocess.call(['curl', '--fail', '-o', filename, url])
            if status != 0:
                return False
            filenames.append(filename)

        # Input list for ffmpeg's concat demuxer: one "file '<path>'" per line.
        list_path = os.path.join(temp_dir, 'list.txt')
        with open(list_path, 'w') as f:
            f.writelines(f"file '{filename}'\n" for filename in filenames)

        target = outfile + '.' + _url_extension(urls[0])
        # "-safe 0" is required because the list contains absolute paths.
        status = subprocess.call(['ffmpeg', '-f', 'concat', '-safe', '0',
                                  '-i', list_path, '-c', 'copy', target])
        return status == 0
    finally:
        # Best-effort cleanup; runs for failed downloads as well.
        shutil.rmtree(temp_dir, ignore_errors=True)
def main():
    """Run the downloader: parse arguments, scrape the URLs, download, merge.

    Raises:
        SystemExit: with exit status 1 when no video URL was found or when
            downloading/merging failed. The message is printed to stderr by
            the interpreter.
    """
    args = parse_args()

    urls = find_video_urls(args.url)
    if not urls:
        # ``raise SystemExit`` instead of the ``exit`` helper, which is
        # injected by the ``site`` module and is not guaranteed to exist.
        raise SystemExit('error: no video urls found')

    if not download_and_merge_videos(urls, args.outfile):
        raise SystemExit('error: downloading or merging the videos failed')
# Only run when executed as a script, so importing this module (for reuse or
# testing) does not start a browser session.
if __name__ == '__main__':
    main()