From d4055ac0c1a098d3e809a24d02cc9522f957d37f Mon Sep 17 00:00:00 2001
From: Ludwig Lehnert
Date: Thu, 11 Jul 2024 21:11:43 +0200
Subject: [PATCH] initial commit

---
 .gitignore       |   3 +
 README.md        |  14 +++++
 dl               |  21 +++++++
 lib.py           | 154 +++++++++++++++++++++++++++++++++++++++++++++++
 pyvenv.cfg       |   5 ++
 requirements.txt |   2 +
 6 files changed, 199 insertions(+)
 create mode 100644 .gitignore
 create mode 100644 README.md
 create mode 100755 dl
 create mode 100644 lib.py
 create mode 100644 pyvenv.cfg
 create mode 100644 requirements.txt

diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..54d2f43
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,3 @@
+__pycache__
+venv/
+**/*.mp4
\ No newline at end of file
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..0c63c26
--- /dev/null
+++ b/README.md
@@ -0,0 +1,14 @@
+# FAU-TV Video Downloader
+
+Downloads all videos of a given course (even if the download has been restricted) to a local folder.
+This software is provided without warranty. Usage is discouraged! Usage of this software is at your own risk!
+
+## Usage
+
+```bash
+./dl '<course id>' '[output directory]' '[starter url]'
+```
+
+## Procedure
+
+Upon starting, a Firefox instance is opened, prompting you to log in using IDM SSO. As soon as the URL changes to something starting with `https://www.fau.tv`, the download starts. If you do not want to provide your credentials, you can provide `https://www.fau.tv` as the starter URL.
diff --git a/dl b/dl
new file mode 100755
index 0000000..3b41c08
--- /dev/null
+++ b/dl
@@ -0,0 +1,37 @@
+#!/usr/bin/env python3
+"""Download all clips of a FAU-TV course into a local directory."""
+import os
+import sys
+
+# This file is executed as a script and therefore has no parent package, so
+# the relative form `from .lib import *` fails; lib.py is found through the
+# script's directory instead.
+from lib import download_clip, get_course_clip_ids, load_token
+
+
+def main():
+    """Read the arguments, log in and download every clip of the course."""
+    if len(sys.argv) < 2:
+        sys.exit(f'usage: {sys.argv[0]} COURSE_ID [OUTPUT_DIR] [STARTER_URL]')
+
+    course_id = sys.argv[1]
+    out_dir = sys.argv[2] if len(sys.argv) >= 3 else './out'
+    auth_url = sys.argv[3] if len(sys.argv) >= 4 else None
+
+    # Without a starter url load_token() uses its own default.
+    if auth_url is not None:
+        load_token(auth_url)
+    else:
+        load_token()
+
+    os.makedirs(out_dir, exist_ok=True)
+
+    for number, clip_id in enumerate(get_course_clip_ids(course_id), start=1):
+        print(f'downloading clip {clip_id}')
+        # `04d` zero-pads to four digits; the former ` 04d` also reserved a
+        # sign column, which put a blank into every file name.
+        download_clip(clip_id, f'{out_dir}/{number:04d}_{clip_id}.mp4')
+
+
+if __name__ == '__main__':
+    main()
diff --git a/lib.py b/lib.py
new file mode 100644
index 0000000..646f3f1
--- /dev/null
+++ b/lib.py
@@ -0,0 +1,191 @@
+"""Helpers for logging in to FAU-TV and downloading the clips of a course."""
+import re
+import shutil
+import subprocess
+import time
+from dataclasses import dataclass
+
+import requests
+from selenium import webdriver
+
+
+# Session cookies captured by load_token(); None until it has been called.
+_token: "Token | None" = None
+
+# Matches a clip link of a course page; group 2 is the numeric clip id.
+_CLIP_LINK_RE = re.compile(r'(/clip/id/)([0-9]+)(\"\s*class=\"preview\")')
+
+
+@dataclass
+class Token:
+    """Cookie values of a logged-in browser session."""
+
+    auth_token: str | None
+    session_id: str | None
+    session_ci: str | None
+
+    def cookies(self) -> dict[str, str]:
+        """Return the cookies to send along, leaving out unset ones."""
+        values = {
+            "SimpleSAMLAuthToken": self.auth_token,
+            "SimpleSAMLSessionID": self.session_id,
+            "session_ci": self.session_ci,
+        }
+        # load_token() stores None for a cookie the browser does not have.
+        return {
+            name: value for name, value in values.items() if value is not None
+        }
+
+
+@dataclass
+class ClipDetails:
+    """Media ids and playlist urls scraped from a clip page."""
+
+    combined_media_id: str | None = None
+    combined_playlist_url: str | None = None
+    camera_media_id: str | None = None
+    camera_playlist_url: str | None = None
+    slides_media_id: str | None = None
+    slides_playlist_url: str | None = None
+
+
+def _cookies() -> dict[str, str]:
+    """Return the session cookies; fail clearly without a prior login."""
+    if _token is None:
+        raise RuntimeError('load_token() has to be called first')
+    return _token.cookies()
+
+
+def load_token(auth_url: str = "https://www.fau.tv/auth/sso") -> None:
+    """Open Firefox on *auth_url* and remember the session cookies.
+
+    Blocks until the url shown by the browser starts with
+    ``https://www.fau.tv/`` and then stores the cookies in the module-level
+    token that the other functions of this module read.
+    """
+    global _token
+
+    def get_value(cookie):
+        return None if cookie is None else cookie.get("value")
+
+    driver = webdriver.Firefox()
+    try:
+        driver.get(auth_url)
+
+        while not driver.current_url.startswith('https://www.fau.tv/'):
+            time.sleep(0.5)
+
+        _token = Token(
+            auth_token=get_value(driver.get_cookie("SimpleSAMLAuthToken")),
+            session_id=get_value(driver.get_cookie("SimpleSAMLSessionID")),
+            session_ci=get_value(driver.get_cookie("session_ci")),
+        )
+    finally:
+        # quit() ends the WebDriver session even when the wait above is
+        # interrupted; close() only closes the current window.
+        driver.quit()
+
+    # Name the captured cookies without echoing their secret values.
+    print('captured cookies: ' + ', '.join(_token.cookies()))
+
+
+def get_course_clip_ids(course_id: str) -> list[str]:
+    """Return the clip ids linked on the course page, in page order."""
+    url = f'https://www.fau.tv/course/id/{course_id}'
+    with requests.get(url, cookies=_cookies()) as r:
+        return [match[1] for match in _CLIP_LINK_RE.findall(r.text)]
+
+
+def get_clip_details(clip_id: str) -> ClipDetails:
+    """Scrape media ids and playlist urls from the page of *clip_id*.
+
+    Fields for which the page contains no match stay None.
+    """
+    url = f'https://www.fau.tv/clip/id/{clip_id}'
+    with requests.get(url, cookies=_cookies()) as r:
+        page = r.text
+
+    details = ClipDetails()
+    for keyword in ("combined", "camera", "slides"):
+        media_match = re.search(
+            r'(' + keyword + r'Sources[^,]*,\s+mediaid\:\s+\")([0-9]+)', page
+        )
+        if media_match is not None:
+            setattr(details, keyword + "_media_id", media_match[2])
+
+        playlist_match = re.search(
+            r'(file\:\s+\")([^\"]*' + keyword + r'\.smil[^\"]*)(\")', page
+        )
+        if playlist_match is not None:
+            setattr(details, keyword + "_playlist_url", playlist_match[2])
+
+    return details
+
+
+def download_media(media_id: str, outfile_path: str) -> bool:
+    """Download the file of *media_id* to *outfile_path*.
+
+    Returns False, without touching *outfile_path*, when the response status
+    is not 200, and True once the response body has been written.
+    """
+    url = ('https://itunes.video.uni-erlangen.de/get/file/'
+           + str(media_id) + '?download=1')
+
+    with requests.get(url, stream=True, cookies=_cookies()) as r:
+        if r.status_code != 200:
+            return False
+
+        with open(outfile_path, 'wb') as f:
+            shutil.copyfileobj(r.raw, f)
+
+    return True
+
+
+def download_playlist(playlist_url: str, outfile_path: str) -> bool:
+    """Let ffmpeg copy the streams of *playlist_url* into *outfile_path*.
+
+    Returns True when ffmpeg exits with status 0.
+    """
+    result = subprocess.run([
+        'ffmpeg',
+        '-i', playlist_url,
+        '-c', 'copy',
+        outfile_path,
+    ])
+    return result.returncode == 0
+
+
+def download_clip(clip_id: str, outfile_path: str) -> None:
+    """Download the clip *clip_id* to *outfile_path*.
+
+    The direct file download is tried first and the playlist is the fallback.
+    For both, the combined variant is preferred over camera, and camera over
+    slides.
+
+    Raises:
+        LookupError: the file download is not possible and the clip page
+            contains no playlist url.
+    """
+    details = get_clip_details(clip_id)
+
+    # Each next() gets a default: without one, a clip page lacking a media id
+    # raised StopIteration before the playlist fallback was ever reached.
+    media_id = next((candidate for candidate in (
+        details.combined_media_id,
+        details.camera_media_id,
+        details.slides_media_id,
+    ) if candidate is not None), None)
+
+    if media_id is not None and download_media(media_id, outfile_path):
+        return
+
+    playlist_url = next((candidate for candidate in (
+        details.combined_playlist_url,
+        details.camera_playlist_url,
+        details.slides_playlist_url,
+    ) if candidate is not None), None)
+
+    if playlist_url is None:
+        raise LookupError(f'no downloadable source for clip {clip_id}')
+
+    download_playlist(playlist_url, outfile_path)
diff --git a/pyvenv.cfg b/pyvenv.cfg
new file mode 100644
index 0000000..ec986ab
--- /dev/null
+++ b/pyvenv.cfg
@@ -0,0 +1,5 @@
+home = /usr/bin
+include-system-site-packages = false
+version = 3.12.4
+executable = /usr/bin/python3.12
+command = /usr/bin/python -m venv /home/ludwig/git/fau-tv-dl
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 0000000..fdd089e
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1,2 @@
+selenium
+requests
\ No newline at end of file