From 2f6d487676e1829085548fca4bb2d837e6ad9148 Mon Sep 17 00:00:00 2001 From: Leon Haag-Fank Date: Fri, 1 Dec 2023 14:42:14 +0100 Subject: [PATCH] Initial commit --- README.md | 29 +++++++++++++++++++ pep5-dl | 85 +++++++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 114 insertions(+) create mode 100644 README.md create mode 100755 pep5-dl diff --git a/README.md b/README.md new file mode 100644 index 0000000..2b3d713 --- /dev/null +++ b/README.md @@ -0,0 +1,29 @@ +# pep5-dl +An unofficial script to download the slides of the 2023 PEP5 lecture. + +## Disclaimer +This script uses your login credentials. +If the script is run multiple times without terminating properly (i.e. before the logout procedure has run), the Übungsgruppensystem might block you for several hours. +Use at your own risk. + +## Installation +Requires `playwright` (with the `chromium` driver) and `PyPDF4`: +```sh +pip install playwright PyPDF4 +playwright install chromium +``` +Then you just need to download the [`pep5-dl`](https://git.haagfank.de/LnLcFlx/pep5-dl/raw/branch/master/pep5-dl) file from this repository. + +### (Without `PyPDF4`) +If you are on Linux and have `pdfunite` installed, you can alternatively supply `--merger=pdfunite` and do not need `PyPDF4`. + +Alternatively, you can supply `--merger=none --keep --tmpdir=<dir>` and simply download the individual PDFs to `<dir>`. + +## Usage +If you simply want to download all current slides into one file `slides.pdf` in the current directory, run +```sh +python pep5-dl '<user>' '<pwd>' +``` +where you have to replace `<user>` and `<pwd>` with your credentials. + +For help and more options, run `pep5-dl --help`. 
#!/usr/bin/env python
"""pep5-dl: download the PEP5 lecture slides and merge them into one PDF.

The script logs in with a headless Chromium driven by playwright, fetches
``PEP5_01.pdf``, ``PEP5_02.pdf``, ... into TMPDIR until the server stops
answering with a PDF, logs out again and finally merges the downloaded
files into the output path.
"""

import os
import glob
import tempfile
import argparse
from playwright.sync_api import sync_playwright

BASE = 'https://uebungen.physik.uni-heidelberg.de'
LOGIN = BASE+'/uebungen/login.php'
LOGOUT = BASE+'/uebungen/logout.php'
MATERIAL = BASE+'/c/image/d/vorlesung/20232/1735/material/'

DEFAULT = os.path.join(os.getcwd(), 'slides.pdf')


def _parse_args():
    """Parse the command line and return the argparse namespace."""
    parser = argparse.ArgumentParser()
    # Both positionals are required; argparse itself rejects a call that
    # omits them, so no extra None check is needed afterwards.
    parser.add_argument('user', type=str, help='Username')
    parser.add_argument('pwd', type=str, help='Password')
    parser.add_argument('-o', '--out', type=str, default=DEFAULT, help='Output path')
    parser.add_argument('-f', '--force', action='store_true', help='Redownload file even if already present in TMPDIR')
    parser.add_argument('-k', '--keep', action='store_true', help='Keep temporary files in TMPDIR')
    parser.add_argument('-t', '--tmpdir', type=str, default=tempfile.gettempdir(), help='Temporary directory')
    parser.add_argument('-m', '--merger', type=str, default='pypdf', choices=['pypdf', 'pdfunite', 'none'], help='Method used for merging PDFs')
    return parser.parse_args()


def _download_slides(page, tmpdir, force):
    """Fetch consecutively numbered slide PDFs into *tmpdir*.

    Uses the request context of the logged-in *page*. Files already present
    in *tmpdir* are skipped unless *force* is true. The loop ends at the
    first number for which the server does not deliver a PDF.
    """
    i = 1
    while True:
        name = 'PEP5_{:02d}.pdf'.format(i)
        # The cache lives in tmpdir, so that is where to look for the file.
        target = os.path.join(tmpdir, name)
        if force or not os.path.isfile(target):
            pdf = page.context.request.get(MATERIAL+name)
            # The header may be absent or carry parameters after the type.
            content_type = pdf.headers.get('content-type', '')
            if not (pdf.ok and content_type.startswith('application/pdf')):
                break
            print('Downloading {}...'.format(name))
            with open(target, 'wb') as f:
                f.write(pdf.body())
        else:
            print('{} already downloaded'.format(name))
        i += 1


def _merge(files, out, method):
    """Merge the PDFs in *files* (in the given order) into *out*.

    *method* is ``'pypdf'`` or ``'pdfunite'``; any other value does nothing.
    Raises ``subprocess.CalledProcessError`` if ``pdfunite`` fails.
    """
    if method == 'pypdf':
        import PyPDF4
        # PyPDF4 names the class PdfFileMerger; accept PdfMerger as well in
        # case the installed package exposes the newer name.
        merger_cls = getattr(PyPDF4, 'PdfFileMerger', None)
        if merger_cls is None:
            merger_cls = PyPDF4.PdfMerger
        merger = merger_cls()
        try:
            for pdf in files:
                merger.append(pdf)
            merger.write(out)
        finally:
            merger.close()
    elif method == 'pdfunite':
        import subprocess
        # Argument list instead of a shell string: paths containing spaces
        # or shell metacharacters are passed through verbatim.
        subprocess.run(['pdfunite'] + list(files) + [out], check=True)


def main():
    """Log in, download the slides, log out, merge and clean up."""
    args = _parse_args()
    os.makedirs(args.tmpdir, exist_ok=True)

    with sync_playwright() as p:
        browser = p.chromium.launch()
        try:
            page = browser.new_page()
            page.goto(LOGIN)
            page.locator("input[name='username']").fill(args.user)
            page.locator("input[name='submit']").click()
            page.locator("input[name='loginpass']").fill(args.pwd)
            try:
                page.locator("input[name='submit']").click()
                if page.url == LOGIN:
                    print('could not login')
                else:
                    _download_slides(page, args.tmpdir, args.force)
            except Exception as e:
                # Best effort: report the problem and still merge whatever
                # has been downloaded so far.
                print(e)
            finally:
                # Always log out once; the README warns that sessions left
                # open may get the account blocked.
                page.goto(LOGOUT)
        finally:
            browser.close()

    # glob returns files in arbitrary order; sort so the slides are merged
    # in lecture order (the numbers are zero-padded).
    files = sorted(glob.glob(os.path.join(args.tmpdir, 'PEP5_*.pdf')))
    if not files:
        print('no slides found in {}'.format(args.tmpdir))
        return

    if args.out:
        # A failing merge raises, so the sources below are not deleted.
        _merge(files, args.out, args.merger)

    if not args.keep and args.merger != 'none':
        for f in files:
            os.remove(f)


if __name__ == "__main__":
    main()