Initial commit

This commit is contained in:
Leon Haag-Fank 2023-12-01 14:42:14 +01:00
commit 2f6d487676
2 changed files with 114 additions and 0 deletions

29
README.md Normal file
View file

@ -0,0 +1,29 @@
# pep5-dl
An unofficial script to download the slides of the 2023 PEP5 lecture.
## Disclaimer
This script uses your login credentials.
If the script is run multiple times without terminating properly (i.e. before the logout procedure has run), the Übungsgruppensystem might block you for a few hours.
Use at your own risk.
## Installation
Requires `playwright` (with `chromium` driver) and `PyPDF4`:
```sh
pip install playwright PyPDF4
playwright install chromium
```
Then you just need to download the [`pep5-dl`](https://git.haagfank.de/LnLcFlx/pep5-dl/raw/branch/master/pep5-dl) file from this repository.
### (Without `PyPDF4`)
If you are on Linux and have `pdfunite` installed, you can alternatively supply `--merger=pdfunite` and do not need `PyPDF4`.
Alternatively you can supply `--merger=none --keep --tmpdir=<DIR>` and simply download the individual PDFs to `<DIR>`.
## Usage
If you simply want to download all current slides into one file `slides.pdf` in the current directory, run
```sh
python pep5-dl '<USERNAME>' '<PASSWORD>'
```
where you have to replace `<USERNAME>` and `<PASSWORD>` with your credentials.
For help and more options, run `python pep5-dl --help`.

85
pep5-dl Executable file
View file

@ -0,0 +1,85 @@
#!/usr/bin/env python
import os
import glob
import tempfile
import argparse
from playwright.sync_api import sync_playwright
# Root URL of the exercise-group system (the "Übungsgruppensystem").
BASE = 'https://uebungen.physik.uni-heidelberg.de'

# Login page; the script also compares the post-login URL against this value
# to detect a failed login.
LOGIN = f'{BASE}/uebungen/login.php'

# URL prefix under which the numbered slide PDFs are requested.
MATERIAL = f'{BASE}/c/image/d/vorlesung/20232/1735/material/'

# Default value of --out: slides.pdf in the current working directory.
DEFAULT = os.path.join(os.getcwd(), 'slides.pdf')
def _parse_args():
    """Build the command-line parser and return the parsed arguments."""
    parser = argparse.ArgumentParser()
    # 'user' and 'pwd' are required positionals, so argparse itself rejects a
    # call that omits them; no extra None check is needed.
    parser.add_argument('user', type=str, help='Username')
    parser.add_argument('pwd', type=str, help='Password')
    parser.add_argument('-o', '--out', type=str, default=DEFAULT, help='Output path')
    parser.add_argument('-f', '--force', action='store_true', help='Redownload file even if already present in TMPDIR')
    parser.add_argument('-k', '--keep', action='store_true', help='Keep temporary files in TMPDIR')
    parser.add_argument('-t', '--tmpdir', type=str, default=tempfile.gettempdir(), help='Temporary directory')
    parser.add_argument('-m', '--merger', type=str, default='pypdf', choices=['pypdf', 'pdfunite', 'none'], help='Method used for merging PDFs')
    return parser.parse_args()


def _fetch_slides(page, args):
    """Download PEP5_01.pdf, PEP5_02.pdf, ... into args.tmpdir.

    Stops at the first number for which the server does not answer with a
    PDF. Files already present in args.tmpdir are skipped unless --force
    was given.
    """
    i = 1
    while True:
        name = 'PEP5_{:02d}.pdf'.format(i)
        target = os.path.join(args.tmpdir, name)
        # The cache lives in TMPDIR (where the files are written), not in
        # the current working directory.
        if os.path.isfile(target) and not args.force:
            print('{} already downloaded'.format(name))
        else:
            pdf = page.context.request.get(MATERIAL + name)
            content_type = pdf.headers.get('content-type', '')
            # Ignore optional parameters such as "; charset=...".
            if content_type.split(';')[0].strip() != 'application/pdf':
                break
            print('Downloading {}...'.format(name))
            with open(target, 'wb') as f:
                f.write(pdf.body())
        i += 1


def _download(args):
    """Log in, download the slides and always log out again.

    Errors raised while downloading are printed rather than propagated so
    that files fetched so far can still be merged by the caller.
    """
    with sync_playwright() as p:
        browser = p.chromium.launch()
        try:
            page = browser.new_page()
            page.goto(LOGIN)
            page.locator("input[name='username']").fill(args.user)
            page.locator("input[name='submit']").click()
            page.locator("input[name='loginpass']").fill(args.pwd)
            try:
                page.locator("input[name='submit']").click()
                if page.url == LOGIN:
                    print('could not login')
                else:
                    _fetch_slides(page, args)
            except Exception as e:
                print(e)
            finally:
                # Log out exactly once, whether or not the download worked;
                # the README warns that skipped logouts can get the account
                # blocked.
                page.goto(BASE + '/uebungen/logout.php')
        finally:
            # Close the browser even if the logout request itself fails.
            browser.close()


def _merge(files, out, merger):
    """Merge the PDFs in *files* (in the given order) into *out*.

    merger is 'pypdf' or 'pdfunite'. Raises if the merge fails, so the
    caller does not go on to delete the source files.
    """
    if merger == 'pypdf':
        try:
            from PyPDF4 import PdfMerger
        except ImportError:
            # NOTE(review): PyPDF4 appears to expose the class only as
            # PdfFileMerger; fall back to it when PdfMerger is missing.
            from PyPDF4 import PdfFileMerger as PdfMerger
        pdf_merger = PdfMerger()
        try:
            for pdf in files:
                pdf_merger.append(pdf)
            pdf_merger.write(out)
        finally:
            pdf_merger.close()
    elif merger == 'pdfunite':
        import subprocess
        # Argument list instead of a shell string: paths containing spaces
        # or shell metacharacters are passed through unchanged.
        subprocess.run(['pdfunite', *files, out], check=True)


def main():
    """Download all available lecture slides and merge them into one PDF.

    Command-line driven: logs in with the given credentials, stores the
    individual PDFs in TMPDIR, merges them into the --out path with the
    selected --merger and finally removes the individual files unless
    --keep was given or no merge took place.
    """
    args = _parse_args()
    _download(args)

    # Sort so the merged document follows the lecture numbering; ordering by
    # (length, name) keeps PEP5_100 after PEP5_99.
    files = sorted(
        glob.glob(os.path.join(args.tmpdir, 'PEP5_*.pdf')),
        key=lambda path: (len(path), path),
    )
    if not files:
        print('no slides found in {}'.format(args.tmpdir))
        return

    merged = False
    if args.out and args.merger != 'none':
        _merge(files, args.out, args.merger)
        merged = True

    # Only remove the individual files once they have been merged.
    if merged and not args.keep:
        for f in files:
            os.remove(f)
# Run the downloader only when executed as a script, not when imported.
if __name__ == '__main__':
    main()