Initial commit
This commit is contained in:
commit
2f6d487676
2 changed files with 114 additions and 0 deletions
29
README.md
Normal file
29
README.md
Normal file
|
@ -0,0 +1,29 @@
|
|||
# pep5-dl
|
||||
An unofficial script to download the slides of the 2023 PEP5 lecture.
|
||||
|
||||
## Disclaimer
|
||||
This script uses your login credentials.
|
||||
If the script is run multiple times without terminating properly (i.e. before its logout procedure has run), the Übungsgruppensystem might block you for some hours.
|
||||
Use at your own risk.
|
||||
|
||||
## Installation
|
||||
Requires `playwright` (with `chromium` driver) and `PyPDF4`:
|
||||
```sh
|
||||
pip install playwright PyPDF4
|
||||
playwright install chromium
|
||||
```
|
||||
Then you just need to download the [`pep5-dl`](https://git.haagfank.de/LnLcFlx/pep5-dl/raw/branch/master/pep5-dl) file from this repository.
|
||||
|
||||
### (Without `PyPDF4`)
|
||||
If you are on Linux and have `pdfunite` installed, you can alternatively supply `--merger=pdfunite` and do not need `PyPDF4`.
|
||||
|
||||
Alternatively you can supply `--merger=none --keep --tmpdir=<DIR>` and simply download the individual PDFs to `<DIR>`.
|
||||
|
||||
## Usage
|
||||
If you simply want to download all current slides into one file `slides.pdf` in the current directory, run
|
||||
```sh
|
||||
python pep5-dl '<USERNAME>' '<PASSWORD>'
|
||||
```
|
||||
where you have to replace `<USERNAME>` and `<PASSWORD>` with your credentials.
|
||||
|
||||
For help and more options run `pep5-dl --help`.
|
85
pep5-dl
Executable file
85
pep5-dl
Executable file
|
@ -0,0 +1,85 @@
|
|||
#!/usr/bin/env python
|
||||
|
||||
import os
|
||||
import glob
|
||||
import tempfile
|
||||
import argparse
|
||||
from playwright.sync_api import sync_playwright
|
||||
|
||||
# Root URL of the site the script logs in to and downloads from.
BASE = 'https://uebungen.physik.uni-heidelberg.de'
# Login page; also compared against the current URL to detect a failed login.
LOGIN = f'{BASE}/uebungen/login.php'
# URL prefix to which the individual slide file names (PEP5_NN.pdf) are appended.
MATERIAL = f'{BASE}/c/image/d/vorlesung/20232/1735/material/'

# Default value of --out: slides.pdf in the current working directory.
DEFAULT = os.path.join(os.getcwd(), 'slides.pdf')
|
||||
|
||||
|
||||
def _download_slides(page, tmpdir, force):
    """Fetch PEP5_01.pdf, PEP5_02.pdf, ... into *tmpdir* until a request is not a PDF.

    page   -- logged-in Playwright page whose context is used for the requests
    tmpdir -- directory the individual PDFs are written to
    force  -- when true, re-download files that already exist in *tmpdir*
    """
    i = 1
    while True:
        name = 'PEP5_{:02d}.pdf'.format(i)
        target = os.path.join(tmpdir, name)
        # Look for the cached copy where it is actually written (tmpdir),
        # not in the current working directory.
        if force or not os.path.isfile(target):
            pdf = page.context.request.get(MATERIAL+name)
            # The first response that is not a PDF ends the numbered sequence.
            if pdf.headers.get('content-type') != 'application/pdf':
                break
            print('Downloading {}...'.format(name))
            with open(target, 'wb') as f:
                f.write(pdf.body())
        else:
            print('{} already downloaded'.format(name))
        i += 1


def _merge_pdfs(files, out, merger):
    """Merge *files*, in the given order, into *out* with the selected backend.

    merger is 'pypdf' or 'pdfunite'; any other value does nothing.
    Raises subprocess.CalledProcessError if pdfunite exits with an error.
    """
    if merger == 'pypdf':
        # PyPDF4 names the class PdfFileMerger; fall back to the name the
        # script originally imported in case the installed package has it.
        try:
            from PyPDF4 import PdfFileMerger as PdfMerger
        except ImportError:
            from PyPDF4 import PdfMerger
        combined = PdfMerger()
        for path in files:
            combined.append(path)
        combined.write(out)
        combined.close()
    elif merger == 'pdfunite':
        import subprocess
        # Argument list instead of a shell string: paths containing spaces or
        # shell metacharacters are passed through unharmed.
        subprocess.run(['pdfunite', *files, out], check=True)


def main():
    """Log in, download all available slide PDFs and merge them into one file.

    Command-line arguments are read via argparse (see --help). Individual
    PDFs are stored in TMPDIR, merged into the --out path with the chosen
    --merger and removed afterwards unless --keep or --merger=none is given.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('user', type=str, help='Username')
    parser.add_argument('pwd', type=str, help='Password')
    parser.add_argument('-o', '--out', type=str, default=DEFAULT, help='Output path')
    parser.add_argument('-f', '--force', action='store_true', help='Redownload file even if already present in TMPDIR')
    parser.add_argument('-k', '--keep', action='store_true', help='Keep temporary files in TMPDIR')
    parser.add_argument('-t', '--tmpdir', type=str, default=tempfile.gettempdir(), help='Temporary directory')
    parser.add_argument('-m', '--merger', type=str, default='pypdf', choices=['pypdf', 'pdfunite', 'none'], help='Method used for merging PDFs')
    # `user` and `pwd` are required positionals, so argparse itself exits with
    # a usage error when they are missing; no manual check is needed.
    args = parser.parse_args()

    with sync_playwright() as p:
        browser = p.chromium.launch()
        page = browser.new_page()
        try:
            # Two-step login form: username first, then password.
            page.goto(LOGIN)
            page.locator("input[name='username']").fill(args.user)
            page.locator("input[name='submit']").click()
            page.locator("input[name='loginpass']").fill(args.pwd)
            page.locator("input[name='submit']").click()
            # Still being on the login page is treated as a failed login.
            if page.url == LOGIN:
                print('could not login')
            else:
                _download_slides(page, args.tmpdir, args.force)
        except Exception as e:
            # Best effort: report the problem and continue with whatever
            # has been downloaded so far.
            print(e)
        finally:
            # Log out exactly once, whatever happened above, and close the
            # browser even if the logout request itself fails.
            try:
                page.goto(BASE+'/uebungen/logout.php')
            finally:
                browser.close()

    # Sorted so that the zero-padded file names are merged in lecture order.
    files = sorted(glob.glob(os.path.join(args.tmpdir, 'PEP5_*')))
    if not files:
        print('No slides found in {}'.format(args.tmpdir))
        return

    if args.out:
        _merge_pdfs(files, args.out, args.merger)

    if not args.keep and args.merger != 'none':
        for f in files:
            os.remove(f)
|
||||
|
||||
|
||||
# Run the downloader only when this file is executed as a script.
if __name__ == '__main__':
    main()
|
Loading…
Add table
Reference in a new issue