From 3e25d664745ed0a99d7b691157333478679c4131 Mon Sep 17 00:00:00 2001 From: Leon Haag-Fank Date: Wed, 3 Apr 2024 15:43:39 +0200 Subject: [PATCH] Initial commit --- README.md | 23 +++++++++++++++ zeit-dl | 85 +++++++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 108 insertions(+) create mode 100644 README.md create mode 100755 zeit-dl diff --git a/README.md b/README.md new file mode 100644 index 0000000..0f6f0e4 --- /dev/null +++ b/README.md @@ -0,0 +1,23 @@ +# zeit-dl +An unofficial script to download PDF/EPUB/audio files from your Die ZEIT Digitalabo. + +## Disclaimer +This script uses your login credentials. +Use at your own risk. + +## Installation +Requires `playwright` (with `chromium` driver): +```sh +pip install playwright +playwright install chromium +``` +Then you just need to download the [`zeit-dl`](https://git.haagfank.de/LnLcFlx/zeit-dl/raw/branch/master/zeit-dl) file from this repository. + +## Usage +If you simply want to download the current issue of Die ZEIT as a PDF, run +```sh +python zeit-dl '<user>' '<password>' +``` +where you have to replace `<user>` and `<password>` with your credentials. + +For help and more options run `zeit-dl --help`. 
#!/usr/bin/env python
"""Download an issue from a Die ZEIT digital subscription.

The script logs in at meine.zeit.de with a Chromium browser driven by
Playwright, opens the requested (or the currently highlighted) issue on
epaper.zeit.de and stores the file offered by the matching download button.
"""

import argparse
import os
import re
from urllib.parse import urljoin

try:
    from playwright.sync_api import sync_playwright
except ImportError:
    # Reported in main() after argument parsing, so `--help` keeps working
    # on machines where playwright is not installed yet.
    sync_playwright = None

EPAPER = 'https://epaper.zeit.de'
MEINE = 'https://meine.zeit.de'

# A download button names its file format in the value that follows key 9
# of its 'data-wt-click' attribute, e.g. "... 9: 'pdf' ...".
_FORMAT_PATTERN = re.compile(r"9: ?'([^']*)'")


def main():
    """Parse the command line, log in, download one issue and log out.

    Errors raised while logging in or downloading are printed instead of
    propagated, so that the logout request is still sent afterwards.
    """
    parser = argparse.ArgumentParser()
    # NOTE: the password is taken from the command line and is therefore
    # visible in the shell history and the process list.
    parser.add_argument('user', type=str, help='Username')
    parser.add_argument('pwd', type=str, help='Password')
    parser.add_argument('-o', '--out', type=str, default=os.getcwd(), help='Output directory')
    parser.add_argument('-a', '--abo', type=str, choices=['diezeit', 'zeitcampus', 'zeit-audio'], default='diezeit', help='Abo')
    parser.add_argument('-i', '--issue', type=str, help='Ausgabe (meistens DD.MM.YYYY)')
    parser.add_argument('-t', '--type', type=str, choices=['pdf', 'epub', 'mp3'], default='pdf', help='Dateiformat')
    parser.add_argument('-f', '--force', action='store_true', help='Redownload file even if already present')
    args = parser.parse_args()
    # 'user' and 'pwd' are required positionals: argparse already rejects a
    # command line without them, so no extra None check is needed here.

    if sync_playwright is None:
        parser.error("playwright is not installed; run 'pip install playwright' "
                     "and 'playwright install chromium'")

    with sync_playwright() as p:
        browser = p.chromium.launch()
        try:
            page = browser.new_page()
            page.goto(MEINE + '/anmelden')
            page.locator("input[id='login_email']").fill(args.user)
            page.locator("input[id='login_pass']").fill(args.pwd)
            try:
                page.locator("input[type='submit']").click()
                # Still being on the login page after submitting the form
                # is treated as a rejected login.
                if page.url == MEINE + '/anmelden':
                    print('could not login')
                else:
                    download(page, args)
            except Exception as e:
                # Top-level boundary: report the problem and fall through
                # to the logout below.
                print(e)
            finally:
                # Log out exactly once, whether or not the download worked.
                page.goto(MEINE + '/abmelden')
        finally:
            browser.close()


def _find_download_url(page, file_type):
    """Return the absolute URL of the download button for *file_type*.

    Buttons without a 'data-wt-click' attribute, without a format field in
    it, or without an 'href' are skipped. Returns None when no button
    matches.
    """
    for btn in page.locator("div.download-buttons > a.btn").all():
        js_obj = btn.get_attribute('data-wt-click')
        match = _FORMAT_PATTERN.search(js_obj) if js_obj else None
        if match is None or match.group(1) != file_type:
            continue
        href = btn.get_attribute('href')
        if href:
            # urljoin also copes with hrefs that are already absolute.
            return urljoin(EPAPER, href)
    return None


def download(page, args):
    """Download one issue with an already logged-in *page*.

    Args:
        page: Playwright page whose browser context holds the login session.
        args: Parsed command line; reads ``abo``, ``issue``, ``type``,
            ``out`` and ``force``.

    Returns:
        0 when the file was written, 1 when nothing was downloaded (format
        not implemented, file already present and ``force`` not set, no
        matching download button, or the server did not deliver a file).
    """
    if args.abo == 'zeit-audio':
        print('not yet implemented')
        return 1

    if args.issue is None:
        # No issue requested: follow the highlighted issue on the overview
        # page and take its name from the last segment of the resulting URL.
        page.goto(EPAPER + '/abo/' + args.abo)
        page.locator("div.epaper-highlighted > a.btn").click(force=True)
        issue = page.url.rstrip('/').split('/')[-1]
    else:
        page.goto(EPAPER + '/abo/' + args.abo + '/' + args.issue)
        issue = args.issue

    filename = '{}-{}.{}'.format(args.abo, issue, args.type)
    filepath = os.path.join(args.out, filename)
    if os.path.isfile(filepath) and not args.force:
        print('Issue already exists:', filepath)
        return 1

    url = _find_download_url(page, args.type)
    if url is None:
        print('Could not find appropriate button for', args.type)
        return 1

    # Request through the page's context so the login cookies are sent.
    response = page.context.request.get(url)
    content_type = response.headers.get('content-type', '')
    # Compare only the media type: the header may carry parameters such as
    # '; charset=utf-8'. An HTML answer is a web page, not the issue file.
    media_type = content_type.split(';')[0].strip().lower()
    if not response.ok or media_type == 'text/html':
        print('Download failed for {} (status {}, content type {!r})'.format(
            filename, response.status, content_type))
        return 1

    print('Downloading {}...'.format(filename))
    os.makedirs(os.path.dirname(filepath) or '.', exist_ok=True)
    with open(filepath, 'wb') as f:
        f.write(response.body())
    return 0


if __name__ == "__main__":
    main()