Initial commit

2024-04-03 15:43:39 +02:00 · 2024-04-03 15:43:39 +02:00 · 3e25d66474
commit 3e25d66474
2 changed files with 108 additions and 0 deletions
--- a/README.md
+++ b/README.md
@ -0,0 +1,23 @@
 # zeit-dl
 An unofficial script to download PDF/EPUB/audio files from your Die ZEIT Digitalabo.
 ## Disclaimer
 This script uses your login credentials.
 Use at your own risk.
 ## Installation
 Requires `playwright` (with `chromium` driver):
 ```sh
 pip install playwright
 playwright install chromium
 ```
 Then you just need to download the [`zeit-dl`](https://git.haagfank.de/LnLcFlx/zeit-dl/raw/branch/master/zeit-dl) file from this repository.
 ## Usage
 If you simply want to download the current issue of Die ZEIT as a PDF, run
 ```sh
 python zeit-dl '<USERNAME>' '<PASSWORD>'
 ```
 where you have to replace `<USERNAME>` and `<PASSWORD>` with your credentials.
 For help and more options, run `python zeit-dl --help`.
--- a/85
+++ b/85
@ -0,0 +1,85 @@
 #!/usr/bin/env python
 import os
 import re
 import argparse
 from playwright.sync_api import sync_playwright
 # Base URL of the e-paper site; issue pages and download links are built from it.
 EPAPER = 'https://epaper.zeit.de'
 # Base URL of the account site; used for the login (/anmelden) and logout (/abmelden) pages.
 MEINE = 'https://meine.zeit.de'
 def main():
    """Parse the command line, log in, download the requested issue and log out.

    The credentials are filled into the login form at MEINE + '/anmelden' in a
    headless Chromium instance. If the page URL is still the login URL after
    submitting the form, the login is treated as failed. Otherwise
    ``download`` is called with the logged-in page and the parsed arguments.

    Errors raised while submitting the form or downloading are printed rather
    than propagated. The logout page is visited exactly once afterwards, and
    the browser is closed even if something raises.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('user', type=str, help='Username')
    parser.add_argument('pwd', type=str, help='Password')
    parser.add_argument('-o', '--out', type=str, default=os.getcwd(), help='Output directory')
    parser.add_argument('-a', '--abo', type=str, choices=['diezeit', 'zeitcampus', 'zeit-audio'], default='diezeit', help='Abo')
    parser.add_argument('-i', '--issue', type=str, help='Ausgabe (meistens DD.MM.YYYY)')
    parser.add_argument('-t', '--type', type=str, choices=['pdf', 'epub', 'mp3'], default='pdf', help='Dateiformat')
    parser.add_argument('-f', '--force', action='store_true', help='Redownload file even if already present')
    # 'user' and 'pwd' are positional, so argparse itself exits with an error
    # when either is missing; no manual None check is needed.
    args = parser.parse_args()
    with sync_playwright() as p:
        browser = p.chromium.launch()
        try:
            page = browser.new_page()
            page.goto(MEINE+'/anmelden')
            page.locator("input[id='login_email']").fill(args.user)
            page.locator("input[id='login_pass']").fill(args.pwd)
            try:
                page.locator("input[type='submit']").click()
                if page.url == MEINE+'/anmelden':
                    print('could not login')
                else:
                    download(page, args)
            except Exception as e:
                # Report the error first; the logout happens once, in the
                # finally clause, so a failing logout cannot hide it.
                print(e)
            finally:
                page.goto(MEINE+'/abmelden')
                print('finally: logout')
        finally:
            # Always release the browser, even when login or logout raised.
            browser.close()
 def download(page, args):
    """Download one issue of the selected subscription to ``args.out``.

    Parameters:
        page: a logged-in Playwright page; it is navigated to the issue page
            and its browser context is used for the file request.
        args: parsed command-line arguments. Reads ``abo``, ``issue``,
            ``type``, ``out`` and (if present) ``force``.

    Returns:
        None after a successful download, and also for the not yet
        implemented 'zeit-audio' subscription. 1 when nothing was downloaded:
        the file already exists and ``force`` is not set, no matching
        download button was found, or the server did not deliver a file.
    """
    if args.abo == 'zeit-audio':
        print('not yet implemented')
        return

    if args.issue is None:
        # No issue given: open the overview and follow the highlighted
        # (current) issue; its identifier is the last URL path segment.
        page.goto(EPAPER+'/abo/'+args.abo)
        page.locator("div.epaper-highlighted > a.btn").click(force=True)
        issue = page.url.split('/')[-1]
    else:
        page.goto(EPAPER+'/abo/'+args.abo+'/'+args.issue)
        issue = args.issue

    filename = '{}-{}.{}'.format(args.abo, issue, args.type)
    filepath = os.path.join(args.out, filename)
    # Honour -f/--force: only skip an existing file when no re-download was
    # requested. getattr keeps callers working that pass args without 'force'.
    if os.path.isfile(filepath) and not getattr(args, 'force', False):
        print('Issue already exists:', filepath)
        return 1

    url = None
    for btn in page.locator("div.download-buttons > a.btn").all():
        js_obj = btn.get_attribute('data-wt-click')
        if js_obj is None:
            # Button without tracking attribute: its type cannot be determined.
            continue
        # The file type is read from the value stored under key 9 of the
        # tracking object in the data-wt-click attribute.
        match = re.search(r"9: ?'([^']*)'", js_obj)
        if match is None or match.group(1) != args.type:
            continue
        href = btn.get_attribute('href')
        if href is None:
            continue
        print(js_obj)
        url = EPAPER+href
        # Stop at the first matching button.
        break
    if url is None:
        print('Could not find appropriate button for', args.type)
        return 1

    response = page.context.request.get(url)
    if not response.ok:
        print('Download failed with HTTP status', response.status)
        return 1
    # Compare only the media type so that values such as
    # 'text/html; charset=utf-8' are recognised as HTML as well.
    content_type = response.headers.get('content-type', '')
    if content_type.split(';')[0].strip().lower() == 'text/html':
        print('Received an HTML page instead of a file from', url)
        return 1
    print('Downloading {}...'.format(filename))
    with open(filepath, 'wb') as f:
        f.write(response.body())
 # Run the command-line interface only when executed as a script, not on import.
 if __name__ == "__main__":
    main()