Initial commit
This commit is contained in:
commit
3e25d66474
2 changed files with 108 additions and 0 deletions
23
README.md
Normal file
23
README.md
Normal file
|
@ -0,0 +1,23 @@
|
||||||
|
# zeit-dl
|
||||||
|
An unofficial script to download PDF/EPUB/audio files from your Die ZEIT Digitalabo.
|
||||||
|
|
||||||
|
## Disclaimer
|
||||||
|
This script uses your login credentials.
|
||||||
|
Use at your own risk.
|
||||||
|
|
||||||
|
## Installation
|
||||||
|
Requires `playwright` (with `chromium` driver):
|
||||||
|
```sh
|
||||||
|
pip install playwright
|
||||||
|
playwright install chromium
|
||||||
|
```
|
||||||
|
Then you just need to download the [`zeit-dl`](https://git.haagfank.de/LnLcFlx/zeit-dl/raw/branch/master/zeit-dl) file from this repository.
|
||||||
|
|
||||||
|
## Usage
|
||||||
|
If you simply want to download the current issue of Die ZEIT as a PDF, run
|
||||||
|
```sh
|
||||||
|
python zeit-dl '<USERNAME>' '<PASSWORD>'
|
||||||
|
```
|
||||||
|
where you have to replace `<USERNAME>` and `<PASSWORD>` with your credentials.
|
||||||
|
|
||||||
|
For help and more options, run `python zeit-dl --help`.
|
85
zeit-dl
Executable file
85
zeit-dl
Executable file
|
@ -0,0 +1,85 @@
|
||||||
|
#!/usr/bin/env python
|
||||||
|
|
||||||
|
import os
|
||||||
|
import re
|
||||||
|
import argparse
|
||||||
|
from playwright.sync_api import sync_playwright
|
||||||
|
|
||||||
|
# Base URL of the e-paper site; issue pages and download links are built from it.
EPAPER = 'https://epaper.zeit.de'
# Base URL of the account site; used for the login and logout pages.
MEINE = 'https://meine.zeit.de'
|
||||||
|
|
||||||
|
|
||||||
|
def main():
    """Parse the command line, log in, download the requested issue, log out.

    The username and password are required positional arguments; argparse
    itself aborts with a usage error when either is missing, so no extra
    check is needed after parsing.

    Errors raised while submitting the login form or downloading are printed
    rather than propagated. The session is logged out exactly once afterwards
    and the browser is always closed.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('user', type=str, help='Username')
    parser.add_argument('pwd', type=str, help='Password')
    parser.add_argument('-o', '--out', type=str, default=os.getcwd(), help='Output directory')
    parser.add_argument('-a', '--abo', type=str, choices=['diezeit', 'zeitcampus', 'zeit-audio'], default='diezeit', help='Abo')
    parser.add_argument('-i', '--issue', type=str, help='Ausgabe (meistens DD.MM.YYYY)')
    parser.add_argument('-t', '--type', type=str, choices=['pdf', 'epub', 'mp3'], default='pdf', help='Dateiformat')
    parser.add_argument('-f', '--force', action='store_true', help='Redownload file even if already present')
    args = parser.parse_args()

    with sync_playwright() as p:
        browser = p.chromium.launch()
        try:
            page = browser.new_page()
            page.goto(MEINE+'/anmelden')
            page.locator("input[id='login_email']").fill(args.user)
            page.locator("input[id='login_pass']").fill(args.pwd)
            try:
                page.locator("input[type='submit']").click()
                # Still being on the login page after submitting means the
                # credentials were rejected.
                if page.url == MEINE+'/anmelden':
                    print('could not login')
                else:
                    download(page, args)
            except Exception as e:
                # Top-level boundary: report the problem; the logout below
                # still runs via ``finally``.
                print(e)
            finally:
                # Single logout for both the success and the error path.
                page.goto(MEINE+'/abmelden')
                print('finally: logout')
        finally:
            # Close the browser even if login-page setup or the logout fails.
            browser.close()
|
||||||
|
|
||||||
|
|
||||||
|
def download(page, args):
    """Download one issue through an already logged-in Playwright page.

    Parameters:
        page: Playwright page whose browser context holds the login session.
        args: Parsed command-line namespace. Reads ``abo``, ``issue``,
            ``type``, ``out`` and ``force``.

    Returns:
        1 when nothing was downloaded (file already present without
        ``--force``, no matching download button, or the server answered
        with an HTML page); otherwise None.
    """
    if args.abo == 'zeit-audio':
        print('not yet implemented')
        return None

    if args.issue is None:
        # No issue given: open the highlighted (current) issue from the
        # overview page and take its identifier from the resulting URL.
        page.goto(EPAPER+'/abo/'+args.abo)
        page.locator("div.epaper-highlighted > a.btn").click(force=True)
        issue = page.url.split('/')[-1]
    else:
        page.goto(EPAPER+'/abo/'+args.abo+'/'+args.issue)
        issue = args.issue

    filename = '{}-{}.{}'.format(args.abo, issue, args.type)
    filepath = os.path.join(args.out, filename)
    # Honour --force: an existing file only blocks the download without it.
    if os.path.isfile(filepath) and not getattr(args, 'force', False):
        print('Issue already exists:', filepath)
        return 1

    url = None
    for btn in page.locator("div.download-buttons > a.btn").all():
        # The button's tracking attribute carries the file format in its
        # field number 9; buttons without it (or without a match) are skipped
        # instead of crashing.
        js_obj = btn.get_attribute('data-wt-click')
        match = re.search(r"9: ?'([^']*)'", js_obj) if js_obj else None
        if match is None or match.group(1) != args.type:
            continue
        href = btn.get_attribute('href')
        if href:
            print(js_obj)
            url = EPAPER+href
            break
    if url is None:
        print('Could not find appropriate button for', args.type)
        return 1

    # Fetch through the page's context so the login cookies are sent along.
    response = page.context.request.get(url)
    # Compare only the media type: the header may carry parameters such as
    # "; charset=utf-8", and an HTML answer is an error page, not the issue.
    content_type = response.headers.get('content-type', '')
    if content_type.split(';')[0].strip().lower() == 'text/html':
        print('Server returned an HTML page instead of the {} file'.format(args.type))
        return 1

    print('Downloading {}...'.format(filename))
    with open(filepath, 'wb') as f:
        f.write(response.body())
    return None
|
||||||
|
|
||||||
|
|
||||||
|
# Run the downloader only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
|
Loading…
Add table
Reference in a new issue