Changed --type to --types to allow multiple types at once (PDF+EPUB)
This commit is contained in:
parent
708d6fecc6
commit
7b6e1f0dcb
1 changed file with 26 additions and 16 deletions
42
zeit-dl
42
zeit-dl
|
@ -5,6 +5,7 @@ import sys
|
||||||
import re
|
import re
|
||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
import argparse
|
import argparse
|
||||||
|
from urllib.parse import urlparse
|
||||||
from playwright.sync_api import sync_playwright
|
from playwright.sync_api import sync_playwright
|
||||||
|
|
||||||
EPAPER = 'https://epaper.zeit.de'
|
EPAPER = 'https://epaper.zeit.de'
|
||||||
|
@ -25,7 +26,7 @@ def main():
|
||||||
parser.add_argument('-o', '--out', type=str, default=os.getcwd(), help='Output directory')
|
parser.add_argument('-o', '--out', type=str, default=os.getcwd(), help='Output directory')
|
||||||
parser.add_argument('-a', '--abo', type=str, choices=['diezeit', 'zeitcampus', 'zeit-audio'], default='diezeit', help='Subscription (part after abo/)')
|
parser.add_argument('-a', '--abo', type=str, choices=['diezeit', 'zeitcampus', 'zeit-audio'], default='diezeit', help='Subscription (part after abo/)')
|
||||||
parser.add_argument('-i', '--issue', type=str, help='Issue (mostly DD.MM.YYYY)')
|
parser.add_argument('-i', '--issue', type=str, help='Issue (mostly DD.MM.YYYY)')
|
||||||
parser.add_argument('-t', '--type', type=str, choices=['pdf', 'epub', 'mp3'], default='pdf', help='File type')
|
parser.add_argument('-t', '--types', type=str, nargs='*', choices=['pdf', 'epub', 'mp3'], default=['pdf'], help='File type')
|
||||||
parser.add_argument('-f', '--force', action='store_true', help='Redownload file even if already present')
|
parser.add_argument('-f', '--force', action='store_true', help='Redownload file even if already present')
|
||||||
parser.add_argument('--format', type=str, default="{abo}_{issue}.{ext}", help='Filename format. Possible formatting strings are {abo}, {issue}, {ext} and datetime format codes.')
|
parser.add_argument('--format', type=str, default="{abo}_{issue}.{ext}", help='Filename format. Possible formatting strings are {abo}, {issue}, {ext} and datetime format codes.')
|
||||||
parser.add_argument('-q', '--quiet', action='store_true', help='No output except for filename if written.')
|
parser.add_argument('-q', '--quiet', action='store_true', help='No output except for filename if written.')
|
||||||
|
@ -59,6 +60,8 @@ def main():
|
||||||
|
|
||||||
|
|
||||||
def download(page, args):
|
def download(page, args):
|
||||||
|
downloads = [] # (url, type)
|
||||||
|
|
||||||
if args.abo == 'zeit-audio':
|
if args.abo == 'zeit-audio':
|
||||||
log('not yet implemented', args=args, level='ERROR')
|
log('not yet implemented', args=args, level='ERROR')
|
||||||
return 1
|
return 1
|
||||||
|
@ -70,12 +73,32 @@ def download(page, args):
|
||||||
else:
|
else:
|
||||||
page.goto(EPAPER+'/abo/'+args.abo+'/'+args.issue)
|
page.goto(EPAPER+'/abo/'+args.abo+'/'+args.issue)
|
||||||
issue = args.issue
|
issue = args.issue
|
||||||
|
|
||||||
try:
|
try:
|
||||||
published = datetime.strptime(issue, '%d.%m.%Y')
|
published = datetime.strptime(issue, '%d.%m.%Y')
|
||||||
except ValueError:
|
except ValueError:
|
||||||
published = None
|
published = None
|
||||||
filename = args.format.format(abo=args.abo, issue=issue, ext=args.type)
|
|
||||||
|
dl_btns = page.locator("div.download-buttons > a.btn").all()
|
||||||
|
url = None
|
||||||
|
for btn in dl_btns:
|
||||||
|
js_obj = btn.get_attribute('data-wt-click')
|
||||||
|
match = re.search(r"9: ?'([^']*)'", js_obj)
|
||||||
|
type = match.group(1)
|
||||||
|
if type in args.types:
|
||||||
|
log(js_obj, args=args)
|
||||||
|
url = btn.get_attribute('href')
|
||||||
|
o = urlparse(url)
|
||||||
|
if o.netloc == '':
|
||||||
|
url = EPAPER+url
|
||||||
|
if url is not None:
|
||||||
|
downloads.append((url, type))
|
||||||
|
|
||||||
|
if len(downloads) == 0:
|
||||||
|
log('Could not find appropriate button for', args.types, args=args, level='ERROR')
|
||||||
|
return 1
|
||||||
|
|
||||||
|
for url, type in downloads:
|
||||||
|
filename = args.format.format(abo=args.abo, issue=issue, ext=type)
|
||||||
if published is not None:
|
if published is not None:
|
||||||
filename = published.strftime(filename)
|
filename = published.strftime(filename)
|
||||||
|
|
||||||
|
@ -86,20 +109,7 @@ def download(page, args):
|
||||||
log('Continuing...', args=args)
|
log('Continuing...', args=args)
|
||||||
else:
|
else:
|
||||||
log('Aborting...', args=args)
|
log('Aborting...', args=args)
|
||||||
return 0
|
|
||||||
|
|
||||||
dl_btns = page.locator("div.download-buttons > a.btn").all()
|
|
||||||
url = None
|
|
||||||
for btn in dl_btns:
|
|
||||||
js_obj = btn.get_attribute('data-wt-click')
|
|
||||||
match = re.search(r"9: ?'([^']*)'", js_obj)
|
|
||||||
if match.group(1) == args.type:
|
|
||||||
log(js_obj, args=args)
|
|
||||||
url = EPAPER+btn.get_attribute('href')
|
|
||||||
continue
|
continue
|
||||||
if url is None:
|
|
||||||
log('Could not find appropriate button for', args.type, args=args, level='ERROR')
|
|
||||||
return 1
|
|
||||||
|
|
||||||
file = page.context.request.get(url)
|
file = page.context.request.get(url)
|
||||||
if file.headers['content-type'] != 'text/html':
|
if file.headers['content-type'] != 'text/html':
|
||||||
|
|
Loading…
Add table
Reference in a new issue