#!/usr/bin/env python3
"""Command-line script for querying YLE
"""
import argparse
import asyncio
import csv
import logging
import random
from datetime import datetime
from time import sleep
import aiohttp
from ..query import query_yle
# Configure the root logger as soon as the module is imported; INFO is the
# default verbosity for this script.
logging.basicConfig(level=logging.INFO)
def _parse_arguments():
    """Parse the command-line options of the YLE query script.

    Returns:
        The ``argparse.Namespace`` produced from ``sys.argv`` with the
        attributes ``from_date``, ``to_date``, ``query``, ``output``,
        ``limit``, ``language``, ``delay`` and ``quiet``.
    """
    # Evaluated once per call so the --to-date default is the current day.
    today = datetime.today().strftime('%Y-%m-%d')

    cli = argparse.ArgumentParser()
    cli.add_argument(
        '-f', '--from-date',
        required=True,
        help="from date (inclusive, YYYY-MM-DD)",
    )
    cli.add_argument(
        '-t', '--to-date',
        default=today,
        help="to date (inclusive, YYYY-MM-DD, defaults to today)",
    )
    cli.add_argument(
        '-q', '--query',
        required=True,
        help="query string to search for",
    )
    cli.add_argument(
        '-o', '--output',
        required=True,
        help="output CSV file",
    )
    cli.add_argument(
        '-l', '--limit',
        type=int,
        default=10000,
        help="number of articles to fetch per query (max==10000)",
    )
    cli.add_argument(
        '-lang', '--language',
        default="fi",
        help="language to search (fi=YLE uutiset,sv=Svenska YLE)",
    )
    cli.add_argument(
        '-d', '--delay',
        type=float,
        default=1.0,
        help="number of seconds to wait between consecutive requests",
    )
    cli.add_argument(
        '--quiet',
        action='store_true',
        default=False,
        help="Log only errors",
    )
    return cli.parse_args()
async def _amain():
    """Run the query given on the command line and export the hits as CSV.

    Parses the command-line arguments, iterates over the result pages
    yielded by ``query_yle`` and writes one row (id, url, title,
    date_modified) per article to the ``--output`` file. After each page the
    coroutine pauses for a random interval between 0 and twice ``--delay``
    seconds, so the average pause equals ``--delay``.
    """
    args = _parse_arguments()
    if args.quiet:
        # logging.basicConfig() does nothing once the root logger already has
        # handlers (it is configured at module import), so the level has to
        # be changed on the root logger itself for --quiet to take effect.
        logging.getLogger().setLevel(logging.ERROR)

    # newline="" is required by the csv module to avoid doubled line endings
    # on some platforms; an explicit encoding keeps non-ASCII titles from
    # depending on the locale's default codec.
    with open(args.output, "w", newline="", encoding="utf-8") as output_file:
        csv_output = csv.writer(output_file)
        csv_output.writerow(['id', 'url', 'title', 'date_modified'])
        total_count = 0
        async with aiohttp.ClientSession() as session:
            async for response in query_yle(
                    session, args.query, args.language,
                    args.from_date, args.to_date, args.limit):
                total_count += len(response.articles)
                logging.info(
                    "Processing %d articles from %s. In total fetched %d articles.",
                    len(response.articles), response.url, total_count)
                csv_output.writerows(
                    [article.id, article.url, article.title, article.date_modified]
                    for article in response.articles
                )
                # random.randrange() rejects float arguments (TypeError on
                # Python 3.12+) and an empty range when the delay is 0, so
                # draw a float with uniform() instead. asyncio.sleep() is used
                # because time.sleep() would block the event loop.
                pause = random.uniform(0.0, 2 * max(args.delay, 0.0))
                await asyncio.sleep(pause)
        logging.info("Processed %s articles in total.", total_count)
def main():
    """Script entry point: run the asynchronous ``_amain`` routine to completion."""
    asyncio.run(_amain())
# Start the command-line interface only when this file is executed as a script.
if __name__ == "__main__":
    main()