diff options
author | makefu <github@syntax-fehler.de> | 2020-03-31 22:23:38 +0200 |
---|---|---|
committer | makefu <github@syntax-fehler.de> | 2020-03-31 22:23:38 +0200 |
commit | e4942fdab9a43712d56b38dbdb8421d38d7743df (patch) | |
tree | f45314001e9e80bcf9ca1d6ec22126db0907e751 /ebknotify | |
parent | b9471e7b0576dfbb1157c639c4a34b243f9dd61e (diff) |
ebknotify: add feed
Diffstat (limited to 'ebknotify')
-rw-r--r-- | ebknotify/cache.py | 73 | ||||
-rw-r--r-- | ebknotify/cli.py | 59 | ||||
-rw-r--r-- | ebknotify/client.py | 12 | ||||
-rw-r--r-- | ebknotify/common.py | 20 | ||||
-rw-r--r-- | ebknotify/example.py | 72 | ||||
-rw-r--r-- | ebknotify/feed.py | 43 | ||||
-rw-r--r-- | ebknotify/raw.py | 14 |
7 files changed, 251 insertions, 42 deletions
diff --git a/ebknotify/cache.py b/ebknotify/cache.py new file mode 100644 index 0000000..fbc4b9b --- /dev/null +++ b/ebknotify/cache.py @@ -0,0 +1,73 @@ +import logging +from os.path import expanduser,exists +from datetime import datetime +from dateutil.tz import tzlocal +from .common import html_unescape +import dateutil +import dateutil.parser +import json + +log = logging.getLogger("cache") +class Cache(): + + def __init__(self,path): + self.path = expanduser(path) + + self.load(self.path) + # self.reset_init() + + def load(self,path): + if not exists(path): + log.info(f"{path} as cache does not exist") + self.store = {} + else: + self.store = json.load(open(path)) + log.debug("loaded {len(ret)} entries from {cachefile}") + + def reset_seen(self): + """ reset the store 'seen' field to false for this run """ + for k in self.store: + self.store[k]['notify']['seen'] = False + + def save(self) -> None: + cachefile = self.path + if not exists(cachefile): + log.info(f"{cachefile} does not exist,creating") + + with open(cachefile,"w+") as f: + json.dump(self.store,f) + + def update(self,ad,searchquery): + now = datetime.now(tzlocal()) + ident = ad['id'] + creation = ad.get('start-date-time', {}).get('value', '').encode('utf-8') + price = ad.get('price',{ 'amount': {}})['amount'].get('value',0) + d = dateutil.parser.parse(creation) + age_in_h = round((now - d).total_seconds() / 3600,2) + title = ad.get('title', {}).get('value', '').encode('utf-8') + title_unescaped = html_unescape(title) + try: + img = ad['pictures']['picture'][0]['link'][4]['href'] + except Exception as e: + print(e) + img = None + # TODO: fetch detailed article + ad['notify'] = { + 'filter': searchquery, + 'creation': d.isoformat(), + 'filter-name': searchquery['name'], + 'first-seen': now.isoformat(), + 'title-unescaped': title_unescaped, + 'age_in_h': age_in_h, + 'price': price, + 'image': img, + 'url': ad['link'][1]['href'], + 'seen': True + } + if ident in self.store: + log.debug(f"{ident} already in store, updating") + ad['notify']['first-seen'] = self.store[ident]['notify']['first-seen'] # saving first_seen from cache + else: + log.debug(f"{ident} is new, adding") + self.store[ident] = ad + return ad diff --git a/ebknotify/cli.py b/ebknotify/cli.py index ef6f99a..ad7d0b7 100644 --- a/ebknotify/cli.py +++ b/ebknotify/cli.py @@ -1,48 +1,63 @@ -""" usage: ebk-notify [options] CONFIG +""" usage: ebk-notify [options] options: --lol=LOL Log Level [Default: INFO] + --config=FILE Path to config file [Default: ~/.config/ebk-notify/ebk.yml] + --filter=FILE Path to separate filter file, + if not set ebk-notify will check in config unter the 'items' key + --cache=FILE Path to cache file [Default: ~/.config/ebk-notify/cache.json] + --atom Write atom files + --outdir=DIR Write atom files to folder [Default: ~/.config/ebk-notify/feeds] """ + from docopt import docopt from .client import EbkClient -from .common import set_lol -import yaml +from .common import set_lol,load_config +from .cache import Cache +from .feed import Feed import json -import dateutil -import dateutil.parser -from datetime import datetime -from dateutil.tz import tzlocal import logging - +from os.path import join log = logging.getLogger('ebk-notify') def main(): args = docopt(__doc__) set_lol(args['--lol'],log) - - config = yaml.safe_load(open(args['CONFIG'])) + config = load_config(args['--config']) api = EbkClient(config['main']['appid'], config['main']['apppw'], config['main']['userid'], config['main']['userpw'] ) - for item in config['items']: - log.info(f"For Search \"{item['name']}\"") + + if args['--filter']: + log.info(f"--filter set, trying to load filters from {args['--filter']}") + items = load_config(args['--filter']) + else: + log.info("loading filter from configurtion['items']") + items = config['items'] + cache = Cache(args['--cache']) + + for item in items: + name = item['name'] + log.info(f"For Search \"{name}\"") del item['name'] + + if not 'adType' in item: + item['adType'] = 'OFFERED' if 'distance' in item and 'distanceUnit' not in item: item['distanceUnit'] = 'KM' ads = api.get_ads(**item) - now = datetime.now(tzlocal()) + item['name'] = name for ad in ads: id = ad.get('id', 0) log.debug(json.dumps(ad,indent=4)) - creation = ad.get('start-date-time', {}).get('value', '').encode('utf-8') - price = ad.get('price',{ 'amount': {}})['amount'].get('value',0) - url = ad['link'][1]['href'] - d = dateutil.parser.parse(creation) - age_in_h = round((now - d).total_seconds() / 3600,2) - - title = ad.get('title', {}).get('value', '').encode('utf-8') - title_unescaped = api.html_unescape(title) - log.info( f"{age_in_h}h ago: \"{title_unescaped}\" for {price}€ -> {url}") + ad = cache.update(ad,item) + e = ad['notify'] + print( f"{e['age_in_h']}h ago: \"{e['title-unescaped']}\" for {e['price']}€ -> {e['url']}") + cache.save() + if args['--atom']: + f = Feed() + f.load_cache(cache) + print(f.to_atom_file(join(args['--outdir'],f'{f.ident}.atom'))) diff --git a/ebknotify/client.py b/ebknotify/client.py index 3abcb07..73f2363 100644 --- a/ebknotify/client.py +++ b/ebknotify/client.py @@ -35,16 +35,6 @@ from datetime import datetime from dateutil.tz import tzlocal import logging -logging.basicConfig(level=logging.DEBUG) - -try: - from html import unescape # python 3.4+ -except ImportError: - try: - from html.parser import HTMLParser # python 3.x (<3.4) - except ImportError: - from HTMLParser import HTMLParser # python 2.x - unescape = HTMLParser().unescape class EbkClient: @@ -222,6 +212,4 @@ class EbkClient: locations = self.get_locations(url_suffix, depth, include_parent_path) return locations - def html_unescape(self, data): - return unescape(data.decode()) diff --git a/ebknotify/common.py b/ebknotify/common.py index 0566c40..c277c26 100644 --- a/ebknotify/common.py +++ b/ebknotify/common.py @@ -1,8 +1,26 @@ +import yaml +import json +from os.path import expanduser,exists import logging +from sys import exit -def set_lol(lol,log): +from html import unescape # python 3.4+ + +log = logging.getLogger("ebk-common") + +def set_lol(lol:str ,log) -> None: numeric_level = getattr(logging,lol.upper(),None) if not isinstance(numeric_level,int): raise AttributeError('No such log level {}'.format(lol)) logging.basicConfig(level=numeric_level) log.setLevel(numeric_level) + +def load_config(path:str) -> dict: + configpath = expanduser(path) + if not exists(configpath): + log.error(f"{configpath} does not exist, bailing out") + exit(1) + return yaml.safe_load(open(configpath)) + +def html_unescape(data): + return unescape(data.decode()) diff --git a/ebknotify/example.py b/ebknotify/example.py new file mode 100644 index 0000000..bbddc19 --- /dev/null +++ b/ebknotify/example.py @@ -0,0 +1,72 @@ +#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+"""
+ebk-client - eBay Kleinanzeigen/Classifieds API client in Python
+Copyright (c) 2016 tjado <https://github.com/tejado>
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
+DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE
+OR OTHER DEALINGS IN THE SOFTWARE.
+
+Author: tjado <https://github.com/tejado>
+"""
+
+from pprint import pprint
+import dateutil.parser
+from datetime import datetime
+from dateutil.tz import tzlocal
+import sys
+import json
+from ebk_client import EbkClient
+u = "ebay-kleinanzeigen.de@syntax-fehler.de"
+api = EbkClient('android', 'TaR60pEttY', u , 'se.J<I+Q~t6c:FH9de[)')
+#pprint(api._http_get("/ads.json?priceType=FREE").json())
+#pprint(api._http_get("/ads/{}.json".format( 747663143 )).json())
+#my_ads = api.get_my_ads()
+ads = api.get_ads(zipcode="70378",q="Milchaufschäumer",distance=5,distanceUnit="KM")
+#ads = api.get_ads(zipcode="70378",categoryId=192,distance=2,distanceUnit="KM")
+
+
+def nice(t):
+ return json.dumps(t,indent=4)
+
+now = datetime.now(tzlocal())
+print('Ads:')
+#print(ads)
+for ad in ads or []:
+ id = ad.get('id', 0)
+ #pprint(api.get_ad_details(id))
+ creation = ad.get('start-date-time', {}).get('value', '').encode('utf-8')
+ d = dateutil.parser.parse(creation)
+ age_in_h = (now - d).total_seconds() / 3600
+
+ title = ad.get('title', {}).get('value', '').encode('utf-8')
+ title_unescaped = api.html_unescape(title)
+ print( "{} ({}) -> {}".format(id, age_in_h, title_unescaped) )
+
+#categories = api.get_categories()
+#print(nice(categories))
+#print('Subcategories of cat 80:\n\r{}'.format(nice(categories)))
+#
+#locations = api.get_location_by_name('70378')
+#print('Location by german postcode 70378:\n\r{}'.format(nice(locations)))
+
+# locations = api.get_location_by_coordinates(53.553155, 10.006151)
+# print('Location Latitude/Longitude:\n\r{}'.format(pprint.PrettyPrinter(indent=4).pformat(locations)))
+
+# category_attributes = api.get_category_attributes(88)
+# print('Category Attributes for cat 88:\n\r{}'.format(pprint.PrettyPrinter(indent=4).pformat(category_attributes)))
diff --git a/ebknotify/feed.py b/ebknotify/feed.py new file mode 100644 index 0000000..ce0e4a2 --- /dev/null +++ b/ebknotify/feed.py @@ -0,0 +1,43 @@ +from feedgen.feed import FeedGenerator + +class Feed(FeedGenerator): + def __init__(self,ident='root'): + super().__init__() + super().add_entry + self.ident = ident + self.id(f'http://ebk.euer.krebsco.de/{ident}') + self.title(f'Ebay Kleinanzeigen Feed {ident}') + self.author({'name':'Felix Richter', 'email': 'ebk-notify@syntax-fehler.de'}) + self.link( href=f'http://ebk.euer.krebsco.de/{ident}.atom', rel='self' ) + self.subtitle(f'Results for ebay kleinanzeingen for identity {ident}') + self.language('de') + + def load_cache(self,cache) -> None: + for item in cache.store.values(): + print(item) + fe = self.add_entry() + available = 'available' if item['notify']['seen'] else 'sold' + fe.id(f"{item['notify']['url']}#{available}") + fe.category({'term':item['category']['localized-name']['value']}) + fe.link({'href':item['notify']['url'],'title':'Link to item'}) + fe.pubDate(item['notify']['creation']) + fe.title(f"{item['title']['value']} ({available})") + fe.content(f''' + <a href="{item['notify']['url']}"> + <img src="{item['notify']['image']}" /> + </a> + <div> + {item['description']['value']} + </div> + ''',type='CDATA') + + def add_article(self,entry) -> None: + pass + + def to_atom(self) -> str: + return self.atom_str(pretty=True) + + def to_atom_file(self,filename=None) -> None: + if not filename: + filename = f'{self.ident}.atom' + self.atom_file(filename) diff --git a/ebknotify/raw.py b/ebknotify/raw.py index 5e4adfd..36fb6a5 100644 --- a/ebknotify/raw.py +++ b/ebknotify/raw.py @@ -1,24 +1,24 @@ -""" usage: ebk-raw [options] CONFIG URLPATH [PARAMS...] +""" usage: ebk-raw [options] URLPATH [PARAMS...] options: --lol=LOL Log Level [Default: INFO] --method=METHOD HTTP Method to use [Default: GET] + --config=FILE Path to config file [Default: ~/.config/ebk-notify/ebk.yml] --xml do not try to decode as json but xml instead PARAMS are the parameters sent via http method to URLPATH examples: - ebk-raw ebk.yml /ads/search-metadata.json - ebk-raw ebk.yml --xml /ads/search-metadata - ebk-raw ebk.yml /ads.json categoryId=216 zipcode=70435 distance=50 distanceUnit=KM + ebk-raw /ads/search-metadata.json + ebk-raw --xml /ads/search-metadata + ebk-raw /ads.json categoryId=216 zipcode=70435 distance=50 distanceUnit=KM """ from docopt import docopt from .client import EbkClient -import yaml import json import dateutil import logging -from .common import set_lol +from .common import set_lol,load_config log = logging.getLogger('ebk-raw') @@ -34,7 +34,7 @@ def main(): set_lol(args['--lol'],log) suffix = args['URLPATH'] - config = yaml.safe_load(open(args['CONFIG'])) + config = load_config(args['--config']) api = EbkClient(config['main']['appid'], config['main']['apppw'], config['main']['userid'], |