summaryrefslogtreecommitdiffstats
path: root/ebknotify
diff options
context:
space:
mode:
authormakefu <github@syntax-fehler.de>2020-03-31 22:23:38 +0200
committermakefu <github@syntax-fehler.de>2020-03-31 22:23:38 +0200
commite4942fdab9a43712d56b38dbdb8421d38d7743df (patch)
treef45314001e9e80bcf9ca1d6ec22126db0907e751 /ebknotify
parentb9471e7b0576dfbb1157c639c4a34b243f9dd61e (diff)
ebknotify: add feed
Diffstat (limited to 'ebknotify')
-rw-r--r--ebknotify/cache.py73
-rw-r--r--ebknotify/cli.py59
-rw-r--r--ebknotify/client.py12
-rw-r--r--ebknotify/common.py20
-rw-r--r--ebknotify/example.py72
-rw-r--r--ebknotify/feed.py43
-rw-r--r--ebknotify/raw.py14
7 files changed, 251 insertions, 42 deletions
diff --git a/ebknotify/cache.py b/ebknotify/cache.py
new file mode 100644
index 0000000..fbc4b9b
--- /dev/null
+++ b/ebknotify/cache.py
@@ -0,0 +1,73 @@
+import logging
+from os.path import expanduser,exists
+from datetime import datetime
+from dateutil.tz import tzlocal
+from .common import html_unescape
+import dateutil
+import dateutil.parser
+import json
+
+log = logging.getLogger("cache")
+class Cache():
+    """JSON-file backed store of ads, keyed by ad id.
+
+    ``self.store`` maps an ad id to the ad dict; :meth:`update` attaches a
+    ``notify`` sub-dict with derived fields to every ad it stores.
+    """
+
+    def __init__(self,path):
+        """Expand ``~`` in ``path``, remember it and load the store from it."""
+        self.path = expanduser(path)
+        self.load(self.path)
+        # NOTE(review): reset_seen() is not invoked here, so 'seen' flags
+        # written by a previous run are kept exactly as loaded.
+
+    def load(self,path):
+        """Populate ``self.store`` from the JSON file at ``path``.
+
+        A missing file is not an error; the store simply starts out empty.
+        """
+        if not exists(path):
+            log.info(f"{path} as cache does not exist")
+            self.store = {}
+            return
+        # Context manager so the handle is closed even if parsing fails.
+        with open(path) as f:
+            self.store = json.load(f)
+        log.debug(f"loaded {len(self.store)} entries from {path}")
+
+    def reset_seen(self):
+        """ reset the store 'seen' field to false for this run """
+        for entry in self.store.values():
+            entry['notify']['seen'] = False
+
+    def save(self) -> None:
+        """Write ``self.store`` as JSON to ``self.path``, replacing the file."""
+        cachefile = self.path
+        if not exists(cachefile):
+            log.info(f"{cachefile} does not exist,creating")
+
+        with open(cachefile,"w") as f:
+            json.dump(self.store,f)
+
+    def update(self,ad,searchquery):
+        """Attach a ``notify`` dict to ``ad``, store it under its id, return it.
+
+        ``searchquery`` is the filter that produced the ad and must contain a
+        ``name`` key. For an id that is already stored, the ``first-seen``
+        timestamp of the existing entry is carried over.
+
+        Raises ``KeyError`` when ``ad`` has no ``id`` or ``link`` entry.
+        """
+        now = datetime.now(tzlocal())
+        # JSON object keys are always strings once the cache has been saved
+        # and reloaded; normalise so a non-string id still finds its entry.
+        ident = str(ad['id'])
+        creation = ad.get('start-date-time', {}).get('value', '').encode('utf-8')
+        price = ad.get('price',{ 'amount': {}})['amount'].get('value',0)
+        d = dateutil.parser.parse(creation)
+        age_in_h = round((now - d).total_seconds() / 3600,2)
+        title = ad.get('title', {}).get('value', '').encode('utf-8')
+        title_unescaped = html_unescape(title)
+        try:
+            img = ad['pictures']['picture'][0]['link'][4]['href']
+        except (KeyError, IndexError, TypeError) as e:
+            # Ads without a picture in the expected place are normal; keep
+            # going without an image instead of printing to stdout.
+            log.debug(f"{ident} has no usable picture link: {e!r}")
+            img = None
+        # TODO: fetch detailed article
+        ad['notify'] = {
+            'filter': searchquery,
+            'creation': d.isoformat(),
+            'filter-name': searchquery['name'],
+            'first-seen': now.isoformat(),
+            'title-unescaped': title_unescaped,
+            'age_in_h': age_in_h,
+            'price': price,
+            'image': img,
+            'url': ad['link'][1]['href'],
+            'seen': True
+        }
+        if ident in self.store:
+            log.debug(f"{ident} already in store, updating")
+            # keep the timestamp of the very first sighting
+            ad['notify']['first-seen'] = self.store[ident]['notify']['first-seen']
+        else:
+            log.debug(f"{ident} is new, adding")
+        self.store[ident] = ad
+        return ad
diff --git a/ebknotify/cli.py b/ebknotify/cli.py
index ef6f99a..ad7d0b7 100644
--- a/ebknotify/cli.py
+++ b/ebknotify/cli.py
@@ -1,48 +1,63 @@
-""" usage: ebk-notify [options] CONFIG
+""" usage: ebk-notify [options]
options:
--lol=LOL Log Level [Default: INFO]
+ --config=FILE Path to config file [Default: ~/.config/ebk-notify/ebk.yml]
+ --filter=FILE Path to separate filter file,
+ if not set ebk-notify will check in config under the 'items' key
+ --cache=FILE Path to cache file [Default: ~/.config/ebk-notify/cache.json]
+ --atom Write atom files
+ --outdir=DIR Write atom files to folder [Default: ~/.config/ebk-notify/feeds]
"""
+
from docopt import docopt
from .client import EbkClient
-from .common import set_lol
-import yaml
+from .common import set_lol,load_config
+from .cache import Cache
+from .feed import Feed
import json
-import dateutil
-import dateutil.parser
-from datetime import datetime
-from dateutil.tz import tzlocal
import logging
-
+from os.path import join
log = logging.getLogger('ebk-notify')
def main():
args = docopt(__doc__)
set_lol(args['--lol'],log)
-
- config = yaml.safe_load(open(args['CONFIG']))
+ config = load_config(args['--config'])
api = EbkClient(config['main']['appid'],
config['main']['apppw'],
config['main']['userid'],
config['main']['userpw']
)
- for item in config['items']:
- log.info(f"For Search \"{item['name']}\"")
+
+ if args['--filter']:
+ log.info(f"--filter set, trying to load filters from {args['--filter']}")
+ items = load_config(args['--filter'])
+ else:
+ log.info("loading filter from configurtion['items']")
+ items = config['items']
+ cache = Cache(args['--cache'])
+
+ for item in items:
+ name = item['name']
+ log.info(f"For Search \"{name}\"")
del item['name']
+
+ if not 'adType' in item:
+ item['adType'] = 'OFFERED'
if 'distance' in item and 'distanceUnit' not in item:
item['distanceUnit'] = 'KM'
ads = api.get_ads(**item)
- now = datetime.now(tzlocal())
+ item['name'] = name
for ad in ads:
id = ad.get('id', 0)
log.debug(json.dumps(ad,indent=4))
- creation = ad.get('start-date-time', {}).get('value', '').encode('utf-8')
- price = ad.get('price',{ 'amount': {}})['amount'].get('value',0)
- url = ad['link'][1]['href']
- d = dateutil.parser.parse(creation)
- age_in_h = round((now - d).total_seconds() / 3600,2)
-
- title = ad.get('title', {}).get('value', '').encode('utf-8')
- title_unescaped = api.html_unescape(title)
- log.info( f"{age_in_h}h ago: \"{title_unescaped}\" for {price}€ -> {url}")
+ ad = cache.update(ad,item)
+ e = ad['notify']
+ print( f"{e['age_in_h']}h ago: \"{e['title-unescaped']}\" for {e['price']}€ -> {e['url']}")
+ cache.save()
+ if args['--atom']:
+ f = Feed()
+ f.load_cache(cache)
+ print(f.to_atom_file(join(args['--outdir'],f'{f.ident}.atom')))
diff --git a/ebknotify/client.py b/ebknotify/client.py
index 3abcb07..73f2363 100644
--- a/ebknotify/client.py
+++ b/ebknotify/client.py
@@ -35,16 +35,6 @@ from datetime import datetime
from dateutil.tz import tzlocal
import logging
-logging.basicConfig(level=logging.DEBUG)
-
-try:
- from html import unescape # python 3.4+
-except ImportError:
- try:
- from html.parser import HTMLParser # python 3.x (<3.4)
- except ImportError:
- from HTMLParser import HTMLParser # python 2.x
- unescape = HTMLParser().unescape
class EbkClient:
@@ -222,6 +212,4 @@ class EbkClient:
locations = self.get_locations(url_suffix, depth, include_parent_path)
return locations
- def html_unescape(self, data):
- return unescape(data.decode())
diff --git a/ebknotify/common.py b/ebknotify/common.py
index 0566c40..c277c26 100644
--- a/ebknotify/common.py
+++ b/ebknotify/common.py
@@ -1,8 +1,26 @@
+import yaml
+import json
+from os.path import expanduser,exists
import logging
+from sys import exit
-def set_lol(lol,log):
+from html import unescape # python 3.4+
+
+log = logging.getLogger("ebk-common")
+
+def set_lol(lol:str ,log) -> None:
numeric_level = getattr(logging,lol.upper(),None)
if not isinstance(numeric_level,int):
raise AttributeError('No such log level {}'.format(lol))
logging.basicConfig(level=numeric_level)
log.setLevel(numeric_level)
+
+def load_config(path:str) -> dict:
+    """Parse the YAML file at ``path`` (``~`` is expanded) and return its content.
+
+    Logs an error and terminates the process with exit status 1 when the
+    file does not exist.
+    """
+    configpath = expanduser(path)
+    if not exists(configpath):
+        log.error(f"{configpath} does not exist, bailing out")
+        exit(1)
+    # Close the handle deterministically instead of leaving it to the GC.
+    with open(configpath) as f:
+        return yaml.safe_load(f)
+
+def html_unescape(data):
+    """Return ``data`` with HTML character references replaced.
+
+    ``data`` may be ``bytes`` (decoded with the default UTF-8 codec, as
+    before) or an already decoded ``str``.
+    """
+    if isinstance(data, (bytes, bytearray)):
+        data = data.decode()
+    return unescape(data)
diff --git a/ebknotify/example.py b/ebknotify/example.py
new file mode 100644
index 0000000..bbddc19
--- /dev/null
+++ b/ebknotify/example.py
@@ -0,0 +1,72 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+"""
+ebk-client - eBay Kleinanzeigen/Classifieds API client in Python
+Copyright (c) 2016 tjado <https://github.com/tejado>
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
+DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE
+OR OTHER DEALINGS IN THE SOFTWARE.
+
+Author: tjado <https://github.com/tejado>
+"""
+
+import json
+import os
+import sys
+from datetime import datetime
+from html import unescape
+from pprint import pprint
+
+import dateutil.parser
+from dateutil.tz import tzlocal
+
+from ebk_client import EbkClient
+
+# Credentials are taken from the environment so that no account data lives
+# in the repository. The four values correspond to the appid / apppw /
+# userid / userpw entries of the ebk-notify config file.
+try:
+    api = EbkClient(os.environ['EBK_APPID'], os.environ['EBK_APPPW'],
+                    os.environ['EBK_USERID'], os.environ['EBK_USERPW'])
+except KeyError as missing:
+    sys.exit(f"environment variable {missing} is not set")
+#pprint(api._http_get("/ads.json?priceType=FREE").json())
+#pprint(api._http_get("/ads/{}.json".format( 747663143 )).json())
+#my_ads = api.get_my_ads()
+ads = api.get_ads(zipcode="70378",q="Milchaufschäumer",distance=5,distanceUnit="KM")
+#ads = api.get_ads(zipcode="70378",categoryId=192,distance=2,distanceUnit="KM")
+
+
+def nice(t):
+    """Return ``t`` as JSON text indented by four spaces."""
+    return json.dumps(t,indent=4)
+
+now = datetime.now(tzlocal())
+print('Ads:')
+#print(ads)
+for ad in ads or []:
+    ad_id = ad.get('id', 0)
+    #pprint(api.get_ad_details(ad_id))
+    creation = ad.get('start-date-time', {}).get('value', '')
+    d = dateutil.parser.parse(creation)
+    # age of the ad in hours, relative to the local clock
+    age_in_h = (now - d).total_seconds() / 3600
+
+    title = ad.get('title', {}).get('value', '')
+    # stdlib unescape: does not depend on the client offering html_unescape
+    title_unescaped = unescape(title)
+    print( "{} ({}) -> {}".format(ad_id, age_in_h, title_unescaped) )
+
+#categories = api.get_categories()
+#print(nice(categories))
+#print('Subcategories of cat 80:\n\r{}'.format(nice(categories)))
+#
+#locations = api.get_location_by_name('70378')
+#print('Location by german postcode 70378:\n\r{}'.format(nice(locations)))
+
+# locations = api.get_location_by_coordinates(53.553155, 10.006151)
+# print('Location Latitude/Longitude:\n\r{}'.format(pprint.PrettyPrinter(indent=4).pformat(locations)))
+
+# category_attributes = api.get_category_attributes(88)
+# print('Category Attributes for cat 88:\n\r{}'.format(pprint.PrettyPrinter(indent=4).pformat(category_attributes)))
diff --git a/ebknotify/feed.py b/ebknotify/feed.py
new file mode 100644
index 0000000..ce0e4a2
--- /dev/null
+++ b/ebknotify/feed.py
@@ -0,0 +1,43 @@
+from feedgen.feed import FeedGenerator
+
+class Feed(FeedGenerator):
+    """Atom feed of cached ads; ``ident`` names the feed and its default file."""
+
+    def __init__(self,ident='root'):
+        """Fill in the feed-level metadata derived from ``ident``."""
+        super().__init__()
+        self.ident = ident
+        self.id(f'http://ebk.euer.krebsco.de/{ident}')
+        self.title(f'Ebay Kleinanzeigen Feed {ident}')
+        self.author({'name':'Felix Richter', 'email': 'ebk-notify@syntax-fehler.de'})
+        self.link( href=f'http://ebk.euer.krebsco.de/{ident}.atom', rel='self' )
+        self.subtitle(f'Results for ebay kleinanzeingen for identity {ident}')
+        self.language('de')
+
+    def load_cache(self,cache) -> None:
+        """Add one feed entry for every ad in ``cache.store``.
+
+        An entry is labelled 'available' or 'sold' depending on the ad's
+        ``notify['seen']`` flag; the label is part of the entry id and title.
+        """
+        for item in cache.store.values():
+            notify = item['notify']
+            url = notify['url']
+            available = 'available' if notify['seen'] else 'sold'
+            fe = self.add_entry()
+            fe.id(f"{url}#{available}")
+            fe.category({'term':item['category']['localized-name']['value']})
+            fe.link({'href':url,'title':'Link to item'})
+            fe.pubDate(notify['creation'])
+            fe.title(f"{item['title']['value']} ({available})")
+            # The cached image may be None (ad without picture); leave the
+            # <img> out instead of emitting src="None".
+            image = notify.get('image')
+            img_tag = f'<img src="{image}" />' if image else ''
+            # Not every ad dict is guaranteed to carry a description.
+            description = item.get('description', {}).get('value', '')
+            fe.content(f'''
+                <a href="{url}">
+                    {img_tag}
+                </a>
+                <div>
+                    {description}
+                </div>
+                ''',type='CDATA')
+
+    def add_article(self,entry) -> None:
+        """Placeholder; currently does nothing."""
+        pass
+
+    def to_atom(self) -> str:
+        """Return the pretty-printed Atom document produced by ``atom_str``."""
+        return self.atom_str(pretty=True)
+
+    def to_atom_file(self,filename=None) -> str:
+        """Write the Atom document to ``filename`` and return the path used.
+
+        ``filename`` defaults to ``<ident>.atom`` in the current directory.
+        A leading ``~`` is expanded and missing parent directories are
+        created, so the default ``--outdir`` of the CLI works out of the box.
+        """
+        import os  # local import: this module has no os import at file level
+        filename = os.path.expanduser(filename or f'{self.ident}.atom')
+        parent = os.path.dirname(filename)
+        if parent:
+            os.makedirs(parent, exist_ok=True)
+        self.atom_file(filename)
+        return filename
diff --git a/ebknotify/raw.py b/ebknotify/raw.py
index 5e4adfd..36fb6a5 100644
--- a/ebknotify/raw.py
+++ b/ebknotify/raw.py
@@ -1,24 +1,24 @@
-""" usage: ebk-raw [options] CONFIG URLPATH [PARAMS...]
+""" usage: ebk-raw [options] URLPATH [PARAMS...]
options:
--lol=LOL Log Level [Default: INFO]
--method=METHOD HTTP Method to use [Default: GET]
+ --config=FILE Path to config file [Default: ~/.config/ebk-notify/ebk.yml]
--xml do not try to decode as json but xml instead
PARAMS are the parameters sent via http method to URLPATH
examples:
- ebk-raw ebk.yml /ads/search-metadata.json
- ebk-raw ebk.yml --xml /ads/search-metadata
- ebk-raw ebk.yml /ads.json categoryId=216 zipcode=70435 distance=50 distanceUnit=KM
+ ebk-raw /ads/search-metadata.json
+ ebk-raw --xml /ads/search-metadata
+ ebk-raw /ads.json categoryId=216 zipcode=70435 distance=50 distanceUnit=KM
"""
from docopt import docopt
from .client import EbkClient
-import yaml
import json
import dateutil
import logging
-from .common import set_lol
+from .common import set_lol,load_config
log = logging.getLogger('ebk-raw')
@@ -34,7 +34,7 @@ def main():
set_lol(args['--lol'],log)
suffix = args['URLPATH']
- config = yaml.safe_load(open(args['CONFIG']))
+ config = load_config(args['--config'])
api = EbkClient(config['main']['appid'],
config['main']['apppw'],
config['main']['userid'],