ma rss: init sofa
This commit is contained in:
parent
3200f87dec
commit
29ebc7131e
1
2configs/deployment/rss/sofa-urls
Normal file
1
2configs/deployment/rss/sofa-urls
Normal file
|
@ -0,0 +1 @@
|
|||
https://www.ebay-kleinanzeigen.de/s-ditzingen/preis::50/sofa/k0l8863r10
|
26
2configs/deployment/rss/sofa.nix
Normal file
26
2configs/deployment/rss/sofa.nix
Normal file
|
@ -0,0 +1,26 @@
|
|||
# NixOS module: runs ratt on a timer against the URL list in ./sofa-urls and
# serves the generated file through nginx at /ratt/sofa.xml on ${fqdn}.
{ pkgs, lib, config, ... }:
let
  fqdn = "rss.euer.krebsco.de";
  # No trailing slash here: every user below appends its own "/<file>", and a
  # trailing slash would yield paths such as "/var/lib/ratt//sofa.xml".
  ratt-path = "/var/lib/ratt";
  out-path = "${ratt-path}/sofa.xml";
in {
  # State directory, owned by the same user the service runs as.
  systemd.tmpfiles.rules = ["d ${ratt-path} 0750 nginx nginx - -" ];
  systemd.services.run-ratt-sofa = {
    enable = true;
    path = with pkgs; [ ratt xmlstarlet ];
    # The script body lives next to this file; it receives the URL list and
    # the output path as its two positional arguments.
    script = builtins.readFile ./ratt-hourly.sh;
    scriptArgs = "${./sofa-urls} ${out-path}";

    preStart = "install -v -m750 ${./sofa.yml} ${ratt-path}/sofa.yml"; # ratt requires the config file in the cwd
    serviceConfig.User = "nginx";
    serviceConfig.WorkingDirectory = ratt-path;
    startAt = "00/3:30"; # every 3 hours, fetch latest
  };

  services.nginx.virtualHosts."${fqdn}" = {
    # Exact-match location that maps straight onto the generated feed file.
    locations."=/ratt/sofa.xml" = {
      alias = out-path;
    };
  };
}
|
||||
|
59
2configs/deployment/rss/sofa.yml
Normal file
59
2configs/deployment/rss/sofa.yml
Normal file
|
@ -0,0 +1,59 @@
|
|||
# ratt configuration for eBay Kleinanzeigen search result pages.
# The multi-line values below are Lua snippets; each one emits its result
# with print().
regex: https://www.ebay\-kleinanzeigen.de/s\-.*
selectors:
  httpsettings:
    cookie: {}
    header: {}
    useragent: Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko)
      Chrome/90.0.4430.72 Safari/537.36
    insecure: false
  feed:
    title: title
    authorname: ""
    authoremail: ""
  item:
    container: ul[id='srchrslt-adtable'] li[class='ad-listitem lazyload-item ']
    title: |
      -- Heading text with surrounding whitespace trimmed.
      local title = sel:find("h2.text-module-begin"):first():text():gsub("^%s*(.-)%s*$", "%1")
      print(title)
    link: |
      -- href is site-relative, so the host is prepended.
      local link = sel:find("a"):first():attr("href")
      print("https://www.ebay-kleinanzeigen.de" .. link)
    created: |-
      -- Normalises the listing's date text to the layout given in createdformat.
      -- NOTE(review): "Heute"/"Gestern" are resolved with the host's clock and
      -- the zone is always labelled CET, also during summer time -- confirm
      -- that this is acceptable.
      local created = ""
      sel:find("div.aditem-main--top--right"):each(function(i, s)
        created = s:text():gsub("^%s*(.-)%s*$", "%1")
      end)
      if created:match("Heute") then
        -- keep only what follows the last comma and put today's date in front
        local time = created:gsub("^.*,", "")
        print(os.date("%d.%m.%Y") .. time .. " CET")
        return
      end
      if created:match("Gestern") then
        -- same as above, with yesterday's date
        local time = created:gsub("^.*,", "")
        print(os.date("%d.%m.%Y", os.time()-24*60*60) .. time .. " CET")
        return
      end
      -- "%." is a literal dot in a Lua pattern; a bare "." matches any character,
      -- which would send every non-empty value down this branch.
      if created:match("%.") then
        print(created .. " 00:00 CET")
        return
      end
    createdformat: 02.01.2006 15:04 MST
    description: |-
      -- Item description followed by the contents of the top-left block.
      local description = sel:find(".aditem-main--middle"):html()
      local place = sel:find(".aditem-main--top--left"):html()
      print(description .. place)
    content: ""
    image: |
      local img = sel:find("div.imagebox"):first():attr("data-imgsrc")
      -- Emit nothing when the attribute is missing or empty.
      if img ~= nil and img ~= "" then
        -- prepend host if needed; anchored so that only a leading scheme counts
        if not img:match("^https?://") then
          img = "https://www.ebay-kleinanzeigen.de" .. img
        end
        print(img)
      end
  nextpage: |
    local nextpage = sel:find("link[rel=next]"):attr("href")
    -- Without a rel=next link there is nothing to follow; printing the bare
    -- host would point pagination at the site root.
    if nextpage ~= nil and nextpage ~= "" then
      print("https://www.ebay-kleinanzeigen.de" .. nextpage)
    end
  nextpagecount: 5
  sort: ""
|
Loading…
Reference in a new issue