| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256 |
- #[macro_use]
- extern crate serde_derive;
- use reqwest;
- use chrono::{Utc, Duration, Local, DateTime};
- use select::document::Document;
- use select::predicate::Name;
- use parse_duration::parse;
- use serde_json;
- use std::io::BufWriter;
- use std::io::BufReader;
- use std::fs::File;
- use std::{thread, time};
- /// Fucking egun is a mess. It does not even use css and is built using tables. This is an attempt to parse it.
- #[derive(Serialize, Deserialize, Debug, Default, Clone)]
- struct Auction {
- price: f32,
- desc: String,
- gcal: String,
- thumb: String,
- remaining: i64,
- url: String,
- timestamp: i64,
- is_price_final: bool
- }
- #[derive(Serialize, Deserialize, Debug, Clone)]
- struct Query {
- url: String,
- auctions: Vec<Auction>,
- frequency: i64,
- }
- impl Query {
- fn run(&mut self) {
- self.auctions = parse_url(&self.url);
- }
- fn detect_frequency(&self) {
- let _a = &self.auctions
- .iter()
- .map(|x| x)
- .collect::<Vec<_>>();
- }
- }
- impl Default for Query {
- fn default () -> Query {
- Query{
- frequency: Duration::minutes(5).num_seconds(),
- url: "".to_string(),
- auctions: vec![]
- }
- }
- }
- #[derive(Serialize, Deserialize, Debug, Default)]
- struct Account {
- queries: Vec<Query>,
- name: String,
- }
- fn date_to_gcal(date: DateTime<Local>) -> String {
- format!("{}",
- date.format("%Y%m%dT%H%M00/%Y%m%dT%H%M00")
- )
- }
- /// Parse a relative offset "x Tage HH:MM" into proper time
- // fn parse_end_date(timestring: &String) -> Option<DateTime<Local>> {
- // match parse(&format!("{} minutes", timestring.replace(":", " hours ").replace("Tage", "days"))).ok() {
- // Some(old_duration) => match Duration::from_std(old_duration).ok() {
- // Some(chronoduration) => Some(Utc::now().with_timezone(&Local) + chronoduration),
- // None => None
- // }
- // None => None
- // }
- // }
- /// Parse a relative offset "x Tage HH:MM" into proper time
- fn parse_remaining(timestring: &String) -> Option<Duration> {
- match parse(&format!("{} minutes", timestring.replace(":", " hours ").replace("Tage", "days"))).ok() {
- Some(old_duration) => Duration::from_std(old_duration).ok(),
- None => None
- }
- }
/// Parses a German-formatted price such as "1.234,56 EUR" into a number.
///
/// Thousands separators (`.`) are removed, the decimal comma becomes a
/// decimal point, and the " EUR" suffix is dropped. Surrounding whitespace
/// is ignored, because text taken from HTML nodes is often padded with
/// spaces, newlines or non-breaking spaces.
///
/// Returns `None` when the remaining text is not a finite number. Rust's
/// float parser also accepts words like "inf" and "NaN", which are never
/// valid prices, so those are rejected too.
fn parse_price(price: &str) -> Option<f32> {
    price
        .replace('.', "")
        .replace(',', ".")
        .replace(" EUR", "")
        .trim()
        .parse::<f32>()
        .ok()
        .filter(|amount| amount.is_finite())
}
- fn parse_url(url: &str) -> Vec<Auction> {
- let mut auctions = vec![];
- if let Ok(mut resp) = reqwest::get(url) {
- if !resp.status().is_success() {
- return auctions;
- }
- let text = resp.text().unwrap_or("".to_string());
- for node in Document::from_read(text.as_bytes())
- .unwrap()
- // .find(Name("a"))
- // .filter(|n| n.attr("href").is_some())
- // .filter(|n| n.attr("href").unwrap().contains("item.php?id="))
- .find(Name("tr"))
- .filter(|x| x.attr("bgcolor").is_some())
- .filter(|x| x.attr("align").is_none())
- {
- // Get auction name
- if let Some(name) = node
- .children().into_iter()
- .filter(|x| x.name() == Some("td"))
- .filter(|x| x.attr("align") == Some("LEFT"))
- .flat_map(|x| x.children())
- .filter(|x| x.name() == Some("a"))
- .flat_map(|x| x.children())
- .map(|x| x.text())
- // .filter(|x| x.name() == Some("Text"))
- .collect::<Vec<_>>().get(0) {
- // If we have the name, go on finding other details
- // instantiate mutable Auction
- let mut auction = Auction::default();
- auction.desc = name.clone();
- auction.thumb = format!("http://egun.de/market/images/picture.gif");
- // get image
- if let Some(img) = node
- .children().into_iter()
- .filter(|x| x.name() == Some("td"))
- .filter(|x| x.attr("align") == Some("center"))
- .flat_map(|x| x.descendants())
- .filter(|x| x.name() == Some("img"))
- .filter(|x| match x.attr("src") {
- Some(src) => src.contains("cache"),
- None => false
- } )
- .map(|x| x.attr("src").unwrap()) // we just tested
- .collect::<Vec<_>>().get(0) {
- auction.thumb = format!("http://egun.de/market/{}", img);
- }
- // get price
- if let Some(price) = node
- .children()
- .filter(|x| x.text().contains("EUR"))
- .flat_map(|x| x.children())
- .map(|x| parse_price(&x.text()))
- .flat_map(|x| x)
- .collect::<Vec<_>>().get(0) {
- auction.price = price.clone();
- }
- // get article url
- if let Some(url) = node
- .children().into_iter()
- .filter(|x| x.name() == Some("td"))
- .filter(|x| x.attr("align") == Some("LEFT"))
- .flat_map(|x| x.children())
- .filter(|x| x.name() == Some("a"))
- .map(|x| x.attr("href"))
- .filter_map(|x| x)
- .collect::<Vec<_>>().get(0) {
- auction.url = format!("http://egun.de/market/{}", url);
- }
-
- // TODO: check if https://doc.rust-lang.org/std/time/struct.SystemTime.html works too
- if let Some(remaining) = parse_remaining(
- &node
- .children()
- .filter(|x| x.attr("align") == Some("center"))
- .filter(|x| x.attr("nowrap").is_some())
- .flat_map(|x| x.children())
- .filter(|x| !x.text().is_empty())
- .map(|x| x.text())
- .collect::<Vec<_>>()
- .join(" ")
- ){
- // dbg!(&t_remaining.children());
- let end_date = Utc::now().with_timezone(&Local) + remaining;
- auction.gcal = format!("http://www.google.com/calendar/event?action=TEMPLATE&dates={}&text={}&location=&details=", date_to_gcal(end_date), auction.desc);
- auction.remaining = remaining.num_seconds();
- auction.timestamp = end_date.timestamp();
- // println!("ENDS\t{:?}", date_to_gcal(remaining));
- }
- auctions.push(auction);
- }
- }
- }
- auctions
- }
- fn daemon(queries: Vec<Query>) {
-
-
- println!("Starting daemon with {}", queries.len());
- loop {
- let mut auctions = vec![];
- for mut query in queries.clone() {
- query.run();
- auctions.extend(query.auctions);
- }
- println!("{} auctions found", auctions.len());
- let writer = BufWriter::new(File::create("db.json").unwrap());
- serde_json::to_writer_pretty(writer, &auctions).unwrap();
- let pause = time::Duration::from_secs(300);
- thread::sleep(pause);
- }
- }
- fn main() {
-
- let reader = BufReader::new(File::open("urls.json").unwrap());
- let urls: Vec<String> = serde_json::from_reader(reader).unwrap_or(vec![]);
- daemon(
- urls.iter()
- .map(|x| Query {url: x.to_string(), ..Default::default()})
- .collect::<Vec<Query>>()
- );
- }
|