main.rs 7.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256
  1. #[macro_use]
  2. extern crate serde_derive;
  3. use reqwest;
  4. use chrono::{Utc, Duration, Local, DateTime};
  5. use select::document::Document;
  6. use select::predicate::Name;
  7. use parse_duration::parse;
  8. use serde_json;
  9. use std::io::BufWriter;
  10. use std::io::BufReader;
  11. use std::fs::File;
  12. use std::{thread, time};
  13. /// Fucking egun is a mess. It does not even use css and is built using tables. This is an attempt to parse it.
  14. #[derive(Serialize, Deserialize, Debug, Default, Clone)]
  15. struct Auction {
  16. price: f32,
  17. desc: String,
  18. gcal: String,
  19. thumb: String,
  20. remaining: i64,
  21. url: String,
  22. timestamp: i64,
  23. is_price_final: bool
  24. }
  25. #[derive(Serialize, Deserialize, Debug, Clone)]
  26. struct Query {
  27. url: String,
  28. auctions: Vec<Auction>,
  29. frequency: i64,
  30. }
  31. impl Query {
  32. fn run(&mut self) {
  33. self.auctions = parse_url(&self.url);
  34. }
  35. fn detect_frequency(&self) {
  36. let _a = &self.auctions
  37. .iter()
  38. .map(|x| x)
  39. .collect::<Vec<_>>();
  40. }
  41. }
  42. impl Default for Query {
  43. fn default () -> Query {
  44. Query{
  45. frequency: Duration::minutes(5).num_seconds(),
  46. url: "".to_string(),
  47. auctions: vec![]
  48. }
  49. }
  50. }
  51. #[derive(Serialize, Deserialize, Debug, Default)]
  52. struct Account {
  53. queries: Vec<Query>,
  54. name: String,
  55. }
  56. fn date_to_gcal(date: DateTime<Local>) -> String {
  57. format!("{}",
  58. date.format("%Y%m%dT%H%M00/%Y%m%dT%H%M00")
  59. )
  60. }
  61. /// Parse a relative offset "x Tage HH:MM" into proper time
  62. // fn parse_end_date(timestring: &String) -> Option<DateTime<Local>> {
  63. // match parse(&format!("{} minutes", timestring.replace(":", " hours ").replace("Tage", "days"))).ok() {
  64. // Some(old_duration) => match Duration::from_std(old_duration).ok() {
  65. // Some(chronoduration) => Some(Utc::now().with_timezone(&Local) + chronoduration),
  66. // None => None
  67. // }
  68. // None => None
  69. // }
  70. // }
  71. /// Parse a relative offset "x Tage HH:MM" into proper time
  72. fn parse_remaining(timestring: &String) -> Option<Duration> {
  73. match parse(&format!("{} minutes", timestring.replace(":", " hours ").replace("Tage", "days"))).ok() {
  74. Some(old_duration) => Duration::from_std(old_duration).ok(),
  75. None => None
  76. }
  77. }
  /// Parses a German-formatted price such as "1.234,56 EUR" into a number.
  ///
  /// Thousands separators (".") are removed, the decimal comma becomes a
  /// decimal point and the " EUR" suffix is dropped. Whitespace left around the
  /// number (text taken from HTML is often padded with spaces or newlines) is
  /// trimmed before parsing, so padded prices are no longer silently rejected.
  ///
  /// Returns `None` when the remaining text is not a valid number.
  fn parse_price(price: &str) -> Option<f32> {
      price
          .replace('.', "")
          .replace(',', ".")
          .replace(" EUR", "")
          .trim()
          .parse()
          .ok()
  }
  85. fn parse_url(url: &str) -> Vec<Auction> {
  86. let mut auctions = vec![];
  87. if let Ok(mut resp) = reqwest::get(url) {
  88. if !resp.status().is_success() {
  89. return auctions;
  90. }
  91. let text = resp.text().unwrap_or("".to_string());
  92. for node in Document::from_read(text.as_bytes())
  93. .unwrap()
  94. // .find(Name("a"))
  95. // .filter(|n| n.attr("href").is_some())
  96. // .filter(|n| n.attr("href").unwrap().contains("item.php?id="))
  97. .find(Name("tr"))
  98. .filter(|x| x.attr("bgcolor").is_some())
  99. .filter(|x| x.attr("align").is_none())
  100. {
  101. // Get auction name
  102. if let Some(name) = node
  103. .children().into_iter()
  104. .filter(|x| x.name() == Some("td"))
  105. .filter(|x| x.attr("align") == Some("LEFT"))
  106. .flat_map(|x| x.children())
  107. .filter(|x| x.name() == Some("a"))
  108. .flat_map(|x| x.children())
  109. .map(|x| x.text())
  110. // .filter(|x| x.name() == Some("Text"))
  111. .collect::<Vec<_>>().get(0) {
  112. // If we have the name, go on finding other details
  113. // instantiate mutable Auction
  114. let mut auction = Auction::default();
  115. auction.desc = name.clone();
  116. auction.thumb = format!("http://egun.de/market/images/picture.gif");
  117. // get image
  118. if let Some(img) = node
  119. .children().into_iter()
  120. .filter(|x| x.name() == Some("td"))
  121. .filter(|x| x.attr("align") == Some("center"))
  122. .flat_map(|x| x.descendants())
  123. .filter(|x| x.name() == Some("img"))
  124. .filter(|x| match x.attr("src") {
  125. Some(src) => src.contains("cache"),
  126. None => false
  127. } )
  128. .map(|x| x.attr("src").unwrap()) // we just tested
  129. .collect::<Vec<_>>().get(0) {
  130. auction.thumb = format!("http://egun.de/market/{}", img);
  131. }
  132. // get price
  133. if let Some(price) = node
  134. .children()
  135. .filter(|x| x.text().contains("EUR"))
  136. .flat_map(|x| x.children())
  137. .map(|x| parse_price(&x.text()))
  138. .flat_map(|x| x)
  139. .collect::<Vec<_>>().get(0) {
  140. auction.price = price.clone();
  141. }
  142. // get article url
  143. if let Some(url) = node
  144. .children().into_iter()
  145. .filter(|x| x.name() == Some("td"))
  146. .filter(|x| x.attr("align") == Some("LEFT"))
  147. .flat_map(|x| x.children())
  148. .filter(|x| x.name() == Some("a"))
  149. .map(|x| x.attr("href"))
  150. .filter_map(|x| x)
  151. .collect::<Vec<_>>().get(0) {
  152. auction.url = format!("http://egun.de/market/{}", url);
  153. }
  154. // TODO: check if https://doc.rust-lang.org/std/time/struct.SystemTime.html works too
  155. if let Some(remaining) = parse_remaining(
  156. &node
  157. .children()
  158. .filter(|x| x.attr("align") == Some("center"))
  159. .filter(|x| x.attr("nowrap").is_some())
  160. .flat_map(|x| x.children())
  161. .filter(|x| !x.text().is_empty())
  162. .map(|x| x.text())
  163. .collect::<Vec<_>>()
  164. .join(" ")
  165. ){
  166. // dbg!(&t_remaining.children());
  167. let end_date = Utc::now().with_timezone(&Local) + remaining;
  168. auction.gcal = format!("http://www.google.com/calendar/event?action=TEMPLATE&dates={}&text={}&location=&details=", date_to_gcal(end_date), auction.desc);
  169. auction.remaining = remaining.num_seconds();
  170. auction.timestamp = end_date.timestamp();
  171. // println!("ENDS\t{:?}", date_to_gcal(remaining));
  172. }
  173. auctions.push(auction);
  174. }
  175. }
  176. }
  177. auctions
  178. }
  179. fn daemon(queries: Vec<Query>) {
  180. println!("Starting daemon with {}", queries.len());
  181. loop {
  182. let mut auctions = vec![];
  183. for mut query in queries.clone() {
  184. query.run();
  185. auctions.extend(query.auctions);
  186. }
  187. println!("{} auctions found", auctions.len());
  188. let writer = BufWriter::new(File::create("db.json").unwrap());
  189. serde_json::to_writer_pretty(writer, &auctions).unwrap();
  190. let pause = time::Duration::from_secs(300);
  191. thread::sleep(pause);
  192. }
  193. }
  194. fn main() {
  195. let reader = BufReader::new(File::open("urls.json").unwrap());
  196. let urls: Vec<String> = serde_json::from_reader(reader).unwrap_or(vec![]);
  197. daemon(
  198. urls.iter()
  199. .map(|x| Query {url: x.to_string(), ..Default::default()})
  200. .collect::<Vec<Query>>()
  201. );
  202. }