1 module dcc.engine.tribune;
2 
3 private import dcc.common;
4 
5 private import std.net.curl;
6 private import std.xml;
7 private import std.signals;
8 private import std.datetime;
9 private import std.conv;
10 private import std.stdio;
11 private import std.string;
12 private import std.algorithm;
13 private import std.uri;
14 private import std.array;
15 private import std.regex : regex, replace, ctRegex, matchAll;
16 
17 private import std.concurrency : send, receive;
18 
19 private static import std.regex;
20 
21 import core.time;
22 
// Recognizes a clock reference inside a message. The pattern is assembled
// from one literal per named capture group:
//   time    - HH:MM with optional :SS (hour 0-23, minute/second 00-59)
//   index   - optional disambiguator: ¹, ² or ³, or ':' / '^' plus one digit
//   tribune - optional '@' followed by ASCII letters
static auto CLOCK_REGEX = std.regex.regex(
	`(?P<time>(?:(?:[01]?[0-9])|(?:2[0-3])):(?:[0-5][0-9])(?::(?:[0-5][0-9]))?)`
	~ `(?P<index>(?:(?:[:\^][0-9])|¹|²|³)?)`
	~ `(?P<tribune>(?:@[A-Za-z]*)?)`
);
39 
// Global ("g") pattern used to find control characters in backend text.
// GDC seems to have problems with Unicode classes, so that compiler gets an
// explicit \x00-\x1F range instead of the \p{Control} class.
version (GNU)
	static auto CONTROL_CHARS_REGEX = std.regex.regex(`[\x00-\x1F]`, "g");
else
	static auto CONTROL_CHARS_REGEX = std.regex.regex(`\p{Control}`, "g");
46 
/**
 * A tribune (message board) reached over HTTP.
 *
 * Holds the board's configuration, downloads and parses its backend
 * (XML or tab-separated values), keeps every post seen so far and emits
 * the `new_post` signal for each post that was not known before.
 */
class Tribune {
	/// Primary name of the tribune; also the default tribune of clock references.
	string name;
	/// Alternative names accepted by matches_name().
	string[] aliases;
	/// URL that post() sends messages to.
	string post_url;
	/// Request body template; "%s" is replaced by the URI-encoded message.
	string post_format;
	/// URL the backend is downloaded from (also sent as Referer when posting).
	string backend_url;
	/// Value of the Cookie header sent when posting; skipped when empty.
	string cookie;
	/// User-Agent template used when posting; "%v" is replaced by VERSION.
	string ua;
	/// Refresh setting; not used inside this class (presumably an interval - unit not visible here).
	int refresh;
	/// When true, XML message bodies are entity-decoded before tag cleanup.
	bool tags_encoded;
	/// Color setting; not used inside this class.
	string color;
	/// Login on this tribune; Post.mine() compares post logins against it.
	string login;

	/// Local UTC time minus the time announced by the server's Date header.
	Duration time_offset;
	/// When true, the server's Date header is ignored and time_offset is left untouched.
	bool unreliable_date = false;
	/// UTC time of the last fetch_posts() call.
	SysTime last_update;
	/// "xml" (default) or "tsv"; switched to "tsv" by fetch_backend() based on Content-Type.
	string backend_type = "xml";

	/// Every post seen so far, keyed by post id.
	Post[string] posts;
	/// Emitted by fetch_posts() once per newly discovered post.
	mixin Signal!(Post) new_post;

	/// Id reported by the server (X-Post-Id header) for the last message sent by post().
	string last_posted_id;

	/**
	 * Minimal constructor: only the backend location and its encoding flag.
	 */
	this(string backend_url, bool tags_encoded) {
		this.backend_url = backend_url;
		this.tags_encoded = tags_encoded;
	}

	/**
	 * Full constructor; each argument is stored in the field of the same name.
	 */
	this(string name, string[] aliases, string post_url, string post_format, string backend_url, string cookie, string ua, int refresh, bool tags_encoded, string color, string login) {
		this.name = name;
		this.aliases = aliases;
		this.post_url = post_url;
		this.post_format = post_format;
		this.backend_url = backend_url;
		this.cookie = cookie;
		this.ua = ua;
		this.refresh = refresh;
		this.tags_encoded = tags_encoded;
		this.color = color;
		this.login = login;
	}

	/**
	 * Tells whether `name` designates this tribune.
	 *
	 * Returns: true when `name` equals the primary name or one of the aliases.
	 */
	bool matches_name(string name) {
		return name == this.name || this.aliases.canFind(name);
	}

	/**
	 * Orders post ids: shorter ids first, equal-length ids lexicographically.
	 *
	 * For decimal ids without leading zeros this is numeric order, so "99"
	 * comes before "100" (a plain string comparison puts "100" first).
	 * Ids of equal length compare exactly as plain strings do.
	 */
	private static bool id_less(string a, string b) {
		return a.length != b.length ? a.length < b.length : a < b;
	}

	/**
	 * Downloads the backend, stores the posts that were not known yet and
	 * emits `new_post` for each of them, in id order.
	 *
	 * New posts sharing the same clock (HH:MM:SS) are numbered through
	 * their `index` field: the first one gets 1, the next ones 2, 3, ...
	 * A post alone in its second keeps index 0.
	 *
	 * Returns: false when nothing could be downloaded (fetch_backend()
	 *          returned an empty string), true otherwise.
	 */
	bool fetch_posts() {
		string backend = this.fetch_backend();
		this.last_update = std.datetime.Clock.currTime(UTC());

		// fetch_backend() yields "" on a curl error or an empty body. There is
		// nothing to parse then, and std.xml's DocumentParser requires a
		// non-empty document.
		if (backend.length == 0) {
			return false;
		}

		Post[] fetched = this.parse_backend(backend).values;
		fetched.sort!((a, b) => id_less(a.post_id, b.post_id));

		// Insert the unknown posts, remembering them in id order.
		Post[] fresh;
		Post previous;
		foreach (Post post; fetched) {
			if (post.post_id in this.posts) {
				continue;
			}

			this.posts[post.post_id] = post;

			if (previous !is null && post.clock == previous.clock) {
				// Second post within the same second: the first one
				// retroactively becomes number 1.
				if (previous.index == 0) {
					previous.index = 1;
				}

				post.index = previous.index + 1;
			}

			previous = post;
			fresh ~= post;
		}

		// Indices are final now, so listeners can be notified.
		foreach (Post post; fresh) {
			this.new_post.emit(post);
		}

		return true;
	}

	/**
	 * Parses a backend document according to `backend_type`.
	 *
	 * Returns: the parsed posts keyed by id; anything other than "tsv" is
	 *          handled as XML.
	 */
	Post[string] parse_backend(string source) {
		switch (this.backend_type) {
			case "tsv":
				return this.parse_backend_tsv(source);
			case "xml":
			default:
				return this.parse_backend_xml(source);
		}
	}

	/**
	 * Parses a tab-separated backend: one post per line with exactly five
	 * fields (id, timestamp, info, login, message). Lines with any other
	 * field count are skipped. Field values are stored as-is.
	 */
	Post[string] parse_backend_tsv(string source) {
		Post[string] posts;

		foreach (string line; source.splitter('\n')) {
			auto fields = line.splitter('\t').array;

			if (fields.length != 5) {
				continue;
			}

			Post post = new Post();
			// The tribune must be set first: the timestamp setter reads it.
			post.tribune = this;

			post.post_id   = fields[0];
			post.timestamp = fields[1];
			post.info      = fields[2];
			post.login     = fields[3];
			post.message   = fields[4];

			if (post.post_id == this.last_posted_id) {
				post.mine = true;
			}

			posts[post.post_id] = post;
		}

		return posts;
	}

	/**
	 * Parses an XML backend made of <post id="..." time="..."> elements
	 * containing <info>, <message> and <login> children.
	 *
	 * Text is stripped, control characters become spaces, and clock/CDATA
	 * markup is removed; messages are entity-decoded first when
	 * `tags_encoded` is set.
	 */
	Post[string] parse_backend_xml(string source) {
		// TODO: error handling (the document is not validated before parsing).

		auto document = new DocumentParser(source);

		Post[string] posts;

		document.onStartTag["post"] = (ElementParser element) {
			Post post = new Post();
			// The tribune must be set first: the timestamp setter reads it.
			post.tribune = this;
			post.post_id = element.tag.attr["id"];
			if (post.post_id == this.last_posted_id) {
				post.mine = true;
			}
			post.timestamp = element.tag.attr["time"];

			element.onEndTag["info"] = (in Element e) {
				post.info = replace(e.text().strip(), CONTROL_CHARS_REGEX, " ");
				post.info = this.tags_cleanup(post.info);
			};
			element.onEndTag["message"] = (in Element e) {
				post.message = replace(e.text().strip(), CONTROL_CHARS_REGEX, " ");

				if (this.tags_encoded) {
					post.message = this.tags_decode(post.message);
				}

				post.message = this.tags_cleanup(post.message);
			};
			element.onEndTag["login"] = (in Element e) {
				post.login = replace(e.text().strip(), CONTROL_CHARS_REGEX, " ");
				post.login = this.tags_cleanup(post.login);
			};

			// Runs the three handlers above for this <post> element.
			element.parse();

			posts[post.post_id] = post;
		};

		document.parse();

		// The parsers are left to the garbage collector; the `delete`
		// operator previously used here is deprecated.
		return posts;
	}

	/**
	 * Decodes XML entities in `source` (via std.xml.decode).
	 */
	string tags_decode(string source) {
		return std.xml.decode(source);
	}

	/**
	 * Removes <clock ...> / </clock> tags and CDATA delimiters from
	 * `source`, keeping the text they enclose.
	 */
	string tags_cleanup(string source) {
		source = source.replace(regex(`<clock[^>]*>`, "g"), "");
		source = std.array.replace(source, `</clock>`, "");
		source = std.array.replace(source, `<![CDATA[`, "");
		source = std.array.replace(source, `]]>`, "");
		return source;
	}

	/**
	 * Downloads the backend document.
	 *
	 * Side effects: sets `backend_type` to "tsv" when the Content-Type
	 * header contains "text/tab-separated-values", and refreshes
	 * `time_offset` from the Date header unless `unreliable_date` is set.
	 *
	 * Returns: the response body, or "" on a curl error or an empty body.
	 */
	string fetch_backend() {
		auto connection = HTTP();
		connection.addRequestHeader("User-Agent", "DCoinCoin/" ~ VERSION);
		connection.operationTimeout(2.seconds);

		ubyte[] backend;
		try {
			backend = get!(HTTP, ubyte)(this.backend_url, connection);

			auto headers = connection.responseHeaders;

			if (auto content_type = "content-type" in headers) {
				if ((*content_type).canFind("text/tab-separated-values")) {
					this.backend_type = "tsv";
				}
			}

			if (!this.unreliable_date) {
				if (auto date = "date" in headers) {
					try {
						SysTime now = std.datetime.Clock.currTime(UTC());
						SysTime tribuneTime = parseRFC822DateTime(*date);

						this.time_offset = now - tribuneTime;
					} catch (DateTimeException e) {
						// Unparsable Date header: keep the previous offset.
					}
				}
			}
		} catch (CurlException e) {
			return "";
		}

		return backend.length > 0 ? cast(string) backend : "";
	}

	/**
	 * Sends `message` to the tribune.
	 *
	 * The message is URI-encoded and substituted for "%s" in
	 * `post_format`. When the server answers with an X-Post-Id header,
	 * its value is remembered in `last_posted_id`.
	 *
	 * Returns: false on a curl error, otherwise whether the HTTP status
	 *          code is below 300.
	 */
	bool post(string message) {
		auto connection = HTTP();
		connection.addRequestHeader("User-Agent", std.array.replace(this.ua, "%v", VERSION));
		connection.addRequestHeader("Referer", this.backend_url);
		connection.operationTimeout(2.seconds);

		if (this.cookie.length) {
			connection.addRequestHeader("Cookie", this.cookie);
		}

		string data = std.array.replace(this.post_format, "%s", message.encodeComponent());
		try {
			// Fully qualified: a bare `post` would name this method.
			std.net.curl.post(this.post_url, data, connection);
			if (auto id = "x-post-id" in connection.responseHeaders) {
				this.last_posted_id = *id;
			}
		} catch (CurlException e) {
			return false;
		}

		return connection.statusLine.code < 300;
	}
}
298 
/**
 * A clock reference found in a post's message.
 *
 * Field order matters: instances are built positionally as
 * Clock(time, index, tribune, text, post).
 */
struct Clock {
	string time;            // the time part of the reference (HH:MM or HH:MM:SS)
	int index;              // which post within that time is meant
	string tribune, text;   // target tribune name; full text of the reference
	Post post;              // the post whose message contains the reference
}
306 
/**
 * A single message of a tribune.
 */
class Post {
	/// Identifier of the post on its tribune.
	string post_id;
	/// Raw timestamp string as received from the backend.
	string _timestamp;
	/// Time parsed from the timestamp (only set for 14-character timestamps).
	SysTime time;
	/// `time` shifted by the tribune's `time_offset`.
	SysTime real_time;

	string info = "";
	string message = "";
	string login = "";

	/// Rank among posts sharing the same clock; 0 when the post is alone in its second.
	int index = 0;

	/// Tribune the post belongs to; must be set before assigning `timestamp`.
	Tribune tribune;

	/// Explicit "this post is mine" flag, see mine().
	bool _mine;

	/**
	 * Returns: "<clock> <login>> <message>".
	 */
	override string toString() {
		return this.clock ~ " " ~ this.login ~ "> " ~ this.message;
	}

	/**
	 * Explicitly flags (or unflags) the post as written by the local user.
	 */
	void mine(bool mine) {
		this._mine = mine;
	}

	/**
	 * Tells whether the post was written by the local user.
	 *
	 * Returns: true when the post was explicitly flagged, when its
	 *          non-empty login equals the tribune's login, or when its
	 *          non-empty info equals the tribune's `ua`.
	 */
	bool mine() {
		if (this._mine) {
			return true;
		}

		if (this.login.length && this.login == this.tribune.login) {
			return true;
		}

		if (this.info.length && this.info == this.tribune.ua) {
			return true;
		}

		return false;
	}

	/**
	 * Returns: the clock references found in the message (see analyze_clocks()).
	 */
	Clock[] clocks() {
		return this.analyze_clocks();
	}

	/**
	 * Scans the message for clock references.
	 *
	 * Each reference yields a Clock carrying its time part, its index
	 * (1 when none is written), the referenced tribune name (this post's
	 * tribune name when no "@name" suffix is present), the full matched
	 * text and this post.
	 */
	Clock[] analyze_clocks() {
		Clock[] clocks;

		foreach (capture; this.message.matchAll(CLOCK_REGEX)) {
			int index = 1;

			// The index is either a superscript digit or ':' / '^' followed
			// by a digit; decode to dchar so the superscripts compare as
			// single characters.
			if (capture["index"].length > 0) switch (to!dstring(capture["index"])[0]) {
				case ':':
				case '^':
					try {
						index = to!int(capture["index"][1 .. $]);
					}
					catch (Exception e) {
						// Let's keep index to 1.
					}
					break;
				case '¹': index = 1; break;
				case '²': index = 2; break;
				case '³': index = 3; break;
				default: break;
			}

			string clock_tribune = this.tribune.name;
			if (capture["tribune"].length > 0) {
				// Drop the leading '@'.
				clock_tribune = capture["tribune"][1 .. $];
			}

			clocks ~= Clock(capture["time"], index, clock_tribune, capture.hit, this);
		}

		return clocks;
	}

	/**
	 * Returns: the raw timestamp string.
	 */
	string timestamp() {
		return this._timestamp;
	}

	/**
	 * Stores the raw timestamp and, when it is 14 characters long
	 * (YYYYMMDDhhmmss), derives `time` and `real_time` from it.
	 *
	 * `tribune` must already be set: its `time_offset` is added to obtain
	 * `real_time`. Timestamps of any other length leave both untouched.
	 *
	 * Throws: whatever to!int or the DateTime constructor throw when a
	 *         14-character timestamp is not a valid date.
	 */
	void timestamp(string s) {
		this._timestamp = s;

		if (s.length == 14) {
			int year   = to!int(s[0..4]);
			int month  = to!int(s[4..6]);
			int day    = to!int(s[6..8]);
			int hour   = to!int(s[8..10]);
			int minute = to!int(s[10..12]);
			int second = to!int(s[12..14]);

			this.time = SysTime(DateTime(year, month, day, hour, minute, second));
			this.real_time = this.time + this.tribune.time_offset;
		}
	}

	/**
	 * Returns: the post time formatted as "HH:MM:SS".
	 */
	string clock() {
		return format("%02s:%02s:%02s", this.time.hour, this.time.minute, this.time.second);
	}

	/**
	 * Returns: the post time formatted as "YYYYMMDDhhmmss".
	 */
	string tribune_time() {
		return format("%04d%02d%02d%02d%02d%02d", this.time.year, this.time.month, this.time.day, this.time.hour, this.time.minute, this.time.second);
	}

	/**
	 * Returns: the clock-face emoji closest to the post time - the
	 *          "o'clock" face during the first half of the hour, the
	 *          "half past" face afterwards.
	 */
	string unicodeClock() {
		// Indexed by hour modulo 12 (slot 0 is the twelve o'clock face).
		static immutable string[12] on_the_hour = [
			"🕛", "🕐", "🕑", "🕒", "🕓", "🕔",
			"🕕", "🕖", "🕗", "🕘", "🕙", "🕚"
		];
		static immutable string[12] half_past = [
			"🕧", "🕜", "🕝", "🕞", "🕟", "🕠",
			"🕡", "🕢", "🕣", "🕤", "🕥", "🕦"
		];

		auto slot = this.time.hour % 12;
		return this.time.minute < 30 ? on_the_hour[slot] : half_past[slot];
	}

	/**
	 * Returns: the text that designates this post unambiguously: the
	 *          clock alone for index 0, the clock followed by ¹, ² or ³
	 *          for indices 1 to 3, and "clock:index" for any other index.
	 */
	string clock_ref() {
		string clock = this.clock;

		switch (this.index) {
			case 0: break;
			case 1: clock ~= "¹"; break;
			case 2: clock ~= "²"; break;
			case 3: clock ~= "³"; break;
			default:
				clock ~= ":" ~ to!string(this.index);
				break;
		}

		return clock;
	}

	/**
	 * Tells whether the clock reference `clock` designates this post.
	 *
	 * The tribune must agree first: a reference without tribune name
	 * only matches posts of the tribune of the referencing post; a named
	 * reference only matches posts whose tribune answers to that name.
	 * Then any of the following is accepted:
	 *   - the reference text equals clock_ref();
	 *   - the text is five characters long and equals this post's HH:MM;
	 *   - the time part equals HH:MM:SS and the indices agree (a
	 *     reference index of 1 also matches a post with index 0);
	 *   - the time part equals HH:MM and the indices are equal.
	 */
	bool matches_clock(Clock clock) {
		if (clock.tribune == "" && this.tribune != clock.post.tribune) {
			return false;
		}

		if (clock.tribune != "" && !this.tribune.matches_name(clock.tribune)) {
			return false;
		}

		if (clock.text == this.clock_ref) {
			return true;
		}

		if (clock.text.length == 5 && clock.text == this.clock[0 .. 5]) {
			return true;
		}

		if (clock.time == this.clock && (clock.index == this.index || (clock.index == 1 && this.index == 0))) {
			return true;
		}

		if (clock.time == format("%02s:%02s", this.time.hour, this.time.minute) && clock.index == this.index) {
			return true;
		}

		return false;
	}

	/**
	 * Returns: the beginning of `info`, at most 10 bytes long, never
	 *          ending in the middle of a multi-byte UTF-8 character.
	 */
	string short_info() {
		auto end = min(10, this.info.length);

		// Bytes of the form 10xxxxxx continue a multi-byte character:
		// back off until the cut lands on a character boundary.
		while (end > 0 && end < this.info.length && (this.info[end] & 0xC0) == 0x80) {
			end--;
		}

		return this.info[0 .. end];
	}
}
509