parse_date(): fix parsing 03/10/2006
[git/git.git] / date.c
1 /*
2 * GIT - The information manager from hell
3 *
4 * Copyright (C) Linus Torvalds, 2005
5 */
6
7 #include <time.h>
8 #include <sys/time.h>
9
10 #include "cache.h"
11
/*
 * Convert a broken-down time to seconds since the Unix epoch without
 * consulting the local timezone.
 *
 * Only tm_year, tm_mon, tm_mday, tm_hour, tm_min and tm_sec are read.
 * Returns -1 if the year is outside 1970-2099 or the month is outside 0-11.
 */
static time_t my_mktime(struct tm *tm)
{
	/* Days elapsed before the first of each month in a non-leap year */
	static const int days_before_month[] = {
		0, 31, 59, 90, 120, 151, 181, 212, 243, 273, 304, 334
	};
	int years_since_1970 = tm->tm_year - 70;
	int mon = tm->tm_mon;
	int mday = tm->tm_mday;
	int leap_days, total_days;

	/* The "every fourth year" leap rule used below only holds for 1970-2099 */
	if (years_since_1970 < 0 || years_since_1970 > 129)
		return -1;
	/* Keep the table lookup in bounds */
	if (mon < 0 || mon > 11)
		return -1;

	/*
	 * tm_mday is 1-based, so drop one day -- except after February of a
	 * leap year, where the leap day cancels that adjustment.
	 */
	if (mon < 2 || (years_since_1970 + 2) % 4)
		mday--;

	leap_days = (years_since_1970 + 1) / 4;
	total_days = years_since_1970 * 365 + leap_days +
		     days_before_month[mon] + mday;

	return total_days * 24*60*60UL +
		tm->tm_hour * 60*60 + tm->tm_min * 60 + tm->tm_sec;
}
30
/* Full English month names, indexed by tm_mon (0 = January). */
static const char *month_names[] = {
	"January",
	"February",
	"March",
	"April",
	"May",
	"June",
	"July",
	"August",
	"September",
	"October",
	"November",
	"December"
};
35
/*
 * English weekday names in plural form, indexed by tm_wday (0 = Sunday).
 * Users that want the usual abbreviation print or match a prefix.
 */
static const char *weekday_names[] = {
	"Sundays",
	"Mondays",
	"Tuesdays",
	"Wednesdays",
	"Thursdays",
	"Fridays",
	"Saturdays"
};
39
40 /*
41 * The "tz" thing is passed in as this strange "decimal parse of tz"
42 * thing, which means that tz -0100 is passed in as the integer -100,
43 * even though it means "sixty minutes off"
44 */
45 const char *show_date(unsigned long time, int tz)
46 {
47 struct tm *tm;
48 time_t t;
49 static char timebuf[200];
50 int minutes;
51
52 minutes = tz < 0 ? -tz : tz;
53 minutes = (minutes / 100)*60 + (minutes % 100);
54 minutes = tz < 0 ? -minutes : minutes;
55 t = time + minutes * 60;
56 tm = gmtime(&t);
57 if (!tm)
58 return NULL;
59 sprintf(timebuf, "%.3s %.3s %d %02d:%02d:%02d %d %+05d",
60 weekday_names[tm->tm_wday],
61 month_names[tm->tm_mon],
62 tm->tm_mday,
63 tm->tm_hour, tm->tm_min, tm->tm_sec,
64 tm->tm_year + 1900, tz);
65 return timebuf;
66 }
67
/*
 * Known timezone abbreviations.
 *
 * "offset" is in whole hours relative to UTC; "dst" is 1 for the
 * daylight/summer variant of a zone and 0 otherwise.
 *
 * Check these. And note how it doesn't do the summer-time conversion.
 *
 * In my world, it's always summer, and things are probably a bit off
 * in other ways too.
 */
static const struct {
	const char *name;
	int offset;
	int dst;
} timezone_names[] = {
	{ "IDLW", -12, 0 },	/* International Date Line West */
	{ "NT",   -11, 0 },	/* Nome */
	{ "CAT",  -10, 0 },	/* Central Alaska */
	{ "HST",  -10, 0 },	/* Hawaii Standard */
	{ "HDT",  -10, 1 },	/* Hawaii Daylight */
	{ "YST",   -9, 0 },	/* Yukon Standard */
	{ "YDT",   -9, 1 },	/* Yukon Daylight */
	{ "PST",   -8, 0 },	/* Pacific Standard */
	{ "PDT",   -8, 1 },	/* Pacific Daylight */
	{ "MST",   -7, 0 },	/* Mountain Standard */
	{ "MDT",   -7, 1 },	/* Mountain Daylight */
	{ "CST",   -6, 0 },	/* Central Standard */
	{ "CDT",   -6, 1 },	/* Central Daylight */
	{ "EST",   -5, 0 },	/* Eastern Standard */
	{ "EDT",   -5, 1 },	/* Eastern Daylight */
	{ "AST",   -3, 0 },	/* Atlantic Standard */
	{ "ADT",   -3, 1 },	/* Atlantic Daylight */
	{ "WAT",   -1, 0 },	/* West Africa */

	{ "GMT",    0, 0 },	/* Greenwich Mean */
	{ "UTC",    0, 0 },	/* Universal (Coordinated) */

	{ "WET",    0, 0 },	/* Western European */
	{ "BST",    0, 1 },	/* British Summer */
	{ "CET",    1, 0 },	/* Central European */
	{ "MET",    1, 0 },	/* Middle European */
	{ "MEWT",   1, 0 },	/* Middle European Winter */
	{ "MEST",   1, 1 },	/* Middle European Summer */
	{ "CEST",   1, 1 },	/* Central European Summer */
	{ "MESZ",   1, 1 },	/* Middle European Summer */
	{ "FWT",    1, 0 },	/* French Winter */
	{ "FST",    1, 1 },	/* French Summer */
	{ "EET",    2, 0 },	/* Eastern Europe, USSR Zone 1 */
	{ "EEST",   2, 1 },	/* Eastern European Daylight */
	{ "WAST",   7, 0 },	/* West Australian Standard */
	{ "WADT",   7, 1 },	/* West Australian Daylight */
	{ "CCT",    8, 0 },	/* China Coast, USSR Zone 7 */
	{ "JST",    9, 0 },	/* Japan Standard, USSR Zone 8 */
	{ "EAST",  10, 0 },	/* Eastern Australian Standard */
	{ "EADT",  10, 1 },	/* Eastern Australian Daylight */
	{ "GST",   10, 0 },	/* Guam Standard, USSR Zone 9 */
	{ "NZT",   11, 0 },	/* New Zealand */
	{ "NZST",  11, 0 },	/* New Zealand Standard */
	{ "NZDT",  11, 1 },	/* New Zealand Daylight */
	{ "IDLE",  12, 0 },	/* International Date Line East */
};

/* Number of entries in timezone_names[] */
#define NR_TZ (sizeof(timezone_names) / sizeof(timezone_names[0]))
127
/*
 * Case-insensitively compare the start of "date" against "str".
 *
 * Returns the number of leading characters of "date" that matched.  The
 * comparison stops successfully at the end of "date" or at the first
 * character of "date" that differs from "str" and is not alphanumeric.
 * Returns 0 if an alphanumeric character of "date" differs from "str".
 */
static int match_string(const char *date, const char *str)
{
	int matched = 0;

	while (date[matched]) {
		char d = date[matched];
		char s = str[matched];

		if (d != s && toupper(d) != toupper(s)) {
			/* A differing letter or digit means a different word */
			if (isalnum(d))
				return 0;
			/* Punctuation or space merely ends the word */
			break;
		}
		matched++;
	}
	return matched;
}
143
/*
 * Return the length of the word starting at "date": the first character
 * is always counted, followed by every alphabetic character after it.
 */
static int skip_alpha(const char *date)
{
	int len = 1;

	while (isalpha(date[len]))
		len++;
	return len;
}
152
153 /*
154 * Parse month, weekday, or timezone name
155 */
156 static int match_alpha(const char *date, struct tm *tm, int *offset)
157 {
158 int i;
159
160 for (i = 0; i < 12; i++) {
161 int match = match_string(date, month_names[i]);
162 if (match >= 3) {
163 tm->tm_mon = i;
164 return match;
165 }
166 }
167
168 for (i = 0; i < 7; i++) {
169 int match = match_string(date, weekday_names[i]);
170 if (match >= 3) {
171 tm->tm_wday = i;
172 return match;
173 }
174 }
175
176 for (i = 0; i < NR_TZ; i++) {
177 int match = match_string(date, timezone_names[i].name);
178 if (match >= 3) {
179 int off = timezone_names[i].offset;
180
181 /* This is bogus, but we like summer */
182 off += timezone_names[i].dst;
183
184 /* Only use the tz name offset if we don't have anything better */
185 if (*offset == -1)
186 *offset = 60*off;
187
188 return match;
189 }
190 }
191
192 if (match_string(date, "PM") == 2) {
193 if (tm->tm_hour > 0 && tm->tm_hour < 12)
194 tm->tm_hour += 12;
195 return 2;
196 }
197
198 /* BAD CRAP */
199 return skip_alpha(date);
200 }
201
/*
 * Validate a (year, month, day) triple and, if plausible, store it in "tm".
 *
 * "month" is 1-12 and "day" is 1-31.  "year" may be:
 *   -1         no year given; tm->tm_year is left untouched
 *   1970-2099  a full year
 *   71-99      a two-digit year in the 1900s
 *   below 38   a two-digit year in the 2000s
 *
 * Returns 1 when "tm" was updated, 0 (leaving "tm" alone) otherwise.
 */
static int is_date(int year, int month, int day, struct tm *tm)
{
	if (month < 1 || month > 12 || day < 1 || day > 31)
		return 0;

	if (year != -1) {
		if (year >= 1970 && year < 2100)
			year -= 1900;
		else if (year > 70 && year < 100)
			; /* already counted from 1900 */
		else if (year < 38)
			year += 100;
		else
			return 0;
		tm->tm_year = year;
	}

	tm->tm_mon = month - 1;
	tm->tm_mday = day;
	return 1;
}
226
/*
 * Parse "num<c>num2[<c>num3]" where <c> is ':', '-' or '/'.
 *
 * "num" is the already-parsed first number, "end" points at the separator
 * that follows it, and "date" is the start of the whole token.
 *
 * With ':' the numbers are taken as hh:mm[:ss]; with '-' or '/' several
 * date orderings are tried in turn.  Returns the number of characters
 * consumed from "date", or 0 if the numbers make no sense.
 */
static int match_multi_number(unsigned long num, char c, const char *date, char *end, struct tm *tm)
{
	long second, third = -1;

	second = strtol(end + 1, &end, 10);
	if (*end == c && isdigit(end[1]))
		third = strtol(end + 1, &end, 10);

	/* Time? Date? */
	if (c == ':') {
		/* Seconds are optional */
		if (third < 0)
			third = 0;
		if (!(num < 25 && second >= 0 && second < 60 &&
		      third >= 0 && third <= 60))
			return 0;
		tm->tm_hour = num;
		tm->tm_min = second;
		tm->tm_sec = third;
	} else if (c == '-' || c == '/') {
		int ok = 0;

		/* A leading number above 70 is tried as the year first */
		if (num > 70)
			ok = is_date(num, second, third, tm) ||	/* yyyy-mm-dd? */
			     is_date(num, third, second, tm);	/* yyyy-dd-mm? */
		if (!ok)
			ok = is_date(third, num, second, tm) ||	/* mm/dd/yy ? */
			     is_date(third, second, num, tm);	/* dd/mm/yy ? */
		if (!ok)
			return 0;
	}
	return end - date;
}
269
/*
 * We've seen a digit. Time? Year? Date?
 *
 * "date" points at the first digit of a number.  Depending on its value,
 * its length and what has been parsed so far, the number is stored as
 * an epoch timestamp, a time or date (via match_multi_number()), a
 * timezone offset, a year, a day of month or a month.
 *
 * *tm_gmt is set to 1 when the number was taken as seconds since 1970.
 * Returns the number of characters consumed.
 */
static int match_digit(const char *date, struct tm *tm, int *offset, int *tm_gmt)
{
	char *end;
	unsigned long num = strtoul(date, &end, 10);
	int ndigits;

	/*
	 * Seconds since 1970? We trigger on that for anything after Jan 1, 2000
	 */
	if (num > 946684800) {
		time_t time = num;
		if (gmtime_r(&time, tm)) {
			*tm_gmt = 1;
			return end - date;
		}
	}

	/*
	 * Check for special formats: num[:-/]num[same]num
	 */
	if ((*end == ':' || *end == '/' || *end == '-') && isdigit(end[1])) {
		int match = match_multi_number(num, *end, date, end, tm);
		if (match)
			return match;
	}

	/*
	 * None of the special formats? Try to guess what
	 * the number meant. We use the number of digits
	 * to make a more educated guess..
	 */
	for (ndigits = 1; isdigit(date[ndigits]); ndigits++)
		; /* just count */

	/* Four-digit year or a timezone? */
	if (ndigits == 4) {
		if (num <= 1200 && *offset == -1) {
			unsigned int hours = num / 100;
			unsigned int minutes = num % 100;
			*offset = hours*60 + minutes;
		} else if (num > 1900 && num < 2100) {
			tm->tm_year = num - 1900;
		}
		return ndigits;
	}

	/*
	 * NOTE! We will give precedence to day-of-month over month or
	 * year numbers in the 1-12 range. So 05 is always "mday 5",
	 * unless we already have a mday..
	 *
	 * IOW, 01 Apr 05 parses as "April 1st, 2005".
	 */
	if (num > 0 && num < 32 && tm->tm_mday < 0) {
		tm->tm_mday = num;
		return ndigits;
	}

	/* Two-digit year? */
	if (ndigits == 2 && tm->tm_year < 0) {
		if (num < 10 && tm->tm_mday >= 0) {
			tm->tm_year = num + 100;
			return ndigits;
		}
		if (num >= 70) {
			tm->tm_year = num;
			return ndigits;
		}
	}

	/* Last resort: classify purely by magnitude */
	if (num > 0 && num < 32)
		tm->tm_mday = num;
	else if (num > 1900)
		tm->tm_year = num - 1900;
	else if (num > 70)
		tm->tm_year = num;
	else if (num > 0 && num < 13)
		/* never reached: 1-12 is already claimed by the mday test above */
		tm->tm_mon = num-1;

	return ndigits;
}
363
/*
 * Parse a numeric timezone such as "+0200" or "-0730".
 *
 * "date" points at the sign character.  If the number has at least three
 * digits and its last two digits are a valid minute count, the offset in
 * minutes is stored in *offp; otherwise *offp is left alone.
 *
 * Returns the number of characters consumed, sign included.
 */
static int match_tz(const char *date, int *offp)
{
	char *end;
	int hhmm = strtoul(date + 1, &end, 10);
	int ndigits = end - date - 1;
	int min = hhmm % 100;
	int hour = hhmm / 100;

	/*
	 * Don't accept any random crap.. At least 3 digits, and
	 * a valid minute. We might want to check that the minutes
	 * are divisible by 30 or something too.
	 */
	if (ndigits > 2 && min < 60) {
		int minutes = hour*60 + min;
		*offp = (*date == '-') ? -minutes : minutes;
	}
	return end - date;
}
388
/*
 * Write "<seconds> <+|-><hh><mm>" into "buf" (at most "len" bytes).
 *
 * "offset" is the timezone offset in minutes.  Returns whatever
 * snprintf() returns.
 */
static int date_string(unsigned long date, int offset, char *buf, int len)
{
	int sign = offset < 0 ? '-' : '+';
	int abs_offset = offset < 0 ? -offset : offset;

	return snprintf(buf, len, "%lu %c%02d%02d", date, sign,
			abs_offset / 60, abs_offset % 60);
}
399
/*
 * Parse a free-form date string into "<seconds> <tz>" form.
 *
 * Gr. strptime is crap for this; it doesn't have a way to require RFC2822
 * (i.e. English) day/month names, and it doesn't work correctly with %z.
 *
 * The input is scanned word by word up to the end of the string or the
 * first newline; anything unrecognized is skipped.  The result is written
 * to "result" (at most "maxlen" bytes).
 *
 * Returns the value of date_string() on success, or -1 if the collected
 * fields do not form a date that my_mktime() accepts.
 */
int parse_date(const char *date, char *result, int maxlen)
{
	struct tm tm;
	int offset = -1;	/* -1 means "no timezone seen yet" */
	int tm_gmt = 0;
	time_t then;

	memset(&tm, 0, sizeof(tm));
	tm.tm_year = -1;
	tm.tm_mon = -1;
	tm.tm_mday = -1;
	tm.tm_isdst = -1;

	/* Stop at end of string or newline */
	while (*date && *date != '\n') {
		unsigned char c = *date;
		int match = 0;

		if (isalpha(c))
			match = match_alpha(date, &tm, &offset);
		else if (isdigit(c))
			match = match_digit(date, &tm, &offset, &tm_gmt);
		else if ((c == '-' || c == '+') && isdigit(date[1]))
			match = match_tz(date, &offset);

		/* BAD CRAP: skip a single character when nothing matched */
		date += match ? match : 1;
	}

	/* mktime uses local timezone */
	then = my_mktime(&tm);
	if (offset == -1)
		offset = (then - mktime(&tm)) / 60;

	if (then == -1)
		return -1;

	/* An epoch timestamp is already in UTC; everything else is local to "offset" */
	if (!tm_gmt)
		then -= offset * 60;
	return date_string(then, offset, result, maxlen);
}
451
/*
 * Write the current time, in "<seconds> <tz>" form with the local
 * timezone offset, into "buf" (at most "bufsize" bytes).
 */
void datestamp(char *buf, int bufsize)
{
	time_t now;
	int offset;

	time(&now);

	/*
	 * Reading the local wall-clock fields as if they were UTC gives a
	 * value that differs from "now" by the local offset, in seconds.
	 */
	offset = my_mktime(localtime(&now)) - now;
	offset = offset / 60;

	date_string(now, offset, buf, bufsize);
}
464
/*
 * Move the local time in "tm" back by "sec" seconds and re-normalize
 * all of its fields.
 */
static void update_tm(struct tm *tm, unsigned long sec)
{
	time_t shifted = mktime(tm);

	shifted -= sec;
	localtime_r(&shifted, tm);
}
470
/* "yesterday": move "tm" back by 24 hours.  "num" is not used. */
static void date_yesterday(struct tm *tm, int *num)
{
	const unsigned long one_day = 24*60*60;

	update_tm(tm, one_day);
}
475
/*
 * Set "tm" to the most recent occurrence of "hour":00:00.  If that hour
 * has not been reached yet today, go back one day first.
 */
static void date_time(struct tm *tm, int hour)
{
	int not_yet_today = tm->tm_hour < hour;

	if (not_yet_today)
		date_yesterday(tm, NULL);

	tm->tm_sec = 0;
	tm->tm_min = 0;
	tm->tm_hour = hour;
}
484
/* "midnight": 00:00:00 via date_time().  "num" is not used. */
static void date_midnight(struct tm *tm, int *num)
{
	const int midnight_hour = 0;

	date_time(tm, midnight_hour);
}
489
/* "noon": 12:00:00 via date_time().  "num" is not used. */
static void date_noon(struct tm *tm, int *num)
{
	const int noon_hour = 12;

	date_time(tm, noon_hour);
}
494
/* "tea": 17:00:00 via date_time().  "num" is not used. */
static void date_tea(struct tm *tm, int *num)
{
	const int tea_hour = 17;

	date_time(tm, tea_hour);
}
499
500 static const struct special {
501 const char *name;
502 void (*fn)(struct tm *, int *);
503 } special[] = {
504 { "yesterday", date_yesterday },
505 { "noon", date_noon },
506 { "midnight", date_midnight },
507 { "tea", date_tea },
508 { NULL }
509 };
510
/* English words for the numbers 0-10, indexed by value. */
static const char *number_name[] = {
	"zero",
	"one",
	"two",
	"three",
	"four",
	"five",
	"six",
	"seven",
	"eight",
	"nine",
	"ten",
};
515
/*
 * Fixed-length time units and their length in seconds.
 * Terminated by a NULL type.
 */
static const struct typelen {
	const char *type;
	int length;
} typelen[] = {
	{ "seconds",	1 },
	{ "minutes",	60 },
	{ "hours",	60*60 },
	{ "days",	24*60*60 },
	{ "weeks",	7*24*60*60 },
	{ NULL,		0 }
};
527
528 static const char *approxidate_alpha(const char *date, struct tm *tm, int *num)
529 {
530 const struct typelen *tl;
531 const struct special *s;
532 const char *end = date;
533 int n = 1, i;
534
535 while (isalpha(*++end))
536 n++;
537
538 for (i = 0; i < 12; i++) {
539 int match = match_string(date, month_names[i]);
540 if (match >= 3) {
541 tm->tm_mon = i;
542 return end;
543 }
544 }
545
546 for (s = special; s->name; s++) {
547 int len = strlen(s->name);
548 if (match_string(date, s->name) == len) {
549 s->fn(tm, num);
550 return end;
551 }
552 }
553
554 if (!*num) {
555 for (i = 1; i < 11; i++) {
556 int len = strlen(number_name[i]);
557 if (match_string(date, number_name[i]) == len) {
558 *num = i;
559 return end;
560 }
561 }
562 if (match_string(date, "last") == 4)
563 *num = 1;
564 return end;
565 }
566
567 tl = typelen;
568 while (tl->type) {
569 int len = strlen(tl->type);
570 if (match_string(date, tl->type) >= len-1) {
571 update_tm(tm, tl->length * *num);
572 *num = 0;
573 return end;
574 }
575 tl++;
576 }
577
578 for (i = 0; i < 7; i++) {
579 int match = match_string(date, weekday_names[i]);
580 if (match >= 3) {
581 int diff, n = *num -1;
582 *num = 0;
583
584 diff = tm->tm_wday - i;
585 if (diff <= 0)
586 n++;
587 diff += 7*n;
588
589 update_tm(tm, diff * 24 * 60 * 60);
590 return end;
591 }
592 }
593
594 if (match_string(date, "months") >= 5) {
595 int n = tm->tm_mon - *num;
596 *num = 0;
597 while (n < 0) {
598 n += 12;
599 tm->tm_year--;
600 }
601 tm->tm_mon = n;
602 return end;
603 }
604
605 if (match_string(date, "years") >= 4) {
606 tm->tm_year -= *num;
607 *num = 0;
608 return end;
609 }
610
611 return end;
612 }
613
/*
 * Turn a date description into seconds since the epoch.
 *
 * The string is first given to parse_date(); if that succeeds its result
 * is returned.  Otherwise the string is read as a description relative
 * to the current local time ("yesterday", "3 days ago", ...): numbers
 * are remembered and words are handled by approxidate_alpha().
 */
unsigned long approxidate(const char *date)
{
	int number = 0;
	struct tm tm, now;
	struct timeval tv;
	char buffer[50];

	if (parse_date(date, buffer, sizeof(buffer)) > 0)
		return strtoul(buffer, NULL, 10);

	/* Start from "now" and adjust */
	gettimeofday(&tv, NULL);
	localtime_r(&tv.tv_sec, &tm);
	now = tm;

	while (*date) {
		unsigned char c = *date;

		if (isdigit(c)) {
			char *end;
			number = strtoul(date, &end, 10);
			date = end;
		} else if (isalpha(c)) {
			date = approxidate_alpha(date, &tm, &number);
		} else {
			date++;
		}
	}

	/* A number that no unit consumed is taken as a day of the month */
	if (number > 0 && number < 32)
		tm.tm_mday = number;
	/* A month later than the current one is moved to the previous year */
	if (tm.tm_mon > now.tm_mon && tm.tm_year == now.tm_year)
		tm.tm_year--;
	return mktime(&tm);
}