/*
** Modular Logfile Analyzer
** Copyright 2000 Jan Kneschke <jan@kneschke.de>
**
** Homepage: http://www.modlogan.org
**

    This program is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation; either version 2 of the License, or
    (at your option) any later version, and provided that the above
    copyright and permission notice is included with all distributed
    copies of this or derived software.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with this program; if not, write to the Free Software
    Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA

**
** $Id: parse.c,v 1.26 2004/03/18 02:31:50 ostborn Exp $
*/

#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <time.h>
#include <ctype.h>
#include <errno.h>

#include "config.h"

#include "mlocale.h"
#include "mplugins.h"
#include "mrecord.h"
#include "mdatatypes.h"
#include "misc.h"

#include "datatypes/query/datatype.h"

#include "plugin_config.h"

/**
 * check whether a string matches one of the configured OS patterns
 *
 * Leading blanks of str are skipped before matching.
 *
 * @param ext_conf global config; its plugin_conf supplies the match_os list
 * @param str      candidate token (may be NULL)
 * @returns 1 if any entry of match_os matches, 0 otherwise
 */
int find_os (mconfig *ext_conf, char *str) {
	config_input *conf = ext_conf->plugin_conf;
	mlist *node;
	int len;

	if (str == NULL || conf->match_os == NULL) {
		return 0;
	}

	/* skip leading blanks */
	for (; *str == ' '; str++)
		;

	len = strlen(str);

	node = conf->match_os;
	while (node != NULL) {
		mdata *entry = node->data;

		if (entry != NULL && strmatch(entry->data.match.match, NULL, str, len)) {
			return 1;
		}
		node = node->next;
	}

	return 0;
}

/**
 * check whether a string matches one of the configured user-agent patterns
 *
 * Leading blanks of str are skipped before matching.
 *
 * @param ext_conf global config; its plugin_conf supplies the match_ua list
 * @param str      candidate token (may be NULL)
 * @returns 1 if any entry of match_ua matches, 0 otherwise
 */
int find_ua (mconfig *ext_conf, char *str) {
	config_input *conf = ext_conf->plugin_conf;
	mlist *node;
	int len;

	if (str == NULL || conf->match_ua == NULL) {
		return 0;
	}

	/* skip leading blanks */
	for (; *str == ' '; str++)
		;

	len = strlen(str);

	node = conf->match_ua;
	while (node != NULL) {
		mdata *entry = node->data;

		if (entry != NULL && strmatch(entry->data.match.match, NULL, str, len)) {
			return 1;
		}
		node = node->next;
	}

	return 0;
}

/*
 * Copy capture group `idx` of a finished match into a small scratch buffer
 * and convert it to a long (base 10).
 *
 * Returns 0 on success, -1 if the group is not available or does not fit
 * into the scratch buffer.
 */
static int timestamp_group_to_long(const char *subject, int *ovector, int n, int idx, long *out) {
	char buf[10];

	if (pcre_copy_substring(subject, ovector, n, idx, buf, sizeof(buf)) < 0) {
		return -1;
	}

	*out = strtol(buf, NULL, 10);

	return 0;
}

/**
 * build the record timestamp from a date and a time string
 *
 * "<_date> <_time>" is matched against conf->match_timestamp. The capture
 * groups are read as 1 = year, 2 = month, 3 = day of month, 4 = hour,
 * 5 = minute, 6 = second and converted with mktime().
 *
 * @param ext_conf global config; its plugin_conf supplies the compiled regex
 * @param _date    date part of the timestamp
 * @param _time    time part of the timestamp
 * @param record   record whose timestamp field is set on success
 * @returns 0 on success, -1 on error (record->timestamp is left untouched)
 */
int parse_timestamp(mconfig *ext_conf, const char *_date, const char *_time, mlogrec *record) {
/* parenthesised so that 3 * N really is three slots per capture group */
#define N (20 + 1)
	int ovector[3 * N], n, g;
	long val[7];
	struct tm tm;
	config_input *conf = ext_conf->plugin_conf;
	size_t date_len, time_len;
	int ret = -1;
	char *str = NULL;

	if (_date == NULL || _time == NULL) return -1;

	date_len = strlen(_date);
	time_len = strlen(_time);

	str = malloc(date_len + time_len + 2);
	if (str == NULL) {
		fprintf(stderr, "%s.%d: out of memory\n", __FILE__, __LINE__);
		return -1;
	}

	/* "<date> <time>", the second memcpy also copies the terminating NUL */
	memcpy(str, _date, date_len);
	str[date_len] = ' ';
	memcpy(str + date_len + 1, _time, time_len + 1);

	if ((n = pcre_exec(conf->match_timestamp, conf->match_timestamp_extra, str, (int)(date_len + 1 + time_len), 0, 0, ovector, 3 * N)) < 0) {
		if (n == PCRE_ERROR_NOMATCH) {
			fprintf(stderr, "%s.%d: string doesn't match: %s\n", __FILE__, __LINE__, str);
		} else {
			fprintf(stderr, "%s.%d: execution error while matching: %d\n", __FILE__, __LINE__, n);
		}
		goto out;
	}

	/* all six groups are required; a missing one used to be read from an
	 * uninitialised buffer */
	for (g = 1; g <= 6; g++) {
		if (timestamp_group_to_long(str, ovector, n, g, &val[g]) != 0) {
			fprintf(stderr, "%s.%d: can't extract timestamp field %d from: %s\n", __FILE__, __LINE__, g, str);
			goto out;
		}
	}

	memset(&tm, 0, sizeof(struct tm));

	tm.tm_year = (int)val[1] - 1900;
	tm.tm_mon  = (int)val[2] - 1;
	tm.tm_mday = (int)val[3];

	tm.tm_hour = (int)val[4];
	tm.tm_min  = (int)val[5];
	tm.tm_sec  = (int)val[6];

	record->timestamp = mktime (&tm);

	ret = 0;
out:
	/* the combined string is only needed while matching */
	free(str);

	return ret;
#undef  N
}

/**
 * split a User-Agent field into user agent and operating system
 *
 * Three layouts are handled:
 *  - no '(' at all: the whole (url-escaped) string is stored as user agent
 *  - "...(compatible; a; b; ...)": every ';'-separated token inside the
 *    parentheses is tested with find_ua() and then find_os()
 *  - "name (a; b; ...)": the text before '(' is stored as user agent, the
 *    tokens inside the parentheses are tested with find_os() only
 *
 * The string behind str is modified in place (tokens are NUL-terminated),
 * although it is declared const.
 *
 * NOTE(review): the `!record->req_useragent` / `!record->req_useros` tests
 * check the pointers themselves, not whether the buffers are empty - confirm
 * that this is intended.
 * NOTE(review): pc1/pc3 keep pointing at the original string while str is
 * replaced by the return value of urlescape(); this presumably relies on
 * urlescape() working in place - confirm.
 *
 * @param ext_conf global config (debug_level, match lists)
 * @param str      raw User-Agent field
 * @param record   extended web record receiving req_useragent / req_useros
 * @returns 0 on success, -1 if the field is malformed (no closing ')')
 *          or memory is exhausted
 */
int parse_useragent(mconfig *ext_conf,const char *str, mlogrec_web_extclf *record) {
/* get user agent */
	char *pc1 = (char *)str, *pc3, *pc2 = (char *)str, *buf_copy;

	if (str == NULL) return -1;

	/* keep an untouched copy for the error messages, the original is cut
	 * into pieces below */
	buf_copy = malloc(strlen(str)+1);
	if (buf_copy == NULL) return -1;
	strcpy(buf_copy, str);

	str = urlescape((char *)str);

	if ((pc3 = strchr(pc1, '(') )) {
		if (strstr(pc3, "compatible")) {
			int finished = 0;

			pc1 = pc2 = (pc3+1);

			while (!finished) {
				/* advance to the end of the current token */
				while (*pc2 && !(*pc2 == ';' || *pc2 == ')')) pc2++;
				if (!*pc2) {
					/* ran off the string without seeing ')' */
					if (ext_conf->debug_level > 0)
						fprintf(stderr, "%s.%d: %s: '%s'\n", 
							__FILE__, __LINE__,
							_("the 'Useragent' field of the logfile is incorrect"),
							buf_copy);
					free(buf_copy);
					return -1;
				} else if (*pc2 == ')') {
					/* last token */
					finished = 1;
				}

				while (*pc1 == ' ') pc1++;

				*pc2 = '\0';
				if (!record->req_useragent && find_ua(ext_conf, pc1)) {
					buffer_copy_string(record->req_useragent, pc1);
				} else if (!record->req_useros && find_os(ext_conf, pc1)) {
					buffer_copy_string(record->req_useros, pc1);
				}
				pc1 = ++pc2;
			}


		} else {
			int finished = 0;

			pc2 = pc3;

			/* everything in front of '(' is the user agent */
			*pc2 = '\0';

			buffer_copy_string(record->req_useragent, pc1);

			pc1 = pc2 = (pc3+1);

			while (!finished) {
				/* advance to the end of the current token */
				while (*pc2 && !(*pc2 == ';' || *pc2 == ')')) pc2++;
				if (!*pc2) {
					/* ran off the string without seeing ')' */
					if (ext_conf->debug_level > 0)
						fprintf(stderr, "%s: '%s'\n", _("the 'Useragent' field of the logfile is incorrect"),buf_copy);
					free(buf_copy);
					return -1;
				} else if (*pc2 == ')') {
					/* last token */
					finished = 1;
				}

				while (*pc1 == ' ') pc1++;

				*pc2 = '\0';


				if (!record->req_useros && find_os(ext_conf, pc1)) {
					buffer_copy_string(record->req_useros, pc1);
				}
				pc1 = ++pc2;
			}
		}
	} else {
		buffer_copy_string(record->req_useragent, str);
	}

	free(buf_copy);

	return 0;
}

/**
 * split a referrer into URL and GET variables
 *
 * str is matched against conf->match_referrer. Capture group 1 is stored
 * in record->ref_url; if more than three substrings were captured, group 3
 * is stored in record->ref_getvars.
 *
 * @param ext_conf global config; its plugin_conf supplies the compiled regex
 * @param str      raw referrer field
 * @param record   extended web record receiving ref_url / ref_getvars
 * @returns 0 on success, -1 if the string doesn't match, too few groups
 *          were captured or the substrings can't be extracted
 */
int parse_referrer(mconfig *ext_conf,const char *str, mlogrec_web_extclf *record) {
/* parenthesised so that 3 * N really is three slots per capture group */
#define N (20 + 1)
	int ovector[3 * N], n;
	config_input *conf = ext_conf->plugin_conf;
	const char **list = NULL;

	if (str == NULL) return -1;

	if ((n = pcre_exec(conf->match_referrer, conf->match_referrer_extra, str, strlen(str), 0, 0, ovector, 3 * N)) < 0) {
		if (n == PCRE_ERROR_NOMATCH) {
			fprintf(stderr, "%s.%d: string doesn't match: %s\n", __FILE__, __LINE__, str);
		} else {
			fprintf(stderr, "%s.%d: execution error while matching: %d\n", __FILE__, __LINE__, n);
		}
		return -1;
	}

	if (n < 2) {
		fprintf(stderr, "%s.%d: Matched fields below minimum: %d\n", __FILE__, __LINE__, n);
		return -1;
	}

	/* everything has matched, take the different pieces and be happy :) */
	if (pcre_get_substring_list(str, ovector, n, &list) < 0) {
		/* list is not valid in this case and must not be touched */
		fprintf(stderr, "%s.%d: can't extract the matched substrings\n", __FILE__, __LINE__);
		return -1;
	}

	buffer_copy_string(record->ref_url, (char *)list[1]);

	if (n > 3) {
		buffer_copy_string(record->ref_getvars, (char *)list[3]);
	}
#ifdef DEBUG_INPUT
	/* NOTE(review): ref_url / ref_getvars are passed to buffer_copy_string()
	 * above, so they look like buffer pointers, not C strings - confirm the
	 * %s arguments */
	fprintf(stderr, "%s.%d: %s, %s\n", __FILE__, __LINE__, record->ref_url, record->ref_getvars);
#endif
	free(list);

	return 0;
#undef  N
}

/*
 * Ids of the fields which may be announced by a "#Fields:" line.
 * The order is significant: the values are assigned implicitly from 0.
 */
enum {
	M_MSIIS_FIELD_DATE,
	M_MSIIS_FIELD_TIME,
	M_MSIIS_FIELD_CLIENT_IP,
	M_MSIIS_FIELD_USERNAME,
	M_MSIIS_FIELD_SITENAME,
	M_MSIIS_FIELD_SERVERNAME,
	M_MSIIS_FIELD_SERVER_IP,
	M_MSIIS_FIELD_SERVER_PORT,
	M_MSIIS_FIELD_REQ_METHOD,
	M_MSIIS_FIELD_URI_STEM,
	M_MSIIS_FIELD_URI_QUERY,
	M_MSIIS_FIELD_STATUS,
	M_MSIIS_FIELD_WIN32_STATUS,
	M_MSIIS_FIELD_BYTES_SEND,
	M_MSIIS_FIELD_BYTES_RECEIVED,
	M_MSIIS_FIELD_TIME_TAKEN,
	M_MSIIS_FIELD_REQ_PROTOCOL,
	M_MSIIS_FIELD_REQ_HOST,
	M_MSIIS_FIELD_USER_AGENT,
	M_MSIIS_FIELD_COOKIE,
	M_MSIIS_FIELD_REFERRER,
	M_MSIIS_FIELD_PROCESS_EVENT,
	M_MSIIS_FIELD_PROCESS_TYPE,
	M_MSIIS_FIELD_USER_TIME,
	M_MSIIS_FIELD_KERNEL_TIME,
	M_MSIIS_FIELD_PAGE_FAULTS,
	M_MSIIS_FIELD_TOTAL_PROCESSES,
	M_MSIIS_FIELD_ACTIVE_PROCESSES,
	M_MSIIS_FIELD_TERMINATED_PROCESSES
};



typedef struct {
	char	*field;
	int	id;
	char	*match;
} msiis_field_def;

/*
 * Table of all known fields: name in the "#Fields:" line, field id and the
 * regex fragment used to capture the value. Terminated by an entry whose
 * field name is NULL.
 */
const msiis_field_def def[] =
{
	{ "date",            M_MSIIS_FIELD_DATE,                 "(.+?)"     },
	{ "time",            M_MSIIS_FIELD_TIME,                 "(.+?)"     },
	{ "c-ip",            M_MSIIS_FIELD_CLIENT_IP,            "(.+?)"     },
	{ "cs-username",     M_MSIIS_FIELD_USERNAME,             "(.+?)"     },
	{ "s-sitename",      M_MSIIS_FIELD_SITENAME,             "(.+?)"     },
	{ "s-computername",  M_MSIIS_FIELD_SERVERNAME,           "(.+?)"     },
	{ "s-ip",            M_MSIIS_FIELD_SERVER_IP,            "(.+?)"     },
	{ "s-port",          M_MSIIS_FIELD_SERVER_PORT,          "([0-9]+?)" },
	{ "cs-method",       M_MSIIS_FIELD_REQ_METHOD,           "(.+?)"     },
	{ "cs-uri-stem",     M_MSIIS_FIELD_URI_STEM,             "(.+?)"     },
	{ "cs-uri-query",    M_MSIIS_FIELD_URI_QUERY,            "(.+?)"     },
	{ "sc-status",       M_MSIIS_FIELD_STATUS,               "([0-9]+?)" },
	{ "sc-win32-status", M_MSIIS_FIELD_WIN32_STATUS,         "(.+?)"     },
	{ "sc-bytes",        M_MSIIS_FIELD_BYTES_SEND,           "([0-9]+?)" },
	{ "cs-bytes",        M_MSIIS_FIELD_BYTES_RECEIVED,       "([0-9]+?)" },
	{ "time-taken",      M_MSIIS_FIELD_TIME_TAKEN,           "(.+?)"     },
	{ "cs-version",      M_MSIIS_FIELD_REQ_PROTOCOL,         "(.+?)"     },
	{ "cs-host",         M_MSIIS_FIELD_REQ_HOST,             "(.+?)"     },
	{ "cs(User-Agent)",  M_MSIIS_FIELD_USER_AGENT,           "(.+?)"     },
	{ "cs(Cookie)",      M_MSIIS_FIELD_COOKIE,               "(.+?)"     },
	{ "cs(Referer)",     M_MSIIS_FIELD_REFERRER,             "(.+?)"     },
	{ "s-event",         M_MSIIS_FIELD_PROCESS_EVENT,        "(.+?)"     }, /* Process Event */
	{ "s-process-type",  M_MSIIS_FIELD_PROCESS_TYPE,         "(.+?)"     }, /* Process Type */
	{ "s-user-time",     M_MSIIS_FIELD_USER_TIME,            "(.+?)"     }, /* Total User Time */
	{ "s-kernel-time",   M_MSIIS_FIELD_KERNEL_TIME,          "(.+?)"     }, /* Total Kernel Time */
	{ "s-page-faults",   M_MSIIS_FIELD_PAGE_FAULTS,          "(.+?)"     }, /* Total Page Faults */
	{ "s-total-procs",   M_MSIIS_FIELD_TOTAL_PROCESSES,      "(.+?)"     }, /* Total Processes */
	{ "s-active-procs",  M_MSIIS_FIELD_ACTIVE_PROCESSES,     "(.+?)"     }, /* Active Processes */
	{ "s-stopped-procs", M_MSIIS_FIELD_TERMINATED_PROCESSES, "(.+?)"     }, /* Total Terminated Processes */

	/* end of table */
	{ NULL, 0, NULL }
};

int parse_msiis_field_info(mconfig *ext_conf, const char *_buffer) {
	config_input *conf = ext_conf->plugin_conf;
	char *buf, *pa, *pe;
	int pos = 0, i;
	buffer *match_buf;
	const char *errptr;
	int erroffset = 0;

	if (_buffer == NULL) return -1;

	buf = strdup(_buffer);

	for (pa = buf; (pe = strchr(pa, ' ')) != NULL; pa = pe + 1) {
		*pe = '\0';

		for (i = 0; def[i].field != NULL; i++) {
			if (strcmp(def[i].field, pa) == 0) {
				break;
			}
		}

		if (def[i].field != NULL) {
			if (pos >= M_MSIIS_MAX_FIELDS) return -1;

			conf->trans_fields[pos++] = i;
		} else {
			fprintf(stderr, "%s.%d: Unknown fieldtype: %s\n", __FILE__, __LINE__, pa);
			free(buf);
			return -1;
		}
	}

	/* don't forget the last param */
	if (*pa) {
		for (i = 0; def[i].field != NULL; i++) {
			if (strcmp(def[i].field, pa) == 0) {
				break;
			}
		}

		if (def[i].field != NULL) {
			if (pos >= M_MSIIS_MAX_FIELDS) return -1;
			conf->trans_fields[pos++] = i;
		} else {
			fprintf(stderr, "%s.%d: Unknown fieldtype: %s\n", __FILE__, __LINE__, pa);
			free(buf);
			return -1;
		}
	}

	free(buf);

	match_buf = buffer_init();

	for (i = 0; i < pos; i++) {
		if (match_buf->used == 0) {
			buffer_copy_string(match_buf, "^");
		} else {
			buffer_append_string(match_buf, " ");
		}
		buffer_append_string(match_buf, def[conf->trans_fields[i]].match);
	}

	buffer_append_string(match_buf, "$");

	if ((conf->match_clf = pcre_compile(match_buf->ptr,
		0, &errptr, &erroffset, NULL)) == NULL) {

		fprintf(stderr, "%s.%d: rexexp compilation error at %s\n", __FILE__, __LINE__, errptr);
		buffer_free(match_buf);
		return -1;
	}
	buffer_free(match_buf);

	conf->match_clf_extra = pcre_study(conf->match_clf, 0, &errptr);
	if (errptr != NULL) {
		fprintf(stderr, "%s.%d: rexexp studying error at %s\n", __FILE__, __LINE__, errptr);
		return -1;
	}

	return 0;
}

/**
 * parse the payload of a "#Date:" line
 *
 * The text up to the first blank becomes conf->default_date, the rest
 * becomes conf->default_time. Both are stored as freshly allocated copies;
 * the input string is not modified.
 *
 * Previously stored values are released before they are replaced.
 * NOTE(review): this assumes default_date/default_time are either NULL or
 * heap-allocated (they are only ever set from strdup() in the code visible
 * here) - confirm against the plugin's config initialisation.
 *
 * @param ext_conf global config; its plugin_conf is updated
 * @param _buffer  text following "#Date: ", expected as "<date> <time>"
 * @returns 0 on success, -1 if there is no blank in _buffer or memory is
 *          exhausted (the stored values are left untouched in that case)
 */
int parse_msiis_date_info(mconfig *ext_conf, const char *_buffer) {
	/* getting
		default_data
		default_time
	*/
	config_input *conf = ext_conf->plugin_conf;
	const char *sep;
	char *new_date, *new_time;
	size_t date_len;

	if (_buffer == NULL) return -1;

	if ((sep = strchr(_buffer, ' ')) == NULL) {
		return -1;
	}

	date_len = (size_t)(sep - _buffer);

	new_date = malloc(date_len + 1);
	new_time = strdup(sep + 1);

	if (new_date == NULL || new_time == NULL) {
		free(new_date);
		free(new_time);
		return -1;
	}

	memcpy(new_date, _buffer, date_len);
	new_date[date_len] = '\0';

	/* a logfile may carry several "#Date:" lines, drop the old values */
	free(conf->default_date);
	free(conf->default_time);

	conf->default_date = new_date;
	conf->default_time = new_time;

	return 0;
}

/**
 * parse a msiis record
 *
 * Lines starting with '#' are header lines: "#Version:", "#Fields:" and
 * "#Date:" are evaluated, all other header lines are skipped. Every other
 * line is matched against the regex built from the last "#Fields:" line
 * and the captured values are copied into the record.
 *
 * A trailing carriage return in the line buffer is overwritten with NUL
 * and excluded from the matched subject.
 *
 * NOTE(review): the return value of parse_timestamp() is ignored, a record
 * whose timestamp can't be parsed is still reported as valid.
 *
 * @param ext_conf global config; its plugin_conf holds the parser state
 * @param record   record to fill; its web extension is created if needed
 * @param b        line buffer holding one logfile line
 * @returns 0  - no error, -1 - fatal error, 1  - corrupt record
 *          (header lines are reported as 1 as well)
 */
int parse_record_pcre(mconfig *ext_conf, mlogrec *record, buffer *b) {
/* parenthesised so that 3 * N really is three slots per capture group */
#define N (30 + 1)
	const char **list = NULL;
	int ovector[3 * N], n, i;
	int len;
	int ret = 0;
	config_input *conf = ext_conf->plugin_conf;
	mlogrec_web *recweb = NULL;
	mlogrec_web_extclf *recext = NULL;

	const char *_date = NULL, *_time = NULL;

	/* nothing to look at */
	if (b == NULL || b->ptr == NULL || b->used < 1) return 1;

	/* length of the line without the terminating NUL */
	len = (int)b->used - 1;

	/* remove the carriage return and keep it out of the subject, otherwise
	 * the NUL would sit in front of the '$' anchor */
	if (len > 0 && b->ptr[len - 1] == '\r') {
		b->ptr[len - 1] = '\0';
		len--;
	}

	if (*b->ptr == '#') {
		if (strncmp("#Version: ", b->ptr, 10) == 0) {
			if (strncmp("#Version: 1.0", b->ptr, 13) != 0) {
				fprintf(stderr, "%s.%d: only logfile version 1.0 is supported\n", __FILE__, __LINE__);
				return -1;
			}
			return 1;
		} else if (strncmp("#Fields: ", b->ptr, 9) == 0) {
			if (parse_msiis_field_info(ext_conf, b->ptr+9) != 0) {
				return -1;
			} else {
				return 1;
			}
		} else if (strncmp("#Date: ", b->ptr, 7) == 0) {
			if (parse_msiis_date_info(ext_conf, b->ptr+7) != 0) {
				return -1;
			} else {
				return 1;
			}
		} else {
			return 1;
		}
	}


	/* no "#Fields:" line seen yet */
	if (conf->match_clf == NULL) return -1;
	
	if (record->ext_type != M_RECORD_TYPE_WEB) {
		if (record->ext_type != M_RECORD_TYPE_UNSET) {
			mrecord_free_ext(record);
		}
		
		record->ext_type = M_RECORD_TYPE_WEB;
		record->ext = mrecord_init_web();
	}

	recweb = record->ext;
	
	if (recweb == NULL) return 1;

	recext = mrecord_init_web_extclf();

	recweb->ext_type = M_RECORD_TYPE_WEB_EXTCLF;
	recweb->ext = recext;

/* parse a CLF record */
	if ((n = pcre_exec(conf->match_clf, conf->match_clf_extra, b->ptr, len, 0, 0, ovector, 3 * N)) < 0) {
		if (n == PCRE_ERROR_NOMATCH) {
			fprintf(stderr, "%s.%d: string doesn't match: %s\n", __FILE__, __LINE__, b->ptr);
		} else {
			fprintf(stderr, "%s.%d: execution error while matching: %d\n", __FILE__, __LINE__, n);
		}
		return 1;
	}

	if (n == 0) {
		fprintf(stderr, "%s.%d: !REPORT ME! N is too low -> %d\n", __FILE__, __LINE__, N);
		return -1;
	}

	if (pcre_get_substring_list(b->ptr, ovector, n, &list) < 0) {
		/* list is not valid in this case and must not be touched */
		fprintf(stderr, "%s.%d: can't extract the matched substrings\n", __FILE__, __LINE__);
		return -1;
	}

	/* list[0] is the whole match, list[i+1] belongs to the i-th field */
	for (i = 0; i < n-1; i++) {
		switch (def[conf->trans_fields[i]].id) {
		case M_MSIIS_FIELD_DATE:
			_date = list[i+1];
			break;
		case M_MSIIS_FIELD_TIME:
			_time = list[i+1];
			break;
		case M_MSIIS_FIELD_CLIENT_IP:
			buffer_copy_string(recweb->req_host_ip, (char *)list[i+1]);
			break;
		case M_MSIIS_FIELD_USERNAME:
			buffer_copy_string(recweb->req_user, (char *)list[i+1]);
			break;
		case M_MSIIS_FIELD_REQ_METHOD:
			buffer_copy_string(recweb->req_method, (char *)list[i+1]);
			break;
		case M_MSIIS_FIELD_URI_STEM:
			buffer_copy_string(recweb->req_url, (char *)list[i+1]);
			break;
		case M_MSIIS_FIELD_STATUS:
			recweb->req_status = strtol(list[i+1], NULL,10);
			break;
		case M_MSIIS_FIELD_BYTES_SEND:
			recweb->xfersize = strtod(list[i+1], NULL);
			break;
		case M_MSIIS_FIELD_SERVER_PORT:
			buffer_copy_string(recext->srv_port, (char *)list[i+1]);
			break;
		case M_MSIIS_FIELD_SERVER_IP:
			buffer_copy_string(recext->srv_host, (char *)list[i+1]);
			break;
		case M_MSIIS_FIELD_REQ_PROTOCOL:
			buffer_copy_string(recweb->req_protocol, (char *)list[i+1]);
			break;
		case M_MSIIS_FIELD_USER_AGENT:
			if (parse_useragent(ext_conf, list[i+1], recext)  == -1) {
				/* corrupt record, but the substring list still has
				 * to be released */
				ret = 1;
				goto cleanup;
			}
			break;
		case M_MSIIS_FIELD_REFERRER:
			if (parse_referrer(ext_conf, list[i+1], recext)  == -1) {
				ret = 1;
				goto cleanup;
			}
			break;
		/* no mapping */
		case M_MSIIS_FIELD_COOKIE:
		case M_MSIIS_FIELD_SITENAME:
		case M_MSIIS_FIELD_SERVERNAME:

		case M_MSIIS_FIELD_REQ_HOST:
		case M_MSIIS_FIELD_BYTES_RECEIVED:
		case M_MSIIS_FIELD_TIME_TAKEN:
		case M_MSIIS_FIELD_URI_QUERY:
		case M_MSIIS_FIELD_WIN32_STATUS:
		case M_MSIIS_FIELD_PROCESS_EVENT:
		case M_MSIIS_FIELD_PROCESS_TYPE:
		case M_MSIIS_FIELD_USER_TIME:
		case M_MSIIS_FIELD_KERNEL_TIME:
		case M_MSIIS_FIELD_PAGE_FAULTS:
		case M_MSIIS_FIELD_TOTAL_PROCESSES:
		case M_MSIIS_FIELD_ACTIVE_PROCESSES:
		case M_MSIIS_FIELD_TERMINATED_PROCESSES:
			
			if (ext_conf->debug_level > 2)
				fprintf(stderr, "the field '%s' is known, but not supported yet.\n",def[conf->trans_fields[i]].field);
			break;


		default:
			fprintf(stderr, "the field '%s' is unknown\n", def[conf->trans_fields[i]].field);
			break;
		}
	}

	/* the date may come from the record itself or from a "#Date:" line */
	if (_time != NULL && (_date != NULL || conf->default_date != NULL)) {
		parse_timestamp(ext_conf, _date ? _date : conf->default_date, _time, record);
	}

cleanup:
	free(list);

	return ret;
#undef  N
}

/**
 * read the next line of the input file and parse it into record
 *
 * @param ext_conf global config; its plugin_conf supplies input file and line buffer
 * @param record   record to fill
 * @returns M_RECORD_HARD_ERROR if record is NULL, M_RECORD_EOF if no further
 *          line could be read, otherwise the result of parse_record_pcre()
 */
int mplugins_input_msiis_get_next_record(mconfig *ext_conf, mlogrec *record) {
	config_input *conf = ext_conf->plugin_conf;
	int status;

	if (!record) {
		return M_RECORD_HARD_ERROR;
	}

	/* fill the line buffer */
	if (mgets(&(conf->inputfile), conf->buf) == NULL) {
		return M_RECORD_EOF;
	}

	status = parse_record_pcre(ext_conf, record, conf->buf);

	/* show the offending line for corrupt records */
	if (status == M_RECORD_CORRUPT) {
		M_DEBUG1(ext_conf->debug_level, M_DEBUG_SECTION_PARSING, M_DEBUG_LEVEL_WARNINGS,
			 "affected Record: %s\n",
			 conf->buf->ptr
			 );
	}

	return status;
}
