/*
 * Demonstration and speed test of various wildcard matching algorithms.
 *
 * Two functions i found from internet are deleted from this source.
 * Their test code is commented out. You can find them and run your own test.
 *
 * If you want to use, modify or publish any code from this file, 
 * please contact me.
 *
 *            Dogan Kurt (dogan.kurt@dodobyte.com)
 *                  All Rights Reserved.
 */

#include <stdio.h>
#include <string.h>
#include <time.h>

/*
 * Iterative backtracking function we use in Mobile Antivirus.
 */
int wild_iterative(char *pat, char *str)
{
	char *locp = NULL;
	char *locs = NULL;

	while (*str) {
		if (*pat == '*') {
			locp = ++pat;
			locs = str;
			if (*pat == '\0') {
				return 1;
			}
			continue;
		}
		if (*str != *pat && *pat != '?') {
			if (!locp) {
				return 0;
			}
			str = ++locs;
			pat = locp;
			continue;
		}
		pat++, str++;
	}
	while (*pat == '*') {
		pat++;
	}
	return (*pat == '\0');
}

/*
 * This is a stripped down version of the algorithm we use in Antimalware.
 * Original function runs with compiled patterns and supports sets e.g. [a-z].
 */
int wild_iterative_opt(char *pat, char *str)
{
	char *last = NULL;
	char *star = NULL;

	while (*str) {
		switch (*pat) {
		case '?':
			str++, pat++;
			continue;
		case '*':
			do {
				pat++;
			} while (*pat == '*');
			if (*pat == '\0') {
				return 1;
			}
			star = pat;
			goto NextMatch;
		}

		if (*str == *pat) {
			str++, pat++;
			continue;
		}
		if (star == NULL) {
			return 0;
		}
		pat = star;
		str = last + 1;

NextMatch:
		while (*str != *pat && *pat != '?') {
			if (*++str == '\0') {
				return 0;
			}
		}
		last = str;
		str++, pat++;
	}
	while (*pat == '*') {
		pat++;
	}
	return (*pat == '\0');
}

/*
 * This is a demonstration function, should not be used without necessary
 * modifications. Since this is a speed comparison, i added the if statement 
 * to early exit in case of star & matchlist are empty at the same time.
 */
int wild_nfa(char *pat, char *str)
{
	int i, nmatch = 0;
	char *star = NULL, *match[100];

	match[nmatch++] = pat;

	while (*str) {
		if (star && (*star == *str || *star == '?')) {
			match[nmatch++] = star;
		}
		/* early exit, mismatch */
		if (!star && nmatch == 0) {
			return 0;
		}
		for (i = 0; i < nmatch; i++) {
			char *p = match[i];
			if (*p == '*') {
				while (*p == '*') {
					p++;
				}
				star = p;
				if (*star == '\0') {
					return 1;
				}
				nmatch = 0;
				str--;
				break;
			}
			if (*p == *str || *p == '?') {
				match[i]++;
			} else {
				size_t ncopy = (nmatch - i - 1) * sizeof(*match);
				memcpy(match + i, match + i + 1, ncopy);
				i--, nmatch--;
			}
		}
		str++;
	}
	for (i = 0; i < nmatch; i++) {
		char *p = match[i];
		while (*p == '*') {
			p++;
		}
		if (*p == '\0') {
			return 1;
		}
	}
	return 0;
}

/*
 * This code is from 7zip source code. I added multiple star collapsion code,
 * otherwise the function was never ending.
 */
int EnhancedMaskTest(char *mask, char *name)
{
  for (;;)
  {
    char m = *mask;
    char c = *name;
    if (m == 0)
      return (c == 0);
    if (m == '*')
    {
      /* collapse multiple stars */
      int n = 0;
      while (m == '*')
        m = *(mask + ++n);
      if (EnhancedMaskTest(mask + n, name))
        return 1;
      if (c == 0)
        return 0;
    }
    else
    {
      if (m == '?')
      {
        if (c == 0)
          return 0;
      }
      else if (m != c)
        return 0;
      mask++;
    }
    name++;
  }
}

/******************************************************************************/
/*                                Test Codes                                  */
/******************************************************************************/

#define REPEAT	10000

#define NPAT	87
#define NSTR	95

char *pat[NPAT];
char *str[NSTR];

void init_test()
{
	/* Patterns */
	pat[0] = "*ccd";
	pat[1] = "*issip*ss*";
	pat[2] = "xxxx*zzy*fffff";
	pat[3] = "xxx*zzy*f";
	pat[4] = "xxxx*zzy*f";
	pat[5] = "xy*z*xyz";
	pat[6] = "*sip*";
	pat[7] = "xy*xyz";
	pat[8] = "mi*sip*";
	pat[9] = "*abac*";
	pat[10] = "a*zz*";
	pat[11] = "*12*23";
	pat[12] = "a12b";
	pat[13] = "*12*12*";
	pat[14] = "*";
	pat[15] = "a*b";
	pat[16] = "a*";
	pat[17] = "a*aar";
	pat[18] = "XY*Z*XYz";
	pat[19] = "*SIP*";
	pat[20] = "*issip*PI";
	pat[21] = "mi*Sip*";
	pat[22] = "*Abac*";
	pat[23] = "*oWn*";
	pat[24] = "bLah";
	pat[25] = "bLaH";
	pat[26] = "*?";
	pat[27] = "??";
	pat[28] = "?*?";
	pat[29] = "*?*?*";
	pat[30] = "?**?*?";
	pat[31] = "?**?*&?";
	pat[32] = "?b*??";
	pat[33] = "?a*??";
	pat[34] = "?**?c?";
	pat[35] = "?**?d?";
	pat[36] = "?*b*?*d*?";
	pat[37] = "bL?h";
	pat[38] = "bLa?";
	pat[39] = "?Lah";
	pat[40] = "?LaH";
	pat[41] = "a*a*a*a*a*a*aa*aaa*a*a*b";
	pat[42] = "*a*b*ba*ca*a*aa*aaa*fa*ga*b*";
	pat[43] = "*a*b*ba*ca*a*x*aaa*fa*ga*b*";
	pat[44] = "*a*b*ba*ca*aaaa*fa*ga*gggg*b*";
	pat[45] = "*a*b*ba*ca*aaaa*fa*ga*ggg*b*";
	pat[46] = "*aabbaa*a*";
	pat[47] = "a*a*a*a*a*a*a*a*a*a*a*a*a*a*a*a*a*";
	pat[48] = "*a*a*a*a*a*a*a*a*a*a*a*a*a*a*a*a*a*";
	pat[49] = "abc*abc*abc*abc*abc*abc*abc*abc*abc*abc*abc*abc*"
	          "abc*abc*abc*abc*abc*";
	pat[50] = "abc*abc*abc*abc*abc*abc*abc*abc*abc*abc*abc*abc*";
	pat[51] = "abc*abc*abc*abc*abc";
	pat[52] = "abc*abc*abc*abc*abc*abc*abc*abc*abc*abc*abcd";
	pat[53] = "********a********b********c********";
	pat[54] = "abc";
	pat[55] = "********a********b********b********";
	pat[56] = "***a*b*c***";
	pat[57] = "?*.?*";
	pat[58] = "*.zip";
	pat[59] = "*zi?";
	pat[60] = "a?";
	pat[61] = "ab*?*xy";
	pat[62] = "*zip";
	pat[63] = "a*?*?*.zip";
	pat[64] = "*?*?.zip";
	pat[65] = "?a?";
	pat[66] = "*?.zip";
	pat[67] = "*?*?*.zip";
	pat[68] = "*zi*";
	pat[69] = "?aa*";
	pat[70] = "*aa*";
	pat[71] = "?*";
	pat[72] = "?*?.zip";
	pat[73] = "?.";
	pat[74] = "*.zi*";
	pat[75] = "?.zip";
	pat[76] = ".?";
	pat[77] = "a*?*?";
	pat[78] = "?.?";
	pat[79] = "?a";
	pat[80] = "ab*cd*xy";
	pat[81] = "*aa?";
	pat[82] = "?";
	pat[83] = "*.zi?";
	pat[84] = "*.*";
	pat[85] = "*ab*cd*";
	pat[86] = "Дело * чиновника мэрии ?? драке ??а Арбате "
	          "переквали??ицировали";

	/* Strings */
	str[0] = "abcccd";
	str[1] = "mississipissippi";
	str[2] = "xxxx*zzzzzzzzy*f";
	str[3] = "xxxxzzzzzzzzyf";
	str[4] = "xyxyxyzyxyz";
	str[5] = "mississippi";
	str[6] = "xyxyxyxyz";
	str[7] = "ababac";
	str[8] = "aaazz";
	str[9] = "a12b12";
	str[10] = "*";
	str[11] = "a*abab";
	str[12] = "a*r";
	str[13] = "a*ar";
	str[14] = "XYXYXYZYXYz";
	str[15] = "missisSIPpi";
	str[16] = "mississipPI";
	str[17] = "miSsissippi";
	str[18] = "abAbac";
	str[19] = "aAazz";
	str[20] = "A12b12";
	str[21] = "a12B12";
	str[22] = "oWn";
	str[23] = "bLah";
	str[24] = "a";
	str[25] = "ab";
	str[26] = "abc";
	str[27] = "abcd";
	str[28] = "abcde";
	str[29] = "bLaaa";
	str[30] = "bLaH";
	str[31] = "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
	          "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaab";
	str[32] = "abababababababababababababababababababaacacacacacacacada"
	          "eafagahaiajakalaaaaaaaaaaaaaaaaaffafagaagggagaaaaaaaab";
	str[33] = "aaabbaabbaab";
	str[34] = "a*a*a*a*a*a*a*a*a*a*a*a*a*a*a*a*a*";
	str[35] = "aaaaaaaaaaaaaaaaa";
	str[36] = "aaaaaaaaaaaaaaaa";
	str[37] = "abc*abcd*abcde*abcdef*abcdefg*abcdefgh*abcdefghi*abcdefghij*"
	          "abcdefghijk*abcdefghijkl*abcdefghijklm*abcdefghijklmn";
	str[38] = "abc*abcd*abcd*abc*abcd";
	str[39] = "abc*abcd*abcd*abc*abcd*abcd*abc*abcd*abc*abc*abcd";
	str[40] = "********a********b********c********";
	str[41] = "*abc*";
	str[42] = ".a.b";
	str[43] = ".a.bcd";
	str[44] = ".ab";
	str[45] = "a.b";
	str[46] = "a.bcd";
	str[47] = "ab.";
	str[48] = "xyz.bcd";
	str[49] = ".zip";
	str[50] = "a.a.zip";
	str[51] = "a.a.zippo";
	str[52] = "a.zip";
	str[53] = "aaaa.zip";
	str[54] = "zip";
	str[55] = ".a";
	str[56] = "a.";
	str[57] = "a.a";
	str[58] = "aa";
	str[59] = "aaa";
	str[60] = ".a.a";
	str[61] = ".a.aa";
	str[62] = "aa.";
	str[63] = "aa.a";
	str[64] = "aa.ba.ba";
	str[65] = "aannn";
	str[66] = "xaab";
	str[67] = ".aa.";
	str[68] = "baa.";
	str[69] = "caa.ba.ba";
	str[70] = ".ab.ab.ab.cd.cd.";
	str[71] = ".ab.cd.ab.cd.abcd.";
	str[72] = ".axb.cxd.ab.cd.abcd.";
	str[73] = ".axb.cyd.ab.cyd.axbcd.";
	str[74] = "abanabnabncd";
	str[75] = "abanabnabncdef";
	str[76] = "xa";
	str[77] = "xxa";
	str[78] = ".a.";
	str[79] = "xab";
	str[80] = "xxab";
	str[81] = "ax";
	str[82] = ".axb.cxd.ab.cd.abcd.xy";
	str[83] = "a.ab.ab.ab.cd.cd.xy";
	str[84] = "ab.ab.cd.ab.cd.abcdxy.";
	str[85] = "ab.axb.cd.xyab.cyd.axbcd.";
	str[86] = "ab.axb.cd.xyab.cyd.axbcd.xy";
	str[87] = "ab.xy";
	str[88] = "abancda.bnxyabncdefxy";
	str[89] = "abancdabnxyabncdef";
	str[90] = "abancdabnxyabncdefxy";
	str[91] = "abcdx_y";
	str[92] = "abxy";
	str[93] = "xab_anabnabncd_xy";
	str[94] = "Дело бывшего чиновника мэрии о драке "
	          "на Арбате переквалифицировали";
}

int main()
{
	int i, j, k;
	clock_t start = 0;
	
	init_test();
	
	fprintf(stderr, "Iterative\n");
	start = clock();
	for (i = 0; i < NPAT; i++) {
		for (j = 0; j < NSTR; j++) {
			for (k = 0; k < REPEAT; k++) {
				wild_iterative(pat[i], str[j]);
			}
		}
	}
	fprintf(stderr, "%lf\n", (double)(clock() - start) / CLOCKS_PER_SEC);

	fprintf(stderr, "Iterative Optimised\n");
	start = clock();
	for (i = 0; i < NPAT; i++) {
		for (j = 0; j < NSTR; j++) {
			for (k = 0; k < REPEAT; k++) {
				wild_iterative_opt(pat[i], str[j]);
			}
		}
	}
	fprintf(stderr, "%lf\n", (double)(clock() - start) / CLOCKS_PER_SEC);

#if 0
	fprintf(stderr, "Kirk J. Krauss, DrDobbs\n");
	start = clock();
	for (i = 0; i < NPAT; i++) {
		for (j = 0; j < NSTR; j++) {
			for (k = 0; k < REPEAT; k++) {
				WildTextCompare(pat[i], str[j]);
			}
		}
	}
	fprintf(stderr, "%lf\n", (double)(clock() - start) / CLOCKS_PER_SEC);

	fprintf(stderr, "Alessandro Cantatore\n");
	start = clock();
	for (i = 0; i < NPAT; i++) {
		for (j = 0; j < NSTR; j++) {
			for (k = 0; k < REPEAT; k++) {
				szWildMatch7(pat[i], str[j]);
			}
		}
	}
	fprintf(stderr, "%lf\n", (double)(clock() - start) / CLOCKS_PER_SEC);
#endif

	fprintf(stderr, "Nondeterministic Finite Automaton\n");
	start = clock();
	for (i = 0; i < NPAT; i++) {
		for (j = 0; j < NSTR; j++) {
			for (k = 0; k < REPEAT; k++) {
				wild_nfa(pat[i], str[j]);
			}
		}
	}
	fprintf(stderr, "%lf\n", (double)(clock() - start) / CLOCKS_PER_SEC);

	/* 
	 * Since this is too slow, we divide repeat time by 100 and
	 * multiply run time by 100 to get a comparable time.
	 */
	fprintf(stderr, "7Zip Recursive\n");
	start = clock();
	for (i = 0; i < NPAT; i++) {
		for (j = 0; j < NSTR; j++) {
			for (k = 0; k < (REPEAT / 100); k++) {
				EnhancedMaskTest(pat[i], str[j]);
			}
		}
	}
	fprintf(stderr, "%lf\n", (double)(clock() - start) / CLOCKS_PER_SEC * 100);

	return 0;
}