mirror of
git://git.acid.vegas/unrealircd.git
synced 2024-11-14 20:16:42 +00:00
866 lines
23 KiB
C
866 lines
23 KiB
C
/*
|
|
* Unreal Internet Relay Chat Daemon, src/match.c
|
|
* Copyright (C) 1990 Jarkko Oikarinen
|
|
*
|
|
* This program is free software; you can redistribute it and/or modify
|
|
* it under the terms of the GNU General Public License as published by
|
|
* the Free Software Foundation; either version 1, or (at your option)
|
|
* any later version.
|
|
*
|
|
* This program is distributed in the hope that it will be useful,
|
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
* GNU General Public License for more details.
|
|
*
|
|
* You should have received a copy of the GNU General Public License
|
|
* along with this program; if not, write to the Free Software
|
|
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
|
|
*/
|
|
|
|
|
|
#include "unrealircd.h"
|
|
|
|
ID_Copyright("(C) 1990 Jarkko Oikarinen");
|
|
|
|
/*
 * Wildcard matching: compare a given string (name) against a given
 * mask, which can contain wild cards: '*' - match any number of
 * chars, '?' - match any single character.
 *
 * Note: the matchers in this file, match_esc() and match_simple(),
 * return 1 if the name matches the mask
 *        0 if there is no match
 */
|
|
|
|
/* Case-mapping tables; their initializers appear further down in this file. */
u_char touppertab[];
u_char tolowertab[];

/* lc(x): map one character to lower case through the tolowertab table. */
#define tolowertab2 tolowertab
#define lc(x) (tolowertab2[(x)])
|
|
|
|
/* Match routine for special cases where escaping is needed in a normal fashion.
|
|
* Checks a string ('name') against a globbing(+more) pattern ('mask').
|
|
* Original by Douglas A Lewis (dalewis@acsu.buffalo.edu).
|
|
* Code based on hybrid7's version (match_esc()).
|
|
* Various modifications by Bram Matthys (Syzop).
|
|
* Returns 1 on match and 0 for no match.
|
|
* Instead of our previous code, this one is less optimized but actually _readable_ ;).
|
|
* Modifications I (Syzop) had to do vs the hybrid7 code:
|
|
* - Got rid of (u_char *) casts, since we already compile with
|
|
* chars defaulting to unsigned [or else major things break] ;).
|
|
* - Support for '_'.
|
|
* - Rip out support for '#'.
|
|
*/
|
|
int match_esc(const char *mask, const char *name)
|
|
{
|
|
const u_char *m = mask;
|
|
const u_char *n = name;
|
|
const u_char *ma = NULL;
|
|
const u_char *na = name;
|
|
|
|
while(1)
|
|
{
|
|
if (*m == '*')
|
|
{
|
|
while (*m == '*') /* collapse.. */
|
|
m++;
|
|
ma = m;
|
|
na = n;
|
|
}
|
|
|
|
if (!*m)
|
|
{
|
|
if (!*n)
|
|
return 1;
|
|
if (!ma)
|
|
return 0;
|
|
for (m--; (m > (const u_char *)mask) && (*m == '?'); m--);
|
|
if (*m == '*')
|
|
return 1;
|
|
m = ma;
|
|
n = ++na;
|
|
} else
|
|
if (!*n)
|
|
{
|
|
while (*m == '*') /* collapse.. */
|
|
m++;
|
|
return (*m == 0);
|
|
}
|
|
|
|
if (*m != '?')
|
|
{
|
|
if (*m == '\\')
|
|
if (!*++m)
|
|
return 0; /* unfinished escape sequence */
|
|
if ((lc(*m) != lc(*n)) && !((*m == '_') && (*n == ' ')))
|
|
{
|
|
if (!ma)
|
|
return 0;
|
|
m = ma;
|
|
n = ++na;
|
|
} else
|
|
{
|
|
m++;
|
|
n++;
|
|
}
|
|
} else
|
|
{
|
|
m++;
|
|
n++;
|
|
}
|
|
}
|
|
return 0;
|
|
}
|
|
|
|
/** Same credit/copyright as match_esc() applies, except escaping removed.. ;p */
|
|
int match_simple(const char *mask, const char *name)
|
|
{
|
|
const u_char *m = mask;
|
|
const u_char *n = name;
|
|
const u_char *ma = NULL;
|
|
const u_char *na = name;
|
|
|
|
while(1)
|
|
{
|
|
if (*m == '*')
|
|
{
|
|
while (*m == '*') /* collapse.. */
|
|
m++;
|
|
ma = m;
|
|
na = n;
|
|
}
|
|
|
|
if (!*m)
|
|
{
|
|
if (!*n)
|
|
return 1;
|
|
if (!ma)
|
|
return 0;
|
|
for (m--; (m > (const u_char *)mask) && (*m == '?'); m--);
|
|
if (*m == '*')
|
|
return 1;
|
|
m = ma;
|
|
n = ++na;
|
|
} else
|
|
if (!*n)
|
|
{
|
|
while (*m == '*') /* collapse.. */
|
|
m++;
|
|
return (*m == 0);
|
|
}
|
|
|
|
if ((lc(*m) != lc(*n)) && !((*m == '_') && (*n == ' ')) && (*m != '?'))
|
|
{
|
|
if (!ma)
|
|
return 0;
|
|
m = ma;
|
|
n = ++na;
|
|
} else
|
|
{
|
|
m++;
|
|
n++;
|
|
}
|
|
}
|
|
return 0;
|
|
}
|
|
|
|
/*
|
|
* collapse a pattern string into minimal components.
|
|
* This particular version is "in place", so that it changes the pattern
|
|
* which is to be reduced to a "minimal" size.
|
|
*/
|
|
char *collapse(char *pattern)
{
	char *cur;  /* scan position in the pattern */
	char *dst;  /* where the kept tail is written to */
	char *src;  /* where the kept tail is read from */

	if (BadPtr(pattern))
		return pattern;

	/* Collapse all \** into \*, \*[?]+\** into \*[?]+ */
	for (cur = pattern; *cur != '\0'; cur++)
	{
		if (*cur == '\\')
		{
			/* Escaped character: step over it, unless the pattern ends here */
			if (cur[1] == '\0')
				break;
			cur++;
			continue;
		}
		if (*cur != '*')
			continue;

		dst = src = cur + 1;
		if (*src == '*')
		{
			/* "**..." : drop the extra stars */
			while (*src == '*')
				src++;
		}
		else if (*src == '?')
		{
			/* "*?...": keep every '?', drop any '*' mixed in between */
			src++;
			dst++;
			while ((*src == '*') || (*src == '?'))
			{
				if (*src == '?')
					*dst++ = *src;
				src++;
			}
		}

		/* Move the remainder (including the terminating NUL) down in place */
		do {
			*dst = *src++;
		} while (*dst++ != '\0');
	}
	return pattern;
}
|
|
|
|
|
|
/* Case insensitive comparison of two NULL terminated strings,
|
|
* using the "IRC nick comparison" rules. Or, well, partially
|
|
* anyway.
|
|
* Should be used for NICK-related comparisons. And probably
|
|
* not even then, since this does not deal with multibyte.
|
|
* @returns 0, if s1 equal to s2
|
|
* <0, if s1 lexicographically less than s2
|
|
* >0, if s1 lexicographically greater than s2
|
|
*/
|
|
int smycmp(const char *s1, const char *s2)
|
|
{
|
|
u_char *str1;
|
|
u_char *str2;
|
|
int res;
|
|
|
|
str1 = (u_char *)s1;
|
|
str2 = (u_char *)s2;
|
|
|
|
while ((res = toupper(*str1) - toupper(*str2)) == 0)
|
|
{
|
|
if (*str1 == '\0')
|
|
return 0;
|
|
str1++;
|
|
str2++;
|
|
}
|
|
return (res);
|
|
}
|
|
|
|
/* Lower-case mapping, indexed by character value (256 entries).
 * Only 'A'-'Z' are changed; every other value maps to itself.
 */
u_char tolowertab[] = {
	/* 0x00 */ 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
	/* 0x08 */ 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
	/* 0x10 */ 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17,
	/* 0x18 */ 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f,
	/* 0x20 */ ' ', '!', '"', '#', '$', '%', '&', 0x27,
	/* 0x28 */ '(', ')', '*', '+', ',', '-', '.', '/',
	/* 0x30 */ '0', '1', '2', '3', '4', '5', '6', '7',
	/* 0x38 */ '8', '9', ':', ';', '<', '=', '>', '?',
	/* 0x40 */ '@', 'a', 'b', 'c', 'd', 'e', 'f', 'g',
	/* 0x48 */ 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o',
	/* 0x50 */ 'p', 'q', 'r', 's', 't', 'u', 'v', 'w',
	/* 0x58 */ 'x', 'y', 'z', '[', '\\', ']', '^', '_',
	/* 0x60 */ '`', 'a', 'b', 'c', 'd', 'e', 'f', 'g',
	/* 0x68 */ 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o',
	/* 0x70 */ 'p', 'q', 'r', 's', 't', 'u', 'v', 'w',
	/* 0x78 */ 'x', 'y', 'z', '{', '|', '}', '~', 0x7f,
	/* 0x80 */ 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87,
	/* 0x88 */ 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f,
	/* 0x90 */ 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97,
	/* 0x98 */ 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f,
	/* 0xa0 */ 0xa0, 0xa1, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7,
	/* 0xa8 */ 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf,
	/* 0xb0 */ 0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7,
	/* 0xb8 */ 0xb8, 0xb9, 0xba, 0xbb, 0xbc, 0xbd, 0xbe, 0xbf,
	/* 0xc0 */ 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6, 0xc7,
	/* 0xc8 */ 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf,
	/* 0xd0 */ 0xd0, 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7,
	/* 0xd8 */ 0xd8, 0xd9, 0xda, 0xdb, 0xdc, 0xdd, 0xde, 0xdf,
	/* 0xe0 */ 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7,
	/* 0xe8 */ 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef,
	/* 0xf0 */ 0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7,
	/* 0xf8 */ 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xff
};
|
|
|
|
/* Upper-case mapping, indexed by character value (256 entries).
 * Only 'a'-'z' are changed; every other value maps to itself.
 */
u_char touppertab[] = {
	/* 0x00 */ 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
	/* 0x08 */ 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
	/* 0x10 */ 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17,
	/* 0x18 */ 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f,
	/* 0x20 */ ' ', '!', '"', '#', '$', '%', '&', 0x27,
	/* 0x28 */ '(', ')', '*', '+', ',', '-', '.', '/',
	/* 0x30 */ '0', '1', '2', '3', '4', '5', '6', '7',
	/* 0x38 */ '8', '9', ':', ';', '<', '=', '>', '?',
	/* 0x40 */ '@', 'A', 'B', 'C', 'D', 'E', 'F', 'G',
	/* 0x48 */ 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O',
	/* 0x50 */ 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W',
	/* 0x58 */ 'X', 'Y', 'Z', '[', '\\', ']', '^', '_',
	/* 0x60 */ '`', 'A', 'B', 'C', 'D', 'E', 'F', 'G',
	/* 0x68 */ 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O',
	/* 0x70 */ 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W',
	/* 0x78 */ 'X', 'Y', 'Z', '{', '|', '}', '~', 0x7f,
	/* 0x80 */ 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87,
	/* 0x88 */ 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f,
	/* 0x90 */ 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97,
	/* 0x98 */ 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f,
	/* 0xa0 */ 0xa0, 0xa1, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7,
	/* 0xa8 */ 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf,
	/* 0xb0 */ 0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7,
	/* 0xb8 */ 0xb8, 0xb9, 0xba, 0xbb, 0xbc, 0xbd, 0xbe, 0xbf,
	/* 0xc0 */ 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6, 0xc7,
	/* 0xc8 */ 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf,
	/* 0xd0 */ 0xd0, 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7,
	/* 0xd8 */ 0xd8, 0xd9, 0xda, 0xdb, 0xdc, 0xdd, 0xde, 0xdf,
	/* 0xe0 */ 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7,
	/* 0xe8 */ 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef,
	/* 0xf0 */ 0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7,
	/* 0xf8 */ 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xff
};
|
|
|
|
u_char char_atribs[] = {
|
|
/* 0-7 */ CNTRL, CNTRL, CNTRL, CNTRL, CNTRL, CNTRL, CNTRL, CNTRL,
|
|
/* 8-12 */ CNTRL, CNTRL | SPACE, CNTRL | SPACE, CNTRL | SPACE,
|
|
CNTRL | SPACE,
|
|
/* 13-15 */ CNTRL | SPACE, CNTRL, CNTRL,
|
|
/* 16-23 */ CNTRL, CNTRL, CNTRL, CNTRL, CNTRL, CNTRL, CNTRL, CNTRL,
|
|
/* 24-31 */ CNTRL, CNTRL, CNTRL, CNTRL, CNTRL, CNTRL, CNTRL, CNTRL,
|
|
/* space */ PRINT | SPACE,
|
|
/* !"#$%&'( */ PRINT, PRINT, PRINT, PRINT, PRINT, PRINT, PRINT, PRINT,
|
|
/* )*+,-./ */ PRINT, PRINT, PRINT, PRINT, PRINT | ALLOW, PRINT | ALLOW,
|
|
PRINT,
|
|
/* 012 */ PRINT | DIGIT | ALLOW, PRINT | DIGIT | ALLOW,
|
|
PRINT | DIGIT | ALLOW,
|
|
/* 345 */ PRINT | DIGIT | ALLOW, PRINT | DIGIT | ALLOW,
|
|
PRINT | DIGIT | ALLOW,
|
|
/* 678 */ PRINT | DIGIT | ALLOW, PRINT | DIGIT | ALLOW,
|
|
PRINT | DIGIT | ALLOW,
|
|
/* 9:; */ PRINT | DIGIT | ALLOW, PRINT, PRINT,
|
|
/* <=>? */ PRINT, PRINT, PRINT, PRINT,
|
|
/* @ */ PRINT,
|
|
/* ABC */ PRINT | ALPHA | ALLOW, PRINT | ALPHA | ALLOW,
|
|
PRINT | ALPHA | ALLOW,
|
|
/* DEF */ PRINT | ALPHA | ALLOW, PRINT | ALPHA | ALLOW,
|
|
PRINT | ALPHA | ALLOW,
|
|
/* GHI */ PRINT | ALPHA | ALLOW, PRINT | ALPHA | ALLOW,
|
|
PRINT | ALPHA | ALLOW,
|
|
/* JKL */ PRINT | ALPHA | ALLOW, PRINT | ALPHA | ALLOW,
|
|
PRINT | ALPHA | ALLOW,
|
|
/* MNO */ PRINT | ALPHA | ALLOW, PRINT | ALPHA | ALLOW,
|
|
PRINT | ALPHA | ALLOW,
|
|
/* PQR */ PRINT | ALPHA | ALLOW, PRINT | ALPHA | ALLOW,
|
|
PRINT | ALPHA | ALLOW,
|
|
/* STU */ PRINT | ALPHA | ALLOW, PRINT | ALPHA | ALLOW,
|
|
PRINT | ALPHA | ALLOW,
|
|
/* VWX */ PRINT | ALPHA | ALLOW, PRINT | ALPHA | ALLOW,
|
|
PRINT | ALPHA | ALLOW,
|
|
/* YZ[ */ PRINT | ALPHA | ALLOW, PRINT | ALPHA | ALLOW, PRINT,
|
|
/* \]^ */ PRINT, PRINT, PRINT,
|
|
/* _` */ PRINT | ALLOW, PRINT,
|
|
/* abc */ PRINT | ALPHA | ALLOW, PRINT | ALPHA | ALLOW,
|
|
PRINT | ALPHA | ALLOW,
|
|
/* def */ PRINT | ALPHA | ALLOW, PRINT | ALPHA | ALLOW,
|
|
PRINT | ALPHA | ALLOW,
|
|
/* ghi */ PRINT | ALPHA | ALLOW, PRINT | ALPHA | ALLOW,
|
|
PRINT | ALPHA | ALLOW,
|
|
/* jkl */ PRINT | ALPHA | ALLOW, PRINT | ALPHA | ALLOW,
|
|
PRINT | ALPHA | ALLOW,
|
|
/* mno */ PRINT | ALPHA | ALLOW, PRINT | ALPHA | ALLOW,
|
|
PRINT | ALPHA | ALLOW,
|
|
/* pqr */ PRINT | ALPHA | ALLOW, PRINT | ALPHA | ALLOW,
|
|
PRINT | ALPHA | ALLOW,
|
|
/* stu */ PRINT | ALPHA | ALLOW, PRINT | ALPHA | ALLOW,
|
|
PRINT | ALPHA | ALLOW,
|
|
/* vwx */ PRINT | ALPHA | ALLOW, PRINT | ALPHA | ALLOW,
|
|
PRINT | ALPHA | ALLOW,
|
|
/* yz{ */ PRINT | ALPHA | ALLOW, PRINT | ALPHA | ALLOW, PRINT,
|
|
/* |}~ */ PRINT, PRINT, PRINT,
|
|
/* del */ 0,
|
|
/* 80-8f */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
|
/* 90-9f */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
|
/* a0-af */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
|
/* b0-bf */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
|
/* c0-cf */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
|
/* d0-df */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
|
/* e0-ef */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
|
/* f0-ff */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
|
|
};
|
|
|
|
/** Free up all resources of an Match entry (including the struct itself).
|
|
* NOTE: this function may (also) be called for Match structs that have only been
|
|
* setup half-way, so use special care when accessing members (NULL checks!)
|
|
*/
|
|
void unreal_delete_match(Match *m)
{
	/* The compiled regex only exists for PCRE entries, and may still be
	 * NULL when the entry was only set up half-way.
	 */
	if ((m->type == MATCH_PCRE_REGEX) && (m->ext.pcre2_expr != NULL))
		pcre2_code_free(m->ext.pcre2_expr);

	safe_free(m->str);
	safe_free(m);
}
|
|
|
|
/** Create a new Match entry of the given type for the pattern 'str'.
 * @param type   MATCH_SIMPLE or MATCH_PCRE_REGEX.
 * @param str    The pattern; a copy is stored in the entry.
 * @param error  Optional (may be NULL). On failure it is set to point to a
 *               static buffer holding an error message; it is not touched
 *               on success.
 * @returns The new entry, or NULL on failure (regex did not compile, or
 *          the type is unknown).
 */
Match *unreal_create_match(MatchType type, char *str, char **error)
{
	static char errorbuf[512];
	Match *m = safe_alloc(sizeof(Match));

	*errorbuf = '\0';

	safe_strdup(m->str, str);
	m->type = type;

	if (m->type == MATCH_SIMPLE)
	{
		/* Nothing to compile */
		return m;
	}

	if (m->type == MATCH_PCRE_REGEX)
	{
		int errorcode = 0;
		PCRE2_SIZE erroroffset = 0;
		int options = PCRE2_CASELESS|PCRE2_NEVER_UTF|PCRE2_NEVER_UCP;
		char buf2[512];

		m->ext.pcre2_expr = pcre2_compile(str, PCRE2_ZERO_TERMINATED, options, &errorcode, &erroroffset, NULL);
		if (m->ext.pcre2_expr == NULL)
		{
			/* Pre-terminate, in case no message can be produced */
			*buf2 = '\0';
			pcre2_get_error_message(errorcode, buf2, sizeof(buf2));
			if (error)
			{
				if (erroroffset > 0)
					snprintf(errorbuf, sizeof(errorbuf), "%s (at character #%d)", buf2, (int)erroroffset);
				else
					strlcpy(errorbuf, buf2, sizeof(errorbuf));
				*error = errorbuf;
			}
			unreal_delete_match(m);
			return NULL;
		}
		pcre2_jit_compile(m->ext.pcre2_expr, PCRE2_JIT_COMPLETE);
		return m;
	}

	/* Unknown type, how did that happen ?
	 * Still report it through 'error', so a caller that prints the
	 * message after a NULL return never reads an unset pointer.
	 */
	if (error)
	{
		strlcpy(errorbuf, "unknown match type", sizeof(errorbuf));
		*error = errorbuf;
	}
	unreal_delete_match(m);
	return NULL;
}
|
|
|
|
/** Try to match an Match entry ('m') against a string ('str').
|
|
* @returns 1 if matched, 0 if not.
|
|
* @note match_simple() uses the same convention: it also returns 1 on a match.
|
|
*/
|
|
int unreal_match(Match *m, char *str)
{
	pcre2_match_data *md;
	int rc;

	if (m->type == MATCH_SIMPLE)
		return match_simple(m->str, str) ? 1 : 0;

	if (m->type != MATCH_PCRE_REGEX)
		return 0; /* unknown type: never matches */

	/* The match data is never inspected, but pcre2_match() needs a non-NULL one */
	md = pcre2_match_data_create(9, NULL);
	rc = pcre2_match(m->ext.pcre2_expr, str, PCRE2_ZERO_TERMINATED, 0, 0, md, NULL); /* run the regex */
	pcre2_match_data_free(md);

	return (rc > 0) ? 1 : 0;
}
|
|
|
|
int unreal_match_method_strtoval(char *str)
|
|
{
|
|
if (!strcmp(str, "regex") || !strcmp(str, "pcre"))
|
|
return MATCH_PCRE_REGEX;
|
|
if (!strcmp(str, "simple") || !strcmp(str, "glob"))
|
|
return MATCH_SIMPLE;
|
|
return 0;
|
|
}
|
|
|
|
char *unreal_match_method_valtostr(int val)
|
|
{
|
|
if (val == MATCH_PCRE_REGEX)
|
|
return "regex";
|
|
if (val == MATCH_SIMPLE)
|
|
return "simple";
|
|
|
|
return "unknown";
|
|
}
|
|
|
|
/* It is unfortunate that we have 2 matching/replace systems.
|
|
* However, the above is for spamfilter matching and stuff
|
|
* and below is for matching on WORDS, which does specific things
|
|
* like replacement on word boundaries etc.
|
|
* Moved here from the censor channel and user mode module
|
|
* (previously was present in both modules, code duplication)
|
|
*/
|
|
/** Check whether a "fast" (non-regex) badword occurs in a line.
 * @param badword  The badword entry; badword->word is searched for
 *                 case-insensitively. BADW_TYPE_FAST_L / BADW_TYPE_FAST_R
 *                 in badword->type lift the requirement for a word
 *                 boundary at the left / right side of the occurrence.
 * @param line     The text to search in.
 * @returns 1 if an occurrence satisfying the boundary rules exists, 0 if not.
 */
int fast_badword_match(ConfigItem_badword *badword, char *line)
{
	char *p;
	int bwlen = strlen(badword->word);

	/* With both wildcards ("*word*") any occurrence will do */
	if ((badword->type & BADW_TYPE_FAST_L) && (badword->type & BADW_TYPE_FAST_R))
		return (our_strcasestr(line, badword->word) ? 1 : 0);

	p = line;
	while ((p = our_strcasestr(p, badword->word)))
	{
		/* Left side: fine at start of line or after a separator (aaBLA but no *BLA) */
		int left_ok = (badword->type & BADW_TYPE_FAST_L) ||
		              (p == line) || iswseperator(*(p - 1));
		/* Right side: fine if a separator follows (BLAaa but no BLA*) */
		int right_ok = (badword->type & BADW_TYPE_FAST_R) ||
		               iswseperator(*(p + bwlen));

		if (left_ok && right_ok)
			return 1; /* Looks like it matched */

		/* Can only be at the terminator if the word is empty:
		 * nothing left to scan, and we must not step past it.
		 */
		if (*p == '\0')
			break;

		/* Advance by ONE character, not by the word length: occurrences
		 * may overlap (with "*aa", the line "baaa" must match through
		 * its last two characters). fast_badword_replace() steps the
		 * same way.
		 */
		p++;
	}
	return 0;
}
|
|
|
|
/* fast_badword_replace:
|
|
* A fast replace routine written by Syzop used for replacing badwords.
|
|
* This searches in line for the bad word and replaces it.
|
|
* buf is used for the result and max is sizeof(buf).
|
|
* Assumptions[!]: max > 0 AND max > strlen(line)+1
|
|
*/
|
|
int fast_badword_replace(ConfigItem_badword *badword, char *line, char *buf, int max)
{
	char *replacement = badword->replace ? badword->replace : REPLACEWORD;
	char *scan = line;             /* where the next search in 'line' starts */
	char *pending = line;          /* first byte of 'line' not yet copied to 'buf' */
	char *out = buf;               /* write position in 'buf' */
	char *out_end = buf + max - 1; /* last byte of 'buf', kept for the NUL */
	char *word_start, *word_end;   /* bounds of the whole word around a hit */
	int replacement_len = -1;      /* computed on first hit only */
	int word_len = -1;             /* computed on first hit only */
	int cleaned = 0;

	Debug((DEBUG_NOTICE, "replacing %s -> %s in '%s'", badword->word, replacement, line));

	for (;;)
	{
		scan = our_strcasestr(scan, badword->word);
		if (scan == NULL)
			break;

		if (replacement_len == -1)
			replacement_len = strlen(replacement);
		if (word_len == -1)
			word_len = strlen(badword->word);

		/* Hunt for the start of the word that contains the hit */
		word_start = scan;
		if (scan > line)
		{
			while (!iswseperator(*word_start) && (word_start != line))
				word_start--;
			if (iswseperator(*word_start))
				word_start++; /* point at the word, not at the separator */
		}

		/* Without a left wildcard the hit must begin the word */
		if (!(badword->type & BADW_TYPE_FAST_L) && (scan != word_start))
		{
			scan++;
			continue;
		}

		/* Hunt for the end of the word. The word is at least 'word_len'
		 * long, so start after the hit to avoid stopping half-way the
		 * badword (bug #4909).
		 */
		word_end = scan + word_len;
		while ((*word_end != '\0') && !iswseperator(*word_end))
			word_end++;

		/* Without a right wildcard the hit must end the word */
		if (!(badword->type & BADW_TYPE_FAST_R) && (scan + word_len != word_end))
		{
			scan++;
			continue;
		}

		cleaned = 1;

		/* First flush the text between the previous word and this one */
		if (pending != word_start)
		{
			int gap = word_start - pending;

			if (out + gap >= out_end)
			{
				/* Does not fit: partial copy and return */
				memcpy(out, pending, out_end - out);
				*out_end = '\0';
				return 1;
			}
			memcpy(out, pending, gap);
			out += gap;
		}

		/* Then emit the replacement instead of the whole word */
		if (replacement_len)
		{
			if ((out + replacement_len) >= out_end)
			{
				/* Does not fit: partial copy and return */
				memcpy(out, replacement, out_end - out);
				*out_end = '\0';
				return 1;
			}
			memcpy(out, replacement, replacement_len);
			out += replacement_len;
		}

		pending = scan = word_end;
	}

	/* Copy the last part */
	if (*pending)
	{
		strncpy(out, pending, out_end - out);
		*out_end = '\0';
	}
	else
	{
		*out = '\0';
	}

	return cleaned;
}
|
|
|
|
/*
|
|
* Returns a string, which has been filtered by the words loaded via
|
|
* the loadbadwords() function. It's primary use is to filter swearing
|
|
* in both private and public messages
|
|
*/
|
|
/** Run a text through a list of badwords.
 * @param str       The text to check.
 * @param start_bw  Head of the badword list (may be NULL: nothing to do).
 * @param blocked   Set to 1 if a badword with action BADWORD_BLOCK matched,
 *                  otherwise set to 0.
 * @returns NULL if the text is blocked (*blocked is 1);
 *          a pointer to a static buffer with the filtered text if at
 *          least one replacement was made;
 *          'str' itself if nothing was replaced.
 */
char *stripbadwords(char *str, ConfigItem_badword *start_bw, int *blocked)
{
	static char cleanstr[4096];
	char buf[4096];
	char *ptr;
	int matchlen, m, stringlen, cleaned;
	ConfigItem_badword *this_word;

	*blocked = 0;

	if (!start_bw)
		return str;

	/*
	 * work on a copy
	 */
	stringlen = strlcpy(cleanstr, StripControlCodes(str), sizeof cleanstr);
	matchlen = 0;
	buf[0] = '\0';
	cleaned = 0;

	for (this_word = start_bw; this_word; this_word = this_word->next)
	{
		if (this_word->type & BADW_TYPE_FAST)
		{
			if (this_word->action == BADWORD_BLOCK)
			{
				if (fast_badword_match(this_word, cleanstr))
				{
					*blocked = 1;
					return NULL;
				}
			}
			else
			{
				int n;
				/* fast_badword_replace() does size checking so we can use 512 here instead of 4096 */
				n = fast_badword_replace(this_word, cleanstr, buf, 512);
				if (!cleaned && n)
					cleaned = n;
				strcpy(cleanstr, buf);
				memset(buf, 0, sizeof(buf)); /* regexp likes this somehow */
			}
		}
		else if (this_word->type & BADW_TYPE_REGEX)
		{
			if (this_word->action == BADWORD_BLOCK)
			{
				pcre2_match_data *md = pcre2_match_data_create(9, NULL);
				int ret;

				ret = pcre2_match(this_word->pcre2_expr, cleanstr, PCRE2_ZERO_TERMINATED, 0, 0, md, NULL); /* run the regex */
				pcre2_match_data_free(md); /* yeah, we never use it. unfortunately argument must be non-NULL for pcre2_match() */
				if (ret > 0)
				{
					*blocked = 1;
					return NULL;
				}
			}
			else
			{
				pcre2_match_data *md;
				int ret;
				PCRE2_SIZE *dd;
				int start, end;

				ptr = cleanstr; /* set pointer to start of string */
				while (1)
				{
					md = pcre2_match_data_create(9, NULL);
					/* ^^ we need to free 'md' in ALL circumstances.
					 * remember this if you break or continue in this loop!
					 */
					ret = pcre2_match(this_word->pcre2_expr, ptr, PCRE2_ZERO_TERMINATED, 0, 0, md, NULL); /* run the regex */
					if (ret > 0)
					{
						/* (A leftover debug message that logged the ovector
						 * count at LOG_ERROR on every hit was removed here.)
						 */
						dd = pcre2_get_ovector_pointer(md);
						start = (int)dd[0];
						end = (int)dd[1];
						/* Sanity check: the match must lie within the remaining text */
						if ((start < 0) || (end < 0) || (start > strlen(ptr)) || (end > strlen(ptr)+1))
						{
							ircd_log(LOG_ERROR, "pcre2_match() returned an ovector with OOB start/end: %d/%d, str (%d): '%s'",
							         (int)start, (int)end, (int)strlen(ptr), ptr);
							abort();
						}
						m = end - start;
						if (m == 0)
						{
							pcre2_match_data_free(md);
							break; /* anti-loop */
						}
						cleaned = 1;
						matchlen += m;
						/* Text before the match, then the replacement */
						strlncat(buf, ptr, sizeof buf, start);
						if (this_word->replace)
							strlcat(buf, this_word->replace, sizeof buf);
						else
							strlcat(buf, REPLACEWORD, sizeof buf);
						ptr += end; /* Set pointer after the match pos */
						pcre2_match_data_free(md);
						continue; /* next! */
					}
					pcre2_match_data_free(md);
					break; /* NOMATCH: we are done! */
				}

				/* All the better to eat you with! */
				strlcat(buf, ptr, sizeof buf);
				memcpy(cleanstr, buf, sizeof cleanstr);
				memset(buf, 0, sizeof(buf));
				/* Everything has been replaced: no point in trying further words */
				if (matchlen == stringlen)
					break;
			}
		}
	}

	cleanstr[511] = '\0'; /* cutoff, just to be sure */

	return (cleaned) ? cleanstr : str;
}
|
|
|
|
/** Checks if the specified regex (or fast badwords) is valid.
|
|
* returns NULL in case of success [!],
|
|
* pointer to buffer with error message otherwise
|
|
* check_broadness is meant to request a check on whether the given
* regex string is too broad (i.e. matches everything); note that the
* current implementation does not use this parameter.
|
|
*/
|
|
char *badword_config_check_regex(char *str, int fastsupport, int check_broadness)
|
|
{
|
|
int regex=0;
|
|
char *tmp;
|
|
static char errorbuf[512];
|
|
|
|
if (fastsupport)
|
|
{
|
|
for (tmp = str; *tmp; tmp++) {
|
|
if (!isalnum(*tmp) && !(*tmp >= 128)) {
|
|
if ((str == tmp) && (*tmp == '*'))
|
|
continue;
|
|
if ((*(tmp + 1) == '\0') && (*tmp == '*'))
|
|
continue;
|
|
regex = 1;
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
if (!fastsupport || regex)
|
|
{
|
|
int errorcode = 0;
|
|
PCRE2_SIZE erroroffset = 0;
|
|
pcre2_code *expr;
|
|
int options = 0;
|
|
char buf2[512];
|
|
|
|
options = PCRE2_CASELESS|PCRE2_NEVER_UTF|PCRE2_NEVER_UCP;
|
|
|
|
expr = pcre2_compile(str, PCRE2_ZERO_TERMINATED, options, &errorcode, &erroroffset, NULL);
|
|
if (expr == NULL)
|
|
{
|
|
pcre2_get_error_message(errorcode, buf2, sizeof(buf2));
|
|
if (erroroffset > 0)
|
|
snprintf(errorbuf, sizeof(errorbuf), "%s (at character #%d)", buf2, (int)erroroffset);
|
|
else
|
|
strlcpy(errorbuf, buf2, sizeof(errorbuf));
|
|
return errorbuf;
|
|
}
|
|
pcre2_code_free(expr);
|
|
}
|
|
return NULL;
|
|
}
|
|
|
|
/** Fill in a badword entry from its configured pattern.
 * Sets ca->type and ca->word, and for regexes also ca->pcre2_expr.
 * Aborts if a regex fails to compile.
 * @returns Always 1.
 */
int badword_config_process(ConfigItem_badword *ca, char *str)
{
	char *in;
	int needs_regex = 0;
	int star_left = 0, star_right = 0;

	/* The fast badwords routine can do: "blah" "*blah" "blah*" and "*blah*",
	 * in all other cases use regex.
	 */
	for (in = str; *in; in++)
	{
		if (isalnum(*in) || (*in >= 128))
			continue;
		if (*in == '*')
		{
			if (in == str)
			{
				star_left = 1; /* Asterisk at the left */
				continue;
			}
			if (*(in + 1) == '\0')
			{
				star_right = 1; /* Asterisk at the right */
				continue;
			}
		}
		needs_regex = 1;
		break;
	}

	if (needs_regex)
	{
		int errorcode = 0;
		PCRE2_SIZE erroroffset = 0;
		int options = PCRE2_CASELESS|PCRE2_NEVER_UTF|PCRE2_NEVER_UCP;

		ca->type = BADW_TYPE_REGEX;
		safe_strdup(ca->word, str);

		ca->pcre2_expr = pcre2_compile(str, PCRE2_ZERO_TERMINATED, options, &errorcode, &erroroffset, NULL);
		if (ca->pcre2_expr == NULL)
		{
			/* This cannot happen since badword_config_check_regex()
			 * should be called from config_test on each regex.
			 */
			config_error("badword_config_process(): failed to compile regex '%s', this is impossible!", str);
			abort();
		}
		pcre2_jit_compile(ca->pcre2_expr, PCRE2_JIT_COMPLETE);
	}
	else
	{
		char *out;

		ca->type = BADW_TYPE_FAST;
		if (star_left)
			ca->type |= BADW_TYPE_FAST_L;
		if (star_right)
			ca->type |= BADW_TYPE_FAST_R;

		/* Store the word without its asterisks */
		out = safe_alloc(strlen(str) - star_left - star_right + 1);
		ca->word = out;
		for (in = str; *in; in++)
		{
			if (*in != '*')
				*out++ = *in;
		}
		*out = '\0';
	}

	return 1;
}
|
|
|
|
/** Frees a ConfigItem_badword item.
|
|
* Note that it does NOT remove from the list, you need
|
|
* to do this BEFORE calling this function.
|
|
*/
|
|
void badword_config_free(ConfigItem_badword *e)
{
	/* The compiled regex is only present for regex badwords */
	if (e->pcre2_expr != NULL)
		pcre2_code_free(e->pcre2_expr);

	safe_free(e->replace);
	safe_free(e->word);
	safe_free(e);
}
|