freeswitch/libs/sofia-sip/libsofia-sip-ua/su/su_bm.c

274 lines
6.4 KiB
C

/*
* This file is part of the Sofia-SIP package
*
* Copyright (C) 2005 Nokia Corporation.
*
* Contact: Pekka Pessi <pekka.pessi@nokia.com>
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public License
* as published by the Free Software Foundation; either version 2.1 of
* the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
* 02110-1301 USA
*
*/
/**@internal
* @file su_bm.c
* @brief Search with Boyer-Moore algorithm
*
* @author Pekka Pessi <Pekka.Pessi@nokia.com>
*
* @date Created: Mon Apr 11 16:35:16 2005 ppessi
*
*/
#include "config.h"
#include <sofia-sip/su_bm.h>
#include <sys/types.h>
#include <stddef.h>
#include <limits.h>
#include <ctype.h>
#include <stdlib.h>
#include <string.h>
/* TORTURELOG(x) is the trace hook invoked from the search loops in this
 * file. Unless it has already been defined before this point, it expands
 * to a no-op expression. */
#ifndef TORTURELOG
#define TORTURELOG(x) (void)0
#endif
/* Skip table for the Boyer-Moore searches in this file:
 * one skip distance for every possible unsigned char value. */
struct bw_fwd_table
{
  unsigned char table[1 + UCHAR_MAX];
};
/** Build forward skip table #bm_fwd_table_t for Boyer-Moore algorithm.
 *
 * Every entry is first set to the (clamped) needle length. Then, for each
 * needle byte in order, the entry for that byte is overwritten with the
 * distance from the byte to the end of the needle, so the rightmost
 * occurrence of a byte determines its entry.
 *
 * @param needle  bytes to search for
 * @param nlen    number of bytes in @a needle
 * @param fwd     table to fill in
 *
 * @return The @a fwd pointer.
 */
static
bm_fwd_table_t *
bm_memmem_study0(char const *needle, size_t nlen, bm_fwd_table_t *fwd)
{
  size_t i;

  /* Skip distances are stored in an unsigned char, so only the last
   * UCHAR_MAX bytes of a longer needle are studied. */
  if (nlen >= UCHAR_MAX) {
    needle += nlen - UCHAR_MAX;
    nlen = UCHAR_MAX;
  }

  memset(&fwd->table, (unsigned char)nlen, sizeof fwd->table);

  for (i = 0; i < nlen; i++) {
    /* Plain char may be signed: convert to unsigned char so that the
     * index always stays within 0..UCHAR_MAX. */
    fwd->table[(unsigned char)needle[i]] = (unsigned char)(nlen - i - 1);
  }

  return fwd;
}
/** @defgroup su_bm Fast string searching with Boyer-Moore algorithm
*
* The Boyer-Moore algorithm is used to implement fast substring search. The
* algorithm has some overhead caused by filling a skip table. In the best
* case, the search then examines only about 1 / substring-length of the
* bytes in the searched string. On modern desktop hardware, the Boyer-Moore
* algorithm is seldom faster than the naive implementation if the searched
* substring is shorter than a cache line.
*
*/
/**@ingroup su_bm
* @typedef struct bw_fwd_table bm_fwd_table_t;
*
* Forward skip table for Boyer-Moore algorithm.
*
*/
/** Allocate and build a case-sensitive forward skip table
 * #bm_fwd_table_t for Boyer-Moore algorithm.
 * @ingroup su_bm
 *
 * @param needle  bytes to search for
 * @param nlen    number of bytes in @a needle
 *
 * @return Table obtained from malloc(), or NULL if the allocation failed.
 */
bm_fwd_table_t *
bm_memmem_study(char const *needle, size_t nlen)
{
  bm_fwd_table_t *table = malloc(sizeof *table);

  if (table == NULL)
    return NULL;

  /* The helper fills the table in and hands the same pointer back. */
  return bm_memmem_study0(needle, nlen, table);
}
/** Search for a substring using Boyer-Moore algorithm.
 * @ingroup su_bm
 *
 * @param haystack  bytes to search in
 * @param hlen      number of bytes in @a haystack
 * @param needle    bytes to search for
 * @param nlen      number of bytes in @a needle
 * @param fwd       skip table for @a needle, or NULL to have a temporary
 *                  table built on the stack for this call
 *
 * @return Pointer to the start of the match within @a haystack,
 * @a haystack itself if @a nlen is 0, or NULL if there is no match or if
 * @a needle or @a haystack is NULL.
 */
char *
bm_memmem(char const *haystack, size_t hlen,
          char const *needle, size_t nlen,
          bm_fwd_table_t *fwd)
{
  size_t i, j;
  bm_fwd_table_t fwd0[1];

  if (nlen == 0)
    return (char *)haystack;
  if (needle == NULL || haystack == NULL || nlen > hlen)
    return NULL;

  /* A single byte needs no skip table. */
  if (nlen == 1)
    return (char *)memchr(haystack, (unsigned char)needle[0], hlen);

  if (!fwd)
    fwd = bm_memmem_study0(needle, nlen, fwd0);

  /* i indexes the haystack and j the needle; each alignment is compared
   * backwards, starting from the last byte of the needle. */
  for (i = j = nlen - 1; i < hlen;) {
    /* Compare both bytes as unsigned char: with a signed plain char,
     * a needle byte >= 0x80 would otherwise never equal h. */
    unsigned char h = haystack[i], n = needle[j];

    if (h == n) {
      TORTURELOG(("match \"%s\" at %u\nwith %*s\"%.*s*%s\": %s\n",
                  haystack, (unsigned)i,
                  (int)(i - j), "", (int)j, needle, needle + j + 1,
                  j == 0 ? "match!" : "back by 1"));
      if (j == 0)
        return (char *)haystack + i;
      i--, j--;
    }
    else {
      if (fwd->table[h] > nlen - j) {
        /* Table skip is larger than moving the alignment on by one. */
        TORTURELOG(("match \"%s\" at %u\n"
                    "last %*s\"%.*s*%s\": (by %u)\n",
                    haystack, (unsigned)i,
                    (int)(i - j), "",
                    (int)j, needle, needle + j + 1, fwd->table[h]));
        i += fwd->table[h];
      }
      else {
        /* Move the alignment forward by exactly one position. */
        TORTURELOG(("match \"%s\" at %u\n"
                    "2nd %*s\"%.*s*%s\": (by %u)\n",
                    haystack, (unsigned)i,
                    (int)(i - j), "",
                    (int)j, needle, needle + j + 1, (unsigned)(nlen - j)));
        i += nlen - j;
      }
      j = nlen - 1;
    }
  }

  return NULL;
}
/** Build case-insensitive forward skip table for Boyer-Moore algorithm.
 *
 * Every entry is first set to the (clamped) needle length. Then, for each
 * needle byte in order, the entry for the tolower() value of that byte is
 * overwritten with the distance from the byte to the end of the needle.
 *
 * @param needle  bytes to search for
 * @param nlen    number of bytes in @a needle
 * @param fwd     table to fill in
 *
 * @return The @a fwd pointer.
 */
static
bm_fwd_table_t *
bm_memcasemem_study0(char const *needle, size_t nlen, bm_fwd_table_t *fwd)
{
  size_t i;

  /* Skip distances are stored in an unsigned char, so only the last
   * UCHAR_MAX bytes of a longer needle are studied. */
  if (nlen >= UCHAR_MAX) {
    needle += nlen - UCHAR_MAX;
    nlen = UCHAR_MAX;
  }

  /* Initialise all UCHAR_MAX + 1 entries, including the last one. */
  memset(fwd->table, (unsigned char)nlen, sizeof fwd->table);

  for (i = 0; i < nlen; i++) {
    /* <ctype.h> functions require a value representable as unsigned char
     * (or EOF); plain char may be negative. */
    unsigned char n = (unsigned char)tolower((unsigned char)needle[i]);
    fwd->table[n] = (unsigned char)(nlen - i - 1);
  }

  return fwd;
}
/** Allocate and build a case-insensitive forward skip table
 * for Boyer-Moore algorithm.
 * @ingroup su_bm
 *
 * @param needle  bytes to search for
 * @param nlen    number of bytes in @a needle
 *
 * @return Table obtained from malloc(), or NULL if the allocation failed.
 */
bm_fwd_table_t *
bm_memcasemem_study(char const *needle, size_t nlen)
{
  bm_fwd_table_t *table = malloc(sizeof *table);

  if (table == NULL)
    return NULL;

  /* The helper fills the table in and hands the same pointer back. */
  return bm_memcasemem_study0(needle, nlen, table);
}
/** Search for a substring, ignoring case, using Boyer-Moore algorithm.
 * @ingroup su_bm
 *
 * Bytes are compared after folding upper case to lower case with the
 * <ctype.h> functions.
 *
 * @param haystack  bytes to search in
 * @param hlen      number of bytes in @a haystack
 * @param needle    bytes to search for
 * @param nlen      number of bytes in @a needle
 * @param fwd       case-insensitive skip table for @a needle, or NULL to
 *                  have a temporary table built on the stack for this call
 *
 * @return Pointer to the start of the match within @a haystack,
 * @a haystack itself if @a nlen is 0, or NULL if there is no match or if
 * @a needle or @a haystack is NULL.
 */
char *
bm_memcasemem(char const *haystack, size_t hlen,
              char const *needle, size_t nlen,
              bm_fwd_table_t *fwd)
{
  size_t i, j;
  bm_fwd_table_t fwd0[1];

  if (nlen == 0)
    return (char *)haystack;
  if (needle == 0 || haystack == 0 || nlen > hlen)
    return NULL;

  if (nlen == 1) {
    /* A single byte needs no skip table, but the comparison must still
     * ignore case like the general path does. */
    int const n0 = tolower((unsigned char)needle[0]);

    for (i = 0; i < hlen; i++)
      if (tolower((unsigned char)haystack[i]) == n0)
        return (char *)haystack + i;
    return NULL;
  }

  if (!fwd) {
    fwd = bm_memcasemem_study0(needle, nlen, fwd0);
  }

  /* i indexes the haystack and j the needle; each alignment is compared
   * backwards, starting from the last byte of the needle. */
  for (i = j = nlen - 1; i < hlen;) {
    unsigned char h = haystack[i], n = needle[j];

    /* Fold both bytes to lower case before comparing. */
    if (isupper(h))
      h = tolower(h);
    if (isupper(n))
      n = tolower(n);

    if (h == n) {
      TORTURELOG(("match \"%s\" at %u\n"
                  "with %*s\"%.*s*%s\": %s\n",
                  haystack, (unsigned)i,
                  (int)(i - j), "", (int)j, needle, needle + j + 1,
                  j == 0 ? "match!" : "back by 1"));
      if (j == 0)
        return (char *)haystack + i;
      i--, j--;
    }
    else {
      if (fwd->table[h] > nlen - j) {
        /* Table skip is larger than moving the alignment on by one. */
        TORTURELOG(("match \"%s\" at %u\n"
                    "last %*s\"%.*s*%s\": (by %u)\n",
                    haystack, (unsigned)i,
                    (int)(i - j), "", (int)j, needle, needle + j + 1,
                    fwd->table[h]));
        i += fwd->table[h];
      }
      else {
        /* Move the alignment forward by exactly one position. */
        TORTURELOG(("match \"%s\" at %u\n"
                    "2nd %*s\"%.*s*%s\": (by %u)\n",
                    haystack, (unsigned)i,
                    (int)(i - j), "", (int)j, needle, needle + j + 1,
                    (unsigned)(nlen - j)));
        i += nlen - j;
      }
      j = nlen - 1;
    }
  }

  return NULL;
}