/*
 * This file is part of the Sofia-SIP package
 *
 * Copyright (C) 2005 Nokia Corporation.
 *
 * Contact: Pekka Pessi <pekka.pessi@nokia.com>
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public License
 * as published by the Free Software Foundation; either version 2.1 of
 * the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
 * 02110-1301 USA
 *
 */

/**@internal
 * @file su_bm.c
 * @brief Search with Boyer-Moore algorithm
 *
 * @author Pekka Pessi <Pekka.Pessi@nokia.com>
 *
 * @date Created: Mon Apr 11 16:35:16 2005 ppessi
 *
 */

#include "config.h"

#include <sofia-sip/su_bm.h>

#include <sys/types.h>
#include <stddef.h>
#include <limits.h>
#include <ctype.h>
#include <stdlib.h>
#include <string.h>

/* Trace hook used by the search loops below. Unless TORTURELOG has been
 * defined before this point, every trace call expands to a no-op
 * expression and its argument is discarded by the preprocessor. */
#ifndef TORTURELOG
#define TORTURELOG(x) ((void)0)
#endif

/* Forward skip table: one entry for each possible unsigned char value.
 * The study functions in this file store in each entry how far the search
 * position may advance when that byte is met in the haystack. */
struct bw_fwd_table {
  unsigned char table[UCHAR_MAX + 1];
};

/** Build forward skip table #bm_fwd_table_t for Boyer-Moore algorithm. */
|
|
static
|
|
bm_fwd_table_t *
|
|
bm_memmem_study0(char const *needle, size_t nlen, bm_fwd_table_t *fwd)
|
|
{
|
|
size_t i;
|
|
|
|
if (nlen >= UCHAR_MAX) {
|
|
needle += nlen - UCHAR_MAX;
|
|
nlen = UCHAR_MAX;
|
|
}
|
|
|
|
memset(&fwd->table, (unsigned char)nlen, sizeof fwd->table);
|
|
|
|
for (i = 0; i < nlen; i++) {
|
|
fwd->table[(unsigned short)needle[i]] = (unsigned char)(nlen - i - 1);
|
|
}
|
|
|
|
return fwd;
|
|
}
|
|
|
|
/** @defgroup su_bm Fast string searching with Boyer-Moore algorithm
 *
 * The Boyer-Moore algorithm is used to implement fast substring search. The
 * algorithm has some overhead caused by filling a skip table. In the best
 * case, the search then needs only about 1 / substring-length of the
 * character comparisons made by a naive search. On modern desktop
 * hardware, the Boyer-Moore algorithm is seldom faster than the naive
 * implementation if the searched substring is shorter than a cache line.
 *
 */

/**@ingroup su_bm
 * @typedef struct bw_fwd_table bm_fwd_table_t;
 *
 * Forward skip table for Boyer-Moore algorithm.
 *
 */

/** Build case-sensitive forward skip table #bm_fwd_table_t
|
|
* for Boyer-Moore algorithm.
|
|
* @ingroup su_bm
|
|
*/
|
|
bm_fwd_table_t *
|
|
bm_memmem_study(char const *needle, size_t nlen)
|
|
{
|
|
bm_fwd_table_t *fwd = malloc(sizeof *fwd);
|
|
|
|
if (fwd)
|
|
bm_memmem_study0(needle, nlen, fwd);
|
|
|
|
return fwd;
|
|
}
|
|
|
|
/** Search for a substring using Boyer-Moore algorithm.
|
|
* @ingroup su_bm
|
|
*/
|
|
char *
|
|
bm_memmem(char const *haystack, size_t hlen,
|
|
char const *needle, size_t nlen,
|
|
bm_fwd_table_t *fwd)
|
|
{
|
|
size_t i, j;
|
|
bm_fwd_table_t fwd0[1];
|
|
|
|
if (nlen == 0)
|
|
return (char *)haystack;
|
|
if (needle == NULL || haystack == NULL || nlen > hlen)
|
|
return NULL;
|
|
|
|
if (nlen == 1) {
|
|
for (i = 0; i < hlen; i++)
|
|
if (haystack[i] == needle[0])
|
|
return (char *)haystack + i;
|
|
return NULL;
|
|
}
|
|
|
|
if (!fwd)
|
|
fwd = bm_memmem_study0(needle, nlen, fwd0);
|
|
|
|
for (i = j = nlen - 1; i < hlen;) {
|
|
unsigned char h = haystack[i];
|
|
if (h == needle[j]) {
|
|
TORTURELOG(("match \"%s\" at %u\nwith %*s\"%.*s*%s\": %s\n",
|
|
haystack, (unsigned)i,
|
|
(int)(i - j), "", (int)j, needle, needle + j + 1,
|
|
j == 0 ? "match!" : "back by 1"));
|
|
if (j == 0)
|
|
return (char *)haystack + i;
|
|
i--, j--;
|
|
}
|
|
else {
|
|
if (fwd->table[h] > nlen - j) {
|
|
TORTURELOG(("match \"%s\" at %u\n"
|
|
"last %*s\"%.*s*%s\": (by %u)\n",
|
|
haystack, (unsigned)i,
|
|
(int)(i - j), "",
|
|
(int)j, needle, needle + j + 1, fwd->table[h]));
|
|
i += fwd->table[h];
|
|
}
|
|
else {
|
|
TORTURELOG(("match \"%s\" at %u\n"
|
|
"2nd %*s\"%.*s*%s\": (by %u)\n",
|
|
haystack, (unsigned)i,
|
|
(int)(i - j), "",
|
|
(int)j, needle, needle + j + 1, (unsigned)(nlen - j)));
|
|
i += nlen - j;
|
|
}
|
|
j = nlen - 1;
|
|
}
|
|
}
|
|
|
|
return NULL;
|
|
}
|
|
|
|
|
|
/** Build forward skip table for Boyer-Moore algorithm */
|
|
static
|
|
bm_fwd_table_t *
|
|
bm_memcasemem_study0(char const *needle, size_t nlen, bm_fwd_table_t *fwd)
|
|
{
|
|
size_t i;
|
|
|
|
if (nlen >= UCHAR_MAX) {
|
|
needle += nlen - UCHAR_MAX;
|
|
nlen = UCHAR_MAX;
|
|
}
|
|
|
|
for (i = 0; i < UCHAR_MAX; i++)
|
|
fwd->table[i] = (unsigned char)nlen;
|
|
|
|
for (i = 0; i < nlen; i++) {
|
|
unsigned char n = tolower(needle[i]);
|
|
fwd->table[n] = (unsigned char)(nlen - i - 1);
|
|
}
|
|
|
|
return fwd;
|
|
}
|
|
|
|
/** Build case-insensitive forward skip table for Boyer-Moore algorithm.
|
|
* @ingroup su_bm
|
|
*/
|
|
bm_fwd_table_t *
|
|
bm_memcasemem_study(char const *needle, size_t nlen)
|
|
{
|
|
bm_fwd_table_t *fwd = malloc(sizeof *fwd);
|
|
|
|
if (fwd)
|
|
bm_memcasemem_study0(needle, nlen, fwd);
|
|
|
|
return fwd;
|
|
}
|
|
|
|
/** Search for substring using Boyer-Moore algorithm.
|
|
* @ingroup su_bm
|
|
*/
|
|
char *
|
|
bm_memcasemem(char const *haystack, size_t hlen,
|
|
char const *needle, size_t nlen,
|
|
bm_fwd_table_t *fwd)
|
|
{
|
|
size_t i, j;
|
|
bm_fwd_table_t fwd0[1];
|
|
|
|
if (nlen == 0)
|
|
return (char *)haystack;
|
|
if (needle == 0 || haystack == 0 || nlen > hlen)
|
|
return NULL;
|
|
|
|
if (nlen == 1) {
|
|
for (i = 0; i < hlen; i++)
|
|
if (haystack[i] == needle[0])
|
|
return (char *)haystack + i;
|
|
return NULL;
|
|
}
|
|
|
|
if (!fwd) {
|
|
fwd = bm_memcasemem_study0(needle, nlen, fwd0);
|
|
}
|
|
|
|
for (i = j = nlen - 1; i < hlen;) {
|
|
unsigned char h = haystack[i], n = needle[j];
|
|
if (isupper(h))
|
|
h = tolower(h);
|
|
if (isupper(n))
|
|
n = tolower(n);
|
|
|
|
if (h == n) {
|
|
TORTURELOG(("match \"%s\" at %u\n"
|
|
"with %*s\"%.*s*%s\": %s\n",
|
|
haystack, (unsigned)i,
|
|
(int)(i - j), "", (int)j, needle, needle + j + 1,
|
|
j == 0 ? "match!" : "back by 1"));
|
|
if (j == 0)
|
|
return (char *)haystack + i;
|
|
i--, j--;
|
|
}
|
|
else {
|
|
if (fwd->table[h] > nlen - j) {
|
|
TORTURELOG(("match \"%s\" at %u\n"
|
|
"last %*s\"%.*s*%s\": (by %u)\n",
|
|
haystack, (unsigned)i,
|
|
(int)(i - j), "", (int)j, needle, needle + j + 1,
|
|
fwd->table[h]));
|
|
i += fwd->table[h];
|
|
}
|
|
else {
|
|
TORTURELOG(("match \"%s\" at %u\n"
|
|
"2nd %*s\"%.*s*%s\": (by %u)\n",
|
|
haystack, (unsigned)i,
|
|
(int)(i - j), "", (int)j, needle, needle + j + 1,
|
|
(unsigned)(nlen - j)));
|
|
i += nlen - j;
|
|
}
|
|
j = nlen - 1;
|
|
}
|
|
}
|
|
|
|
return NULL;
|
|
}
|