engine/jtree.c

Go to the documentation of this file.
00001 /**************************************
00002 * This is a multi-string matching
00003 * algorithm based on a Boyer-Moore
00004 * string match.  It matches multiple
00005 * substrings in a candidate string
00006 *
00007 * Jason Larsen
00008 * 4/21/03
00009 ***************************************/
00010 #include "jtree.h"
00011 #include <string.h>
00012 #include <stdlib.h>
00013 #include <stdio.h>
00014 #include <ctype.h>
00015 #include "bits.h"
00016 
00017 //#define DEBUGBUILD
00018 //#define DEBUGMATCH
00019 //#define DEBUGFINAL
00020 //#define DEBUGESCAPE
00021 
00022 #ifdef DEBUGFINAL
00023 int     node_count=0;
00024 #endif
00025 
00026 #ifdef DEBUGESCAPE
00027 #define DBGESCAPE(a)    DBG(a)
00028 #else
00029 #define DBGESCAPE(a)
00030 #endif
00031 
00032 
00033 int calloc_count = 0;
00034 int free_count = 0;
00035 int FreeNode(JNode* n);
00036 
00037 extern GlobalVars       Globals;
00038 
00042 int InitJTree(JTree* j, char NoCase)
00043 {
00044         DEBUGPATH;
00045 
00046         bzero(j, sizeof(JTree));
00047         
00048         j->NoCase = NoCase;
00049         
00050         return TRUE;
00051 }
00052 
00056 int AddStringJTreeReal(JTree* j, unsigned char* String, int SLen, int RuleID)
00057 {
00058         JNode*  node;
00059         int             i;
00060 
00061         DEBUGPATH;
00062 
00063         if (!j) return FALSE;
00064         if (!String) return FALSE;
00065         if (SLen>MAX_STRING_LEN) return FALSE;
00066 
00067         j->IsFinalized = FALSE;
00068         
00069         /* if this is the first string in the tree... */
00070         if (!j->Head) {
00071 #ifdef DEBUGBUILD
00072                 printf("First String in tree\n");
00073 #endif  
00074                 j->Head = calloc(sizeof(JNode),1);
00075                 if (!j->Head) {
00076                         printf("Out of memory\n");
00077                         return FALSE;
00078                 }
00079 #ifdef DEBUGFINAL               
00080                 node_count++;
00081 #endif          
00082         }
00083         
00084         node = j->Head;
00085         
00086         for (i=0;i<SLen;i++) {
00087                 if (!node->Bytes[String[i]]) {
00088 #ifdef DEBUGBUILD
00089                         printf("Adding Node for byte %c\n",String[i]);
00090 #endif                  
00091                         node->Bytes[String[i]] = calloc(sizeof(JNode),1);
00092                         calloc_count++;
00093                         if (!node->Bytes[String[i]]) {
00094                                 printf("Out of memory\n");
00095                                 return FALSE;
00096                         }
00097 #ifdef DEBUGFINAL                       
00098                         node_count++;
00099 #endif                  
00100                         
00101                         /*in a nocase tree, point both cases to the same node*/
00102                         if (j->NoCase) {
00103                                 node->Bytes[tolower(String[i])]=node->Bytes[String[i]];
00104                                 node->Bytes[toupper(String[i])]=node->Bytes[String[i]];
00105                         }
00106                         
00107                         node->Count++;
00108                 }
00109                 node = node->Bytes[String[i]];
00110                 node->temp = String[i];
00111         }
00112         
00113         node->IsTerminal = TRUE;
00114         
00115         /*set the bit in the mask*/
00116         SetBit(j->DependMask, Globals.NumRules, RuleID, 1);
00117         //SetBit(node->TerminalMask, Globals.NumRules, RuleID, 1);
00118         node->TerminalRuleID = RuleID;
00119         
00120         return TRUE;
00121 }
00122 
00126 int AddStringJTree(JTree* j, unsigned char* String, int SLen, int RuleID)
00127 {
00128         unsigned char   Buff[MAX_STRING_LEN+1];
00129         int             BuffLen;
00130         int             i;
00131         int             IsBinary;
00132         
00133         char            BinBuff[6];
00134         int             BinChar;
00135         
00136         DEBUGPATH;
00137 
00138         /* apply the escape decoding */
00139         IsBinary = FALSE;
00140         BuffLen = 0;
00141         for (i=0;i<SLen;i++) {
00142                 if (String[i] == 0x00) break;
00143                 if (String[i] == '|') {
00144                         if (String[i+1] == '|') {
00145                                 DBGESCAPE(PRINTERROR("Literal Pipe\n"));
00146                                 Buff[BuffLen]='|';
00147                                 BuffLen++;
00148                         } else {
00149                                 if (IsBinary) {
00150                                         DBGESCAPE(PRINTERROR("Switching to text mode\n"));
00151                                         IsBinary = FALSE;
00152                                 } else {
00153                                         DBGESCAPE(PRINTERROR("Switching to binary mode\n"));
00154                                         IsBinary = TRUE;
00155                                 }
00156                         }
00157                 } else {
00158                         if (IsBinary) {
00159                                 while (String[i] == ' ') i++;
00160                                 if (String[i] == 0x00) {
00161                                         PRINTERROR("Unexpected end of string. Expected |\n");
00162                                         return FALSE;
00163                                 }
00164                                 
00165                                 BinBuff[0] = String[i];
00166                                 BinBuff[1] = String[i+1];
00167                                 BinBuff[2] = 0x00;
00168                                 
00169                                 if ( (BinBuff[0] == '|') || (BinBuff[1] == '|')) {
00170                                         PRINTERROR1("Parse Error \"%s\"\n", BinBuff);
00171                                         return FALSE;
00172                                 }
00173                                 
00174                                 BinChar = strtoul(BinBuff, NULL, 16);
00175                                 
00176                                 DBGESCAPE(PRINTERROR1("Adding binary character %02X\n", BinChar));
00177                                 Buff[BuffLen]=BinChar;
00178 
00179                                 BuffLen++;
00180                                 i++;
00181                         } else {
00182                                 DBGESCAPE(PRINTERROR("Adding literal character %c\n",String[i]));
00183                                 Buff[BuffLen] = String[i];
00184                                 BuffLen++;
00185                         }
00186                 }
00187         }
00188 
00189         DBGESCAPE(PRINTERROR1("Buff is %s\n",Buff));
00190         DBGESCAPE(PRINTERROR1("BuffLen is %i\n", BuffLen));
00191         
00192         /* really add it */
00193         return AddStringJTreeReal(j, Buff, BuffLen, RuleID);
00194 }
00195 
00196 
00200 JNode* FindOptimalNode(JTree* j, JNode* n, unsigned char* String, int SLen)
00201 {
00202         int             i,k;
00203         unsigned char*  s;
00204         JNode*          optimal;
00205         JNode*          node;
00206         
00207         DEBUGPATH;
00208 
00209 #ifdef DEBUGBUILD
00210         printf("(%s)->",String);
00211 #endif
00212         
00213         /*the default is always to return to the root node*/
00214         optimal=j->Head;
00215 
00216         /*go search for a better node to return to*/
00217         if (SLen!=0){
00218                 for (i=SLen-1;i>0;i--){
00219 #ifdef DEBUGBUILD
00220                         printf("Testing char %c\n",String[i]);
00221 #endif                  
00222                         node=j->Head;
00223                         k=0;
00224                         while (node){
00225                                 if (node->Bytes[String[i+k]]){
00226 #ifdef DEBUGBUILD
00227                                         printf("Match at %c\n",String[i+k]);
00228 #endif                                  
00229                                         if (i+k==SLen-1){
00230 #ifdef DEBUGBUILD
00231                                                 printf("New Optimal found\n");
00232 #endif                                          
00233                                                 optimal=node->Bytes[String[i+k]];
00234                                                 break;
00235                                         }
00236                                         node=node->Bytes[String[i+k]];
00237                                         k++;
00238                                 }else{
00239                                         break;
00240                                 }
00241                                 
00242                         }
00243                 }
00244         }
00245 
00246 #ifdef DEBUGBUILD
00247         if (optimal==j->Head){
00248                 printf("Root\n");
00249         }
00250 #endif
00251         
00252         /*find the optimal nodes for the children*/
00253         for (i=0;i<256;i++){
00254                 /*skip the upper case section in nocase trees*/
00255                 if (j->NoCase){
00256                         if (i==65) i=91;
00257                 }
00258                 
00259                 if (n->Bytes[i]){
00260                         s=calloc(sizeof(char), MAX_STRING_LEN+1);
00261                         if (!s){
00262                                 printf("Out of memory\n");
00263                                 return j->Head;
00264                         }
00265                         memcpy(s, String, SLen);
00266                         s[SLen]=i;
00267                         n->Bytes[i]->FailNode=FindOptimalNode(j,n->Bytes[i], s, SLen+1);
00268                         if (s) free(s);
00269                 }
00270         }
00271 
00272         return optimal;
00273 }
00274 
00275 
00276 /******************************************
00277 * Swap full nodes for short nodes to 
00278 * save memory
00279 ******************************************/
00280 int ConvertNode(JNode* n, JNode** Parent, int NoCase){
00281         int             i;
00282         SJNode* Short;
00283         static int              SCount=0;
00284         static int              LCount=0;
00285         
00286         DEBUGPATH;
00287 
00288         printf("This node has %i subnodes \"%c\"\n",n->Count,n->temp);
00289 
00290         /*first convert all the subnodes so the leaves*/
00291         /*get converted first*/
00292         
00293         for (i=0;i<256;i++){
00294                 if (NoCase){
00295                         if (i==65) i=91;
00296                 }
00297 
00298                 if ( (n->Bytes[i]) && (!ConvertNode(n->Bytes[i], &n->Bytes[i], NoCase)) )
00299                         return FALSE;
00300         }
00301         
00302         /*root node is always a normal node*/
00303         if (!Parent) return TRUE;
00304         
00305         /*now convert this node*/
00306         if (n->Count<2){
00307                 Short=calloc(sizeof(SJNode),1);
00308                 Short->NodeType=NODE_TYPE_SHORT;
00309                 if (n->Count==0){
00310                         Short->PassNode=NULL;
00311                 }else{
00312                         for (i=0;i<256;i++)
00313                                 if (n->Bytes[i]){
00314                                         Short->PassNode=n->Bytes[i];
00315                                         break;
00316                                 }
00317                 }
00318                 Short->temp=n->temp;
00319                 Short->Byte=n->temp;
00320                 Short->FailNode=n->FailNode;
00321                 Short->IsTerminal=n->IsTerminal;
00322                 Short->TerminalRuleID=n->TerminalRuleID;
00323                 
00324                 free(*Parent);
00325                 *Parent=(JNode*)Short;
00326                 *Parent=NULL;
00327                 
00328                 SCount++;
00329         }else{
00330                 LCount++;
00331         }
00332 
00333         printf("There are %i short and %i long\n",SCount, LCount);
00334 
00335         return TRUE;
00336 }
00337 
00338 int FreeNode(JNode* n){
00339         int i;
00340         
00341         for (i=0;i<256;i++){            
00342                 if (n->Bytes[i]){
00343                         FreeNode(n->Bytes[i]);
00344                         n->Bytes[i]=NULL;
00345                 }
00346         }
00347 
00348         free(n);
00349         free_count++;
00350         
00351         return TRUE;
00352         
00353 }
00354 
00355 /******************************************
00356 * Swap full nodes for short nodes to 
00357 * save memory
00358 ******************************************/
00359 int CompressJTree(JTree* j){
00360 
00361         DEBUGPATH;
00362 
00363         return ConvertNode(j->Head, NULL, j->NoCase);
00364 }
00365 
00366 /******************************************
00367 * Fill in the FailNode 
00368 * entries
00369 *******************************************/
00370 int FinalizeJTree(JTree* j){
00371         
00372         DEBUGPATH;
00373 
00374         if (!j) return FALSE;
00375         if (!j->Head){
00376                 printf("Tree is empty\n");
00377                 return FALSE;
00378         }
00379 
00380 #ifdef DEBUGFINAL
00381         printf("finding optimal node for %i nodes\n",node_count);
00382 #endif
00383 
00384         j->Head->FailNode=FindOptimalNode(j, j->Head, NULL, 0);
00385 
00386 
00387 /*
00388         printf("Compressing tree\n");
00389         if (!CompressJTree(j)){
00390                 printf("Couldn't compress tree\n");
00391                 return FALSE;
00392         }
00393 */
00394         
00395         return TRUE;
00396 }
00397 
00398 /*****************************************************
00399 * See which of the substrings exist in the string
00400 *****************************************************/
00401 int MatchStrings(JTree* j, unsigned char* PacketRuleBits, unsigned char* String, int SLen){
00402         JNode*                  node;
00403         int                             i;
00404         unsigned char   LocalDepend[MAX_RULES/8];
00405         
00406         DEBUGPATH;
00407 
00408         memcpy(LocalDepend, j->DependMask, MAX_RULES/8);
00409 
00410         node=j->Head;
00411         for (i=0;i<SLen;i++){
00412 #ifdef DEBUGMATCH       
00413                 if (node==j->Head){
00414                         printf("on <Root> looking for %c\n", String[i]);
00415                 }else{
00416                         printf("on %c      looking for %c\n", node->temp, String[i]);
00417                 }
00418 #endif          
00419                 if (node->Bytes[String[i]]){
00420                         node=node->Bytes[String[i]];
00421                         if (node->IsTerminal){
00422 #ifdef DEBUG                    
00423                                 printf("Match on %c\n",String[i]);
00424 #endif                          
00425                                 SetBit(LocalDepend, Globals.NumRules, node->TerminalRuleID, 0);
00426                         }
00427                 }else{
00428 
00429                   // original code! including Boyer moore optimization:
00430                         //node=node->FailNode;
00431                         //if (node!=j->Head) i--;
00432 
00433                         node=j->Head;
00434                         if(node->Bytes[String[i]]) i--;
00435 
00436 
00437                 }
00438         }
00439 
00440         NotAndBitFields(PacketRuleBits, LocalDepend, PacketRuleBits, Globals.NumRules);
00441 
00442         return TRUE;
00443 }

Generated on Sat Jul 7 23:33:10 2007 for HLBR by  doxygen 1.5.2