RegExp.h

Go to the documentation of this file.
00001 /*
00002  * Copyright (c) 2005 Palmsource, Inc.
00003  * 
00004  * This software is licensed as described in the file LICENSE, which
00005  * you should have received as part of this distribution. The terms
00006  * are also available at http://www.openbinder.org/license.html.
00007  * 
00008  * This software consists of voluntary contributions made by many
00009  * individuals. For the exact contribution history, see the revision
00010  * history and logs, available at http://www.openbinder.org
00011  */
00012 
00013 // @(#)regexp.c 1.3 of 18 April 87
00014 //
00015 //  Copyright (c) 1986 by University of Toronto.
00016 //  Written by Henry Spencer.  Not derived from licensed software.
00017 //
00018 //  Permission is granted to anyone to use this software for any
00019 //  purpose on any computer system, and to redistribute it freely,
00020 //  subject to the following restrictions:
00021 //
00022 //  1. The author is not responsible for the consequences of use of
00023 //      this software, no matter how awful, even if they arise
00024 //      from defects in it.
00025 //
00026 //  2. The origin of this software must not be misrepresented, either
00027 //      by explicit claim or by omission.
00028 //
00029 //  3. Altered versions must be plainly marked as such, and must not
00030 //      be misrepresented as being the original software.
00031 //
00032 // Beware that some of this code is subtly aware of the way operator
00033 // precedence is structured in regular expressions.  Serious changes in
00034 // regular-expression syntax might require a total rethink.
00035 //
00036 
00037 // ALTERED VERSION: Adapted to ANSI C and C++ for the OpenTracker
00038 // project (www.opentracker.org), Jul 11, 2000.
00039 
00040 // ALTERED VERSION: Modified to work as a Palm OS API, 2001-2004.
00041 
00042 #ifndef _REG_EXB_H
00043 #define _REG_EXB_H
00044 
00050 #include <support/Debug.h>
00051 #include <support/SupportDefs.h>
00052 
00053 #if _SUPPORTS_NAMESPACE
00054 namespace palmos {
00055 namespace support {
00056 #endif
00057 
// Forward declaration: this header only uses SString by const reference
// (constructor, SetTo and Matches overloads), so the full definition is not needed here.
00062 class SString;
00063 
// Number of elements in the startp[] / endp[] arrays of struct regexp.
00064 const int32_t kSubExpressionMax = 10;
00065 
// Regular-expression record used by SRegExp (SRegExp::Compile() returns a pointer to one,
// SRegExp::RunMatcher() takes one). Per the file header this code is an altered version of
// Henry Spencer's regexp.
// NOTE(review): this header does not describe the individual fields; the remarks marked
// "presumably" below are assumptions to confirm against RegExp.cpp.
00066 struct regexp 
00067 {
00068     const char *startp[kSubExpressionMax];  // presumably the start of each sub-expression match - confirm
00069     const char *endp[kSubExpressionMax];    // presumably the end of each sub-expression match - confirm
00070     char regstart;      /* Internal use only. See RegExp.cpp for details. */
00071     char reganch;       /* Internal use only. */
00072     const char *regmust;/* Internal use only. */
00073     int regmlen;        /* Internal use only. */
          // One-element trailing array. Presumably the record is allocated larger than
          // sizeof(regexp) so the program bytes run past the end of the struct - confirm at
          // the allocation site. Must stay the last member if so.
00074     char program[1];    /* Unwarranted chumminess with compiler. */
00075 };
00076 
// SRegExp: regular-expression class declared for the support kit.
// Holds a pointer to a `regexp` record (fRegExp) plus an error status (fError), and declares
// the compile, match and search entry points. Only declarations are visible in this header;
// behavioural remarks marked "presumably" are assumptions to confirm against RegExp.cpp.
00077 class SRegExp 
00078 {
00079 
00080 public:
          // Constructors. The single-argument forms are not `explicit`, so a const char* or an
          // SString converts implicitly to SRegExp.
          // Presumably the argument is the pattern text and is compiled on construction - confirm.
00081     SRegExp();
00082     SRegExp(const char *);
00083     SRegExp(const SString &);
00084     ~SRegExp();
00085     
          // Returns a status_t; presumably the result of the last construction/SetTo - confirm.
00086     status_t InitCheck() const;
00087     
          // Re-initialise from a C string or an SString; returns a status_t.
          // Presumably the argument is a new pattern to compile - confirm.
00088     status_t SetTo(const char*);
00089     status_t SetTo(const SString &);
00090     
          // Test a string against the expression. Both overloads are const.
00091     bool Matches(const char *string) const;
00092     bool Matches(const SString &) const;
00093 
          // Search `text` starting from `searchStart`; matchStart / matchEnd are output pointers.
          // Presumably they receive the bounds of the match as offsets into `text` when true is
          // returned - confirm units and whether either pointer may be NULL.
          // NOTE(review): unlike Matches() this method is not const - confirm whether intentional.
00094     bool Search(const char *text, int32_t searchStart, int32_t *matchStart, int32_t *matchEnd);
00095 
          // Lower-level entry points, exposed publicly.
          // NOTE(review): ownership of the regexp* returned by Compile() / Expression() is not
          // visible from this header - confirm who frees it.
00096     int32_t RunMatcher(regexp *, const char *) const;
00097     regexp *Compile(const char *);
00098     regexp *Expression() const;
          // Returns a C string; presumably a description of the current error - confirm.
00099     const char *ErrorString() const;
00100 
          // Only declared when DEBUG is non-zero.
00101 #if DEBUG
00102     void Dump();
00103 #endif
00104 
00105 private:
00106 
          // const, yet takes an error code to record: fError below is declared mutable,
          // presumably so this method can store into it.
00107     void SetError(status_t error) const;
00108 
00109     // Working functions for Compile():
00110     char *Reg(int32_t, int32_t *);
00111     char *Branch(int32_t *);
00112     char *Piece(int32_t *);
00113     char *Atom(int32_t *);
00114     char *Node(char);
          // Non-const and const overloads of Next().
00115     char *Next(char *);
00116     const char *Next(const char *) const;
00117     void EmitChar(char);
00118     void Insert(char, char *);
00119     void Tail(char *, char *);
00120     void OpTail(char *, char *);
00121 
00122     // Working functions for RunMatcher():
          // All const; they work through the mutable matcher variables declared at the end
          // of the class (presumably - confirm in RegExp.cpp).
00123     int32_t Try(regexp *, const char *) const;
00124     int32_t Match(const char *) const;
00125     int32_t Repeat(const char *) const;
00126 
00127     // Utility functions:
          // Prop() and RegExpError() are only declared when DEBUG is non-zero.
00128 #if DEBUG
00129     char *Prop(const char *) const;
00130     void RegExpError(const char *) const;
00131 #endif
          // Declared inline, but no definitions appear in this part of the header; they are
          // private, so presumably they are defined in the implementation file - confirm.
00132     inline int32_t UCharAt(const char *p) const;
00133     inline char *Operand(char* p) const;
00134     inline const char *Operand(const char* p) const;
00135     inline bool IsMult(char c) const;
00136 
00137 // --------- Variables -------------
00138 
00139     mutable status_t fError;    // mutable so it can be written from const methods (see SetError)
00140     regexp *fRegExp;            // same type as returned by Expression(); presumably the compiled expression - confirm
00141 
00142     // Work variables for Compile().
00143 
00144     const char *fInputScanPointer;
00145     int32_t fParenthesisCount;      
00146     char fDummy;                // target for fCodeEmitPointer when emission is disabled (see next line)
00147     char *fCodeEmitPointer;     // &fDummy = don't.
00148     long fCodeSize;     
00149 
00150     // Work variables for RunMatcher().
          // All mutable, so the const matching methods can modify them.
          // NOTE(review): this scratch state lives in the object, so concurrent matching on one
          // SRegExp instance from several threads looks unsafe - confirm before sharing instances.
00151 
00152     mutable const char *fStringInputPointer;
00153     mutable const char *fRegBol;    // Beginning of input, for ^ check.
00154     mutable const char **fStartPArrayPointer;
00155     mutable const char **fEndPArrayPointer;
00156 };
00157 
// Close the namespaces opened near the top of the header: support first, then palmos.
00160 #if _SUPPORTS_NAMESPACE
00161 } }
00162 #endif
00163 
// End of include guard _REG_EXB_H.
// NOTE(review): the guard name begins with underscore + capital (reserved for the implementation)
// and "EXB" looks like a typo for "EXP" - consider renaming together with the #ifndef/#define pair.
00164 #endif