OpenBinder: TextMgr.h Source File

00001 /*
00002  * Copyright (c) 2005 Palmsource, Inc.
00003  * 
00004  * This software is licensed as described in the file LICENSE, which
00005  * you should have received as part of this distribution. The terms
00006  * are also available at http://www.openbinder.org/license.html.
00007  * 
00008  * This software consists of voluntary contributions made by many
00009  * individuals. For the exact contribution history, see the revision
00010  * history and logs, available at http://www.openbinder.org
00011  */
00012 
00013 #ifndef _TEXTMGR_H_
00014 #define _TEXTMGR_H_
00015 
00016 #include <PalmTypes.h>
00017 #include <Chars.h>
00018 
00019 /***********************************************************************
00020  * Public types & constants
00021  ***********************************************************************/
00022 
00023 typedef uint16_t CharEncodingType;
00024 
00025 // Maximum length of any character encoding name.
00026 #define maxEncodingNameLength   40
00027 
00028 /* Various character encodings supported by the PalmOS. Actually these
00029 are a mixture of repertoires (coded character sets) and character encodings
00030 (CES - character encoding standard). Many, however, are some of both (e.g.
00031 CP932 is the Shift-JIS encoding of three JIS character sets + Microsoft's
00032 extensions).
00033 
00034 WARNING!!!!!
00035 This character code section must match the analogous section of LocaleModule.rh!
00036 You must also update the encoding flags array inside TxtGetEncodingFlags
00037     whenever you define new encodings.
00038 */
00039 #define CHAR_ENCODING_VALUE(value) ((CharEncodingType)(value))  // Cast <value> (any integer expression) to CharEncodingType.
00040 
00041 // Unknown to this version of PalmOS.
00042 #define charEncodingUnknown     CHAR_ENCODING_VALUE(0)
00043 
00044 // Maximum character encoding _currently_ defined
00045 #define charEncodingMax         CHAR_ENCODING_VALUE(91)
00046 
00047 // Latin Palm OS character encoding, and subsets.
00048 // PalmOS variant of CP1252, with 10 extra Greek characters
00049 #define charEncodingPalmGSM     CHAR_ENCODING_VALUE(78)
00050 // PalmOS version of CP1252
00051 #define charEncodingPalmLatin   CHAR_ENCODING_VALUE(3)
00052 // Windows variant of 8859-1
00053 #define charEncodingCP1252      CHAR_ENCODING_VALUE(7)
00054 // ISO 8859 Part 1
00055 #define charEncodingISO8859_1   CHAR_ENCODING_VALUE(2)
00056 // ISO 646-1991
00057 #define charEncodingAscii       CHAR_ENCODING_VALUE(1)
00058 
00059 // Japanese Palm OS character encoding, and subsets.
00060 // PalmOS version of CP932
00061 #define charEncodingPalmSJIS    CHAR_ENCODING_VALUE(5)
00062 // Windows variant of ShiftJIS
00063 #define charEncodingCP932       CHAR_ENCODING_VALUE(8)
00064 // Encoding for JIS 0208-1990 + 1-byte katakana
00065 #define charEncodingShiftJIS    CHAR_ENCODING_VALUE(4)
00066 
00067 // Unicode character encodings
00068 #define charEncodingUCS2        CHAR_ENCODING_VALUE(9)
00069 #define charEncodingUTF8        CHAR_ENCODING_VALUE(6)
00070 #define charEncodingUTF7        CHAR_ENCODING_VALUE(24)
00071 #define charEncodingUTF16       CHAR_ENCODING_VALUE(75)
00072 #define charEncodingUTF16BE     CHAR_ENCODING_VALUE(76)
00073 #define charEncodingUTF16LE     CHAR_ENCODING_VALUE(77)
00074 #define charEncodingUTF32       CHAR_ENCODING_VALUE(84)
00075 #define charEncodingUTF32BE     CHAR_ENCODING_VALUE(85)
00076 #define charEncodingUTF32LE     CHAR_ENCODING_VALUE(86)
00077 #define charEncodingUTF7_IMAP   CHAR_ENCODING_VALUE(87)
00078 #define charEncodingUCS4        CHAR_ENCODING_VALUE(88)
00079 
00080 // Latin character encodings
00081 #define charEncodingCP850       CHAR_ENCODING_VALUE(12)
00082 #define charEncodingCP437       CHAR_ENCODING_VALUE(13)
00083 #define charEncodingCP865       CHAR_ENCODING_VALUE(14)
00084 #define charEncodingCP860       CHAR_ENCODING_VALUE(15)
00085 #define charEncodingCP861       CHAR_ENCODING_VALUE(16)
00086 #define charEncodingCP863       CHAR_ENCODING_VALUE(17)
00087 #define charEncodingCP775       CHAR_ENCODING_VALUE(18)
00088 #define charEncodingMacIslande  CHAR_ENCODING_VALUE(19)
00089 #define charEncodingMacintosh   CHAR_ENCODING_VALUE(20)
00090 #define charEncodingCP1257      CHAR_ENCODING_VALUE(21)
00091 #define charEncodingISO8859_3   CHAR_ENCODING_VALUE(22)
00092 #define charEncodingISO8859_4   CHAR_ENCODING_VALUE(23)
00093 
00094 // Extended Latin character encodings
00095 #define charEncodingISO8859_2   CHAR_ENCODING_VALUE(26)
00096 #define charEncodingCP1250      CHAR_ENCODING_VALUE(27)
00097 #define charEncodingCP852       CHAR_ENCODING_VALUE(28)
00098 #define charEncodingXKamenicky  CHAR_ENCODING_VALUE(29)
00099 #define charEncodingMacXCroate  CHAR_ENCODING_VALUE(30)
00100 #define charEncodingMacXLat2    CHAR_ENCODING_VALUE(31)
00101 #define charEncodingMacXRomania CHAR_ENCODING_VALUE(32)
00102 #define charEncodingGSM         CHAR_ENCODING_VALUE(90)
00103 
00104 // Japanese character encodings
00105 #define charEncodingEucJp       CHAR_ENCODING_VALUE(25)
00106 #define charEncodingISO2022Jp   CHAR_ENCODING_VALUE(10)
00107 #define charEncodingXAutoJp     CHAR_ENCODING_VALUE(11)
00108 
00109 // Greek character encodings
00110 #define charEncodingISO8859_7   CHAR_ENCODING_VALUE(33)
00111 #define charEncodingCP1253      CHAR_ENCODING_VALUE(34)
00112 #define charEncodingCP869       CHAR_ENCODING_VALUE(35)
00113 #define charEncodingCP737       CHAR_ENCODING_VALUE(36)
00114 #define charEncodingMacXGr      CHAR_ENCODING_VALUE(37)
00115 
00116 // Cyrillic character encodings
00117 #define charEncodingCP1251      CHAR_ENCODING_VALUE(38)
00118 #define charEncodingISO8859_5   CHAR_ENCODING_VALUE(39)
00119 #define charEncodingKoi8R       CHAR_ENCODING_VALUE(40)
00120 #define charEncodingKoi8        CHAR_ENCODING_VALUE(41)
00121 #define charEncodingCP855       CHAR_ENCODING_VALUE(42)
00122 #define charEncodingCP866       CHAR_ENCODING_VALUE(43)
00123 #define charEncodingMacCyr      CHAR_ENCODING_VALUE(44)
00124 #define charEncodingMacUkraine  CHAR_ENCODING_VALUE(45)
00125 
00126 // Turkish character encodings
00127 #define charEncodingCP1254      CHAR_ENCODING_VALUE(46)
00128 #define charEncodingISO8859_9   CHAR_ENCODING_VALUE(47)
00129 #define charEncodingCP857       CHAR_ENCODING_VALUE(48)
00130 #define charEncodingMacTurc     CHAR_ENCODING_VALUE(49)
00131 #define charEncodingCP853       CHAR_ENCODING_VALUE(50)
00132 
00133 // Arabic character encodings
00134 #define charEncodingISO8859_6   CHAR_ENCODING_VALUE(51)
00135 #define charEncodingAsmo708     CHAR_ENCODING_VALUE(52)
00136 #define charEncodingCP1256      CHAR_ENCODING_VALUE(53)
00137 #define charEncodingCP864       CHAR_ENCODING_VALUE(54)
00138 #define charEncodingAsmo708Plus CHAR_ENCODING_VALUE(55)
00139 #define charEncodingAsmo708Fr   CHAR_ENCODING_VALUE(56)
00140 #define charEncodingMacAra      CHAR_ENCODING_VALUE(57)
00141 
00142 // Simplified Chinese character encodings
00143 #define charEncodingGB2312      CHAR_ENCODING_VALUE(58)
00144 #define charEncodingHZ          CHAR_ENCODING_VALUE(59)
00145 #define charEncodingGBK         CHAR_ENCODING_VALUE(82)
00146 #define charEncodingPalmGB      CHAR_ENCODING_VALUE(83)
00147 
00148 // Traditional Chinese character encodings
00149 #define charEncodingBig5        CHAR_ENCODING_VALUE(60)
00150 #define charEncodingBig5_HKSCS  CHAR_ENCODING_VALUE(79)
00151 #define charEncodingBig5Plus    CHAR_ENCODING_VALUE(80)
00152 #define charEncodingPalmBig5    CHAR_ENCODING_VALUE(81)
00153 #define charEncodingISO2022CN   CHAR_ENCODING_VALUE(89)
00154 
00155 // Vietnamese character encodings
00156 #define charEncodingViscii      CHAR_ENCODING_VALUE(61)
00157 #define charEncodingViqr        CHAR_ENCODING_VALUE(62)
00158 #define charEncodingVncii       CHAR_ENCODING_VALUE(63)
00159 #define charEncodingVietnet     CHAR_ENCODING_VALUE(65)
00160 #define charEncodingCP1258      CHAR_ENCODING_VALUE(66)
00161 
00162 // Korean character encodings
00163 #define charEncodingEucKr       CHAR_ENCODING_VALUE(67)     // Was charEncodingKsc5601
00164 #define charEncodingCP949       CHAR_ENCODING_VALUE(68)
00165 #define charEncodingISO2022Kr   CHAR_ENCODING_VALUE(69)
00166 #define charEncodingPalmKorean  CHAR_ENCODING_VALUE(91)
00167 
00168 // Hebrew character encodings
00169 #define charEncodingISO8859_8I  CHAR_ENCODING_VALUE(70)
00170 #define charEncodingISO8859_8   CHAR_ENCODING_VALUE(71)
00171 #define charEncodingCP1255      CHAR_ENCODING_VALUE(72)
00172 #define charEncodingCP1255V     CHAR_ENCODING_VALUE(73)
00173 
00174 // Thai character encodings
00175 #define charEncodingTis620      CHAR_ENCODING_VALUE(74)
00176 #define charEncodingCP874       CHAR_ENCODING_VALUE(64)
00177 
00178 // Character attribute flags. These replace the old flags defined in
00179 // CharAttr.h, but are bit-compatible.
00180 
00181 // WARNING!!!!!
00182 // This character attribute section must match the analogous section of
00183 // LocaleModule.rh!
00184 
00185 #define charAttr_DO     0x00000400  // display only (never in user data)
00186 #define charAttr_XA     0x00000200  // extra alphabetic
00187 #define charAttr_XS     0x00000100  // extra space
00188 #define charAttr_BB     0x00000080  // BEL, BS, etc.
00189 #define charAttr_CN     0x00000040  // CR, FF, HT, NL, VT
00190 #define charAttr_DI     0x00000020  // '0'-'9'
00191 #define charAttr_LO     0x00000010  // 'a'-'z' and lowercase extended chars.
00192 #define charAttr_PU     0x00000008  // punctuation
00193 #define charAttr_SP     0x00000004  // space
00194 #define charAttr_UP     0x00000002  // 'A'-'Z' and uppercase extended chars.
00195 #define charAttr_XD     0x00000001  // '0'-'9', 'A'-'F', 'a'-'f'
00196 
00197 // Various byte attribute flags. Note that multiple flags can be
00198 // set, thus a byte could be both a single-byte character and the first
00199 // byte of a multi-byte character.
00200 
00201 // WARNING!!!!!
00202 // This byte attribute section must match the analogous section of
00203 // LocaleModule.rh!
00204 
00205 #define byteAttrFirst               0x80    // First byte of multi-byte char.
00206 #define byteAttrLast                0x40    // Last byte of multi-byte char.
00207 #define byteAttrMiddle              0x20    // Middle byte of multi-byte char.
00208 #define byteAttrSingle              0x01    // Single byte.
00209 
00210 // Some double-byte encoding combinations. Every byte in a stream of
00211 // double-byte data must be either a single byte, a single/low byte,
00212 // or a high/low byte.
00213 #define byteAttrSingleLow       (byteAttrSingle | byteAttrLast)
00214 #define byteAttrHighLow         (byteAttrFirst | byteAttrLast)
00215  
00216 // Transliteration operations for the TxtTransliterate call. We don't use
00217 // an enum, since each character encoding contains its own set of special
00218 // transliteration operations (which begin at translitOpCustomBase).
00219 typedef uint16_t TranslitOpType;
00220 
00221 // Standard transliteration operations.
00222 #define translitOpStandardBase  0           // Beginning of standard operations.
00223 
00224 #define translitOpUpperCase     0
00225 #define translitOpLowerCase     1
00226 #define translitOpReserved2     2
00227 #define translitOpReserved3     3
00228 
00229 // Custom transliteration operations (defined in CharXXXX.h encoding-specific
00230 // header files).
00231 #define translitOpCustomBase        1000        // Beginning of char-encoding specific ops.
00232 
00233 #define translitOpPreprocess        0x8000  // Mask for pre-process option, where
00234                                         // no transliteration actually is done.
00235 
00236 // Structure used to maintain state across calls to TxtConvertEncoding, for
00237 // proper handling of source or destination encodings which have modes.
00238 #define kTxtConvertStateSize        32
00239 
00240 typedef struct {
00241     uint8_t     ioSrcState[kTxtConvertStateSize];   // Presumably the source-encoding state; contents not defined here.
00242     uint8_t     ioDstState[kTxtConvertStateSize];   // Presumably the destination-encoding state; contents not defined here.
00243 } TxtConvertStateType;
00244 
00245 // Character encoding assumed for substitution text by TxtConvertEncoding
00246 #define textSubstitutionEncoding    charEncodingUTF8
00247 
00248 // Default substitution text for use with TxtConvertEncoding
00249 #define textSubstitutionDefaultStr  "?"
00250 #define textSubstitutionDefaultLen  1
00251 
00252 // Flag to OR with the CharEncodingType that is passed to TxtConvertEncoding
00253 #define charEncodingDstBestFitFlag  0x8000
00254 
00255 // Flags returned by TxtGetEncodingFlags. These are also available to 68K
00256 // apps via the sysFtrNumCharEncodingFlags feature.
00257 #define charEncodingOnlySingleByte  0x00000001
00258 #define charEncodingHasDoubleByte   0x00000002
00259 #define charEncodingHasLigatures    0x00000004
00260 #define charEncodingRightToLeft     0x00000008
00261 
00262 // Flags returned by FtrGet(sysFtrCreator, sysFtrNumTextMgrFlags)
00263 #define textMgrExistsFlag           0x00000001  // Was intlMgrExists
00264 #define textMgrStrictFlag           0x00000002  // Was intlMgrStrict
00265 #define textMgrBestFitFlag          0x00000004  // Was intlMgrBestFit
00266 #define textMgrHighASCIIFixupFlag   0x00000008  // New in 6.2 for 68K apps only!
00267 
00268 // Various sets of character attribute flags.
00269 #define charAttrPrint               (charAttr_DI|charAttr_LO|charAttr_PU|charAttr_SP|charAttr_UP|charAttr_XA)
00270 #define charAttrSpace               (charAttr_CN|charAttr_SP|charAttr_XS)
00271 #define charAttrAlNum               (charAttr_DI|charAttr_LO|charAttr_UP|charAttr_XA)
00272 #define charAttrAlpha               (charAttr_LO|charAttr_UP|charAttr_XA)
00273 #define charAttrCntrl               (charAttr_BB|charAttr_CN)
00274 #define charAttrGraph               (charAttr_DI|charAttr_LO|charAttr_PU|charAttr_UP|charAttr_XA)
00275 #define charAttrDelim               (charAttr_SP|charAttr_PU)
00276 
00277 // Remember that sizeof(0x0D) is sizeof(int), not 1, because 0x0D is treated like
00278 // an int. The same is true of sizeof('a'), sizeof('\0'), and sizeof(chrNull). For
00279 // this reason it's safest to use the sizeOf7BitChar macro to document buffer size
00280 // and string length calcs. Note that this can only be used with low-ascii
00281 // characters, as anything else might be the high byte of a double-byte char.
00282 #define sizeOf7BitChar(c)           1
00283 
00284 // Maximum size a single character will occupy in a text string.
00285 #define maxCharBytes                4
00286 
00287 // Text Manager error codes.
00288 #define txtErrUknownTranslitOp              (txtErrorClass | 1)
00289 #define txtErrTranslitOverrun               (txtErrorClass | 2)
00290 #define txtErrTranslitOverflow              (txtErrorClass | 3)
00291 #define txtErrConvertOverflow               (txtErrorClass | 4)
00292 #define txtErrConvertUnderflow              (txtErrorClass | 5)
00293 #define txtErrUnknownEncoding               (txtErrorClass | 6)
00294 #define txtErrNoCharMapping                 (txtErrorClass | 7)
00295 #define txtErrTranslitUnderflow             (txtErrorClass | 8)
00296 #define txtErrMalformedText                 (txtErrorClass | 9)
00297 #define txtErrUnknownEncodingFallbackCopy   (txtErrorClass | 10)
00298 
00299 /***********************************************************************
00300  * Public macros
00301  ***********************************************************************/
00302 
00303 #define TxtCharIsSpace(ch)      ((TxtCharAttr(ch) & charAttrSpace) != 0)
00304 #define TxtCharIsPrint(ch)      ((TxtCharAttr(ch) & charAttrPrint) != 0)
00305 #define TxtCharIsDigit(ch)      ((TxtCharAttr(ch) & charAttr_DI) != 0)
00306 #define TxtCharIsAlNum(ch)      ((TxtCharAttr(ch) & charAttrAlNum) != 0)
00307 #define TxtCharIsAlpha(ch)      ((TxtCharAttr(ch) & charAttrAlpha) != 0)
00308 #define TxtCharIsCntrl(ch)      ((TxtCharAttr(ch) & charAttrCntrl) != 0)
00309 #define TxtCharIsGraph(ch)      ((TxtCharAttr(ch) & charAttrGraph) != 0)
00310 #define TxtCharIsLower(ch)      ((TxtCharAttr(ch) & charAttr_LO) != 0)
00311 #define TxtCharIsPunct(ch)      ((TxtCharAttr(ch) & charAttr_PU) != 0)
00312 #define TxtCharIsUpper(ch)      ((TxtCharAttr(ch) & charAttr_UP) != 0)
00313 #define TxtCharIsHex(ch)        ((TxtCharAttr(ch) & charAttr_XD) != 0)
00314 #define TxtCharIsDelim(ch)      ((TxtCharAttr(ch) & charAttrDelim) != 0)
00315 
00316 // <c> is a hard key if the event modifier <m> has the command bit set
00317 // and <c> is either in the proper range or is the calculator character.
00318 #define TxtCharIsHardKey(m, c)  ((((m) & commandKeyMask) != 0) && \
00319                                 ((((c) >= hardKeyMin) && ((c) <= hardKeyMax)) || ((c) == calcChr)))  // NOTE: <c> may be evaluated up to three times; avoid side effects.
00320 
00321 // <c> is a virtual character if the event modifier <m> has the command
00322 // bit set.
00323 #define TxtCharIsVirtual(m, c)  (((m) & commandKeyMask) != 0)  // NOTE: <c> is not evaluated; only <m> is tested.
00324 
00325 #define TxtPreviousCharSize(iTextP, iOffset)    TxtGetPreviousChar((iTextP), (iOffset), NULL)
00326 #define TxtNextCharSize(iTextP, iOffset)        TxtGetNextChar((iTextP), (iOffset), NULL)
00327 
00328 //***************************************************************************
00329 // Macros for detecting if character is a rocker character or wheel character
00330 //***************************************************************************
00331 
00332 // <c> is a rocker key if the event modifier <m> has the command bit set
00333 // and <c> is in the proper range
00334 #define TxtCharIsRockerKey(m, c)    ((((m) & commandKeyMask) != 0) && \
00335                                     ((((c) >= vchrRockerUp) && ((c) <= vchrRockerCenter))))  // NOTE: <c> may be evaluated twice; avoid side effects.
00336 
00337 // <c> is a wheel key if the event modifier <m> has the command bit set
00338 // and <c> is in the proper range
00339 #define TxtCharIsWheelKey(m, c) ((((m) & commandKeyMask) != 0) && \
00340                                 ((((c) >= vchrThumbWheelUp) && ((c) <= vchrThumbWheelBack))))  // NOTE: <c> may be evaluated twice; avoid side effects.
00341 
00342 /***********************************************************************
00343  * Public routines
00344  ***********************************************************************/
00345 
00346 #ifdef __cplusplus
00347     extern "C" {
00348 #endif
00349 
00350 // Return back byte attribute (first, last, single, middle) for <iByte>.
00351 uint8_t TxtByteAttr(uint8_t iByte);
00352         
00353 // Return back the standard attribute bits for <iChar>.
00354 uint32_t TxtCharAttr(wchar32_t iChar);
00355 
00356 // Return back the extended attribute bits for <iChar>.
00357 uint32_t TxtCharXAttr(wchar32_t iChar);
00358 
00359 // Return the size (in bytes) of the character <iChar>. This represents
00360 // how many bytes would be required to store the character in a string.
00361 size_t TxtCharSize(wchar32_t iChar);
00362 
00363 // Load the character before offset <iOffset> in the <iTextP> text. Return
00364 // back the size of the character.
00365 size_t TxtGetPreviousChar(const char* iTextP, size_t iOffset, wchar32_t* oChar);
00366 
00367 // Load the character at offset <iOffset> in the <iTextP> text. Return
00368 // back the size of the character.
00369 size_t TxtGetNextChar(const char* iTextP, size_t iOffset, wchar32_t* oChar);
00370 
00371 // Return the character at offset <iOffset> in the <iTextP> text.
00372 wchar32_t TxtGetChar(const char* iTextP, size_t iOffset);
00373 
00374 // Set the character at offset <iOffset> in the <iTextP> text, and
00375 // return back the size of the character.
00376 size_t TxtSetNextChar(char* iTextP, size_t iOffset, wchar32_t iChar);
00377 
00378 // Replace the substring "^X" (where X is 0..9, as specified by <iParamNum>)
00379 // with the string <iParamStringP>. If <iParamStringP> is NULL then don't modify <iStringP>.
00380 // Make sure the resulting string doesn't contain more than <iMaxLen> bytes,
00381 // excluding the terminating null. Return back the number of occurrences of
00382 // the substring found in <iStringP>.
00383 uint16_t TxtReplaceStr(char* iStringP, size_t iMaxLen, const char* iParamStringP, uint16_t iParamNum);
00384 
00385 // Allocate a handle containing the result of substituting param0...param3
00386 // for ^0...^3 in <inTemplate>, and return the locked result. If a parameter
00387 // is NULL, replace the corresponding substring in the template with "".
00388 
00389 char* TxtParamString(const char* inTemplate, const char* param0,
00390             const char* param1, const char* param2, const char* param3);
00391 
00392 // Return the bounds of the character at <iOffset> in the <iTextP>
00393 // text, via the <oCharStart> & <oCharEnd> offsets, and also return the
00394 // actual value of character at or following <iOffset>.
00395 wchar32_t TxtCharBounds(const char* iTextP, size_t iOffset, size_t* oCharStart, size_t* oCharEnd);
00396 
00397 // Return the appropriate byte position for truncating <iTextP> such that it is
00398 // at most <iOffset> bytes long.
00399 size_t TxtGetTruncationOffset(const char* iTextP, size_t iOffset);
00400 
00401 // Truncate string <iSrcString> to be no more than <iMaxLength> bytes long, copying
00402 // the resulting string to <iDstString>. If truncation is required, add an ellipsis
00403 // if <iAddEllipsis> is true. Return true if the string was truncated. The source
00404 // and destination strings can be the same.
00405 Boolean TxtTruncateString(  char* iDstString,
00406                             const char* iSrcString,
00407                             size_t iMaxLength,
00408                             Boolean iAddEllipsis);
00409 
00410 // Convert the characters in <iSrcTextP> into an appropriate form for searching,
00411 // and copy up to <iDstSize> bytes of converted characters into <oDstTextP>. The
00412 // resulting string will be null-terminated. We assume that <iDstSize> includes
00413 // the space required for the null. Return back the number of bytes consumed in
00414 // <iSrcTextP>. Note that this routine returned nothing (void) in versions of
00415 // the OS previous to 3.5 (and didn't exist before Palm OS 3.1), and that it
00416 // used to not take an <iSrcLen> parameter.
00417 size_t TxtPrepFindString(const char* iSrcTextP, size_t iSrcLen, char* oDstTextP, size_t iDstSize);
00418 
00419 // Search for <iTargetStringP> in <iSrcStringP>. If found return true and pass back
00420 // the found position (byte offset) in <oFoundPos>, and the length of the matched
00421 // text in <oFoundLen>.
00422 Boolean TxtFindString(const char* iSrcStringP, const char* iTargetStringP,
00423             size_t* oFoundPos, size_t* oFoundLen);
00424 
00425 // Find the bounds of the word that contains the character at <iOffset>.
00426 // Return the offsets in <*oWordStart> and <*oWordEnd>. Return true if the
00427 // word we found was not empty & not a delimiter (attribute of first char
00428 // in word not equal to space or punct).
00429 Boolean TxtWordBounds(const char* iTextP, size_t iLength, size_t iOffset,
00430             size_t* oWordStart, size_t* oWordEnd);
00431 
00432 // Return the offset of the first break position (for text wrapping) that
00433 // occurs at or before <iOffset> in <iTextP>. Note that this routine will
00434 // also add trailing spaces and a trailing linefeed to the break position,
00435 // thus the result could be greater than <iOffset>.
00436 size_t TxtGetWordWrapOffset(const char* iTextP, size_t iOffset);
00437 
00438 // Return the minimum (lowest) encoding required for <iChar>. If we
00439 // don't know about the character, return charEncodingUnknown.
00440 CharEncodingType TxtCharEncoding(wchar32_t iChar);
00441 
00442 // Return the minimum (lowest) encoding required to represent <iStringP>.
00443 // This is the maximum encoding of any character in the string, where
00444 // highest is unknown, and lowest is ascii.
00445 CharEncodingType TxtStrEncoding(const char* iStringP);
00446 
00447 // Return the higher (max) encoding of <a> and <b>.
00448 CharEncodingType TxtMaxEncoding(CharEncodingType a, CharEncodingType b);
00449 
00450 // Return a pointer to the 'standard' name for <iEncoding>. If the
00451 // encoding is unknown, return a pointer to an empty string.
00452 const char* TxtEncodingName(CharEncodingType iEncoding);
00453 
00454 // Map from a character set name <iEncodingName> to a CharEncodingType.
00455 // If the character set name is unknown, return charEncodingUnknown.
00456 CharEncodingType TxtNameToEncoding(const char* iEncodingName);
00457 
00458 // Return information about the encoding <iEncoding>, such as whether
00459 // it encodes all characters as single bytes in a string.
00460 uint32_t TxtGetEncodingFlags(CharEncodingType iEncoding);
00461 
00462 // Transliterate <iSrcLength> bytes of text found in <iSrcTextP>, based
00463 // on the requested <iTranslitOp> operation. Place the results in <oDstTextP>,
00464 // and set the resulting length in <ioDstLength>. On entry <ioDstLength>
00465 // must contain the maximum size of the <oDstTextP> buffer. If the
00466 // buffer isn't large enough, return an error (note that <oDstTextP>
00467 // might have been modified during the operation). Note that if <iTranslitOp>
00468 // has the preprocess bit set, then <oDstTextP> is not modified, and
00469 // <ioDstLength> will contain the total space required in the destination
00470 // buffer in order to perform the operation. 
00471 status_t TxtTransliterate(const char* iSrcTextP, size_t iSrcLength, char* oDstTextP,
00472             size_t* ioDstLength, TranslitOpType iTranslitOp);
00473 
00474 // Convert <*ioSrcBytes> of text from <srcTextP> between the <srcEncoding>
00475 // and <dstEncoding> character encodings. If <dstTextP> is not NULL, write
00476 // the resulting bytes to the buffer, and always return the number of
00477 // resulting bytes in <*ioDstBytes>. Update <*ioSrcBytes> with the number of
00478 // bytes from the beginning of <srcTextP> that were successfully converted.
00479 // When the routine is called with <srcTextP> pointing to the beginning of
00480 // a string or text buffer, <newConversion> should be true; if the text is
00481 // processed in multiple chunks, either because errors occurred or due to
00482 // source/destination buffer size constraints, then subsequent calls to
00483 // this routine should pass false for <newConversion>. The TxtConvertStateType
00484 // record maintains state information so that if the source or destination
00485 // character encodings have state or modes (e.g. JIS), processing a single
00486 // sequence of text with multiple calls will work correctly.
00487 
00488 // When an error occurs due to an unconvertible character, the behavior of
00489 // the routine will depend on the <substitutionStr> parameter. If it is NULL,
00490 // then <*ioSrcBytes> will be set to the offset of the unconvertible character,
00491 // <*ioDstBytes> will be set to the number of successfully converted resulting
00492 // bytes, and <dstTextP>, if not NULL, will contain conversion results up to
00493 // the point of the error. The routine will return an appropriate error code,
00494 // and it is up to the caller to either terminate conversion or skip over the
00495 // unconvertible character and continue the conversion process (passing false
00496 // for the <newConversion> parameter in subsequent calls to TxtConvertEncoding).
00497 // If <substitutionStr> is not NULL, then this string is written to the
00498 // destination buffer when an unconvertible character is encountered in the
00499 // source text, and the source character is skipped. Processing continues, though
00500 // the error code will still be returned when the routine terminates. Note that
00501 // if a more serious error occurs during processing (e.g. buffer overflow) then
00502 // that error will be returned even if there was an earlier unconvertible character.
00503 // Note that the substitution string must use the destination character encoding.
00504 status_t TxtConvertEncoding(Boolean newConversion, TxtConvertStateType* ioStateP,
00505             const char* srcTextP, size_t* ioSrcBytes, CharEncodingType srcEncoding,
00506             char* dstTextP, size_t* ioDstBytes, CharEncodingType dstEncoding,
00507             const char* substitutionStr, size_t substitutionLen);
00508 
00509 // Return true if <iChar> is a valid (drawable) character. Note that we'll
00510 // return false if it is a virtual character code.
00511 Boolean TxtCharIsValid(wchar32_t iChar);
00512 
00513 // Compare the first <s1Len> bytes of <s1> with the first <s2Len> bytes
00514 // of <s2>. Return the results of the comparison: < 0 if <s1> sorts before
00515 // <s2>, > 0 if <s1> sorts after <s2>, and 0 if they are equal. Also return
00516 // the number of bytes that matched in <s1MatchLen> and <s2MatchLen>
00517 // (either one of which can be NULL if the match length is not needed).
00518 // This comparison is "caseless", in the same manner as a find operation,
00519 // thus case, character size, etc. don't matter.
00520 int16_t TxtCaselessCompare(const char* s1, size_t s1Len, size_t* s1MatchLen,
00521             const char* s2, size_t s2Len, size_t* s2MatchLen);
00522 
00523 // Compare the first <s1Len> bytes of <s1> with the first <s2Len> bytes
00524 // of <s2>. Return the results of the comparison: < 0 if <s1> sorts before
00525 // <s2>, > 0 if <s1> sorts after <s2>, and 0 if they are equal. Also return
00526 // the number of bytes that matched in <s1MatchLen> and <s2MatchLen>
00527 // (either one of which can be NULL if the match length is not needed).
00528 int16_t TxtCompare(const char* s1, size_t s1Len, size_t* s1MatchLen,
00529             const char* s2, size_t s2Len, size_t* s2MatchLen);
00530 
00531 #ifdef __cplusplus
00532     }
00533 #endif
00534 
00535 #endif // _TEXTMGR_H_