#include "xmldef.h"
#include "xmltok.h"
#include "nametab.h"
#include "xmltok_impl.h"
#include "xmltok_impl.c"
#include "asciitab.h"
#include "utf8tab.h"
#include "iasciitab.h"
#include "latin1tab.h"
#include "xmltok_ns.c"
Go to the source code of this file.
Classes | |
struct | normal_encoding |
struct | unknown_encoding |
Defines | |
#define | VTABLE1 |
#define | VTABLE VTABLE1, PREFIX(toUtf8), PREFIX(toUtf16) |
#define | UCS2_GET_NAMING(pages, hi, lo) (namingBitmap[(pages[hi] << 3) + ((lo) >> 5)] & (1 << ((lo) & 0x1F))) |
#define | UTF8_GET_NAMING2(pages, byte) |
#define | UTF8_GET_NAMING3(pages, byte) |
#define | UTF8_GET_NAMING(pages, p, n) |
#define | UTF8_INVALID3(p) |
#define | UTF8_INVALID4(p) ((*p) == 0xF4 && ((p)[1] & 0x30) != 0) |
#define | utf8_isName4 isNever |
#define | utf8_isNmstrt4 isNever |
#define | utf8_isInvalid2 isNever |
#define | STANDARD_VTABLE(E) |
#define | NORMAL_VTABLE(E) |
#define | MINBPC(enc) 1 |
#define | SB_BYTE_TYPE(enc, p) (((struct normal_encoding *)(enc))->type[(unsigned char)*(p)]) |
#define | BYTE_TYPE(enc, p) SB_BYTE_TYPE(enc, p) |
#define | BYTE_TO_ASCII(enc, p) (*p) |
#define | IS_NAME_CHAR(enc, p, n) (((const struct normal_encoding *)(enc))->isName ## n(enc, p)) |
#define | IS_NMSTRT_CHAR(enc, p, n) (((const struct normal_encoding *)(enc))->isNmstrt ## n(enc, p)) |
#define | IS_INVALID_CHAR(enc, p, n) (((const struct normal_encoding *)(enc))->isInvalid ## n(enc, p)) |
#define | IS_NAME_CHAR_MINBPC(enc, p) (0) |
#define | IS_NMSTRT_CHAR_MINBPC(enc, p) (0) |
#define | CHAR_MATCHES(enc, p, c) (*(p) == c) |
#define | PREFIX(ident) normal_ ## ident |
#define | BT_COLON BT_NMSTRT |
#define | BT_COLON BT_NMSTRT |
#define | BT_COLON BT_NMSTRT |
#define | BT_COLON BT_NMSTRT |
#define | DEFINE_UTF16_TO_UTF8(E) |
#define | DEFINE_UTF16_TO_UTF16(E) |
#define | SET2(ptr, ch) (((ptr)[0] = ((ch) & 0xff)), ((ptr)[1] = ((ch) >> 8))) |
#define | GET_LO(ptr) ((unsigned char)(ptr)[0]) |
#define | GET_HI(ptr) ((unsigned char)(ptr)[1]) |
#define | SET2(ptr, ch) (((ptr)[0] = ((ch) >> 8)), ((ptr)[1] = ((ch) & 0xFF))) |
#define | GET_LO(ptr) ((unsigned char)(ptr)[1]) |
#define | GET_HI(ptr) ((unsigned char)(ptr)[0]) |
#define | LITTLE2_BYTE_TYPE(enc, p) |
#define | LITTLE2_BYTE_TO_ASCII(enc, p) ((p)[1] == 0 ? (p)[0] : -1) |
#define | LITTLE2_CHAR_MATCHES(enc, p, c) ((p)[1] == 0 && (p)[0] == c) |
#define | LITTLE2_IS_NAME_CHAR_MINBPC(enc, p) UCS2_GET_NAMING(namePages, (unsigned char)p[1], (unsigned char)p[0]) |
#define | LITTLE2_IS_NMSTRT_CHAR_MINBPC(enc, p) UCS2_GET_NAMING(nmstrtPages, (unsigned char)p[1], (unsigned char)p[0]) |
#define | PREFIX(ident) little2_ ## ident |
#define | MINBPC(enc) 2 |
#define | BYTE_TYPE(enc, p) LITTLE2_BYTE_TYPE(enc, p) |
#define | BYTE_TO_ASCII(enc, p) LITTLE2_BYTE_TO_ASCII(enc, p) |
#define | CHAR_MATCHES(enc, p, c) LITTLE2_CHAR_MATCHES(enc, p, c) |
#define | IS_NAME_CHAR(enc, p, n) 0 |
#define | IS_NAME_CHAR_MINBPC(enc, p) LITTLE2_IS_NAME_CHAR_MINBPC(enc, p) |
#define | IS_NMSTRT_CHAR(enc, p, n) (0) |
#define | IS_NMSTRT_CHAR_MINBPC(enc, p) LITTLE2_IS_NMSTRT_CHAR_MINBPC(enc, p) |
#define | BT_COLON BT_NMSTRT |
#define | BT_COLON BT_NMSTRT |
#define | BIG2_BYTE_TYPE(enc, p) |
#define | BIG2_BYTE_TO_ASCII(enc, p) ((p)[0] == 0 ? (p)[1] : -1) |
#define | BIG2_CHAR_MATCHES(enc, p, c) ((p)[0] == 0 && (p)[1] == c) |
#define | BIG2_IS_NAME_CHAR_MINBPC(enc, p) UCS2_GET_NAMING(namePages, (unsigned char)p[0], (unsigned char)p[1]) |
#define | BIG2_IS_NMSTRT_CHAR_MINBPC(enc, p) UCS2_GET_NAMING(nmstrtPages, (unsigned char)p[0], (unsigned char)p[1]) |
#define | PREFIX(ident) big2_ ## ident |
#define | MINBPC(enc) 2 |
#define | BYTE_TYPE(enc, p) BIG2_BYTE_TYPE(enc, p) |
#define | BYTE_TO_ASCII(enc, p) BIG2_BYTE_TO_ASCII(enc, p) |
#define | CHAR_MATCHES(enc, p, c) BIG2_CHAR_MATCHES(enc, p, c) |
#define | IS_NAME_CHAR(enc, p, n) 0 |
#define | IS_NAME_CHAR_MINBPC(enc, p) BIG2_IS_NAME_CHAR_MINBPC(enc, p) |
#define | IS_NMSTRT_CHAR(enc, p, n) (0) |
#define | IS_NMSTRT_CHAR_MINBPC(enc, p) BIG2_IS_NMSTRT_CHAR_MINBPC(enc, p) |
#define | BT_COLON BT_NMSTRT |
#define | BT_COLON BT_NMSTRT |
#define | INIT_ENC_INDEX(enc) ((enc)->initEnc.isUtf16) |
#define | NS(x) x |
#define | ns(x) x |
Enumerations | |
enum | { UTF8_cval1 = 0x00, UTF8_cval2 = 0xc0, UTF8_cval3 = 0xe0, UTF8_cval4 = 0xf0 } |
enum | { UNKNOWN_ENC = -1, ISO_8859_1_ENC = 0, US_ASCII_ENC, UTF_8_ENC, UTF_16_ENC, UTF_16BE_ENC, UTF_16LE_ENC, NO_ENC } |
Functions | |
static int | isNever (const ENCODING *enc, const char *p) |
static int | utf8_isName2 (const ENCODING *enc, const char *p) |
static int | utf8_isName3 (const ENCODING *enc, const char *p) |
static int | utf8_isNmstrt2 (const ENCODING *enc, const char *p) |
static int | utf8_isNmstrt3 (const ENCODING *enc, const char *p) |
static int | utf8_isInvalid3 (const ENCODING *enc, const char *p) |
static int | utf8_isInvalid4 (const ENCODING *enc, const char *p) |
static int | checkCharRefNumber (int) |
static void | utf8_toUtf8 (const ENCODING *enc, const char **fromP, const char *fromLim, char **toP, const char *toLim) |
static void | utf8_toUtf16 (const ENCODING *enc, const char **fromP, const char *fromLim, unsigned short **toP, const unsigned short *toLim) |
static void | latin1_toUtf8 (const ENCODING *enc, const char **fromP, const char *fromLim, char **toP, const char *toLim) |
static void | latin1_toUtf16 (const ENCODING *enc, const char **fromP, const char *fromLim, unsigned short **toP, const unsigned short *toLim) |
static void | ascii_toUtf8 (const ENCODING *enc, const char **fromP, const char *fromLim, char **toP, const char *toLim) |
static int | unicode_byte_type (char hi, char lo) |
static int | streqci (const char *s1, const char *s2) |
static void | initUpdatePosition (const ENCODING *enc, const char *ptr, const char *end, POSITION *pos) |
static int | toAscii (const ENCODING *enc, const char *ptr, const char *end) |
static int | isSpace (int c) |
static int | parsePseudoAttribute (const ENCODING *enc, const char *ptr, const char *end, const char **namePtr, const char **valPtr, const char **nextTokPtr) |
static int | doParseXmlDecl (const ENCODING *(*encodingFinder)(const ENCODING *, const char *, const char *), int isGeneralTextEntity, const ENCODING *enc, const char *ptr, const char *end, const char **badPtr, const char **versionPtr, const char **encodingName, const ENCODING **encoding, int *standalone) |
int | XmlUtf8Encode (int c, char *buf) |
int | XmlUtf16Encode (int charNum, unsigned short *buf) |
int | XmlSizeOfUnknownEncoding () |
static int | unknown_isName (const ENCODING *enc, const char *p) |
static int | unknown_isNmstrt (const ENCODING *enc, const char *p) |
static int | unknown_isInvalid (const ENCODING *enc, const char *p) |
static void | unknown_toUtf8 (const ENCODING *enc, const char **fromP, const char *fromLim, char **toP, const char *toLim) |
static void | unknown_toUtf16 (const ENCODING *enc, const char **fromP, const char *fromLim, unsigned short **toP, const unsigned short *toLim) |
ENCODING * | XmlInitUnknownEncoding (void *mem, int *table, int(*convert)(void *userData, const char *p), void *userData) |
static int | getEncodingIndex (const char *name) |
static int | initScan (const ENCODING **encodingTable, const INIT_ENCODING *enc, int state, const char *ptr, const char *end, const char **nextTokPtr) |
Variables | |
static struct normal_encoding | utf8_encoding |
static struct normal_encoding | internal_utf8_encoding |
static struct normal_encoding | latin1_encoding |
static struct normal_encoding | ascii_encoding |
static struct normal_encoding | little2_encoding |
static struct normal_encoding | internal_little2_encoding |
static struct normal_encoding | big2_encoding |
static struct normal_encoding | internal_big2_encoding |
#define BIG2_BYTE_TO_ASCII | ( | enc, | |||
p | ) | ((p)[0] == 0 ? (p)[1] : -1) |
#define BIG2_BYTE_TYPE | ( | enc, | |||
p | ) |
Value:
((p)[0] == 0 \
 ? ((struct normal_encoding *)(enc))->type[(unsigned char)(p)[1]] \
 : unicode_byte_type((p)[0], (p)[1]))
#define BIG2_CHAR_MATCHES | ( | enc, | |||
p, | |||||
c | ) | ((p)[0] == 0 && (p)[1] == c) |
#define BIG2_IS_NAME_CHAR_MINBPC | ( | enc, | |||
p | ) | UCS2_GET_NAMING(namePages, (unsigned char)p[0], (unsigned char)p[1]) |
#define BIG2_IS_NMSTRT_CHAR_MINBPC | ( | enc, | |||
p | ) | UCS2_GET_NAMING(nmstrtPages, (unsigned char)p[0], (unsigned char)p[1]) |
#define BT_COLON BT_NMSTRT |
#define BT_COLON BT_NMSTRT |
#define BT_COLON BT_NMSTRT |
#define BT_COLON BT_NMSTRT |
#define BT_COLON BT_NMSTRT |
#define BT_COLON BT_NMSTRT |
#define BT_COLON BT_NMSTRT |
#define BT_COLON BT_NMSTRT |
#define BYTE_TO_ASCII | ( | enc, | |||
p | ) | BIG2_BYTE_TO_ASCII(enc, p) |
#define BYTE_TO_ASCII | ( | enc, | |||
p | ) | LITTLE2_BYTE_TO_ASCII(enc, p) |
#define CHAR_MATCHES | ( | enc, | |||
p, | |||||
c | ) | BIG2_CHAR_MATCHES(enc, p, c) |
#define CHAR_MATCHES | ( | enc, | |||
p, | |||||
c | ) | LITTLE2_CHAR_MATCHES(enc, p, c) |
#define DEFINE_UTF16_TO_UTF16 | ( | E | ) |
Value:
static \
void E ## toUtf16(const ENCODING *enc, \
                  const char **fromP, const char *fromLim, \
                  unsigned short **toP, const unsigned short *toLim) \
{ \
  /* Avoid copying first half only of surrogate */ \
  if (fromLim - *fromP > ((toLim - *toP) << 1) \
      && (GET_HI(fromLim - 2) & 0xF8) == 0xD8) \
    fromLim -= 2; \
  for (; *fromP != fromLim && *toP != toLim; *fromP += 2) \
    *(*toP)++ = (GET_HI(*fromP) << 8) | GET_LO(*fromP); \
}
#define INIT_ENC_INDEX | ( | enc | ) | ((enc)->initEnc.isUtf16) |
#define IS_INVALID_CHAR | ( | enc, | |||
p, | |||||
n | ) | (((const struct normal_encoding *)(enc))->isInvalid ## n(enc, p)) |
#define IS_NAME_CHAR | ( | enc, | |||
p, | |||||
n | ) | (((const struct normal_encoding *)(enc))->isName ## n(enc, p)) |
#define IS_NAME_CHAR_MINBPC | ( | enc, | |||
p | ) | BIG2_IS_NAME_CHAR_MINBPC(enc, p) |
#define IS_NAME_CHAR_MINBPC | ( | enc, | |||
p | ) | LITTLE2_IS_NAME_CHAR_MINBPC(enc, p) |
#define IS_NMSTRT_CHAR | ( | enc, | |||
p, | |||||
n | ) | (((const struct normal_encoding *)(enc))->isNmstrt ## n(enc, p)) |
#define IS_NMSTRT_CHAR_MINBPC | ( | enc, | |||
p | ) | BIG2_IS_NMSTRT_CHAR_MINBPC(enc, p) |
#define IS_NMSTRT_CHAR_MINBPC | ( | enc, | |||
p | ) | LITTLE2_IS_NMSTRT_CHAR_MINBPC(enc, p) |
#define LITTLE2_BYTE_TO_ASCII | ( | enc, | |||
p | ) | ((p)[1] == 0 ? (p)[0] : -1) |
#define LITTLE2_BYTE_TYPE | ( | enc, | |||
p | ) |
Value:
((p)[1] == 0 \
 ? ((struct normal_encoding *)(enc))->type[(unsigned char)*(p)] \
 : unicode_byte_type((p)[1], (p)[0]))
#define LITTLE2_CHAR_MATCHES | ( | enc, | |||
p, | |||||
c | ) | ((p)[1] == 0 && (p)[0] == c) |
#define LITTLE2_IS_NAME_CHAR_MINBPC | ( | enc, | |||
p | ) | UCS2_GET_NAMING(namePages, (unsigned char)p[1], (unsigned char)p[0]) |
#define LITTLE2_IS_NMSTRT_CHAR_MINBPC | ( | enc, | |||
p | ) | UCS2_GET_NAMING(nmstrtPages, (unsigned char)p[1], (unsigned char)p[0]) |
#define NORMAL_VTABLE | ( | E | ) |
Definition at line 1504 of file xmltok.c.
Referenced by BesShortLivedConstructor::ConstructMesons(), RecMdcTrackCnv::DataObjectToTObject(), MdcTrackCnv::DataObjectToTObject(), BesTofDigitizerEcV4::Digitize(), BesTofDigitizerEcV3::Digitize(), BesTofDigitizerEcV2::Digitize(), BesTofDigitizerBrV2::Digitize(), G4HepMCInterface::HepMC2G4(), RecEmcShower::NearestSeed(), EvtPolInt::polynomial(), EvtPolInt::ratint(), BesRootIO::SaveTofHitRoot(), RecMdcTrackCnv::TObjectToDataObject(), MdcTrackCnv::TObjectToDataObject(), XmlGetUtf16InternalEncoding(), and XmlGetUtf8InternalEncoding().
Definition at line 1503 of file xmltok.c.
Referenced by TConformalFinder::fastFinding3D(), findEncoding(), initScanContent(), initScanProlog(), XmlInitEncoding(), and XmlParseXmlDecl().
#define SB_BYTE_TYPE | ( | enc, | |||
p | ) | (((struct normal_encoding *)(enc))->type[(unsigned char)*(p)]) |
#define SET2 | ( | ptr, | |||
ch | ) | (((ptr)[0] = ((ch) >> 8)), ((ptr)[1] = ((ch) & 0xFF))) |
#define SET2 | ( | ptr, | |||
ch | ) | (((ptr)[0] = ((ch) & 0xff)), ((ptr)[1] = ((ch) >> 8))) |
#define UCS2_GET_NAMING | ( | pages, | |||
hi, | |||||
lo | ) | (namingBitmap[(pages[hi] << 3) + ((lo) >> 5)] & (1 << ((lo) & 0x1F))) |
Definition at line 54 of file xmltok.c.
Referenced by unknown_isName(), unknown_isNmstrt(), and XmlInitUnknownEncoding().
#define UTF8_GET_NAMING | ( | pages, | |||
p, | |||||
n | ) |
Value:
((n) == 2 \
 ? UTF8_GET_NAMING2(pages, (const unsigned char *)(p)) \
 : ((n) == 3 \
    ? UTF8_GET_NAMING3(pages, (const unsigned char *)(p)) \
    : 0))
#define UTF8_GET_NAMING2 | ( | pages, | |||
byte | ) |
Value:
(namingBitmap[((pages)[(((byte)[0]) >> 2) & 7] << 3) \
              + ((((byte)[0]) & 3) << 1) \
              + ((((byte)[1]) >> 5) & 1)] \
 & (1 << (((byte)[1]) & 0x1F)))
Definition at line 61 of file xmltok.c.
Referenced by utf8_isName2(), and utf8_isNmstrt2().
#define UTF8_GET_NAMING3 | ( | pages, | |||
byte | ) |
Value:
(namingBitmap[((pages)[((((byte)[0]) & 0xF) << 4) \
                       + ((((byte)[1]) >> 2) & 0xF)] \
               << 3) \
              + ((((byte)[1]) & 3) << 1) \
              + ((((byte)[2]) >> 5) & 1)] \
 & (1 << (((byte)[2]) & 0x1F)))
Definition at line 71 of file xmltok.c.
Referenced by utf8_isName3(), and utf8_isNmstrt3().
#define UTF8_INVALID3 | ( | p | ) |
Value:
((*p) == 0xED \
 ? (((p)[1] & 0x20) != 0) \
 : ((*p) == 0xEF \
    ? ((p)[1] == 0xBF && ((p)[2] == 0xBF || (p)[2] == 0xBE)) \
    : 0))
Definition at line 86 of file xmltok.c.
Referenced by utf8_isInvalid3().
#define UTF8_INVALID4 | ( | p | ) | ((*p) == 0xF4 && ((p)[1] & 0x30) != 0) |
#define VTABLE1 |
Value:
{ PREFIX(prologTok), PREFIX(contentTok), PREFIX(cdataSectionTok) }, \
{ PREFIX(attributeValueTok), PREFIX(entityValueTok) }, \
PREFIX(sameName), \
PREFIX(nameMatchesAscii), \
PREFIX(nameLength), \
PREFIX(skipS), \
PREFIX(getAtts), \
PREFIX(charRefNumber), \
PREFIX(predefinedEntityName), \
PREFIX(updatePosition), \
PREFIX(isPublicId)
anonymous enum |
Definition at line 276 of file xmltok.c.
00276 { /* UTF8_cvalN is value of masked first byte of N byte sequence */ 00277 UTF8_cval1 = 0x00, 00278 UTF8_cval2 = 0xc0, 00279 UTF8_cval3 = 0xe0, 00280 UTF8_cval4 = 0xf0 00281 };
anonymous enum |
UNKNOWN_ENC | |
ISO_8859_1_ENC | |
US_ASCII_ENC | |
UTF_8_ENC | |
UTF_16_ENC | |
UTF_16BE_ENC | |
UTF_16LE_ENC | |
NO_ENC |
Definition at line 1343 of file xmltok.c.
01343 { 01344 UNKNOWN_ENC = -1, 01345 ISO_8859_1_ENC = 0, 01346 US_ASCII_ENC, 01347 UTF_8_ENC, 01348 UTF_16_ENC, 01349 UTF_16BE_ENC, 01350 UTF_16LE_ENC, 01351 /* must match encodingNames up to here */ 01352 NO_ENC 01353 };
static void ascii_toUtf8 | ( | const ENCODING * | enc, | |
const char ** | fromP, | |||
const char * | fromLim, | |||
char ** | toP, | |||
const char * | toLim | |||
) | [static] |
static int checkCharRefNumber | ( | int | ) | [static] |
Definition at line 1096 of file xmltok.c.
References latin1_encoding, and normal_encoding::type.
Referenced by unknown_isInvalid(), and XmlInitUnknownEncoding().
01097 { 01098 switch (result >> 8) { 01099 case 0xD8: case 0xD9: case 0xDA: case 0xDB: 01100 case 0xDC: case 0xDD: case 0xDE: case 0xDF: 01101 return -1; 01102 case 0: 01103 if (latin1_encoding.type[result] == BT_NONXML) 01104 return -1; 01105 break; 01106 case 0xFF: 01107 if (result == 0xFFFE || result == 0xFFFF) 01108 return -1; 01109 break; 01110 } 01111 return result; 01112 }
static int doParseXmlDecl | ( | const ENCODING *(*)(const ENCODING *, const char *, const char *) | encodingFinder, | |
int | isGeneralTextEntity, | |||
const ENCODING * | enc, | |||
const char * | ptr, | |||
const char * | end, | |||
const char ** | badPtr, | |||
const char ** | versionPtr, | |||
const char ** | encodingName, | |||
const ENCODING ** | encoding, | |||
int * | standalone | |||
) | [static] |
Definition at line 1010 of file xmltok.c.
References isSpace(), parsePseudoAttribute(), and toAscii().
Referenced by XmlParseXmlDecl().
01022 { 01023 const char *val = 0; 01024 const char *name = 0; 01025 ptr += 5 * enc->minBytesPerChar; 01026 end -= 2 * enc->minBytesPerChar; 01027 if (!parsePseudoAttribute(enc, ptr, end, &name, &val, &ptr) || !name) { 01028 *badPtr = ptr; 01029 return 0; 01030 } 01031 if (!XmlNameMatchesAscii(enc, name, "version")) { 01032 if (!isGeneralTextEntity) { 01033 *badPtr = name; 01034 return 0; 01035 } 01036 } 01037 else { 01038 if (versionPtr) 01039 *versionPtr = val; 01040 if (!parsePseudoAttribute(enc, ptr, end, &name, &val, &ptr)) { 01041 *badPtr = ptr; 01042 return 0; 01043 } 01044 if (!name) { 01045 if (isGeneralTextEntity) { 01046 /* a TextDecl must have an EncodingDecl */ 01047 *badPtr = ptr; 01048 return 0; 01049 } 01050 return 1; 01051 } 01052 } 01053 if (XmlNameMatchesAscii(enc, name, "encoding")) { 01054 int c = toAscii(enc, val, end); 01055 if (!('a' <= c && c <= 'z') && !('A' <= c && c <= 'Z')) { 01056 *badPtr = val; 01057 return 0; 01058 } 01059 if (encodingName) 01060 *encodingName = val; 01061 if (encoding) 01062 *encoding = encodingFinder(enc, val, ptr - enc->minBytesPerChar); 01063 if (!parsePseudoAttribute(enc, ptr, end, &name, &val, &ptr)) { 01064 *badPtr = ptr; 01065 return 0; 01066 } 01067 if (!name) 01068 return 1; 01069 } 01070 if (!XmlNameMatchesAscii(enc, name, "standalone") || isGeneralTextEntity) { 01071 *badPtr = name; 01072 return 0; 01073 } 01074 if (XmlNameMatchesAscii(enc, val, "yes")) { 01075 if (standalone) 01076 *standalone = 1; 01077 } 01078 else if (XmlNameMatchesAscii(enc, val, "no")) { 01079 if (standalone) 01080 *standalone = 0; 01081 } 01082 else { 01083 *badPtr = val; 01084 return 0; 01085 } 01086 while (isSpace(toAscii(enc, ptr, end))) 01087 ptr += enc->minBytesPerChar; 01088 if (ptr != end) { 01089 *badPtr = ptr; 01090 return 0; 01091 } 01092 return 1; 01093 }
static int getEncodingIndex | ( | const char * | name | ) | [static] |
Definition at line 1356 of file xmltok.c.
References genRecEmupikp::i, streqci(), and UNKNOWN_ENC.
Referenced by findEncoding(), and XmlInitEncoding().
01357 { 01358 static const char *encodingNames[] = { 01359 "ISO-8859-1", 01360 "US-ASCII", 01361 "UTF-8", 01362 "UTF-16", 01363 "UTF-16BE", 01364 "UTF-16LE", 01365 }; 01366 int i; 01367 if (name == 0) 01368 return NO_ENC; 01369 for (i = 0; i < sizeof(encodingNames)/sizeof(encodingNames[0]); i++) 01370 if (streqci(name, encodingNames[i])) 01371 return i; 01372 return UNKNOWN_ENC; 01373 }
static int initScan | ( | const ENCODING ** | encodingTable, | |
const INIT_ENCODING * | enc, | |||
int | state, | |||
const char * | ptr, | |||
const char * | end, | |||
const char ** | nextTokPtr | |||
) | [static] |
Definition at line 1389 of file xmltok.c.
References INIT_ENC_INDEX, ISO_8859_1_ENC, UTF_16_ENC, UTF_16BE_ENC, UTF_16LE_ENC, and UTF_8_ENC.
Referenced by initScanContent(), and initScanProlog().
01395 { 01396 const ENCODING **encPtr; 01397 01398 if (ptr == end) 01399 return XML_TOK_NONE; 01400 encPtr = enc->encPtr; 01401 if (ptr + 1 == end) { 01402 /* only a single byte available for auto-detection */ 01403 /* a well-formed document entity must have more than one byte */ 01404 if (state != XML_CONTENT_STATE) 01405 return XML_TOK_PARTIAL; 01406 /* so we're parsing an external text entity... */ 01407 /* if UTF-16 was externally specified, then we need at least 2 bytes */ 01408 switch (INIT_ENC_INDEX(enc)) { 01409 case UTF_16_ENC: 01410 case UTF_16LE_ENC: 01411 case UTF_16BE_ENC: 01412 return XML_TOK_PARTIAL; 01413 } 01414 switch ((unsigned char)*ptr) { 01415 case 0xFE: 01416 case 0xFF: 01417 case 0xEF: /* possibly first byte of UTF-8 BOM */ 01418 if (INIT_ENC_INDEX(enc) == ISO_8859_1_ENC 01419 && state == XML_CONTENT_STATE) 01420 break; 01421 /* fall through */ 01422 case 0x00: 01423 case 0x3C: 01424 return XML_TOK_PARTIAL; 01425 } 01426 } 01427 else { 01428 switch (((unsigned char)ptr[0] << 8) | (unsigned char)ptr[1]) { 01429 case 0xFEFF: 01430 if (INIT_ENC_INDEX(enc) == ISO_8859_1_ENC 01431 && state == XML_CONTENT_STATE) 01432 break; 01433 *nextTokPtr = ptr + 2; 01434 *encPtr = encodingTable[UTF_16BE_ENC]; 01435 return XML_TOK_BOM; 01436 /* 00 3C is handled in the default case */ 01437 case 0x3C00: 01438 if ((INIT_ENC_INDEX(enc) == UTF_16BE_ENC 01439 || INIT_ENC_INDEX(enc) == UTF_16_ENC) 01440 && state == XML_CONTENT_STATE) 01441 break; 01442 *encPtr = encodingTable[UTF_16LE_ENC]; 01443 return XmlTok(*encPtr, state, ptr, end, nextTokPtr); 01444 case 0xFFFE: 01445 if (INIT_ENC_INDEX(enc) == ISO_8859_1_ENC 01446 && state == XML_CONTENT_STATE) 01447 break; 01448 *nextTokPtr = ptr + 2; 01449 *encPtr = encodingTable[UTF_16LE_ENC]; 01450 return XML_TOK_BOM; 01451 case 0xEFBB: 01452 /* Maybe a UTF-8 BOM (EF BB BF) */ 01453 /* If there's an explicitly specified (external) encoding 01454 of ISO-8859-1 or some flavour of UTF-16 01455 and this is an external text entity, 01456 don't look for the BOM, 01457 because it might be legal data. */ 01458 if (state == XML_CONTENT_STATE) { 01459 int e = INIT_ENC_INDEX(enc); 01460 if (e == ISO_8859_1_ENC || e == UTF_16BE_ENC || e == UTF_16LE_ENC || e == UTF_16_ENC) 01461 break; 01462 } 01463 if (ptr + 2 == end) 01464 return XML_TOK_PARTIAL; 01465 if ((unsigned char)ptr[2] == 0xBF) { 01466 *encPtr = encodingTable[UTF_8_ENC]; 01467 return XML_TOK_BOM; 01468 } 01469 break; 01470 default: 01471 if (ptr[0] == '\0') { 01472 /* 0 isn't a legal data character. Furthermore a document entity can only 01473 start with ASCII characters. So the only way this can fail to be big-endian 01474 UTF-16 is if it is an external parsed general entity that's labelled as 01475 UTF-16LE. */ 01476 if (state == XML_CONTENT_STATE && INIT_ENC_INDEX(enc) == UTF_16LE_ENC) 01477 break; 01478 *encPtr = encodingTable[UTF_16BE_ENC]; 01479 return XmlTok(*encPtr, state, ptr, end, nextTokPtr); 01480 } 01481 else if (ptr[1] == '\0') { 01482 /* We could recover here in the case: 01483 - parsing an external entity 01484 - second byte is 0 01485 - no externally specified encoding 01486 - no encoding declaration 01487 by assuming UTF-16LE. But we don't, because this would mean when 01488 presented just with a single byte, we couldn't reliably determine 01489 whether we needed further bytes. */ 01490 if (state == XML_CONTENT_STATE) 01491 break; 01492 *encPtr = encodingTable[UTF_16LE_ENC]; 01493 return XmlTok(*encPtr, state, ptr, end, nextTokPtr); 01494 } 01495 break; 01496 } 01497 } 01498 *encPtr = encodingTable[INIT_ENC_INDEX(enc)]; 01499 return XmlTok(*encPtr, state, ptr, end, nextTokPtr); 01500 }
static void initUpdatePosition | ( | const ENCODING * | enc, | |
const char * | ptr, | |||
const char * | end, | |||
POSITION * | pos | |||
) | [static] |
Definition at line 895 of file xmltok.c.
References normal_encoding::enc, and utf8_encoding.
Referenced by XmlInitEncoding().
00897 { 00898 normal_updatePosition(&utf8_encoding.enc, ptr, end, pos); 00899 }
static int isNever | ( | const ENCODING * | enc, | |
const char * | p | |||
) | [static] |
static int isSpace | ( | int | c | ) | [static] |
Definition at line 914 of file xmltok.c.
Referenced by doParseXmlDecl(), and parsePseudoAttribute().
00915 { 00916 switch (c) { 00917 case 0x20: 00918 case 0xD: 00919 case 0xA: 00920 case 0x9: 00921 return 1; 00922 } 00923 return 0; 00924 }
static void latin1_toUtf16 | ( | const ENCODING * | enc, | |
const char ** | fromP, | |||
const char * | fromLim, | |||
unsigned short ** | toP, | |||
const unsigned short * | toLim | |||
) | [static] |
static void latin1_toUtf8 | ( | const ENCODING * | enc, | |
const char ** | fromP, | |||
const char * | fromLim, | |||
char ** | toP, | |||
const char * | toLim | |||
) | [static] |
Definition at line 388 of file xmltok.c.
References UTF8_cval2.
00391 { 00392 for (;;) { 00393 unsigned char c; 00394 if (*fromP == fromLim) 00395 break; 00396 c = (unsigned char)**fromP; 00397 if (c & 0x80) { 00398 if (toLim - *toP < 2) 00399 break; 00400 *(*toP)++ = ((c >> 6) | UTF8_cval2); 00401 *(*toP)++ = ((c & 0x3f) | 0x80); 00402 (*fromP)++; 00403 } 00404 else { 00405 if (*toP == toLim) 00406 break; 00407 *(*toP)++ = *(*fromP)++; 00408 } 00409 } 00410 }
static int parsePseudoAttribute | ( | const ENCODING * | enc, | |
const char * | ptr, | |||
const char * | end, | |||
const char ** | namePtr, | |||
const char ** | valPtr, | |||
const char ** | nextTokPtr | |||
) | [static] |
Definition at line 929 of file xmltok.c.
References isSpace(), and toAscii().
Referenced by doParseXmlDecl().
00935 { 00936 int c; 00937 char open; 00938 if (ptr == end) { 00939 *namePtr = 0; 00940 return 1; 00941 } 00942 if (!isSpace(toAscii(enc, ptr, end))) { 00943 *nextTokPtr = ptr; 00944 return 0; 00945 } 00946 do { 00947 ptr += enc->minBytesPerChar; 00948 } while (isSpace(toAscii(enc, ptr, end))); 00949 if (ptr == end) { 00950 *namePtr = 0; 00951 return 1; 00952 } 00953 *namePtr = ptr; 00954 for (;;) { 00955 c = toAscii(enc, ptr, end); 00956 if (c == -1) { 00957 *nextTokPtr = ptr; 00958 return 0; 00959 } 00960 if (c == '=') 00961 break; 00962 if (isSpace(c)) { 00963 do { 00964 ptr += enc->minBytesPerChar; 00965 } while (isSpace(c = toAscii(enc, ptr, end))); 00966 if (c != '=') { 00967 *nextTokPtr = ptr; 00968 return 0; 00969 } 00970 break; 00971 } 00972 ptr += enc->minBytesPerChar; 00973 } 00974 if (ptr == *namePtr) { 00975 *nextTokPtr = ptr; 00976 return 0; 00977 } 00978 ptr += enc->minBytesPerChar; 00979 c = toAscii(enc, ptr, end); 00980 while (isSpace(c)) { 00981 ptr += enc->minBytesPerChar; 00982 c = toAscii(enc, ptr, end); 00983 } 00984 if (c != '"' && c != '\'') { 00985 *nextTokPtr = ptr; 00986 return 0; 00987 } 00988 open = c; 00989 ptr += enc->minBytesPerChar; 00990 *valPtr = ptr; 00991 for (;; ptr += enc->minBytesPerChar) { 00992 c = toAscii(enc, ptr, end); 00993 if (c == open) 00994 break; 00995 if (!('a' <= c && c <= 'z') 00996 && !('A' <= c && c <= 'Z') 00997 && !('0' <= c && c <= '9') 00998 && c != '.' 00999 && c != '-' 01000 && c != '_') { 01001 *nextTokPtr = ptr; 01002 return 0; 01003 } 01004 } 01005 *nextTokPtr = ptr + enc->minBytesPerChar; 01006 return 1; 01007 }
static int streqci | ( | const char * | s1, | |
const char * | s2 | |||
) | [static] |
Definition at line 877 of file xmltok.c.
Referenced by findEncoding(), and getEncodingIndex().
00878 { 00879 for (;;) { 00880 char c1 = *s1++; 00881 char c2 = *s2++; 00882 if ('a' <= c1 && c1 <= 'z') 00883 c1 += 'A' - 'a'; 00884 if ('a' <= c2 && c2 <= 'z') 00885 c2 += 'A' - 'a'; 00886 if (c1 != c2) 00887 return 0; 00888 if (!c1) 00889 break; 00890 } 00891 return 1; 00892 }
static int toAscii | ( | const ENCODING * | enc, | |
const char * | ptr, | |||
const char * | end | |||
) | [static] |
Definition at line 902 of file xmltok.c.
Referenced by doParseXmlDecl(), and parsePseudoAttribute().
00903 { 00904 char buf[1]; 00905 char *p = buf; 00906 XmlUtf8Convert(enc, &ptr, end, &p, p + 1); 00907 if (p == buf) 00908 return -1; 00909 else 00910 return buf[0]; 00911 }
static int unicode_byte_type | ( | char | hi, | |
char | lo | |||
) | [static] |
Definition at line 478 of file xmltok.c.
00479 { 00480 switch ((unsigned char)hi) { 00481 case 0xD8: case 0xD9: case 0xDA: case 0xDB: 00482 return BT_LEAD4; 00483 case 0xDC: case 0xDD: case 0xDE: case 0xDF: 00484 return BT_TRAIL; 00485 case 0xFF: 00486 switch ((unsigned char)lo) { 00487 case 0xFF: 00488 case 0xFE: 00489 return BT_NONXML; 00490 } 00491 break; 00492 } 00493 return BT_NONASCII; 00494 }
static int unknown_isInvalid | ( | const ENCODING * | enc, | |
const char * | p | |||
) | [static] |
Definition at line 1201 of file xmltok.c.
References checkCharRefNumber(), and unknown_encoding::userData.
Referenced by XmlInitUnknownEncoding().
01202 { 01203 int c = ((const struct unknown_encoding *)enc) 01204 ->convert(((const struct unknown_encoding *)enc)->userData, p); 01205 return (c & ~0xFFFF) || checkCharRefNumber(c) < 0; 01206 }
static int unknown_isName | ( | const ENCODING * | enc, | |
const char * | p | |||
) | [static] |
Definition at line 1181 of file xmltok.c.
References UCS2_GET_NAMING, and unknown_encoding::userData.
Referenced by XmlInitUnknownEncoding().
01182 { 01183 int c = ((const struct unknown_encoding *)enc) 01184 ->convert(((const struct unknown_encoding *)enc)->userData, p); 01185 if (c & ~0xFFFF) 01186 return 0; 01187 return UCS2_GET_NAMING(namePages, c >> 8, c & 0xFF); 01188 }
static int unknown_isNmstrt | ( | const ENCODING * | enc, | |
const char * | p | |||
) | [static] |
Definition at line 1191 of file xmltok.c.
References UCS2_GET_NAMING, and unknown_encoding::userData.
Referenced by XmlInitUnknownEncoding().
01192 { 01193 int c = ((const struct unknown_encoding *)enc) 01194 ->convert(((const struct unknown_encoding *)enc)->userData, p); 01195 if (c & ~0xFFFF) 01196 return 0; 01197 return UCS2_GET_NAMING(nmstrtPages, c >> 8, c & 0xFF); 01198 }
static void unknown_toUtf16 | ( | const ENCODING * | enc, | |
const char ** | fromP, | |||
const char * | fromLim, | |||
unsigned short ** | toP, | |||
const unsigned short * | toLim | |||
) | [static] |
Definition at line 1243 of file xmltok.c.
References eformat::old::convert(), normal_encoding::enc, and unknown_encoding::userData.
Referenced by XmlInitUnknownEncoding().
01246 { 01247 while (*fromP != fromLim && *toP != toLim) { 01248 unsigned short c 01249 = ((const struct unknown_encoding *)enc)->utf16[(unsigned char)**fromP]; 01250 if (c == 0) { 01251 c = (unsigned short)((const struct unknown_encoding *)enc) 01252 ->convert(((const struct unknown_encoding *)enc)->userData, *fromP); 01253 *fromP += ((const struct normal_encoding *)enc)->type[(unsigned char)**fromP] 01254 - (BT_LEAD2 - 2); 01255 } 01256 else 01257 (*fromP)++; 01258 *(*toP)++ = c; 01259 } 01260 }
static void unknown_toUtf8 | ( | const ENCODING * | enc, | |
const char ** | fromP, | |||
const char * | fromLim, | |||
char ** | toP, | |||
const char * | toLim | |||
) | [static] |
Definition at line 1209 of file xmltok.c.
References unknown_encoding::userData, unknown_encoding::utf8, and XmlUtf8Encode().
Referenced by XmlInitUnknownEncoding().
01212 { 01213 char buf[XML_UTF8_ENCODE_MAX]; 01214 for (;;) { 01215 const char *utf8; 01216 int n; 01217 if (*fromP == fromLim) 01218 break; 01219 utf8 = ((const struct unknown_encoding *)enc)->utf8[(unsigned char)**fromP]; 01220 n = *utf8++; 01221 if (n == 0) { 01222 int c = ((const struct unknown_encoding *)enc) 01223 ->convert(((const struct unknown_encoding *)enc)->userData, *fromP); 01224 n = XmlUtf8Encode(c, buf); 01225 if (n > toLim - *toP) 01226 break; 01227 utf8 = buf; 01228 *fromP += ((const struct normal_encoding *)enc)->type[(unsigned char)**fromP] 01229 - (BT_LEAD2 - 2); 01230 } 01231 else { 01232 if (n > toLim - *toP) 01233 break; 01234 (*fromP)++; 01235 } 01236 do { 01237 *(*toP)++ = *utf8++; 01238 } while (--n != 0); 01239 } 01240 }
static int utf8_isInvalid3 | ( | const ENCODING * | enc, | |
const char * | p | |||
) | [static] |
Definition at line 132 of file xmltok.c.
References UTF8_INVALID3.
00133 { 00134 return UTF8_INVALID3((const unsigned char *)p); 00135 }
static int utf8_isInvalid4 | ( | const ENCODING * | enc, | |
const char * | p | |||
) | [static] |
Definition at line 138 of file xmltok.c.
References UTF8_INVALID4.
00139 { 00140 return UTF8_INVALID4((const unsigned char *)p); 00141 }
static int utf8_isName2 | ( | const ENCODING * | enc, | |
const char * | p | |||
) | [static] |
Definition at line 102 of file xmltok.c.
References UTF8_GET_NAMING2.
00103 { 00104 return UTF8_GET_NAMING2(namePages, (const unsigned char *)p); 00105 }
static int utf8_isName3 | ( | const ENCODING * | enc, | |
const char * | p | |||
) | [static] |
Definition at line 108 of file xmltok.c.
References UTF8_GET_NAMING3.
00109 { 00110 return UTF8_GET_NAMING3(namePages, (const unsigned char *)p); 00111 }
static int utf8_isNmstrt2 | ( | const ENCODING * | enc, | |
const char * | p | |||
) | [static] |
Definition at line 116 of file xmltok.c.
References UTF8_GET_NAMING2.
00117 { 00118 return UTF8_GET_NAMING2(nmstrtPages, (const unsigned char *)p); 00119 }
static int utf8_isNmstrt3 | ( | const ENCODING * | enc, | |
const char * | p | |||
) | [static] |
Definition at line 122 of file xmltok.c.
References UTF8_GET_NAMING3.
00123 { 00124 return UTF8_GET_NAMING3(nmstrtPages, (const unsigned char *)p); 00125 }
static void utf8_toUtf16 | ( | const ENCODING * | enc, | |
const char ** | fromP, | |||
const char * | fromLim, | |||
unsigned short ** | toP, | |||
const unsigned short * | toLim | |||
) | [static] |
Definition at line 303 of file xmltok.c.
References type.
00306 { 00307 unsigned short *to = *toP; 00308 const char *from = *fromP; 00309 while (from != fromLim && to != toLim) { 00310 switch (((struct normal_encoding *)enc)->type[(unsigned char)*from]) { 00311 case BT_LEAD2: 00312 *to++ = ((from[0] & 0x1f) << 6) | (from[1] & 0x3f); 00313 from += 2; 00314 break; 00315 case BT_LEAD3: 00316 *to++ = ((from[0] & 0xf) << 12) | ((from[1] & 0x3f) << 6) | (from[2] & 0x3f); 00317 from += 3; 00318 break; 00319 case BT_LEAD4: 00320 { 00321 unsigned long n; 00322 if (to + 1 == toLim) 00323 break; 00324 n = ((from[0] & 0x7) << 18) | ((from[1] & 0x3f) << 12) | ((from[2] & 0x3f) << 6) | (from[3] & 0x3f); 00325 n -= 0x10000; 00326 to[0] = (unsigned short)((n >> 10) | 0xD800); 00327 to[1] = (unsigned short)((n & 0x3FF) | 0xDC00); 00328 to += 2; 00329 from += 4; 00330 } 00331 break; 00332 default: 00333 *to++ = *from++; 00334 break; 00335 } 00336 } 00337 *fromP = from; 00338 *toP = to; 00339 }
static void utf8_toUtf8 | ( | const ENCODING * | enc, | |
const char ** | fromP, | |||
const char * | fromLim, | |||
char ** | toP, | |||
const char * | toLim | |||
) | [static] |
Definition at line 284 of file xmltok.c.
00287 { 00288 char *to; 00289 const char *from; 00290 if (fromLim - *fromP > toLim - *toP) { 00291 /* Avoid copying partial characters. */ 00292 for (fromLim = *fromP + (toLim - *toP); fromLim > *fromP; fromLim--) 00293 if (((unsigned char)fromLim[-1] & 0xc0) != 0x80) 00294 break; 00295 } 00296 for (to = *toP, from = *fromP; from != fromLim; from++, to++) 00297 *to = *from; 00298 *fromP = from; 00299 *toP = to; 00300 }
ENCODING* XmlInitUnknownEncoding | ( | void * | mem, | |
int * | table, | |||
int(*)(void *userData, const char *p) | convert, | |||
void * | userData | |||
) |
Definition at line 1263 of file xmltok.c.
References checkCharRefNumber(), unknown_encoding::convert, normal_encoding::enc, for, genRecEmupikp::i, if(), normal_encoding::isInvalid2, normal_encoding::isInvalid3, normal_encoding::isInvalid4, normal_encoding::isName2, normal_encoding::isName3, normal_encoding::isName4, normal_encoding::isNmstrt2, normal_encoding::isNmstrt3, normal_encoding::isNmstrt4, latin1_encoding, unknown_encoding::normal, normal_encoding::type, UCS2_GET_NAMING, unknown_isInvalid(), unknown_isName(), unknown_isNmstrt(), unknown_toUtf16(), unknown_toUtf8(), unknown_encoding::userData, unknown_encoding::utf16, unknown_encoding::utf8, and XmlUtf8Encode().
01267 { 01268 int i; 01269 struct unknown_encoding *e = mem; 01270 for (i = 0; i < sizeof(struct normal_encoding); i++) 01271 ((char *)mem)[i] = ((char *)&latin1_encoding)[i]; 01272 for (i = 0; i < 128; i++) 01273 if (latin1_encoding.type[i] != BT_OTHER 01274 && latin1_encoding.type[i] != BT_NONXML 01275 && table[i] != i) 01276 return 0; 01277 for (i = 0; i < 256; i++) { 01278 int c = table[i]; 01279 if (c == -1) { 01280 e->normal.type[i] = BT_MALFORM; 01281 /* This shouldn't really get used. */ 01282 e->utf16[i] = 0xFFFF; 01283 e->utf8[i][0] = 1; 01284 e->utf8[i][1] = 0; 01285 } 01286 else if (c < 0) { 01287 if (c < -4) 01288 return 0; 01289 e->normal.type[i] = BT_LEAD2 - (c + 2); 01290 e->utf8[i][0] = 0; 01291 e->utf16[i] = 0; 01292 } 01293 else if (c < 0x80) { 01294 if (latin1_encoding.type[c] != BT_OTHER 01295 && latin1_encoding.type[c] != BT_NONXML 01296 && c != i) 01297 return 0; 01298 e->normal.type[i] = latin1_encoding.type[c]; 01299 e->utf8[i][0] = 1; 01300 e->utf8[i][1] = (char)c; 01301 e->utf16[i] = c == 0 ? 0xFFFF : c; 01302 } 01303 else if (checkCharRefNumber(c) < 0) { 01304 e->normal.type[i] = BT_NONXML; 01305 /* This shouldn't really get used. 
*/ 01306 e->utf16[i] = 0xFFFF; 01307 e->utf8[i][0] = 1; 01308 e->utf8[i][1] = 0; 01309 } 01310 else { 01311 if (c > 0xFFFF) 01312 return 0; 01313 if (UCS2_GET_NAMING(nmstrtPages, c >> 8, c & 0xff)) 01314 e->normal.type[i] = BT_NMSTRT; 01315 else if (UCS2_GET_NAMING(namePages, c >> 8, c & 0xff)) 01316 e->normal.type[i] = BT_NAME; 01317 else 01318 e->normal.type[i] = BT_OTHER; 01319 e->utf8[i][0] = (char)XmlUtf8Encode(c, e->utf8[i] + 1); 01320 e->utf16[i] = c; 01321 } 01322 } 01323 e->userData = userData; 01324 e->convert = convert; 01325 if (convert) { 01326 e->normal.isName2 = unknown_isName; 01327 e->normal.isName3 = unknown_isName; 01328 e->normal.isName4 = unknown_isName; 01329 e->normal.isNmstrt2 = unknown_isNmstrt; 01330 e->normal.isNmstrt3 = unknown_isNmstrt; 01331 e->normal.isNmstrt4 = unknown_isNmstrt; 01332 e->normal.isInvalid2 = unknown_isInvalid; 01333 e->normal.isInvalid3 = unknown_isInvalid; 01334 e->normal.isInvalid4 = unknown_isInvalid; 01335 } 01336 e->normal.enc.utf8Convert = unknown_toUtf8; 01337 e->normal.enc.utf16Convert = unknown_toUtf16; 01338 return &(e->normal.enc); 01339 }
int XmlSizeOfUnknownEncoding | ( | ) |
Definition at line 1175 of file xmltok.c.
01176 { 01177 return sizeof(struct unknown_encoding); 01178 }
/*
 * Encode one code point as UTF-16.
 *
 * charNum  code point to encode.
 * buf      receives one or two 16-bit units; must have room for two.
 *
 * Returns the number of units written: 1 for values below 0x10000, 2 (a
 * high/low surrogate pair) for values below 0x110000, and 0 with buf
 * untouched when charNum is negative or 0x110000 and above.
 */
int
XmlUtf16Encode(int charNum, unsigned short *buf)
{
  if (charNum < 0 || charNum >= 0x110000)
    return 0;

  if (charNum >= 0x10000) {
    const int offset = charNum - 0x10000;
    buf[0] = (unsigned short)(0xD800 + (offset >> 10));
    buf[1] = (unsigned short)(0xDC00 + (offset & 0x3FF));
    return 2;
  }

  buf[0] = (unsigned short)charNum;
  return 1;
}
int XmlUtf8Encode | ( | int | c, | |
char * | buf | |||
) |
Definition at line 1114 of file xmltok.c.
References UTF8_cval1, UTF8_cval2, UTF8_cval3, and UTF8_cval4.
Referenced by unknown_toUtf8(), and XmlInitUnknownEncoding().
01115 { 01116 enum { 01117 /* minN is minimum legal resulting value for N byte sequence */ 01118 min2 = 0x80, 01119 min3 = 0x800, 01120 min4 = 0x10000 01121 }; 01122 01123 if (c < 0) 01124 return 0; 01125 if (c < min2) { 01126 buf[0] = (c | UTF8_cval1); 01127 return 1; 01128 } 01129 if (c < min3) { 01130 buf[0] = ((c >> 6) | UTF8_cval2); 01131 buf[1] = ((c & 0x3f) | 0x80); 01132 return 2; 01133 } 01134 if (c < min4) { 01135 buf[0] = ((c >> 12) | UTF8_cval3); 01136 buf[1] = (((c >> 6) & 0x3f) | 0x80); 01137 buf[2] = ((c & 0x3f) | 0x80); 01138 return 3; 01139 } 01140 if (c < 0x110000) { 01141 buf[0] = ((c >> 18) | UTF8_cval4); 01142 buf[1] = (((c >> 12) & 0x3f) | 0x80); 01143 buf[2] = (((c >> 6) & 0x3f) | 0x80); 01144 buf[3] = ((c & 0x3f) | 0x80); 01145 return 4; 01146 } 01147 return 0; 01148 }
struct normal_encoding ascii_encoding [static] |
Initial value:
{ { VTABLE1, ascii_toUtf8, latin1_toUtf16, 1, 1, 0 }, { #define BT_COLON }, }
struct normal_encoding big2_encoding [static] |
struct normal_encoding internal_big2_encoding [static] |
Initial value:
{ { VTABLE, 2, 0, 1 }, { #define BT_COLON }, }
Definition at line 861 of file xmltok.c.
Referenced by XmlGetUtf16InternalEncoding().
struct normal_encoding internal_little2_encoding [static] |
Initial value:
{ { VTABLE, 2, 0, 1 }, { #define BT_COLON }, }
Definition at line 722 of file xmltok.c.
Referenced by XmlGetUtf16InternalEncoding().
struct normal_encoding internal_utf8_encoding [static] |
Initial value:
{ { VTABLE1, utf8_toUtf8, utf8_toUtf16, 1, 1, 0 }, { #define BT_COLON }, STANDARD_VTABLE(sb_) NORMAL_VTABLE(utf8_) }
Definition at line 376 of file xmltok.c.
Referenced by XmlGetUtf8InternalEncoding().
struct normal_encoding latin1_encoding [static] |
Initial value:
{ { VTABLE1, latin1_toUtf8, latin1_toUtf16, 1, 0, 0 }, { #define BT_COLON }, }
Definition at line 434 of file xmltok.c.
Referenced by checkCharRefNumber(), and XmlInitUnknownEncoding().
struct normal_encoding little2_encoding [static] |
struct normal_encoding utf8_encoding [static] |
Initial value:
{ { VTABLE1, utf8_toUtf8, utf8_toUtf16, 1, 1, 0 }, { #define BT_COLON }, STANDARD_VTABLE(sb_) NORMAL_VTABLE(utf8_) }
Definition at line 352 of file xmltok.c.
Referenced by initUpdatePosition().