/home/bes3soft/bes3soft/Boss/7.0.2/dist/7.0.2/Calibration/xmlBase/xmlBase-00-00-03/expat/xmltok.c File Reference

#include "xmldef.h"
#include "xmltok.h"
#include "nametab.h"
#include "xmltok_impl.h"
#include "xmltok_impl.c"
#include "asciitab.h"
#include "utf8tab.h"
#include "iasciitab.h"
#include "latin1tab.h"
#include "xmltok_ns.c"

Go to the source code of this file.

Classes

struct normal_encoding

struct unknown_encoding

Defines

#define VTABLE1

#define VTABLE   VTABLE1, PREFIX(toUtf8), PREFIX(toUtf16)

#define UCS2_GET_NAMING(pages, hi, lo)   (namingBitmap[(pages[hi] << 3) + ((lo) >> 5)] & (1 << ((lo) & 0x1F)))

#define UTF8_GET_NAMING2(pages, byte)

#define UTF8_GET_NAMING3(pages, byte)

#define UTF8_GET_NAMING(pages, p, n)

#define UTF8_INVALID3(p)

#define UTF8_INVALID4(p)   ((*p) == 0xF4 && ((p)[1] & 0x30) != 0)

#define utf8_isName4   isNever

#define utf8_isNmstrt4   isNever

#define utf8_isInvalid2   isNever

#define STANDARD_VTABLE(E)

#define NORMAL_VTABLE(E)

#define MINBPC(enc)   1

#define SB_BYTE_TYPE(enc, p)   (((struct normal_encoding *)(enc))->type[(unsigned char)*(p)])

#define BYTE_TYPE(enc, p)   SB_BYTE_TYPE(enc, p)

#define BYTE_TO_ASCII(enc, p)   (*p)

#define IS_NAME_CHAR(enc, p, n)   (((const struct normal_encoding *)(enc))->isName ## n(enc, p))

#define IS_NMSTRT_CHAR(enc, p, n)   (((const struct normal_encoding *)(enc))->isNmstrt ## n(enc, p))

#define IS_INVALID_CHAR(enc, p, n)   (((const struct normal_encoding *)(enc))->isInvalid ## n(enc, p))

#define IS_NAME_CHAR_MINBPC(enc, p)   (0)

#define IS_NMSTRT_CHAR_MINBPC(enc, p)   (0)

#define CHAR_MATCHES(enc, p, c)   (*(p) == c)

#define PREFIX(ident)   normal_ ## ident

#define BT_COLON   BT_NMSTRT

#define BT_COLON   BT_NMSTRT

#define BT_COLON   BT_NMSTRT

#define BT_COLON   BT_NMSTRT

#define DEFINE_UTF16_TO_UTF8(E)

#define DEFINE_UTF16_TO_UTF16(E)

#define SET2(ptr, ch)   (((ptr)[0] = ((ch) & 0xff)), ((ptr)[1] = ((ch) >> 8)))

#define GET_LO(ptr)   ((unsigned char)(ptr)[0])

#define GET_HI(ptr)   ((unsigned char)(ptr)[1])

#define SET2(ptr, ch)   (((ptr)[0] = ((ch) >> 8)), ((ptr)[1] = ((ch) & 0xFF)))

#define GET_LO(ptr)   ((unsigned char)(ptr)[1])

#define GET_HI(ptr)   ((unsigned char)(ptr)[0])

#define LITTLE2_BYTE_TYPE(enc, p)

#define LITTLE2_BYTE_TO_ASCII(enc, p)   ((p)[1] == 0 ? (p)[0] : -1)

#define LITTLE2_CHAR_MATCHES(enc, p, c)   ((p)[1] == 0 && (p)[0] == c)

#define LITTLE2_IS_NAME_CHAR_MINBPC(enc, p)   UCS2_GET_NAMING(namePages, (unsigned char)p[1], (unsigned char)p[0])

#define LITTLE2_IS_NMSTRT_CHAR_MINBPC(enc, p)   UCS2_GET_NAMING(nmstrtPages, (unsigned char)p[1], (unsigned char)p[0])

#define PREFIX(ident)   little2_ ## ident

#define MINBPC(enc)   2

#define BYTE_TYPE(enc, p)   LITTLE2_BYTE_TYPE(enc, p)

#define BYTE_TO_ASCII(enc, p)   LITTLE2_BYTE_TO_ASCII(enc, p)

#define CHAR_MATCHES(enc, p, c)   LITTLE2_CHAR_MATCHES(enc, p, c)

#define IS_NAME_CHAR(enc, p, n)   0

#define IS_NAME_CHAR_MINBPC(enc, p)   LITTLE2_IS_NAME_CHAR_MINBPC(enc, p)

#define IS_NMSTRT_CHAR(enc, p, n)   (0)

#define IS_NMSTRT_CHAR_MINBPC(enc, p)   LITTLE2_IS_NMSTRT_CHAR_MINBPC(enc, p)

#define BT_COLON   BT_NMSTRT

#define BT_COLON   BT_NMSTRT

#define BIG2_BYTE_TYPE(enc, p)

#define BIG2_BYTE_TO_ASCII(enc, p)   ((p)[0] == 0 ? (p)[1] : -1)

#define BIG2_CHAR_MATCHES(enc, p, c)   ((p)[0] == 0 && (p)[1] == c)

#define BIG2_IS_NAME_CHAR_MINBPC(enc, p)   UCS2_GET_NAMING(namePages, (unsigned char)p[0], (unsigned char)p[1])

#define BIG2_IS_NMSTRT_CHAR_MINBPC(enc, p)   UCS2_GET_NAMING(nmstrtPages, (unsigned char)p[0], (unsigned char)p[1])

#define PREFIX(ident)   big2_ ## ident

#define MINBPC(enc)   2

#define BYTE_TYPE(enc, p)   BIG2_BYTE_TYPE(enc, p)

#define BYTE_TO_ASCII(enc, p)   BIG2_BYTE_TO_ASCII(enc, p)

#define CHAR_MATCHES(enc, p, c)   BIG2_CHAR_MATCHES(enc, p, c)

#define IS_NAME_CHAR(enc, p, n)   0

#define IS_NAME_CHAR_MINBPC(enc, p)   BIG2_IS_NAME_CHAR_MINBPC(enc, p)

#define IS_NMSTRT_CHAR(enc, p, n)   (0)

#define IS_NMSTRT_CHAR_MINBPC(enc, p)   BIG2_IS_NMSTRT_CHAR_MINBPC(enc, p)

#define BT_COLON   BT_NMSTRT

#define BT_COLON   BT_NMSTRT

#define INIT_ENC_INDEX(enc)   ((enc)->initEnc.isUtf16)

#define NS(x)   x

#define ns(x)   x

Enumerations

enum { UTF8_cval1 = 0x00, UTF8_cval2 = 0xc0, UTF8_cval3 = 0xe0, UTF8_cval4 = 0xf0 }

enum {
  UNKNOWN_ENC = -1, ISO_8859_1_ENC = 0, US_ASCII_ENC, UTF_8_ENC,
  UTF_16_ENC, UTF_16BE_ENC, UTF_16LE_ENC, NO_ENC
}

Functions

static int isNever (const ENCODING *enc, const char *p)

static int utf8_isName2 (const ENCODING *enc, const char *p)

static int utf8_isName3 (const ENCODING *enc, const char *p)

static int utf8_isNmstrt2 (const ENCODING *enc, const char *p)

static int utf8_isNmstrt3 (const ENCODING *enc, const char *p)

static int utf8_isInvalid3 (const ENCODING *enc, const char *p)

static int utf8_isInvalid4 (const ENCODING *enc, const char *p)

static int checkCharRefNumber (int result)

static void utf8_toUtf8 (const ENCODING *enc, const char **fromP, const char *fromLim, char **toP, const char *toLim)

static void utf8_toUtf16 (const ENCODING *enc, const char **fromP, const char *fromLim, unsigned short **toP, const unsigned short *toLim)

static void latin1_toUtf8 (const ENCODING *enc, const char **fromP, const char *fromLim, char **toP, const char *toLim)

static void latin1_toUtf16 (const ENCODING *enc, const char **fromP, const char *fromLim, unsigned short **toP, const unsigned short *toLim)

static void ascii_toUtf8 (const ENCODING *enc, const char **fromP, const char *fromLim, char **toP, const char *toLim)

static int unicode_byte_type (char hi, char lo)

static int streqci (const char *s1, const char *s2)

static void initUpdatePosition (const ENCODING *enc, const char *ptr, const char *end, POSITION *pos)

static int toAscii (const ENCODING *enc, const char *ptr, const char *end)

static int isSpace (int c)

static int parsePseudoAttribute (const ENCODING *enc, const char *ptr, const char *end, const char **namePtr, const char **valPtr, const char **nextTokPtr)

static int doParseXmlDecl (const ENCODING *(*encodingFinder)(const ENCODING *, const char *, const char *), int isGeneralTextEntity, const ENCODING *enc, const char *ptr, const char *end, const char **badPtr, const char **versionPtr, const char **encodingName, const ENCODING **encoding, int *standalone)

int XmlUtf8Encode (int c, char *buf)

int XmlUtf16Encode (int charNum, unsigned short *buf)

int XmlSizeOfUnknownEncoding ()

static int unknown_isName (const ENCODING *enc, const char *p)

static int unknown_isNmstrt (const ENCODING *enc, const char *p)

static int unknown_isInvalid (const ENCODING *enc, const char *p)

static void unknown_toUtf8 (const ENCODING *enc, const char **fromP, const char *fromLim, char **toP, const char *toLim)

static void unknown_toUtf16 (const ENCODING *enc, const char **fromP, const char *fromLim, unsigned short **toP, const unsigned short *toLim)

ENCODING * XmlInitUnknownEncoding (void *mem, int *table, int(*convert)(void *userData, const char *p), void *userData)

static int getEncodingIndex (const char *name)

static int initScan (const ENCODING **encodingTable, const INIT_ENCODING *enc, int state, const char *ptr, const char *end, const char **nextTokPtr)

Variables

static struct normal_encoding utf8_encoding

static struct normal_encoding internal_utf8_encoding

static struct normal_encoding latin1_encoding

static struct normal_encoding ascii_encoding

static struct normal_encoding little2_encoding

static struct normal_encoding internal_little2_encoding

static struct normal_encoding big2_encoding

static struct normal_encoding internal_big2_encoding

Define Documentation

#define BIG2_BYTE_TO_ASCII	(	enc,
		p		)	((p)[0] == 0 ? (p)[1] : -1)

Definition at line 740 of file xmltok.c.

#define BIG2_BYTE_TYPE	(	enc,
		p		)

Value:

((p)[0] == 0 \
  ? ((struct normal_encoding *)(enc))->type[(unsigned char)(p)[1]] \
  : unicode_byte_type((p)[0], (p)[1]))

Definition at line 736 of file xmltok.c.

#define BIG2_CHAR_MATCHES	(	enc,
		p,
		c	)	((p)[0] == 0 && (p)[1] == c)

Definition at line 741 of file xmltok.c.

#define BIG2_IS_NAME_CHAR_MINBPC	(	enc,
		p		)	UCS2_GET_NAMING(namePages, (unsigned char)p[0], (unsigned char)p[1])

Definition at line 742 of file xmltok.c.

#define BIG2_IS_NMSTRT_CHAR_MINBPC	(	enc,
		p		)	UCS2_GET_NAMING(nmstrtPages, (unsigned char)p[0], (unsigned char)p[1])

Definition at line 744 of file xmltok.c.

#define BT_COLON BT_NMSTRT

#define BYTE_TO_ASCII	(	enc,
		p		)	BIG2_BYTE_TO_ASCII(enc, p)

Definition at line 789 of file xmltok.c.

#define BYTE_TO_ASCII	(	enc,
		p		)	LITTLE2_BYTE_TO_ASCII(enc, p)

Definition at line 789 of file xmltok.c.

#define BYTE_TO_ASCII	(	enc,
		p		)	(*p)

Definition at line 789 of file xmltok.c.

#define BYTE_TYPE	(	enc,
		p		)	BIG2_BYTE_TYPE(enc, p)

Definition at line 788 of file xmltok.c.

#define BYTE_TYPE	(	enc,
		p		)	LITTLE2_BYTE_TYPE(enc, p)

Definition at line 788 of file xmltok.c.

#define BYTE_TYPE	(	enc,
		p		)	SB_BYTE_TYPE(enc, p)

Definition at line 788 of file xmltok.c.

#define CHAR_MATCHES	(	enc,
		p,
		c	)	BIG2_CHAR_MATCHES(enc, p, c)

Definition at line 790 of file xmltok.c.

#define CHAR_MATCHES	(	enc,
		p,
		c	)	LITTLE2_CHAR_MATCHES(enc, p, c)

Definition at line 790 of file xmltok.c.

#define CHAR_MATCHES	(	enc,
		p,
		c	)	(*(p) == c)

Definition at line 790 of file xmltok.c.

#define DEFINE_UTF16_TO_UTF16 ( E )

Value:

static \
void E ## toUtf16(const ENCODING *enc, \
                  const char **fromP, const char *fromLim, \
                  unsigned short **toP, const unsigned short *toLim) \
{ \
  /* Avoid copying first half only of surrogate */ \
  if (fromLim - *fromP > ((toLim - *toP) << 1) \
      && (GET_HI(fromLim - 2) & 0xF8) == 0xD8) \
    fromLim -= 2; \
  for (; *fromP != fromLim && *toP != toLim; *fromP += 2) \
    *(*toP)++ = (GET_HI(*fromP) << 8) | GET_LO(*fromP); \
}

Definition at line 559 of file xmltok.c.

#define DEFINE_UTF16_TO_UTF8 ( E )

Definition at line 496 of file xmltok.c.

#define GET_HI ( ptr ) ((unsigned char)(ptr)[0])

Definition at line 588 of file xmltok.c.

#define GET_HI ( ptr ) ((unsigned char)(ptr)[1])

Definition at line 588 of file xmltok.c.

#define GET_LO ( ptr ) ((unsigned char)(ptr)[1])

Definition at line 587 of file xmltok.c.

#define GET_LO ( ptr ) ((unsigned char)(ptr)[0])

Definition at line 587 of file xmltok.c.

#define INIT_ENC_INDEX ( enc ) ((enc)->initEnc.isUtf16)

Definition at line 1378 of file xmltok.c.

Referenced by initScan(), and XmlInitEncoding().

#define IS_INVALID_CHAR	(	enc,
		p,
		n	)	(((const struct normal_encoding *)(enc))->isInvalid ## n(enc, p))

Definition at line 237 of file xmltok.c.

#define IS_NAME_CHAR	(	enc,
		p,
		n	)	0

Definition at line 791 of file xmltok.c.

#define IS_NAME_CHAR	(	enc,
		p,
		n	)	0

Definition at line 791 of file xmltok.c.

#define IS_NAME_CHAR	(	enc,
		p,
		n	)	(((const struct normal_encoding *)(enc))->isName ## n(enc, p))

Definition at line 791 of file xmltok.c.

#define IS_NAME_CHAR_MINBPC	(	enc,
		p		)	BIG2_IS_NAME_CHAR_MINBPC(enc, p)

Definition at line 792 of file xmltok.c.

#define IS_NAME_CHAR_MINBPC	(	enc,
		p		)	LITTLE2_IS_NAME_CHAR_MINBPC(enc, p)

Definition at line 792 of file xmltok.c.

#define IS_NAME_CHAR_MINBPC	(	enc,
		p		)	(0)

Definition at line 792 of file xmltok.c.

#define IS_NMSTRT_CHAR	(	enc,
		p,
		n	)	(0)

Definition at line 793 of file xmltok.c.

#define IS_NMSTRT_CHAR	(	enc,
		p,
		n	)	(0)

Definition at line 793 of file xmltok.c.

#define IS_NMSTRT_CHAR	(	enc,
		p,
		n	)	(((const struct normal_encoding *)(enc))->isNmstrt ## n(enc, p))

Definition at line 793 of file xmltok.c.

#define IS_NMSTRT_CHAR_MINBPC	(	enc,
		p		)	BIG2_IS_NMSTRT_CHAR_MINBPC(enc, p)

Definition at line 794 of file xmltok.c.

#define IS_NMSTRT_CHAR_MINBPC	(	enc,
		p		)	LITTLE2_IS_NMSTRT_CHAR_MINBPC(enc, p)

Definition at line 794 of file xmltok.c.

#define IS_NMSTRT_CHAR_MINBPC	(	enc,
		p		)	(0)

Definition at line 794 of file xmltok.c.

#define LITTLE2_BYTE_TO_ASCII	(	enc,
		p		)	((p)[1] == 0 ? (p)[0] : -1)

Definition at line 601 of file xmltok.c.

#define LITTLE2_BYTE_TYPE	(	enc,
		p		)

Value:

((p)[1] == 0 \
  ? ((struct normal_encoding *)(enc))->type[(unsigned char)*(p)] \
  : unicode_byte_type((p)[1], (p)[0]))

Definition at line 597 of file xmltok.c.

#define LITTLE2_CHAR_MATCHES	(	enc,
		p,
		c	)	((p)[1] == 0 && (p)[0] == c)

Definition at line 602 of file xmltok.c.

#define LITTLE2_IS_NAME_CHAR_MINBPC	(	enc,
		p		)	UCS2_GET_NAMING(namePages, (unsigned char)p[1], (unsigned char)p[0])

Definition at line 603 of file xmltok.c.

#define LITTLE2_IS_NMSTRT_CHAR_MINBPC	(	enc,
		p		)	UCS2_GET_NAMING(nmstrtPages, (unsigned char)p[1], (unsigned char)p[0])

Definition at line 605 of file xmltok.c.

#define MINBPC ( enc ) 2

Definition at line 786 of file xmltok.c.

#define MINBPC ( enc ) 2

Definition at line 786 of file xmltok.c.

#define MINBPC ( enc ) 1

Definition at line 786 of file xmltok.c.

#define NORMAL_VTABLE ( E )

Value:

E ## isName2, \
 E ## isName3, \
 E ## isName4, \
 E ## isNmstrt2, \
 E ## isNmstrt3, \
 E ## isNmstrt4, \
 E ## isInvalid2, \
 E ## isInvalid3, \
 E ## isInvalid4

Definition at line 179 of file xmltok.c.

#define NS ( x ) x

Definition at line 1503 of file xmltok.c.

Referenced by findEncoding(), initScanContent(), initScanProlog(), XmlInitEncoding(), and XmlParseXmlDecl().

#define PREFIX ( ident ) big2_ ## ident

Definition at line 785 of file xmltok.c.

#define PREFIX ( ident ) little2_ ## ident

Definition at line 785 of file xmltok.c.

#define PREFIX ( ident ) normal_ ## ident

Definition at line 785 of file xmltok.c.

#define SB_BYTE_TYPE	(	enc,
		p		)	(((struct normal_encoding *)(enc))->type[(unsigned char)*(p)])

Definition at line 206 of file xmltok.c.

#define SET2	(	ptr,
		ch		)	(((ptr)[0] = ((ch) >> 8)), ((ptr)[1] = ((ch) & 0xFF)))

Definition at line 585 of file xmltok.c.

#define SET2	(	ptr,
		ch		)	(((ptr)[0] = ((ch) & 0xff)), ((ptr)[1] = ((ch) >> 8)))

Definition at line 585 of file xmltok.c.

#define STANDARD_VTABLE ( E )

Definition at line 175 of file xmltok.c.

#define UCS2_GET_NAMING	(	pages,
		hi,
		lo	)	(namingBitmap[(pages[hi] << 3) + ((lo) >> 5)] & (1 << ((lo) & 0x1F)))

Definition at line 54 of file xmltok.c.

Referenced by unknown_isName(), unknown_isNmstrt(), and XmlInitUnknownEncoding().

#define UTF8_GET_NAMING	(	pages,
		p,
		n	)

Value:

((n) == 2 \
  ? UTF8_GET_NAMING2(pages, (const unsigned char *)(p)) \
  : ((n) == 3 \
     ? UTF8_GET_NAMING3(pages, (const unsigned char *)(p)) \
     : 0))

Definition at line 79 of file xmltok.c.

#define UTF8_GET_NAMING2	(	pages,
		byte		)

Value:

(namingBitmap[((pages)[(((byte)[0]) >> 2) & 7] << 3) \
                      + ((((byte)[0]) & 3) << 1) \
                      + ((((byte)[1]) >> 5) & 1)] \
         & (1 << (((byte)[1]) & 0x1F)))

Definition at line 61 of file xmltok.c.

Referenced by utf8_isName2(), and utf8_isNmstrt2().

#define UTF8_GET_NAMING3	(	pages,
		byte		)

Value:

(namingBitmap[((pages)[((((byte)[0]) & 0xF) << 4) \
                             + ((((byte)[1]) >> 2) & 0xF)] \
                       << 3) \
                      + ((((byte)[1]) & 3) << 1) \
                      + ((((byte)[2]) >> 5) & 1)] \
         & (1 << (((byte)[2]) & 0x1F)))

Definition at line 71 of file xmltok.c.

Referenced by utf8_isName3(), and utf8_isNmstrt3().

#define UTF8_INVALID3 ( p )

Value:

((*p) == 0xED \
  ? (((p)[1] & 0x20) != 0) \
  : ((*p) == 0xEF \
     ? ((p)[1] == 0xBF && ((p)[2] == 0xBF || (p)[2] == 0xBE)) \
     : 0))

Definition at line 86 of file xmltok.c.

Referenced by utf8_isInvalid3().

#define UTF8_INVALID4 ( p ) ((*p) == 0xF4 && ((p)[1] & 0x30) != 0)

Definition at line 93 of file xmltok.c.

Referenced by utf8_isInvalid4().

#define utf8_isInvalid2 isNever

Definition at line 129 of file xmltok.c.

#define utf8_isName4 isNever

Definition at line 113 of file xmltok.c.

#define utf8_isNmstrt4 isNever

Definition at line 127 of file xmltok.c.

#define VTABLE VTABLE1, PREFIX(toUtf8), PREFIX(toUtf16)

Definition at line 52 of file xmltok.c.

#define VTABLE1

Value:

{ PREFIX(prologTok), PREFIX(contentTok), PREFIX(cdataSectionTok) }, \
  { PREFIX(attributeValueTok), PREFIX(entityValueTok) }, \
  PREFIX(sameName), \
  PREFIX(nameMatchesAscii), \
  PREFIX(nameLength), \
  PREFIX(skipS), \
  PREFIX(getAtts), \
  PREFIX(charRefNumber), \
  PREFIX(predefinedEntityName), \
  PREFIX(updatePosition), \
  PREFIX(isPublicId)

Definition at line 39 of file xmltok.c.

Enumeration Type Documentation

anonymous enum

Enumerator:

UTF8_cval1
UTF8_cval2
UTF8_cval3
UTF8_cval4

Definition at line 276 of file xmltok.c.

00276      {  /* UTF8_cvalN is value of masked first byte of N byte sequence */
00277   UTF8_cval1 = 0x00,
00278   UTF8_cval2 = 0xc0,
00279   UTF8_cval3 = 0xe0,
00280   UTF8_cval4 = 0xf0
00281 };

anonymous enum

Enumerator:

UNKNOWN_ENC
ISO_8859_1_ENC
US_ASCII_ENC
UTF_8_ENC
UTF_16_ENC
UTF_16BE_ENC
UTF_16LE_ENC
NO_ENC

Definition at line 1343 of file xmltok.c.

01343      {
01344   UNKNOWN_ENC = -1,
01345   ISO_8859_1_ENC = 0,
01346   US_ASCII_ENC,
01347   UTF_8_ENC,
01348   UTF_16_ENC,
01349   UTF_16BE_ENC,
01350   UTF_16LE_ENC,
01351   /* must match encodingNames up to here */
01352   NO_ENC
01353 };

Function Documentation

static void ascii_toUtf8	(	const ENCODING *	enc,
		const char **	fromP,
		const char *	fromLim,
		char **	toP,
		const char *	toLim
	)			`[static]`

Definition at line 446 of file xmltok.c.

00449 {
00450   while (*fromP != fromLim && *toP != toLim)
00451     *(*toP)++ = *(*fromP)++;
00452 }

static int checkCharRefNumber ( int result ) [static]

Definition at line 1096 of file xmltok.c.

References latin1_encoding, and normal_encoding::type.

Referenced by unknown_isInvalid(), and XmlInitUnknownEncoding().

01097 {
01098   switch (result >> 8) {
01099   case 0xD8: case 0xD9: case 0xDA: case 0xDB:
01100   case 0xDC: case 0xDD: case 0xDE: case 0xDF:
01101     return -1;
01102   case 0:
01103     if (latin1_encoding.type[result] == BT_NONXML)
01104       return -1;
01105     break;
01106   case 0xFF:
01107     if (result == 0xFFFE || result == 0xFFFF)
01108       return -1;
01109     break;
01110   }
01111   return result;
01112 }

static int doParseXmlDecl	(	const ENCODING *(*)(const ENCODING *, const char *, const char *)	encodingFinder,
		int	isGeneralTextEntity,
		const ENCODING *	enc,
		const char *	ptr,
		const char *	end,
		const char **	badPtr,
		const char **	versionPtr,
		const char **	encodingName,
		const ENCODING **	encoding,
		int *	standalone
	)			`[static]`

Definition at line 1010 of file xmltok.c.

References isSpace(), parsePseudoAttribute(), and toAscii().

Referenced by XmlParseXmlDecl().

01022 {
01023   const char *val = 0;
01024   const char *name = 0;
01025   ptr += 5 * enc->minBytesPerChar;
01026   end -= 2 * enc->minBytesPerChar;
01027   if (!parsePseudoAttribute(enc, ptr, end, &name, &val, &ptr) || !name) {
01028     *badPtr = ptr;
01029     return 0;
01030   }
01031   if (!XmlNameMatchesAscii(enc, name, "version")) {
01032     if (!isGeneralTextEntity) {
01033       *badPtr = name;
01034       return 0;
01035     }
01036   }
01037   else {
01038     if (versionPtr)
01039       *versionPtr = val;
01040     if (!parsePseudoAttribute(enc, ptr, end, &name, &val, &ptr)) {
01041       *badPtr = ptr;
01042       return 0;
01043     }
01044     if (!name) {
01045       if (isGeneralTextEntity) {
01046         /* a TextDecl must have an EncodingDecl */
01047         *badPtr = ptr;
01048         return 0;
01049       }
01050       return 1;
01051     }
01052   }
01053   if (XmlNameMatchesAscii(enc, name, "encoding")) {
01054     int c = toAscii(enc, val, end);
01055     if (!('a' <= c && c <= 'z') && !('A' <= c && c <= 'Z')) {
01056       *badPtr = val;
01057       return 0;
01058     }
01059     if (encodingName)
01060       *encodingName = val;
01061     if (encoding)
01062       *encoding = encodingFinder(enc, val, ptr - enc->minBytesPerChar);
01063     if (!parsePseudoAttribute(enc, ptr, end, &name, &val, &ptr)) {
01064       *badPtr = ptr;
01065       return 0;
01066     }
01067     if (!name)
01068       return 1;
01069   }
01070   if (!XmlNameMatchesAscii(enc, name, "standalone") || isGeneralTextEntity) {
01071     *badPtr = name;
01072     return 0;
01073   }
01074   if (XmlNameMatchesAscii(enc, val, "yes")) {
01075     if (standalone)
01076       *standalone = 1;
01077   }
01078   else if (XmlNameMatchesAscii(enc, val, "no")) {
01079     if (standalone)
01080       *standalone = 0;
01081   }
01082   else {
01083     *badPtr = val;
01084     return 0;
01085   }
01086   while (isSpace(toAscii(enc, ptr, end)))
01087     ptr += enc->minBytesPerChar;
01088   if (ptr != end) {
01089     *badPtr = ptr;
01090     return 0;
01091   }
01092   return 1;
01093 }

static int getEncodingIndex ( const char * name ) [static]

Definition at line 1356 of file xmltok.c.

References NO_ENC, streqci(), and UNKNOWN_ENC.

Referenced by findEncoding(), and XmlInitEncoding().

01357 {
01358   static const char *encodingNames[] = {
01359     "ISO-8859-1",
01360     "US-ASCII",
01361     "UTF-8",
01362     "UTF-16",
01363     "UTF-16BE"
01364     "UTF-16LE",
01365   };
01366   int i;
01367   if (name == 0)
01368     return NO_ENC;
01369   for (i = 0; i < sizeof(encodingNames)/sizeof(encodingNames[0]); i++)
01370     if (streqci(name, encodingNames[i]))
01371       return i;
01372   return UNKNOWN_ENC;
01373 }

static int initScan	(	const ENCODING **	encodingTable,
		const INIT_ENCODING *	enc,
		int	state,
		const char *	ptr,
		const char *	end,
		const char **	nextTokPtr
	)			`[static]`

Definition at line 1389 of file xmltok.c.

References INIT_ENC_INDEX, ISO_8859_1_ENC, UTF_16_ENC, UTF_16BE_ENC, UTF_16LE_ENC, and UTF_8_ENC.

Referenced by initScanContent(), and initScanProlog().

01395 {
01396   const ENCODING **encPtr;
01397 
01398   if (ptr == end)
01399     return XML_TOK_NONE;
01400   encPtr = enc->encPtr;
01401   if (ptr + 1 == end) {
01402     /* only a single byte available for auto-detection */
01403     /* a well-formed document entity must have more than one byte */
01404     if (state != XML_CONTENT_STATE)
01405       return XML_TOK_PARTIAL;
01406     /* so we're parsing an external text entity... */
01407     /* if UTF-16 was externally specified, then we need at least 2 bytes */
01408     switch (INIT_ENC_INDEX(enc)) {
01409     case UTF_16_ENC:
01410     case UTF_16LE_ENC:
01411     case UTF_16BE_ENC:
01412       return XML_TOK_PARTIAL;
01413     }
01414     switch ((unsigned char)*ptr) {
01415     case 0xFE:
01416     case 0xFF:
01417     case 0xEF: /* possibly first byte of UTF-8 BOM */
01418       if (INIT_ENC_INDEX(enc) == ISO_8859_1_ENC
01419           && state == XML_CONTENT_STATE)
01420         break;
01421       /* fall through */
01422     case 0x00:
01423     case 0x3C:
01424       return XML_TOK_PARTIAL;
01425     }
01426   }
01427   else {
01428     switch (((unsigned char)ptr[0] << 8) | (unsigned char)ptr[1]) {
01429     case 0xFEFF:
01430       if (INIT_ENC_INDEX(enc) == ISO_8859_1_ENC
01431           && state == XML_CONTENT_STATE)
01432         break;
01433       *nextTokPtr = ptr + 2;
01434       *encPtr = encodingTable[UTF_16BE_ENC];
01435       return XML_TOK_BOM;
01436     /* 00 3C is handled in the default case */
01437     case 0x3C00:
01438       if ((INIT_ENC_INDEX(enc) == UTF_16BE_ENC
01439            || INIT_ENC_INDEX(enc) == UTF_16_ENC)
01440           && state == XML_CONTENT_STATE)
01441         break;
01442       *encPtr = encodingTable[UTF_16LE_ENC];
01443       return XmlTok(*encPtr, state, ptr, end, nextTokPtr);
01444     case 0xFFFE:
01445       if (INIT_ENC_INDEX(enc) == ISO_8859_1_ENC
01446           && state == XML_CONTENT_STATE)
01447         break;
01448       *nextTokPtr = ptr + 2;
01449       *encPtr = encodingTable[UTF_16LE_ENC];
01450       return XML_TOK_BOM;
01451     case 0xEFBB:
01452       /* Maybe a UTF-8 BOM (EF BB BF) */
01453       /* If there's an explicitly specified (external) encoding
01454          of ISO-8859-1 or some flavour of UTF-16
01455          and this is an external text entity,
01456          don't look for the BOM,
01457          because it might be a legal data. */
01458       if (state == XML_CONTENT_STATE) {
01459         int e = INIT_ENC_INDEX(enc);
01460         if (e == ISO_8859_1_ENC || e == UTF_16BE_ENC || e == UTF_16LE_ENC || e == UTF_16_ENC)
01461           break;
01462       }
01463       if (ptr + 2 == end)
01464         return XML_TOK_PARTIAL;
01465       if ((unsigned char)ptr[2] == 0xBF) {
01466         *encPtr = encodingTable[UTF_8_ENC];
01467         return XML_TOK_BOM;
01468       }
01469       break;
01470     default:
01471       if (ptr[0] == '\0') {
01472         /* 0 isn't a legal data character. Furthermore a document entity can only
01473            start with ASCII characters.  So the only way this can fail to be big-endian
01474            UTF-16 if it it's an external parsed general entity that's labelled as
01475            UTF-16LE. */
01476         if (state == XML_CONTENT_STATE && INIT_ENC_INDEX(enc) == UTF_16LE_ENC)
01477           break;
01478         *encPtr = encodingTable[UTF_16BE_ENC];
01479         return XmlTok(*encPtr, state, ptr, end, nextTokPtr);
01480       }
01481       else if (ptr[1] == '\0') {
01482         /* We could recover here in the case:
01483             - parsing an external entity
01484             - second byte is 0
01485             - no externally specified encoding
01486             - no encoding declaration
01487            by assuming UTF-16LE.  But we don't, because this would mean when
01488            presented just with a single byte, we couldn't reliably determine
01489            whether we needed further bytes. */
01490         if (state == XML_CONTENT_STATE)
01491           break;
01492         *encPtr = encodingTable[UTF_16LE_ENC];
01493         return XmlTok(*encPtr, state, ptr, end, nextTokPtr);
01494       }
01495       break;
01496     }
01497   }
01498   *encPtr = encodingTable[INIT_ENC_INDEX(enc)];
01499   return XmlTok(*encPtr, state, ptr, end, nextTokPtr);
01500 }

static void initUpdatePosition	(	const ENCODING *	enc,
		const char *	ptr,
		const char *	end,
		POSITION *	pos
	)			`[static]`

Definition at line 895 of file xmltok.c.

References normal_encoding::enc, and utf8_encoding.

Referenced by XmlInitEncoding().

00897 {
00898   normal_updatePosition(&utf8_encoding.enc, ptr, end, pos);
00899 }

static int isNever	(	const ENCODING *	enc,
		const char *	p
	)			`[static]`

Definition at line 96 of file xmltok.c.

00097 {
00098   return 0;
00099 }

static int isSpace ( int c ) [static]

Definition at line 914 of file xmltok.c.

Referenced by doParseXmlDecl(), and parsePseudoAttribute().

00915 {
00916   switch (c) {
00917   case 0x20:
00918   case 0xD:
00919   case 0xA:
00920   case 0x9:     
00921     return 1;
00922   }
00923   return 0;
00924 }

static void latin1_toUtf16	(	const ENCODING *	enc,
		const char **	fromP,
		const char *	fromLim,
		unsigned short **	toP,
		const unsigned short *	toLim
	)			`[static]`

Definition at line 413 of file xmltok.c.

00416 {
00417   while (*fromP != fromLim && *toP != toLim)
00418     *(*toP)++ = (unsigned char)*(*fromP)++;
00419 }

static void latin1_toUtf8	(	const ENCODING *	enc,
		const char **	fromP,
		const char *	fromLim,
		char **	toP,
		const char *	toLim
	)			`[static]`

Definition at line 388 of file xmltok.c.

References UTF8_cval2.

00391 {
00392   for (;;) {
00393     unsigned char c;
00394     if (*fromP == fromLim)
00395       break;
00396     c = (unsigned char)**fromP;
00397     if (c & 0x80) {
00398       if (toLim - *toP < 2)
00399         break;
00400       *(*toP)++ = ((c >> 6) | UTF8_cval2);
00401       *(*toP)++ = ((c & 0x3f) | 0x80);
00402       (*fromP)++;
00403     }
00404     else {
00405       if (*toP == toLim)
00406         break;
00407       *(*toP)++ = *(*fromP)++;
00408     }
00409   }
00410 }

static int parsePseudoAttribute	(	const ENCODING *	enc,
		const char *	ptr,
		const char *	end,
		const char **	namePtr,
		const char **	valPtr,
		const char **	nextTokPtr
	)			`[static]`

Definition at line 929 of file xmltok.c.

References isSpace(), and toAscii().

Referenced by doParseXmlDecl().

00935 {
00936   int c;
00937   char open;
00938   if (ptr == end) {
00939     *namePtr = 0;
00940     return 1;
00941   }
00942   if (!isSpace(toAscii(enc, ptr, end))) {
00943     *nextTokPtr = ptr;
00944     return 0;
00945   }
00946   do {
00947     ptr += enc->minBytesPerChar;
00948   } while (isSpace(toAscii(enc, ptr, end)));
00949   if (ptr == end) {
00950     *namePtr = 0;
00951     return 1;
00952   }
00953   *namePtr = ptr;
00954   for (;;) {
00955     c = toAscii(enc, ptr, end);
00956     if (c == -1) {
00957       *nextTokPtr = ptr;
00958       return 0;
00959     }
00960     if (c == '=')
00961       break;
00962     if (isSpace(c)) {
00963       do {
00964         ptr += enc->minBytesPerChar;
00965       } while (isSpace(c = toAscii(enc, ptr, end)));
00966       if (c != '=') {
00967         *nextTokPtr = ptr;
00968         return 0;
00969       }
00970       break;
00971     }
00972     ptr += enc->minBytesPerChar;
00973   }
00974   if (ptr == *namePtr) {
00975     *nextTokPtr = ptr;
00976     return 0;
00977   }
00978   ptr += enc->minBytesPerChar;
00979   c = toAscii(enc, ptr, end);
00980   while (isSpace(c)) {
00981     ptr += enc->minBytesPerChar;
00982     c = toAscii(enc, ptr, end);
00983   }
00984   if (c != '"' && c != '\'') {
00985     *nextTokPtr = ptr;
00986     return 0;
00987   }
00988   open = c;
00989   ptr += enc->minBytesPerChar;
00990   *valPtr = ptr;
00991   for (;; ptr += enc->minBytesPerChar) {
00992     c = toAscii(enc, ptr, end);
00993     if (c == open)
00994       break;
00995     if (!('a' <= c && c <= 'z')
00996         && !('A' <= c && c <= 'Z')
00997         && !('0' <= c && c <= '9')
00998         && c != '.'
00999         && c != '-'
01000         && c != '_') {
01001       *nextTokPtr = ptr;
01002       return 0;
01003     }
01004   }
01005   *nextTokPtr = ptr + enc->minBytesPerChar;
01006   return 1;
01007 }

static int streqci	(	const char *	s1,
		const char *	s2
	)			`[static]`

Definition at line 877 of file xmltok.c.

Referenced by findEncoding(), and getEncodingIndex().

00878 {
00879   for (;;) {
00880     char c1 = *s1++;
00881     char c2 = *s2++;
00882     if ('a' <= c1 && c1 <= 'z')
00883       c1 += 'A' - 'a';
00884     if ('a' <= c2 && c2 <= 'z')
00885       c2 += 'A' - 'a';
00886     if (c1 != c2)
00887       return 0;
00888     if (!c1)
00889       break;
00890   }
00891   return 1;
00892 }

static int toAscii	(	const ENCODING *	enc,
		const char *	ptr,
		const char *	end
	)			`[static]`

Definition at line 902 of file xmltok.c.

Referenced by doParseXmlDecl(), and parsePseudoAttribute().

00903 {
00904   char buf[1];
00905   char *p = buf;
00906   XmlUtf8Convert(enc, &ptr, end, &p, p + 1);
00907   if (p == buf)
00908     return -1;
00909   else
00910     return buf[0];
00911 }

static int unicode_byte_type	(	char	hi,
		char	lo
	)			`[static]`

Definition at line 478 of file xmltok.c.

00479 {
00480   switch ((unsigned char)hi) {
00481   case 0xD8: case 0xD9: case 0xDA: case 0xDB:
00482     return BT_LEAD4;
00483   case 0xDC: case 0xDD: case 0xDE: case 0xDF:
00484     return BT_TRAIL;
00485   case 0xFF:
00486     switch ((unsigned char)lo) {
00487     case 0xFF:
00488     case 0xFE:
00489       return BT_NONXML;
00490     }
00491     break;
00492   }
00493   return BT_NONASCII;
00494 }

static int unknown_isInvalid	(	const ENCODING *	enc,
		const char *	p
	)			`[static]`

Definition at line 1201 of file xmltok.c.

References checkCharRefNumber(), and unknown_encoding::userData.

Referenced by XmlInitUnknownEncoding().

01202 {
01203   int c = ((const struct unknown_encoding *)enc)
01204            ->convert(((const struct unknown_encoding *)enc)->userData, p);
01205   return (c & ~0xFFFF) || checkCharRefNumber(c) < 0;
01206 }

static int unknown_isName	(	const ENCODING *	enc,
		const char *	p
	)			`[static]`

Definition at line 1181 of file xmltok.c.

References UCS2_GET_NAMING, and unknown_encoding::userData.

Referenced by XmlInitUnknownEncoding().

01182 {
01183   int c = ((const struct unknown_encoding *)enc)
01184           ->convert(((const struct unknown_encoding *)enc)->userData, p);
01185   if (c & ~0xFFFF)
01186     return 0;
01187   return UCS2_GET_NAMING(namePages, c >> 8, c & 0xFF);
01188 }

static int unknown_isNmstrt	(	const ENCODING *	enc,
		const char *	p
	)			`[static]`

Definition at line 1191 of file xmltok.c.

References UCS2_GET_NAMING, and unknown_encoding::userData.

Referenced by XmlInitUnknownEncoding().

01192 {
01193   int c = ((const struct unknown_encoding *)enc)
01194           ->convert(((const struct unknown_encoding *)enc)->userData, p);
01195   if (c & ~0xFFFF)
01196     return 0;
01197   return UCS2_GET_NAMING(nmstrtPages, c >> 8, c & 0xFF);
01198 }

static void unknown_toUtf16	(	const ENCODING *	enc,
		const char **	fromP,
		const char *	fromLim,
		unsigned short **	toP,
		const unsigned short *	toLim
	)			`[static]`

Definition at line 1243 of file xmltok.c.

References eformat::old::convert(), normal_encoding::enc, and unknown_encoding::userData.

Referenced by XmlInitUnknownEncoding().

01246 {
01247   while (*fromP != fromLim && *toP != toLim) {
01248     unsigned short c
01249       = ((const struct unknown_encoding *)enc)->utf16[(unsigned char)**fromP];
01250     if (c == 0) {
01251       c = (unsigned short)((const struct unknown_encoding *)enc)
01252            ->convert(((const struct unknown_encoding *)enc)->userData, *fromP);
01253       *fromP += ((const struct normal_encoding *)enc)->type[(unsigned char)**fromP]
01254                  - (BT_LEAD2 - 2);
01255     }
01256     else
01257       (*fromP)++;
01258     *(*toP)++ = c;
01259   }
01260 }

static void unknown_toUtf8	(	const ENCODING *	enc,
		const char **	fromP,
		const char *	fromLim,
		char **	toP,
		const char *	toLim
	)			`[static]`

Definition at line 1209 of file xmltok.c.

References unknown_encoding::userData, unknown_encoding::utf8, and XmlUtf8Encode().

Referenced by XmlInitUnknownEncoding().

01212 {
01213   char buf[XML_UTF8_ENCODE_MAX];
01214   for (;;) {
01215     const char *utf8;
01216     int n;
01217     if (*fromP == fromLim)
01218       break;
01219     utf8 = ((const struct unknown_encoding *)enc)->utf8[(unsigned char)**fromP];
01220     n = *utf8++;
01221     if (n == 0) {
01222       int c = ((const struct unknown_encoding *)enc)
01223               ->convert(((const struct unknown_encoding *)enc)->userData, *fromP);
01224       n = XmlUtf8Encode(c, buf);
01225       if (n > toLim - *toP)
01226         break;
01227       utf8 = buf;
01228       *fromP += ((const struct normal_encoding *)enc)->type[(unsigned char)**fromP]
01229                  - (BT_LEAD2 - 2);
01230     }
01231     else {
01232       if (n > toLim - *toP)
01233         break;
01234       (*fromP)++;
01235     }
01236     do {
01237       *(*toP)++ = *utf8++;
01238     } while (--n != 0);
01239   }
01240 }

/*
 * Applies UTF8_INVALID3 to the sequence at p, viewed as unsigned bytes.
 * The enc argument is not used.
 *
 * Definition at line 132 of file xmltok.c.
 * References UTF8_INVALID3.
 */
static int utf8_isInvalid3(const ENCODING *enc, const char *p)
{
  const unsigned char *bytes = (const unsigned char *)p;
  return UTF8_INVALID3(bytes);
}

/*
 * Applies UTF8_INVALID4 to the sequence at p, viewed as unsigned bytes.
 * The enc argument is not used.
 *
 * Definition at line 138 of file xmltok.c.
 * References UTF8_INVALID4.
 */
static int utf8_isInvalid4(const ENCODING *enc, const char *p)
{
  const unsigned char *bytes = (const unsigned char *)p;
  return UTF8_INVALID4(bytes);
}

static int utf8_isName2	(	const ENCODING *	enc,
		const char *	p
	)			`[static]`

Definition at line 102 of file xmltok.c.

References UTF8_GET_NAMING2.

00103 {
00104   return UTF8_GET_NAMING2(namePages, (const unsigned char *)p);
00105 }

static int utf8_isName3	(	const ENCODING *	enc,
		const char *	p
	)			`[static]`

Definition at line 108 of file xmltok.c.

References UTF8_GET_NAMING3.

00109 {
00110   return UTF8_GET_NAMING3(namePages, (const unsigned char *)p);
00111 }

static int utf8_isNmstrt2	(	const ENCODING *	enc,
		const char *	p
	)			`[static]`

Definition at line 116 of file xmltok.c.

References UTF8_GET_NAMING2.

00117 {
00118   return UTF8_GET_NAMING2(nmstrtPages, (const unsigned char *)p);
00119 }

static int utf8_isNmstrt3	(	const ENCODING *	enc,
		const char *	p
	)			`[static]`

Definition at line 122 of file xmltok.c.

References UTF8_GET_NAMING3.

00123 {
00124   return UTF8_GET_NAMING3(nmstrtPages, (const unsigned char *)p);
00125 }

static void utf8_toUtf16	(	const ENCODING *	enc,
		const char **	fromP,
		const char *	fromLim,
		unsigned short **	toP,
		const unsigned short *	toLim
	)			`[static]`

Definition at line 303 of file xmltok.c.

References type.

00306 {
00307   unsigned short *to = *toP;
00308   const char *from = *fromP;
00309   while (from != fromLim && to != toLim) {
00310     switch (((struct normal_encoding *)enc)->type[(unsigned char)*from]) {
00311     case BT_LEAD2:
00312       *to++ = ((from[0] & 0x1f) << 6) | (from[1] & 0x3f);
00313       from += 2;
00314       break;
00315     case BT_LEAD3:
00316       *to++ = ((from[0] & 0xf) << 12) | ((from[1] & 0x3f) << 6) | (from[2] & 0x3f);
00317       from += 3;
00318       break;
00319     case BT_LEAD4:
00320       {
00321         unsigned long n;
00322         if (to + 1 == toLim)
00323           break;
00324         n = ((from[0] & 0x7) << 18) | ((from[1] & 0x3f) << 12) | ((from[2] & 0x3f) << 6) | (from[3] & 0x3f);
00325         n -= 0x10000;
00326         to[0] = (unsigned short)((n >> 10) | 0xD800);
00327         to[1] = (unsigned short)((n & 0x3FF) | 0xDC00);
00328         to += 2;
00329         from += 4;
00330       }
00331       break;
00332     default:
00333       *to++ = *from++;
00334       break;
00335     }
00336   }
00337   *fromP = from;
00338   *toP = to;
00339 }

/*
 * Copies UTF-8 input to the output unchanged and stores the updated
 * positions in *fromP and *toP. The enc argument is not used.
 *
 * When the input is longer than the remaining output space, the copy is
 * shortened so that it ends on a character boundary: the limit is moved
 * back while the first byte that would NOT be copied is a continuation
 * byte (10xxxxxx). Testing the last copied byte instead would end the copy
 * right after a lead byte and emit a truncated sequence.
 *
 * Definition at line 284 of file xmltok.c.
 */
static void utf8_toUtf8(const ENCODING *enc,
                        const char **fromP, const char *fromLim,
                        char **toP, const char *toLim)
{
  char *to;
  const char *from;
  if (fromLim - *fromP > toLim - *toP) {
    /* Avoid copying partial characters. The shortened limit is strictly
       below the original one, so reading fromLim[0] stays inside the input. */
    for (fromLim = *fromP + (toLim - *toP); fromLim > *fromP; fromLim--)
      if (((unsigned char)fromLim[0] & 0xc0) != 0x80)
        break;
  }
  for (to = *toP, from = *fromP; from != fromLim; from++, to++)
    *to = *from;
  *fromP = from;
  *toP = to;
}

ENCODING* XmlInitUnknownEncoding	(	void *	mem,
		int *	table,
		int()(void userData, const char *p)	convert,
		void *	userData
	)

Definition at line 1263 of file xmltok.c.

References checkCharRefNumber(), unknown_encoding::convert, normal_encoding::enc, for, genRecEmupikp::i, if(), normal_encoding::isInvalid2, normal_encoding::isInvalid3, normal_encoding::isInvalid4, normal_encoding::isName2, normal_encoding::isName3, normal_encoding::isName4, normal_encoding::isNmstrt2, normal_encoding::isNmstrt3, normal_encoding::isNmstrt4, latin1_encoding, unknown_encoding::normal, normal_encoding::type, UCS2_GET_NAMING, unknown_isInvalid(), unknown_isName(), unknown_isNmstrt(), unknown_toUtf16(), unknown_toUtf8(), unknown_encoding::userData, unknown_encoding::utf16, unknown_encoding::utf8, and XmlUtf8Encode().

01267 {
01268   int i;
01269   struct unknown_encoding *e = mem;
01270   for (i = 0; i < sizeof(struct normal_encoding); i++)
01271     ((char *)mem)[i] = ((char *)&latin1_encoding)[i];
01272   for (i = 0; i < 128; i++)
01273     if (latin1_encoding.type[i] != BT_OTHER
01274         && latin1_encoding.type[i] != BT_NONXML
01275         && table[i] != i)
01276       return 0;
01277   for (i = 0; i < 256; i++) {
01278     int c = table[i];
01279     if (c == -1) {
01280       e->normal.type[i] = BT_MALFORM;
01281       /* This shouldn't really get used. */
01282       e->utf16[i] = 0xFFFF;
01283       e->utf8[i][0] = 1;
01284       e->utf8[i][1] = 0;
01285     }
01286     else if (c < 0) {
01287       if (c < -4)
01288         return 0;
01289       e->normal.type[i] = BT_LEAD2 - (c + 2);
01290       e->utf8[i][0] = 0;
01291       e->utf16[i] = 0;
01292     }
01293     else if (c < 0x80) {
01294       if (latin1_encoding.type[c] != BT_OTHER
01295           && latin1_encoding.type[c] != BT_NONXML
01296           && c != i)
01297         return 0;
01298       e->normal.type[i] = latin1_encoding.type[c];
01299       e->utf8[i][0] = 1;
01300       e->utf8[i][1] = (char)c;
01301       e->utf16[i] = c == 0 ? 0xFFFF : c;
01302     }
01303     else if (checkCharRefNumber(c) < 0) {
01304       e->normal.type[i] = BT_NONXML;
01305       /* This shouldn't really get used. */
01306       e->utf16[i] = 0xFFFF;
01307       e->utf8[i][0] = 1;
01308       e->utf8[i][1] = 0;
01309     }
01310     else {
01311       if (c > 0xFFFF)
01312         return 0;
01313       if (UCS2_GET_NAMING(nmstrtPages, c >> 8, c & 0xff))
01314         e->normal.type[i] = BT_NMSTRT;
01315       else if (UCS2_GET_NAMING(namePages, c >> 8, c & 0xff))
01316         e->normal.type[i] = BT_NAME;
01317       else
01318         e->normal.type[i] = BT_OTHER;
01319       e->utf8[i][0] = (char)XmlUtf8Encode(c, e->utf8[i] + 1);
01320       e->utf16[i] = c;
01321     }
01322   }
01323   e->userData = userData;
01324   e->convert = convert;
01325   if (convert) {
01326     e->normal.isName2 = unknown_isName;
01327     e->normal.isName3 = unknown_isName;
01328     e->normal.isName4 = unknown_isName;
01329     e->normal.isNmstrt2 = unknown_isNmstrt;
01330     e->normal.isNmstrt3 = unknown_isNmstrt;
01331     e->normal.isNmstrt4 = unknown_isNmstrt;
01332     e->normal.isInvalid2 = unknown_isInvalid;
01333     e->normal.isInvalid3 = unknown_isInvalid;
01334     e->normal.isInvalid4 = unknown_isInvalid;
01335   }
01336   e->normal.enc.utf8Convert = unknown_toUtf8;
01337   e->normal.enc.utf16Convert = unknown_toUtf16;
01338   return &(e->normal.enc);
01339 }

int XmlSizeOfUnknownEncoding ( )

Definition at line 1175 of file xmltok.c.

01176 {
01177   return sizeof(struct unknown_encoding);
01178 }

/*
 * Encodes charNum as UTF-16 into buf and returns the number of 16-bit
 * units written: 1 for values below 0x10000, 2 (a surrogate pair) for
 * values below 0x110000, and 0 with buf untouched for negative or larger
 * values.
 *
 * Definition at line 1150 of file xmltok.c.
 */
int XmlUtf16Encode(int charNum, unsigned short *buf)
{
  if (charNum < 0 || charNum >= 0x110000)
    return 0;
  if (charNum >= 0x10000) {
    const int offset = charNum - 0x10000;
    buf[0] = (offset >> 10) + 0xD800;    /* high surrogate: top 10 bits */
    buf[1] = (offset & 0x3FF) + 0xDC00;  /* low surrogate: low 10 bits */
    return 2;
  }
  buf[0] = charNum;
  return 1;
}

int XmlUtf8Encode	(	int	c,
		char *	buf
	)

Definition at line 1114 of file xmltok.c.

References UTF8_cval1, UTF8_cval2, UTF8_cval3, and UTF8_cval4.

Referenced by unknown_toUtf8(), and XmlInitUnknownEncoding().

01115 {
01116   enum {
01117     /* minN is minimum legal resulting value for N byte sequence */
01118     min2 = 0x80,
01119     min3 = 0x800,
01120     min4 = 0x10000
01121   };
01122 
01123   if (c < 0)
01124     return 0;
01125   if (c < min2) {
01126     buf[0] = (c | UTF8_cval1);
01127     return 1;
01128   }
01129   if (c < min3) {
01130     buf[0] = ((c >> 6) | UTF8_cval2);
01131     buf[1] = ((c & 0x3f) | 0x80);
01132     return 2;
01133   }
01134   if (c < min4) {
01135     buf[0] = ((c >> 12) | UTF8_cval3);
01136     buf[1] = (((c >> 6) & 0x3f) | 0x80);
01137     buf[2] = ((c & 0x3f) | 0x80);
01138     return 3;
01139   }
01140   if (c < 0x110000) {
01141     buf[0] = ((c >> 18) | UTF8_cval4);
01142     buf[1] = (((c >> 12) & 0x3f) | 0x80);
01143     buf[2] = (((c >> 6) & 0x3f) | 0x80);
01144     buf[3] = ((c & 0x3f) | 0x80);
01145     return 4;
01146   }
01147   return 0;
01148 }

Variable Documentation

struct normal_encoding ascii_encoding [static]

Initial value:

 {
  { VTABLE1, ascii_toUtf8, latin1_toUtf16, 1, 1, 0 },
  {
#define BT_COLON



  },

}

Definition at line 467 of file xmltok.c.

struct normal_encoding big2_encoding [static]

Initial value:

 {
  { VTABLE, 2, 0,



  0

  },
  {
#define BT_COLON



  },

}

Definition at line 829 of file xmltok.c.

struct normal_encoding internal_big2_encoding [static]

Initial value:

 {
  { VTABLE, 2, 0, 1 },
  {
#define BT_COLON



  },

}

Definition at line 861 of file xmltok.c.

Referenced by XmlGetUtf16InternalEncoding().

struct normal_encoding internal_little2_encoding [static]

Initial value:

 { 
  { VTABLE, 2, 0, 1 },
  {
#define BT_COLON



  },

}

Definition at line 722 of file xmltok.c.

Referenced by XmlGetUtf16InternalEncoding().

struct normal_encoding internal_utf8_encoding [static]

Initial value:

 {
  { VTABLE1, utf8_toUtf8, utf8_toUtf16, 1, 1, 0 },
  {
#define BT_COLON



  },
  STANDARD_VTABLE(sb_) NORMAL_VTABLE(utf8_)
}

Definition at line 376 of file xmltok.c.

Referenced by XmlGetUtf8InternalEncoding().

struct normal_encoding latin1_encoding [static]

Initial value:

 {
  { VTABLE1, latin1_toUtf8, latin1_toUtf16, 1, 0, 0 },
  {
#define BT_COLON



  },

}

Definition at line 434 of file xmltok.c.

Referenced by checkCharRefNumber(), and XmlInitUnknownEncoding().

struct normal_encoding little2_encoding [static]

Initial value:

 { 
  { VTABLE, 2, 0,



    0

  },
  {
#define BT_COLON



  },

}

Definition at line 690 of file xmltok.c.

struct normal_encoding utf8_encoding [static]

Initial value:

 {
  { VTABLE1, utf8_toUtf8, utf8_toUtf16, 1, 1, 0 },
  {
#define BT_COLON



  },
  STANDARD_VTABLE(sb_) NORMAL_VTABLE(utf8_)
}

Definition at line 352 of file xmltok.c.

Referenced by initUpdatePosition().

Generated on Tue Nov 29 23:14:53 2016 for BOSS_7.0.2 by Doxygen 1.4.7


Classes
struct	normal_encoding
struct	unknown_encoding
Defines
#define	VTABLE1
#define	VTABLE VTABLE1, PREFIX(toUtf8), PREFIX(toUtf16)
#define	UCS2_GET_NAMING(pages, hi, lo) (namingBitmap[(pages[hi] << 3) + ((lo) >> 5)] & (1 << ((lo) & 0x1F)))
#define	UTF8_GET_NAMING2(pages, byte)
#define	UTF8_GET_NAMING3(pages, byte)
#define	UTF8_GET_NAMING(pages, p, n)
#define	UTF8_INVALID3(p)
#define	UTF8_INVALID4(p) ((*p) == 0xF4 && ((p)[1] & 0x30) != 0)
#define	utf8_isName4 isNever
#define	utf8_isNmstrt4 isNever
#define	utf8_isInvalid2 isNever
#define	STANDARD_VTABLE(E)
#define	NORMAL_VTABLE(E)
#define	MINBPC(enc) 1
#define	SB_BYTE_TYPE(enc, p) (((struct normal_encoding *)(enc))->type[(unsigned char)*(p)])
#define	BYTE_TYPE(enc, p) SB_BYTE_TYPE(enc, p)
#define	BYTE_TO_ASCII(enc, p) (*p)
#define	IS_NAME_CHAR(enc, p, n) (((const struct normal_encoding *)(enc))->isName ## n(enc, p))
#define	IS_NMSTRT_CHAR(enc, p, n) (((const struct normal_encoding *)(enc))->isNmstrt ## n(enc, p))
#define	IS_INVALID_CHAR(enc, p, n) (((const struct normal_encoding *)(enc))->isInvalid ## n(enc, p))
#define	IS_NAME_CHAR_MINBPC(enc, p) (0)
#define	IS_NMSTRT_CHAR_MINBPC(enc, p) (0)
#define	CHAR_MATCHES(enc, p, c) (*(p) == c)
#define	PREFIX(ident) normal_ ## ident
#define	BT_COLON BT_NMSTRT
#define	BT_COLON BT_NMSTRT
#define	BT_COLON BT_NMSTRT
#define	BT_COLON BT_NMSTRT
#define	DEFINE_UTF16_TO_UTF8(E)
#define	DEFINE_UTF16_TO_UTF16(E)
#define	SET2(ptr, ch) (((ptr)[0] = ((ch) & 0xff)), ((ptr)[1] = ((ch) >> 8)))
#define	GET_LO(ptr) ((unsigned char)(ptr)[0])
#define	GET_HI(ptr) ((unsigned char)(ptr)[1])
#define	SET2(ptr, ch) (((ptr)[0] = ((ch) >> 8)), ((ptr)[1] = ((ch) & 0xFF)))
#define	GET_LO(ptr) ((unsigned char)(ptr)[1])
#define	GET_HI(ptr) ((unsigned char)(ptr)[0])
#define	LITTLE2_BYTE_TYPE(enc, p)
#define	LITTLE2_BYTE_TO_ASCII(enc, p) ((p)[1] == 0 ? (p)[0] : -1)
#define	LITTLE2_CHAR_MATCHES(enc, p, c) ((p)[1] == 0 && (p)[0] == c)
#define	LITTLE2_IS_NAME_CHAR_MINBPC(enc, p) UCS2_GET_NAMING(namePages, (unsigned char)p[1], (unsigned char)p[0])
#define	LITTLE2_IS_NMSTRT_CHAR_MINBPC(enc, p) UCS2_GET_NAMING(nmstrtPages, (unsigned char)p[1], (unsigned char)p[0])
#define	PREFIX(ident) little2_ ## ident
#define	MINBPC(enc) 2
#define	BYTE_TYPE(enc, p) LITTLE2_BYTE_TYPE(enc, p)
#define	BYTE_TO_ASCII(enc, p) LITTLE2_BYTE_TO_ASCII(enc, p)
#define	CHAR_MATCHES(enc, p, c) LITTLE2_CHAR_MATCHES(enc, p, c)
#define	IS_NAME_CHAR(enc, p, n) 0
#define	IS_NAME_CHAR_MINBPC(enc, p) LITTLE2_IS_NAME_CHAR_MINBPC(enc, p)
#define	IS_NMSTRT_CHAR(enc, p, n) (0)
#define	IS_NMSTRT_CHAR_MINBPC(enc, p) LITTLE2_IS_NMSTRT_CHAR_MINBPC(enc, p)
#define	BT_COLON BT_NMSTRT
#define	BT_COLON BT_NMSTRT
#define	BIG2_BYTE_TYPE(enc, p)
#define	BIG2_BYTE_TO_ASCII(enc, p) ((p)[0] == 0 ? (p)[1] : -1)
#define	BIG2_CHAR_MATCHES(enc, p, c) ((p)[0] == 0 && (p)[1] == c)
#define	BIG2_IS_NAME_CHAR_MINBPC(enc, p) UCS2_GET_NAMING(namePages, (unsigned char)p[0], (unsigned char)p[1])
#define	BIG2_IS_NMSTRT_CHAR_MINBPC(enc, p) UCS2_GET_NAMING(nmstrtPages, (unsigned char)p[0], (unsigned char)p[1])
#define	PREFIX(ident) big2_ ## ident
#define	MINBPC(enc) 2
#define	BYTE_TYPE(enc, p) BIG2_BYTE_TYPE(enc, p)
#define	BYTE_TO_ASCII(enc, p) BIG2_BYTE_TO_ASCII(enc, p)
#define	CHAR_MATCHES(enc, p, c) BIG2_CHAR_MATCHES(enc, p, c)
#define	IS_NAME_CHAR(enc, p, n) 0
#define	IS_NAME_CHAR_MINBPC(enc, p) BIG2_IS_NAME_CHAR_MINBPC(enc, p)
#define	IS_NMSTRT_CHAR(enc, p, n) (0)
#define	IS_NMSTRT_CHAR_MINBPC(enc, p) BIG2_IS_NMSTRT_CHAR_MINBPC(enc, p)
#define	BT_COLON BT_NMSTRT
#define	BT_COLON BT_NMSTRT
#define	INIT_ENC_INDEX(enc) ((enc)->initEnc.isUtf16)
#define	NS(x) x
#define	ns(x) x
Enumerations
enum	{ UTF8_cval1 = 0x00, UTF8_cval2 = 0xc0, UTF8_cval3 = 0xe0, UTF8_cval4 = 0xf0 }
enum	{ UNKNOWN_ENC = -1, ISO_8859_1_ENC = 0, US_ASCII_ENC, UTF_8_ENC, UTF_16_ENC, UTF_16BE_ENC, UTF_16LE_ENC, NO_ENC }
Functions
static int	isNever (const ENCODING *enc, const char *p)
static int	utf8_isName2 (const ENCODING *enc, const char *p)
static int	utf8_isName3 (const ENCODING *enc, const char *p)
static int	utf8_isNmstrt2 (const ENCODING *enc, const char *p)
static int	utf8_isNmstrt3 (const ENCODING *enc, const char *p)
static int	utf8_isInvalid3 (const ENCODING *enc, const char *p)
static int	utf8_isInvalid4 (const ENCODING *enc, const char *p)
static int	checkCharRefNumber (int)
static void	utf8_toUtf8 (const ENCODING *enc, const char **fromP, const char *fromLim, char **toP, const char *toLim)
static void	utf8_toUtf16 (const ENCODING *enc, const char **fromP, const char *fromLim, unsigned short **toP, const unsigned short *toLim)
static void	latin1_toUtf8 (const ENCODING *enc, const char **fromP, const char *fromLim, char **toP, const char *toLim)
static void	latin1_toUtf16 (const ENCODING *enc, const char **fromP, const char *fromLim, unsigned short **toP, const unsigned short *toLim)
static void	ascii_toUtf8 (const ENCODING *enc, const char **fromP, const char *fromLim, char **toP, const char *toLim)
static int	unicode_byte_type (char hi, char lo)
static int	streqci (const char *s1, const char *s2)
static void	initUpdatePosition (const ENCODING *enc, const char *ptr, const char *end, POSITION *pos)
static int	toAscii (const ENCODING *enc, const char *ptr, const char *end)
static int	isSpace (int c)
static int	parsePseudoAttribute (const ENCODING *enc, const char *ptr, const char *end, const char **namePtr, const char **valPtr, const char **nextTokPtr)
static int	doParseXmlDecl (const ENCODING *(*encodingFinder)(const ENCODING *, const char *, const char *), int isGeneralTextEntity, const ENCODING *enc, const char *ptr, const char *end, const char **badPtr, const char **versionPtr, const char **encodingName, const ENCODING **encoding, int *standalone)
int	XmlUtf8Encode (int c, char *buf)
int	XmlUtf16Encode (int charNum, unsigned short *buf)
int	XmlSizeOfUnknownEncoding ()
static int	unknown_isName (const ENCODING *enc, const char *p)
static int	unknown_isNmstrt (const ENCODING *enc, const char *p)
static int	unknown_isInvalid (const ENCODING *enc, const char *p)
static void	unknown_toUtf8 (const ENCODING *enc, const char **fromP, const char *fromLim, char **toP, const char *toLim)
static void	unknown_toUtf16 (const ENCODING *enc, const char **fromP, const char *fromLim, unsigned short **toP, const unsigned short *toLim)
ENCODING *	XmlInitUnknownEncoding (void *mem, int *table, int(*convert)(void *userData, const char *p), void *userData)
static int	getEncodingIndex (const char *name)
static int	initScan (const ENCODING **encodingTable, const INIT_ENCODING *enc, int state, const char *ptr, const char *end, const char **nextTokPtr)
Variables
static struct normal_encoding	utf8_encoding
static struct normal_encoding	internal_utf8_encoding
static struct normal_encoding	latin1_encoding
static struct normal_encoding	ascii_encoding
static struct normal_encoding	little2_encoding
static struct normal_encoding	internal_little2_encoding
static struct normal_encoding	big2_encoding
static struct normal_encoding	internal_big2_encoding