/home/bes3soft/bes3soft/Boss/7.0.2/dist/7.0.2/Calibration/xmlBase/xmlBase-00-00-03/expat/xmltok.c File Reference

#include "xmldef.h"
#include "xmltok.h"
#include "nametab.h"
#include "xmltok_impl.h"
#include "xmltok_impl.c"
#include "asciitab.h"
#include "utf8tab.h"
#include "iasciitab.h"
#include "latin1tab.h"
#include "xmltok_ns.c"

Go to the source code of this file.

Classes

struct  normal_encoding
struct  unknown_encoding

Defines

#define VTABLE1
#define VTABLE   VTABLE1, PREFIX(toUtf8), PREFIX(toUtf16)
#define UCS2_GET_NAMING(pages, hi, lo)   (namingBitmap[(pages[hi] << 3) + ((lo) >> 5)] & (1 << ((lo) & 0x1F)))
#define UTF8_GET_NAMING2(pages, byte)
#define UTF8_GET_NAMING3(pages, byte)
#define UTF8_GET_NAMING(pages, p, n)
#define UTF8_INVALID3(p)
#define UTF8_INVALID4(p)   ((*p) == 0xF4 && ((p)[1] & 0x30) != 0)
#define utf8_isName4   isNever
#define utf8_isNmstrt4   isNever
#define utf8_isInvalid2   isNever
#define STANDARD_VTABLE(E)
#define NORMAL_VTABLE(E)
#define MINBPC(enc)   1
#define SB_BYTE_TYPE(enc, p)   (((struct normal_encoding *)(enc))->type[(unsigned char)*(p)])
#define BYTE_TYPE(enc, p)   SB_BYTE_TYPE(enc, p)
#define BYTE_TO_ASCII(enc, p)   (*p)
#define IS_NAME_CHAR(enc, p, n)   (((const struct normal_encoding *)(enc))->isName ## n(enc, p))
#define IS_NMSTRT_CHAR(enc, p, n)   (((const struct normal_encoding *)(enc))->isNmstrt ## n(enc, p))
#define IS_INVALID_CHAR(enc, p, n)   (((const struct normal_encoding *)(enc))->isInvalid ## n(enc, p))
#define IS_NAME_CHAR_MINBPC(enc, p)   (0)
#define IS_NMSTRT_CHAR_MINBPC(enc, p)   (0)
#define CHAR_MATCHES(enc, p, c)   (*(p) == c)
#define PREFIX(ident)   normal_ ## ident
#define BT_COLON   BT_NMSTRT
#define BT_COLON   BT_NMSTRT
#define BT_COLON   BT_NMSTRT
#define BT_COLON   BT_NMSTRT
#define DEFINE_UTF16_TO_UTF8(E)
#define DEFINE_UTF16_TO_UTF16(E)
#define SET2(ptr, ch)   (((ptr)[0] = ((ch) & 0xff)), ((ptr)[1] = ((ch) >> 8)))
#define GET_LO(ptr)   ((unsigned char)(ptr)[0])
#define GET_HI(ptr)   ((unsigned char)(ptr)[1])
#define SET2(ptr, ch)   (((ptr)[0] = ((ch) >> 8)), ((ptr)[1] = ((ch) & 0xFF)))
#define GET_LO(ptr)   ((unsigned char)(ptr)[1])
#define GET_HI(ptr)   ((unsigned char)(ptr)[0])
#define LITTLE2_BYTE_TYPE(enc, p)
#define LITTLE2_BYTE_TO_ASCII(enc, p)   ((p)[1] == 0 ? (p)[0] : -1)
#define LITTLE2_CHAR_MATCHES(enc, p, c)   ((p)[1] == 0 && (p)[0] == c)
#define LITTLE2_IS_NAME_CHAR_MINBPC(enc, p)   UCS2_GET_NAMING(namePages, (unsigned char)p[1], (unsigned char)p[0])
#define LITTLE2_IS_NMSTRT_CHAR_MINBPC(enc, p)   UCS2_GET_NAMING(nmstrtPages, (unsigned char)p[1], (unsigned char)p[0])
#define PREFIX(ident)   little2_ ## ident
#define MINBPC(enc)   2
#define BYTE_TYPE(enc, p)   LITTLE2_BYTE_TYPE(enc, p)
#define BYTE_TO_ASCII(enc, p)   LITTLE2_BYTE_TO_ASCII(enc, p)
#define CHAR_MATCHES(enc, p, c)   LITTLE2_CHAR_MATCHES(enc, p, c)
#define IS_NAME_CHAR(enc, p, n)   0
#define IS_NAME_CHAR_MINBPC(enc, p)   LITTLE2_IS_NAME_CHAR_MINBPC(enc, p)
#define IS_NMSTRT_CHAR(enc, p, n)   (0)
#define IS_NMSTRT_CHAR_MINBPC(enc, p)   LITTLE2_IS_NMSTRT_CHAR_MINBPC(enc, p)
#define BT_COLON   BT_NMSTRT
#define BT_COLON   BT_NMSTRT
#define BIG2_BYTE_TYPE(enc, p)
#define BIG2_BYTE_TO_ASCII(enc, p)   ((p)[0] == 0 ? (p)[1] : -1)
#define BIG2_CHAR_MATCHES(enc, p, c)   ((p)[0] == 0 && (p)[1] == c)
#define BIG2_IS_NAME_CHAR_MINBPC(enc, p)   UCS2_GET_NAMING(namePages, (unsigned char)p[0], (unsigned char)p[1])
#define BIG2_IS_NMSTRT_CHAR_MINBPC(enc, p)   UCS2_GET_NAMING(nmstrtPages, (unsigned char)p[0], (unsigned char)p[1])
#define PREFIX(ident)   big2_ ## ident
#define MINBPC(enc)   2
#define BYTE_TYPE(enc, p)   BIG2_BYTE_TYPE(enc, p)
#define BYTE_TO_ASCII(enc, p)   BIG2_BYTE_TO_ASCII(enc, p)
#define CHAR_MATCHES(enc, p, c)   BIG2_CHAR_MATCHES(enc, p, c)
#define IS_NAME_CHAR(enc, p, n)   0
#define IS_NAME_CHAR_MINBPC(enc, p)   BIG2_IS_NAME_CHAR_MINBPC(enc, p)
#define IS_NMSTRT_CHAR(enc, p, n)   (0)
#define IS_NMSTRT_CHAR_MINBPC(enc, p)   BIG2_IS_NMSTRT_CHAR_MINBPC(enc, p)
#define BT_COLON   BT_NMSTRT
#define BT_COLON   BT_NMSTRT
#define INIT_ENC_INDEX(enc)   ((enc)->initEnc.isUtf16)
#define NS(x)   x
#define ns(x)   x

Enumerations

enum  { UTF8_cval1 = 0x00, UTF8_cval2 = 0xc0, UTF8_cval3 = 0xe0, UTF8_cval4 = 0xf0 }
enum  {
  UNKNOWN_ENC = -1, ISO_8859_1_ENC = 0, US_ASCII_ENC, UTF_8_ENC,
  UTF_16_ENC, UTF_16BE_ENC, UTF_16LE_ENC, NO_ENC
}

Functions

static int isNever (const ENCODING *enc, const char *p)
static int utf8_isName2 (const ENCODING *enc, const char *p)
static int utf8_isName3 (const ENCODING *enc, const char *p)
static int utf8_isNmstrt2 (const ENCODING *enc, const char *p)
static int utf8_isNmstrt3 (const ENCODING *enc, const char *p)
static int utf8_isInvalid3 (const ENCODING *enc, const char *p)
static int utf8_isInvalid4 (const ENCODING *enc, const char *p)
static int checkCharRefNumber (int)
static void utf8_toUtf8 (const ENCODING *enc, const char **fromP, const char *fromLim, char **toP, const char *toLim)
static void utf8_toUtf16 (const ENCODING *enc, const char **fromP, const char *fromLim, unsigned short **toP, const unsigned short *toLim)
static void latin1_toUtf8 (const ENCODING *enc, const char **fromP, const char *fromLim, char **toP, const char *toLim)
static void latin1_toUtf16 (const ENCODING *enc, const char **fromP, const char *fromLim, unsigned short **toP, const unsigned short *toLim)
static void ascii_toUtf8 (const ENCODING *enc, const char **fromP, const char *fromLim, char **toP, const char *toLim)
static int unicode_byte_type (char hi, char lo)
static int streqci (const char *s1, const char *s2)
static void initUpdatePosition (const ENCODING *enc, const char *ptr, const char *end, POSITION *pos)
static int toAscii (const ENCODING *enc, const char *ptr, const char *end)
static int isSpace (int c)
static int parsePseudoAttribute (const ENCODING *enc, const char *ptr, const char *end, const char **namePtr, const char **valPtr, const char **nextTokPtr)
static int doParseXmlDecl (const ENCODING *(*encodingFinder)(const ENCODING *, const char *, const char *), int isGeneralTextEntity, const ENCODING *enc, const char *ptr, const char *end, const char **badPtr, const char **versionPtr, const char **encodingName, const ENCODING **encoding, int *standalone)
int XmlUtf8Encode (int c, char *buf)
int XmlUtf16Encode (int charNum, unsigned short *buf)
int XmlSizeOfUnknownEncoding ()
static int unknown_isName (const ENCODING *enc, const char *p)
static int unknown_isNmstrt (const ENCODING *enc, const char *p)
static int unknown_isInvalid (const ENCODING *enc, const char *p)
static void unknown_toUtf8 (const ENCODING *enc, const char **fromP, const char *fromLim, char **toP, const char *toLim)
static void unknown_toUtf16 (const ENCODING *enc, const char **fromP, const char *fromLim, unsigned short **toP, const unsigned short *toLim)
ENCODING * XmlInitUnknownEncoding (void *mem, int *table, int(*convert)(void *userData, const char *p), void *userData)
static int getEncodingIndex (const char *name)
static int initScan (const ENCODING **encodingTable, const INIT_ENCODING *enc, int state, const char *ptr, const char *end, const char **nextTokPtr)

Variables

static struct normal_encoding utf8_encoding
static struct normal_encoding internal_utf8_encoding
static struct normal_encoding latin1_encoding
static struct normal_encoding ascii_encoding
static struct normal_encoding little2_encoding
static struct normal_encoding internal_little2_encoding
static struct normal_encoding big2_encoding
static struct normal_encoding internal_big2_encoding


Define Documentation

#define BIG2_BYTE_TO_ASCII ( enc,
p   )     ((p)[0] == 0 ? (p)[1] : -1)

Definition at line 740 of file xmltok.c.

#define BIG2_BYTE_TYPE ( enc,
p   ) 

Value:

((p)[0] == 0 \
  ? ((struct normal_encoding *)(enc))->type[(unsigned char)(p)[1]] \
  : unicode_byte_type((p)[0], (p)[1]))

Definition at line 736 of file xmltok.c.

#define BIG2_CHAR_MATCHES ( enc,
p,
c   )     ((p)[0] == 0 && (p)[1] == c)

Definition at line 741 of file xmltok.c.

#define BIG2_IS_NAME_CHAR_MINBPC ( enc,
p   )     UCS2_GET_NAMING(namePages, (unsigned char)p[0], (unsigned char)p[1])

Definition at line 742 of file xmltok.c.

#define BIG2_IS_NMSTRT_CHAR_MINBPC ( enc,
p   )     UCS2_GET_NAMING(nmstrtPages, (unsigned char)p[0], (unsigned char)p[1])

Definition at line 744 of file xmltok.c.

#define BT_COLON   BT_NMSTRT

#define BT_COLON   BT_NMSTRT

#define BT_COLON   BT_NMSTRT

#define BT_COLON   BT_NMSTRT

#define BT_COLON   BT_NMSTRT

#define BT_COLON   BT_NMSTRT

#define BT_COLON   BT_NMSTRT

#define BT_COLON   BT_NMSTRT

#define BYTE_TO_ASCII ( enc,
p   )     BIG2_BYTE_TO_ASCII(enc, p)

Definition at line 789 of file xmltok.c.

#define BYTE_TO_ASCII ( enc,
p   )     LITTLE2_BYTE_TO_ASCII(enc, p)

Definition at line 789 of file xmltok.c.

#define BYTE_TO_ASCII ( enc,
p   )     (*p)

Definition at line 789 of file xmltok.c.

#define BYTE_TYPE ( enc,
p   )     BIG2_BYTE_TYPE(enc, p)

Definition at line 788 of file xmltok.c.

#define BYTE_TYPE ( enc,
p   )     LITTLE2_BYTE_TYPE(enc, p)

Definition at line 788 of file xmltok.c.

#define BYTE_TYPE ( enc,
p   )     SB_BYTE_TYPE(enc, p)

Definition at line 788 of file xmltok.c.

#define CHAR_MATCHES ( enc,
p,
c   )     BIG2_CHAR_MATCHES(enc, p, c)

Definition at line 790 of file xmltok.c.

#define CHAR_MATCHES ( enc,
p,
c   )     LITTLE2_CHAR_MATCHES(enc, p, c)

Definition at line 790 of file xmltok.c.

#define CHAR_MATCHES ( enc,
p,
c   )     (*(p) == c)

Definition at line 790 of file xmltok.c.

#define DEFINE_UTF16_TO_UTF16 ( E   ) 

Value:

static \
void E ## toUtf16(const ENCODING *enc, \
                  const char **fromP, const char *fromLim, \
                  unsigned short **toP, const unsigned short *toLim) \
{ \
  /* Avoid copying first half only of surrogate */ \
  if (fromLim - *fromP > ((toLim - *toP) << 1) \
      && (GET_HI(fromLim - 2) & 0xF8) == 0xD8) \
    fromLim -= 2; \
  for (; *fromP != fromLim && *toP != toLim; *fromP += 2) \
    *(*toP)++ = (GET_HI(*fromP) << 8) | GET_LO(*fromP); \
}

Definition at line 559 of file xmltok.c.

#define DEFINE_UTF16_TO_UTF8 ( E   ) 

Definition at line 496 of file xmltok.c.

#define GET_HI ( ptr   )     ((unsigned char)(ptr)[0])

Definition at line 588 of file xmltok.c.

#define GET_HI ( ptr   )     ((unsigned char)(ptr)[1])

Definition at line 588 of file xmltok.c.

#define GET_LO ( ptr   )     ((unsigned char)(ptr)[1])

Definition at line 587 of file xmltok.c.

#define GET_LO ( ptr   )     ((unsigned char)(ptr)[0])

Definition at line 587 of file xmltok.c.

#define INIT_ENC_INDEX ( enc   )     ((enc)->initEnc.isUtf16)

Definition at line 1378 of file xmltok.c.

Referenced by initScan(), and XmlInitEncoding().

#define IS_INVALID_CHAR ( enc,
p,
n   )     (((const struct normal_encoding *)(enc))->isInvalid ## n(enc, p))

Definition at line 237 of file xmltok.c.

#define IS_NAME_CHAR ( enc,
p,
n   )     0

Definition at line 791 of file xmltok.c.

#define IS_NAME_CHAR ( enc,
p,
n   )     0

Definition at line 791 of file xmltok.c.

#define IS_NAME_CHAR ( enc,
p,
n   )     (((const struct normal_encoding *)(enc))->isName ## n(enc, p))

Definition at line 791 of file xmltok.c.

#define IS_NAME_CHAR_MINBPC ( enc,
p   )     BIG2_IS_NAME_CHAR_MINBPC(enc, p)

Definition at line 792 of file xmltok.c.

#define IS_NAME_CHAR_MINBPC ( enc,
p   )     LITTLE2_IS_NAME_CHAR_MINBPC(enc, p)

Definition at line 792 of file xmltok.c.

#define IS_NAME_CHAR_MINBPC ( enc,
p   )     (0)

Definition at line 792 of file xmltok.c.

#define IS_NMSTRT_CHAR ( enc,
p,
n   )     (0)

Definition at line 793 of file xmltok.c.

#define IS_NMSTRT_CHAR ( enc,
p,
n   )     (0)

Definition at line 793 of file xmltok.c.

#define IS_NMSTRT_CHAR ( enc,
p,
n   )     (((const struct normal_encoding *)(enc))->isNmstrt ## n(enc, p))

Definition at line 793 of file xmltok.c.

#define IS_NMSTRT_CHAR_MINBPC ( enc,
p   )     BIG2_IS_NMSTRT_CHAR_MINBPC(enc, p)

Definition at line 794 of file xmltok.c.

#define IS_NMSTRT_CHAR_MINBPC ( enc,
p   )     LITTLE2_IS_NMSTRT_CHAR_MINBPC(enc, p)

Definition at line 794 of file xmltok.c.

#define IS_NMSTRT_CHAR_MINBPC ( enc,
p   )     (0)

Definition at line 794 of file xmltok.c.

#define LITTLE2_BYTE_TO_ASCII ( enc,
p   )     ((p)[1] == 0 ? (p)[0] : -1)

Definition at line 601 of file xmltok.c.

#define LITTLE2_BYTE_TYPE ( enc,
p   ) 

Value:

((p)[1] == 0 \
  ? ((struct normal_encoding *)(enc))->type[(unsigned char)*(p)] \
  : unicode_byte_type((p)[1], (p)[0]))

Definition at line 597 of file xmltok.c.

#define LITTLE2_CHAR_MATCHES ( enc,
p,
c   )     ((p)[1] == 0 && (p)[0] == c)

Definition at line 602 of file xmltok.c.

#define LITTLE2_IS_NAME_CHAR_MINBPC ( enc,
p   )     UCS2_GET_NAMING(namePages, (unsigned char)p[1], (unsigned char)p[0])

Definition at line 603 of file xmltok.c.

#define LITTLE2_IS_NMSTRT_CHAR_MINBPC ( enc,
p   )     UCS2_GET_NAMING(nmstrtPages, (unsigned char)p[1], (unsigned char)p[0])

Definition at line 605 of file xmltok.c.

#define MINBPC ( enc   )     2

Definition at line 786 of file xmltok.c.

#define MINBPC ( enc   )     2

Definition at line 786 of file xmltok.c.

#define MINBPC ( enc   )     1

Definition at line 786 of file xmltok.c.

#define NORMAL_VTABLE ( E   ) 

Value:

E ## isName2, \
 E ## isName3, \
 E ## isName4, \
 E ## isNmstrt2, \
 E ## isNmstrt3, \
 E ## isNmstrt4, \
 E ## isInvalid2, \
 E ## isInvalid3, \
 E ## isInvalid4

Definition at line 179 of file xmltok.c.

#define ns ( x   )     x

Definition at line 1504 of file xmltok.c.

Referenced by BesShortLivedConstructor::ConstructMesons(), RecMdcTrackCnv::DataObjectToTObject(), MdcTrackCnv::DataObjectToTObject(), BesTofDigitizerEcV4::Digitize(), BesTofDigitizerEcV3::Digitize(), BesTofDigitizerEcV2::Digitize(), BesTofDigitizerBrV2::Digitize(), G4HepMCInterface::HepMC2G4(), RecEmcShower::NearestSeed(), EvtPolInt::polynomial(), EvtPolInt::ratint(), BesRootIO::SaveTofHitRoot(), RecMdcTrackCnv::TObjectToDataObject(), MdcTrackCnv::TObjectToDataObject(), XmlGetUtf16InternalEncoding(), and XmlGetUtf8InternalEncoding().

#define NS ( x   )     x

Definition at line 1503 of file xmltok.c.

Referenced by TConformalFinder::fastFinding3D(), findEncoding(), initScanContent(), initScanProlog(), XmlInitEncoding(), and XmlParseXmlDecl().

#define PREFIX ( ident   )     big2_ ## ident

Definition at line 785 of file xmltok.c.

#define PREFIX ( ident   )     little2_ ## ident

Definition at line 785 of file xmltok.c.

#define PREFIX ( ident   )     normal_ ## ident

Definition at line 785 of file xmltok.c.

#define SB_BYTE_TYPE ( enc,
p   )     (((struct normal_encoding *)(enc))->type[(unsigned char)*(p)])

Definition at line 206 of file xmltok.c.

#define SET2 ( ptr,
ch   )     (((ptr)[0] = ((ch) >> 8)), ((ptr)[1] = ((ch) & 0xFF)))

Definition at line 585 of file xmltok.c.

#define SET2 ( ptr,
ch   )     (((ptr)[0] = ((ch) & 0xff)), ((ptr)[1] = ((ch) >> 8)))

Definition at line 585 of file xmltok.c.

#define STANDARD_VTABLE ( E   ) 

Definition at line 175 of file xmltok.c.

#define UCS2_GET_NAMING ( pages,
hi,
lo   )     (namingBitmap[(pages[hi] << 3) + ((lo) >> 5)] & (1 << ((lo) & 0x1F)))

Definition at line 54 of file xmltok.c.

Referenced by unknown_isName(), unknown_isNmstrt(), and XmlInitUnknownEncoding().

#define UTF8_GET_NAMING ( pages,
p,
n   ) 

Value:

((n) == 2 \
  ? UTF8_GET_NAMING2(pages, (const unsigned char *)(p)) \
  : ((n) == 3 \
     ? UTF8_GET_NAMING3(pages, (const unsigned char *)(p)) \
     : 0))

Definition at line 79 of file xmltok.c.

#define UTF8_GET_NAMING2 ( pages,
byte   ) 

Value:

(namingBitmap[((pages)[(((byte)[0]) >> 2) & 7] << 3) \
                      + ((((byte)[0]) & 3) << 1) \
                      + ((((byte)[1]) >> 5) & 1)] \
         & (1 << (((byte)[1]) & 0x1F)))

Definition at line 61 of file xmltok.c.

Referenced by utf8_isName2(), and utf8_isNmstrt2().

#define UTF8_GET_NAMING3 ( pages,
byte   ) 

Value:

(namingBitmap[((pages)[((((byte)[0]) & 0xF) << 4) \
                             + ((((byte)[1]) >> 2) & 0xF)] \
                       << 3) \
                      + ((((byte)[1]) & 3) << 1) \
                      + ((((byte)[2]) >> 5) & 1)] \
         & (1 << (((byte)[2]) & 0x1F)))

Definition at line 71 of file xmltok.c.

Referenced by utf8_isName3(), and utf8_isNmstrt3().

#define UTF8_INVALID3 ( p   ) 

Value:

((*p) == 0xED \
  ? (((p)[1] & 0x20) != 0) \
  : ((*p) == 0xEF \
     ? ((p)[1] == 0xBF && ((p)[2] == 0xBF || (p)[2] == 0xBE)) \
     : 0))

Definition at line 86 of file xmltok.c.

Referenced by utf8_isInvalid3().

#define UTF8_INVALID4 ( p   )     ((*p) == 0xF4 && ((p)[1] & 0x30) != 0)

Definition at line 93 of file xmltok.c.

Referenced by utf8_isInvalid4().

#define utf8_isInvalid2   isNever

Definition at line 129 of file xmltok.c.

#define utf8_isName4   isNever

Definition at line 113 of file xmltok.c.

#define utf8_isNmstrt4   isNever

Definition at line 127 of file xmltok.c.

#define VTABLE   VTABLE1, PREFIX(toUtf8), PREFIX(toUtf16)

Definition at line 52 of file xmltok.c.

#define VTABLE1

Value:

{ PREFIX(prologTok), PREFIX(contentTok), PREFIX(cdataSectionTok) }, \
  { PREFIX(attributeValueTok), PREFIX(entityValueTok) }, \
  PREFIX(sameName), \
  PREFIX(nameMatchesAscii), \
  PREFIX(nameLength), \
  PREFIX(skipS), \
  PREFIX(getAtts), \
  PREFIX(charRefNumber), \
  PREFIX(predefinedEntityName), \
  PREFIX(updatePosition), \
  PREFIX(isPublicId)

Definition at line 39 of file xmltok.c.


Enumeration Type Documentation

anonymous enum

Enumerator:
UTF8_cval1 
UTF8_cval2 
UTF8_cval3 
UTF8_cval4 

Definition at line 276 of file xmltok.c.

00276      {  /* UTF8_cvalN is value of masked first byte of N byte sequence */
00277   UTF8_cval1 = 0x00,
00278   UTF8_cval2 = 0xc0,
00279   UTF8_cval3 = 0xe0,
00280   UTF8_cval4 = 0xf0
00281 };

anonymous enum

Enumerator:
UNKNOWN_ENC 
ISO_8859_1_ENC 
US_ASCII_ENC 
UTF_8_ENC 
UTF_16_ENC 
UTF_16BE_ENC 
UTF_16LE_ENC 
NO_ENC 

Definition at line 1343 of file xmltok.c.

01343      {
01344   UNKNOWN_ENC = -1,
01345   ISO_8859_1_ENC = 0,
01346   US_ASCII_ENC,
01347   UTF_8_ENC,
01348   UTF_16_ENC,
01349   UTF_16BE_ENC,
01350   UTF_16LE_ENC,
01351   /* must match encodingNames up to here */
01352   NO_ENC
01353 };


Function Documentation

static void ascii_toUtf8 ( const ENCODING *  enc,
const char **  fromP,
const char *  fromLim,
char **  toP,
const char *  toLim 
) [static]

Definition at line 446 of file xmltok.c.

00449 {
00450   while (*fromP != fromLim && *toP != toLim)
00451     *(*toP)++ = *(*fromP)++;
00452 }

static int checkCharRefNumber ( int  result  )  [static]

Definition at line 1096 of file xmltok.c.

References latin1_encoding, and normal_encoding::type.

Referenced by unknown_isInvalid(), and XmlInitUnknownEncoding().

01097 {
01098   switch (result >> 8) {
01099   case 0xD8: case 0xD9: case 0xDA: case 0xDB:
01100   case 0xDC: case 0xDD: case 0xDE: case 0xDF:
01101     return -1;
01102   case 0:
01103     if (latin1_encoding.type[result] == BT_NONXML)
01104       return -1;
01105     break;
01106   case 0xFF:
01107     if (result == 0xFFFE || result == 0xFFFF)
01108       return -1;
01109     break;
01110   }
01111   return result;
01112 }

static int doParseXmlDecl ( const ENCODING *(*)(const ENCODING *, const char *, const char *)  encodingFinder,
int  isGeneralTextEntity,
const ENCODING *  enc,
const char *  ptr,
const char *  end,
const char **  badPtr,
const char **  versionPtr,
const char **  encodingName,
const ENCODING **  encoding,
int *  standalone 
) [static]

Definition at line 1010 of file xmltok.c.

References isSpace(), parsePseudoAttribute(), and toAscii().

Referenced by XmlParseXmlDecl().

01022 {
01023   const char *val = 0;
01024   const char *name = 0;
01025   ptr += 5 * enc->minBytesPerChar;
01026   end -= 2 * enc->minBytesPerChar;
01027   if (!parsePseudoAttribute(enc, ptr, end, &name, &val, &ptr) || !name) {
01028     *badPtr = ptr;
01029     return 0;
01030   }
01031   if (!XmlNameMatchesAscii(enc, name, "version")) {
01032     if (!isGeneralTextEntity) {
01033       *badPtr = name;
01034       return 0;
01035     }
01036   }
01037   else {
01038     if (versionPtr)
01039       *versionPtr = val;
01040     if (!parsePseudoAttribute(enc, ptr, end, &name, &val, &ptr)) {
01041       *badPtr = ptr;
01042       return 0;
01043     }
01044     if (!name) {
01045       if (isGeneralTextEntity) {
01046         /* a TextDecl must have an EncodingDecl */
01047         *badPtr = ptr;
01048         return 0;
01049       }
01050       return 1;
01051     }
01052   }
01053   if (XmlNameMatchesAscii(enc, name, "encoding")) {
01054     int c = toAscii(enc, val, end);
01055     if (!('a' <= c && c <= 'z') && !('A' <= c && c <= 'Z')) {
01056       *badPtr = val;
01057       return 0;
01058     }
01059     if (encodingName)
01060       *encodingName = val;
01061     if (encoding)
01062       *encoding = encodingFinder(enc, val, ptr - enc->minBytesPerChar);
01063     if (!parsePseudoAttribute(enc, ptr, end, &name, &val, &ptr)) {
01064       *badPtr = ptr;
01065       return 0;
01066     }
01067     if (!name)
01068       return 1;
01069   }
01070   if (!XmlNameMatchesAscii(enc, name, "standalone") || isGeneralTextEntity) {
01071     *badPtr = name;
01072     return 0;
01073   }
01074   if (XmlNameMatchesAscii(enc, val, "yes")) {
01075     if (standalone)
01076       *standalone = 1;
01077   }
01078   else if (XmlNameMatchesAscii(enc, val, "no")) {
01079     if (standalone)
01080       *standalone = 0;
01081   }
01082   else {
01083     *badPtr = val;
01084     return 0;
01085   }
01086   while (isSpace(toAscii(enc, ptr, end)))
01087     ptr += enc->minBytesPerChar;
01088   if (ptr != end) {
01089     *badPtr = ptr;
01090     return 0;
01091   }
01092   return 1;
01093 }

static int getEncodingIndex ( const char *  name  )  [static]

Definition at line 1356 of file xmltok.c.

References genRecEmupikp::i, streqci(), and UNKNOWN_ENC.

Referenced by findEncoding(), and XmlInitEncoding().

01357 {
01358   static const char *encodingNames[] = {
01359     "ISO-8859-1",
01360     "US-ASCII",
01361     "UTF-8",
01362     "UTF-16",
01363     "UTF-16BE",
01364     "UTF-16LE",
01365   };
01366   int i;
01367   if (name == 0)
01368     return NO_ENC;
01369   for (i = 0; i < sizeof(encodingNames)/sizeof(encodingNames[0]); i++)
01370     if (streqci(name, encodingNames[i]))
01371       return i;
01372   return UNKNOWN_ENC;
01373 }

static int initScan ( const ENCODING **  encodingTable,
const INIT_ENCODING *  enc,
int  state,
const char *  ptr,
const char *  end,
const char **  nextTokPtr 
) [static]

Definition at line 1389 of file xmltok.c.

References INIT_ENC_INDEX, ISO_8859_1_ENC, UTF_16_ENC, UTF_16BE_ENC, UTF_16LE_ENC, and UTF_8_ENC.

Referenced by initScanContent(), and initScanProlog().

01395 {
01396   const ENCODING **encPtr;
01397 
01398   if (ptr == end)
01399     return XML_TOK_NONE;
01400   encPtr = enc->encPtr;
01401   if (ptr + 1 == end) {
01402     /* only a single byte available for auto-detection */
01403     /* a well-formed document entity must have more than one byte */
01404     if (state != XML_CONTENT_STATE)
01405       return XML_TOK_PARTIAL;
01406     /* so we're parsing an external text entity... */
01407     /* if UTF-16 was externally specified, then we need at least 2 bytes */
01408     switch (INIT_ENC_INDEX(enc)) {
01409     case UTF_16_ENC:
01410     case UTF_16LE_ENC:
01411     case UTF_16BE_ENC:
01412       return XML_TOK_PARTIAL;
01413     }
01414     switch ((unsigned char)*ptr) {
01415     case 0xFE:
01416     case 0xFF:
01417     case 0xEF: /* possibly first byte of UTF-8 BOM */
01418       if (INIT_ENC_INDEX(enc) == ISO_8859_1_ENC
01419           && state == XML_CONTENT_STATE)
01420         break;
01421       /* fall through */
01422     case 0x00:
01423     case 0x3C:
01424       return XML_TOK_PARTIAL;
01425     }
01426   }
01427   else {
01428     switch (((unsigned char)ptr[0] << 8) | (unsigned char)ptr[1]) {
01429     case 0xFEFF:
01430       if (INIT_ENC_INDEX(enc) == ISO_8859_1_ENC
01431           && state == XML_CONTENT_STATE)
01432         break;
01433       *nextTokPtr = ptr + 2;
01434       *encPtr = encodingTable[UTF_16BE_ENC];
01435       return XML_TOK_BOM;
01436     /* 00 3C is handled in the default case */
01437     case 0x3C00:
01438       if ((INIT_ENC_INDEX(enc) == UTF_16BE_ENC
01439            || INIT_ENC_INDEX(enc) == UTF_16_ENC)
01440           && state == XML_CONTENT_STATE)
01441         break;
01442       *encPtr = encodingTable[UTF_16LE_ENC];
01443       return XmlTok(*encPtr, state, ptr, end, nextTokPtr);
01444     case 0xFFFE:
01445       if (INIT_ENC_INDEX(enc) == ISO_8859_1_ENC
01446           && state == XML_CONTENT_STATE)
01447         break;
01448       *nextTokPtr = ptr + 2;
01449       *encPtr = encodingTable[UTF_16LE_ENC];
01450       return XML_TOK_BOM;
01451     case 0xEFBB:
01452       /* Maybe a UTF-8 BOM (EF BB BF) */
01453       /* If there's an explicitly specified (external) encoding
01454          of ISO-8859-1 or some flavour of UTF-16
01455          and this is an external text entity,
01456          don't look for the BOM,
01457          because it might be legal data. */
01458       if (state == XML_CONTENT_STATE) {
01459         int e = INIT_ENC_INDEX(enc);
01460         if (e == ISO_8859_1_ENC || e == UTF_16BE_ENC || e == UTF_16LE_ENC || e == UTF_16_ENC)
01461           break;
01462       }
01463       if (ptr + 2 == end)
01464         return XML_TOK_PARTIAL;
01465       if ((unsigned char)ptr[2] == 0xBF) {
01466         *encPtr = encodingTable[UTF_8_ENC];
01467         return XML_TOK_BOM;
01468       }
01469       break;
01470     default:
01471       if (ptr[0] == '\0') {
01472         /* 0 isn't a legal data character. Furthermore a document entity can only
01473            start with ASCII characters.  So the only way this can fail to be big-endian
01474            UTF-16 is if it's an external parsed general entity that's labelled as
01475            UTF-16LE. */
01476         if (state == XML_CONTENT_STATE && INIT_ENC_INDEX(enc) == UTF_16LE_ENC)
01477           break;
01478         *encPtr = encodingTable[UTF_16BE_ENC];
01479         return XmlTok(*encPtr, state, ptr, end, nextTokPtr);
01480       }
01481       else if (ptr[1] == '\0') {
01482         /* We could recover here in the case:
01483             - parsing an external entity
01484             - second byte is 0
01485             - no externally specified encoding
01486             - no encoding declaration
01487            by assuming UTF-16LE.  But we don't, because this would mean when
01488            presented just with a single byte, we couldn't reliably determine
01489            whether we needed further bytes. */
01490         if (state == XML_CONTENT_STATE)
01491           break;
01492         *encPtr = encodingTable[UTF_16LE_ENC];
01493         return XmlTok(*encPtr, state, ptr, end, nextTokPtr);
01494       }
01495       break;
01496     }
01497   }
01498   *encPtr = encodingTable[INIT_ENC_INDEX(enc)];
01499   return XmlTok(*encPtr, state, ptr, end, nextTokPtr);
01500 }

static void initUpdatePosition ( const ENCODING *  enc,
const char *  ptr,
const char *  end,
POSITION *  pos 
) [static]

Definition at line 895 of file xmltok.c.

References normal_encoding::enc, and utf8_encoding.

Referenced by XmlInitEncoding().

00897 {
00898   normal_updatePosition(&utf8_encoding.enc, ptr, end, pos);
00899 }

static int isNever ( const ENCODING *  enc,
const char *  p 
) [static]

Definition at line 96 of file xmltok.c.

00097 {
00098   return 0;
00099 }

static int isSpace ( int  c  )  [static]

Definition at line 914 of file xmltok.c.

Referenced by doParseXmlDecl(), and parsePseudoAttribute().

00915 {
00916   switch (c) {
00917   case 0x20:
00918   case 0xD:
00919   case 0xA:
00920   case 0x9:     
00921     return 1;
00922   }
00923   return 0;
00924 }

static void latin1_toUtf16 ( const ENCODING *  enc,
const char **  fromP,
const char *  fromLim,
unsigned short **  toP,
const unsigned short *  toLim 
) [static]

Definition at line 413 of file xmltok.c.

00416 {
00417   while (*fromP != fromLim && *toP != toLim)
00418     *(*toP)++ = (unsigned char)*(*fromP)++;
00419 }

static void latin1_toUtf8 ( const ENCODING *  enc,
const char **  fromP,
const char *  fromLim,
char **  toP,
const char *  toLim 
) [static]

Definition at line 388 of file xmltok.c.

References UTF8_cval2.

00391 {
00392   for (;;) {
00393     unsigned char c;
00394     if (*fromP == fromLim)
00395       break;
00396     c = (unsigned char)**fromP;
00397     if (c & 0x80) {
00398       if (toLim - *toP < 2)
00399         break;
00400       *(*toP)++ = ((c >> 6) | UTF8_cval2);
00401       *(*toP)++ = ((c & 0x3f) | 0x80);
00402       (*fromP)++;
00403     }
00404     else {
00405       if (*toP == toLim)
00406         break;
00407       *(*toP)++ = *(*fromP)++;
00408     }
00409   }
00410 }

static int parsePseudoAttribute ( const ENCODING *  enc,
const char *  ptr,
const char *  end,
const char **  namePtr,
const char **  valPtr,
const char **  nextTokPtr 
) [static]

Definition at line 929 of file xmltok.c.

References isSpace(), and toAscii().

Referenced by doParseXmlDecl().

00935 {
00936   int c;
00937   char open;
00938   if (ptr == end) {
00939     *namePtr = 0;
00940     return 1;
00941   }
00942   if (!isSpace(toAscii(enc, ptr, end))) {
00943     *nextTokPtr = ptr;
00944     return 0;
00945   }
00946   do {
00947     ptr += enc->minBytesPerChar;
00948   } while (isSpace(toAscii(enc, ptr, end)));
00949   if (ptr == end) {
00950     *namePtr = 0;
00951     return 1;
00952   }
00953   *namePtr = ptr;
00954   for (;;) {
00955     c = toAscii(enc, ptr, end);
00956     if (c == -1) {
00957       *nextTokPtr = ptr;
00958       return 0;
00959     }
00960     if (c == '=')
00961       break;
00962     if (isSpace(c)) {
00963       do {
00964         ptr += enc->minBytesPerChar;
00965       } while (isSpace(c = toAscii(enc, ptr, end)));
00966       if (c != '=') {
00967         *nextTokPtr = ptr;
00968         return 0;
00969       }
00970       break;
00971     }
00972     ptr += enc->minBytesPerChar;
00973   }
00974   if (ptr == *namePtr) {
00975     *nextTokPtr = ptr;
00976     return 0;
00977   }
00978   ptr += enc->minBytesPerChar;
00979   c = toAscii(enc, ptr, end);
00980   while (isSpace(c)) {
00981     ptr += enc->minBytesPerChar;
00982     c = toAscii(enc, ptr, end);
00983   }
00984   if (c != '"' && c != '\'') {
00985     *nextTokPtr = ptr;
00986     return 0;
00987   }
00988   open = c;
00989   ptr += enc->minBytesPerChar;
00990   *valPtr = ptr;
00991   for (;; ptr += enc->minBytesPerChar) {
00992     c = toAscii(enc, ptr, end);
00993     if (c == open)
00994       break;
00995     if (!('a' <= c && c <= 'z')
00996         && !('A' <= c && c <= 'Z')
00997         && !('0' <= c && c <= '9')
00998         && c != '.'
00999         && c != '-'
01000         && c != '_') {
01001       *nextTokPtr = ptr;
01002       return 0;
01003     }
01004   }
01005   *nextTokPtr = ptr + enc->minBytesPerChar;
01006   return 1;
01007 }

static int streqci ( const char *  s1,
const char *  s2 
) [static]

Definition at line 877 of file xmltok.c.

Referenced by findEncoding(), and getEncodingIndex().

00878 {
00879   for (;;) {
00880     char c1 = *s1++;
00881     char c2 = *s2++;
00882     if ('a' <= c1 && c1 <= 'z')
00883       c1 += 'A' - 'a';
00884     if ('a' <= c2 && c2 <= 'z')
00885       c2 += 'A' - 'a';
00886     if (c1 != c2)
00887       return 0;
00888     if (!c1)
00889       break;
00890   }
00891   return 1;
00892 }

static int toAscii ( const ENCODING *  enc,
const char *  ptr,
const char *  end 
) [static]

Definition at line 902 of file xmltok.c.

Referenced by doParseXmlDecl(), and parsePseudoAttribute().

00903 {
00904   char buf[1];
00905   char *p = buf;
00906   XmlUtf8Convert(enc, &ptr, end, &p, p + 1);
00907   if (p == buf)
00908     return -1;
00909   else
00910     return buf[0];
00911 }

static int unicode_byte_type ( char  hi,
char  lo 
) [static]

Definition at line 478 of file xmltok.c.

00479 {
00480   switch ((unsigned char)hi) {
00481   case 0xD8: case 0xD9: case 0xDA: case 0xDB:
00482     return BT_LEAD4;
00483   case 0xDC: case 0xDD: case 0xDE: case 0xDF:
00484     return BT_TRAIL;
00485   case 0xFF:
00486     switch ((unsigned char)lo) {
00487     case 0xFF:
00488     case 0xFE:
00489       return BT_NONXML;
00490     }
00491     break;
00492   }
00493   return BT_NONASCII;
00494 }

static int unknown_isInvalid ( const ENCODING *  enc,
const char *  p 
) [static]

Definition at line 1201 of file xmltok.c.

References checkCharRefNumber(), and unknown_encoding::userData.

Referenced by XmlInitUnknownEncoding().

01202 {
01203   int c = ((const struct unknown_encoding *)enc)
01204            ->convert(((const struct unknown_encoding *)enc)->userData, p);
01205   return (c & ~0xFFFF) || checkCharRefNumber(c) < 0;
01206 }

static int unknown_isName ( const ENCODING *  enc,
const char *  p 
) [static]

Definition at line 1181 of file xmltok.c.

References UCS2_GET_NAMING, and unknown_encoding::userData.

Referenced by XmlInitUnknownEncoding().

01182 {
01183   int c = ((const struct unknown_encoding *)enc)
01184           ->convert(((const struct unknown_encoding *)enc)->userData, p);
01185   if (c & ~0xFFFF)
01186     return 0;
01187   return UCS2_GET_NAMING(namePages, c >> 8, c & 0xFF);
01188 }

static int unknown_isNmstrt ( const ENCODING *  enc,
const char *  p 
) [static]

Definition at line 1191 of file xmltok.c.

References UCS2_GET_NAMING, and unknown_encoding::userData.

Referenced by XmlInitUnknownEncoding().

01192 {
01193   int c = ((const struct unknown_encoding *)enc)
01194           ->convert(((const struct unknown_encoding *)enc)->userData, p);
01195   if (c & ~0xFFFF)
01196     return 0;
01197   return UCS2_GET_NAMING(nmstrtPages, c >> 8, c & 0xFF);
01198 }

static void unknown_toUtf16 ( const ENCODING *  enc,
const char **  fromP,
const char *  fromLim,
unsigned short **  toP,
const unsigned short *  toLim 
) [static]

Definition at line 1243 of file xmltok.c.

References unknown_encoding::convert, normal_encoding::type, unknown_encoding::userData, and unknown_encoding::utf16.

Referenced by XmlInitUnknownEncoding().

01246 {
01247   while (*fromP != fromLim && *toP != toLim) {
01248     unsigned short c
01249       = ((const struct unknown_encoding *)enc)->utf16[(unsigned char)**fromP];
01250     if (c == 0) {
01251       c = (unsigned short)((const struct unknown_encoding *)enc)
01252            ->convert(((const struct unknown_encoding *)enc)->userData, *fromP);
01253       *fromP += ((const struct normal_encoding *)enc)->type[(unsigned char)**fromP]
01254                  - (BT_LEAD2 - 2);
01255     }
01256     else
01257       (*fromP)++;
01258     *(*toP)++ = c;
01259   }
01260 }

static void unknown_toUtf8 ( const ENCODING *  enc,
const char **  fromP,
const char *  fromLim,
char **  toP,
const char *  toLim 
) [static]

Definition at line 1209 of file xmltok.c.

References unknown_encoding::userData, unknown_encoding::utf8, and XmlUtf8Encode().

Referenced by XmlInitUnknownEncoding().

01212 {
01213   char buf[XML_UTF8_ENCODE_MAX];
01214   for (;;) {
01215     const char *utf8;
01216     int n;
01217     if (*fromP == fromLim)
01218       break;
01219     utf8 = ((const struct unknown_encoding *)enc)->utf8[(unsigned char)**fromP];
01220     n = *utf8++;
01221     if (n == 0) {
01222       int c = ((const struct unknown_encoding *)enc)
01223               ->convert(((const struct unknown_encoding *)enc)->userData, *fromP);
01224       n = XmlUtf8Encode(c, buf);
01225       if (n > toLim - *toP)
01226         break;
01227       utf8 = buf;
01228       *fromP += ((const struct normal_encoding *)enc)->type[(unsigned char)**fromP]
01229                  - (BT_LEAD2 - 2);
01230     }
01231     else {
01232       if (n > toLim - *toP)
01233         break;
01234       (*fromP)++;
01235     }
01236     do {
01237       *(*toP)++ = *utf8++;
01238     } while (--n != 0);
01239   }
01240 }

static int utf8_isInvalid3 ( const ENCODING *  enc,
const char *  p 
) [static]

Definition at line 132 of file xmltok.c.

References UTF8_INVALID3.

00133 {
00134   return UTF8_INVALID3((const unsigned char *)p);
00135 }

static int utf8_isInvalid4 ( const ENCODING *  enc,
const char *  p 
) [static]

Definition at line 138 of file xmltok.c.

References UTF8_INVALID4.

00139 {
00140   return UTF8_INVALID4((const unsigned char *)p);
00141 }

static int utf8_isName2 ( const ENCODING *  enc,
const char *  p 
) [static]

Definition at line 102 of file xmltok.c.

References UTF8_GET_NAMING2.

00103 {
00104   return UTF8_GET_NAMING2(namePages, (const unsigned char *)p);
00105 }

static int utf8_isName3 ( const ENCODING *  enc,
const char *  p 
) [static]

Definition at line 108 of file xmltok.c.

References UTF8_GET_NAMING3.

00109 {
00110   return UTF8_GET_NAMING3(namePages, (const unsigned char *)p);
00111 }

static int utf8_isNmstrt2 ( const ENCODING *  enc,
const char *  p 
) [static]

Definition at line 116 of file xmltok.c.

References UTF8_GET_NAMING2.

00117 {
00118   return UTF8_GET_NAMING2(nmstrtPages, (const unsigned char *)p);
00119 }

static int utf8_isNmstrt3 ( const ENCODING *  enc,
const char *  p 
) [static]

Definition at line 122 of file xmltok.c.

References UTF8_GET_NAMING3.

00123 {
00124   return UTF8_GET_NAMING3(nmstrtPages, (const unsigned char *)p);
00125 }

static void utf8_toUtf16 ( const ENCODING *  enc,
const char **  fromP,
const char *  fromLim,
unsigned short **  toP,
const unsigned short *  toLim 
) [static]

Definition at line 303 of file xmltok.c.

References type.

00306 {
00307   unsigned short *to = *toP;
00308   const char *from = *fromP;
00309   while (from != fromLim && to != toLim) {
00310     switch (((struct normal_encoding *)enc)->type[(unsigned char)*from]) {
00311     case BT_LEAD2:
00312       *to++ = ((from[0] & 0x1f) << 6) | (from[1] & 0x3f);
00313       from += 2;
00314       break;
00315     case BT_LEAD3:
00316       *to++ = ((from[0] & 0xf) << 12) | ((from[1] & 0x3f) << 6) | (from[2] & 0x3f);
00317       from += 3;
00318       break;
00319     case BT_LEAD4:
00320       {
00321         unsigned long n;
00322         if (to + 1 == toLim)
00323           break;
00324         n = ((from[0] & 0x7) << 18) | ((from[1] & 0x3f) << 12) | ((from[2] & 0x3f) << 6) | (from[3] & 0x3f);
00325         n -= 0x10000;
00326         to[0] = (unsigned short)((n >> 10) | 0xD800);
00327         to[1] = (unsigned short)((n & 0x3FF) | 0xDC00);
00328         to += 2;
00329         from += 4;
00330       }
00331       break;
00332     default:
00333       *to++ = *from++;
00334       break;
00335     }
00336   }
00337   *fromP = from;
00338   *toP = to;
00339 }

static void utf8_toUtf8 ( const ENCODING *  enc,
const char **  fromP,
const char *  fromLim,
char **  toP,
const char *  toLim 
) [static]

Definition at line 284 of file xmltok.c.

00287 {
00288   char *to;
00289   const char *from;
00290   if (fromLim - *fromP > toLim - *toP) {
00291     /* Avoid copying partial characters. */
00292     for (fromLim = *fromP + (toLim - *toP); fromLim > *fromP; fromLim--)
00293       if (((unsigned char)fromLim[-1] & 0xc0) != 0x80)
00294         break;
00295   }
00296   for (to = *toP, from = *fromP; from != fromLim; from++, to++)
00297     *to = *from;
00298   *fromP = from;
00299   *toP = to;
00300 }

ENCODING* XmlInitUnknownEncoding ( void *  mem,
int *  table,
int(*)(void *userData, const char *p)  convert,
void *  userData 
)

Definition at line 1263 of file xmltok.c.

References checkCharRefNumber(), unknown_encoding::convert, normal_encoding::enc, normal_encoding::isInvalid2, normal_encoding::isInvalid3, normal_encoding::isInvalid4, normal_encoding::isName2, normal_encoding::isName3, normal_encoding::isName4, normal_encoding::isNmstrt2, normal_encoding::isNmstrt3, normal_encoding::isNmstrt4, latin1_encoding, unknown_encoding::normal, normal_encoding::type, UCS2_GET_NAMING, unknown_isInvalid(), unknown_isName(), unknown_isNmstrt(), unknown_toUtf16(), unknown_toUtf8(), unknown_encoding::userData, unknown_encoding::utf16, unknown_encoding::utf8, and XmlUtf8Encode().

01267 {
01268   int i;
01269   struct unknown_encoding *e = mem;
01270   for (i = 0; i < sizeof(struct normal_encoding); i++)
01271     ((char *)mem)[i] = ((char *)&latin1_encoding)[i];
01272   for (i = 0; i < 128; i++)
01273     if (latin1_encoding.type[i] != BT_OTHER
01274         && latin1_encoding.type[i] != BT_NONXML
01275         && table[i] != i)
01276       return 0;
01277   for (i = 0; i < 256; i++) {
01278     int c = table[i];
01279     if (c == -1) {
01280       e->normal.type[i] = BT_MALFORM;
01281       /* This shouldn't really get used. */
01282       e->utf16[i] = 0xFFFF;
01283       e->utf8[i][0] = 1;
01284       e->utf8[i][1] = 0;
01285     }
01286     else if (c < 0) {
01287       if (c < -4)
01288         return 0;
01289       e->normal.type[i] = BT_LEAD2 - (c + 2);
01290       e->utf8[i][0] = 0;
01291       e->utf16[i] = 0;
01292     }
01293     else if (c < 0x80) {
01294       if (latin1_encoding.type[c] != BT_OTHER
01295           && latin1_encoding.type[c] != BT_NONXML
01296           && c != i)
01297         return 0;
01298       e->normal.type[i] = latin1_encoding.type[c];
01299       e->utf8[i][0] = 1;
01300       e->utf8[i][1] = (char)c;
01301       e->utf16[i] = c == 0 ? 0xFFFF : c;
01302     }
01303     else if (checkCharRefNumber(c) < 0) {
01304       e->normal.type[i] = BT_NONXML;
01305       /* This shouldn't really get used. */
01306       e->utf16[i] = 0xFFFF;
01307       e->utf8[i][0] = 1;
01308       e->utf8[i][1] = 0;
01309     }
01310     else {
01311       if (c > 0xFFFF)
01312         return 0;
01313       if (UCS2_GET_NAMING(nmstrtPages, c >> 8, c & 0xff))
01314         e->normal.type[i] = BT_NMSTRT;
01315       else if (UCS2_GET_NAMING(namePages, c >> 8, c & 0xff))
01316         e->normal.type[i] = BT_NAME;
01317       else
01318         e->normal.type[i] = BT_OTHER;
01319       e->utf8[i][0] = (char)XmlUtf8Encode(c, e->utf8[i] + 1);
01320       e->utf16[i] = c;
01321     }
01322   }
01323   e->userData = userData;
01324   e->convert = convert;
01325   if (convert) {
01326     e->normal.isName2 = unknown_isName;
01327     e->normal.isName3 = unknown_isName;
01328     e->normal.isName4 = unknown_isName;
01329     e->normal.isNmstrt2 = unknown_isNmstrt;
01330     e->normal.isNmstrt3 = unknown_isNmstrt;
01331     e->normal.isNmstrt4 = unknown_isNmstrt;
01332     e->normal.isInvalid2 = unknown_isInvalid;
01333     e->normal.isInvalid3 = unknown_isInvalid;
01334     e->normal.isInvalid4 = unknown_isInvalid;
01335   }
01336   e->normal.enc.utf8Convert = unknown_toUtf8;
01337   e->normal.enc.utf16Convert = unknown_toUtf16;
01338   return &(e->normal.enc);
01339 }

int XmlSizeOfUnknownEncoding (  ) 

Definition at line 1175 of file xmltok.c.

01176 {
01177   return sizeof(struct unknown_encoding);
01178 }

int XmlUtf16Encode ( int  charNum,
unsigned short *  buf 
)

Definition at line 1150 of file xmltok.c.

/*
 * Encode the code point charNum as UTF-16 into buf.
 * Returns the number of 16-bit units written: 1 for values below 0x10000,
 * 2 (a surrogate pair) for values below 0x110000, and 0 (nothing written)
 * for negative or larger values.
 */
int XmlUtf16Encode(int charNum, unsigned short *buf)
{
  if (charNum < 0 || charNum >= 0x110000)
    return 0;
  if (charNum >= 0x10000) {
    int offset = charNum - 0x10000;
    buf[0] = (offset >> 10) + 0xD800;
    buf[1] = (offset & 0x3FF) + 0xDC00;
    return 2;
  }
  buf[0] = charNum;
  return 1;
}

int XmlUtf8Encode ( int  c,
char *  buf 
)

Definition at line 1114 of file xmltok.c.

References UTF8_cval1, UTF8_cval2, UTF8_cval3, and UTF8_cval4.

Referenced by unknown_toUtf8(), and XmlInitUnknownEncoding().

01115 {
01116   enum {
01117     /* minN is minimum legal resulting value for N byte sequence */
01118     min2 = 0x80,
01119     min3 = 0x800,
01120     min4 = 0x10000
01121   };
01122 
01123   if (c < 0)
01124     return 0;
01125   if (c < min2) {
01126     buf[0] = (c | UTF8_cval1);
01127     return 1;
01128   }
01129   if (c < min3) {
01130     buf[0] = ((c >> 6) | UTF8_cval2);
01131     buf[1] = ((c & 0x3f) | 0x80);
01132     return 2;
01133   }
01134   if (c < min4) {
01135     buf[0] = ((c >> 12) | UTF8_cval3);
01136     buf[1] = (((c >> 6) & 0x3f) | 0x80);
01137     buf[2] = ((c & 0x3f) | 0x80);
01138     return 3;
01139   }
01140   if (c < 0x110000) {
01141     buf[0] = ((c >> 18) | UTF8_cval4);
01142     buf[1] = (((c >> 12) & 0x3f) | 0x80);
01143     buf[2] = (((c >> 6) & 0x3f) | 0x80);
01144     buf[3] = ((c & 0x3f) | 0x80);
01145     return 4;
01146   }
01147   return 0;
01148 }


Variable Documentation

struct normal_encoding ascii_encoding [static]

Initial value:

 {
  { VTABLE1, ascii_toUtf8, latin1_toUtf16, 1, 1, 0 },
  {
#define BT_COLON



  },

}

Definition at line 467 of file xmltok.c.

struct normal_encoding big2_encoding [static]

Initial value:

 {
  { VTABLE, 2, 0,



  0

  },
  {
#define BT_COLON



  },

}

Definition at line 829 of file xmltok.c.

struct normal_encoding internal_big2_encoding [static]

Initial value:

 {
  { VTABLE, 2, 0, 1 },
  {
#define BT_COLON



  },

}

Definition at line 861 of file xmltok.c.

Referenced by XmlGetUtf16InternalEncoding().

struct normal_encoding internal_little2_encoding [static]

Initial value:

 { 
  { VTABLE, 2, 0, 1 },
  {
#define BT_COLON



  },

}

Definition at line 722 of file xmltok.c.

Referenced by XmlGetUtf16InternalEncoding().

struct normal_encoding internal_utf8_encoding [static]

Initial value:

 {
  { VTABLE1, utf8_toUtf8, utf8_toUtf16, 1, 1, 0 },
  {
#define BT_COLON



  },
  STANDARD_VTABLE(sb_) NORMAL_VTABLE(utf8_)
}

Definition at line 376 of file xmltok.c.

Referenced by XmlGetUtf8InternalEncoding().

struct normal_encoding latin1_encoding [static]

Initial value:

 {
  { VTABLE1, latin1_toUtf8, latin1_toUtf16, 1, 0, 0 },
  {
#define BT_COLON



  },

}

Definition at line 434 of file xmltok.c.

Referenced by checkCharRefNumber(), and XmlInitUnknownEncoding().

struct normal_encoding little2_encoding [static]

Initial value:

 { 
  { VTABLE, 2, 0,



    0

  },
  {
#define BT_COLON



  },

}

Definition at line 690 of file xmltok.c.

struct normal_encoding utf8_encoding [static]

Initial value:

 {
  { VTABLE1, utf8_toUtf8, utf8_toUtf16, 1, 1, 0 },
  {
#define BT_COLON



  },
  STANDARD_VTABLE(sb_) NORMAL_VTABLE(utf8_)
}

Definition at line 352 of file xmltok.c.

Referenced by initUpdatePosition().


Generated on Tue Nov 29 23:14:53 2016 for BOSS_7.0.2 by  doxygen 1.4.7