src/parsec/disk-image/parsec/parsec-benchmark/pkgs/tools/cmake/src/Utilities/cmxmlrpc/xmlrpc_utf8.c - public/gem5-resources - Git at Google

 /* Copyright (C) 2001 by Eric Kidd. All rights reserved.
 **
 ** Redistribution and use in source and binary forms, with or without
 ** modification, are permitted provided that the following conditions
 ** are met:
 ** 1. Redistributions of source code must retain the above copyright
 **    notice, this list of conditions and the following disclaimer.
 ** 2. Redistributions in binary form must reproduce the above copyright
 **    notice, this list of conditions and the following disclaimer in the
 **    documentation and/or other materials provided with the distribution.
 ** 3. The name of the author may not be used to endorse or promote products
 **    derived from this software without specific prior written permission.
 **
 ** THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 ** ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 ** IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 ** ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 ** FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 ** DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 ** OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 ** HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 ** LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 ** OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 ** SUCH DAMAGE. */


 /*=========================================================================
 **  XML-RPC UTF-8 Utilities
 **=========================================================================
 **  Routines for validating, encoding and decoding UTF-8 data.  We try to
 **  be very, very strict about invalid UTF-8 data.
 **
 **  All of the code in this file assumes that your machine represents
 **  wchar_t as a 16-bit (or wider) character containing UCS-2 data.  If this
 **  assumption is incorrect, you may need to replace this file.
 **
 **  For lots of information on Unicode and UTF-8 decoding, see:
 **    http://www.cl.cam.ac.uk/~mgk25/unicode.html
 */

 #include "xmlrpc_config.h"

 #include "xmlrpc.h"

 #ifdef HAVE_UNICODE_WCHAR

 /*=========================================================================
 **  Tables and Constants
 **=========================================================================
 **  We use a variety of tables and constants to help decode and validate
 **  UTF-8 data.
 */

 /* Total length, in bytes, of the UTF-8 sequence introduced by a given
 ** initial byte; the initial byte itself is the array index.  A zero entry
 ** marks a byte that can never start a sequence (a continuation byte, or
 ** 0xFE/0xFF).  The table is never modified, hence 'const'.
 **
 ** This table was generated using a Perl script and information from the
 ** UTF-8 standard.
 **
 ** Fredrik Lundh's UTF-8 decoder Python 2.0 uses a similar table.  But
 ** since Python 2.0 has the icky CNRI license, I regenerated this
 ** table from scratch and wrote my own decoder. */
 static const unsigned char utf8_seq_length[256] = {
     /* 0x00-0x7F: 0xxxxxxx, single-byte (ASCII) characters. */
     1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
     1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
     1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
     1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
     1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
     1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
     1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
     1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
     /* 0x80-0xBF: 10xxxxxx, continuation bytes; illegal as a first byte. */
     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
     /* 0xC0-0xDF: 110xxxxx, two-byte sequences. */
     2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
     2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
     /* 0xE0-0xEF: 1110xxxx, three-byte sequences. */
     3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
     /* 0xF0-0xFD: four-, five- and six-byte sequences; 0xFE-0xFF: illegal. */
     4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 6, 6, 0, 0
 };

 /* The minimum legal character value for a UTF-8 sequence of the given
 ** length (the length is the array index).  We have to check this to avoid
 ** accepting "overlong" UTF-8 sequences, which use more bytes than necessary
 ** to encode a given character.  Such sequences are commonly used by evil
 ** people to bypass filters and security checks.  This table is based on the
 ** UTF-8-test.txt file by Markus Kuhn <mkuhn@acm.org>.
 **
 ** The table is read-only, hence 'const'. */
 static const wchar_t utf8_min_char_for_length[4] = {
     0,          /* Length 0: Not used (meaningless) */
     0x0000,     /* Length 1: Not used (special-cased) */
     0x0080,     /* Length 2 */
     0x0800      /* Length 3 */

     /* Lengths 4, 5 and 6 would have the minima 0x00010000, 0x00200000 and
     ** 0x04000000 respectively.  They are deliberately left out: they are
     ** only useful on systems where wchar_t is 32 bits wide and supports
     ** full UCS-4, and the decoder in this file rejects sequences of those
     ** lengths before it ever consults this table. */
 };

 /* This is the maximum legal 16-bit (UCS-2) character.  The decoder rejects
 ** any decoded value above it, i.e. U+FFFE and U+FFFF.  Again, this
 ** information is based on UTF-8-test.txt. */
 #define UCS2_MAX_LEGAL_CHARACTER (0xFFFD)

 /* First and last UTF-16 surrogate characters.  These are *not* legal UCS-2
 ** characters--they're used to code for UCS-4 characters when using
 ** UTF-16.  They should never appear in decoded UTF-8 data!  Again, these
 ** could hypothetically be used to bypass security measures on some machines.
 ** Based on UTF-8-test.txt. */
 #define UTF16_FIRST_SURROGATE (0xD800)
 #define UTF16_LAST_SURROGATE  (0xDFFF)

 /* Is the character 'c' a UTF-8 continuation character?  True when the top
 ** two bits of 'c' are binary 10 (10xxxxxx). */
 #define IS_CONTINUATION(c) (((c) & 0xC0) == 0x80)

 /* Maximum number of bytes needed to encode a supported character.  The
 ** encoder uses this to size its output buffer. */
 #define MAX_ENCODED_BYTES (3)


 /*=========================================================================
 **  decode_utf8
 **=========================================================================
 **  Internal routine which decodes (or merely validates) a UTF-8 string.
 **
 **    env          - error environment.  On any error a fault is recorded
 **                   here and decoding stops.
 **    utf8_data    - the bytes to decode.  Only utf8_len bytes are read, so
 **                   no terminating NUL is required.
 **    utf8_len     - number of bytes in utf8_data.
 **    io_buff      - output buffer, or NULL to validate only.  At most one
 **                   wchar_t is written per input byte, so room for
 **                   utf8_len elements is always sufficient.
 **    out_buff_len - receives the number of wide characters written, or 0
 **                   if a fault occurred.  Must be NULL exactly when
 **                   io_buff is NULL.
 **
 **  We assume that wchar_t holds a single UCS-2 character in native-endian
 **  byte ordering.
 */

 static void
 decode_utf8(xmlrpc_env * const env,
             const char * const utf8_data,
             size_t       const utf8_len,
             wchar_t *    const io_buff,
             size_t *     const out_buff_len) {

     size_t i, length, out_pos;
     /* Raw bytes are held as unsigned char so that the code does not depend
     ** on whether plain 'char' is signed on this platform. */
     unsigned char init, con1, con2;
     wchar_t wc;

     XMLRPC_ASSERT_ENV_OK(env);
     XMLRPC_ASSERT_PTR_OK(utf8_data);
     XMLRPC_ASSERT((!io_buff && !out_buff_len) ||
                   (io_buff && out_buff_len));

     /* Suppress GCC warning about possibly undefined variable. */
     wc = 0;

     i = 0;
     out_pos = 0;
     while (i < utf8_len) {
         init = (unsigned char) utf8_data[i];
         if (init < 0x80) {
             /* Convert ASCII character to wide character. */
             wc = init;
             i++;
         } else {
             /* Look up the length of this UTF-8 sequence. */
             length = utf8_seq_length[init];

             /* Check to make sure we have enough bytes to convert.  The
             ** loop condition guarantees i < utf8_len, so the subtraction
             ** cannot wrap around. */
             if (length > utf8_len - i) {
                 XMLRPC_FAIL(env, XMLRPC_INVALID_UTF8_ERROR,
                             "Truncated UTF-8 sequence");
             }

             /* Decode a multibyte UTF-8 sequence. */
             switch (length) {
             case 0:
                 XMLRPC_FAIL(env, XMLRPC_INVALID_UTF8_ERROR,
                             "Invalid UTF-8 initial byte");

             case 2:
                 /* 110xxxxx 10xxxxxx */
                 con1 = (unsigned char) utf8_data[i+1];
                 if (!IS_CONTINUATION(con1)) {
                     XMLRPC_FAIL(env, XMLRPC_INVALID_UTF8_ERROR,
                                 "UTF-8 sequence too short");
                 }
                 wc = ((((wchar_t) (init & 0x1F)) <<  6) |
                       (((wchar_t) (con1 & 0x3F))));
                 break;

             case 3:
                 /* 1110xxxx 10xxxxxx 10xxxxxx */
                 con1 = (unsigned char) utf8_data[i+1];
                 con2 = (unsigned char) utf8_data[i+2];
                 if (!IS_CONTINUATION(con1) || !IS_CONTINUATION(con2)) {
                     XMLRPC_FAIL(env, XMLRPC_INVALID_UTF8_ERROR,
                                 "UTF-8 sequence too short");
                 }
                 wc = ((((wchar_t) (init & 0x0F)) << 12) |
                       (((wchar_t) (con1 & 0x3F)) <<  6) |
                       (((wchar_t) (con2 & 0x3F))));
                 break;

             case 4:
                 /* 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx */
             case 5:
                 /* 111110xx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx */
             case 6:
                 /* 1111110x 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx */
                 XMLRPC_FAIL(env, XMLRPC_INVALID_UTF8_ERROR,
                             "UCS-4 characters not supported");

             default:
                 /* Unreachable unless utf8_seq_length is wrong: a byte with
                 ** its high bit set must map to 0 or 2..6.  Asserting on a
                 ** string literal would always pass, so assert on a false
                 ** condition and, for builds without assertions, report an
                 ** internal error rather than carrying on. */
                 XMLRPC_ASSERT(0);
                 XMLRPC_FAIL(env, XMLRPC_INTERNAL_ERROR,
                             "Error in UTF-8 decoder tables");
             }

             /* Advance to the end of the sequence. */
             i += length;

             /* Check for illegal UCS-2 characters. */
             if (wc > UCS2_MAX_LEGAL_CHARACTER) {
                 XMLRPC_FAIL(env, XMLRPC_INVALID_UTF8_ERROR,
                             "UCS-2 characters > U+FFFD are illegal");
             }

             /* Check for UTF-16 surrogates. */
             if (UTF16_FIRST_SURROGATE <= wc && wc <= UTF16_LAST_SURROGATE) {
                 XMLRPC_FAIL(env, XMLRPC_INVALID_UTF8_ERROR,
                             "UTF-16 surrogates may not appear in UTF-8 data");
             }

             /* Check for overlong sequences.  Only lengths 2 and 3 reach
             ** this point, so the index is always within the table. */
             if (wc < utf8_min_char_for_length[length]) {
                 XMLRPC_FAIL(env, XMLRPC_INVALID_UTF8_ERROR,
                             "Overlong UTF-8 sequence not allowed");
             }
         }

         /* If we have a buffer, write our character to it. */
         if (io_buff) {
             io_buff[out_pos++] = wc;
         }
     }

     /* Record the number of characters we found. */
     if (out_buff_len)
         *out_buff_len = out_pos;

  cleanup:
     /* On failure, never report a partial length to the caller. */
     if (env->fault_occurred) {
         if (out_buff_len)
             *out_buff_len = 0;
     }
 }


 /*=========================================================================
 **  xmlrpc_validate_utf8
 **=========================================================================
 **  Check that the utf8_len bytes at utf8_data form valid UTF-8.  Nothing
 **  is returned; if the data is invalid, a fault is recorded in env.
 */

 void
 xmlrpc_validate_utf8 (xmlrpc_env * const env,
                       const char * const utf8_data,
                       size_t       const utf8_len) {

     /* Passing no output buffer and no length pointer puts the decoder
     ** into validate-only mode. */
     wchar_t * const no_buffer = NULL;
     size_t *  const no_length = NULL;

     decode_utf8(env, utf8_data, utf8_len, no_buffer, no_length);
 }


 /*=========================================================================
 **  xmlrpc_utf8_to_wcs
 **=========================================================================
 **  Decode UTF-8 string to a "wide character string".  This function
 **  returns an xmlrpc_mem_block with an element type of wchar_t, resized to
 **  the number of characters actually decoded.  Don't try to interpret the
 **  block in a bytewise fashion--it won't work in any useful or portable
 **  fashion.
 **
 **    env       - error environment; a fault is recorded here on failure.
 **    utf8_data - the bytes to decode (utf8_len of them).
 **    utf8_len  - number of bytes in utf8_data.
 **
 **  Returns NULL if a fault occurred (invalid UTF-8, allocation failure or
 **  an input too long to size a buffer for).
 */

 xmlrpc_mem_block *xmlrpc_utf8_to_wcs (xmlrpc_env *env,
                                       char *utf8_data,
                                       size_t utf8_len)
 {
     xmlrpc_mem_block *output;
     size_t wcs_length;

     XMLRPC_ASSERT_ENV_OK(env);
     XMLRPC_ASSERT_PTR_OK(utf8_data);

     /* Give the cleanup code well-defined values on every path. */
     output = NULL;
     wcs_length = 0;

     /* The block below holds utf8_len wchar_t elements.  Refuse lengths
     ** whose byte count would wrap around size_t, which would otherwise
     ** yield a buffer far smaller than the decoder expects. */
     if (utf8_len > ((size_t) -1) / sizeof(wchar_t)) {
         XMLRPC_FAIL(env, XMLRPC_INTERNAL_ERROR,
                     "UTF-8 string too long to decode");
     }

     /* Allocate a memory block large enough to hold any possible output.
     ** We assume that each byte of the input may decode to a wchar_t. */
     output = XMLRPC_TYPED_MEM_BLOCK_NEW(wchar_t, env, utf8_len);
     XMLRPC_FAIL_IF_FAULT(env);

     /* Decode the UTF-8 data. */
     decode_utf8(env, utf8_data, utf8_len,
                 XMLRPC_TYPED_MEM_BLOCK_CONTENTS(wchar_t, output),
                 &wcs_length);
     XMLRPC_FAIL_IF_FAULT(env);

     /* Make sure we didn't overrun our buffer. */
     XMLRPC_ASSERT(wcs_length <= utf8_len);

     /* Correct the length of the memory block. */
     XMLRPC_TYPED_MEM_BLOCK_RESIZE(wchar_t, env, output, wcs_length);
     XMLRPC_FAIL_IF_FAULT(env);

  cleanup:
     if (env->fault_occurred) {
         if (output)
             xmlrpc_mem_block_free(output);
         return NULL;
     }
     return output;
 }


 /*=========================================================================
 **  xmlrpc_wcs_to_utf8
 **=========================================================================
 **  Encode a "wide character string" as UTF-8.
 **
 **    env      - error environment; a fault is recorded here on failure.
 **    wcs_data - the wide characters to encode (wcs_len of them).
 **    wcs_len  - number of elements in wcs_data.
 **
 **  Returns an xmlrpc_mem_block of char holding the encoded bytes (resized
 **  to the number of bytes actually used, with no terminating NUL added
 **  here), or NULL if a fault occurred.
 **
 **  NOTE(review): unlike the decoder, this routine does not reject UTF-16
 **  surrogates or U+FFFE/U+FFFF, so its output is not guaranteed to pass
 **  xmlrpc_validate_utf8.  Left unchanged pending confirmation of what
 **  callers expect.
 */

 xmlrpc_mem_block *xmlrpc_wcs_to_utf8 (xmlrpc_env *env,
                                       wchar_t *wcs_data,
                                       size_t wcs_len)
 {
     size_t estimate, bytes_used, i;
     xmlrpc_mem_block *output;
     unsigned char *buffer;
     unsigned long code;

     XMLRPC_ASSERT_ENV_OK(env);
     XMLRPC_ASSERT_PTR_OK(wcs_data);

     /* Give the cleanup code a well-defined value on every path. */
     output = NULL;

     /* Refuse lengths for which the worst-case size estimate would wrap
     ** around size_t and produce an undersized buffer. */
     if (wcs_len > ((size_t) -1) / MAX_ENCODED_BYTES) {
         XMLRPC_FAIL(env, XMLRPC_INTERNAL_ERROR,
                     "Wide character string too long to encode");
     }

     /* Allocate a memory block large enough to hold any possible output.
     ** We assume that every wchar might encode to the maximum length. */
     estimate = wcs_len * MAX_ENCODED_BYTES;
     output = XMLRPC_TYPED_MEM_BLOCK_NEW(char, env, estimate);
     XMLRPC_FAIL_IF_FAULT(env);

     /* Output our characters. */
     buffer = (unsigned char*) XMLRPC_TYPED_MEM_BLOCK_CONTENTS(char, output);
     bytes_used = 0;
     for (i = 0; i < wcs_len; i++) {
         /* Compare as an unsigned value.  Where wchar_t is a signed type, a
         ** negative element then becomes a huge number and is rejected
         ** below instead of being silently emitted as a 7-bit character. */
         code = (unsigned long) wcs_data[i];
         if (code <= 0x007F) {
             /* 0xxxxxxx */
             buffer[bytes_used++] = (unsigned char) code;
         } else if (code <= 0x07FF) {
             /* 110xxxxx 10xxxxxx */
             buffer[bytes_used++] = (unsigned char) (0xC0 | (code >> 6));
             buffer[bytes_used++] = (unsigned char) (0x80 | (code & 0x3F));
         } else if (code <= 0xFFFF) {
             /* 1110xxxx 10xxxxxx 10xxxxxx */
             buffer[bytes_used++] = (unsigned char) (0xE0 | (code >> 12));
             buffer[bytes_used++] =
                 (unsigned char) (0x80 | ((code >> 6) & 0x3F));
             buffer[bytes_used++] = (unsigned char) (0x80 | (code & 0x3F));
         } else {
             XMLRPC_FAIL(env, XMLRPC_INTERNAL_ERROR,
                         "Don't know how to encode UCS-4 characters yet");
         }
     }

     /* Make sure we didn't overrun our buffer. */
     XMLRPC_ASSERT(bytes_used <= estimate);

     /* Correct the length of the memory block. */
     XMLRPC_TYPED_MEM_BLOCK_RESIZE(char, env, output, bytes_used);
     XMLRPC_FAIL_IF_FAULT(env);

  cleanup:
     if (env->fault_occurred) {
         if (output)
             xmlrpc_mem_block_free(output);
         return NULL;
     }
     return output;
 }

 #endif /* HAVE_UNICODE_WCHAR */
	/* Copyright (C) 2001 by Eric Kidd. All rights reserved.
	**
	** Redistribution and use in source and binary forms, with or without
	** modification, are permitted provided that the following conditions
	** are met:
	** 1. Redistributions of source code must retain the above copyright
	** notice, this list of conditions and the following disclaimer.
	** 2. Redistributions in binary form must reproduce the above copyright
	** notice, this list of conditions and the following disclaimer in the
	** documentation and/or other materials provided with the distribution.
	** 3. The name of the author may not be used to endorse or promote products
	** derived from this software without specific prior written permission.
	**
	** THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
	** ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
	** IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
	** ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
	** FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
	** DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
	** OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
	** HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
	** LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
	** OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
	** SUCH DAMAGE. */


	/*=========================================================================
	** XML-RPC UTF-8 Utilities
	**=========================================================================
	** Routines for validating, encoding and decoding UTF-8 data. We try to
	** be very, very strict about invalid UTF-8 data.
	**
	** All of the code in this file assumes that your machine represents
	** wchar_t as a 16-bit (or wider) character containing UCS-2 data. If this
	** assumption is incorrect, you may need to replace this file.
	**
	** For lots of information on Unicode and UTF-8 decoding, see:
	** http://www.cl.cam.ac.uk/~mgk25/unicode.html
	*/

	#include "xmlrpc_config.h"

	#include "xmlrpc.h"

	#ifdef HAVE_UNICODE_WCHAR

	/*=========================================================================
	** Tables and Constants
	**=========================================================================
	** We use a variety of tables and constants to help decode and validate
	** UTF-8 data.
	*/

	/* Total length, in bytes, of the UTF-8 sequence introduced by a given
	** initial byte; the initial byte itself is the array index.  A zero entry
	** marks a byte that can never start a sequence (a continuation byte, or
	** 0xFE/0xFF).  The table is never modified, hence 'const'.
	**
	** This table was generated using a Perl script and information from the
	** UTF-8 standard.
	**
	** Fredrik Lundh's UTF-8 decoder Python 2.0 uses a similar table.  But
	** since Python 2.0 has the icky CNRI license, I regenerated this
	** table from scratch and wrote my own decoder. */
	static const unsigned char utf8_seq_length[256] = {
	    /* 0x00-0x7F: 0xxxxxxx, single-byte (ASCII) characters. */
	    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	    /* 0x80-0xBF: 10xxxxxx, continuation bytes; illegal as a first byte. */
	    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	    /* 0xC0-0xDF: 110xxxxx, two-byte sequences. */
	    2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
	    2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
	    /* 0xE0-0xEF: 1110xxxx, three-byte sequences. */
	    3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
	    /* 0xF0-0xFD: four-, five- and six-byte sequences; 0xFE-0xFF: illegal. */
	    4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 6, 6, 0, 0
	};

	/* The minimum legal character value for a UTF-8 sequence of the given
	** length (the length is the array index).  We have to check this to avoid
	** accepting "overlong" UTF-8 sequences, which use more bytes than necessary
	** to encode a given character.  Such sequences are commonly used by evil
	** people to bypass filters and security checks.  This table is based on the
	** UTF-8-test.txt file by Markus Kuhn <mkuhn@acm.org>.
	**
	** The table is read-only, hence 'const'. */
	static const wchar_t utf8_min_char_for_length[4] = {
	    0,          /* Length 0: Not used (meaningless) */
	    0x0000,     /* Length 1: Not used (special-cased) */
	    0x0080,     /* Length 2 */
	    0x0800      /* Length 3 */

	    /* Lengths 4, 5 and 6 would have the minima 0x00010000, 0x00200000 and
	    ** 0x04000000 respectively.  They are deliberately left out: they are
	    ** only useful on systems where wchar_t is 32 bits wide and supports
	    ** full UCS-4, and the decoder in this file rejects sequences of those
	    ** lengths before it ever consults this table. */
	};

	/* This is the maximum legal 16-bit (UCS-2) character.  The decoder rejects
	** any decoded value above it, i.e. U+FFFE and U+FFFF.  Again, this
	** information is based on UTF-8-test.txt. */
	#define UCS2_MAX_LEGAL_CHARACTER (0xFFFD)

	/* First and last UTF-16 surrogate characters.  These are *not* legal UCS-2
	** characters--they're used to code for UCS-4 characters when using
	** UTF-16.  They should never appear in decoded UTF-8 data!  Again, these
	** could hypothetically be used to bypass security measures on some machines.
	** Based on UTF-8-test.txt. */
	#define UTF16_FIRST_SURROGATE (0xD800)
	#define UTF16_LAST_SURROGATE (0xDFFF)

	/* Is the character 'c' a UTF-8 continuation character?  True when the top
	** two bits of 'c' are binary 10 (10xxxxxx). */
	#define IS_CONTINUATION(c) (((c) & 0xC0) == 0x80)

	/* Maximum number of bytes needed to encode a supported character.  The
	** encoder uses this to size its output buffer. */
	#define MAX_ENCODED_BYTES (3)


	/*=========================================================================
	** decode_utf8
	**=========================================================================
	** Internal routine which decodes (or merely validates) a UTF-8 string.
	**
	**   env          - error environment.  On any error a fault is recorded
	**                  here and decoding stops.
	**   utf8_data    - the bytes to decode.  Only utf8_len bytes are read, so
	**                  no terminating NUL is required.
	**   utf8_len     - number of bytes in utf8_data.
	**   io_buff      - output buffer, or NULL to validate only.  At most one
	**                  wchar_t is written per input byte, so room for
	**                  utf8_len elements is always sufficient.
	**   out_buff_len - receives the number of wide characters written, or 0
	**                  if a fault occurred.  Must be NULL exactly when
	**                  io_buff is NULL.
	**
	** We assume that wchar_t holds a single UCS-2 character in native-endian
	** byte ordering.
	*/

	static void
	decode_utf8(xmlrpc_env * const env,
	            const char * const utf8_data,
	            size_t       const utf8_len,
	            wchar_t *    const io_buff,
	            size_t *     const out_buff_len) {

	    size_t i, length, out_pos;
	    /* Raw bytes are held as unsigned char so that the code does not depend
	    ** on whether plain 'char' is signed on this platform. */
	    unsigned char init, con1, con2;
	    wchar_t wc;

	    XMLRPC_ASSERT_ENV_OK(env);
	    XMLRPC_ASSERT_PTR_OK(utf8_data);
	    XMLRPC_ASSERT((!io_buff && !out_buff_len) ||
	                  (io_buff && out_buff_len));

	    /* Suppress GCC warning about possibly undefined variable. */
	    wc = 0;

	    i = 0;
	    out_pos = 0;
	    while (i < utf8_len) {
	        init = (unsigned char) utf8_data[i];
	        if (init < 0x80) {
	            /* Convert ASCII character to wide character. */
	            wc = init;
	            i++;
	        } else {
	            /* Look up the length of this UTF-8 sequence. */
	            length = utf8_seq_length[init];

	            /* Check to make sure we have enough bytes to convert.  The
	            ** loop condition guarantees i < utf8_len, so the subtraction
	            ** cannot wrap around. */
	            if (length > utf8_len - i) {
	                XMLRPC_FAIL(env, XMLRPC_INVALID_UTF8_ERROR,
	                            "Truncated UTF-8 sequence");
	            }

	            /* Decode a multibyte UTF-8 sequence. */
	            switch (length) {
	            case 0:
	                XMLRPC_FAIL(env, XMLRPC_INVALID_UTF8_ERROR,
	                            "Invalid UTF-8 initial byte");

	            case 2:
	                /* 110xxxxx 10xxxxxx */
	                con1 = (unsigned char) utf8_data[i+1];
	                if (!IS_CONTINUATION(con1)) {
	                    XMLRPC_FAIL(env, XMLRPC_INVALID_UTF8_ERROR,
	                                "UTF-8 sequence too short");
	                }
	                wc = ((((wchar_t) (init & 0x1F)) << 6) |
	                      (((wchar_t) (con1 & 0x3F))));
	                break;

	            case 3:
	                /* 1110xxxx 10xxxxxx 10xxxxxx */
	                con1 = (unsigned char) utf8_data[i+1];
	                con2 = (unsigned char) utf8_data[i+2];
	                if (!IS_CONTINUATION(con1) || !IS_CONTINUATION(con2)) {
	                    XMLRPC_FAIL(env, XMLRPC_INVALID_UTF8_ERROR,
	                                "UTF-8 sequence too short");
	                }
	                wc = ((((wchar_t) (init & 0x0F)) << 12) |
	                      (((wchar_t) (con1 & 0x3F)) << 6) |
	                      (((wchar_t) (con2 & 0x3F))));
	                break;

	            case 4:
	                /* 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx */
	            case 5:
	                /* 111110xx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx */
	            case 6:
	                /* 1111110x 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx */
	                XMLRPC_FAIL(env, XMLRPC_INVALID_UTF8_ERROR,
	                            "UCS-4 characters not supported");

	            default:
	                /* Unreachable unless utf8_seq_length is wrong: a byte with
	                ** its high bit set must map to 0 or 2..6.  Asserting on a
	                ** string literal would always pass, so assert on a false
	                ** condition and, for builds without assertions, report an
	                ** internal error rather than carrying on. */
	                XMLRPC_ASSERT(0);
	                XMLRPC_FAIL(env, XMLRPC_INTERNAL_ERROR,
	                            "Error in UTF-8 decoder tables");
	            }

	            /* Advance to the end of the sequence. */
	            i += length;

	            /* Check for illegal UCS-2 characters. */
	            if (wc > UCS2_MAX_LEGAL_CHARACTER) {
	                XMLRPC_FAIL(env, XMLRPC_INVALID_UTF8_ERROR,
	                            "UCS-2 characters > U+FFFD are illegal");
	            }

	            /* Check for UTF-16 surrogates. */
	            if (UTF16_FIRST_SURROGATE <= wc && wc <= UTF16_LAST_SURROGATE) {
	                XMLRPC_FAIL(env, XMLRPC_INVALID_UTF8_ERROR,
	                            "UTF-16 surrogates may not appear in UTF-8 data");
	            }

	            /* Check for overlong sequences.  Only lengths 2 and 3 reach
	            ** this point, so the index is always within the table. */
	            if (wc < utf8_min_char_for_length[length]) {
	                XMLRPC_FAIL(env, XMLRPC_INVALID_UTF8_ERROR,
	                            "Overlong UTF-8 sequence not allowed");
	            }
	        }

	        /* If we have a buffer, write our character to it. */
	        if (io_buff) {
	            io_buff[out_pos++] = wc;
	        }
	    }

	    /* Record the number of characters we found. */
	    if (out_buff_len)
	        *out_buff_len = out_pos;

	 cleanup:
	    /* On failure, never report a partial length to the caller. */
	    if (env->fault_occurred) {
	        if (out_buff_len)
	            *out_buff_len = 0;
	    }
	}



	/*=========================================================================
	** xmlrpc_validate_utf8
	**=========================================================================
	** Check that the utf8_len bytes at utf8_data form valid UTF-8.  Nothing
	** is returned; if the data is invalid, a fault is recorded in env.
	*/

	void
	xmlrpc_validate_utf8 (xmlrpc_env * const env,
	                      const char * const utf8_data,
	                      size_t const utf8_len) {

	    /* Passing no output buffer and no length pointer puts the decoder
	    ** into validate-only mode. */
	    wchar_t * const no_buffer = NULL;
	    size_t *  const no_length = NULL;

	    decode_utf8(env, utf8_data, utf8_len, no_buffer, no_length);
	}


	/*=========================================================================
	** xmlrpc_utf8_to_wcs
	**=========================================================================
	** Decode UTF-8 string to a "wide character string".  This function
	** returns an xmlrpc_mem_block with an element type of wchar_t, resized to
	** the number of characters actually decoded.  Don't try to interpret the
	** block in a bytewise fashion--it won't work in any useful or portable
	** fashion.
	**
	**   env       - error environment; a fault is recorded here on failure.
	**   utf8_data - the bytes to decode (utf8_len of them).
	**   utf8_len  - number of bytes in utf8_data.
	**
	** Returns NULL if a fault occurred (invalid UTF-8, allocation failure or
	** an input too long to size a buffer for).
	*/

	xmlrpc_mem_block *xmlrpc_utf8_to_wcs (xmlrpc_env *env,
	                                      char *utf8_data,
	                                      size_t utf8_len)
	{
	    xmlrpc_mem_block *output;
	    size_t wcs_length;

	    XMLRPC_ASSERT_ENV_OK(env);
	    XMLRPC_ASSERT_PTR_OK(utf8_data);

	    /* Give the cleanup code well-defined values on every path. */
	    output = NULL;
	    wcs_length = 0;

	    /* The block below holds utf8_len wchar_t elements.  Refuse lengths
	    ** whose byte count would wrap around size_t, which would otherwise
	    ** yield a buffer far smaller than the decoder expects. */
	    if (utf8_len > ((size_t) -1) / sizeof(wchar_t)) {
	        XMLRPC_FAIL(env, XMLRPC_INTERNAL_ERROR,
	                    "UTF-8 string too long to decode");
	    }

	    /* Allocate a memory block large enough to hold any possible output.
	    ** We assume that each byte of the input may decode to a wchar_t. */
	    output = XMLRPC_TYPED_MEM_BLOCK_NEW(wchar_t, env, utf8_len);
	    XMLRPC_FAIL_IF_FAULT(env);

	    /* Decode the UTF-8 data. */
	    decode_utf8(env, utf8_data, utf8_len,
	                XMLRPC_TYPED_MEM_BLOCK_CONTENTS(wchar_t, output),
	                &wcs_length);
	    XMLRPC_FAIL_IF_FAULT(env);

	    /* Make sure we didn't overrun our buffer. */
	    XMLRPC_ASSERT(wcs_length <= utf8_len);

	    /* Correct the length of the memory block. */
	    XMLRPC_TYPED_MEM_BLOCK_RESIZE(wchar_t, env, output, wcs_length);
	    XMLRPC_FAIL_IF_FAULT(env);

	 cleanup:
	    if (env->fault_occurred) {
	        if (output)
	            xmlrpc_mem_block_free(output);
	        return NULL;
	    }
	    return output;
	}


	/*=========================================================================
	** xmlrpc_wcs_to_utf8
	**=========================================================================
	** Encode a "wide character string" as UTF-8.
	**
	**   env      - error environment; a fault is recorded here on failure.
	**   wcs_data - the wide characters to encode (wcs_len of them).
	**   wcs_len  - number of elements in wcs_data.
	**
	** Returns an xmlrpc_mem_block of char holding the encoded bytes (resized
	** to the number of bytes actually used, with no terminating NUL added
	** here), or NULL if a fault occurred.
	**
	** NOTE(review): unlike the decoder, this routine does not reject UTF-16
	** surrogates or U+FFFE/U+FFFF, so its output is not guaranteed to pass
	** xmlrpc_validate_utf8.  Left unchanged pending confirmation of what
	** callers expect.
	*/

	xmlrpc_mem_block *xmlrpc_wcs_to_utf8 (xmlrpc_env *env,
	                                      wchar_t *wcs_data,
	                                      size_t wcs_len)
	{
	    size_t estimate, bytes_used, i;
	    xmlrpc_mem_block *output;
	    unsigned char *buffer;
	    unsigned long code;

	    XMLRPC_ASSERT_ENV_OK(env);
	    XMLRPC_ASSERT_PTR_OK(wcs_data);

	    /* Give the cleanup code a well-defined value on every path. */
	    output = NULL;

	    /* Refuse lengths for which the worst-case size estimate would wrap
	    ** around size_t and produce an undersized buffer. */
	    if (wcs_len > ((size_t) -1) / MAX_ENCODED_BYTES) {
	        XMLRPC_FAIL(env, XMLRPC_INTERNAL_ERROR,
	                    "Wide character string too long to encode");
	    }

	    /* Allocate a memory block large enough to hold any possible output.
	    ** We assume that every wchar might encode to the maximum length. */
	    estimate = wcs_len * MAX_ENCODED_BYTES;
	    output = XMLRPC_TYPED_MEM_BLOCK_NEW(char, env, estimate);
	    XMLRPC_FAIL_IF_FAULT(env);

	    /* Output our characters. */
	    buffer = (unsigned char*) XMLRPC_TYPED_MEM_BLOCK_CONTENTS(char, output);
	    bytes_used = 0;
	    for (i = 0; i < wcs_len; i++) {
	        /* Compare as an unsigned value.  Where wchar_t is a signed type, a
	        ** negative element then becomes a huge number and is rejected
	        ** below instead of being silently emitted as a 7-bit character. */
	        code = (unsigned long) wcs_data[i];
	        if (code <= 0x007F) {
	            /* 0xxxxxxx */
	            buffer[bytes_used++] = (unsigned char) code;
	        } else if (code <= 0x07FF) {
	            /* 110xxxxx 10xxxxxx */
	            buffer[bytes_used++] = (unsigned char) (0xC0 | (code >> 6));
	            buffer[bytes_used++] = (unsigned char) (0x80 | (code & 0x3F));
	        } else if (code <= 0xFFFF) {
	            /* 1110xxxx 10xxxxxx 10xxxxxx */
	            buffer[bytes_used++] = (unsigned char) (0xE0 | (code >> 12));
	            buffer[bytes_used++] =
	                (unsigned char) (0x80 | ((code >> 6) & 0x3F));
	            buffer[bytes_used++] = (unsigned char) (0x80 | (code & 0x3F));
	        } else {
	            XMLRPC_FAIL(env, XMLRPC_INTERNAL_ERROR,
	                        "Don't know how to encode UCS-4 characters yet");
	        }
	    }

	    /* Make sure we didn't overrun our buffer. */
	    XMLRPC_ASSERT(bytes_used <= estimate);

	    /* Correct the length of the memory block. */
	    XMLRPC_TYPED_MEM_BLOCK_RESIZE(char, env, output, bytes_used);
	    XMLRPC_FAIL_IF_FAULT(env);

	 cleanup:
	    if (env->fault_occurred) {
	        if (output)
	            xmlrpc_mem_block_free(output);
	        return NULL;
	    }
	    return output;
	}

	#endif /* HAVE_UNICODE_WCHAR */