[Expat-checkins] CVS: expat/tests runtests.c,1.6,1.7

Fri Apr 19 11:26:01 2002

Update of /cvsroot/expat/expat/tests
In directory usw-pr-cvs1:/tmp/cvs-serv10976/tests

Modified Files:
	runtests.c 
Log Message:
basic_setup():  Remove bogus assumption that all tests would use US-ASCII
    data; let Expat determine the proper encoding from the XML source text.

Added several tests relating to supposed mis-interpretation of Latin-1;
these show that Expat (at least in the CVS version) is properly decoding
the Latin-1 text and generating the proper UTF-8 output.

This closes SF bugs #491986 and #514281.

Index: runtests.c
===================================================================
RCS file: /cvsroot/expat/expat/tests/runtests.c,v
retrieving revision 1.6
retrieving revision 1.7
diff -C2 -d -r1.6 -r1.7
*** runtests.c	18 Nov 2001 13:56:01 -0000	1.6
--- runtests.c	19 Apr 2002 18:25:07 -0000	1.7
***************
*** 3,6 ****
--- 3,7 ----
  #include <stdlib.h>
  #include <stdio.h>
+ #include <string.h>

  #include "expat.h"
***************
*** 13,17 ****
  basic_setup(void)
  {
!     parser = XML_ParserCreate("us-ascii");
      if (parser == NULL)
          fail("Parser not created.");
--- 14,18 ----
  basic_setup(void)
  {
!     parser = XML_ParserCreate(NULL);
      if (parser == NULL)
          fail("Parser not created.");
***************
*** 30,43 ****
   */
  static void
! xml_failure(void)
  {
!     char buffer[256];
!     sprintf(buffer, "%s (line %d, offset %d)",
              XML_ErrorString(XML_GetErrorCode(parser)),
              XML_GetCurrentLineNumber(parser),
!             XML_GetCurrentColumnNumber(parser));
      fail(buffer);
  }

  START_TEST(test_nul_byte)
  {
--- 31,47 ----
   */
  static void
! _xml_failure(const char *file, int line)
  {
!     char buffer[1024];
!     sprintf(buffer, "%s (line %d, offset %d)\n    reported from %s, line %d",
              XML_ErrorString(XML_GetErrorCode(parser)),
              XML_GetCurrentLineNumber(parser),
!             XML_GetCurrentColumnNumber(parser),
!             file, line);
      fail(buffer);
  }

+ #define xml_failure() _xml_failure(__FILE__, __LINE__)
+ 
  START_TEST(test_nul_byte)
  {
***************
*** 112,115 ****
--- 116,217 ----

+ typedef struct 
+ {
+     int count;
+     XML_Char data[1024];
+ } CharData;
+ 
+ static void
+ accumulate_characters(void *userData, const XML_Char *s, int len)
+ {
+     CharData *storage = (CharData *)userData;
+     if (len + storage->count < sizeof(storage->data)) {
+         memcpy(storage->data + storage->count, s, len);
+         storage->count += len;
+     }
+ }
+ 
+ static void
+ check_characters(CharData *storage, XML_Char *expected)
+ {
+     char buffer[1024];
+     int len = strlen(expected);
+     if (len != storage->count) {
+         sprintf(buffer, "wrong number of data characters: got %d, expected %d",
+                 storage->count, len);
+         fail(buffer);
+         return;
+     }
+     if (memcmp(expected, storage->data, len) != 0)
+         fail("got bad data bytes");
+ }
+ 
+ static void
+ run_character_check(XML_Char *text, XML_Char *expected)
+ {
+     CharData storage;
+     storage.count = 0;
+     XML_SetUserData(parser, &storage);
+     XML_SetCharacterDataHandler(parser, accumulate_characters);
+     if (!XML_Parse(parser, text, strlen(text), 1))
+         xml_failure();
+     check_characters(&storage, expected);
+ }
+ 
+ /* Regression test for SF bug #491986. */
+ START_TEST(test_danish_latin1)
+ {
+     char *text =
+         "<?xml version='1.0' encoding='iso-8859-1'?>\n"
+         "<e>Jørgen æøåÆØÅ</e>";
+     run_character_check(text,
+              "J\xC3\xB8rgen \xC3\xA6\xC3\xB8\xC3\xA5\xC3\x86\xC3\x98\xC3\x85");
+ }
+ END_TEST
+ /* End regression test for SF bug #491986. */
+ 
+ 
+ /* Regression test for SF bug #514281. */
+ START_TEST(test_french_charref_hexidecimal)
+ {
+     char *text =
+         "<?xml version='1.0' encoding='iso-8859-1'?>\n"
+         "<doc>&#xE9;&#xE8;&#xE0;&#xE7;&#xEA;&#xC8;</doc>";
+     run_character_check(text,
+                         "\xC3\xA9\xC3\xA8\xC3\xA0\xC3\xA7\xC3\xAA\xC3\x88");
+ }
+ END_TEST
+ 
+ START_TEST(test_french_charref_decimal)
+ {
+     char *text =
+         "<?xml version='1.0' encoding='iso-8859-1'?>\n"
+         "<doc>&#233;&#232;&#224;&#231;&#234;&#200;</doc>";
+     run_character_check(text,
+                         "\xC3\xA9\xC3\xA8\xC3\xA0\xC3\xA7\xC3\xAA\xC3\x88");
+ }
+ END_TEST
+ 
+ START_TEST(test_french_latin1)
+ {
+     char *text =
+         "<?xml version='1.0' encoding='iso-8859-1'?>\n"
+         "<doc>\xE9\xE8\xE0\xE7\xEa\xC8</doc>";
+     run_character_check(text,
+                         "\xC3\xA9\xC3\xA8\xC3\xA0\xC3\xA7\xC3\xAA\xC3\x88");
+ }
+ END_TEST
+ 
+ START_TEST(test_french_utf8)
+ {
+     char *text =
+         "<?xml version='1.0' encoding='utf-8'?>\n"
+         "<doc>\xC3\xA9</doc>";
+     run_character_check(text, "\xC3\xA9");
+ }
+ END_TEST
+ /* End regression test for SF bug #514281. */
+ 
+ 
  /* Helpers used by the following test; this checks any "attr" and "refs"
   * attributes to make sure whitespace has been normalized.
***************
*** 236,239 ****
--- 338,348 ----
      tcase_add_test(tc_chars, test_bom_utf16_be);
      tcase_add_test(tc_chars, test_bom_utf16_le);
+     /* Regression test for SF bug #491986. */
+     tcase_add_test(tc_chars, test_danish_latin1);
+     /* Regression test for SF bug #514281. */
+     tcase_add_test(tc_attrs, test_french_charref_hexidecimal);
+     tcase_add_test(tc_attrs, test_french_charref_decimal);
+     tcase_add_test(tc_attrs, test_french_latin1);
+     tcase_add_test(tc_attrs, test_french_utf8);

      suite_add_tcase(s, tc_attrs);