[Expat-checkins] CVS: expat/tests runtests.c,1.6,1.7
Fred L. Drake
fdrake@users.sourceforge.net
Fri Apr 19 11:26:01 2002
Update of /cvsroot/expat/expat/tests
In directory usw-pr-cvs1:/tmp/cvs-serv10976/tests
Modified Files:
runtests.c
Log Message:
basic_setup(): Remove bogus assumption that all tests would use US-ASCII
data; let Expat determine the proper encoding from the XML source text.
Added several tests relating to supposed mis-interpretation of Latin-1;
these show that Expat (at least in the CVS version) is properly decoding
the Latin-1 text and generating the proper UTF-8 output.
This closes SF bug #491986, #514281.
Index: runtests.c
===================================================================
RCS file: /cvsroot/expat/expat/tests/runtests.c,v
retrieving revision 1.6
retrieving revision 1.7
diff -C2 -d -r1.6 -r1.7
*** runtests.c 18 Nov 2001 13:56:01 -0000 1.6
--- runtests.c 19 Apr 2002 18:25:07 -0000 1.7
***************
*** 3,6 ****
--- 3,7 ----
#include <stdlib.h>
#include <stdio.h>
+ #include <string.h>
#include "expat.h"
***************
*** 13,17 ****
basic_setup(void)
{
! parser = XML_ParserCreate("us-ascii");
if (parser == NULL)
fail("Parser not created.");
--- 14,18 ----
basic_setup(void)
{
! parser = XML_ParserCreate(NULL);
if (parser == NULL)
fail("Parser not created.");
***************
*** 30,43 ****
*/
static void
! xml_failure(void)
{
! char buffer[256];
! sprintf(buffer, "%s (line %d, offset %d)",
XML_ErrorString(XML_GetErrorCode(parser)),
XML_GetCurrentLineNumber(parser),
! XML_GetCurrentColumnNumber(parser));
fail(buffer);
}
START_TEST(test_nul_byte)
{
--- 31,47 ----
*/
static void
! _xml_failure(const char *file, int line)
{
! char buffer[1024];
! sprintf(buffer, "%s (line %d, offset %d)\n reported from %s, line %d",
XML_ErrorString(XML_GetErrorCode(parser)),
XML_GetCurrentLineNumber(parser),
! XML_GetCurrentColumnNumber(parser),
! file, line);
fail(buffer);
}
+ #define xml_failure() _xml_failure(__FILE__, __LINE__)
+
START_TEST(test_nul_byte)
{
***************
*** 112,115 ****
--- 116,217 ----
+ typedef struct
+ {
+ int count;
+ XML_Char data[1024];
+ } CharData;
+
+ static void
+ accumulate_characters(void *userData, const XML_Char *s, int len)
+ {
+ CharData *storage = (CharData *)userData;
+ if (len + storage->count < sizeof(storage->data)) {
+ memcpy(storage->data + storage->count, s, len);
+ storage->count += len;
+ }
+ }
+
+ static void
+ check_characters(CharData *storage, XML_Char *expected)
+ {
+ char buffer[1024];
+ int len = strlen(expected);
+ if (len != storage->count) {
+ sprintf(buffer, "wrong number of data characters: got %d, expected %d",
+ storage->count, len);
+ fail(buffer);
+ return;
+ }
+ if (memcmp(expected, storage->data, len) != 0)
+ fail("got bad data bytes");
+ }
+
+ static void
+ run_character_check(XML_Char *text, XML_Char *expected)
+ {
+ CharData storage;
+ storage.count = 0;
+ XML_SetUserData(parser, &storage);
+ XML_SetCharacterDataHandler(parser, accumulate_characters);
+ if (!XML_Parse(parser, text, strlen(text), 1))
+ xml_failure();
+ check_characters(&storage, expected);
+ }
+
+ /* Regression test for SF bug #491986. */
+ START_TEST(test_danish_latin1)
+ {
+ char *text =
+ "<?xml version='1.0' encoding='iso-8859-1'?>\n"
+ "<e>J\xF8rgen \xE6\xF8\xE5\xC6\xD8\xC5</e>";
+ run_character_check(text,
+ "J\xC3\xB8rgen \xC3\xA6\xC3\xB8\xC3\xA5\xC3\x86\xC3\x98\xC3\x85");
+ }
+ END_TEST
+ /* End regression test for SF bug #491986. */
+
+
+ /* Regression test for SF bug #514281. */
+ START_TEST(test_french_charref_hexidecimal)
+ {
+ char *text =
+ "<?xml version='1.0' encoding='iso-8859-1'?>\n"
+ "<doc>&#xE9;&#xE8;&#xE0;&#xE7;&#xEA;&#xC8;</doc>";
+ run_character_check(text,
+ "\xC3\xA9\xC3\xA8\xC3\xA0\xC3\xA7\xC3\xAA\xC3\x88");
+ }
+ END_TEST
+
+ START_TEST(test_french_charref_decimal)
+ {
+ char *text =
+ "<?xml version='1.0' encoding='iso-8859-1'?>\n"
+ "<doc>&#233;&#232;&#224;&#231;&#234;&#200;</doc>";
+ run_character_check(text,
+ "\xC3\xA9\xC3\xA8\xC3\xA0\xC3\xA7\xC3\xAA\xC3\x88");
+ }
+ END_TEST
+
+ START_TEST(test_french_latin1)
+ {
+ char *text =
+ "<?xml version='1.0' encoding='iso-8859-1'?>\n"
+ "<doc>\xE9\xE8\xE0\xE7\xEa\xC8</doc>";
+ run_character_check(text,
+ "\xC3\xA9\xC3\xA8\xC3\xA0\xC3\xA7\xC3\xAA\xC3\x88");
+ }
+ END_TEST
+
+ START_TEST(test_french_utf8)
+ {
+ char *text =
+ "<?xml version='1.0' encoding='utf-8'?>\n"
+ "<doc>\xC3\xA9</doc>";
+ run_character_check(text, "\xC3\xA9");
+ }
+ END_TEST
+ /* End regression test for SF bug #514281. */
+
+
/* Helpers used by the following test; this checks any "attr" and "refs"
* attributes to make sure whitespace has been normalized.
***************
*** 236,239 ****
--- 338,348 ----
tcase_add_test(tc_chars, test_bom_utf16_be);
tcase_add_test(tc_chars, test_bom_utf16_le);
+ /* Regression test for SF bug #491986. */
+ tcase_add_test(tc_chars, test_danish_latin1);
+ /* Regression test for SF bug #514281. */
+ tcase_add_test(tc_attrs, test_french_charref_hexidecimal);
+ tcase_add_test(tc_attrs, test_french_charref_decimal);
+ tcase_add_test(tc_attrs, test_french_latin1);
+ tcase_add_test(tc_attrs, test_french_utf8);
suite_add_tcase(s, tc_attrs);