Character set conversion between mac and pc

François Pinard pinard at iro.umontreal.ca
Sun Oct 1 10:46:51 EDT 2000


[Pieter Claerhout]

> Does anyone have a module which is able to convert text from a Macintosh
> character set to a Windows character set?  If not, how would one accomplish
> this in Python?  I think you will have to use the string.translate
> function, but I couldn't find out how this one works.  Any examples?
> Source code?  Help?

This is not exactly what you asked for, but it might help nevertheless,
who knows!  My `recode' program can generate conversion tables, meant
either for C or for Perl.  I append examples below, for both languages,
converting from Macintosh to CP850.

I would surely like to have an option generating them for Python, but I
wonder what the most useful format would be.  I'm not sure that a table,
like the one I generate for C, is the best-suited approach, as some massaging
is needed before string.translate can use it.  On the other hand, a table
could have its own different uses, but maybe it would be easy to derive one
from something oriented towards string.translate.  Suggestions most welcome!
If we can decide on this, I'll happily add a Python option to `recode'...


$ recode -h mac..850
/* Byte translation table for the sequence macintosh..IBM850 (reversible),
   as generated by Free `recode' 3.5d; laid out here 16 entries per row.
   Index the table with a macintosh byte value to obtain the IBM850 byte
   value.  Entries 0x00-0x7F map to themselves; entries 0x80-0xFF are a
   permutation of 0x80-0xFF, which is what makes the mapping reversible.  */

const unsigned char macintosh_IBM850[256] = {
    /* 0x00 */   0,   1,   2,   3,   4,   5,   6,   7,   8,   9,  10,  11,  12,  13,  14,  15,
    /* 0x10 */  16,  17,  18,  19,  20,  21,  22,  23,  24,  25,  26,  27,  28,  29,  30,  31,
    /* 0x20 */  32,  33,  34,  35,  36,  37,  38,  39,  40,  41,  42,  43,  44,  45,  46,  47,
    /* 0x30 */  48,  49,  50,  51,  52,  53,  54,  55,  56,  57,  58,  59,  60,  61,  62,  63,
    /* 0x40 */  64,  65,  66,  67,  68,  69,  70,  71,  72,  73,  74,  75,  76,  77,  78,  79,
    /* 0x50 */  80,  81,  82,  83,  84,  85,  86,  87,  88,  89,  90,  91,  92,  93,  94,  95,
    /* 0x60 */  96,  97,  98,  99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111,
    /* 0x70 */ 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127,
    /* 0x80 */ 142, 143, 128, 144, 165, 153, 154, 160, 133, 131, 132, 198, 134, 135, 130, 138,
    /* 0x90 */ 136, 137, 161, 141, 140, 139, 164, 162, 149, 147, 148, 228, 163, 151, 150, 129,
    /* 0xA0 */ 180, 248, 189, 156, 245, 196, 244, 225, 169, 184, 194, 239, 249, 193, 146, 157,
    /* 0xB0 */ 176, 241, 178, 179, 190, 230, 229, 203, 192, 185, 186, 166, 167, 200, 145, 155,
    /* 0xC0 */ 168, 173, 170, 195, 159, 197, 236, 174, 175, 201, 255, 183, 199, 205, 206, 219,
    /* 0xD0 */ 208, 209, 231, 232, 242, 158, 246, 187, 152, 217, 218, 207, 220, 221, 237, 223,
    /* 0xE0 */ 204, 250, 171, 177, 191, 182, 210, 181, 211, 212, 214, 215, 216, 222, 224, 226,
    /* 0xF0 */ 240, 227, 233, 234, 235, 213, 243, 252, 238, 172, 188, 251, 247, 253, 254, 202
};

$ recode -hperl/ mac..850
# Byte translation table for the sequence macintosh..IBM850 (reversible),
# as generated by Free `recode' 3.5d; laid out here 16 entries per row.
# $macintosh_IBM850[$byte] gives the IBM850 value for a macintosh byte.

@macintosh_IBM850 = (
    # 0x00 - 0x7F: every value maps to itself.
    0 .. 127,
    # 0x80
    142, 143, 128, 144, 165, 153, 154, 160, 133, 131, 132, 198, 134, 135, 130, 138,
    # 0x90
    136, 137, 161, 141, 140, 139, 164, 162, 149, 147, 148, 228, 163, 151, 150, 129,
    # 0xA0
    180, 248, 189, 156, 245, 196, 244, 225, 169, 184, 194, 239, 249, 193, 146, 157,
    # 0xB0
    176, 241, 178, 179, 190, 230, 229, 203, 192, 185, 186, 166, 167, 200, 145, 155,
    # 0xC0
    168, 173, 170, 195, 159, 197, 236, 174, 175, 201, 255, 183, 199, 205, 206, 219,
    # 0xD0
    208, 209, 231, 232, 242, 158, 246, 187, 152, 217, 218, 207, 220, 221, 237, 223,
    # 0xE0
    204, 250, 171, 177, 191, 182, 210, 181, 211, 212, 214, 215, 216, 222, 224, 226,
    # 0xF0
    240, 227, 233, 234, 235, 213, 243, 252, 238, 172, 188, 251, 247, 253, 254, 202,
);

-- 
François Pinard   http://www.iro.umontreal.ca/~pinard




More information about the Python-list mailing list