[Python-Dev] help wanted: windows locales

Fredrik Lundh <effbot@telia.com>
Wed, 5 Jul 2000 21:17:20 +0200


Here's a tentative version of a mapping between windows
identifiers and locale codes.  There are probably bugs in here;
I've tried to match data from a number of sources with
Microsoft's descriptions (as given in the comments).  Extra
bonus for filling in "??".

Most notably, those norwegians caused some problems here
(don't they always? ;-).  I'm pretty sure one of these should
be "no_NO":

    0x0414: "nb_NO", # Norwegian (Bokmal)
    0x0814: "nn_NO", # Norwegian (Nynorsk)

But which one?

Same goes for the spaniards: which one should be "es_ES"?

    0x040a: "es_??", # Spanish (Traditional Sort)
    0x0c0a: "es_??", # Spanish (Modern Sort)

thanks /F

#
# Map Windows language code to ISO 3166/ISO 639 code.  This is only
# used on older Windows platforms (most notably Windows 95).
#
# NOTE: this table isn't complete.  If you have time and knowledge,
# please fill in the missing mappings (marked with ??) and send to
# the Python core team (or directly to effbot@telia.com).
#
# Thanks /F

# Keys are Windows language identifiers; values are "language_COUNTRY"
# strings built from an ISO 639 language code and an ISO 3166 country
# code.  Entries that still contain "??" are unresolved placeholders and
# must not be treated as valid locale codes.  The trailing comment on
# each line is Microsoft's description of the identifier.
windows_lang = {
    0x0436: "af_ZA", # Afrikaans
    0x041c: "sq_AL", # Albanian
    0x0401: "ar_SA", # Arabic (Saudi Arabia)
    0x0801: "ar_IQ", # Arabic (Iraq)
    0x0c01: "ar_EG", # Arabic (Egypt)
    0x1001: "ar_LY", # Arabic (Libya)
    0x1401: "ar_DZ", # Arabic (Algeria)
    0x1801: "ar_MA", # Arabic (Morocco)
    0x1c01: "ar_TN", # Arabic (Tunisia)
    0x2001: "ar_OM", # Arabic (Oman)
    0x2401: "ar_YE", # Arabic (Yemen)
    0x2801: "ar_SY", # Arabic (Syria)
    0x2c01: "ar_JO", # Arabic (Jordan)
    0x3001: "ar_LB", # Arabic (Lebanon)
    0x3401: "ar_KW", # Arabic (Kuwait)
    0x3801: "ar_AE", # Arabic (U.A.E.)
    0x3c01: "ar_BH", # Arabic (Bahrain)
    0x4001: "ar_QA", # Arabic (Qatar)
    0x042b: "hy_AM", # Armenian
    0x044d: "as_??", # Assamese
    0x042c: "az_AZ", # Azeri (Latin)
    0x082c: "az_??", # Azeri (Cyrillic)
    0x042d: "eu_ES", # Basque
    0x0423: "be_BY", # Belarussian
    0x0445: "bn_??", # Bengali
    0x0402: "bg_BG", # Bulgarian
    0x0455: "my_MM", # Burmese
    0x0403: "ca_ES", # Catalan
    0x0404: "zh_TW", # Chinese (Taiwan)
    0x0804: "zh_CN", # Chinese (PRC)
    0x0c04: "zh_HK", # Chinese (Hong Kong SAR, PRC)
    0x1004: "zh_SG", # Chinese (Singapore)
    0x1404: "zh_MO", # Chinese (Macau SAR)
    0x041a: "hr_HR", # Croatian (Hrvatska)
    0x0405: "cs_CZ", # Czech
    0x0406: "da_DK", # Danish
    0x0413: "nl_NL", # Dutch (Netherlands)
    0x0813: "nl_BE", # Dutch (Belgium)
    0x0409: "en_US", # English (United States)
    0x0809: "en_GB", # English (United Kingdom)
    0x0c09: "en_AU", # English (Australian)
    0x1009: "en_CA", # English (Canadian)
    0x1409: "en_NZ", # English (New Zealand)
    0x1809: "en_IE", # English (Ireland)
    0x1c09: "en_ZA", # English (South Africa)
    0x2009: "en_JM", # English (Jamaica)
    0x2409: "en_??", # English (Caribbean)
    0x2809: "en_BZ", # English (Belize)
    0x2c09: "en_TT", # English (Trinidad)
    0x3009: "en_ZW", # English (Zimbabwe)
    0x3409: "en_PH", # English (Philippines)
    0x0425: "et_EE", # Estonian
    0x0438: "fo_FO", # Faeroese
    0x0429: "fa_IR", # Farsi
    0x040b: "fi_FI", # Finnish
    0x040c: "fr_FR", # French (Standard)
    0x080c: "fr_BE", # French (Belgian)
    0x0c0c: "fr_CA", # French (Canadian)
    0x100c: "fr_CH", # French (Switzerland)
    0x140c: "fr_LU", # French (Luxembourg)
    0x180c: "fr_MC", # French (Monaco)
    0x0437: "ka_GE", # Georgian
    0x0407: "de_DE", # German (Standard)
    0x0807: "de_CH", # German (Switzerland)
    0x0c07: "de_AT", # German (Austria)
    0x1007: "de_LU", # German (Luxembourg)
    0x1407: "de_LI", # German (Liechtenstein)
    0x0408: "el_GR", # Greek
    0x0447: "gu_??", # Gujarati
    0x040d: "iw_IL", # Hebrew
    0x0439: "hi_IN", # Hindi
    0x040e: "hu_HU", # Hungarian
    0x040f: "is_IS", # Icelandic
    0x0421: "id_ID", # Indonesian
    0x0410: "it_IT", # Italian (Standard)
    0x0810: "it_CH", # Italian (Switzerland)
    0x0411: "ja_JP", # Japanese
    0x044b: "kn_??", # Kannada
    0x0860: "ks_IN", # Kashmiri (India)
    0x043f: "kk_KZ", # Kazakh
    0x0457: "??_??", # Konkani
    0x0412: "ko_KR", # Korean
    0x0812: "ko_??", # Korean (Johab)
    0x0426: "lv_LV", # Latvian
    0x0427: "lt_LT", # Lithuanian
    0x0827: "lt_??", # Lithuanian (Classic)
    0x042f: "mk_MK", # Macedonian
    0x043e: "ms_MY", # Malay (Malaysian)
    0x083e: "ms_BN", # Malay (Brunei Darussalam)
    0x044c: "ml_??", # Malayalam
    0x0458: "??_??", # Manipuri
    0x044e: "mr_??", # Marathi
    0x0861: "ne_IN", # Nepali (India)
    0x0414: "nb_NO", # Norwegian (Bokmal)
    0x0814: "nn_NO", # Norwegian (Nynorsk)
    0x0448: "or_??", # Oriya
    0x0415: "pl_PL", # Polish
    0x0416: "pt_BR", # Portuguese (Brazil)
    0x0816: "pt_PT", # Portuguese (Standard)
    0x0446: "pa_??", # Punjabi
    0x0418: "ro_RO", # Romanian
    0x0419: "ru_RU", # Russian
    0x044f: "sa_??", # Sanskrit
    0x0c1a: "sr_??", # Serbian (Cyrillic)
    0x081a: "sr_??", # Serbian (Latin)
    0x0459: "sd_??", # Sindhi
    0x041b: "sk_SK", # Slovak
    0x0424: "sl_SI", # Slovenian
    0x040a: "es_??", # Spanish (Traditional Sort)
    0x080a: "es_MX", # Spanish (Mexican)
    0x0c0a: "es_??", # Spanish (Modern Sort)
    0x100a: "es_GT", # Spanish (Guatemala)
    0x140a: "es_CR", # Spanish (Costa Rica)
    0x180a: "es_PA", # Spanish (Panama)
    0x1c0a: "es_DO", # Spanish (Dominican Republic)
    0x200a: "es_VE", # Spanish (Venezuela)
    0x240a: "es_CO", # Spanish (Colombia)
    0x280a: "es_PE", # Spanish (Peru)
    0x2c0a: "es_AR", # Spanish (Argentina)
    0x300a: "es_EC", # Spanish (Ecuador)
    0x340a: "es_CL", # Spanish (Chile)
    0x380a: "es_UY", # Spanish (Uruguay)
    0x3c0a: "es_PY", # Spanish (Paraguay)
    0x400a: "es_BO", # Spanish (Bolivia)
    0x440a: "es_SV", # Spanish (El Salvador)
    0x480a: "es_HN", # Spanish (Honduras)
    0x4c0a: "es_NI", # Spanish (Nicaragua)
    0x500a: "es_PR", # Spanish (Puerto Rico)
    0x0430: "??_??", # Sutu
    0x0441: "sw_KE", # Swahili (Kenya)
    0x041d: "sv_SE", # Swedish
    0x081d: "sv_FI", # Swedish (Finland)
    0x0449: "ta_??", # Tamil
    0x0444: "tt_??", # Tatar (Tatarstan)
    0x044a: "te_??", # Telugu
    0x041e: "th_TH", # Thai
    0x041f: "tr_TR", # Turkish
    0x0422: "uk_UA", # Ukrainian
    0x0420: "ur_PK", # Urdu (Pakistan)
    0x0820: "ur_IN", # Urdu (India)
    0x0443: "uz_UZ", # Uzbek (Latin)
    0x0843: "uz_??", # Uzbek (Cyrillic)
    0x042a: "vi_VN", # Vietnamese
}