[Python-3000-checkins] r59581 - python/branches/py3k/Tools/world/world

Thu Dec 20 16:55:58 CET 2007

Author: barry.warsaw
Date: Thu Dec 20 16:55:58 2007
New Revision: 59581

Modified:
   python/branches/py3k/Tools/world/world
Log:
Update to the world tool for Python 3.  Provided by quentin.gallet-gilles via
tracker issue 1671:

http://bugs.python.org/issue1671

In addition to updating the code for Py3k, this updates ccTLDs to their
10-Oct-2006 revision.

(Minor stylistic additions and whitespace normalization by Barry.)


Modified: python/branches/py3k/Tools/world/world
==============================================================================

--- python/branches/py3k/Tools/world/world	(original)
+++ python/branches/py3k/Tools/world/world	Thu Dec 20 16:55:58 2007
@@ -42,7 +42,7 @@
 
 The latest known change to this information was:
 
-    Friday, 5 April 2002, 12.00 CET 2002
+    Monday, 10 October 2006, 17:59:51 UTC 2006
 
 This script also knows about non-geographic top-level domains, and the
 additional ccTLDs reserved by IANA.
@@ -91,9 +91,9 @@
 
 
 def usage(code, msg=''):
-    print __doc__ % globals()
+    print(__doc__ % globals())
     if msg:
-        print msg
+        print(msg)
     sys.exit(code)
 
 
@@ -104,11 +104,11 @@
         # no top level domain found, bounce it to the next step
         return rawaddr
     addr = parts[-1]
-    if nameorgs.has_key(addr):
-        print rawaddr, 'is in the', nameorgs[addr], 'top level domain'
+    if addr in nameorgs:
+        print(rawaddr, 'is in the', nameorgs[addr], 'top level domain')
         return None
-    elif countries.has_key(addr):
-        print rawaddr, 'originated from', countries[addr]
+    elif addr in countries:
+        print(rawaddr, 'originated from', countries[addr])
         return None
     else:
         # Not resolved, bounce it to the next step
@@ -129,11 +129,11 @@
         return regexp
     if len(matches) == 1:
         code = matches[0]
-        print regexp, "matches code `%s', %s" % (code, all[code])
+        print(regexp, "matches code `%s', %s" % (code, all[code]))
     else:
-        print regexp, 'matches %d countries:' % len(matches)
+        print(regexp, 'matches %d countries:' % len(matches))
         for code in matches:
-            print "    %s: %s" % (code, all[code])
+            print("    %s: %s" % (code, all[code]))
     return None
 
 
@@ -141,14 +141,16 @@
 def parse(file, normalize):
     try:
         fp = open(file)
-    except IOError, (err, msg):
-        print msg, ':', file
+    except IOError as err:
+        errno, msg = err.args
+        print(msg, ':', file)
+        return
 
     cre = re.compile('(.*?)[ \t]+([A-Z]{2})[ \t]+[A-Z]{3}[ \t]+[0-9]{3}')
     scanning = 0
 
     if normalize:
-        print 'countries = {'
+        print('countries = {')
 
     while 1:
         line = fp.readline()
@@ -163,7 +165,7 @@
                 elif line[0] == '-':
                     break
                 else:
-                    print 'Could not parse line:', line
+                    print('Could not parse line:', line)
                     continue
             country, code = mo.group(1, 2)
             if normalize:
@@ -173,30 +175,30 @@
                     # XXX special cases
                     if w in ('AND', 'OF', 'OF)', 'name:', 'METROPOLITAN'):
                         words[i] = w.lower()
-                    elif w == 'THE' and i <> 1:
+                    elif w == 'THE' and i != 1:
                         words[i] = w.lower()
                     elif len(w) > 3 and w[1] == "'":
                         words[i] = w[0:3].upper() + w[3:].lower()
                     elif w in ('(U.S.)', 'U.S.'):
                         pass
-                    elif w[0] == '(' and w <> '(local':
+                    elif w[0] == '(' and w != '(local':
                         words[i] = '(' + w[1:].capitalize()
-                    elif w.find('-') <> -1:
+                    elif w.find('-') != -1:
                         words[i] = '-'.join(
                             [s.capitalize() for s in w.split('-')])
                     else:
                         words[i] = w.capitalize()
                 code = code.lower()
                 country = ' '.join(words)
-                print '    "%s": "%s",' % (code, country)
+                print('    "%s": "%s",' % (code, country))
             else:
-                print code, country
-            
+                print(code, country)
+
         elif line[0] == '-':
             scanning = 1
 
     if normalize:
-        print '    }'
+        print('    }')
 
 
 def main():
@@ -212,7 +214,7 @@
             sys.argv[1:],
             'p:rohd',
             ['parse=', 'reverse', 'outputdict', 'help', 'dump'])
-    except getopt.error, msg:
+    except getopt.error as msg:
         usage(1, msg)
 
     for opt, arg in opts:
@@ -231,17 +233,15 @@
         usage(status)
 
     if dump:
-        print 'Non-geographic domains:'
-        codes = nameorgs.keys()
-        codes.sort()
+        print('Official country coded domains:')
+        codes = sorted(countries)
         for code in codes:
-            print '    %4s:' % code, nameorgs[code]
+            print('      %2s:' % code, countries[code])
 
-        print '\nCountry coded domains:'
-        codes = countries.keys()
-        codes.sort()
+        print('\nOther top-level domains:')
+        codes = sorted(nameorgs)
         for code in codes:
-            print '    %2s:' % code, countries[code]
+            print('  %6s:' % code, nameorgs[code])
     elif parsefile:
         parse(parsefile, normalize)
     else:
@@ -249,7 +249,7 @@
             args = filter(None, map(resolve, args))
         args = filter(None, map(reverse, args))
         for arg in args:
-            print 'Where in the world is %s?' % arg
+            print('Where in the world is %s?' % arg)
 
 
 
@@ -258,26 +258,30 @@
     # New top level domains as described by ICANN
     # http://www.icann.org/tlds/
     "aero": "air-transport industry",
+    "asia": "from Asia/for Asia",
     "arpa": "Arpanet",
     "biz": "business",
+    "cat": "Catalan community",
     "com": "commercial",
     "coop": "cooperatives",
     "edu": "educational",
     "gov": "government",
     "info": "unrestricted `info'",
     "int": "international",
+    "jobs": "employment-related",
     "mil": "military",
+    "mobi": "mobile specific",
     "museum": "museums",
     "name": "`name' (for registration by individuals)",
     "net": "networking",
     "org": "non-commercial",
     "pro": "professionals",
+    "tel": "business telecommunications",
+    "travel": "travel and tourism",
     # These additional ccTLDs are included here even though they are not part
-    # of ISO 3166.  IANA has 5 reserved ccTLDs as described here:
-    #
-    # http://www.iso.org/iso/en/prods-services/iso3166ma/04background-on-iso-3166/iso3166-1-and-ccTLDs.html
+    # of ISO 3166.  IANA has a decoding table listing all reserved ccTLDs:
     #
-    # but I can't find an official list anywhere.
+    # http://www.iso.org/iso/iso-3166-1_decoding_table
     #
     # Note that `uk' is the common practice country code for the United
     # Kingdom.  AFAICT, the official `gb' code is routinely ignored!
@@ -292,9 +296,13 @@
     #
     # Also, `su', while obsolete is still in limited use.
     "ac": "Ascension Island",
-    "gg": "Guernsey",
-    "im": "Isle of Man",
-    "je": "Jersey",
+    "cp": "Clipperton Island",
+    "dg": "Diego Garcia",
+    "ea": "Ceuta, Melilla",
+    "eu": "European Union",
+    "fx": "Metropolitan France",
+    "ic": "Canary Islands",
+    "ta": "Tristan da Cunha",
     "uk": "United Kingdom (common practice)",
     "su": "Soviet Union (still in limited use)",
     }
@@ -303,6 +311,7 @@
 
 countries = {
     "af": "Afghanistan",
+    "ax": "Aland Islands",
     "al": "Albania",
     "dz": "Algeria",
     "as": "American Samoa",
@@ -328,7 +337,7 @@
     "bm": "Bermuda",
     "bt": "Bhutan",
     "bo": "Bolivia",
-    "ba": "Bosnia and Herzegowina",
+    "ba": "Bosnia and Herzegovina",
     "bw": "Botswana",
     "bv": "Bouvet Island",
     "br": "Brazil",
@@ -363,7 +372,6 @@
     "dj": "Djibouti",
     "dm": "Dominica",
     "do": "Dominican Republic",
-    "tp": "East Timor",
     "ec": "Ecuador",
     "eg": "Egypt",
     "sv": "El Salvador",
@@ -391,6 +399,7 @@
     "gp": "Guadeloupe",
     "gu": "Guam",
     "gt": "Guatemala",
+    "gg": "Guernsey",
     "gn": "Guinea",
     "gw": "Guinea-Bissau",
     "gy": "Guyana",
@@ -403,15 +412,17 @@
     "is": "Iceland",
     "in": "India",
     "id": "Indonesia",
-    "ir": "Iran, Islamic Republic of",
+    "ir": "Iran (Islamic Republic of)",
     "iq": "Iraq",
     "ie": "Ireland",
+    "im": "Isle of Man",
     "il": "Israel",
     "it": "Italy",
     "jm": "Jamaica",
     "jp": "Japan",
+    "je": "Jersey",
     "jo": "Jordan",
-    "kz": "Kazakstan",
+    "kz": "Kazakhstan",
     "ke": "Kenya",
     "ki": "Kiribati",
     "kp": "Korea, Democratic People's Republic of",
@@ -427,7 +438,7 @@
     "li": "Liechtenstein",
     "lt": "Lithuania",
     "lu": "Luxembourg",
-    "mo": "Macau",
+    "mo": "Macao",
     "mk": "Macedonia, The Former Yugoslav Republic of",
     "mg": "Madagascar",
     "mw": "Malawi",
@@ -445,6 +456,7 @@
     "md": "Moldova, Republic of",
     "mc": "Monaco",
     "mn": "Mongolia",
+    "me": "Montenegro",
     "ms": "Montserrat",
     "ma": "Morocco",
     "mz": "Mozambique",
@@ -491,6 +503,7 @@
     "st": "Sao Tome and Principe",
     "sa": "Saudi Arabia",
     "sn": "Senegal",
+    "rs": "Serbia",
     "sc": "Seychelles",
     "sl": "Sierra Leone",
     "sg": "Singapore",
@@ -505,6 +518,8 @@
     "sd": "Sudan",
     "sr": "Suriname",
     "sj": "Svalbard and Jan Mayen",
+    "sh": "St. Helena",
+    "pm": "St. Pierre and Miquelon",
     "sz": "Swaziland",
     "se": "Sweden",
     "ch": "Switzerland",
@@ -513,6 +528,7 @@
     "tj": "Tajikistan",
     "tz": "Tanzania, United Republic of",
     "th": "Thailand",
+    "tl": "Timor-Leste",
     "tg": "Togo",
     "tk": "Tokelau",
     "to": "Tonga",
@@ -531,10 +547,11 @@
     "uy": "Uruguay",
     "uz": "Uzbekistan",
     "vu": "Vanuatu",
+    "va": "Vatican City State (Holy See)",
     "ve": "Venezuela",
     "vn": "Viet Nam",
-    "vg": "Virgin Islands, British",
-    "vi": "Virgin Islands, U.S.",
+    "vg": "Virgin Islands (British)",
+    "vi": "Virgin Islands (U.S.)",
     "wf": "Wallis and Futuna",
     "eh": "Western Sahara",
     "ye": "Yemen",