[Tutor] Beautiful soup

David Holland davholla2002 at yahoo.co.uk
Tue Oct 4 18:41:25 CEST 2005


I tried to use this script, which I found on the web :-
import urllib2, pprint 
from BeautifulSoup import BeautifulSoup 


def cellToWord(cell):
   """Given a table cell, return the word in that cell.

   ``cell`` is called with a tag name to find child tags and exposes
   ``first()`` and ``string`` (the BeautifulSoup tag interface used by
   ``parse``).  If the cell contains a ``<b>`` tag, the stripped text of
   the first one is returned.  Otherwise the cell text is expected to
   look like ``"12. word"``: everything up to and including the first
   period is dropped and the stripped remainder is returned.

   Raises IndexError if a non-bold cell's text contains no period.
   """
   # Some words are in bold.
   if cell('b'):
      return cell.first('b').string.strip()      # Return the bold piece.
   # Split on the first period only, so a word that itself contains a
   # period (e.g. "e.g.") is not cut short when the number is removed.
   return cell.string.split('.', 1)[1].strip()   # Remove the number.


def parse(url):
   """Parse the given URL and return a dictionary mapping US words to
   foreign words.

   The document at ``url`` is fetched with ``urllib2.urlopen`` and fed to
   BeautifulSoup.  Every row of the first table, after the headings row,
   that has exactly three cells contributes one entry: the word from the
   first cell is the key and the word from the second cell is the value.
   """
   # Read the URL and pass it to BeautifulSoup.  The response is closed
   # even if reading fails, so the underlying handle is not leaked.
   response = urllib2.urlopen(url)
   try:
      html = response.read()
   finally:
      response.close()
   soup = BeautifulSoup()
   soup.feed(html)

   # Read the main table, extracting the words from the table cells.
   USToForeign = {}
   mainTable = soup.first('table')
   rows = mainTable('tr')
   for row in rows[1:]:        # Exclude the first (headings) row.
      cells = row('td')
      if len(cells) == 3:      # Some rows have a single colspan="3" cell.
         US = cellToWord(cells[0])
         foreign = cellToWord(cells[1])
         USToForeign[US] = foreign

   return USToForeign


if __name__ == '__main__':
   # Fetch the word list, then print the (US, foreign) pairs.
   url = 'http://msdn.microsoft.com/library/en-us/dnwue/html/FRE_word_list.htm'

   USToForeign = parse(url)
   # Sort case-insensitively on the US word.  A key function gives the
   # same ordering as the old cmp-based comparison.
   pairs = sorted(USToForeign.items(),
                  key=lambda pair: pair[0].lower())  # Web page order
   pprint.pprint(pairs)

and it works well.  However, when I change it to
look at a file on my PC, I get this message :-
Traceback (most recent call last):
  File "C:\Python24\beaexp2", line 43, in -toplevel-
    USToForeign = parse(url)
  File "C:\Python24\beaexp2", line 20, in parse
    html = urllib2.urlopen(url).read()
  File "C:\Python24\lib\urllib2.py", line 130, in
urlopen
    return _opener.open(url, data)
  File "C:\Python24\lib\urllib2.py", line 358, in open
    response = self._open(req, data)
  File "C:\Python24\lib\urllib2.py", line 376, in
_open
    '_open', req)
  File "C:\Python24\lib\urllib2.py", line 337, in
_call_chain
    result = func(*args)
  File "C:\Python24\lib\urllib2.py", line 1119, in
file_open
    return self.open_local_file(req)
  File "C:\Python24\lib\urllib2.py", line 1135, in
open_local_file
    stats = os.stat(localfile)
OSError: [Errno 2] No such file or directory:
'\\C:\\Python24\\FRE_word_list.htm'
Any idea how to solve it?  The file is on my PC.

I am using Python 2.4 on Win XP.

Thanks in advance.

David


		
___________________________________________________________ 
How much free photo storage do you get? Store your holiday 
snaps for FREE with Yahoo! Photos http://uk.photos.yahoo.com


More information about the Tutor mailing list