cannot get html content of tag with BeautifulSoup

Fri Jun 18 11:41:02 EDT 2010

Hello,

Does anyone know how to get the HTML contents of a tag with
BeautifulSoup? In the example below I'd like to get all the HTML that is
inside the first <p> tag, i.e.
<span id="foo">This is paragraph</span> <b>one</b>.
as a unicode object.

p.contents gives me a list which I cannot join; it raises
"TypeError: sequence item 0: expected string, Tag found".

Thanks!

from BeautifulSoup import BeautifulSoup
import re

# Sample document, assembled from fragments. Each fragment is a complete
# string literal; adjacent literals are concatenated by the parser so no
# literal has to span a physical line break.
doc = ['<html><head><title>Page title</title></head>',
       '<body><p id="firstpara" align="center">'
       '<span id="foo">This is paragraph</span> <b>one</b>.</p>',
       '<p id="secondpara" align="blah">This is paragraph <b>two</b>.</p>',
       '</body></html>']
soup = BeautifulSoup(''.join(doc))

# Matches one opening, closing or self-closing tag (no nested '<' inside).
r = re.compile(r'<[^<]*?/?>')

for p in soup.findAll('p'):
    # p.contents is a list whose items include Tag objects, so it cannot be
    # handed to join() directly ("expected string, Tag found"). Converting
    # every child to unicode first yields the inner HTML of the tag as one
    # unicode object.
    content = u"".join(unicode(child) for child in p.contents)

    # Replace every tag with a single space to get the text without markup.
    p_without_html = r.sub(' ', content)
    # Parenthesised form prints the same value under Python 2.
    print(p_without_html)