Sanitizing untrusted code for eval()

Paul McGuire ptmcg at austin.rr.com
Tue Aug 23 07:29:30 EDT 2005


Here's the pyparsing rendition - about 24 lines of code, and another 30
for testing.
For reference, here's the JSON "bnf":

object
    { members }
    {}
members
    string : value
    members , string : value
array
    [ elements ]
    []
elements
    value
    elements , value
value
    string
    number
    object
    array
    true
    false
    null

Download pyparsing at http://pyparsing.sourceforge.net.

-- Paul

from pyparsing import *

# pyparsing grammar for JSON, following the JSON BNF:
#   object   ::= '{' [ members ] '}'
#   members  ::= string ':' value ( ',' string ':' value )*
#   array    ::= '[' [ elements ] ']'
#   elements ::= value ( ',' value )*
#   value    ::= string | number | object | array | true | false | null

TRUE = Keyword("true")
FALSE = Keyword("false")
NULL = Keyword("null")

# Attach the parse action to a copy, so pyparsing's shared module-level
# dblQuotedString expression is not modified for other users of it.
jsonString = dblQuotedString.copy().setParseAction( removeQuotes )

# number ::= ['-'] ( '0' | nonzero-digit digit* ) [ '.' digit+ ]
#            [ ('e'|'E') ['+'|'-'] digit+ ]
# The exponent sign is optional but the exponent digits are required, so a
# dangling sign such as "1e+" is not accepted as a number.
jsonNumber = Combine( Optional('-') + ( '0' | Word('123456789',nums) ) +
                    Optional( '.' + Word(nums) ) +
                    Optional( Word('eE',exact=1) +
                              Optional( Word('+-',exact=1) ) +
                              Word(nums) ) )

# Objects and values refer to each other recursively, so both are declared
# as Forward placeholders and filled in with << below.
jsonObject = Forward()
jsonValue = Forward()
jsonElements = delimitedList( jsonValue )
# Optional(...) permits the empty array "[]".
jsonArray = Group( Suppress('[') + Optional( jsonElements ) + Suppress(']') )
jsonValue << ( jsonString | jsonNumber | jsonObject  | jsonArray | TRUE
               | FALSE | NULL )
# Each member is grouped as [name, value...] so Dict can key results by name.
memberDef = Group( jsonString + Suppress(':') + jsonValue )
jsonMembers = delimitedList( memberDef )
# Optional(...) permits the empty object "{}".
jsonObject << Dict( Suppress('{') + Optional( jsonMembers ) + Suppress('}') )

# Comments are not part of the JSON BNF; this grammar additionally skips
# C-style /* ... */ and // line comments wherever they appear.
lineComment = '//' + restOfLine
jsonComment = FollowedBy('/') + ( cStyleComment | lineComment )
jsonObject.ignore( jsonComment )

# Sample JSON document used to exercise the grammar.
# The "GlossDef" value is kept on a single line: a raw line break inside a
# double-quoted JSON string is not valid JSON.
testdata = """
{
        "glossary": {
            "title": "example glossary",
                "GlossDiv": {
                        "title": "S",
                        "GlossList": [{
                "ID": "SGML",
                "SortAs": "SGML",
                "GlossTerm": "Standard Generalized Markup Language",
                "Acronym": "SGML",
                "Abbrev": "ISO 8879:1986",
                "GlossDef": "A meta-markup language, used to create markup languages such as DocBook.",
                "GlossSeeAlso": ["GML", "XML", "markup"]
            }]
        }
    }
}
"""

import pprint

# Parse the sample document with the JSON grammar defined earlier in the file.
results = jsonObject.parseString(testdata)

# Dump every parsed token as plain nested lists.
pprint.pprint( results.asList() )

# Members of parsed objects are reachable as attributes on the results.
# Single-argument print(...) behaves the same on Python 2 and Python 3,
# whereas the bare print statement is a syntax error on Python 3.
print(results.glossary.title)
print(results.glossary.GlossDiv)
# list(...) so the keys print as a list whatever type keys() returns.
print(list(results.glossary.GlossDiv.GlossList.keys()))

Prints out (I've inserted blank lines to separate the output from the
different print statements):
[['glossary',
  ['title', 'example glossary'],
  ['GlossDiv',
   ['title', 'S'],
   ['GlossList',
    [['ID', 'SGML'],
     ['SortAs', 'SGML'],
     ['GlossTerm', 'Standard Generalized Markup Language'],
     ['Acronym', 'SGML'],
     ['Abbrev', 'ISO 8879:1986'],
     ['GlossDef',
      'A meta-markup language, used to create markup languages such as DocBook.'],
     ['GlossSeeAlso', ['GML', 'XML', 'markup']]]]]]]

example glossary

[['title', 'S'], ['GlossList', [['ID', 'SGML'], ['SortAs', 'SGML'],
['GlossTerm', 'Standard Generalized Markup Language'], ['Acronym',
'SGML'], ['Abbrev', 'ISO 8879:1986'], ['GlossDef', 'A meta-markup
language, used to create markup languages such as DocBook.'],
['GlossSeeAlso', ['GML', 'XML', 'markup']]]]]

['GlossSeeAlso', 'GlossDef', 'Acronym', 'GlossTerm', 'SortAs',
'Abbrev', 'ID']




More information about the Python-list mailing list