[pypy-commit] pypy stdlib-2.7.6: merge default
bdkearns
noreply at buildbot.pypy.org
Fri Mar 7 01:38:57 CET 2014
Author: Brian Kearns <bdkearns at gmail.com>
Branch: stdlib-2.7.6
Changeset: r69769:f3ee559b99c5
Date: 2014-03-06 19:38 -0500
http://bitbucket.org/pypy/pypy/changeset/f3ee559b99c5/
Log: merge default
diff too long, truncating to 2000 out of 4725 lines
diff --git a/lib_pypy/cffi/_pycparser/README b/lib_pypy/cffi/_pycparser/README
new file mode 100644
--- /dev/null
+++ b/lib_pypy/cffi/_pycparser/README
@@ -0,0 +1,12 @@
+This is a copy of pycparser. See __init__.py for the version.
+
+Note that the following two lines have been modified in c_parser.py:
+
+
+class CParser(PLYParser):
+ def __init__(
+ ...
+ lextab='cffi._pycparser.lextab',
+ ^^^^^^^^^^^^^^^
+ yacctab='cffi._pycparser.yacctab',
+ ^^^^^^^^^^^^^^^
diff --git a/lib_pypy/cffi/_pycparser/__init__.py b/lib_pypy/cffi/_pycparser/__init__.py
--- a/lib_pypy/cffi/_pycparser/__init__.py
+++ b/lib_pypy/cffi/_pycparser/__init__.py
@@ -1,14 +1,14 @@
#-----------------------------------------------------------------
# pycparser: __init__.py
#
-# This package file exports some convenience functions for
+# This package file exports some convenience functions for
# interacting with pycparser
#
# Copyright (C) 2008-2012, Eli Bendersky
# License: BSD
#-----------------------------------------------------------------
__all__ = ['c_lexer', 'c_parser', 'c_ast']
-__version__ = '2.09.1'
+__version__ = '2.10'
from subprocess import Popen, PIPE
from .c_parser import CParser
@@ -26,12 +26,12 @@
arguments.
When successful, returns the preprocessed file's contents.
- Errors from cpp will be printed out.
+ Errors from cpp will be printed out.
"""
path_list = [cpp_path]
if isinstance(cpp_args, list):
path_list += cpp_args
- elif cpp_args != '':
+ elif cpp_args != '':
path_list += [cpp_args]
path_list += [filename]
@@ -39,8 +39,8 @@
# Note the use of universal_newlines to treat all newlines
# as \n for Python's purpose
#
- pipe = Popen( path_list,
- stdout=PIPE,
+ pipe = Popen( path_list,
+ stdout=PIPE,
universal_newlines=True)
text = pipe.communicate()[0]
except OSError as e:
@@ -77,10 +77,10 @@
parser:
Optional parser object to be used instead of the default CParser
- When successful, an AST is returned. ParseError can be
+ When successful, an AST is returned. ParseError can be
thrown if the file doesn't parse successfully.
- Errors from cpp will be printed out.
+ Errors from cpp will be printed out.
"""
if use_cpp:
text = preprocess_file(filename, cpp_path, cpp_args)
diff --git a/lib_pypy/cffi/_pycparser/_build_tables.py b/lib_pypy/cffi/_pycparser/_build_tables.py
--- a/lib_pypy/cffi/_pycparser/_build_tables.py
+++ b/lib_pypy/cffi/_pycparser/_build_tables.py
@@ -1,7 +1,7 @@
#-----------------------------------------------------------------
# pycparser: _build_tables.py
#
-# A dummy for generating the lexing/parsing tables and
+# A dummy for generating the lexing/parsing tables and
# compiling them into .pyc for faster execution in optimized mode.
# Also generates AST code from the configuration file.
# Should be called from the pycparser directory.
@@ -17,14 +17,14 @@
ast_gen.generate(open('c_ast.py', 'w'))
import sys
-sys.path.extend(['.', '..'])
+sys.path[0:0] = ['.', '..']
from pycparser import c_parser
# Generates the tables
#
c_parser.CParser(
- lex_optimize=True,
- yacc_debug=False,
+ lex_optimize=True,
+ yacc_debug=False,
yacc_optimize=True)
# Load to compile into .pyc
diff --git a/lib_pypy/cffi/_pycparser/_c_ast.cfg b/lib_pypy/cffi/_pycparser/_c_ast.cfg
--- a/lib_pypy/cffi/_pycparser/_c_ast.cfg
+++ b/lib_pypy/cffi/_pycparser/_c_ast.cfg
@@ -29,7 +29,7 @@
Cast: [to_type*, expr*]
-# Compound statement in C99 is a list of block items (declarations or
+# Compound statement in C99 is a list of block items (declarations or
# statements).
#
Compound: [block_items**]
@@ -37,7 +37,7 @@
# Compound literal (anonymous aggregate) for C99.
# (type-name) {initializer_list}
# type: the typename
-# init: InitExprList for the initializer list
+# init: InitList for the initializer list
#
CompoundLiteral: [type*, init*]
diff --git a/lib_pypy/cffi/_pycparser/c_generator.py b/lib_pypy/cffi/_pycparser/c_generator.py
--- a/lib_pypy/cffi/_pycparser/c_generator.py
+++ b/lib_pypy/cffi/_pycparser/c_generator.py
@@ -11,34 +11,34 @@
class CGenerator(object):
""" Uses the same visitor pattern as c_ast.NodeVisitor, but modified to
- return a value from each visit method, using string accumulation in
+ return a value from each visit method, using string accumulation in
generic_visit.
"""
def __init__(self):
self.output = ''
-
+
# Statements start with indentation of self.indent_level spaces, using
# the _make_indent method
#
self.indent_level = 0
-
+
def _make_indent(self):
return ' ' * self.indent_level
-
+
def visit(self, node):
method = 'visit_' + node.__class__.__name__
return getattr(self, method, self.generic_visit)(node)
-
+
def generic_visit(self, node):
#~ print('generic:', type(node))
if node is None:
return ''
else:
return ''.join(self.visit(c) for c in node.children())
-
+
def visit_Constant(self, n):
return n.value
-
+
def visit_ID(self, n):
return n.name
@@ -61,22 +61,22 @@
elif n.op == 'p--':
return '%s--' % operand
elif n.op == 'sizeof':
- # Always parenthesize the argument of sizeof since it can be
+ # Always parenthesize the argument of sizeof since it can be
# a name.
return 'sizeof(%s)' % self.visit(n.expr)
else:
return '%s%s' % (n.op, operand)
def visit_BinaryOp(self, n):
- lval_str = self._parenthesize_if(n.left,
+ lval_str = self._parenthesize_if(n.left,
lambda d: not self._is_simple_node(d))
- rval_str = self._parenthesize_if(n.right,
+ rval_str = self._parenthesize_if(n.right,
lambda d: not self._is_simple_node(d))
return '%s %s %s' % (lval_str, n.op, rval_str)
def visit_Assignment(self, n):
rval_str = self._parenthesize_if(
- n.rvalue,
+ n.rvalue,
lambda n: isinstance(n, c_ast.Assignment))
return '%s %s %s' % (self.visit(n.lvalue), n.op, rval_str)
@@ -101,7 +101,7 @@
def visit_DeclList(self, n):
s = self.visit(n.decls[0])
if len(n.decls) > 1:
- s += ', ' + ', '.join(self.visit_Decl(decl, no_type=True)
+ s += ', ' + ', '.join(self.visit_Decl(decl, no_type=True)
for decl in n.decls[1:])
return s
@@ -112,7 +112,7 @@
return s
def visit_Cast(self, n):
- s = '(' + self._generate_type(n.to_type) + ')'
+ s = '(' + self._generate_type(n.to_type) + ')'
return s + ' ' + self._parenthesize_unless_simple(n.expr)
def visit_ExprList(self, n):
@@ -127,8 +127,10 @@
def visit_InitList(self, n):
visited_subexprs = []
for expr in n.exprs:
- if isinstance(expr, c_ast.InitList):
+ if isinstance(expr, c_ast.ExprList):
visited_subexprs.append('(' + self.visit(expr) + ')')
+ elif isinstance(expr, c_ast.InitList):
+ visited_subexprs.append('{' + self.visit(expr) + '}')
else:
visited_subexprs.append(self.visit(expr))
return ', '.join(visited_subexprs)
@@ -140,9 +142,9 @@
s += ' {'
for i, enumerator in enumerate(n.values.enumerators):
s += enumerator.name
- if enumerator.value:
+ if enumerator.value:
s += ' = ' + self.visit(enumerator.value)
- if i != len(n.values.enumerators) - 1:
+ if i != len(n.values.enumerators) - 1:
s += ', '
s += '}'
return s
@@ -203,7 +205,7 @@
if n.cond: s += self.visit(n.cond)
s += ')\n'
s += self._generate_stmt(n.iftrue, add_indent=True)
- if n.iffalse:
+ if n.iffalse:
s += self._make_indent() + 'else\n'
s += self._generate_stmt(n.iffalse, add_indent=True)
return s
@@ -265,7 +267,7 @@
def visit_Typename(self, n):
return self._generate_type(n.type)
-
+
def visit_Union(self, n):
return self._generate_struct_union(n, 'union')
@@ -280,13 +282,13 @@
return s
def _generate_struct_union(self, n, name):
- """ Generates code for structs and unions. name should be either
+ """ Generates code for structs and unions. name should be either
'struct' or 'union'.
"""
s = name + ' ' + (n.name or '')
if n.decls:
s += '\n'
- s += self._make_indent()
+ s += self._make_indent()
self.indent_level += 2
s += '{\n'
for decl in n.decls:
@@ -297,25 +299,26 @@
def _generate_stmt(self, n, add_indent=False):
""" Generation from a statement node. This method exists as a wrapper
- for individual visit_* methods to handle different treatment of
+ for individual visit_* methods to handle different treatment of
some statements in this context.
"""
typ = type(n)
if add_indent: self.indent_level += 2
indent = self._make_indent()
if add_indent: self.indent_level -= 2
-
- if typ in (
+
+ if typ in (
c_ast.Decl, c_ast.Assignment, c_ast.Cast, c_ast.UnaryOp,
c_ast.BinaryOp, c_ast.TernaryOp, c_ast.FuncCall, c_ast.ArrayRef,
- c_ast.StructRef, c_ast.Constant, c_ast.ID, c_ast.Typedef):
+ c_ast.StructRef, c_ast.Constant, c_ast.ID, c_ast.Typedef,
+ c_ast.ExprList):
# These can also appear in an expression context so no semicolon
# is added to them automatically
#
return indent + self.visit(n) + ';\n'
elif typ in (c_ast.Compound,):
- # No extra indentation required before the opening brace of a
- # compound - because it consists of multiple lines it has to
+ # No extra indentation required before the opening brace of a
+ # compound - because it consists of multiple lines it has to
# compute its own indentation.
#
return self.visit(n)
@@ -330,21 +333,21 @@
if n.storage: s += ' '.join(n.storage) + ' '
s += self._generate_type(n.type)
return s
-
+
def _generate_type(self, n, modifiers=[]):
- """ Recursive generation from a type node. n is the type node.
- modifiers collects the PtrDecl, ArrayDecl and FuncDecl modifiers
+ """ Recursive generation from a type node. n is the type node.
+ modifiers collects the PtrDecl, ArrayDecl and FuncDecl modifiers
encountered on the way down to a TypeDecl, to allow proper
generation from it.
"""
typ = type(n)
#~ print(n, modifiers)
-
+
if typ == c_ast.TypeDecl:
s = ''
if n.quals: s += ' '.join(n.quals) + ' '
s += self.visit(n.type)
-
+
nstr = n.declname if n.declname else ''
# Resolve modifiers.
# Wrap in parens to distinguish pointer to array and pointer to
@@ -396,7 +399,7 @@
""" Returns True for nodes that are "simple" - i.e. nodes that always
have higher precedence than operators.
"""
- return isinstance(n,( c_ast.Constant, c_ast.ID, c_ast.ArrayRef,
+ return isinstance(n,( c_ast.Constant, c_ast.ID, c_ast.ArrayRef,
c_ast.StructRef, c_ast.FuncCall))
diff --git a/lib_pypy/cffi/_pycparser/c_lexer.py b/lib_pypy/cffi/_pycparser/c_lexer.py
--- a/lib_pypy/cffi/_pycparser/c_lexer.py
+++ b/lib_pypy/cffi/_pycparser/c_lexer.py
@@ -1,11 +1,11 @@
+#------------------------------------------------------------------------------
# pycparser: c_lexer.py
#
# CLexer class: lexer for the C language
#
-# Copyright (C) 2008-2011, Eli Bendersky
+# Copyright (C) 2008-2013, Eli Bendersky
# License: BSD
-#-----------------------------------------------------------------
-
+#------------------------------------------------------------------------------
import re
import sys
@@ -15,41 +15,50 @@
class CLexer(object):
""" A lexer for the C language. After building it, set the
- input text with input(), and call token() to get new
+ input text with input(), and call token() to get new
tokens.
-
+
The public attribute filename can be set to an initial
- filename, but the lexer will update it upon #line
+ filename, but the lexer will update it upon #line
directives.
"""
- def __init__(self, error_func, type_lookup_func):
+ def __init__(self, error_func, on_lbrace_func, on_rbrace_func,
+ type_lookup_func):
""" Create a new Lexer.
-
+
error_func:
An error function. Will be called with an error
- message, line and column as arguments, in case of
+ message, line and column as arguments, in case of
an error during lexing.
-
+
+ on_lbrace_func, on_rbrace_func:
+ Called when an LBRACE or RBRACE is encountered
+ (likely to push/pop type_lookup_func's scope)
+
type_lookup_func:
A type lookup function. Given a string, it must
return True IFF this string is a name of a type
that was defined with a typedef earlier.
"""
self.error_func = error_func
+ self.on_lbrace_func = on_lbrace_func
+ self.on_rbrace_func = on_rbrace_func
self.type_lookup_func = type_lookup_func
self.filename = ''
-
+
+ # Keeps track of the last token returned from self.token()
+ self.last_token = None
+
# Allow either "# line" or "# <num>" to support GCC's
# cpp output
#
self.line_pattern = re.compile('([ \t]*line\W)|([ \t]*\d+)')
-
self.pragma_pattern = re.compile('[ \t]*pragma\W')
def build(self, **kwargs):
""" Builds the lexer from the specification. Must be
- called after the lexer object is created.
-
+ called after the lexer object is created.
+
This method exists separately, because the PLY
manual warns against calling lex.lex inside
__init__
@@ -63,10 +72,10 @@
def input(self, text):
self.lexer.input(text)
-
+
def token(self):
- g = self.lexer.token()
- return g
+ self.last_token = self.lexer.token()
+ return self.last_token
def find_tok_column(self, token):
""" Find the column of the token in its line.
@@ -75,7 +84,7 @@
return token.lexpos - last_cr
######################-- PRIVATE --######################
-
+
##
## Internal auxiliary methods
##
@@ -83,10 +92,10 @@
location = self._make_tok_location(token)
self.error_func(msg, location[0], location[1])
self.lexer.skip(1)
-
+
def _make_tok_location(self, token):
return (token.lineno, self.find_tok_column(token))
-
+
##
## Reserved keywords
##
@@ -113,35 +122,35 @@
##
tokens = keywords + (
# Identifiers
- 'ID',
-
- # Type identifiers (identifiers previously defined as
+ 'ID',
+
+ # Type identifiers (identifiers previously defined as
# types with typedef)
'TYPEID',
-
- # constants
+
+ # constants
'INT_CONST_DEC', 'INT_CONST_OCT', 'INT_CONST_HEX',
'FLOAT_CONST', 'HEX_FLOAT_CONST',
'CHAR_CONST',
'WCHAR_CONST',
-
+
# String literals
'STRING_LITERAL',
'WSTRING_LITERAL',
- # Operators
+ # Operators
'PLUS', 'MINUS', 'TIMES', 'DIVIDE', 'MOD',
'OR', 'AND', 'NOT', 'XOR', 'LSHIFT', 'RSHIFT',
'LOR', 'LAND', 'LNOT',
'LT', 'LE', 'GT', 'GE', 'EQ', 'NE',
-
+
# Assignment
- 'EQUALS', 'TIMESEQUAL', 'DIVEQUAL', 'MODEQUAL',
+ 'EQUALS', 'TIMESEQUAL', 'DIVEQUAL', 'MODEQUAL',
'PLUSEQUAL', 'MINUSEQUAL',
- 'LSHIFTEQUAL','RSHIFTEQUAL', 'ANDEQUAL', 'XOREQUAL',
+ 'LSHIFTEQUAL','RSHIFTEQUAL', 'ANDEQUAL', 'XOREQUAL',
'OREQUAL',
- # Increment/decrement
+ # Increment/decrement
'PLUSPLUS', 'MINUSMINUS',
# Structure dereference (->)
@@ -149,18 +158,18 @@
# Conditional operator (?)
'CONDOP',
-
- # Delimiters
+
+ # Delimiters
'LPAREN', 'RPAREN', # ( )
'LBRACKET', 'RBRACKET', # [ ]
- 'LBRACE', 'RBRACE', # { }
+ 'LBRACE', 'RBRACE', # { }
'COMMA', 'PERIOD', # , .
'SEMI', 'COLON', # ; :
# Ellipsis (...)
'ELLIPSIS',
-
- # pre-processor
+
+ # pre-processor
'PPHASH', # '#'
)
@@ -169,18 +178,18 @@
##
##
- # valid C identifiers (K&R2: A.2.3)
- identifier = r'[a-zA-Z_][0-9a-zA-Z_]*'
+ # valid C identifiers (K&R2: A.2.3), plus '$' (supported by some compilers)
+ identifier = r'[a-zA-Z_$][0-9a-zA-Z_$]*'
hex_prefix = '0[xX]'
hex_digits = '[0-9a-fA-F]+'
# integer constants (K&R2: A.2.5.1)
- integer_suffix_opt = r'(u?ll|U?LL|([uU][lL])|([lL][uU])|[uU]|[lL])?'
+ integer_suffix_opt = r'(([uU]ll)|([uU]LL)|(ll[uU]?)|(LL[uU]?)|([uU][lL])|([lL][uU]?)|[uU])?'
decimal_constant = '(0'+integer_suffix_opt+')|([1-9][0-9]*'+integer_suffix_opt+')'
octal_constant = '0[0-7]*'+integer_suffix_opt
hex_constant = hex_prefix+hex_digits+integer_suffix_opt
-
+
bad_octal_constant = '0[0-7]*[89]'
# character constants (K&R2: A.2.5.2)
@@ -196,14 +205,14 @@
bad_escape = r"""([\\][^a-zA-Z._~^!=&\^\-\\?'"x0-7])"""
escape_sequence = r"""(\\("""+simple_escape+'|'+decimal_escape+'|'+hex_escape+'))'
- cconst_char = r"""([^'\\\n]|"""+escape_sequence+')'
+ cconst_char = r"""([^'\\\n]|"""+escape_sequence+')'
char_const = "'"+cconst_char+"'"
wchar_const = 'L'+char_const
unmatched_quote = "('"+cconst_char+"*\\n)|('"+cconst_char+"*$)"
bad_char_const = r"""('"""+cconst_char+"""[^'\n]+')|('')|('"""+bad_escape+r"""[^'\n]*')"""
# string literals (K&R2: A.2.6)
- string_char = r"""([^"\\\n]|"""+escape_sequence+')'
+ string_char = r"""([^"\\\n]|"""+escape_sequence+')'
string_literal = '"'+string_char+'*"'
wstring_literal = 'L'+string_literal
bad_string_literal = '"'+string_char+'*'+bad_escape+string_char+'*"'
@@ -221,14 +230,14 @@
##
states = (
# ppline: preprocessor line directives
- #
+ #
('ppline', 'exclusive'),
# pppragma: pragma
#
('pppragma', 'exclusive'),
)
-
+
def t_PPHASH(self, t):
r'[ \t]*\#'
if self.line_pattern.match(t.lexer.lexdata, pos=t.lexer.lexpos):
@@ -239,7 +248,7 @@
else:
t.type = 'PPHASH'
return t
-
+
##
## Rules for the ppline state
##
@@ -261,21 +270,21 @@
def t_ppline_NEWLINE(self, t):
r'\n'
-
+
if self.pp_line is None:
self._error('line number missing in #line', t)
else:
self.lexer.lineno = int(self.pp_line)
-
+
if self.pp_filename is not None:
self.filename = self.pp_filename
-
+
t.lexer.begin('INITIAL')
def t_ppline_PPLINE(self, t):
r'line'
pass
-
+
t_ppline_ignore = ' \t'
def t_ppline_error(self, t):
@@ -292,7 +301,7 @@
def t_pppragma_PPPRAGMA(self, t):
r'pragma'
pass
-
+
t_pppragma_ignore = ' \t<>.-{}();+-*/$%@&^~!?:,0123456789'
@TOKEN(string_literal)
@@ -364,17 +373,36 @@
t_RPAREN = r'\)'
t_LBRACKET = r'\['
t_RBRACKET = r'\]'
- t_LBRACE = r'\{'
- t_RBRACE = r'\}'
t_COMMA = r','
t_PERIOD = r'\.'
t_SEMI = r';'
t_COLON = r':'
t_ELLIPSIS = r'\.\.\.'
- t_STRING_LITERAL = string_literal
-
- # The following floating and integer constants are defined as
+ # Scope delimiters
+ # To see why on_lbrace_func is needed, consider:
+ # typedef char TT;
+ # void foo(int TT) { TT = 10; }
+ # TT x = 5;
+ # Outside the function, TT is a typedef, but inside (starting and ending
+ # with the braces) it's a parameter. The trouble begins with yacc's
+ # lookahead token. If we open a new scope in brace_open, then TT has
+ # already been read and incorrectly interpreted as TYPEID. So, we need
+ # to open and close scopes from within the lexer.
+ # Similar for the TT immediately outside the end of the function.
+ #
+ @TOKEN(r'\{')
+ def t_LBRACE(self, t):
+ self.on_lbrace_func()
+ return t
+ @TOKEN(r'\}')
+ def t_RBRACE(self, t):
+ self.on_rbrace_func()
+ return t
+
+ t_STRING_LITERAL = string_literal
+
+ # The following floating and integer constants are defined as
# functions to impose a strict order (otherwise, decimal
# is placed before the others because its regex is longer,
# and this is bad)
@@ -404,17 +432,17 @@
def t_INT_CONST_DEC(self, t):
return t
- # Must come before bad_char_const, to prevent it from
+ # Must come before bad_char_const, to prevent it from
# catching valid char constants as invalid
- #
+ #
@TOKEN(char_const)
def t_CHAR_CONST(self, t):
return t
-
+
@TOKEN(wchar_const)
def t_WCHAR_CONST(self, t):
return t
-
+
@TOKEN(unmatched_quote)
def t_UNMATCHED_QUOTE(self, t):
msg = "Unmatched '"
@@ -428,12 +456,12 @@
@TOKEN(wstring_literal)
def t_WSTRING_LITERAL(self, t):
return t
-
+
# unmatched string literals are caught by the preprocessor
-
+
@TOKEN(bad_string_literal)
def t_BAD_STRING_LITERAL(self, t):
- msg = "String contains invalid escape code"
+ msg = "String contains invalid escape code"
self._error(msg, t)
@TOKEN(identifier)
@@ -442,40 +470,8 @@
if t.type == 'ID' and self.type_lookup_func(t.value):
t.type = "TYPEID"
return t
-
+
def t_error(self, t):
msg = 'Illegal character %s' % repr(t.value[0])
self._error(msg, t)
-
-if __name__ == "__main__":
- filename = '../zp.c'
- text = open(filename).read()
-
- #~ text = '"'+r"""ka \p ka"""+'"'
- text = r"""
- 546
- #line 66 "kwas\df.h"
- id 4
- # 5
- dsf
- """
-
- def errfoo(msg, a, b):
- sys.write(msg + "\n")
- sys.exit()
-
- def typelookup(namd):
- return False
-
- clex = CLexer(errfoo, typelookup)
- clex.build()
- clex.input(text)
-
- while 1:
- tok = clex.token()
- if not tok: break
-
- printme([tok.value, tok.type, tok.lineno, clex.filename, tok.lexpos])
-
-
diff --git a/lib_pypy/cffi/_pycparser/c_parser.py b/lib_pypy/cffi/_pycparser/c_parser.py
--- a/lib_pypy/cffi/_pycparser/c_parser.py
+++ b/lib_pypy/cffi/_pycparser/c_parser.py
@@ -3,7 +3,7 @@
#
# CParser class: Parser and AST builder for the C language
#
-# Copyright (C) 2008-2012, Eli Bendersky
+# Copyright (C) 2008-2013, Eli Bendersky
# License: BSD
#------------------------------------------------------------------------------
import re
@@ -16,64 +16,66 @@
from .ast_transforms import fix_switch_cases
-class CParser(PLYParser):
+class CParser(PLYParser):
def __init__(
- self,
+ self,
lex_optimize=True,
lextab='cffi._pycparser.lextab',
yacc_optimize=True,
yacctab='cffi._pycparser.yacctab',
yacc_debug=False):
""" Create a new CParser.
-
+
Some arguments for controlling the debug/optimization
- level of the parser are provided. The defaults are
- tuned for release/performance mode.
+ level of the parser are provided. The defaults are
+ tuned for release/performance mode.
The simple rules for using them are:
*) When tweaking CParser/CLexer, set these to False
*) When releasing a stable parser, set to True
-
+
lex_optimize:
Set to False when you're modifying the lexer.
Otherwise, changes in the lexer won't be used, if
some lextab.py file exists.
When releasing with a stable lexer, set to True
- to save the re-generation of the lexer table on
+ to save the re-generation of the lexer table on
each run.
-
+
lextab:
Points to the lex table that's used for optimized
mode. Only if you're modifying the lexer and want
- some tests to avoid re-generating the table, make
+ some tests to avoid re-generating the table, make
this point to a local lex table file (that's been
earlier generated with lex_optimize=True)
-
+
yacc_optimize:
Set to False when you're modifying the parser.
Otherwise, changes in the parser won't be used, if
some parsetab.py file exists.
When releasing with a stable parser, set to True
- to save the re-generation of the parser table on
+ to save the re-generation of the parser table on
each run.
-
+
yacctab:
Points to the yacc table that's used for optimized
- mode. Only if you're modifying the parser, make
+ mode. Only if you're modifying the parser, make
this point to a local yacc table file
-
+
yacc_debug:
Generate a parser.out file that explains how yacc
built the parsing table from the grammar.
"""
self.clex = CLexer(
error_func=self._lex_error_func,
+ on_lbrace_func=self._lex_on_lbrace_func,
+ on_rbrace_func=self._lex_on_rbrace_func,
type_lookup_func=self._lex_type_lookup_func)
-
+
self.clex.build(
optimize=lex_optimize,
lextab=lextab)
self.tokens = self.clex.tokens
-
+
rules_with_opt = [
'abstract_declarator',
'assignment_expression',
@@ -89,74 +91,118 @@
'type_qualifier_list',
'struct_declarator_list'
]
-
+
for rule in rules_with_opt:
self._create_opt_rule(rule)
-
+
self.cparser = yacc.yacc(
- module=self,
+ module=self,
start='translation_unit_or_empty',
debug=yacc_debug,
optimize=yacc_optimize,
tabmodule=yacctab)
-
- # Stack of scopes for keeping track of typedefs. _scope_stack[-1] is
- # the current (topmost) scope.
- #
- self._scope_stack = [set()]
-
+
+ # Stack of scopes for keeping track of symbols. _scope_stack[-1] is
+ # the current (topmost) scope. Each scope is a dictionary that
+ # specifies whether a name is a type. If _scope_stack[n][name] is
+ # True, 'name' is currently a type in the scope. If it's False,
+ # 'name' is used in the scope but not as a type (for instance, if we
+ # saw: int name;)
+ # If 'name' is not a key in _scope_stack[n] then 'name' was not defined
+ # in this scope at all.
+ self._scope_stack = [dict()]
+
+ # Keeps track of the last token given to yacc (the lookahead token)
+ self._last_yielded_token = None
+
def parse(self, text, filename='', debuglevel=0):
""" Parses C code and returns an AST.
-
+
text:
A string containing the C source code
-
+
filename:
Name of the file being parsed (for meaningful
error messages)
-
+
debuglevel:
Debug level to yacc
"""
self.clex.filename = filename
self.clex.reset_lineno()
- self._scope_stack = [set()]
- return self.cparser.parse(text, lexer=self.clex, debug=debuglevel)
-
+ self._scope_stack = [dict()]
+ self._last_yielded_token = None
+ return self.cparser.parse(
+ input=text,
+ lexer=self.clex,
+ debug=debuglevel)
+
######################-- PRIVATE --######################
-
+
def _push_scope(self):
- self._scope_stack.append(set())
+ self._scope_stack.append(dict())
def _pop_scope(self):
assert len(self._scope_stack) > 1
self._scope_stack.pop()
- def _add_typedef_type(self, name):
- """ Add a new typedef-name to the current scope
+ def _add_typedef_name(self, name, coord):
+ """ Add a new typedef name (ie a TYPEID) to the current scope
"""
- self._scope_stack[-1].add(name)
- #~ print(self._scope_stack)
+ if not self._scope_stack[-1].get(name, True):
+ self._parse_error(
+ "Typedef %r previously declared as non-typedef "
+ "in this scope" % name, coord)
+ self._scope_stack[-1][name] = True
+
+ def _add_identifier(self, name, coord):
+ """ Add a new object, function, or enum member name (ie an ID) to the
+ current scope
+ """
+ if self._scope_stack[-1].get(name, False):
+ self._parse_error(
+ "Non-typedef %r previously declared as typedef "
+ "in this scope" % name, coord)
+ self._scope_stack[-1][name] = False
def _is_type_in_scope(self, name):
""" Is *name* a typedef-name in the current scope?
"""
- return any(name in scope for scope in self._scope_stack)
+ for scope in reversed(self._scope_stack):
+ # If name is an identifier in this scope it shadows typedefs in
+ # higher scopes.
+ in_scope = scope.get(name)
+ if in_scope is not None: return in_scope
+ return False
def _lex_error_func(self, msg, line, column):
self._parse_error(msg, self._coord(line, column))
-
+
+ def _lex_on_lbrace_func(self):
+ self._push_scope()
+
+ def _lex_on_rbrace_func(self):
+ self._pop_scope()
+
def _lex_type_lookup_func(self, name):
""" Looks up types that were previously defined with
- typedef.
+ typedef.
Passed to the lexer for recognizing identifiers that
are types.
"""
- return self._is_type_in_scope(name)
-
- # To understand what's going on here, read sections A.8.5 and
+ is_type = self._is_type_in_scope(name)
+ return is_type
+
+ def _get_yacc_lookahead_token(self):
+ """ We need access to yacc's lookahead token in certain cases.
+ This is the last token yacc requested from the lexer, so we
+ ask the lexer.
+ """
+ return self.clex.last_token
+
+ # To understand what's going on here, read sections A.8.5 and
# A.8.6 of K&R2 very carefully.
- #
+ #
# A C type consists of a basic type declaration, with a list
# of modifiers. For example:
#
@@ -166,7 +212,7 @@
# the array are the modifiers.
#
# Basic declarations are represented by TypeDecl (from module
- # c_ast) and the modifiers are FuncDecl, PtrDecl and
+ # c_ast) and the modifiers are FuncDecl, PtrDecl and
# ArrayDecl.
#
# The standard states that whenever a new modifier is parsed,
@@ -175,41 +221,41 @@
#
# K&R2 A.8.6.2: Array Declarators
#
- # In a declaration T D where D has the form
- # D1 [constant-expression-opt]
- # and the type of the identifier in the declaration T D1 is
- # "type-modifier T", the type of the
+ # In a declaration T D where D has the form
+ # D1 [constant-expression-opt]
+ # and the type of the identifier in the declaration T D1 is
+ # "type-modifier T", the type of the
# identifier of D is "type-modifier array of T"
#
# This is what this method does. The declarator it receives
- # can be a list of declarators ending with TypeDecl. It
- # tacks the modifier to the end of this list, just before
+ # can be a list of declarators ending with TypeDecl. It
+ # tacks the modifier to the end of this list, just before
# the TypeDecl.
#
- # Additionally, the modifier may be a list itself. This is
+ # Additionally, the modifier may be a list itself. This is
# useful for pointers, that can come as a chain from the rule
- # p_pointer. In this case, the whole modifier list is spliced
+ # p_pointer. In this case, the whole modifier list is spliced
# into the new location.
#
def _type_modify_decl(self, decl, modifier):
""" Tacks a type modifier on a declarator, and returns
the modified declarator.
-
+
Note: the declarator and modifier may be modified
"""
#~ print '****'
#~ decl.show(offset=3)
#~ modifier.show(offset=3)
#~ print '****'
-
+
modifier_head = modifier
modifier_tail = modifier
-
+
# The modifier may be a nested list. Reach its tail.
#
- while modifier_tail.type:
+ while modifier_tail.type:
modifier_tail = modifier_tail.type
-
+
# If the decl is a basic type, just tack the modifier onto
# it
#
@@ -222,29 +268,29 @@
# pointing to the underlying basic type.
#
decl_tail = decl
-
+
while not isinstance(decl_tail.type, c_ast.TypeDecl):
decl_tail = decl_tail.type
-
+
modifier_tail.type = decl_tail.type
decl_tail.type = modifier_head
return decl
# Due to the order in which declarators are constructed,
# they have to be fixed in order to look like a normal AST.
- #
+ #
# When a declaration arrives from syntax construction, it has
# these problems:
# * The innermost TypeDecl has no type (because the basic
# type is only known at the uppermost declaration level)
# * The declaration has no variable name, since that is saved
# in the innermost TypeDecl
- # * The typename of the declaration is a list of type
+ # * The typename of the declaration is a list of type
# specifiers, and not a node. Here, basic identifier types
# should be separated from more complex types like enums
# and structs.
#
- # This method fixes these problem.
+ # This method fixes these problems.
#
def _fix_decl_name_type(self, decl, typename):
""" Fixes a declaration. Modifies decl.
@@ -254,13 +300,13 @@
type = decl
while not isinstance(type, c_ast.TypeDecl):
type = type.type
-
+
decl.name = type.declname
type.quals = decl.quals
-
- # The typename is a list of types. If any type in this
+
+ # The typename is a list of types. If any type in this
# list isn't an IdentifierType, it must be the only
- # type in the list (it's illegal to declare "int enum .."
+ # type in the list (it's illegal to declare "int enum ..")
# If all the types are basic, they're collected in the
# IdentifierType holder.
#
@@ -272,14 +318,25 @@
else:
type.type = tn
return decl
-
- # At this point, we know that typename is a list of IdentifierType
- # nodes. Concatenate all the names into a single list.
- type.type = c_ast.IdentifierType(
- [name for id in typename for name in id.names],
- coord=typename[0].coord)
+
+ if not typename:
+ # Functions default to returning int
+ #
+ if not isinstance(decl.type, c_ast.FuncDecl):
+ self._parse_error(
+ "Missing type in declaration", decl.coord)
+ type.type = c_ast.IdentifierType(
+ ['int'],
+ coord=decl.coord)
+ else:
+ # At this point, we know that typename is a list of IdentifierType
+ # nodes. Concatenate all the names into a single list.
+ #
+ type.type = c_ast.IdentifierType(
+ [name for id in typename for name in id.names],
+ coord=typename[0].coord)
return decl
-
+
def _add_declaration_specifier(self, declspec, newspec, kind):
""" Declaration specifiers are represented by a dictionary
with the entries:
@@ -287,31 +344,115 @@
* storage: a list of storage type qualifiers
* type: a list of type specifiers
* function: a list of function specifiers
-
- This method is given a declaration specifier, and a
+
+ This method is given a declaration specifier, and a
new specifier of a given kind.
- Returns the declaration specifier, with the new
+ Returns the declaration specifier, with the new
specifier incorporated.
"""
spec = declspec or dict(qual=[], storage=[], type=[], function=[])
spec[kind].insert(0, newspec)
return spec
-
- def _build_function_definition(self, decl, spec, param_decls, body):
+
+ def _build_declarations(self, spec, decls, typedef_namespace=False):
+ """ Builds a list of declarations all sharing the given specifiers.
+ If typedef_namespace is true, each declared name is added
+ to the "typedef namespace", which also includes objects,
+ functions, and enum constants.
+ """
+ is_typedef = 'typedef' in spec['storage']
+ declarations = []
+
+ # Bit-fields are allowed to be unnamed.
+ #
+ if decls[0].get('bitsize') is not None:
+ pass
+
+ # When redeclaring typedef names as identifiers in inner scopes, a
+ # problem can occur where the identifier gets grouped into
+ # spec['type'], leaving decl as None. This can only occur for the
+ # first declarator.
+ #
+ elif decls[0]['decl'] is None:
+ if len(spec['type']) < 2 or len(spec['type'][-1].names) != 1 or \
+ not self._is_type_in_scope(spec['type'][-1].names[0]):
+ coord = '?'
+ for t in spec['type']:
+ if hasattr(t, 'coord'):
+ coord = t.coord
+ break
+ self._parse_error('Invalid declaration', coord)
+
+ # Make this look as if it came from "direct_declarator:ID"
+ decls[0]['decl'] = c_ast.TypeDecl(
+ declname=spec['type'][-1].names[0],
+ type=None,
+ quals=None,
+ coord=spec['type'][-1].coord)
+ # Remove the "new" type's name from the end of spec['type']
+ del spec['type'][-1]
+
+ # A similar problem can occur where the declaration ends up looking
+ # like an abstract declarator. Give it a name if this is the case.
+ #
+ elif not isinstance(decls[0]['decl'],
+ (c_ast.Struct, c_ast.Union, c_ast.IdentifierType)):
+ decls_0_tail = decls[0]['decl']
+ while not isinstance(decls_0_tail, c_ast.TypeDecl):
+ decls_0_tail = decls_0_tail.type
+ if decls_0_tail.declname is None:
+ decls_0_tail.declname = spec['type'][-1].names[0]
+ del spec['type'][-1]
+
+ for decl in decls:
+ assert decl['decl'] is not None
+ if is_typedef:
+ declaration = c_ast.Typedef(
+ name=None,
+ quals=spec['qual'],
+ storage=spec['storage'],
+ type=decl['decl'],
+ coord=decl['decl'].coord)
+ else:
+ declaration = c_ast.Decl(
+ name=None,
+ quals=spec['qual'],
+ storage=spec['storage'],
+ funcspec=spec['function'],
+ type=decl['decl'],
+ init=decl.get('init'),
+ bitsize=decl.get('bitsize'),
+ coord=decl['decl'].coord)
+
+ if isinstance(declaration.type,
+ (c_ast.Struct, c_ast.Union, c_ast.IdentifierType)):
+ fixed_decl = declaration
+ else:
+ fixed_decl = self._fix_decl_name_type(declaration, spec['type'])
+
+ # Add the type name defined by typedef to a
+ # symbol table (for usage in the lexer)
+ #
+ if typedef_namespace:
+ if is_typedef:
+ self._add_typedef_name(fixed_decl.name, fixed_decl.coord)
+ else:
+ self._add_identifier(fixed_decl.name, fixed_decl.coord)
+
+ declarations.append(fixed_decl)
+
+ return declarations
+
+ def _build_function_definition(self, spec, decl, param_decls, body):
""" Builds a function definition.
"""
- declaration = c_ast.Decl(
- name=None,
- quals=spec['qual'],
- storage=spec['storage'],
- funcspec=spec['function'],
- type=decl,
- init=None,
- bitsize=None,
- coord=decl.coord)
-
- typename = spec['type']
- declaration = self._fix_decl_name_type(declaration, typename)
+ assert 'typedef' not in spec['storage']
+
+ declaration = self._build_declarations(
+ spec=spec,
+ decls=[dict(decl=decl, init=None)],
+ typedef_namespace=True)[0]
+
return c_ast.FuncDef(
decl=declaration,
param_decls=param_decls,
@@ -361,29 +502,29 @@
p[0] = c_ast.FileAST(p[1])
def p_translation_unit_1(self, p):
- """ translation_unit : external_declaration
+ """ translation_unit : external_declaration
"""
# Note: external_declaration is already a list
#
p[0] = p[1]
-
+
def p_translation_unit_2(self, p):
""" translation_unit : translation_unit external_declaration
"""
if p[2] is not None:
p[1].extend(p[2])
p[0] = p[1]
-
+
# Declarations always come as lists (because they can be
- # several in one line), so we wrap the function definition
- # into a list as well, to make the return value of
+ # several in one line), so we wrap the function definition
+ # into a list as well, to make the return value of
# external_declaration homogenous.
#
def p_external_declaration_1(self, p):
""" external_declaration : function_definition
"""
p[0] = [p[1]]
-
+
def p_external_declaration_2(self, p):
""" external_declaration : declaration
"""
@@ -393,16 +534,16 @@
""" external_declaration : pp_directive
"""
p[0] = p[1]
-
+
def p_external_declaration_4(self, p):
""" external_declaration : SEMI
"""
p[0] = None
def p_pp_directive(self, p):
- """ pp_directive : PPHASH
+ """ pp_directive : PPHASH
"""
- self._parse_error('Directives not supported yet',
+ self._parse_error('Directives not supported yet',
self._coord(p.lineno(1)))
# In function definitions, the declarator can be followed by
@@ -411,32 +552,37 @@
def p_function_definition_1(self, p):
""" function_definition : declarator declaration_list_opt compound_statement
"""
- # no declaration specifiers
- spec = dict(qual=[], storage=[], type=[])
+ # no declaration specifiers - 'int' becomes the default type
+ spec = dict(
+ qual=[],
+ storage=[],
+ type=[c_ast.IdentifierType(['int'],
+ coord=self._coord(p.lineno(1)))],
+ function=[])
p[0] = self._build_function_definition(
+ spec=spec,
decl=p[1],
- spec=spec,
param_decls=p[2],
body=p[3])
-
+
def p_function_definition_2(self, p):
""" function_definition : declaration_specifiers declarator declaration_list_opt compound_statement
"""
spec = p[1]
p[0] = self._build_function_definition(
+ spec=spec,
decl=p[2],
- spec=spec,
param_decls=p[3],
body=p[4])
-
+
def p_statement(self, p):
""" statement : labeled_statement
| expression_statement
| compound_statement
| selection_statement
- | iteration_statement
+ | iteration_statement
| jump_statement
"""
p[0] = p[1]
@@ -454,66 +600,43 @@
""" decl_body : declaration_specifiers init_declarator_list_opt
"""
spec = p[1]
- is_typedef = 'typedef' in spec['storage']
- decls = []
-
+
# p[2] (init_declarator_list_opt) is either a list or None
#
if p[2] is None:
- # Then it's a declaration of a struct / enum tag,
- # without an actual declarator.
+ # By the standard, you must have at least one declarator unless
+ # declaring a structure tag, a union tag, or the members of an
+ # enumeration.
#
ty = spec['type']
- if len(ty) > 1:
- coord = '?'
- for t in ty:
- if hasattr(t, 'coord'):
- coord = t.coord
- break
-
- self._parse_error('Multiple type specifiers with a type tag',
- coord)
-
- decl = c_ast.Decl(
- name=None,
- quals=spec['qual'],
- storage=spec['storage'],
- funcspec=spec['function'],
- type=ty[0],
- init=None,
- bitsize=None,
- coord=ty[0].coord)
- decls = [decl]
+ s_u_or_e = (c_ast.Struct, c_ast.Union, c_ast.Enum)
+ if len(ty) == 1 and isinstance(ty[0], s_u_or_e):
+ decls = [c_ast.Decl(
+ name=None,
+ quals=spec['qual'],
+ storage=spec['storage'],
+ funcspec=spec['function'],
+ type=ty[0],
+ init=None,
+ bitsize=None,
+ coord=ty[0].coord)]
+
+ # However, this case can also occur on redeclared identifiers in
+ # an inner scope. The trouble is that the redeclared type's name
+ # gets grouped into declaration_specifiers; _build_declarations
+ # compensates for this.
+ #
+ else:
+ decls = self._build_declarations(
+ spec=spec,
+ decls=[dict(decl=None, init=None)],
+ typedef_namespace=True)
+
else:
- for decl, init in p[2] or []:
- if is_typedef:
- decl = c_ast.Typedef(
- name=None,
- quals=spec['qual'],
- storage=spec['storage'],
- type=decl,
- coord=decl.coord)
- else:
- decl = c_ast.Decl(
- name=None,
- quals=spec['qual'],
- storage=spec['storage'],
- funcspec=spec['function'],
- type=decl,
- init=init,
- bitsize=None,
- coord=decl.coord)
-
- typename = spec['type']
- fixed_decl = self._fix_decl_name_type(decl, typename)
-
- # Add the type name defined by typedef to a
- # symbol table (for usage in the lexer)
- #
- if is_typedef:
- self._add_typedef_type(fixed_decl.name)
-
- decls.append(fixed_decl)
+ decls = self._build_declarations(
+ spec=spec,
+ decls=p[2],
+ typedef_namespace=True)
p[0] = decls
@@ -522,7 +645,7 @@
# for defining typedefs.
#
# If a typedef line was directly followed by a line using the
- # type defined with the typedef, the type would not be
+ # type defined with the typedef, the type would not be
# recognized. This is because to reduce the declaration rule,
# the parser's lookahead asked for the token after SEMI, which
# was the type from the next line, and the lexer had no chance
@@ -532,42 +655,41 @@
# the parser reduces decl_body, which actually adds the new
# type into the table to be seen by the lexer before the next
# line is reached.
- #
def p_declaration(self, p):
- """ declaration : decl_body SEMI
+ """ declaration : decl_body SEMI
"""
p[0] = p[1]
# Since each declaration is a list of declarations, this
# rule will combine all the declarations and return a single
# list
- #
+ #
def p_declaration_list(self, p):
""" declaration_list : declaration
| declaration_list declaration
"""
p[0] = p[1] if len(p) == 2 else p[1] + p[2]
-
+
def p_declaration_specifiers_1(self, p):
- """ declaration_specifiers : type_qualifier declaration_specifiers_opt
+ """ declaration_specifiers : type_qualifier declaration_specifiers_opt
"""
p[0] = self._add_declaration_specifier(p[2], p[1], 'qual')
-
+
def p_declaration_specifiers_2(self, p):
""" declaration_specifiers : type_specifier declaration_specifiers_opt
"""
p[0] = self._add_declaration_specifier(p[2], p[1], 'type')
-
+
def p_declaration_specifiers_3(self, p):
""" declaration_specifiers : storage_class_specifier declaration_specifiers_opt
"""
p[0] = self._add_declaration_specifier(p[2], p[1], 'storage')
-
+
def p_declaration_specifiers_4(self, p):
""" declaration_specifiers : function_specifier declaration_specifiers_opt
"""
p[0] = self._add_declaration_specifier(p[2], p[1], 'function')
-
+
def p_storage_class_specifier(self, p):
""" storage_class_specifier : AUTO
| REGISTER
@@ -576,12 +698,12 @@
| TYPEDEF
"""
p[0] = p[1]
-
+
def p_function_specifier(self, p):
""" function_specifier : INLINE
"""
p[0] = p[1]
-
+
def p_type_specifier_1(self, p):
""" type_specifier : VOID
| _BOOL
@@ -603,34 +725,52 @@
| struct_or_union_specifier
"""
p[0] = p[1]
-
+
def p_type_qualifier(self, p):
""" type_qualifier : CONST
| RESTRICT
| VOLATILE
"""
p[0] = p[1]
-
- def p_init_declarator_list(self, p):
+
+ def p_init_declarator_list_1(self, p):
""" init_declarator_list : init_declarator
| init_declarator_list COMMA init_declarator
"""
p[0] = p[1] + [p[3]] if len(p) == 4 else [p[1]]
- # Returns a (declarator, initializer) pair
- # If there's no initializer, returns (declarator, None)
+ # If the code is declaring a variable that was declared a typedef in an
+ # outer scope, yacc will think the name is part of declaration_specifiers,
+ # not init_declarator, and will then get confused by EQUALS. Pass None
+ # up in place of declarator, and handle this at a higher level.
+ #
+ def p_init_declarator_list_2(self, p):
+ """ init_declarator_list : EQUALS initializer
+ """
+ p[0] = [dict(decl=None, init=p[2])]
+
+ # Similarly, if the code contains duplicate typedefs of, for example,
+ # array types, the array portion will appear as an abstract declarator.
+ #
+ def p_init_declarator_list_3(self, p):
+ """ init_declarator_list : abstract_declarator
+ """
+ p[0] = [dict(decl=p[1], init=None)]
+
+ # Returns a {decl=<declarator> : init=<initializer>} dictionary
+ # If there's no initializer, uses None
#
def p_init_declarator(self, p):
""" init_declarator : declarator
| declarator EQUALS initializer
"""
- p[0] = (p[1], p[3] if len(p) > 2 else None)
-
+ p[0] = dict(decl=p[1], init=(p[3] if len(p) > 2 else None))
+
def p_specifier_qualifier_list_1(self, p):
""" specifier_qualifier_list : type_qualifier specifier_qualifier_list_opt
"""
p[0] = self._add_declaration_specifier(p[2], p[1], 'qual')
-
+
def p_specifier_qualifier_list_2(self, p):
""" specifier_qualifier_list : type_specifier specifier_qualifier_list_opt
"""
@@ -645,8 +785,8 @@
"""
klass = self._select_struct_union_class(p[1])
p[0] = klass(
- name=p[2],
- decls=None,
+ name=p[2],
+ decls=None,
coord=self._coord(p.lineno(2)))
def p_struct_or_union_specifier_2(self, p):
@@ -669,7 +809,7 @@
coord=self._coord(p.lineno(2)))
def p_struct_or_union(self, p):
- """ struct_or_union : STRUCT
+ """ struct_or_union : STRUCT
| UNION
"""
p[0] = p[1]
@@ -686,59 +826,60 @@
""" struct_declaration : specifier_qualifier_list struct_declarator_list_opt SEMI
"""
spec = p[1]
- decls = []
-
+ assert 'typedef' not in spec['storage']
+
if p[2] is not None:
- for struct_decl in p[2]:
- if struct_decl['decl'] is not None:
- decl_coord = struct_decl['decl'].coord
- else:
- decl_coord = struct_decl['bitsize'].coord
-
- decl = c_ast.Decl(
- name=None,
- quals=spec['qual'],
- funcspec=spec['function'],
- storage=spec['storage'],
- type=struct_decl['decl'],
- init=None,
- bitsize=struct_decl['bitsize'],
- coord=decl_coord)
-
- typename = spec['type']
- decls.append(self._fix_decl_name_type(decl, typename))
- else:
+ decls = self._build_declarations(
+ spec=spec,
+ decls=p[2])
+
+ elif len(spec['type']) == 1:
# Anonymous struct/union, gcc extension, C1x feature.
- # Although the standard only allows structs/unions here, I see no
+ # Although the standard only allows structs/unions here, I see no
# reason to disallow other types since some compilers have typedefs
# here, and pycparser isn't about rejecting all invalid code.
- #
+ #
node = spec['type'][0]
-
if isinstance(node, c_ast.Node):
decl_type = node
else:
decl_type = c_ast.IdentifierType(node)
-
- decl = c_ast.Decl(
- name=None,
- quals=spec['qual'],
- funcspec=spec['function'],
- storage=spec['storage'],
- type=decl_type,
- init=None,
- bitsize=None,
- coord=self._coord(p.lineno(3)))
- decls.append(decl)
-
+
+ decls = self._build_declarations(
+ spec=spec,
+ decls=[dict(decl=decl_type)])
+
+ else:
+ # Structure/union members can have the same names as typedefs.
+ # The trouble is that the member's name gets grouped into
+ # specifier_qualifier_list; _build_declarations compensates.
+ #
+ decls = self._build_declarations(
+ spec=spec,
+ decls=[dict(decl=None, init=None)])
+
p[0] = decls
-
+
+ def p_struct_declaration_2(self, p):
+ """ struct_declaration : specifier_qualifier_list abstract_declarator SEMI
+ """
+ # "Abstract declarator?!", you ask? Structure members can have the
+ # same names as typedefs. The trouble is that the member's name gets
+ # grouped into specifier_qualifier_list, leaving any remainder to
+ # appear as an abstract declarator, as in:
+ # typedef int Foo;
+ # struct { Foo Foo[3]; };
+ #
+ p[0] = self._build_declarations(
+ spec=p[1],
+ decls=[dict(decl=p[2], init=None)])
+
def p_struct_declarator_list(self, p):
""" struct_declarator_list : struct_declarator
| struct_declarator_list COMMA struct_declarator
"""
p[0] = p[1] + [p[3]] if len(p) == 4 else [p[1]]
-
+
# struct_declarator passes up a dict with the keys: decl (for
# the underlying declarator) and bitsize (for the bitsize)
#
@@ -746,7 +887,7 @@
""" struct_declarator : declarator
"""
p[0] = {'decl': p[1], 'bitsize': None}
-
+
def p_struct_declarator_2(self, p):
""" struct_declarator : declarator COLON constant_expression
| COLON constant_expression
@@ -755,24 +896,24 @@
p[0] = {'decl': p[1], 'bitsize': p[3]}
else:
p[0] = {'decl': c_ast.TypeDecl(None, None, None), 'bitsize': p[2]}
-
+
def p_enum_specifier_1(self, p):
""" enum_specifier : ENUM ID
| ENUM TYPEID
"""
p[0] = c_ast.Enum(p[2], None, self._coord(p.lineno(1)))
-
+
def p_enum_specifier_2(self, p):
""" enum_specifier : ENUM brace_open enumerator_list brace_close
"""
p[0] = c_ast.Enum(None, p[3], self._coord(p.lineno(1)))
-
+
def p_enum_specifier_3(self, p):
""" enum_specifier : ENUM ID brace_open enumerator_list brace_close
| ENUM TYPEID brace_open enumerator_list brace_close
"""
p[0] = c_ast.Enum(p[2], p[4], self._coord(p.lineno(1)))
-
+
def p_enumerator_list(self, p):
""" enumerator_list : enumerator
| enumerator_list COMMA
@@ -791,95 +932,130 @@
| ID EQUALS constant_expression
"""
if len(p) == 2:
- p[0] = c_ast.Enumerator(
- p[1], None,
+ enumerator = c_ast.Enumerator(
+ p[1], None,
self._coord(p.lineno(1)))
else:
- p[0] = c_ast.Enumerator(
- p[1], p[3],
+ enumerator = c_ast.Enumerator(
+ p[1], p[3],
self._coord(p.lineno(1)))
-
+ self._add_identifier(enumerator.name, enumerator.coord)
+
+ p[0] = enumerator
+
def p_declarator_1(self, p):
- """ declarator : direct_declarator
+ """ declarator : direct_declarator
"""
p[0] = p[1]
-
+
def p_declarator_2(self, p):
- """ declarator : pointer direct_declarator
+ """ declarator : pointer direct_declarator
"""
p[0] = self._type_modify_decl(p[2], p[1])
-
+
+ # Since it's impossible for a type to be specified after a pointer, assume
+ # it's intended to be the name for this declaration. _add_identifier will
+ # raise an error if this TYPEID can't be redeclared.
+ #
+ def p_declarator_3(self, p):
+ """ declarator : pointer TYPEID
+ """
+ decl = c_ast.TypeDecl(
+ declname=p[2],
+ type=None,
+ quals=None,
+ coord=self._coord(p.lineno(2)))
+
+ p[0] = self._type_modify_decl(decl, p[1])
+
def p_direct_declarator_1(self, p):
- """ direct_declarator : ID
+ """ direct_declarator : ID
"""
p[0] = c_ast.TypeDecl(
- declname=p[1],
- type=None,
+ declname=p[1],
+ type=None,
quals=None,
coord=self._coord(p.lineno(1)))
-
+
def p_direct_declarator_2(self, p):
- """ direct_declarator : LPAREN declarator RPAREN
+ """ direct_declarator : LPAREN declarator RPAREN
"""
p[0] = p[2]
-
+
def p_direct_declarator_3(self, p):
- """ direct_declarator : direct_declarator LBRACKET assignment_expression_opt RBRACKET
+ """ direct_declarator : direct_declarator LBRACKET assignment_expression_opt RBRACKET
"""
arr = c_ast.ArrayDecl(
type=None,
dim=p[3],
coord=p[1].coord)
-
+
p[0] = self._type_modify_decl(decl=p[1], modifier=arr)
# Special for VLAs
#
def p_direct_declarator_4(self, p):
- """ direct_declarator : direct_declarator LBRACKET TIMES RBRACKET
+ """ direct_declarator : direct_declarator LBRACKET TIMES RBRACKET
"""
arr = c_ast.ArrayDecl(
type=None,
dim=c_ast.ID(p[3], self._coord(p.lineno(3))),
coord=p[1].coord)
-
+
p[0] = self._type_modify_decl(decl=p[1], modifier=arr)
def p_direct_declarator_5(self, p):
- """ direct_declarator : direct_declarator LPAREN parameter_type_list RPAREN
+ """ direct_declarator : direct_declarator LPAREN parameter_type_list RPAREN
| direct_declarator LPAREN identifier_list_opt RPAREN
"""
func = c_ast.FuncDecl(
args=p[3],
type=None,
coord=p[1].coord)
-
+
+ # To see why _get_yacc_lookahead_token is needed, consider:
+ # typedef char TT;
+ # void foo(int TT) { TT = 10; }
+ # Outside the function, TT is a typedef, but inside (starting and
+ # ending with the braces) it's a parameter. The trouble begins with
+ # yacc's lookahead token. We don't know if we're declaring or
+ # defining a function until we see LBRACE, but if we wait for yacc to
+ # trigger a rule on that token, then TT will have already been read
+ # and incorrectly interpreted as TYPEID. We need to add the
+ # parameters to the scope the moment the lexer sees LBRACE.
+ #
+ if self._get_yacc_lookahead_token().type == "LBRACE":
+ if func.args is not None:
+ for param in func.args.params:
+ if isinstance(param, c_ast.EllipsisParam): break
+ self._add_identifier(param.name, param.coord)
+
p[0] = self._type_modify_decl(decl=p[1], modifier=func)
-
+
def p_pointer(self, p):
""" pointer : TIMES type_qualifier_list_opt
| TIMES type_qualifier_list_opt pointer
"""
coord = self._coord(p.lineno(1))
-
+
p[0] = c_ast.PtrDecl(
quals=p[2] or [],
type=p[3] if len(p) > 3 else None,
coord=coord)
-
+
def p_type_qualifier_list(self, p):
""" type_qualifier_list : type_qualifier
| type_qualifier_list type_qualifier
"""
p[0] = [p[1]] if len(p) == 2 else p[1] + [p[2]]
-
+
def p_parameter_type_list(self, p):
""" parameter_type_list : parameter_list
| parameter_list COMMA ELLIPSIS
"""
- if len(p) > 2:
+ if len(p) > 2:
p[1].params.append(c_ast.EllipsisParam(self._coord(p.lineno(3))))
-
+
p[0] = p[1]
def p_parameter_list(self, p):
@@ -896,33 +1072,43 @@
""" parameter_declaration : declaration_specifiers declarator
"""
spec = p[1]
- decl = p[2]
-
- decl = c_ast.Decl(
- name=None,
- quals=spec['qual'],
- storage=spec['storage'],
- funcspec=spec['function'],
- type=decl,
- init=None,
- bitsize=None,
- coord=decl.coord)
-
- typename = spec['type'] or ['int']
- p[0] = self._fix_decl_name_type(decl, typename)
-
+ if not spec['type']:
+ spec['type'] = [c_ast.IdentifierType(['int'],
+ coord=self._coord(p.lineno(1)))]
+ p[0] = self._build_declarations(
+ spec=spec,
+ decls=[dict(decl=p[2])])[0]
+
def p_parameter_declaration_2(self, p):
""" parameter_declaration : declaration_specifiers abstract_declarator_opt
"""
spec = p[1]
- decl = c_ast.Typename(
- quals=spec['qual'],
- type=p[2] or c_ast.TypeDecl(None, None, None),
- coord=self._coord(p.lineno(2)))
-
- typename = spec['type'] or ['int']
- p[0] = self._fix_decl_name_type(decl, typename)
-
+ if not spec['type']:
+ spec['type'] = [c_ast.IdentifierType(['int'],
+ coord=self._coord(p.lineno(1)))]
+
+ # Parameters can have the same names as typedefs. The trouble is that
+ # the parameter's name gets grouped into declaration_specifiers, making
+ # it look like an old-style declaration; compensate.
+ #
+ if len(spec['type']) > 1 and len(spec['type'][-1].names) == 1 and \
+ self._is_type_in_scope(spec['type'][-1].names[0]):
+ decl = self._build_declarations(
+ spec=spec,
+ decls=[dict(decl=p[2], init=None)])[0]
+
+ # This truly is an old-style parameter declaration
+ #
+ else:
+ decl = c_ast.Typename(
+ quals=spec['qual'],
+ type=p[2] or c_ast.TypeDecl(None, None, None),
+ coord=self._coord(p.lineno(2)))
+ typename = spec['type']
+ decl = self._fix_decl_name_type(decl, typename)
+
+ p[0] = decl
+
def p_identifier_list(self, p):
""" identifier_list : identifier
| identifier_list COMMA identifier
@@ -937,7 +1123,7 @@
""" initializer : assignment_expression
"""
p[0] = p[1]
More information about the pypy-commit
mailing list