[Python-checkins] bpo-26439 Fix ctypes.util.find_library failure on AIX (#4507)

Victor Stinner webhook-mailer at python.org
Tue Dec 19 07:58:53 EST 2017


https://github.com/python/cpython/commit/c5ae169e1b73315672770517bf51cf8464286c76
commit: c5ae169e1b73315672770517bf51cf8464286c76
branch: master
author: Michael Felt <aixtools at users.noreply.github.com>
committer: Victor Stinner <victor.stinner at gmail.com>
date: 2017-12-19T13:58:49+01:00
summary:

bpo-26439 Fix ctypes.util.find_library failure on AIX (#4507)

Implement find_library() support in ctypes/util for AIX.

Add some AIX specific tests.

files:
A Lib/ctypes/_aix.py
A Misc/NEWS.d/next/Library/2017-11-24-08-35-43.bpo-26439.IC45_f.rst
M .gitignore
M Lib/ctypes/__init__.py
M Lib/ctypes/util.py
M Modules/posixmodule.c
M configure
M configure.ac
M pyconfig.h.in

diff --git a/.gitignore b/.gitignore
index 59206541ee4..05fb6cba087 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,3 +1,10 @@
+# added for local development
+.buildaix/
+Modules/python.exp
+buildaix/
+installp/
+.gitignore
+
 # Two-trick pony for OSX and other case insensitive file systems:
 # Ignore ./python binary on Unix but still look into ./Python/ directory.
 /python
diff --git a/Lib/ctypes/__init__.py b/Lib/ctypes/__init__.py
index 972ea0ac879..61467739886 100644
--- a/Lib/ctypes/__init__.py
+++ b/Lib/ctypes/__init__.py
@@ -338,6 +338,14 @@ def __init__(self, name, mode=DEFAULT_MODE, handle=None,
             flags |= _FUNCFLAG_USE_ERRNO
         if use_last_error:
             flags |= _FUNCFLAG_USE_LASTERROR
+        if _sys.platform.startswith("aix"):
+            """When the name contains ".a(" and ends with ")",
+               e.g., "libFOO.a(libFOO.so)" - this is taken to be an
+               archive(member) syntax for dlopen(), and the mode is adjusted.
+               Otherwise, name is presented to dlopen() as a file argument.
+            """
+            if name and name.endswith(")") and ".a(" in name:
+                mode |= ( _os.RTLD_MEMBER | _os.RTLD_NOW )
 
         class _FuncPtr(_CFuncPtr):
             _flags_ = flags
diff --git a/Lib/ctypes/_aix.py b/Lib/ctypes/_aix.py
new file mode 100644
index 00000000000..5c4d8d9cc91
--- /dev/null
+++ b/Lib/ctypes/_aix.py
@@ -0,0 +1,331 @@
+"""
+Lib/ctypes.util.find_library() support for AIX
+Similar approach as done for Darwin support by using separate files
+but unlike Darwin - no extension such as ctypes.macholib.*
+
+dlopen() is an interface to AIX initAndLoad() - primary documentation at:
+https://www.ibm.com/support/knowledgecenter/en/ssw_aix_61/com.ibm.aix.basetrf1/dlopen.htm
+https://www.ibm.com/support/knowledgecenter/en/ssw_aix_61/com.ibm.aix.basetrf1/load.htm
+
+AIX supports two styles for dlopen(): svr4 (System V Release 4) which is common on posix
+platforms, but also a BSD style - aka SVR3.
+
+From AIX 5.3 Difference Addendum (December 2004)
+2.9 SVR4 linking affinity
+Nowadays, there are two major object file formats used by the operating systems:
+XCOFF: The COFF enhanced by IBM and others. The original COFF (Common
+Object File Format) was the base of SVR3 and BSD 4.2 systems.
+ELF:   Executable and Linking Format that was developed by AT&T and is a
+base for SVR4 UNIX.
+
+While the shared library content is identical on AIX - one is located as a filepath name
+(svr4 style) and the other is located as a member of an archive (and the archive
+is located as a filepath name).
+
+The key difference arises when supporting multiple abi formats (i.e., 32 and 64 bit).
+For svr4 either only one ABI is supported, or there are two directories, or there
+are different file names. The most common solution for multiple ABI is multiple
+directories.
+
+For the XCOFF (aka AIX) style - one directory (one archive file) is sufficient
+as multiple shared libraries can be in the archive - even sharing the same name.
+In documentation the archive is also referred to as the "base" and the shared
+library object is referred to as the "member".
+
+For dlopen() on AIX (read initAndLoad()) the calls are similar.
+Default activity occurs when no path information is provided. When path
+information is provided dlopen() does not search any other directories.
+
+For SVR4 - the shared library name is the name of the file expected: libFOO.so
+For AIX - the shared library is expressed as base(member). The search is for the
+base (e.g., libFOO.a) and once the base is found the shared library - identified by
+member (e.g., libFOO.so, or shr.o) is located and loaded.
+
+The mode bit RTLD_MEMBER tells initAndLoad() that it needs to use the AIX (SVR3)
+naming style.
+"""
+__author__ = "Michael Felt <aixtools at felt.demon.nl>"
+
+import re
+from os import environ, path
+from sys import executable
+from ctypes import c_void_p, sizeof
+from subprocess import Popen, PIPE, DEVNULL
+
+# Executable bit size - 32 or 64
+# Used to filter the search in an archive by size, e.g., -X64
+AIX_ABI = sizeof(c_void_p) * 8
+
+
+from sys import maxsize
+def _last_version(libnames, sep):
+    """Return the name in libnames carrying the highest trailing version."""
+    def _version_key(candidate):
+        # "libxyz.so.MAJOR.MINOR" => [MAJOR, MINOR]; unversioned => [maxsize]
+        trailing = []
+        for field in reversed(candidate.split(sep)):
+            try:
+                trailing.append(int(field))
+            except ValueError:
+                break
+        return trailing[::-1] or [maxsize]
+    return max(reversed(libnames), key=_version_key)
+
+def get_ld_header(p):
+    """Return the path line before the next "INDEX" line of p.stdout, or None."""
+    ld_header = None
+    for line in p.stdout:
+        if line.startswith(('/', './', '../')):
+            ld_header = line
+        elif "INDEX" in line and ld_header is not None:  # skip stray INDEX
+            return ld_header.rstrip('\n')
+    return None
+
+def get_ld_header_info(p):
+    """Collect the digit-led lines that follow an ld_header in p.stdout.
+
+    Reading stops at, and consumes, the first line not starting with a
+    digit - normally the blank separator line.
+    """
+    collected = []
+    for entry in p.stdout:
+        if not re.match("[0-9]", entry):
+            return collected
+        collected.append(entry)
+    return collected
+
+def get_ld_headers(file):
+    """
+    Parse the header of the loader section of executable and archives.
+
+    Runs "/usr/bin/dump -X<AIX_ABI> -H file" as a subprocess and returns a
+    list of (ld_header, ld_header_info) tuples; stderr is discarded.
+    """
+    # get_ld_headers parsing:
+    # 1. Find a line that starts with /, ./, or ../ - set as ld_header
+    # 2. If "INDEX" occurs in a following line - return ld_header
+    # 3. get info (lines starting with [0-9])
+    ldr_headers = []
+    cmd = ["/usr/bin/dump", "-X%s" % AIX_ABI, "-H", file]
+    # leaving the with-block closes the pipe and waits for (reaps) the child
+    with Popen(cmd, universal_newlines=True,
+               stdout=PIPE, stderr=DEVNULL) as p:
+        # be sure to read to the end-of-file - getting all entries
+        while True:
+            ld_header = get_ld_header(p)
+            if not ld_header:
+                break
+            ldr_headers.append((ld_header, get_ld_header_info(p)))
+    return ldr_headers
+
+def get_shared(ld_headers):
+    """
+    Return the "[member]" part of every ld_header naming an archive member.
+
+    ld_headers is a list of (ld_header, ld_header_info) tuples. Headers
+    without a "[" are skipped. Everything before the first "[" (the path)
+    and the final character (the trailing colon) are dropped; the square
+    brackets themselves are kept.
+    """
+    members = []
+    for header, _info in ld_headers:
+        start = header.find("[")
+        if start >= 0:
+            members.append(header[start:-1])
+    return members
+
+def get_one_match(expr, lines):
+    """
+    Return the text matched by expr between "[" and "]" when exactly one
+    of the lines matches; return None for zero or for several matches.
+    """
+    # member names in the ld_headers output are between square brackets
+    bracketed = r'\[(%s)\]' % expr
+    found = (re.search(bracketed, entry) for entry in lines)
+    hits = [hit for hit in found if hit]
+    if len(hits) != 1:
+        return None
+    return hits[0].group(1)
+
+# additional processing to deal with AIX legacy names for 64-bit members
+def get_legacy(members):
+    """
+    Look up a member using the historical aka legacy naming schemes that
+    started with AIX4 shared library support.
+    e.g., in /usr/lib/libc.a the member name is shr.o for the 32-bit binary
+    and shr_64.o for the 64-bit binary.
+    members is the list of "[member]" strings to search.
+    Returns the member name, or None when no single legacy name matches.
+    """
+    if AIX_ABI == 64:
+        # AIX 64-bit member is one of shr64.o, shr_64.o, or shr4_64.o
+        candidates = [r'shr4?_?64\.o']
+    else:
+        # 32-bit legacy names - both shr.o and shr4.o exist.
+        # shr.o is the preferred name so it is tried first,
+        #  i.e., shr4.o is returned only when shr.o does not exist
+        candidates = [re.escape(name) for name in ('shr.o', 'shr4.o')]
+    for expr in candidates:
+        member = get_one_match(expr, members)
+        if member:
+            return member
+    return None
+
+def get_version(name, members):
+    """
+    Return the highest numbered versioned member - if one exists.
+    This function is called when an unversioned libFOO.a(libFOO.so) has
+    not been found.
+
+    Versioning for the member name is expected to follow
+    GNU LIBTOOL conventions: the highest version (x, then X.y, then X.Y.z)
+     * find [libFoo.so.X]
+     * find [libFoo.so.X.Y]
+     * find [libFoo.so.X.Y.Z]
+
+    Before the GNU convention became the standard scheme regardless of
+    binary size, AIX packagers used the GNU convention "as-is" for 32-bit
+    archive members but used a "distinguishing" name for 64-bit members.
+    This scheme inserted either 64 or _64 between libFOO and .so
+    - generally libFOO_64.so, but occasionally libFOO64.so
+
+    Returns None when no versioned member name is found.
+    """
+    # the expression ending for versions must start as
+    # '.so.[0-9]', i.e., *.so.[at least one digit]
+    # Instead of separate, more specific expressions for .so.X, .so.X.Y
+    # and .so.X.Y.Z, any combination of additional digits and dots is
+    # accepted after the first required 'dot' digit.
+    # Anything more than libFOO.so.digits.digits.digits
+    # should be seen as a member name outside normal expectations.
+    # The plain GNU name is tried before the 64-bit "distinguishing" one.
+    plain = r'lib%s\.so\.[0-9]+[0-9.]*' % name
+    sized = r'lib%s_?64\.so\.[0-9]+[0-9.]*' % name
+    for pattern in (plain, sized):
+        hits = (re.search(pattern, entry) for entry in members)
+        found = [hit.group(0) for hit in hits if hit]
+        if found:
+            # several versions may be present - keep the highest one
+            return _last_version(found, '.')
+    return None
+
+def get_member(name, members):
+    """
+    Pick the archive member that best matches the requested library.
+
+    name is the library name without any prefix like lib, suffix like .so,
+    or version number; members is the list of candidate member names.
+    The candidates are tried in this order:
+     * the generic libXXX.so
+     * libXXX64.so (64-bit only)
+     * a versioned libXXX.so.a.b.c
+     * a legacy AIX member name
+    The result of the first successful lookup is returned.
+    """
+    # look first for a generic match - prepend lib and append .so
+    member = get_one_match(r'lib%s\.so' % name, members)
+    # 64-bit only: the size may be part of the generic name
+    if not member and AIX_ABI == 64:
+        member = get_one_match(r'lib%s64\.so' % name, members)
+    if member:
+        return member
+    # since an exact match with .so as suffix was not found
+    # look for a versioned name
+    # If a versioned name is not found, look for AIX legacy member name
+    versioned = get_version(name, members)
+    if versioned:
+        return versioned
+    return get_legacy(members)
+
+def get_libpaths():
+    """
+    Build the list of directories to search for libraries.
+
+    The list starts with the entries of LD_LIBRARY_PATH (preferred), or
+    of LIBPATH when LD_LIBRARY_PATH is not defined.
+    On AIX, the buildtime searchpath is stored in the executable as
+    "loader header information"; the command /usr/bin/dump -H extracts
+    this info. The paths found for the python executable are added
+    after the ones taken from the environment.
+    This mimics AIX dlopen() behavior.
+    """
+    # environment first: LD_LIBRARY_PATH wins over LIBPATH
+    from_env = environ.get("LD_LIBRARY_PATH")
+    if from_env is None:
+        from_env = environ.get("LIBPATH")
+    libpaths = [] if from_env is None else from_env.split(":")
+    # then the searchpath(s) recorded in the python executable itself
+    for _header, info in get_ld_headers(executable):
+        for entry in info:
+            # the second (optional) argument is PATH if it includes a /
+            field = entry.split()[1]
+            if "/" in field:
+                libpaths += field.split(":")
+    return libpaths
+
+def find_shared(paths, name):
+    """
+    Search the directories in paths for the archive lib<name>.a.
+
+    paths is a list of directories to search for an archive.
+    name is the abbreviated name given to find_library().
+    Only the first archive found is examined: the result is then
+    (base, member) when get_member() finds a suitable member, otherwise
+    (None, None). (None, None) is also returned when no archive exists.
+    """
+    # "lib" is prefixed to emulate compiler name resolution,
+    # e.g., -lc to libc
+    base = 'lib%s.a' % name
+    for libdir in paths:
+        # /lib is a symbolic link to /usr/lib, skip it
+        if libdir == "/lib":
+            continue
+        archive = path.join(libdir, base)
+        if not path.exists(archive):
+            continue
+        members = get_shared(get_ld_headers(archive))
+        member = get_member(re.escape(name), members)
+        return (base, member) if member is not None else (None, None)
+    return (None, None)
+
+def find_library(name):
+    """AIX implementation of ctypes.util.find_library()
+    Find an archive member that will dlopen(). If not available,
+    also search for a file (or link) with a .so suffix.
+
+    AIX supports two types of schemes that can be used with dlopen().
+    The so-called SystemV Release4 (svr4) format is commonly suffixed
+    with .so while the (default) AIX scheme has the library (archive)
+    ending with the suffix .a
+    As an archive has multiple members (e.g., 32-bit and 64-bit) in one file
+    the argument passed to dlopen must include both the library and
+    the member names in a single string.
+
+    find_library() looks first for an archive (.a) with a suitable member.
+    If no archive+member pair is found, look for a .so file.
+
+    Returns "libNAME.a(member)", "libNAME.so", or None when neither exists.
+    """
+
+    libpaths = get_libpaths()
+    (base, member) = find_shared(libpaths, name)
+    if base is not None:
+        return "%s(%s)" % (base, member)
+
+    # To get here, a member in an archive has not been found
+    # In other words, either:
+    # a) a .a file was not found
+    # b) a .a file did not have a suitable member
+    # So, check libpaths for a .so file
+    # Note, the installation must prepare a link from a .so
+    # to a versioned file
+    # This is common practice by GNU libtool on other platforms
+    soname = "lib%s.so" % name
+    for libdir in libpaths:
+        # /lib is a symbolic link to /usr/lib, skip it
+        if libdir == "/lib":
+            continue
+        if path.exists(path.join(libdir, soname)):
+            return soname
+    # if we are here, we have not found anything plausible
+    return None
diff --git a/Lib/ctypes/util.py b/Lib/ctypes/util.py
index 8856f6594c4..e837abba5c8 100644
--- a/Lib/ctypes/util.py
+++ b/Lib/ctypes/util.py
@@ -80,6 +80,15 @@ def find_library(name):
                 continue
         return None
 
+if sys.platform.startswith("aix"):
+    # AIX has two styles of storing shared libraries
+    # GNU auto_tools refer to these as svr4 and aix
+    # svr4 (System V Release 4) is a regular file, often with .so as suffix
+    # AIX style uses an archive (suffix .a) with members (e.g., shr.o, libssl.so)
+    # see issue#26439 and _aix.py for more details
+
+    from ctypes._aix import find_library
+
 elif os.name == "posix":
     # Andreas Degert's find functions, using gcc, /sbin/ldconfig, objdump
     import re, tempfile
@@ -324,6 +333,22 @@ def test():
             print(cdll.LoadLibrary("libcrypto.dylib"))
             print(cdll.LoadLibrary("libSystem.dylib"))
             print(cdll.LoadLibrary("System.framework/System"))
+        # issue-26439 - fix broken test call for AIX
+        elif sys.platform.startswith("aix"):
+            from ctypes import CDLL
+            if sys.maxsize < 2**32:
+                print("Using CDLL(name, os.RTLD_MEMBER): %s" % CDLL("libc.a(shr.o)", os.RTLD_MEMBER))
+                print("Using cdll.LoadLibrary(): %s" % cdll.LoadLibrary("libc.a(shr.o)"))
+                # librpm.so is only available as 32-bit shared library
+                print(find_library("rpm"))
+                print(cdll.LoadLibrary("librpm.so"))
+            else:
+                print("Using CDLL(name, os.RTLD_MEMBER): %s" % CDLL("libc.a(shr_64.o)", os.RTLD_MEMBER))
+                print("Using cdll.LoadLibrary(): %s" % cdll.LoadLibrary("libc.a(shr_64.o)"))
+            print("crypt\t:: %s" % find_library("crypt"))
+            print("crypt\t:: %s" % cdll.LoadLibrary(find_library("crypt")))
+            print("crypto\t:: %s" % find_library("crypto"))
+            print("crypto\t:: %s" % cdll.LoadLibrary(find_library("crypto")))
         else:
             print(cdll.LoadLibrary("libm.so"))
             print(cdll.LoadLibrary("libcrypt.so"))
diff --git a/Misc/NEWS.d/next/Library/2017-11-24-08-35-43.bpo-26439.IC45_f.rst b/Misc/NEWS.d/next/Library/2017-11-24-08-35-43.bpo-26439.IC45_f.rst
new file mode 100644
index 00000000000..91c9a45b503
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2017-11-24-08-35-43.bpo-26439.IC45_f.rst
@@ -0,0 +1,12 @@
+Fix ctypes.util.find_library() for AIX
+by implementing ctypes._aix.find_library()
+Patch by: Michael Felt aka aixtools
+
+ctypes.util.find_library has always returned None on a standard AIX.
+With this patch there is support for both AIX and svr4 shared libraries.
+None is returned only when there is nothing found. Normal behavior is now:
+on AIX find_library("FOO") returns either libFOO.a(libFOO.so) or libFOO.so
+while legacy names e.g., find_library("c") returns libc.a(shr.o)
+or libc.a(shr_64.o) - depending on 32 or 64-bit operations.
+Add RTLD_MEMBER to the mode to support AIX legacy library(member) names
+(Modules/posixmodule.c, ctypes/__init__.py and configure.ac)
diff --git a/Modules/posixmodule.c b/Modules/posixmodule.c
index 448d4b7428e..38b6c80e6bd 100644
--- a/Modules/posixmodule.c
+++ b/Modules/posixmodule.c
@@ -13007,6 +13007,9 @@ all_ins(PyObject *m)
 #if HAVE_DECL_RTLD_DEEPBIND
     if (PyModule_AddIntMacro(m, RTLD_DEEPBIND)) return -1;
 #endif
+#if HAVE_DECL_RTLD_MEMBER
+    if (PyModule_AddIntMacro(m, RTLD_MEMBER)) return -1;
+#endif
 
 #ifdef HAVE_GETRANDOM_SYSCALL
     if (PyModule_AddIntMacro(m, GRND_RANDOM)) return -1;
diff --git a/configure b/configure
index 20d8095c252..24ae2564091 100755
--- a/configure
+++ b/configure
@@ -9872,7 +9872,6 @@ fi
 
 
 
-
 if test "x$ac_cv_env_PKG_CONFIG_set" != "xset"; then
 	if test -n "$ac_tool_prefix"; then
   # Extract the first word of "${ac_tool_prefix}pkg-config", so it can be a program name with args.
@@ -14360,6 +14359,17 @@ fi
 cat >>confdefs.h <<_ACEOF
 #define HAVE_DECL_RTLD_DEEPBIND $ac_have_decl
 _ACEOF
+ac_fn_c_check_decl "$LINENO" "RTLD_MEMBER" "ac_cv_have_decl_RTLD_MEMBER" "#include <dlfcn.h>
+"
+if test "x$ac_cv_have_decl_RTLD_MEMBER" = xyes; then :
+  ac_have_decl=1
+else
+  ac_have_decl=0
+fi
+
+cat >>confdefs.h <<_ACEOF
+#define HAVE_DECL_RTLD_MEMBER $ac_have_decl
+_ACEOF
 
 
 # determine what size digit to use for Python's longs
diff --git a/configure.ac b/configure.ac
index 828dadb56c4..48eaa2ae7f3 100644
--- a/configure.ac
+++ b/configure.ac
@@ -4480,7 +4480,7 @@ then
   [define to 1 if your sem_getvalue is broken.])
 fi
 
-AC_CHECK_DECLS([RTLD_LAZY, RTLD_NOW, RTLD_GLOBAL, RTLD_LOCAL, RTLD_NODELETE, RTLD_NOLOAD, RTLD_DEEPBIND], [], [], [[#include <dlfcn.h>]])
+AC_CHECK_DECLS([RTLD_LAZY, RTLD_NOW, RTLD_GLOBAL, RTLD_LOCAL, RTLD_NODELETE, RTLD_NOLOAD, RTLD_DEEPBIND, RTLD_MEMBER], [], [], [[#include <dlfcn.h>]])
 
 # determine what size digit to use for Python's longs
 AC_MSG_CHECKING([digit size for Python's longs])
diff --git a/pyconfig.h.in b/pyconfig.h.in
index 66b9e888274..d828d48af8c 100644
--- a/pyconfig.h.in
+++ b/pyconfig.h.in
@@ -210,6 +210,10 @@
    don't. */
 #undef HAVE_DECL_RTLD_LOCAL
 
+/* Define to 1 if you have the declaration of `RTLD_MEMBER', and to 0 if you
+   don't. */
+#undef HAVE_DECL_RTLD_MEMBER
+
 /* Define to 1 if you have the declaration of `RTLD_NODELETE', and to 0 if you
    don't. */
 #undef HAVE_DECL_RTLD_NODELETE



More information about the Python-checkins mailing list