[pypy-commit] pypy code_page-utf8: start to implement code_page and oem encoder/decoder

mattip pypy.commits at gmail.com
Sun Nov 10 18:16:39 EST 2019


Author: Matti Picus <matti.picus at gmail.com>
Branch: code_page-utf8
Changeset: r98013:a55b2a60b689
Date: 2019-11-09 01:36 +0200
http://bitbucket.org/pypy/pypy/changeset/a55b2a60b689/

Log:	start to implement code_page and oem encoder/decoder

diff --git a/pypy/interpreter/unicodehelper.py b/pypy/interpreter/unicodehelper.py
--- a/pypy/interpreter/unicodehelper.py
+++ b/pypy/interpreter/unicodehelper.py
@@ -374,6 +374,18 @@
         res_utf8 = runicode.unicode_encode_utf_8(res, size, 'strict')
         return res_utf8, len(res), size
 
+    def utf8_encode_code_page(s, errors, errorhandler, allow_surrogates=False):
+        pass
+
+    def str_decode_code_page(s, errors, final, errorhandler, force_ignore=True):
+        pass
+
+    def utf8_encode_oem(s, errors, errorhandler, allow_surrogates=False):
+        pass
+
+    def str_decode_oem(s, errors, final, errorhandler, force_ignore=True):
+        pass
+
 def str_decode_utf8(s, errors, final, errorhandler, allow_surrogates=False):
     try:
         # fast version first
diff --git a/pypy/module/_codecs/interp_codecs.py b/pypy/module/_codecs/interp_codecs.py
--- a/pypy/module/_codecs/interp_codecs.py
+++ b/pypy/module/_codecs/interp_codecs.py
@@ -3,7 +3,7 @@
 from rpython.rlib.objectmodel import we_are_translated, not_rpython
 from rpython.rlib.rstring import StringBuilder, UnicodeBuilder
 from rpython.rlib.rutf8 import MAXUNICODE
-from rpython.rlib.runicode import raw_unicode_escape_helper
+from rpython.rlib import runicode
 
 from pypy.interpreter.error import OperationError, oefmt
 from pypy.interpreter.gateway import interp2app, unwrap_spec, WrappedDefault
@@ -291,7 +291,7 @@
         obj = w_obj._utf8
         while pos < end:
             code = rutf8.codepoint_at_pos(obj, pos)
-            raw_unicode_escape_helper(builder, code)
+            unicodehelper.raw_unicode_escape_helper(builder, code)
             pos = rutf8.next_codepoint_pos(obj, pos)
         return space.newtuple([space.newtext(builder.build()), w_end])
     elif space.isinstance_w(w_exc, space.w_UnicodeDecodeError):
@@ -303,7 +303,7 @@
         pos = start
         while pos < end:
             oc = ord(obj[pos])
-            raw_unicode_escape_helper(builder, oc)
+            unicodehelper.raw_unicode_escape_helper(builder, oc)
             pos += 1
         return space.newtuple([space.newtext(builder.build()), w_end])
     else:
@@ -705,10 +705,13 @@
          ]:
     make_decoder_wrapper(decoder)
 
-from rpython.rlib import runicode
-if hasattr(runicode, 'str_decode_mbcs'):
+if getattr(unicodehelper, '_WIN32', False):
     make_encoder_wrapper('mbcs_encode')
     make_decoder_wrapper('mbcs_decode')
+    make_encoder_wrapper('code_page_encode')
+    make_decoder_wrapper('code_page_decode')
+    make_encoder_wrapper('oem_encode')
+    make_decoder_wrapper('oem_decode')
 
 # utf-8 functions are not regular, because we have to pass
 # "allow_surrogates=False"
diff --git a/pypy/module/_codecs/moduledef.py b/pypy/module/_codecs/moduledef.py
--- a/pypy/module/_codecs/moduledef.py
+++ b/pypy/module/_codecs/moduledef.py
@@ -91,7 +91,11 @@
         from rpython.rlib import runicode
         if (hasattr(runicode, 'str_decode_mbcs')):
             self.interpleveldefs['mbcs_encode'] = 'interp_codecs.mbcs_encode'
+            self.interpleveldefs['oem_encode'] = 'interp_codecs.oem_encode'
+            self.interpleveldefs['code_page_encode'] = 'interp_codecs.code_page_encode'
             self.interpleveldefs['mbcs_decode'] = 'interp_codecs.mbcs_decode'
+            self.interpleveldefs['oem_decode'] = 'interp_codecs.oem_decode'
+            self.interpleveldefs['code_page_decode'] = 'interp_codecs.code_page_decode'
 
         MixedModule.__init__(self, space, *args)
 


More information about the pypy-commit mailing list