about | summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
Diffstat (limited to 'pypy/objspace/std')
-rw-r--r-- pypy/objspace/std/test/test_unicodeobject.py | 9
-rw-r--r-- pypy/objspace/std/unicodeobject.py | 26
2 files changed, 29 insertions, 6 deletions
diff --git a/pypy/objspace/std/test/test_unicodeobject.py b/pypy/objspace/std/test/test_unicodeobject.py
index 6b1c7315da..e8763dc496 100644
--- a/pypy/objspace/std/test/test_unicodeobject.py
+++ b/pypy/objspace/std/test/test_unicodeobject.py
@@ -207,6 +207,15 @@ class TestUnicodeObject:
for end in range(start, len(u)):
assert w_u._unicode_sliced_constant_index_jit(space, start, end)._utf8 == u[start: end].encode("utf-8")
+ def test_lower_upper_ascii(self):
+ from pypy.module.unicodedata.interp_ucd import unicodedb
+ # check that ascii chars tolower/toupper still behave sensibly in the
+ # unicodedb - unlikely to ever change, but well
+ for ch in range(128):
+ unilower, = unicodedb.tolower_full(ch)
+ assert chr(unilower) == chr(ch).lower()
+ uniupper, = unicodedb.toupper_full(ch)
+ assert chr(uniupper) == chr(ch).upper()
class AppTestUnicodeStringStdOnly:
def test_compares(self):
diff --git a/pypy/objspace/std/unicodeobject.py b/pypy/objspace/std/unicodeobject.py
index 4fa1a98437..0be4a9e55c 100644
--- a/pypy/objspace/std/unicodeobject.py
+++ b/pypy/objspace/std/unicodeobject.py
@@ -503,11 +503,18 @@ class W_UnicodeObject(W_Root):
return tformat.formatter_field_name_split()
def descr_lower(self, space):
- builder = rutf8.Utf8StringBuilder(len(self._utf8))
- for ch in rutf8.Utf8StringIterator(self._utf8):
+ if self.is_ascii():
+ return space.newutf8(self._utf8.lower(), len(self._utf8))
+ return self._descr_lower(self._utf8)
+
+ @staticmethod
+ @jit.elidable
+ def _descr_lower(utf8):
+ builder = rutf8.Utf8StringBuilder(len(utf8))
+ for ch in rutf8.Utf8StringIterator(utf8):
lower = unicodedb.tolower(ch)
builder.append_code(lower)
- return self.from_utf8builder(builder)
+ return W_UnicodeObject.from_utf8builder(builder)
def descr_isdecimal(self, space):
return self._is_generic(space, '_isdecimal')
@@ -650,11 +657,18 @@ class W_UnicodeObject(W_Root):
return space.newlist(strs_w)
def descr_upper(self, space):
- builder = rutf8.Utf8StringBuilder(len(self._utf8))
- for ch in rutf8.Utf8StringIterator(self._utf8):
+ if self.is_ascii():
+ return space.newutf8(self._utf8.upper(), len(self._utf8))
+ return self._descr_upper(self._utf8)
+
+ @staticmethod
+ @jit.elidable
+ def _descr_upper(utf8):
+ builder = rutf8.Utf8StringBuilder(len(utf8))
+ for ch in rutf8.Utf8StringIterator(utf8):
ch = unicodedb.toupper(ch)
builder.append_code(ch)
- return self.from_utf8builder(builder)
+ return W_UnicodeObject.from_utf8builder(builder)
@unwrap_spec(width=int)
def descr_zfill(self, space, width):