aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Carl Friedrich Bolz-Tereick <cfbolz@gmx.de> 2019-02-26 22:19:20 +0100
committer Carl Friedrich Bolz-Tereick <cfbolz@gmx.de> 2019-02-26 22:19:20 +0100
commit a35162ed6c63b05b3558ad7c7151f3ec2f59bb08 (patch)
tree 041bc76888adc48c17dd73018f78146ddb4794d5 /rpython/rlib/unicodedata/triegenerator.py
parent merge (diff)
download pypy-a35162ed6c63b05b3558ad7c7151f3ec2f59bb08.tar.gz
pypy-a35162ed6c63b05b3558ad7c7151f3ec2f59bb08.tar.bz2
pypy-a35162ed6c63b05b3558ad7c7151f3ec2f59bb08.zip
change generate_unicodedb to also use tries for the non-basemods
(this makes pypy-c 1.5MB smaller)
Diffstat (limited to 'rpython/rlib/unicodedata/triegenerator.py')
-rw-r--r-- rpython/rlib/unicodedata/triegenerator.py 30
1 files changed, 22 insertions, 8 deletions
diff --git a/rpython/rlib/unicodedata/triegenerator.py b/rpython/rlib/unicodedata/triegenerator.py
index e4cb7b003b..e150ccde9d 100644
--- a/rpython/rlib/unicodedata/triegenerator.py
+++ b/rpython/rlib/unicodedata/triegenerator.py
@@ -91,7 +91,7 @@ def trie_lookup(name):
else:
parent = (parentstr & 0x7fffffff) >> %(STRIDXBITS)d
stridx = parentstr & ((1 << %(STRIDXBITS)d) - 1)
-
+
strlen = ord(_stringtable[stridx])
substring = _stringtable[stridx+1:stridx+1+strlen]
@@ -134,7 +134,7 @@ def name_of_node(charnode):
charnode = parent
return ''.join(res)
-
+
""" % globals()
def findranges(d):
@@ -165,6 +165,11 @@ def collapse_ranges(ranges):
return collapsed
def build_compression_tree(outfile, ucdata):
+ print >> outfile, "#" + "_" * 60
+ print >> outfile, "# output from build_compression_tree"
+ if not ucdata:
+ print >> outfile, empty_trie_functions
+ return
print >> outfile, classdef
reversedict = {}
@@ -180,7 +185,7 @@ def build_compression_tree(outfile, ucdata):
print >> outfile, "%r" % (chr(strlen) + string)
stringidx[string] = stridx
stridx += strlen + 1
-
+
print >> outfile, ")"
assert stridx < (1<<STRIDXBITS), "Too many strings, > %d chars" % (
@@ -190,7 +195,7 @@ def build_compression_tree(outfile, ucdata):
nodelist = []
maxidx = 0
nodes = [rootnode]
-
+
while nodes:
n = nodes.pop()
nodelist.append(n)
@@ -198,13 +203,13 @@ def build_compression_tree(outfile, ucdata):
nodes.append(n.left)
if n.right:
nodes.append(n.right)
-
+
nodelist.sort(key=lambda x: x.index)
newnodes = []
map(newnodes.extend, (n.as_list(stringidx) for n in nodelist))
print >> outfile, "_charnodes =",
pprint.pprint(newnodes, stream=outfile)
-
+
function = ["def lookup_charcode(code):",
" res = -1"]
ranges = collapse_ranges(findranges(reversedict))
@@ -234,6 +239,8 @@ def build_compression_tree(outfile, ucdata):
"",
])
print >> outfile, '\n'.join(function)
+ print >> outfile, "# end output from build_compression_tree"
+ print >> outfile, "#" + "_" * 60
return rootnode
def gen_compression_tree(stringlist, ucdata, reversedict, parent=None, parent_str="", left=False):
@@ -244,8 +251,8 @@ def gen_compression_tree(stringlist, ucdata, reversedict, parent=None, parent_st
for string in stringlist:
for stop in range(1, len(string) + 1):
codes[string[:stop]] = codes.get(string[:stop], 0) + 1
-
- s = [((freq), code) for (code, freq) in codes.iteritems()]
+
+ s = [((freq), code) for (code, freq) in codes.iteritems()]
s.sort()
if not s:
return None
@@ -306,3 +313,10 @@ if __name__ == '__main__':
import sys
build_compression_tree(sys.stdout, testdata)
+
+empty_trie_functions = """
+def trie_lookup(name):
+ raise KeyError
+def lookup_charcode(code):
+ raise KeyError
+"""