diff options
author | Carl Friedrich Bolz-Tereick <cfbolz@gmx.de> | 2019-02-26 22:19:20 +0100 |
---|---|---|
committer | Carl Friedrich Bolz-Tereick <cfbolz@gmx.de> | 2019-02-26 22:19:20 +0100 |
commit | a35162ed6c63b05b3558ad7c7151f3ec2f59bb08 (patch) | |
tree | 041bc76888adc48c17dd73018f78146ddb4794d5 /rpython/rlib/unicodedata/triegenerator.py | |
parent | merge (diff) | |
download | pypy-a35162ed6c63b05b3558ad7c7151f3ec2f59bb08.tar.gz pypy-a35162ed6c63b05b3558ad7c7151f3ec2f59bb08.tar.bz2 pypy-a35162ed6c63b05b3558ad7c7151f3ec2f59bb08.zip |
change generate_unicodedb to also use tries for the non-basemods
(this makes pypy-c 1.5MB smaller)
Diffstat (limited to 'rpython/rlib/unicodedata/triegenerator.py')
-rw-r--r-- | rpython/rlib/unicodedata/triegenerator.py | 30 |
1 file changed, 22 insertions, 8 deletions
diff --git a/rpython/rlib/unicodedata/triegenerator.py b/rpython/rlib/unicodedata/triegenerator.py index e4cb7b003b..e150ccde9d 100644 --- a/rpython/rlib/unicodedata/triegenerator.py +++ b/rpython/rlib/unicodedata/triegenerator.py @@ -91,7 +91,7 @@ def trie_lookup(name): else: parent = (parentstr & 0x7fffffff) >> %(STRIDXBITS)d stridx = parentstr & ((1 << %(STRIDXBITS)d) - 1) - + strlen = ord(_stringtable[stridx]) substring = _stringtable[stridx+1:stridx+1+strlen] @@ -134,7 +134,7 @@ def name_of_node(charnode): charnode = parent return ''.join(res) - + """ % globals() def findranges(d): @@ -165,6 +165,11 @@ def collapse_ranges(ranges): return collapsed def build_compression_tree(outfile, ucdata): + print >> outfile, "#" + "_" * 60 + print >> outfile, "# output from build_compression_tree" + if not ucdata: + print >> outfile, empty_trie_functions + return print >> outfile, classdef reversedict = {} @@ -180,7 +185,7 @@ def build_compression_tree(outfile, ucdata): print >> outfile, "%r" % (chr(strlen) + string) stringidx[string] = stridx stridx += strlen + 1 - + print >> outfile, ")" assert stridx < (1<<STRIDXBITS), "Too many strings, > %d chars" % ( @@ -190,7 +195,7 @@ def build_compression_tree(outfile, ucdata): nodelist = [] maxidx = 0 nodes = [rootnode] - + while nodes: n = nodes.pop() nodelist.append(n) @@ -198,13 +203,13 @@ def build_compression_tree(outfile, ucdata): nodes.append(n.left) if n.right: nodes.append(n.right) - + nodelist.sort(key=lambda x: x.index) newnodes = [] map(newnodes.extend, (n.as_list(stringidx) for n in nodelist)) print >> outfile, "_charnodes =", pprint.pprint(newnodes, stream=outfile) - + function = ["def lookup_charcode(code):", " res = -1"] ranges = collapse_ranges(findranges(reversedict)) @@ -234,6 +239,8 @@ def build_compression_tree(outfile, ucdata): "", ]) print >> outfile, '\n'.join(function) + print >> outfile, "# end output from build_compression_tree" + print >> outfile, "#" + "_" * 60 return rootnode def 
gen_compression_tree(stringlist, ucdata, reversedict, parent=None, parent_str="", left=False): @@ -244,8 +251,8 @@ def gen_compression_tree(stringlist, ucdata, reversedict, parent=None, parent_st for string in stringlist: for stop in range(1, len(string) + 1): codes[string[:stop]] = codes.get(string[:stop], 0) + 1 - - s = [((freq), code) for (code, freq) in codes.iteritems()] + + s = [((freq), code) for (code, freq) in codes.iteritems()] s.sort() if not s: return None @@ -306,3 +313,10 @@ if __name__ == '__main__': import sys build_compression_tree(sys.stdout, testdata) + +empty_trie_functions = """ +def trie_lookup(name): + raise KeyError +def lookup_charcode(code): + raise KeyError +""" |