diff options
author | Kaz Kylheku <kkylheku@sierrawireless.com> | 2019-01-31 06:52:09 -0800 |
---|---|---|
committer | Kaz Kylheku <kaz@kylheku.com> | 2019-01-31 06:52:09 -0800 |
commit | d3c9f43a229fec71887d9c87836074f8a4313f5a (patch) | |
tree | 30d17ec1fd5fe557332e59aa64baf3661f3450ac | |
parent | 3330b1593c5d591aecbacfdae5a06ade661d9fd6 (diff) | |
download | txr-d3c9f43a229fec71887d9c87836074f8a4313f5a.tar.gz txr-d3c9f43a229fec71887d9c87836074f8a4313f5a.tar.bz2 txr-d3c9f43a229fec71887d9c87836074f8a4313f5a.zip |
compiler: de-dupe strings and bignum literals.
Let's squash duplicate strings and bignum integers in the
virtual machine data tables. We can safely do it for these
objects. For lists and vectors, things are tricky because
these aggregates can contain circularity; so we leave those
alone for now.
Text processing code can generate a lot of duplicate strings.
For instance `@a @b @c` generates three copies of the " " literal.
* share/txr/stdlib/compiler.tl (*dedup*): New special
variable. This is our de-dupe table, but it is globally
nil, so that we don't retain cruft between compile jobs.
(dedup): New function.
(get-dreg): Map the incoming object through dedup.
(dreg-key): New function. This converts a literal object to key for
the dreg hash. Objects that can be de-duped represent themselves.
Objects that cannot be de-duped are keyed by a gensym.
(compiler get-dreg): Use dreg-key to reduce the incoming object to a
key, and work with that, with the effect that strings, characters and
numbers in the data table get de-duped: multiple occurrences of a
character, string or number in the code get the same dreg.
(usr:compile-toplevel, usr:with-compilation-unit): Establish a
dedup hash for the dynamically enclosed compile job. If one is
already established by the surroundign dynamic environment,
then use that one, otherwise create a new hash.
-rw-r--r-- | share/txr/stdlib/compiler.tl | 35 |
1 files changed, 24 insertions, 11 deletions
diff --git a/share/txr/stdlib/compiler.tl b/share/txr/stdlib/compiler.tl index 42f80c2e..f2c64f41 100644 --- a/share/txr/stdlib/compiler.tl +++ b/share/txr/stdlib/compiler.tl @@ -231,15 +231,26 @@ (defvarl assumed-fun) -(defmeth compiler get-dreg (me atom) - (condlet - ((((null atom))) '(t 0)) - (((dreg [me.dreg atom])) dreg) - ((((< me.dreg-cntr %lev-size%))) - (let ((dreg ^(d ,(pinc me.dreg-cntr)))) - (set [me.data (cadr dreg)] atom) - (set [me.dreg atom] dreg))) - (t (compile-error me.last-form "code too complex: too many literals")))) +(defvar *dedup*) + +(defun dedup (obj) + (cond + ((null obj) nil) + ((null *dedup*) obj) + ((or (stringp obj) (bignump obj)) + (or [*dedup* obj] (set [*dedup* obj] obj))) + (t obj))) + +(defmeth compiler get-dreg (me obj) + (let ((dobj (dedup obj))) + (condlet + ((((null dobj))) '(t 0)) + (((dreg [me.dreg dobj])) dreg) + ((((< me.dreg-cntr %lev-size%))) + (let ((dreg ^(d ,(pinc me.dreg-cntr)))) + (set [me.data (cadr dreg)] dobj) + (set [me.dreg dobj] dreg))) + (t (compile-error me.last-form "code too complex: too many literals"))))) (defmeth compiler alloc-dreg (me) (if (< me.dreg-cntr %lev-size%) @@ -1560,7 +1571,8 @@ (defun usr:compile-toplevel (exp : (expanded-p nil)) (let ((co (new compiler)) - (as (new assembler))) + (as (new assembler)) + (*dedup* (or *dedup* (hash)))) (let* ((oreg co.(alloc-treg)) (xexp (if expanded-p exp @@ -1637,7 +1649,8 @@ (defmacro usr:with-compilation-unit (. body) (with-gensyms (rec) ^(let* ((,rec sys:*load-recursive*) - (sys:*load-recursive* t)) + (sys:*load-recursive* t) + (*dedup* (or *dedup* (hash)))) (unwind-protect (progn ,*body) (unless ,rec |