summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorKaz Kylheku <kkylheku@sierrawireless.com>2019-01-31 06:52:09 -0800
committerKaz Kylheku <kaz@kylheku.com>2019-01-31 06:52:09 -0800
commitd3c9f43a229fec71887d9c87836074f8a4313f5a (patch)
tree30d17ec1fd5fe557332e59aa64baf3661f3450ac
parent3330b1593c5d591aecbacfdae5a06ade661d9fd6 (diff)
downloadtxr-d3c9f43a229fec71887d9c87836074f8a4313f5a.tar.gz
txr-d3c9f43a229fec71887d9c87836074f8a4313f5a.tar.bz2
txr-d3c9f43a229fec71887d9c87836074f8a4313f5a.zip
compiler: de-dupe strings and bignum literals.
Let's squash duplicate strings and bignum integers in the virtual machine data tables. We can safely do it for these objects. For lists and vectors, things are tricky because these aggregates can contain circularity; so we leave those alone for now. Text processing code can generate a lot of duplicate strings. For instance `@a @b @c` generates three copies of the " " literal. * share/txr/stdlib/compiler.tl (*dedup*): New special variable. This is our de-dupe table, but it is globally nil, so that we don't retain cruft between compile jobs. (dedup): New function. (get-dreg): Map the incoming object through dedup. (dreg-key): New function. This converts a literal object to key for the dreg hash. Objects that can be de-duped represent themselves. Objects that cannot be de-duped are keyed by a gensym. (compiler get-dreg): Use dreg-key to reduce the incoming object to a key, and work with that, with the effect that strings, characters and numbers in the data table get de-duped: multiple occurrences of a character, string or number in the code get the same dreg. (usr:compile-toplevel, usr:with-compilation-unit): Establish a dedup hash for the dynamically enclosed compile job. If one is already established by the surroundign dynamic environment, then use that one, otherwise create a new hash.
-rw-r--r--share/txr/stdlib/compiler.tl35
1 files changed, 24 insertions, 11 deletions
diff --git a/share/txr/stdlib/compiler.tl b/share/txr/stdlib/compiler.tl
index 42f80c2e..f2c64f41 100644
--- a/share/txr/stdlib/compiler.tl
+++ b/share/txr/stdlib/compiler.tl
@@ -231,15 +231,26 @@
(defvarl assumed-fun)
-(defmeth compiler get-dreg (me atom)
- (condlet
- ((((null atom))) '(t 0))
- (((dreg [me.dreg atom])) dreg)
- ((((< me.dreg-cntr %lev-size%)))
- (let ((dreg ^(d ,(pinc me.dreg-cntr))))
- (set [me.data (cadr dreg)] atom)
- (set [me.dreg atom] dreg)))
- (t (compile-error me.last-form "code too complex: too many literals"))))
+(defvar *dedup*)
+
+(defun dedup (obj)
+ (cond
+ ((null obj) nil)
+ ((null *dedup*) obj)
+ ((or (stringp obj) (bignump obj))
+ (or [*dedup* obj] (set [*dedup* obj] obj)))
+ (t obj)))
+
+(defmeth compiler get-dreg (me obj)
+ (let ((dobj (dedup obj)))
+ (condlet
+ ((((null dobj))) '(t 0))
+ (((dreg [me.dreg dobj])) dreg)
+ ((((< me.dreg-cntr %lev-size%)))
+ (let ((dreg ^(d ,(pinc me.dreg-cntr))))
+ (set [me.data (cadr dreg)] dobj)
+ (set [me.dreg dobj] dreg)))
+ (t (compile-error me.last-form "code too complex: too many literals")))))
(defmeth compiler alloc-dreg (me)
(if (< me.dreg-cntr %lev-size%)
@@ -1560,7 +1571,8 @@
(defun usr:compile-toplevel (exp : (expanded-p nil))
(let ((co (new compiler))
- (as (new assembler)))
+ (as (new assembler))
+ (*dedup* (or *dedup* (hash))))
(let* ((oreg co.(alloc-treg))
(xexp (if expanded-p
exp
@@ -1637,7 +1649,8 @@
(defmacro usr:with-compilation-unit (. body)
(with-gensyms (rec)
^(let* ((,rec sys:*load-recursive*)
- (sys:*load-recursive* t))
+ (sys:*load-recursive* t)
+ (*dedup* (or *dedup* (hash))))
(unwind-protect
(progn ,*body)
(unless ,rec