summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorKaz Kylheku <kaz@kylheku.com>2021-12-10 23:57:00 -0800
committerKaz Kylheku <kaz@kylheku.com>2021-12-10 23:57:00 -0800
commitcaaecc2c63b65321f3cc66b0d52d9f7b0b859317 (patch)
treec25f9a5ac5dab8dc41001f688030d3432a09d557
parente0814736ec16ca7712609b96558e4102883dda2a (diff)
downloadtxr-caaecc2c63b65321f3cc66b0d52d9f7b0b859317.tar.gz
txr-caaecc2c63b65321f3cc66b0d52d9f7b0b859317.tar.bz2
txr-caaecc2c63b65321f3cc66b0d52d9f7b0b859317.zip
compiler: register-compacting optimization.
This work addresses the following issues, which cause compiled functions to use more stack space than necessary. Firstly, the compiler doesn't allocate registers tightly. Every closure's registers can start at t2, but this isn't done. Secondly, data flow optimizations eliminate registers, leaving gaps in the register allocation. Code ends up strange: you may see register t63 used in a function where the next highest register below that is t39: evidence that a large number of temporary variables got mapped to registers and eliminated. In this change, an optimization is introduced, active at *opt-level* 6 which compacts the t registers for every closure: every closure's registers are renumbered starting from t2. Then, each closure' generating close instruction is also updated to indicate the accurate number of registers ensuring no space is wasted on the stack when the closure is prepared for execution. * stdlib/compiler.tl (compiler optimize): At optimization level 6, insert a call to basic-blocks compact-tregs just before the instruction are pulled out and put through the late peephole pass. * stdlib/optimize.tl (basic-block): New slot, closer. This is pronounced "clozer", as in one who closes. For any basic block that is the head of a closure (the entry point ito the closure code), this slot is set to point to the previous block: the one which ends in the close instruction which creates this closure: the closer. This is important because the close instruction can use t registers for arguments, and those registers belong to the closure. Those argument registers must be included in the renaming. (basic-blocks): New slots closures and cl-hash. The former lists the closure head basic blocks; all the basic blocks which are the entry blocks of a closure. The cl-hash associates each head block with a list of all the blocks (including the head block). (basic-blocks identify-closures): New method. This scans the list of blocks, identifying the closure heads, and associating them with their closers, to establish the bb.closure list. Then for each closure head in the list, the graph is searched to find all the blocks of a closure, and these lists are put into the bb.cl-hash. (basic-block fill-treg-compacting-map): This method scans a basic block, ferreting out all of it t registers, and adds renaming entries for them into a hash that is passed in. If the block is the head of a closure, then the close instruction of the block's closer is also scanned for t registers: but only the arguments, not the destination register. (basic-block apply-treg-compacting-map): This method renames the t register of a block using the renaming map. It follows the registers in the same way as fill-treg-compacting-map, and consquently also goes into the close instruction to rename the argument registers, as necessary. When tweaking the close instruction, it also updates the instruction's number-of-tregs field with the newly calculated number, which comes from the map size. (basic-blocks compact-tregs): This method ties it together, using the above methods to identify the basic blocks belonging to closures, build register renaming maps for them, and hen apply the renaming.
-rw-r--r--stdlib/compiler.tl1
-rw-r--r--stdlib/optimize.tl62
2 files changed, 63 insertions, 0 deletions
diff --git a/stdlib/compiler.tl b/stdlib/compiler.tl
index fadbef32..12e51947 100644
--- a/stdlib/compiler.tl
+++ b/stdlib/compiler.tl
@@ -1634,6 +1634,7 @@
(cond
((>= olev 6)
bb.(merge-jump-thunks)
+ bb.(compact-tregs)
bb.(late-peephole bb.(get-insns)))
(t bb.(get-insns))))
insns)))
diff --git a/stdlib/optimize.tl b/stdlib/optimize.tl
index 7a1a8bc0..01105235 100644
--- a/stdlib/optimize.tl
+++ b/stdlib/optimize.tl
@@ -38,6 +38,7 @@
links
rlinks
insns
+ closer
(:method print (bl stream pretty-p)
(put-string "#S" stream)
@@ -56,6 +57,8 @@
(hash (hash))
(li-hash (hash :eq-based))
list
+ closures
+ (cl-hash (hash))
rescan
recalc
reelim
@@ -635,6 +638,65 @@
,*rest))
(@else else)))
+(defmeth basic-blocks identify-closures (bb)
+ (zap bb.closures)
+ (each ((bl bb.list))
+ (when-match @(end ((close . @nil))) bl.insns
+ (let ((nx bl.next))
+ (set nx.closer bl)
+ (push nx bb.closures))))
+ (upd bb.closures nreverse)
+ (let ((visited (hash :eq-based)))
+ (labels ((visit (bl clhead)
+ (when (test-set [visited bl])
+ (push bl [bb.cl-hash clhead])
+ [mapcar (lop visit clhead) bl.links])))
+ (each ((cb bb.closures))
+ (visit cb cb))))
+ [hash-update bb.cl-hash nreverse])
+
+(defmeth basic-block fill-treg-compacting-map (bl map)
+ (labels ((add-treg (reg)
+ (unless [map reg]
+ (if-match (t @nil) reg
+ (set [map reg] ^(t ,(len map))))))
+ (add-tregs (args)
+ [mapcar add-treg args]))
+ (iflet ((cl bl.closer))
+ (let ((cloinsn (car (last cl.insns))))
+ (add-tregs (cddr cloinsn))))
+ (each ((insn bl.insns))
+ (match-case insn
+ ((close @reg . @nil)
+ (add-treg reg))
+ ((@op . @args)
+ (add-tregs args))))))
+
+(defmeth basic-block apply-treg-compacting-map (bl map)
+ (labels ((fix (arg) [map arg arg])
+ (fix-tregs (args) [mapcar fix args]))
+ (iflet ((cl bl.closer))
+ (match ((close @reg @frsize @ntregs . @rest)) (last cl.insns)
+ (set (last cl.insns)
+ ^((close ,reg ,frsize ,(len map) ,*(fix-tregs rest))))))
+ (set bl.insns (collect-each ((insn bl.insns))
+ (match-case insn
+ ((close @reg . @rest)
+ ^(close ,(fix reg) ,*rest))
+ ((@op . @args)
+ ^(,op ,*(fix-tregs args)))
+ (@else else))))))
+
+(defmeth basic-blocks compact-tregs (bb)
+ bb.(identify-closures)
+ (each ((bl bb.closures))
+ (let ((clist [bb.cl-hash bl]))
+ (let ((map (hash-from-pairs '(((t 0) (t 0)) ((t 1) (t 1))))))
+ (each ((cl clist))
+ cl.(fill-treg-compacting-map map))
+ (each ((cl clist))
+ cl.(apply-treg-compacting-map map))))))
+
(defun rewrite (fun list)
(build
(while* list