summaryrefslogtreecommitdiffstats
path: root/libtags.txr
blob: 6117890cc467336497cdbe1d9ac5fe996854f1e5 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
#!/usr/bin/env txr
@(mdo
  ;; TODO
  ;; #. etags support (update to new hash table format, etc.).
  ;; #. Remove obsolete symbols (flip, etc.).
  ;; #. Update to the new compat handling. There may be fewer if3s in the reg calls.
  ;; #. merge into tags.tl the writing functions.
  ;; #. handle reg_vars, etc. need to handle the reg_varl(sym,val) which is in the definition of reg_var.
  ;;    reg_var(sym, obj) too, with the -b option.
  ;;    (there are many cases where the reg_vars are used to initialize some variables.
  ;;    need special handling, probably.)
  ;; #. others of my todos.
  ;;
  ;; #. *args-eff* not stored in var, so getting doubly stored.
  ;;
  ;; #. make sure some hard-to-determine things are printed to stderr,
  ;; like the multiple occurrences of idents in the existing tags file.
  ;; makes it easier to handle changes in the C source.
  ;;
  ;; #. Check if it still allows us to navigate to the right tags (e.g., acons-new vs acons_new).

  ;; FIXME Why is .l getting added.
  ;; and .y too.
  ;; with only *.c pattern.
  ;; Should I add .shipped ones?
  ;; probably better to assume maintainer, since libtags.txr is for
  ;; developers of txr.
  ;; so check the .l, .y, yy.tab.c, etc.
  ;; not the .shipped ones.

  ;; libtags.txr can tag the source even without an existing tags file.
  ;; It will just be less accurate.
  ;;
  ;; libtags.txr automatically ignores lines with any comments, so
  ;; adding "/* OBS */" to for example num-chr does remove it.
  ;; However, there may be no other comments, otherwise it will miss
  ;; them.
  ;;
  ;; Even if some vars like path_sep_chars are declared and initialized
  ;; at the same time, better to just tag the reg_var call directly.
  ;; The user can navigate manually to the var in question; there would
  ;; be too many useless false positives.
  ;; also when they get assigned, like stderr_s, etc.
  ;; *maybe* we can have a list of exceptions, of which path_sep_chars
  ;; can be a part of.
  ;; but even then, it's being assigned in a static_str, so that may
  ;; be interesting to know.
  ;;
  ;; actually, completely unnecessary to specify that type is intrinsic,
  ;; because they are in c files.
  ;; any c files with such a tag is an intrinsic.

  ;; Note that libtags.txr should be run from the TXR source tree.
  ;; Because it globs for *.c.

  (defvar *tags-lib*)

  (let ((*tags-lib* t))
    (load "txrtags"))

  (define-option-struct libtags-opts tags-opts
    (v verbose :bool "Print diagnostic messages during processing."))

  (defvarl output)
  (defvarl emacs)
  (defvarl verbose)

  (defvarl ix-tags (hash :equal-based))
  ;; FIXME Rename to sym-vars?
  (defvarl var-syms (hash :equal-based))
  (defvarl fun-vars (hash :equal-based))

  (defun update-ix-tags (tag newkey : oldkey)
    ;; Remove the var_s tag because we need not tag it if we have the
    ;; actual function.
    (when oldkey
      (del [ix-tags oldkey]))
    (upd [ix-tags newkey] (append (sys:var 1) (list tag))))

  (defun qualify-sym (sym pkg)
    (join (casequal pkg
            ("user_package" "")
            ("system_package" "sys:")
            ("keyword_package" ":")
            (t (when verbose
                 (put-line `@sym: in unknown package @pkg` *stderr*))
               pkg))
          sym))

  (defun op-error-fun-p (fun)
    (mequal fun
            "op_error" "op_meta_error"
            "op_qquote_error" "op_unquote_error")))
@(do
   (let ((o (new libtags-opts)))
     o.(getopts *args*)
     (set output (cond (o.output o.output)
                   (o.emacs "TAGS")
                   (t "tags")))
     (set emacs o.emacs)
     (set verbose o.verbose)))
@(bind var_s #/\w[\w\d]*_s/)
@(bind cident #/\w[\w\d]*/)
@(bind regfun #/reg_(op|mac|fun)/)
@(bind regvar #/reg_(varl?|symacro)|ffi_typedef/)
@(bind lpar "(")
@(bind rpar ")")
@;;
@(define get-interned-sym (sym))@\
@  (local lit pkg)intern(lit("@lit"), @{pkg cident})@\
@  (bind sym @(qualify-sym lit pkg))@\
@(end)
@;;
@(define get-fun (fun))@\
@  func_@/[\w\d]+/(@{fun cident}@(maybe), @/\d+/@(end))@\
@(end)
@;;
@(define get-sym-fun (fun))@\
@  (local fun-var)@\
@  (cases)func_@/[\w\d]+/(if3(opt_compat && opt_compat <= @/\d+/, @\
@                             cident, @{fun cident})@(maybe), @/\d+/@(end))@\
@  ;; TODO There may be missing cases here of func_ with compat opt handling.
@  (or)@(get-fun fun)@\
@  (or)@{fun-var cident}@\
@    (do (when verbose
           (unless (or (starts-with "op_" fun-var)
                       [fun-vars fun-var])
             (put-line `@{fun-var}: undefined function variable` *stderr*))))@\
@    (bind fun @(or [fun-vars fun-var] fun-var))@\
@  (end)@\
@(end)
@;;
@(define get-sym-fun (fun))
@  (cases)
@    / +/@(get-sym-fun fun)@rpar;
@  (or)
@    / +/func_@/[\w\d]+/(if3(opt_compat && opt_compat <= @/\d+/,
@    (cases)
@      ;; For abs-path-p.
@      / +/@cident, @{fun cident})@(maybe), @/\d+/@(end))@rpar;
@    (or)
@      ;; For lexical-var-p.
@      / +/@cident,
@      / +/@{fun cident}@rpar@(maybe), @/\d+/@(end)@rpar@rpar;
@    (end)
@  (or)
@    ;; For match-regex, match-regex-right and match-regst-right.
@    / +/func_@/[\w\d]+/((opt_compat && opt_compat <= @/\d+/) ?
@                    / +/@cident : @{fun cident}@(maybe), @/\d+/@(end))@rpar;
@  (end)
@(end)
@(define get-file-ix-tags (file))
@  (next file)
@  (collect)
@    (local sym var fun pkg)
@    (all)
@      line
@    (and)
@      (cases)
@        / +/@(maybe)val @(end)@{var cident} = @(get-fun fun);
@        (do (set [fun-vars var] fun))
@      (or)
@        / +/@(maybe)val @(end)@{var var_s} = @(get-interned-sym sym);
@        (do (if [var-syms var]
               (when verbose
                 (put-line `@var: reassigned variable` *stderr*))
               (progn
                 (iflet ((tags [ix-tags var]))
                   ;; The variable is declared later in our search.
                   (progn
                     (each ((tag tags))
                       (let ((old-ident tag.ident))
                         (set tag.ident sym)
                         (typecase tag
                           (fun-tag (update-ix-tags tag old-ident))
                           (var-tag (update-ix-tags tag sym))
                           (t
                            ;; This would be a bug in libtags.txr,
                            ;; so print it regardless of --verbose.
                            (put-line `@(struct-type-name tag): unexpected struct type`
                                      *stderr*)))))
                     (del [ix-tags var]))
                   ;; We may not find a corresponding C function or
                   ;; variable (either because of missing patterns in
                   ;; libtags.txr, or accidental omissions in the C
                   ;; source), in which case we will just tag the line of
                   ;; the var_s assignment.
                   (set [ix-tags var] (list (new tag
                                                 ident sym
                                                 path file
                                                 line line))))
                 ;; Keep track of the symbols, because when we find a
                 ;; symbol corresponding to the above tag to insert into
                 ;; ix-tags, we remove the above tag, but some symbols
                 ;; are multiply bound, for example ‘and’ which is both
                 ;; an operator and a function.
                 (set [var-syms var] sym))))
@      (or)
@        (cases)
@          / +/@regfun(@{var var_s}, @(get-sym-fun fun));
@        (or)
@          / +/@regfun(@{var var_s},
@                  / +/@(get-sym-fun fun));
@        (end)
@        (do
          ;; op_error and company appear only in the var_s cases
          ;; (because otherwise the interned symbol would be used
          ;; only for throwing an error).
          (unless (op-error-fun-p fun)
            (iflet ((sym [var-syms var])
                    ;; We store the path and line in case there is no such
                    ;; tagged function or variable in the tags file, so
                    ;; that we can still jump to the line where the symbol
                    ;; was interned.
                    (tag (new fun-tag
                              ident (or sym fun)
                              path file
                              line line))
                    ((have sym)))
              (update-ix-tags tag fun var)
              (update-ix-tags tag var))))
@      (or)
@        (cases)
@          / +/@regfun(@(get-interned-sym sym), @(get-sym-fun fun));
@          (bind var nil)
@        (or)
@          / +/@regfun@lpar@(get-interned-sym sym),
@          (get-sym-fun fun)
@          (bind var nil)
@        (or)
@          ;; The assignment form always spans two or more lines.
@          / +/@regfun(@{var var_s} = @(get-interned-sym sym),
@                  / +/@(get-sym-fun fun));
@        (or)
@          / +/@regfun(@{var var_s} = intern(lit("@lit"),
@                  / +/@{pkg cident}), @(get-sym-fun fun));
@          (bind sym @(qualify-sym lit pkg))
@        (end)
@        (do (when var
               (if [var-syms var]
                 (when verbose
                   (put-line `@var: reassigned variable` *stderr*))
                 (set [var-syms var] sym)))
             (update-ix-tags (new fun-tag
                                  ident sym
                                  path file
                                  line line)
                             fun var))
@      (or)
@        (cases)
@          / +/@regvar(@{var var_s}, @(skip));
@        (or)
@          ;; Cannot add a comma after the skip because the line
@          ;; contains many comma.
@          / +/ffi_typedef@lpar@{var var_s}, @(skip)
@        (end)
@        (do (iflet ((sym [var-syms var])
                     (tag (new var-tag
                               ;; The var value is not used.
                               ;; (except in debugging, to print the undefined
                               ;; variables.)
                               ident (or sym var)
                               path file
                               line line))
                     ((have sym)))
               ;; FIXME Makes sense to have a list here?
               ;; Or just following along with the style for funs?
               (update-ix-tags tag
                               ;; Doesn't matter if we set the hash table
                               ;; key to sym, because in the output we
                               ;; separate the tags based on var-tag or
                               ;; tag/fun-tag.
                               ;; Only fun-tags need to have a key that
                               ;; corresponds to the existing tags.
                               sym var)
               (update-ix-tags tag var)))
@      (or)
@        (cases)
@          / +/@regvar(@(get-interned-sym sym), @(skip));
@          (bind var nil)
@        (or)
@          / +/@regvar@lpar@(get-interned-sym sym),
@          (bind var nil)
@        (or)
@          / +/@regvar(@{var var_s} = @(get-interned-sym sym), @(skip));
@        (or)
@          / +/@regvar@lpar@{var var_s} = @(get-interned-sym sym),
@        (or)
@          / +/@regvar(@{var var_s} = intern(lit("@lit"),
@                  / +/@{pkg cident}), @(skip));
@          (bind sym @(qualify-sym lit pkg))
@        (or)
@          / +/@regvar@lpar@{var var_s} = intern(lit("@lit"),
@                      / +/@{pkg cident}),
@          (bind sym @(qualify-sym lit pkg))
@        (end)
@        (do (when var
               (if [var-syms var]
                 (when verbose
                   (put-line `@var: reassigned variable` *stderr*))
                 (set [var-syms var] sym)))
             (update-ix-tags (new var-tag
                                  ident sym
                                  path file
                                  line line)
                             sym var))
@      (end)
@    (end)
@  (end)
@(end)
@;; Move lib.c to the front, because many _f variables are
@;; defined there before being used elsewhere, for example in eval.c.
@;; (And with such an order, none are ever defined after being used.)
@;; However, if we ever need to do it, we could add the tags in question
@;; to another hash table and replace the _f variables in question as soon
@;; as we found suitable candidates.
@(next :list (cons "lib.c" (remqual "lib.c"
                                    (command-get-lines "git ls-files '*.c'"))))
@(repeat)
@  file.c
@  (get-file-ix-tags `@file.c`)
@(end)
@(do
  ;; Is nshuffle even getting detected as possible duplicate?
  ;; FIXME The eval.c compat handling has changed.
  ;; Need to update that.

  (when nil
    (dohash (ident tag ix-tags)
      (when (mequal tag.ident
                    ;; Obsolete symbols.
                    "flip" "slot-p")
        (del [ix-tags ident]))))

  ;; TODO what if sym has _s? shouldn't happen, but could be more
  ;; robust by using another kind of key. like (list ident "foo").

  (when verbose
    (let ((alist (keep-if (op ends-with "_s" (first @1))
                          (hash-pairs ix-tags)))
          (undefined nil)
          (missing nil))
      (each ((pair alist))
        (tree-bind (key tags) pair
          (each ((tag tags))
            (typecase tag
              (fun-tag (push (list key (join "(" tag.ident ")")) undefined))
              (var-tag (push (list key) undefined))
              (tag (push (list key (join "(" tag.ident ")")) missing))
              (t (put-line `@(struct-type-name tag): unexpected struct type`
                           *stderr*))))))
      (upd undefined (nsort @1 : car))
      (upd missing (nsort @1 : car))
      (mapdo (op put-line `@(cat-str @1 " "): undefined variable`
                 *stderr*)
             undefined)
      (mapdo (op put-line `@(cat-str @1 " "): no corresponding function or value`
                 *stderr*)
             missing)
      (put-line `@(len undefined) undefined variables` *stderr*)
      (put-line `@(len missing) missing corresponding functions` *stderr*)
      (let ((vals (flow ix-tags hash-values flatten (nsort @1 : .ident))))
        (put-line `@(len vals) ix-tags` *stderr*))))

  (defun merge-ix-tags (orig-tags)
    (let ((tags orig-tags)
          ;; Empty if no original tags file.
          (orig-tags (group-by (op identity @1.ident) orig-tags))
          (ix-tags (hash-pairs ix-tags)))
      (each ((pair ix-tags))
        ;; These idents are unique, because we store them into the hash
        ;; table and have lists of ix-tags for each ident.
        (tree-bind (ident ix-tags) pair
          (condlet
            ((((ends-with "_s" ident)))
             ;; We don't want the tag to point to the declaration of the
             ;; variable (which is what would happen if we duplicate the
             ;; tag in the existing tags file), because that is of
             ;; limited usefulness, so fall back to our info.
             (upd tags (revappend ix-tags)))
            (((orig-tags-orig orig-tags)
              (orig-tags (keep-if (andf (op mequal
                                            (short-suffix @1.path)
                                            ;; We know that the identifiers are
                                            ;; in C source (in particular, not
                                            ;; in TXR Lisp source).
                                            ".c" ".l" ".y")
                                        ;; Assume that an open parenthesis means the tagged
                                        ;; identifier is a function.
                                        ;; This way we can skipped tagged struct members, which
                                        ;; can share the same name as an existing function, but
                                        ;; which we want to ignore (because we are tagging Lisp
                                        ;; functions here).
                                        (op find #\( @1.line))
                                  [orig-tags ident])))
             ;; We may tag Lisp identifiers several times (possibly with
             ;; same-named static functions in different compilation
             ;; units, but more likely from #ifdef blocks), but it's the
             ;; best we can do without fully parsing the C.
             (when (and verbose (> (len orig-tags) 1))
               (put-line `@ident: multiple occurrences in tags file` *stderr*))
             (tree-bind (: var-tags other-tags) (separate (op equal @1.type "v")
                                                          ix-tags)
               (upd tags (revappend var-tags))
               (each-prod ((orig-tag orig-tags)
                           (ix-tag other-tags))
                 (unless (equal orig-tag.ident ix-tag.ident)
                   (let ((tag (copy-struct orig-tag)))
                     (set tag.ident ix-tag.ident)
                     (push tag tags))))))
            (t
             (when verbose
               (whenlet ((orig-tags (keep-if (op mequal
                                                 (short-suffix @1.path)
                                                 ".c" ".l" ".y")
                                             [orig-tags-orig ident])))
                 (each-prod ((orig-tag orig-tags)
                             (ix-tag ix-tags))
                   ;; FIXME args (for args_s) is duplicated four times.
                   (put-line `@ident | orig @{orig-tag.ident} @{orig-tag.path} | ix @{ix-tag.ident} @{ix-tag.path}`))))
             ;; The tags file doesn't contain a tag for our ident.
             ;; Fall back to our info.
             (upd tags (revappend ix-tags))))))
      ;; Not nsort so as not to modify orig-tags in caller.
      (sort tags : .ident)))

  ;; We may as well merge the ctags file (i.e., sort the lines), since
  ;; we have to read all the existing lines.
  ;;
  ;; Greatly adjusted from tags.tl.
  ;;
  ;; For some reason, acons-new is found only if it is placed before
  ;; acons_new, and asin is found only if it is placed before asine.
  ;; Is there some alphabetical ordering particularity?
  ;; Will need to resort the ctags since me_op could be associated to op
  ;; or do.
  (defun write-tagfile (ix-tags)
    (let* ((orig-tags (read-tagfile output))
           (ix-tags (merge-ix-tags orig-tags)))
      (with-stream (stream (open-file output "w"))
        (each ((tag ix-tags))
          (put-line tag.(text) stream)))))

  ;; Greatly adjusted from tags.tl.
  ;;
  ;; FIXME incomplete at the moment.
  (defun write-etagfile (ix-tags)
    (let ((orig-tags (read-etagfile output)))
      (upd orig-tags (nsort @1 : car))
      (file-put "tags.out" orig-tags)
      (with-stream (stream (open-file output "w"))
        (each ((pair orig-tags))
          (tree-bind (path . etags) pair
            (let ((str (with-out-string-stream (s)
                         (each ((etag etags))
                           (unless (ends-with "_s" etag.ident)
                             (each ((ix-tag [keep-if (op find etag.ident @1 : .ident)
                                                     ix-tags .ctags]))
                               (put-line `duping @ix-tag`)
                               ;; (put-line `@{lisptag.ident}@[tag.(text) (len tag.ident)..:]`
                               ;;           stream)
                               ))
                           (put-line etag.(etext) s)))))
              (put-string `@{etag-sec-start}\n@{path},@(len str)\n@{str}`
                          stream)))))))

  (if emacs
    (write-etagfile ix-tags)
    (write-tagfile ix-tags)))