From a16a267958fb76b5cb1d7154d7c11800b6daeae5 Mon Sep 17 00:00:00 2001
From: Kaz Kylheku <kaz@kylheku.com>
Date: Thu, 25 May 2017 20:31:36 -0700
Subject: ffi: bitfield support.

* ffi.c (sbit_s, ubit_s): New symbol variables.
(struct txr_ffi_type): New members, shift and mask.
(ffi_sbit_put, ffi_sbit_get, ffi_ubit_put, ffi_ubit_get,
bitfield_syntax_p): New static functions.
(make_ffi_type_pointer): Disallow pointers to bitfields.
(make_ffi_type_struct): Process bitfield members and
set up shifts and masks accordingly. Recently introduced
bug fixed here at the same time: the alignment calculation
for each member must be done top-of-loop.
(ffi_struct_compile): Exclude bitfields from the check
against members with zero type. Compile the bitfield syntax.
(ffi_typedef): Do not allow typedefs of bitfield type.
Not only doesn't this make sense, but bitfield types
are destructively modified in make_ffi_type_struct: they
are imbued with a mask and offset tied to their position in
a particular struct.

* ffi.h (sbit_s, ubit_s): Delared.

* txr.1: Documented bitfields.
---
 ffi.c | 236 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++--------
 ffi.h |   2 +
 txr.1 | 129 ++++++++++++++++++++++++++++++++++++
 3 files changed, 341 insertions(+), 26 deletions(-)

diff --git a/ffi.c b/ffi.c
index 59a68482..53a0a18b 100644
--- a/ffi.c
+++ b/ffi.c
@@ -89,6 +89,8 @@ val ptr_in_s, ptr_out_s, ptr_in_d_s, ptr_out_d_s, ptr_out_s_s, ptr_s;
 
 val closure_s;
 
+val sbit_s, ubit_s;
+
 val ffi_type_s, ffi_call_desc_s, ffi_closure_s;
 
 static val ffi_typedef_hash;
@@ -108,6 +110,7 @@ struct txr_ffi_type {
   val syntax;
   val eltype;
   cnum size, align;
+  unsigned shift, mask;
   cnum nelem;
   struct smemb *memb;
   unsigned null_term : 1;
@@ -613,6 +616,91 @@ static val ffi_wchar_get(struct txr_ffi_type *tft, mem_t *src, val self)
   return chr(c);
 }
 
+static void ffi_sbit_put(struct txr_ffi_type *tft, val n,
+                         mem_t *dst, val self)
+{
+  unsigned mask = tft->mask;
+  unsigned sbmask = mask ^ (mask >> 1);
+  int shift = tft->shift;
+  cnum cn = c_num(n);
+  int in = cn;
+  unsigned uput = (((unsigned) in) << shift) & mask;
+
+  if (in != cn)
+    goto range;
+
+  if (uput & sbmask) {
+    int icheck = -(int)(((uput ^ mask) >> shift) + 1);
+    if (icheck != cn)
+      goto range;
+  } else if (uput >> shift != cn) {
+    goto range;
+  }
+
+  {
+    unsigned field = *coerce(unsigned *, dst);
+    field &= ~mask;
+    field |= uput;
+    *coerce(unsigned *, dst) = field;
+  }
+
+  return;
+range:
+  uw_throwf(error_s, lit("~a: value ~s is out of range of "
+                         "signed ~s bit-field"),
+            self, n, num_fast(tft->nelem), nao);
+}
+
+static val ffi_sbit_get(struct txr_ffi_type *tft, mem_t *src, val self)
+{
+  unsigned mask = tft->mask;
+  unsigned sbmask = mask ^ (mask >> 1);
+  int shift = tft->shift;
+  unsigned uget = *coerce(unsigned *, src) & mask;
+
+  if (uget & sbmask)
+    return num(-(int)(((uget ^ mask) >> shift) + 1));
+  return unum(uget >> shift);
+}
+
+static void ffi_ubit_put(struct txr_ffi_type *tft, val n,
+                         mem_t *dst, val self)
+{
+  unsigned mask = tft->mask;
+  int shift = tft->shift;
+  ucnum cn = c_unum(n);
+  unsigned un = cn;
+  unsigned uput = (un << shift) & mask;
+
+  if (un != cn)
+    goto range;
+
+  if (uput >> shift != cn)
+    goto range;
+
+  {
+    unsigned field = *coerce(unsigned *, dst);
+    field &= ~mask;
+    field |= uput;
+    *coerce(unsigned *, dst) = field;
+  }
+
+  return;
+
+range:
+  uw_throwf(error_s, lit("~a: value ~s is out of range of "
+                         "unsigned ~s bit-field"),
+            self, n, num_fast(tft->nelem), nao);
+}
+
+static val ffi_ubit_get(struct txr_ffi_type *tft, mem_t *src, val self)
+{
+  unsigned mask = tft->mask;
+  int shift = tft->shift;
+  unsigned uget = *coerce(unsigned *, src) & mask;
+  return unum(uget >> shift);
+}
+
 static void ffi_cptr_put(struct txr_ffi_type *tft, val n, mem_t *dst,
                          val self)
 {
@@ -1443,6 +1531,16 @@ static void ffi_carray_put(struct txr_ffi_type *tft, val carray, mem_t *dst,
   *coerce(mem_t **, dst) = p;
 }
 
+static val bitfield_syntax_p(val syntax)
+{
+  if (!consp(syntax)) {
+    return nil;
+  } else {
+    val sym = car(syntax);
+    return tnil(sym == sbit_s || sym == ubit_s);
+  }
+}
+
 static val make_ffi_type_builtin(val syntax, val lisp_type,
                                  cnum size, cnum align, ffi_type *ft,
                                  void (*put)(struct txr_ffi_type *,
@@ -1481,26 +1579,34 @@ static val make_ffi_type_pointer(val syntax, val lisp_type,
                                                  val obj, mem_t *dst),
                                  val tgtype)
 {
-  struct txr_ffi_type *tft = coerce(struct txr_ffi_type *,
-                                    chk_calloc(1, sizeof *tft));
+  val self = lit("ffi-type-compile");
+  struct txr_ffi_type *tgtft = ffi_type_struct(tgtype);
 
-  val obj = cobj(coerce(mem_t *, tft), ffi_type_s, &ffi_type_ptr_ops);
+  if (tgtft->size == 0 && bitfield_syntax_p(tgtft->syntax)) {
+    uw_throwf(error_s, lit("~a: type combination ~s not allowed"),
+              self, syntax, nao);
+  } else {
+    struct txr_ffi_type *tft = coerce(struct txr_ffi_type *,
+                                      chk_calloc(1, sizeof *tft));
 
-  tft->ft = &ffi_type_pointer;
-  tft->syntax = syntax;
-  tft->lt = lisp_type;
-  tft->size = sizeof (mem_t *);
-  tft->align = alignof (mem_t *);
-  tft->put = put;
-  tft->get = get;
-  tft->eltype = tgtype;
-  tft->in = in;
-  tft->out = out;
-  tft->release = release;
-  tft->alloc = ffi_fixed_alloc;
-  tft->free = free;
+    val obj = cobj(coerce(mem_t *, tft), ffi_type_s, &ffi_type_ptr_ops);
 
-  return obj;
+    tft->ft = &ffi_type_pointer;
+    tft->syntax = syntax;
+    tft->lt = lisp_type;
+    tft->size = sizeof (mem_t *);
+    tft->align = alignof (mem_t *);
+    tft->put = put;
+    tft->get = get;
+    tft->eltype = tgtype;
+    tft->in = in;
+    tft->out = out;
+    tft->release = release;
+    tft->alloc = ffi_fixed_alloc;
+    tft->free = free;
+
+    return obj;
+  }
 }
 
 static val make_ffi_type_struct(val syntax, val lisp_type,
@@ -1519,6 +1625,8 @@ static val make_ffi_type_struct(val syntax, val lisp_type,
   ucnum offs = 0;
   ucnum most_align = 0;
   int need_out_handler = 0;
+  int bit_offs = 0;
+  const int bits_int = 8 * sizeof(int);
 
   ft->type = FFI_TYPE_STRUCT;
   ft->size = 0;
@@ -1539,26 +1647,77 @@ static val make_ffi_type_struct(val syntax, val lisp_type,
     val type = pop(&types);
     val slot = pop(&slots);
     struct txr_ffi_type *mtft = ffi_type_struct(type);
-    cnum align = mtft->align;
     cnum size = mtft->size;
-    ucnum almask = align - 1;
 
     elements[i] = mtft->ft;
 
     memb[i].mtype = type;
     memb[i].mname = slot;
     memb[i].mtft = mtft;
-    memb[i].offs = offs;
 
-    if (align > most_align)
-      most_align = align;
+    if (size == 0 && bitfield_syntax_p(mtft->syntax)) {
+      int bits = mtft->nelem;
+      int room = bits_int - bit_offs;
 
-    need_out_handler = need_out_handler || mtft->out != 0;
+      if (bits == 0) {
+        if (bit_offs > 0) {
+          offs += sizeof (int);
+          bit_offs = 0;
+        }
+        nmemb--, i--;
+        continue;
+      }
+
+      if (bit_offs == 0) {
+        ucnum almask = alignof (int) - 1;
+        offs = (offs + almask) & ~almask;
+      }
+
+      if (most_align < alignof (int))
+        most_align = alignof (int);
+
+      if (bits > room) {
+        offs += sizeof (int);
+        bit_offs = 0;
+      }
 
-    offs += size;
-    offs = (offs + almask) & ~almask;
+      memb[i].offs = offs;
+
+#if HAVE_LITTLE_ENDIAN
+      mtft->shift = bit_offs;
+#else
+      mtft->shift = bits_int - bit_offs - bits;
+#endif
+      if (bits == bits_int)
+        mtft->mask = UINT_MAX;
+      else
+        mtft->mask = ((1U << bits) - 1) << mtft->shift;
+      bit_offs += bits;
+    } else {
+      cnum align = mtft->align;
+      ucnum almask = align - 1;
+
+      if (bit_offs > 0) {
+        offs += sizeof (int);
+        bit_offs = 0;
+      }
+
+      offs = (offs + almask) & ~almask;
+      memb[i].offs = offs;
+      offs += size;
+
+      if (align > most_align)
+        most_align = align;
+    }
+
+    need_out_handler = need_out_handler || mtft->out != 0;
   }
 
+  if (bit_offs > 0)
+    offs += sizeof (int);
+
+  tft->nelem = i;
+
   elements[i] = 0;
 
   if (need_out_handler)
@@ -1633,7 +1792,7 @@ static val ffi_struct_compile(val membs, val *ptypes, val self)
     if (cddr(mp))
       uw_throwf(error_s, lit("~a: excess elements in type-member pair ~s"),
                 self, mp, nao);
-    if (ctft->size == 0)
+    if (ctft->size == 0 && !bitfield_syntax_p(ctft->syntax))
       uw_throwf(error_s, lit("~a: incomplete type ~s cannot be struct member"),
                 self, type, nao);
     pttail = list_collect(pttail, comp_type);
@@ -1797,7 +1956,25 @@ val ffi_type_compile(val syntax)
       return make_ffi_type_pointer(syntax, carray_s,
                                    ffi_carray_put, ffi_carray_get,
                                    0, 0, 0, eltype);
+    } else if (sym == sbit_s || sym == ubit_s) {
+      val nbits = cadr(syntax);
+      cnum nb = c_num(nbits);
+      val type = make_ffi_type_builtin(syntax, integer_s, 0, 0,
+                                       &ffi_type_void,
+                                       if3(sym == sbit_s,
+                                           ffi_sbit_put, ffi_ubit_put),
+                                       if3(sym == sbit_s,
+                                           ffi_sbit_get, ffi_ubit_get));
+      struct txr_ffi_type *tft = ffi_type_struct(type);
+      const int bits_int = 8 * sizeof(int);
+      if (nb < 0 || nb > bits_int)
+        uw_throwf(error_s, lit("~a: invalid bitfield size ~s; "
+                               "must be 0 to ~s"),
+                  self, nbits, num_fast(bits_int), nao);
+      tft->nelem = c_num(nbits);
+      return type;
     }
+
     uw_throwf(error_s, lit("~a: unrecognized type operator: ~s"),
               self, sym, nao);
   } else {
@@ -2422,6 +2599,11 @@ mem_t *ffi_closure_get_fptr(val closure)
 
 val ffi_typedef(val name, val type)
 {
+  val self = lit("ffi-typedef");
+  struct txr_ffi_type *tft = ffi_type_struct_checked(type);
+  if (tft->size == 0 && bitfield_syntax_p(tft->syntax))
+    uw_throwf(error_s, lit("~a: cannot create a typedef for bitfield type"),
+              self, nao);
   return sethash(ffi_typedef_hash, name, type);
 }
 
@@ -2836,6 +3018,8 @@ void ffi_init(void)
   ptr_out_s_s = intern(lit("ptr-out-s"), user_package);
   ptr_s = intern(lit("ptr"), user_package);
   closure_s = intern(lit("closure"), user_package);
+  sbit_s = intern(lit("sbit"), user_package);
+  ubit_s = intern(lit("ubit"), user_package);
   ffi_type_s = intern(lit("ffi-type"), user_package);
   ffi_call_desc_s = intern(lit("ffi-call-desc"), user_package);
   ffi_closure_s = intern(lit("ffi-closure"), user_package);
diff --git a/ffi.h b/ffi.h
index c6a7102a..5d89e452 100644
--- a/ffi.h
+++ b/ffi.h
@@ -51,6 +51,8 @@ extern val ptr_in_s, ptr_out_s, ptr_in_d_s, ptr_out_d_s, ptr_out_s_s, ptr_s;
 
 extern val closure_s;
 
+extern val sbit_s, ubit_s;
+
 extern val ffi_type_s, ffi_call_desc_s, ffi_closure_s;
 
 val ffi_type_compile(val syntax);
diff --git a/txr.1 b/txr.1
index 0acaa2a0..5eb9e9fe 100644
--- a/txr.1
+++ b/txr.1
@@ -53770,6 +53770,11 @@ anonymous padding member simply generates a skip of the number of byte
 corresponding to the size of its type, plus any necessary additional padding
 for the alignment of the subsequent member.
 
+Structure members may be bitfields, which are described using the
+.code ubit
+and
+.code sbit
+compound type operators.
 .meIP (array < dim << type )
 The FFI
 .code array
@@ -54114,6 +54119,130 @@ object.
 
 The get semantics retrieves a Lisp value without freeing.
 
+.meIP ({ubit | sbit} << width )
+The
+.code ubit
+and
+.code sbit
+types denote C language style bitfields. These types can only appear
+as members of structures. A bitfield type cannot be the argument or return
+value of a foreign function or closure, and cannot be a foreign variable.
+Arrays of bitfields and pointers, of any kind, to bitfields are a forbidden
+type combination that is rejected by the type system.
+
+The
+.code ubit
+type denotes a bitfield of type
+.codn uint ,
+corresponding to an
+.code unsigned
+bitfield in the C language.
+
+The
+.code sbit
+type denotes a bitfield of type
+.codn int .
+Unlike in the C language, it is not implementation-defined whether such
+a bit-field represents signed values; it converts between Lisp integers
+that may be positive or negative, and a foreign representation which is
+two's complement.
+
+Bitfields of any other type are not supported.
+
+The
+.meta width
+parameter of the type indicates the number of bits. It may range from
+zero to the number of bits in the
+.code uint
+type.
+
+In a structure, bitfields are allocated out in storage units which have the
+same width and alignment requirements as a
+.codn uint .
+These storage units themselves can be regarded as anonymous members of the
+structure.  When a new unit needs to be allocated in a structure to hold
+bitfields, it is allocated in the same manner as a named member of type
+.code uint
+would be at the same position.
+
+If a bitfield with non-zero width is not preceded by any non-zero-width
+bitfield, then a new unit is allocated, and the bitfield is placed into the
+first available position in that unit.  On a big-endian machine, the first
+available position starts at the most significant bit of the underlying
+storage word.  On a little-endian machine, the first available bit position
+is the least significant bit of the storage word.
+
+If a non-zero-width bitfield is preceded by a non-zero-width bitfield, then
+the new bitfield is packed into the same storage unit as that bitfield if
+there is enough remaining room in that unit. Otherwise, it is placed into a
+new unit. Bitfields are not split across unit boundaries.
+
+A zero-length bitfield is permitted. It may be given a name, but the field
+will not perform any conversions to and from the corresponding slot in the
+Lisp structure.  Note that the FFI struct definition itself causes the
+corresponding Lisp structure type to come into existence, then the Lisp
+structure type will have slots for all the zero width named bitfields,
+even though those slots don't participate in any conversions in conjunction
+with the FFI type.
+
+A zero-length bitfield functions in a similar manner in the FFI type
+system as in the C language.  If it is placed between two bitfields, then it
+forces them to be in separate storage units. That is to say, the bitfield
+which follows is placed into a new storage unit, even if the previous
+bitfield leaves enough room in it storage unit. 
+
+A zero-length bitfield that does not appear between non-zero-length
+bitfields has no effect.
+
+A
+.code ubit
+field stores values which follow a pure binary enumeration. For instance,
+a bit field of width 4 stores values from 0 to 15. On conversion from
+the Lisp structure to the foreign structure, the corresponding member
+must be a integer value in this range, or an error exception is thrown.
+
+On conversion from the foreign representation to Lisp, the integer
+corresponding to the bit pattern is recovered. Bitfields follow the
+bit order of the underlying storage word. That is to say, the most
+significant binary digit of the bitfield is the one which is closest
+to the the most significant bit of the underlying storage unit.
+If a four-bit field is placed into an empty storage unit and the value
+8 its stored, then on a big-endian machine, this has the effect of
+setting to 1 the most significant bit of the underlying storage word.
+On a little-endian machine, it has the effect of setting bit 3 of
+the word (where bit 0 is the least significant bit).
+
+The
+.code sbit
+field creates a correspondence between a range of Lisp integers,
+and a foreign representation based on the two's complement system.
+The most significant bit of the bit field functions as a sign bit.
+Values whose most significant bit is clear are positive, and use
+a pure binary representation just like their
+.code ubit
+counterparts. The representation of negative values is defined
+by the "two's complement" operation, which maps each value to
+its additive inverse. The operation consists of temporarily treating the
+entire bitfield as unsigned, and inverting the logical value of all the
+bits, and then adding 1 with "wrap-around" to zero if 1 is added to a field
+consisting of all 1 bits. (Thus zero maps to zero, as expected).
+An anomaly in the two's complement system is that the most negative
+value has no positive counterpart. The two's complement operation
+on the most negative value produces that same value itself.
+
+A
+.code sbit
+field of width 1
+can only store two values: -1 and 0, represented by the bit patterns
+1 and 0. An attempt to convert any other integer value to a
+.code sbit
+field of width 1 results in an error.
+
+A
+.code sbit
+field of width 2 can represent the values -2, -1, 0 and 1, which are
+stored as the bit patterns 10, 11, 00 and 01, respectively.
+
 .meIP ({buf | buf-d} << size )
 The parametrized
 .code buf
-- 
cgit v1.2.3