/* Copyright 2009-2014
 * Kaz Kylheku <kaz@kylheku.com>
 * Vancouver, Canada
 * All rights reserved.
 *
 * Redistribution of this software in source and binary forms, with or without
 * modification, is permitted provided that the following two conditions are met.
 *
 * Use of this software in any manner constitutes agreement with the disclaimer
 * which follows the two conditions.
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 *
 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR IMPLIED
 * WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.  IN NO EVENT SHALL THE
 * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DAMAGES, HOWEVER CAUSED,
 * AND UNDER ANY THEORY OF LIABILITY, ARISING IN ANY WAY OUT OF THE USE OF THIS
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

%{

#include <stdio.h>
#include <string.h>
#include <stdarg.h>
#include <stdlib.h>
#include <limits.h>
#include <errno.h>
#include <dirent.h>
#include <wchar.h>
#include <setjmp.h>
#include <signal.h>
#include "config.h"
#if HAVE_UNISTD_H
#include <unistd.h>
#endif
#include "lib.h"
#include "gc.h"
#include "stream.h"
#include "utf8.h"
#include "signal.h"
#include "unwind.h"
#include "hash.h"
#include "parser.h"
#include "eval.h"
#include "txr.h"
#include "y.tab.h"

#define YY_INPUT(buf, result, max_size)           \
  do {                                            \
    val c = get_byte(yyextra->stream);            \
    int n = 0;                                    \
    if (c)                                        \
      buf[n++] = convert(char, c_num(c));         \
    result = n;                                   \
  } while (0)

int opt_loglevel = 1;   /* 0 - quiet; 1 - normal; 2 - verbose */

val form_to_ln_hash;

#define FLEX_NUM_VERSION 10000*YY_FLEX_MAJOR_VERSION + \
                         100*YY_FLEX_MINOR_VERSION + \
                         YY_FLEX_SUBMINOR_VERSION

#if FLEX_NUM_VERSION < 20509
int yylex_destroy(void)
{
  return 0;
}
#endif

/* Missing prototypes not generated by flex. */
int yyget_column(yyscan_t);
void yyset_column (int column_no, yyscan_t yyscanner);

/* The following function is all that libflex provides.
   By providing it here, we eliminate the need to link libflex. */
#define YY_SKIP_YYWRAP
INLINE int yywrap(yyscan_t scanner)
{
  (void) scanner;
  return 1;
}

void yyerror(scanner_t *scanner, parser_t *parser, const char *s)
{
  yyerrorf(scanner, lit("~a"), string_utf8(s), nao);
  if (parser->prepared_msg) {
    yyerrorf(scanner, lit("~a"), parser->prepared_msg, nao);
    parser->prepared_msg = nil;
  }
}

void yyerrorf(scanner_t *scanner, val fmt, ...)
{
  parser_t *parser = yyget_extra(scanner);

  if (opt_loglevel >= 1) {
    va_list vl;
    va_start (vl, fmt);
    format(std_error, lit("~a: (~a:~a): "), prog_string,
           parser->name, num(parser->lineno), nao);
    vformat(std_error, fmt, vl);
    put_char(chr('\n'), std_error);
    va_end (vl);
  }
  parser->errors++;
}

static void yyerrprepf(scanner_t *scanner, val fmt, ...)
{
  parser_t *parser = yyget_extra(scanner);

  if (opt_loglevel >= 1) {
    va_list vl;
    va_start (vl, fmt);
    parser->prepared_msg = vformat_to_string(fmt, vl);
    va_end (vl);
  }
}

static wchar_t char_esc(int letter)
{
  switch (letter) {
  case ' ': return L' ';
  case 'a': return L'\a';
  case 'b': return L'\b';
  case 't': return L'\t';
  case 'n': return L'\n';
  case 'v': return L'\v';
  case 'f': return L'\f';
  case 'r': return L'\r';
  case 'e': return 27;
  case '"': return L'"';
  case '\'': return L'\'';
  case '`': return L'`';
  case '/': return L'/';
  case '\\': return L'\\';
  }

  internal_error("unhandled escape character");
}

static wchar_t num_esc(scanner_t *scn, char *num)
{
  if (num[0] == 'x') {
    if (strlen(num) > 7)
      yyerror(scn, yyget_extra(scn), "too many digits in hex character escape");
    return strtol(num + 1, 0, 16);
  } else {
    if (num[0] == 'o')
      num++;
    if (strlen(num) > 8)
      yyerror(scn, yyget_extra(scn), "too many digits in octal character escape");
    return strtol(num, 0, 8);
  }
}

%}

%option stack nounput noinput reentrant bison-bridge extra-type="parser_t *"

SYM     [a-zA-Z0-9_]+
SGN     [+\-]
EXP     [eE][+\-]?[0-9]+
DIG     [0-9]
XDIG    [0-9A-Fa-f]
NUM     {SGN}?{DIG}+
FLO     {SGN}?({DIG}*[.]{DIG}+{EXP}?|{DIG}+[.]?{EXP})
FLODOT  {SGN}?{DIG}+[.]
XNUM    #x{SGN}?{XDIG}+
ONUM    #o{SGN}?[0-7]+
BNUM    #b{SGN}?[0-1]+
BSCHR   [a-zA-Z0-9!$%&*+\-<=>?\\_~]
NSCHR   [a-zA-Z0-9!$%&*+\-<=>?\\_~/]
ID_END  [^a-zA-Z0-9!$%&*+\-<=>?\\_~/]
EXTRA   [#^]
TOK     {SYM}
BT0     {BSCHR}({BSCHR}|{EXTRA})*
BT1     @{BT0}+
BT2     ({BSCHR}|{EXTRA})+
BTREG   (({BT0}|{BT1})?:{BT2}|({BT0}|{BT1})(:{BT2})?|:)
BTKEY   @?:{BT2}?
BTOK    {BTREG}|{BTKEY}
NT0     {NSCHR}({NSCHR}|{EXTRA})*
NT1     @{NT0}+
NT2     ({NSCHR}|{EXTRA})+
NTREG   (({NT0}|{NT1})?:{NT2}|({NT0}|{NT1})(:{NT2})?|:)
NTKEY   @?:{NT2}?
NTOK    {NTREG}|{NTKEY}
WS      [\t ]*
HEX     [0-9A-Fa-f]
OCT     [0-7]

REGOP   [/()|.*?+~&%\[\]\-]

ASC     [\x00-\x7f]
ASCN    [\x00-\t\v-\x7f]
U       [\x80-\xbf]
U2      [\xc2-\xdf]
U3      [\xe0-\xef]
U4      [\xf0-\xf4]

UANY    {ASC}|{U2}{U}|{U3}{U}{U}|{U4}{U}{U}{U}
UANYN   {ASCN}|{U2}{U}|{U3}{U}{U}|{U4}{U}{U}{U}
UONLY   {U2}{U}|{U3}{U}{U}|{U4}{U}{U}{U}

%x      SPECIAL BRACED NESTED REGEX STRLIT CHRLIT QSILIT QSPECIAL WLIT QWLIT

%%

<SPECIAL,QSPECIAL,NESTED,BRACED>{NUM} {
  val str = string_own(utf8_dup_from(yytext));

  if (yy_top_state(yyscanner) == INITIAL
      || yy_top_state(yyscanner) == QSILIT
      || yy_top_state(yyscanner) == QWLIT)
    yy_pop_state(yyscanner);

  yylval->val = int_str(str, num(10));
  return NUMBER;
}

<SPECIAL,QSPECIAL,NESTED,BRACED>({XNUM}|{ONUM}|{BNUM}) {
  val str = string_own(utf8_dup_from(yytext + 2));
  int base;

  switch (yytext[1]) {
  case 'x': base = 16; break;
  case 'o': base = 8; break;
  case 'b': default: base = 2; break;
  }

  if (yy_top_state(yyscanner) == INITIAL
      || yy_top_state(yyscanner) == QSILIT
      || yy_top_state(yyscanner) == QWLIT)
    yy_pop_state(yyscanner);

  yylval->val = int_str(str, num_fast(base));
  return NUMBER;
}

<SPECIAL,QSPECIAL,NESTED,BRACED>({BNUM}|{ONUM}|{XNUM}){TOK} {
  int base = 0;
  val str = string_own(utf8_dup_from(yytext + 2));

  switch (yytext[1]) {
  case 'x': base = 16; break;
  case 'o': base = 8; break;
  case 'b': default: base = 2; break;
  }

  yyerrorf(yyg, lit("trailing junk in numeric literal: ~a~a~a"),
           chr(yytext[0]), chr(yytext[1]), str, nao);

  if (yy_top_state(yyscanner) == INITIAL
      || yy_top_state(yyscanner) == QSILIT
      || yy_top_state(yyscanner) == QWLIT)
    yy_pop_state(yyscanner);

  yylval->val = int_str(str, num_fast(base));
  return NUMBER;
}

<SPECIAL,QSPECIAL,NESTED,BRACED>{FLO} {
  val str = string_own(utf8_dup_from(yytext));

  if (yy_top_state(yyscanner) == INITIAL
      || yy_top_state(yyscanner) == QSILIT
      || yy_top_state(yyscanner) == QWLIT)
    yy_pop_state(yyscanner);

  yylval->val = flo_str(str);
  return NUMBER;
}

<SPECIAL,QSPECIAL,NESTED,BRACED>{FLODOT}/[^.] {
  val str = string_own(utf8_dup_from(yytext));

  if (yy_top_state(yyscanner) == INITIAL
      || yy_top_state(yyscanner) == QSILIT
      || yy_top_state(yyscanner) == QWLIT)
    yy_pop_state(yyscanner);

  yylval->val = flo_str(str);
  return NUMBER;
}

<SPECIAL,QSPECIAL>({FLO}|{FLODOT}){TOK} |
<BRACED>({FLO}|{FLODOT}){BTOK} |
<NESTED>({FLO}|{FLODOT}){NTOK} {
  val str = string_utf8(yytext);

  yyerrorf(yyg, lit("trailing junk in floating-point literal: ~a"), str, nao);

  if (yy_top_state(yyscanner) == INITIAL
      || yy_top_state(yyscanner) == QSILIT
      || yy_top_state(yyscanner) == QWLIT)
    yy_pop_state(yyscanner);

  yylval->val = flo_str(str);
  return NUMBER;
}

<NESTED,QSILIT,QWLIT>@{NUM} {
  val str = string_own(utf8_dup_from(yytext + 1));

  if (yy_top_state(yyscanner) == INITIAL
      || yy_top_state(yyscanner) == QSILIT
      || yy_top_state(yyscanner) == QWLIT)
    yy_pop_state(yyscanner);
  yylval->val = int_str(str, num(10));
  return METANUM;
}

<NESTED,QSILIT,QWLIT>@{XNUM} {
  val str = string_own(utf8_dup_from(yytext + 3));

  if (yy_top_state(yyscanner) == INITIAL
      || yy_top_state(yyscanner) == QSILIT
      || yy_top_state(yyscanner) == QWLIT)
    yy_pop_state(yyscanner);
  yylval->val = int_str(str, num(16));
  return METANUM;
}

<NESTED,QSILIT,QWLIT>@{ONUM} {
  val str = string_own(utf8_dup_from(yytext + 3));

  if (yy_top_state(yyscanner) == INITIAL
      || yy_top_state(yyscanner) == QSILIT
      || yy_top_state(yyscanner) == QWLIT)
    yy_pop_state(yyscanner);
  yylval->val = int_str(str, num(8));
  return METANUM;
}

<NESTED,QSILIT,QWLIT>@{BNUM} {
  val str = string_own(utf8_dup_from(yytext + 3));

  if (yy_top_state(yyscanner) == INITIAL
      || yy_top_state(yyscanner) == QSILIT
      || yy_top_state(yyscanner) == QWLIT)
    yy_pop_state(yyscanner);
  yylval->val = int_str(str, num(2));
  return METANUM;
}

<SPECIAL,QSPECIAL>{TOK} |
<BRACED>{BTOK} |
<NESTED>{NTOK} {
  if (yy_top_state(yyscanner) == INITIAL
      || yy_top_state(yyscanner) == QSILIT
      || yy_top_state(yyscanner) == QWLIT)
    yy_pop_state(yyscanner);

  yylval->lexeme = utf8_dup_from(yytext);
  return SYMTOK;
}

<SPECIAL>\({WS}all{WS}\) {
  yy_pop_state(yyscanner);
  yylval->lineno = yyextra->lineno;
  return ALL;
}

<SPECIAL>\({WS}some/{ID_END} {
  yy_push_state(NESTED, yyscanner);
  yylval->lineno = yyextra->lineno;
  return SOME;
}

<SPECIAL>\({WS}none{WS}\) {
  yy_pop_state(yyscanner);
  yylval->lineno = yyextra->lineno;
  return NONE;
}

<SPECIAL>\({WS}maybe{WS}\) {
  yy_pop_state(yyscanner);
  yylval->lineno = yyextra->lineno;
  return MAYBE;
}

<SPECIAL>\({WS}cases{WS}\) {
  yy_pop_state(yyscanner);
  yylval->lineno = yyextra->lineno;
  return CASES;
}

<SPECIAL>\({WS}block/{ID_END} {
  yy_push_state(NESTED, yyscanner);
  yylval->lineno = yyextra->lineno;
  return BLOCK;
}

<SPECIAL>\({WS}choose/{ID_END} {
  yy_push_state(NESTED, yyscanner);
  yylval->lineno = yyextra->lineno;
  return CHOOSE;
}

<SPECIAL>\({WS}gather/{ID_END} {
  yy_push_state(NESTED, yyscanner);
  yylval->lineno = yyextra->lineno;
  return GATHER;
}

<SPECIAL>\({WS}and{WS}\) {
  yy_pop_state(yyscanner);
  yylval->lineno = yyextra->lineno;
  return AND;
}

<SPECIAL>\({WS}or{WS}\) {
  yy_pop_state(yyscanner);
  yylval->lineno = yyextra->lineno;
  return OR;
}

<SPECIAL>\({WS}end{WS}\) {
  yy_pop_state(yyscanner);
  yylval->lineno = yyextra->lineno;
  return END;
}

<SPECIAL>\({WS}collect/{ID_END} {
  yy_push_state(NESTED, yyscanner);
  yylval->lineno = yyextra->lineno;
  return COLLECT;
}

<SPECIAL>\({WS}coll/{ID_END} {
  yy_push_state(NESTED, yyscanner);
  yylval->lineno = yyextra->lineno;
  return COLL;
}

<SPECIAL>\({WS}until{WS}\) {
  yy_pop_state(yyscanner);
  yylval->lineno = yyextra->lineno;
  return UNTIL;
}

<SPECIAL>\({WS}output/{ID_END}  {
  yy_push_state(NESTED, yyscanner);
  yylval->lineno = yyextra->lineno;
  return OUTPUT;
}

<SPECIAL>\({WS}repeat/{ID_END}  {
  yy_push_state(NESTED, yyscanner);
  yylval->lineno = yyextra->lineno;
  return REPEAT;
}


<SPECIAL>\({WS}rep/{ID_END} {
  yy_push_state(NESTED, yyscanner);
  yylval->lineno = yyextra->lineno;
  return REP;
}

<SPECIAL>\({WS}single{WS}\) {
  yy_pop_state(yyscanner);
  yylval->lineno = yyextra->lineno;
  return SINGLE;
}

<SPECIAL>\({WS}first{WS}\) {
  yy_pop_state(yyscanner);
  yylval->lineno = yyextra->lineno;
  return FIRST;
}

<SPECIAL>\({WS}last{WS}\) {
  yy_pop_state(yyscanner);
  yylval->lineno = yyextra->lineno;
  return LAST;
}

<SPECIAL>\({WS}empty{WS}\) {
  yy_pop_state(yyscanner);
  yylval->lineno = yyextra->lineno;
  return EMPTY;
}

<SPECIAL>\({WS}mod/{ID_END} {
  yy_push_state(NESTED, yyscanner);
  yylval->lineno = yyextra->lineno;
  return MOD;
}

<SPECIAL>\({WS}modlast/{ID_END} {
  yy_push_state(NESTED, yyscanner);
  yylval->lineno = yyextra->lineno;
  return MODLAST;
}

<SPECIAL>\({WS}define/{ID_END} {
  yy_push_state(NESTED, yyscanner);
  yylval->lineno = yyextra->lineno;
  return DEFINE;
}

<SPECIAL>\({WS}try{WS}\) {
  yy_pop_state(yyscanner);
  yylval->lineno = yyextra->lineno;
  return TRY;
}

<SPECIAL>\({WS}catch/{ID_END} {
  yy_push_state(NESTED, yyscanner);
  yylval->lineno = yyextra->lineno;
  return CATCH;
}

<SPECIAL>\({WS}finally{WS}\) {
  yy_pop_state(yyscanner);
  yylval->lineno = yyextra->lineno;
  return FINALLY;
}

<SPECIAL>\({WS}if/{ID_END} {
  yy_push_state(NESTED, yyscanner);
  yylval->lineno = yyextra->lineno;
  return IF;
}

<SPECIAL>\({WS}elif/{ID_END} {
  yy_push_state(NESTED, yyscanner);
  yylval->lineno = yyextra->lineno;
  return ELIF;
}

<SPECIAL>\({WS}else{WS}\) {
  yy_pop_state(yyscanner);
  yylval->lineno = yyextra->lineno;
  return ELSE;
}

<SPECIAL,QSPECIAL>[{] {
  yy_push_state(BRACED, yyscanner);
  yylval->lineno = yyextra->lineno;
  return yytext[0];
}

<SPECIAL,QSPECIAL,NESTED,BRACED>[(\[] {
 yy_push_state(NESTED, yyscanner);
 yylval->lineno = yyextra->lineno;
 return yytext[0];
}

<NESTED,BRACED>@ {
  yylval->lineno = yyextra->lineno;
  return yytext[0];
}

<NESTED,QSPECIAL,BRACED>,[*] {
  yylval->chr = '*';
  return SPLICE;
}

<QSPECIAL,NESTED,BRACED>[,'^] {
  yylval->chr = yytext[0];
  return yytext[0];
}

<BRACED>[}] {
  yy_pop_state(yyscanner);
  if (yy_top_state(yyscanner) == INITIAL
      || yy_top_state(yyscanner) == QSILIT
      || yy_top_state(yyscanner) == QWLIT)
    yy_pop_state(yyscanner);
  return yytext[0];
}

<SPECIAL,QSPECIAL,NESTED>[)\]] {
  yy_pop_state(yyscanner);
  if (yy_top_state(yyscanner) == INITIAL
      || yy_top_state(yyscanner) == QSILIT
      || yy_top_state(yyscanner) == QWLIT)
    yy_pop_state(yyscanner);
  return yytext[0];
}

<SPECIAL,QSPECIAL,NESTED,BRACED>{WS} {
  /* Eat whitespace in directive */
}

<SPECIAL,QSPECIAL,NESTED,BRACED>\" {
  yy_push_state(STRLIT, yyscanner);
  return '"';
}

<SPECIAL,QSPECIAL,NESTED,BRACED>#\\ {
  yy_push_state(CHRLIT, yyscanner);
  return HASH_BACKSLASH;
}

<SPECIAL,QSPECIAL,NESTED,BRACED>#[/] {
  yy_push_state(REGEX, yyscanner);
  return HASH_SLASH;
}

<SPECIAL,QSPECIAL,NESTED,BRACED>` {
  yy_push_state(QSILIT, yyscanner);
  return '`';
}

<SPECIAL,QSPECIAL,NESTED,BRACED>#\" {
  yy_push_state(WLIT, yyscanner);
  return WORDS;
}

<SPECIAL,QSPECIAL,NESTED,BRACED>#\*\" {
  yy_push_state(WLIT, yyscanner);
  return WSPLICE;
}

<SPECIAL,QSPECIAL,NESTED,BRACED>#\` {
  yy_push_state(QWLIT, yyscanner);
  return QWORDS;
}

<SPECIAL,QSPECIAL,NESTED,BRACED>#\*\` {
  yy_push_state(QWLIT, yyscanner);
  return QWSPLICE;
}

<NESTED,BRACED># {
  return '#';
}

<NESTED,BRACED>#H {
  yylval->lineno = yyextra->lineno;
  return HASH_H;
}

<NESTED>\.\. {
  yylval->lineno = yyextra->lineno;
  return DOTDOT;
}

<SPECIAL>@ {
  yy_pop_state(yyscanner);
  yylval->lexeme = chk_strdup(L"@");
  return TEXT;
}

<SPECIAL,QSPECIAL,NESTED,BRACED>\n {
  yyextra->lineno++;
}

<SPECIAL,BRACED>[/] {
  yy_push_state(REGEX, yyscanner);
  return '/';
}

<SPECIAL,QSPECIAL,NESTED>\. {
  yylval->chr = '.';
  return '.';
}

<SPECIAL,QSPECIAL,NESTED,BRACED>[\\]\n{WS} {
  if (YYSTATE == SPECIAL)
    yy_pop_state(yyscanner);  /* @\ continuation */
  yyextra->lineno++;
}

<SPECIAL>[\\][abtnvfre ] {
  wchar_t lexeme[2];
  lexeme[0] = char_esc(yytext[1]);
  lexeme[1] = 0;
  yylval->lexeme = chk_strdup(lexeme);
  yy_pop_state(yyscanner);
  return TEXT;
}

<SPECIAL>[\\](x{HEX}+|{OCT}+) {
  wchar_t lexeme[2];
  lexeme[0] = num_esc(yyg, yytext + 1);
  lexeme[1] = 0;
  yylval->lexeme = chk_strdup(lexeme);
  yy_pop_state(yyscanner);
  return TEXT;
}

<SPECIAL>[\\]. {
  yyerrorf(yyg, lit("unrecognized escape: \\~a"), chr(yytext[1]), nao);
}

<SPECIAL,QSPECIAL,NESTED,BRACED>[;].* {
  /* comment */
}

<SPECIAL,QSPECIAL,NESTED,BRACED>{UANYN} {
  yyerrprepf(yyg, lit("bad character in directive: '~a'"),
             string_utf8(yytext), nao);
  return ERRTOK;
}

<SPECIAL,QSPECIAL,NESTED,BRACED>. {
  yyerrprepf(yyg, lit("non-UTF-8 byte in directive: '\\x~02x'"),
             num(convert(unsigned char, yytext[0])), nao);
  return ERRTOK;
}

<REGEX>[/] {
  yylval->chr = '/';
  return '/';
}


<REGEX>[\\][abtnvfre\\ ] {
  yylval->chr = char_esc(yytext[1]);
  return REGCHAR;
}

<REGEX>[\\](x{HEX}+|{OCT}+);? {
  yylval->chr = num_esc(yyg, yytext + 1);
  return REGCHAR;
}

<REGEX>[\\][sSdDwW] {
  yylval->chr = yytext[1];
  return REGTOKEN;
}

<REGEX>{WS}[\\]\n{WS} {
  yyextra->lineno++;
}

<REGEX>\n {
  yyextra->lineno++;
  yyerrprepf(yyg, lit("newline in regex"), nao);
  return ERRTOK;
}

<REGEX>{REGOP} {
  yylval->chr = yytext[0];
  return yytext[0];
}

<REGEX>[\\]{REGOP} {
  yylval->chr = yytext[1];
  return REGCHAR;
}

<REGEX>[\\]. {
  if (opt_compat && opt_compat <= 105) {
    yylval->chr = yytext[1];
    return REGCHAR;
  }
    
  yyerrprepf(yyg, lit("unrecognized escape in regex"), nao);
  return ERRTOK;
}

<REGEX>[\\] {
  yyerrprepf(yyg, lit("dangling backslash in regex"), nao);
  return ERRTOK;
}

<REGEX>{UANYN}  {
  wchar_t buf[8];
  utf8_from(buf, yytext);
  yylval->chr = buf[0];
  return REGCHAR;
}

<REGEX>. {
  yyerrprepf(yyg, lit("non-UTF-8 byte in regex: '\\x~02x'"),
             num(convert(unsigned char, yytext[0])), nao);
  return ERRTOK;
}

<INITIAL>[ ]+ {
  yylval->lexeme = utf8_dup_from(yytext);
  return SPACE;
}

<INITIAL>({UONLY}|[^@\n ])+ {
  yylval->lexeme = utf8_dup_from(yytext);
  return TEXT;
}

<INITIAL>\n {
  yyextra->lineno++;
  return '\n';
}

<INITIAL>@{WS}\* {
  yy_push_state(SPECIAL, yyscanner);
  return '*';
}

<INITIAL>@ {
  yy_push_state(SPECIAL, yyscanner);
}

<INITIAL>@\x01R {
  yy_push_state(REGEX, yyscanner);
  return SECRET_ESCAPE_R;
}

<INITIAL>@\x01E {
  yy_push_state(SPECIAL, yyscanner);
  yy_push_state(NESTED, yyscanner);
  return SECRET_ESCAPE_E;
}

<INITIAL>^@[#;].*\n {
  /* eat whole line comment */
  yyextra->lineno++;
}

<INITIAL>@[#;].* {
  /* comment to end of line */
}

<STRLIT,WLIT>\" {
  yy_pop_state(yyscanner);
  return yytext[0];
}

<QSILIT,QWLIT>\` {
  yy_pop_state(yyscanner);
  return yytext[0];
}

<STRLIT,QSILIT,WLIT,QWLIT>[\\][abtnvfre "`'\\ ] {
  yylval->chr = char_esc(yytext[1]);
  return LITCHAR;
}

<STRLIT,QSILIT,WLIT,QWLIT>{WS}[\\]\n{WS} {
  yyextra->lineno++;
}

<STRLIT,QSILIT,WLIT,QWLIT>[\\](x{HEX}+|{OCT}+);?  {
  yylval->chr = num_esc(yyg, yytext+1);
  return LITCHAR;
}

<STRLIT,QSILIT,WLIT,QWLIT>[\\]. {
  yyerrorf(yyg, lit("unrecognized escape: \\~a"), chr(yytext[1]), nao);
}

<CHRLIT>(x{HEX}+|o{OCT}+) {
  yylval->chr = num_esc(yyg, yytext);
  return LITCHAR;
}

<CHRLIT>{SYM} {
  yylval->lexeme = utf8_dup_from(yytext);
  return SYMTOK;
}

<CHRLIT>[^ \t\n] {
  yylval->lexeme = utf8_dup_from(yytext);
  return SYMTOK; /* hack */
}

<STRLIT>\n {
  yyerrprepf(yyg, lit("newline in string literal"), nao);
  yyextra->lineno++;
  yylval->chr = yytext[0];
  return ERRTOK;
}

<CHRLIT>\n {
  yyerrprepf(yyg, lit("newline in character literal"), nao);
  yyextra->lineno++;
  yylval->chr = yytext[0];
  return ERRTOK;
}

<QSILIT>\n {
  yyerrprepf(yyg, lit("newline in string quasiliteral"), nao);
  yyextra->lineno++;
  yylval->chr = yytext[0];
  return ERRTOK;
}

<WLIT,QWLIT>\n {
  yyextra->lineno++;
  return ' ';
}

<QSILIT,QWLIT>@ {
  yy_push_state(QSPECIAL, yyscanner);
}

<WLIT,QWLIT>{WS} {
  return ' ';
}

<STRLIT,CHRLIT,QSILIT,WLIT,QWLIT>{UANYN} {
  wchar_t buf[8];
  utf8_from(buf, yytext);
  yylval->chr = buf[0];
  return LITCHAR;
}

<STRLIT,CHRLIT,QSILIT,WLIT,QWLIT>. {
  yyerrprepf(yyg, lit("non-UTF-8 byte in literal: '\\x~02x'"),
             num(convert(unsigned char, yytext[0])), nao);
  return ERRTOK;
}

%%

void end_of_regex(scanner_t *yyg)
{
  if (YYSTATE != REGEX)
    internal_error("end_of_regex called in wrong scanner state");

  yy_pop_state(yyg);

  if (YYSTATE != INITIAL) {
    if (yy_top_state(yyg) == INITIAL
      || yy_top_state(yyg) == QSILIT
      || yy_top_state(yyg) == QWLIT)
      yy_pop_state(yyg);
  }
}

void end_of_char(scanner_t *yyg)
{
  if (YYSTATE != CHRLIT)
    internal_error("end_of_char called in wrong scanner state");

  yy_pop_state(yyg);
}

val source_loc(val form)
{
  return gethash(form_to_ln_hash, form);
}

val source_loc_str(val form)
{
  cons_bind (line, file, gethash(form_to_ln_hash, form));
  return if3(line,
             format(nil, lit("~a:~a"), file, line, nao),
             lit("source location n/a"));
}

void parser_l_init(void)
{
  prot1(&form_to_ln_hash);
  form_to_ln_hash = make_hash(t, nil, nil);
}