summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Kaz Kylheku <kaz@kylheku.com> 2013-10-05 10:01:24 -0700
committer Kaz Kylheku <kaz@kylheku.com> 2013-10-05 10:01:24 -0700
commit 33c2ad9765e7dc34b9c645b304cfd51524056d9e (patch)
tree d79c8bf99d16404788b65ade7f322dba9d380343
download hc-33c2ad9765e7dc34b9c645b304cfd51524056d9e.tar.gz
hc-33c2ad9765e7dc34b9c645b304cfd51524056d9e.tar.bz2
hc-33c2ad9765e7dc34b9c645b304cfd51524056d9e.zip
HTML cleaner utility.
-rw-r--r-- Makefile 14
-rw-r--r-- hc.c 265
-rw-r--r-- hc.h 227
-rw-r--r-- hc.l 243
4 files changed, 749 insertions, 0 deletions
diff --git a/Makefile b/Makefile
new file mode 100644
index 0000000..0b9b82a
--- /dev/null
+++ b/Makefile
@@ -0,0 +1,14 @@
+# Build rules for hc.  The scanner source lex.yy.c is generated from hc.l;
+# -i makes the generated scanner case-insensitive and -8 makes it 8-bit clean.
+CFLAGS := -g -Wall -W -ansi -D_XOPEN_SOURCE=500 $(EXTRA_CFLAGS)
+
+# clean is a command name, not a file to be built.
+.PHONY: clean
+
+hc: lex.yy.o hc.o
+	$(CC) $(CFLAGS) $(OUR_CFLAGS) $^ -o $@ -lfl
+
+lex.yy.o: lex.yy.c hc.h
+
+hc.o: hc.c hc.h
+
+lex.yy.c: hc.l hc.h
+	$(LEX) -i -8 hc.l
+
+# Remove every generated file, including hc.o.
+clean:
+	-rm hc hc.o lex.yy.o lex.yy.c
diff --git a/hc.c b/hc.c
new file mode 100644
index 0000000..4c14e04
--- /dev/null
+++ b/hc.c
@@ -0,0 +1,265 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include "hc.h"
+
+
+/*
+ * Whitelist of element token types that are copied through to the output.
+ * Entries that are commented out are element tokens that exist in the
+ * token enumeration but are deliberately left off the whitelist.
+ * The list is terminated by tok_eof, which init() uses as the end marker
+ * when it builds the allowed_el lookup table.  The table is read-only.
+ */
+static const int allowed_el_spec[] = {
+  tok_el_a,
+  tok_el_abbr,
+  tok_el_acronym,
+  tok_el_address,
+  /* tok_el_applet, */
+  /* tok_el_area, */
+  tok_el_b,
+  /* tok_el_base, */
+  tok_el_basefont,
+  tok_el_bdo,
+  tok_el_big,
+  tok_el_blockquote,
+  /* tok_el_body, */
+  tok_el_br,
+  /* tok_el_button, */
+  tok_el_caption,
+  tok_el_center,
+  tok_el_cite,
+  tok_el_code,
+  tok_el_col,
+  tok_el_colgroup,
+  tok_el_dd,
+  tok_el_del,
+  tok_el_dfn,
+  tok_el_dir,
+  tok_el_div,
+  tok_el_dl,
+  tok_el_dt,
+  tok_el_em,
+  /* tok_el_fieldset, */
+  tok_el_font,
+  tok_el_form,
+  /* tok_el_frame, */
+  /* tok_el_frameset, */
+  tok_el_h1,
+  tok_el_h2,
+  tok_el_h3,
+  tok_el_h4,
+  tok_el_h5,
+  tok_el_h6,
+  /* tok_el_head, */
+  tok_el_hr,
+  /* tok_el_html, */
+  tok_el_i,
+  /* tok_el_iframe, */
+  tok_el_img,
+  /* tok_el_input, */
+  tok_el_ins,
+  tok_el_kbd,
+  /* tok_el_label, */
+  /* tok_el_legend, */
+  tok_el_li,
+  /* tok_el_link, */
+  /* tok_el_map, */
+  /* tok_el_menu, */
+  /* tok_el_meta, */
+  /* tok_el_noframes, */
+  /* tok_el_noscript, */
+  /* tok_el_object, */
+  tok_el_ol,
+  tok_el_optgroup,
+  /* tok_el_option, */
+  tok_el_p,
+  /* tok_el_param, */
+  tok_el_pre,
+  tok_el_q,
+  tok_el_samp,
+  /* tok_el_script,*/
+  /* tok_el_select, */
+  tok_el_small,
+  tok_el_span,
+  tok_el_strike,
+  tok_el_strong,
+  tok_el_style,
+  tok_el_sub,
+  tok_el_sup,
+  tok_el_table,
+  tok_el_tbody,
+  tok_el_td,
+  /* tok_el_textarea, */
+  tok_el_tfoot,
+  tok_el_th,
+  tok_el_thead,
+  /* tok_el_title, */
+  tok_el_tr,
+  tok_el_tt,
+  tok_el_u,
+  tok_el_ul,
+  /* tok_el_var, */
+  tok_eof,
+};
+
+/* Lookup table indexed by token type; init() stores 1 for every type listed in allowed_el_spec. */
+static int allowed_el[tok_max];
+
+/* All-zero token (static storage).  Its type is 0, the value of tok_eof, so null() is true for it. */
+static const token_t blank;
+/* Single-token pushback buffer shared by gettok() and ungettok(); starts out all-zero, i.e. empty. */
+static token_t pushback;
+
+/*
+ * Report malformed input on stderr and terminate the process with
+ * EXIT_FAILURE.  This function does not return.
+ */
+static void bail(void)
+{
+  fprintf(stderr, "bad html\n");
+  exit(EXIT_FAILURE);
+}
+
+/*
+ * Build a token of the given type whose lexeme is a private heap copy of
+ * text.  The copy is released with deltok().
+ *
+ * If the copy cannot be allocated the program reports the failure and
+ * exits, so a returned token never carries a null lexeme (printtok()
+ * hands the lexeme straight to fputs()).
+ */
+static token_t mktok(toktype_t type, char *text)
+{
+  token_t tok = { 0, 0, 0, 0 };
+
+  tok.type = type;
+  tok.lexeme = strdup(text);
+
+  if (tok.lexeme == NULL) {
+    fprintf(stderr, "out of memory\n");
+    exit(EXIT_FAILURE);
+  }
+
+  return tok;
+}
+
+/*
+ * Release the lexeme storage owned by tok.  The token is passed by value,
+ * so the caller's copy still holds the (now stale) pointer afterwards.
+ */
+static void deltok(token_t tok)
+{
+  char *text = tok.lexeme;
+
+  free(text);
+}
+
+/*
+ * Return 1 if tok has type tok_eof, 0 otherwise.  The all-zero blank
+ * token has that type as well, so this also identifies "no token".
+ */
+static int null(token_t tok)
+{
+  if (tok.type != tok_eof)
+    return 0;
+
+  return 1;
+}
+
+/*
+ * Return the next token: the pushed-back token if there is one, otherwise
+ * a fresh token obtained from the scanner.  A pushed-back token of type
+ * tok_eof is indistinguishable from an empty buffer, so in that case the
+ * scanner is called again.
+ */
+static token_t gettok(void)
+{
+  token_t saved;
+  int type;
+
+  if (!null(pushback)) {
+    saved = pushback;
+    pushback = blank;
+    return saved;
+  }
+
+  /* Call yylex() in its own statement so that yytext is read afterwards. */
+  type = yylex();
+  return mktok(type, yytext);
+}
+
+/*
+ * Store tok in the one-slot pushback buffer so that the next gettok()
+ * returns it.  Whatever lexeme the buffer held before is freed first.
+ */
+static void ungettok(token_t tok)
+{
+  free(pushback.lexeme);
+  pushback = tok;
+}
+
+/*
+ * Write the lexeme of tok to stdout unless tok is a null token, and hand
+ * tok back unchanged so that calls can be nested, e.g. deltok(printtok(t)).
+ */
+static token_t printtok(token_t tok)
+{
+  if (null(tok))
+    return tok;
+
+  fputs(tok.lexeme, stdout);
+  return tok;
+}
+
+/*
+ * Fetch the next token and require it to have the given type.  On a
+ * mismatch bail() is called, which terminates the program.
+ */
+static token_t match(int type)
+{
+  token_t tok = gettok();
+
+  if (tok.type == type)
+    return tok;
+
+  bail();
+  return tok;
+}
+
+/*
+ * Fetch the next token if it has the given type.  Otherwise push the
+ * token back for the next gettok() and return the blank token.
+ */
+static token_t optmatch(int type)
+{
+  token_t tok = gettok();
+
+  if (tok.type == type)
+    return tok;
+
+  ungettok(tok);
+  return blank;
+}
+
+/*
+ * Discard tokens until one of the given type, or a null (end of input)
+ * token, is read.  That final token is returned and belongs to the caller;
+ * every skipped token is freed.
+ */
+static token_t lookfor(int type)
+{
+  token_t tok = gettok();
+
+  while (tok.type != type && !null(tok)) {
+    deltok(tok);
+    tok = gettok();
+  }
+
+  return tok;
+}
+
+/*
+ * Print tokens until one of the given type, or a null (end of input)
+ * token, is read.  The terminating token is printed too (printtok()
+ * prints nothing for a null token) and is returned to the caller, who
+ * owns it; all earlier tokens are freed after being printed.
+ */
+static token_t printuntil(int type)
+{
+  token_t tok = gettok();
+
+  while (tok.type != type && !null(tok)) {
+    printtok(tok);
+    deltok(tok);
+    tok = gettok();
+  }
+
+  printtok(tok);
+  return tok;
+}
+
+/*
+ * Handle one tag.  in is the '<' token that opened it; it is printed when
+ * the tag is kept, but it is not freed here (the caller frees it).
+ *
+ * An optional '/' (closing tag) is read first, then the token that names
+ * the element.  If that token's type is on the whitelist, the tag is
+ * copied to stdout up to and including the closing '>'; otherwise all
+ * tokens up to the closing '>' are dropped.  A tag whose name position
+ * holds '>' is rejected through bail().
+ */
+static void parse_element(token_t in)
+{
+  token_t end = optmatch('/');
+  token_t name = gettok();
+
+  switch (name.type) {
+  case '/':
+    /*
+     * optmatch() pushes back anything that is not '/', so name can only
+     * be '/' when end already holds a '/': that is "<//", rejected below.
+     */
+    if (!null(end))
+      bail();
+    printtok(in);
+    printtok(name);
+    deltok(printtok(lookfor('>')));
+    goto out;
+  case '>':
+    bail();
+    /* not reached: bail() exits */
+  default:
+    break;
+  }
+
+  /*
+   * The scanner returns plain characters as their own code, so the type
+   * is range-checked before it is used as an index into allowed_el.
+   */
+  if (name.type >= 0 && name.type < tok_max && allowed_el[name.type]) {
+    printtok(in);
+    printtok(end);
+    printtok(name);
+    deltok(printuntil('>'));
+  } else {
+    deltok(lookfor('>'));
+  }
+
+out:
+  deltok(end);
+  deltok(name);
+}
+
+/*
+ * Main loop: read tokens until end of input.  A '<' token starts a tag
+ * and is handed to parse_element(); every other token is copied to
+ * stdout.  Each token read here is freed here.
+ */
+static void parse(void)
+{
+  for (;;) {
+    token_t tok = gettok();
+
+    if (tok.type == tok_eof) {
+      deltok(tok);
+      return;
+    }
+
+    if (tok.type == '<')
+      parse_element(tok);
+    else
+      printtok(tok);
+
+    deltok(tok);
+  }
+}
+
+/*
+ * Build the allowed_el lookup table: walk allowed_el_spec up to its
+ * tok_eof terminator and mark each listed token type as allowed.
+ */
+static void init(void)
+{
+  const int *spec = allowed_el_spec;
+
+  while (*spec != tok_eof) {
+    allowed_el[*spec] = 1;
+    spec++;
+  }
+}
+
+/*
+ * Entry point: set up the whitelist table, then run the token loop,
+ * which writes the filtered document to stdout.
+ */
+int main(void)
+{
+  init();
+  parse();
+
+  return 0;
+}
diff --git a/hc.h b/hc.h
new file mode 100644
index 0000000..05bedfb
--- /dev/null
+++ b/hc.h
@@ -0,0 +1,227 @@
+
+/*
+ * Token types returned by the scanner.
+ *
+ * tok_eof is 0.  The named tokens start at 256, above the range of a
+ * single byte, because the scanner returns some single characters as
+ * their own character code.  tok_el_* are element names, tok_at_* are
+ * attribute names, and tok_max is one past the last named token; it is
+ * used as the size of the lookup table in hc.c.
+ */
+typedef enum {
+ tok_eof = 0,
+ tok_doctype = 256,
+ tok_text,
+ tok_el_unknown,
+ tok_el_a,
+ tok_el_abbr,
+ tok_el_acronym,
+ tok_el_address,
+ tok_el_applet,
+ tok_el_area,
+ tok_el_b,
+ tok_el_base,
+ tok_el_basefont,
+ tok_el_bdo,
+ tok_el_big,
+ tok_el_blockquote,
+ tok_el_body,
+ tok_el_br,
+ tok_el_button,
+ tok_el_caption,
+ tok_el_center,
+ tok_el_cite,
+ tok_el_code,
+ tok_el_col,
+ tok_el_colgroup,
+ tok_el_dd,
+ tok_el_del,
+ tok_el_dfn,
+ tok_el_dir,
+ tok_el_div,
+ tok_el_dl,
+ tok_el_dt,
+ tok_el_em,
+ tok_el_fieldset,
+ tok_el_font,
+ tok_el_form,
+ tok_el_frame,
+ tok_el_frameset,
+ tok_el_h1,
+ tok_el_h2,
+ tok_el_h3,
+ tok_el_h4,
+ tok_el_h5,
+ tok_el_h6,
+ tok_el_head,
+ tok_el_hr,
+ tok_el_html,
+ tok_el_i,
+ tok_el_iframe,
+ tok_el_img,
+ tok_el_input,
+ tok_el_ins,
+ tok_el_kbd,
+ tok_el_label,
+ tok_el_legend,
+ tok_el_li,
+ tok_el_link,
+ tok_el_map,
+ tok_el_menu,
+ tok_el_meta,
+ tok_el_noframes,
+ tok_el_noscript,
+ tok_el_object,
+ tok_el_ol,
+ tok_el_optgroup,
+ tok_el_option,
+ tok_el_p,
+ tok_el_param,
+ tok_el_pre,
+ tok_el_q,
+ tok_el_samp,
+ tok_el_script,
+ tok_el_select,
+ tok_el_small,
+ tok_el_span,
+ tok_el_strike,
+ tok_el_strong,
+ tok_el_style,
+ tok_el_sub,
+ tok_el_sup,
+ tok_el_table,
+ tok_el_tbody,
+ tok_el_td,
+ tok_el_textarea,
+ tok_el_tfoot,
+ tok_el_th,
+ tok_el_thead,
+ tok_el_title,
+ tok_el_tr,
+ tok_el_tt,
+ tok_el_u,
+ tok_el_ul,
+ tok_el_var,
+ tok_at_unknown,
+ tok_at_accept,
+ tok_at_accept_charset,
+ tok_at_accesskey,
+ tok_at_action,
+ tok_at_align,
+ tok_at_alink,
+ tok_at_alt,
+ tok_at_archive,
+ tok_at_axis,
+ tok_at_background,
+ tok_at_bbr, /* NOTE(review): possibly meant to be tok_at_abbr -- confirm before renaming */
+ tok_at_bgcolor,
+ tok_at_border,
+ tok_at_cellpadding,
+ tok_at_cellspacing,
+ tok_at_char,
+ tok_at_charoff,
+ tok_at_charset,
+ tok_at_checked,
+ tok_at_cite,
+ tok_at_class,
+ tok_at_classid,
+ tok_at_clear,
+ tok_at_code,
+ tok_at_codebase,
+ tok_at_codetype,
+ tok_at_color,
+ tok_at_cols,
+ tok_at_colspan,
+ tok_at_compact,
+ tok_at_content,
+ tok_at_coords,
+ tok_at_data,
+ tok_at_datetime,
+ tok_at_declare,
+ tok_at_defer,
+ tok_at_dir,
+ tok_at_disabled,
+ tok_at_enctype,
+ tok_at_face,
+ tok_at_for,
+ tok_at_frame,
+ tok_at_frameborder,
+ tok_at_headers,
+ tok_at_height,
+ tok_at_href,
+ tok_at_hreflang,
+ tok_at_hspace,
+ tok_at_http_equiv,
+ tok_at_id,
+ tok_at_ismap,
+ tok_at_label,
+ tok_at_lang,
+ tok_at_language,
+ tok_at_link,
+ tok_at_longdesc,
+ tok_at_marginheight,
+ tok_at_marginwidth,
+ tok_at_maxlength,
+ tok_at_media,
+ tok_at_method,
+ tok_at_multiple,
+ tok_at_name,
+ tok_at_nohref,
+ tok_at_noresize,
+ tok_at_noshade,
+ tok_at_nowrap,
+ tok_at_object,
+ tok_at_onblur,
+ tok_at_onchange,
+ tok_at_onclick,
+ tok_at_ondblclick,
+ tok_at_onfocus,
+ tok_at_onkeydown,
+ tok_at_onkeypress,
+ tok_at_onkeyup,
+ tok_at_onload,
+ tok_at_onmousedown,
+ tok_at_onmousemove,
+ tok_at_onmouseout,
+ tok_at_onmouseover,
+ tok_at_onmouseup,
+ tok_at_onreset,
+ tok_at_onselect,
+ tok_at_onsubmit,
+ tok_at_onunload,
+ tok_at_profile,
+ tok_at_prompt,
+ tok_at_readonly,
+ tok_at_rel,
+ tok_at_rev,
+ tok_at_rows,
+ tok_at_rowspan,
+ tok_at_rules,
+ tok_at_scheme,
+ tok_at_scope,
+ tok_at_scrolling,
+ tok_at_selected,
+ tok_at_shape,
+ tok_at_size,
+ tok_at_span,
+ tok_at_src,
+ tok_at_standby,
+ tok_at_start,
+ tok_at_style,
+ tok_at_summary,
+ tok_at_tabindex,
+ tok_at_target,
+ tok_at_text,
+ tok_at_title,
+ tok_at_type,
+ tok_at_usemap,
+ tok_at_valign,
+ tok_at_value,
+ tok_at_valuetype,
+ tok_at_version,
+ tok_at_vlink,
+ tok_at_vspace,
+ tok_at_width,
+ tok_max
+} toktype_t;
+
+/*
+ * One scanned token.  hc.c initializes this struct positionally, so the
+ * order of the members matters.
+ */
+typedef struct {
+ int type; /* a toktype_t value or a plain character code */
+ int is_tag; /* only ever zero-initialized in hc.c -- NOTE(review): appears unused so far */
+ int is_close; /* only ever zero-initialized in hc.c -- NOTE(review): appears unused so far */
+ char *lexeme; /* heap copy of the matched text; made in mktok(), freed by deltok() */
+} token_t;
+
+/* Interface to the flex-generated scanner: yylex() returns the next token type (0 at end of input). */
+extern int yylex(void);
+/* Text matched by the most recent yylex() call; hc.c copies it right after each call. */
+extern char *yytext;
diff --git a/hc.l b/hc.l
new file mode 100644
index 0000000..8d16781
--- /dev/null
+++ b/hc.l
@@ -0,0 +1,243 @@
+/* This flex scanner is intended to be compiled case-insensitively (flex -i). */
+
+%{
+
+#include <stdio.h>
+#include <string.h>
+#include "hc.h"
+
+%}
+
+/*
+ * Named character classes used by the rules below.
+ * The whitespace escapes are space, \t, \n, \r, \v and \f.
+ * endnm is any character that cannot continue a name; the keyword rules
+ * use it as trailing context so that they only match whole names.
+ * ELM (scanning an element name) and ATT (scanning the rest of a tag)
+ * are exclusive start conditions.
+ */
+wsp [ \t\n\r\v\f]
+notwsp [^ \t\n\r\v\f]
+ctrl [\x0-\x1f]
+notctrl [^\x0-\x1f]
+special ["'<>/=&]
+notspecial [^"'<>/=&]
+elname [A-Za-z0-9]+
+attrname [^"'<>/=&\x0-\x1f\t\n\r\v\f ]
+endnm [^A-Za-z_\-0-9]
+%x ELM ATT
+
+%%
+
+[<] { /* A less-than sign opens a tag: switch to the ELM state, where the element name is scanned. */ BEGIN(ELM);
+ return '<'; }
+{notspecial}+ { /* A run of ordinary text.  The other special characters have no rule in this state, so the flex default rule applies to them. */ return tok_text; }
+<ELM>a/{endnm} { /* ELM state: element names.  Every keyword rule uses endnm as trailing context, so it only matches a whole name.  After a name the scanner switches to the ATT state for the rest of the tag. */ BEGIN(ATT); return tok_el_a; }
+<ELM>abbr/{endnm} { BEGIN(ATT); return tok_el_abbr; }
+<ELM>acronym/{endnm} { BEGIN(ATT); return tok_el_acronym; }
+<ELM>address/{endnm} { BEGIN(ATT); return tok_el_address; }
+<ELM>applet/{endnm} { BEGIN(ATT); return tok_el_applet; }
+<ELM>area/{endnm} { BEGIN(ATT); return tok_el_area; }
+<ELM>b/{endnm} { BEGIN(ATT); return tok_el_b; }
+<ELM>base/{endnm} { BEGIN(ATT); return tok_el_base; }
+<ELM>basefont/{endnm} { BEGIN(ATT); return tok_el_basefont; }
+<ELM>bdo/{endnm} { BEGIN(ATT); return tok_el_bdo; }
+<ELM>big/{endnm} { BEGIN(ATT); return tok_el_big; }
+<ELM>blockquote/{endnm} { BEGIN(ATT); return tok_el_blockquote; }
+<ELM>body/{endnm} { BEGIN(ATT); return tok_el_body; }
+<ELM>br/{endnm} { BEGIN(ATT); return tok_el_br; }
+<ELM>button/{endnm} { BEGIN(ATT); return tok_el_button; }
+<ELM>caption/{endnm} { BEGIN(ATT); return tok_el_caption; }
+<ELM>center/{endnm} { BEGIN(ATT); return tok_el_center; }
+<ELM>cite/{endnm} { BEGIN(ATT); return tok_el_cite; }
+<ELM>code/{endnm} { BEGIN(ATT); return tok_el_code; }
+<ELM>col/{endnm} { BEGIN(ATT); return tok_el_col; }
+<ELM>colgroup/{endnm} { BEGIN(ATT); return tok_el_colgroup; }
+<ELM>dd/{endnm} { BEGIN(ATT); return tok_el_dd; }
+<ELM>del/{endnm} { BEGIN(ATT); return tok_el_del; }
+<ELM>dfn/{endnm} { BEGIN(ATT); return tok_el_dfn; }
+<ELM>dir/{endnm} { BEGIN(ATT); return tok_el_dir; }
+<ELM>div/{endnm} { BEGIN(ATT); return tok_el_div; }
+<ELM>dl/{endnm} { BEGIN(ATT); return tok_el_dl; }
+<ELM>dt/{endnm} { BEGIN(ATT); return tok_el_dt; }
+<ELM>em/{endnm} { BEGIN(ATT); return tok_el_em; }
+<ELM>fieldset/{endnm} { BEGIN(ATT); return tok_el_fieldset; }
+<ELM>font/{endnm} { BEGIN(ATT); return tok_el_font; }
+<ELM>form/{endnm} { BEGIN(ATT); return tok_el_form; }
+<ELM>frame/{endnm} { BEGIN(ATT); return tok_el_frame; }
+<ELM>frameset/{endnm} { BEGIN(ATT); return tok_el_frameset; }
+<ELM>h1/{endnm} { BEGIN(ATT); return tok_el_h1; }
+<ELM>h2/{endnm} { BEGIN(ATT); return tok_el_h2; }
+<ELM>h3/{endnm} { BEGIN(ATT); return tok_el_h3; }
+<ELM>h4/{endnm} { BEGIN(ATT); return tok_el_h4; }
+<ELM>h5/{endnm} { BEGIN(ATT); return tok_el_h5; }
+<ELM>h6/{endnm} { BEGIN(ATT); return tok_el_h6; }
+<ELM>head/{endnm} { BEGIN(ATT); return tok_el_head; }
+<ELM>hr/{endnm} { BEGIN(ATT); return tok_el_hr; }
+<ELM>html/{endnm} { BEGIN(ATT); return tok_el_html; }
+<ELM>i/{endnm} { BEGIN(ATT); return tok_el_i; }
+<ELM>iframe/{endnm} { BEGIN(ATT); return tok_el_iframe; }
+<ELM>img/{endnm} { BEGIN(ATT); return tok_el_img; }
+<ELM>input/{endnm} { BEGIN(ATT); return tok_el_input; }
+<ELM>ins/{endnm} { BEGIN(ATT); return tok_el_ins; }
+<ELM>kbd/{endnm} { BEGIN(ATT); return tok_el_kbd; }
+<ELM>label/{endnm} { BEGIN(ATT); return tok_el_label; }
+<ELM>legend/{endnm} { BEGIN(ATT); return tok_el_legend; }
+<ELM>li/{endnm} { BEGIN(ATT); return tok_el_li; }
+<ELM>link/{endnm} { BEGIN(ATT); return tok_el_link; }
+<ELM>map/{endnm} { BEGIN(ATT); return tok_el_map; }
+<ELM>menu/{endnm} { BEGIN(ATT); return tok_el_menu; }
+<ELM>meta/{endnm} { BEGIN(ATT); return tok_el_meta; }
+<ELM>noframes/{endnm} { BEGIN(ATT); return tok_el_noframes; }
+<ELM>noscript/{endnm} { BEGIN(ATT); return tok_el_noscript; }
+<ELM>object/{endnm} { BEGIN(ATT); return tok_el_object; }
+<ELM>ol/{endnm} { BEGIN(ATT); return tok_el_ol; }
+<ELM>optgroup/{endnm} { BEGIN(ATT); return tok_el_optgroup; }
+<ELM>option/{endnm} { BEGIN(ATT); return tok_el_option; }
+<ELM>p/{endnm} { BEGIN(ATT); return tok_el_p; }
+<ELM>param/{endnm} { BEGIN(ATT); return tok_el_param; }
+<ELM>pre/{endnm} { BEGIN(ATT); return tok_el_pre; }
+<ELM>q/{endnm} { BEGIN(ATT); return tok_el_q; }
+<ELM>samp/{endnm} { BEGIN(ATT); return tok_el_samp; }
+<ELM>script/{endnm} { BEGIN(ATT); return tok_el_script; }
+<ELM>select/{endnm} { BEGIN(ATT); return tok_el_select; }
+<ELM>small/{endnm} { BEGIN(ATT); return tok_el_small; }
+<ELM>span/{endnm} { BEGIN(ATT); return tok_el_span; }
+<ELM>strike/{endnm} { BEGIN(ATT); return tok_el_strike; }
+<ELM>strong/{endnm} { BEGIN(ATT); return tok_el_strong; }
+<ELM>style/{endnm} { BEGIN(ATT); return tok_el_style; }
+<ELM>sub/{endnm} { BEGIN(ATT); return tok_el_sub; }
+<ELM>sup/{endnm} { BEGIN(ATT); return tok_el_sup; }
+<ELM>table/{endnm} { BEGIN(ATT); return tok_el_table; }
+<ELM>tbody/{endnm} { BEGIN(ATT); return tok_el_tbody; }
+<ELM>td/{endnm} { BEGIN(ATT); return tok_el_td; }
+<ELM>textarea/{endnm} { BEGIN(ATT); return tok_el_textarea; }
+<ELM>tfoot/{endnm} { BEGIN(ATT); return tok_el_tfoot; }
+<ELM>th/{endnm} { BEGIN(ATT); return tok_el_th; }
+<ELM>thead/{endnm} { BEGIN(ATT); return tok_el_thead; }
+<ELM>title/{endnm} { BEGIN(ATT); return tok_el_title; }
+<ELM>tr/{endnm} { BEGIN(ATT); return tok_el_tr; }
+<ELM>tt/{endnm} { BEGIN(ATT); return tok_el_tt; }
+<ELM>u/{endnm} { BEGIN(ATT); return tok_el_u; }
+<ELM>ul/{endnm} { BEGIN(ATT); return tok_el_ul; }
+<ELM>var/{endnm} { BEGIN(ATT); return tok_el_var; }
+<ELM>{elname} { BEGIN(ATT); return tok_el_unknown; }
+<ELM>. { /* Any other character except newline is returned as its own code.  The cast keeps bytes >= 0x80 from turning into negative token values where char is signed; hc.c uses the token type as an array index. */ return (unsigned char) yytext[0]; }
+
+<ATT>accept/{endnm} { /* ATT state: attribute names.  As with element names, every keyword rule uses endnm as trailing context so that it only matches a whole name.  The scanner stays in ATT until the closing greater-than sign. */ return tok_at_accept; }
+<ATT>accept-charset/{endnm} { return tok_at_accept_charset; }
+<ATT>accesskey/{endnm} { return tok_at_accesskey; }
+<ATT>action/{endnm} { return tok_at_action; }
+<ATT>align/{endnm} { return tok_at_align; }
+<ATT>alink/{endnm} { return tok_at_alink; }
+<ATT>alt/{endnm} { return tok_at_alt; }
+<ATT>archive/{endnm} { return tok_at_archive; }
+<ATT>axis/{endnm} { return tok_at_axis; }
+<ATT>background/{endnm} { return tok_at_background; }
+<ATT>bbr/{endnm} { /* NOTE(review): pattern is possibly meant to be abbr -- confirm */ return tok_at_bbr; }
+<ATT>bgcolor/{endnm} { return tok_at_bgcolor; }
+<ATT>border/{endnm} { return tok_at_border; }
+<ATT>cellpadding/{endnm} { return tok_at_cellpadding; }
+<ATT>cellspacing/{endnm} { return tok_at_cellspacing; }
+<ATT>char/{endnm} { return tok_at_char; }
+<ATT>charoff/{endnm} { return tok_at_charoff; }
+<ATT>charset/{endnm} { return tok_at_charset; }
+<ATT>checked/{endnm} { return tok_at_checked; }
+<ATT>cite/{endnm} { return tok_at_cite; }
+<ATT>class/{endnm} { return tok_at_class; }
+<ATT>classid/{endnm} { return tok_at_classid; }
+<ATT>clear/{endnm} { return tok_at_clear; }
+<ATT>code/{endnm} { return tok_at_code; }
+<ATT>codebase/{endnm} { return tok_at_codebase; }
+<ATT>codetype/{endnm} { return tok_at_codetype; }
+<ATT>color/{endnm} { return tok_at_color; }
+<ATT>cols/{endnm} { return tok_at_cols; }
+<ATT>colspan/{endnm} { return tok_at_colspan; }
+<ATT>compact/{endnm} { return tok_at_compact; }
+<ATT>content/{endnm} { return tok_at_content; }
+<ATT>coords/{endnm} { return tok_at_coords; }
+<ATT>data/{endnm} { return tok_at_data; }
+<ATT>datetime/{endnm} { return tok_at_datetime; }
+<ATT>declare/{endnm} { return tok_at_declare; }
+<ATT>defer/{endnm} { return tok_at_defer; }
+<ATT>dir/{endnm} { return tok_at_dir; }
+<ATT>disabled/{endnm} { return tok_at_disabled; }
+<ATT>enctype/{endnm} { return tok_at_enctype; }
+<ATT>face/{endnm} { return tok_at_face; }
+<ATT>for/{endnm} { return tok_at_for; }
+<ATT>frame/{endnm} { return tok_at_frame; }
+<ATT>frameborder/{endnm} { return tok_at_frameborder; }
+<ATT>headers/{endnm} { return tok_at_headers; }
+<ATT>height/{endnm} { return tok_at_height; }
+<ATT>href/{endnm} { return tok_at_href; }
+<ATT>hreflang/{endnm} { return tok_at_hreflang; }
+<ATT>hspace/{endnm} { return tok_at_hspace; }
+<ATT>http-equiv/{endnm} { return tok_at_http_equiv; }
+<ATT>id/{endnm} { return tok_at_id; }
+<ATT>ismap/{endnm} { return tok_at_ismap; }
+<ATT>label/{endnm} { return tok_at_label; }
+<ATT>lang/{endnm} { return tok_at_lang; }
+<ATT>language/{endnm} { return tok_at_language; }
+<ATT>link/{endnm} { return tok_at_link; }
+<ATT>longdesc/{endnm} { return tok_at_longdesc; }
+<ATT>marginheight/{endnm} { return tok_at_marginheight; }
+<ATT>marginwidth/{endnm} { return tok_at_marginwidth; }
+<ATT>maxlength/{endnm} { return tok_at_maxlength; }
+<ATT>media/{endnm} { return tok_at_media; }
+<ATT>method/{endnm} { return tok_at_method; }
+<ATT>multiple/{endnm} { return tok_at_multiple; }
+<ATT>name/{endnm} { return tok_at_name; }
+<ATT>nohref/{endnm} { return tok_at_nohref; }
+<ATT>noresize/{endnm} { return tok_at_noresize; }
+<ATT>noshade/{endnm} { return tok_at_noshade; }
+<ATT>nowrap/{endnm} { return tok_at_nowrap; }
+<ATT>object/{endnm} { return tok_at_object; }
+<ATT>onblur/{endnm} { return tok_at_onblur; }
+<ATT>onchange/{endnm} { return tok_at_onchange; }
+<ATT>onclick/{endnm} { return tok_at_onclick; }
+<ATT>ondblclick/{endnm} { return tok_at_ondblclick; }
+<ATT>onfocus/{endnm} { return tok_at_onfocus; }
+<ATT>onkeydown/{endnm} { return tok_at_onkeydown; }
+<ATT>onkeypress/{endnm} { return tok_at_onkeypress; }
+<ATT>onkeyup/{endnm} { return tok_at_onkeyup; }
+<ATT>onload/{endnm} { return tok_at_onload; }
+<ATT>onmousedown/{endnm} { return tok_at_onmousedown; }
+<ATT>onmousemove/{endnm} { return tok_at_onmousemove; }
+<ATT>onmouseout/{endnm} { return tok_at_onmouseout; }
+<ATT>onmouseover/{endnm} { return tok_at_onmouseover; }
+<ATT>onmouseup/{endnm} { return tok_at_onmouseup; }
+<ATT>onreset/{endnm} { return tok_at_onreset; }
+<ATT>onselect/{endnm} { return tok_at_onselect; }
+<ATT>onsubmit/{endnm} { return tok_at_onsubmit; }
+<ATT>onunload/{endnm} { return tok_at_onunload; }
+<ATT>profile/{endnm} { return tok_at_profile; }
+<ATT>prompt/{endnm} { return tok_at_prompt; }
+<ATT>readonly/{endnm} { return tok_at_readonly; }
+<ATT>rel/{endnm} { return tok_at_rel; }
+<ATT>rev/{endnm} { return tok_at_rev; }
+<ATT>rows/{endnm} { return tok_at_rows; }
+<ATT>rowspan/{endnm} { return tok_at_rowspan; }
+<ATT>rules/{endnm} { return tok_at_rules; }
+<ATT>scheme/{endnm} { return tok_at_scheme; }
+<ATT>scope/{endnm} { return tok_at_scope; }
+<ATT>scrolling/{endnm} { return tok_at_scrolling; }
+<ATT>selected/{endnm} { return tok_at_selected; }
+<ATT>shape/{endnm} { return tok_at_shape; }
+<ATT>size/{endnm} { return tok_at_size; }
+<ATT>span/{endnm} { return tok_at_span; }
+<ATT>src/{endnm} { return tok_at_src; }
+<ATT>standby/{endnm} { return tok_at_standby; }
+<ATT>start/{endnm} { return tok_at_start; }
+<ATT>style/{endnm} { return tok_at_style; }
+<ATT>summary/{endnm} { return tok_at_summary; }
+<ATT>tabindex/{endnm} { return tok_at_tabindex; }
+<ATT>target/{endnm} { return tok_at_target; }
+<ATT>text/{endnm} { return tok_at_text; }
+<ATT>title/{endnm} { return tok_at_title; }
+<ATT>type/{endnm} { return tok_at_type; }
+<ATT>usemap/{endnm} { return tok_at_usemap; }
+<ATT>valign/{endnm} { return tok_at_valign; }
+<ATT>value/{endnm} { return tok_at_value; }
+<ATT>valuetype/{endnm} { return tok_at_valuetype; }
+<ATT>version/{endnm} { return tok_at_version; }
+<ATT>vlink/{endnm} { return tok_at_vlink; }
+<ATT>vspace/{endnm} { return tok_at_vspace; }
+<ATT>width/{endnm} { return tok_at_width; }
+<ATT>{attrname} { /* Matches exactly one character of the attrname class, so other text inside a tag arrives one character per token.  NOTE(review): a + repetition may have been intended -- confirm */ return tok_at_unknown; }
+
+<ATT>[>] { /* End of the tag: go back to the INITIAL state and return the character itself as the token type. */ BEGIN(INITIAL); return yytext[0]; }
+<ATT>. { /* Any remaining character except newline is returned as its own code. */ return yytext[0]; }
+
+%%