diff options
author | Kaz Kylheku <kaz@kylheku.com> | 2013-10-05 10:01:24 -0700 |
---|---|---|
committer | Kaz Kylheku <kaz@kylheku.com> | 2013-10-05 10:01:24 -0700 |
commit | 33c2ad9765e7dc34b9c645b304cfd51524056d9e (patch) | |
tree | d79c8bf99d16404788b65ade7f322dba9d380343 | |
download | hc-33c2ad9765e7dc34b9c645b304cfd51524056d9e.tar.gz hc-33c2ad9765e7dc34b9c645b304cfd51524056d9e.tar.bz2 hc-33c2ad9765e7dc34b9c645b304cfd51524056d9e.zip |
HTML cleaner utility.
-rw-r--r-- | Makefile | 14 | ||||
-rw-r--r-- | hc.c | 265 | ||||
-rw-r--r-- | hc.h | 227 | ||||
-rw-r--r-- | hc.l | 243 |
4 files changed, 749 insertions, 0 deletions
diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..0b9b82a --- /dev/null +++ b/Makefile @@ -0,0 +1,14 @@ +CFLAGS := -g -Wall -W -ansi -D_XOPEN_SOURCE=500 $(EXTRA_CFLAGS) + +hc: lex.yy.o hc.o + $(CC) $(CFLAGS) $(OUR_CFLAGS) $^ -o $@ -lfl + +lex.yy.o: lex.yy.c hc.h + +hc.o: hc.c hc.h + +lex.yy.c: hc.l hc.h + $(LEX) -i -8 hc.l + +clean: + -rm hc lex.yy.o lex.yy.c @@ -0,0 +1,265 @@ +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include "hc.h" + + +static int allowed_el_spec[] = { + tok_el_a, + tok_el_abbr, + tok_el_acronym, + tok_el_address, + /* tok_el_applet, */ + /* tok_el_area, */ + tok_el_b, + /* tok_el_base, */ + tok_el_basefont, + tok_el_bdo, + tok_el_big, + tok_el_blockquote, + /* tok_el_body, */ + tok_el_br, + /* tok_el_button, */ + tok_el_caption, + tok_el_center, + tok_el_cite, + tok_el_code, + tok_el_col, + tok_el_colgroup, + tok_el_dd, + tok_el_del, + tok_el_dfn, + tok_el_dir, + tok_el_div, + tok_el_dl, + tok_el_dt, + tok_el_em, + /* tok_el_fieldset, */ + tok_el_font, + tok_el_form, + /* tok_el_frame, */ + /* tok_el_frameset, */ + tok_el_h1, + tok_el_h2, + tok_el_h3, + tok_el_h4, + tok_el_h5, + tok_el_h6, + /* tok_el_head, */ + tok_el_hr, + /* tok_el_html, */ + tok_el_i, + /* tok_el_iframe, */ + tok_el_img, + /* tok_el_input, */ + tok_el_ins, + tok_el_kbd, + /* tok_el_label, */ + /* tok_el_legend, */ + tok_el_li, + /* tok_el_link, */ + /* tok_el_map, */ + /* tok_el_menu, */ + /* tok_el_meta, */ + /* tok_el_noframes, */ + /* tok_el_noscript, */ + /* tok_el_object, */ + tok_el_ol, + tok_el_optgroup, + /* tok_el_option, */ + tok_el_p, + /* tok_el_param, */ + tok_el_pre, + tok_el_q, + tok_el_samp, + /* tok_el_script,*/ + /* tok_el_select, */ + tok_el_small, + tok_el_span, + tok_el_strike, + tok_el_strong, + tok_el_style, + tok_el_sub, + tok_el_sup, + tok_el_table, + tok_el_tbody, + tok_el_td, + /* tok_el_textarea, */ + tok_el_tfoot, + tok_el_th, + tok_el_thead, + /* tok_el_title, */ + tok_el_tr, + tok_el_tt, + tok_el_u, + 
tok_el_ul, + /* tok_el_var, */ + tok_eof, +}; + +static int allowed_el[tok_max]; + +static const token_t blank; +static token_t pushback; + +static void bail() +{ + fprintf(stderr, "bad html\n"); + exit(EXIT_FAILURE); +} + +static token_t mktok(toktype_t type, char *text) +{ + token_t tok = { 0, 0, 0, 0 }; + tok.type = type; + tok.lexeme = strdup(text); + return tok; +} + +static void deltok(token_t tok) +{ + free(tok.lexeme); +} + +static int null(token_t tok) +{ + return tok.type == tok_eof; +} + +static token_t gettok(void) +{ + if (null(pushback)) { + int type = yylex(); + return mktok(type, yytext); + } else { + token_t tok = pushback; + pushback = blank; + return tok; + } +} + +static void ungettok(token_t tok) +{ + deltok(pushback); + pushback = tok; +} + +static token_t printtok(token_t tok) +{ + if (!null(tok)) + fputs(tok.lexeme, stdout); + return tok; +} + +static token_t match(int type) +{ + token_t tok = gettok(); + if (tok.type != type) + bail(); + return tok; +} + +static token_t optmatch(int type) +{ + token_t tok = gettok(); + if (tok.type != type) { + ungettok(tok); + return blank; + } + return tok; +} + +static token_t lookfor(int type) +{ + token_t tok; + for (;;) { + tok = gettok(); + if (tok.type == type || null(tok)) + break; + deltok(tok); + } + return tok; +} + +static token_t printuntil(int type) +{ + token_t tok; + for (;;) { + tok = gettok(); + if (tok.type == type || null(tok)) + break; + deltok(printtok(tok)); + } + printtok(tok); + return tok; +} + +static void parse_element(token_t in) +{ + token_t end = optmatch('/'); + token_t name = gettok(); + + switch (name.type) { + case '/': + if (!null(end)) + bail(); + printtok(in); + printtok(name); + deltok(printtok(lookfor('>'))); + goto out; + return; + case '>': + bail(); + default: + break; + } + + if (allowed_el[name.type]) { + printtok(in); + printtok(end); + printtok(name); + deltok(printuntil('>')); + } else { + deltok(lookfor('>')); + } + +out: + deltok(end); + deltok(name); +} + 
+static void parse(void)
+{
+  for (;;) {
+    token_t tok = gettok();
+
+    switch (tok.type) {
+    case '<':                   /* start of a tag */
+      parse_element(tok);
+      break;
+    case tok_eof:               /* end of input */
+      deltok(tok);
+      return;
+    default:                    /* anything else is copied through */
+      printtok(tok);
+      break;
+    }
+
+    deltok(tok);                /* parse_element() leaves '<' to us */
+  }
+}
+
+static void init(void)
+{
+  int i;
+
+  for (i = 0; allowed_el_spec[i] != tok_eof; i++) /* tok_eof ends the list */
+    allowed_el[allowed_el_spec[i]] = 1;
+}
+
+int main(void)
+{
+  init();
+  parse();
+  return 0;
+}
@@ -0,0 +1,246 @@
+/*
+ * hc.h: token types shared by the scanner (hc.l) and the parser (hc.c).
+ */
+#ifndef HC_H
+#define HC_H
+
+/*
+ * Token types.  Values below 256 are left free for single characters,
+ * which the scanner returns as their own code.  tok_el_* name elements,
+ * tok_at_* name attributes, and tok_max is one past the last token, i.e.
+ * the size of a table indexed by token type.
+ */
+typedef enum {
+  tok_eof = 0,
+  tok_doctype = 256,
+  tok_text,
+  tok_el_unknown,
+  tok_el_a,
+  tok_el_abbr,
+  tok_el_acronym,
+  tok_el_address,
+  tok_el_applet,
+  tok_el_area,
+  tok_el_b,
+  tok_el_base,
+  tok_el_basefont,
+  tok_el_bdo,
+  tok_el_big,
+  tok_el_blockquote,
+  tok_el_body,
+  tok_el_br,
+  tok_el_button,
+  tok_el_caption,
+  tok_el_center,
+  tok_el_cite,
+  tok_el_code,
+  tok_el_col,
+  tok_el_colgroup,
+  tok_el_dd,
+  tok_el_del,
+  tok_el_dfn,
+  tok_el_dir,
+  tok_el_div,
+  tok_el_dl,
+  tok_el_dt,
+  tok_el_em,
+  tok_el_fieldset,
+  tok_el_font,
+  tok_el_form,
+  tok_el_frame,
+  tok_el_frameset,
+  tok_el_h1,
+  tok_el_h2,
+  tok_el_h3,
+  tok_el_h4,
+  tok_el_h5,
+  tok_el_h6,
+  tok_el_head,
+  tok_el_hr,
+  tok_el_html,
+  tok_el_i,
+  tok_el_iframe,
+  tok_el_img,
+  tok_el_input,
+  tok_el_ins,
+  tok_el_kbd,
+  tok_el_label,
+  tok_el_legend,
+  tok_el_li,
+  tok_el_link,
+  tok_el_map,
+  tok_el_menu,
+  tok_el_meta,
+  tok_el_noframes,
+  tok_el_noscript,
+  tok_el_object,
+  tok_el_ol,
+  tok_el_optgroup,
+  tok_el_option,
+  tok_el_p,
+  tok_el_param,
+  tok_el_pre,
+  tok_el_q,
+  tok_el_samp,
+  tok_el_script,
+  tok_el_select,
+  tok_el_small,
+  tok_el_span,
+  tok_el_strike,
+  tok_el_strong,
+  tok_el_style,
+  tok_el_sub,
+  tok_el_sup,
+  tok_el_table,
+  tok_el_tbody,
+  tok_el_td,
+  tok_el_textarea,
+  tok_el_tfoot,
+  tok_el_th,
+  tok_el_thead,
+  tok_el_title,
+  tok_el_tr,
+  tok_el_tt,
+  tok_el_u,
+  tok_el_ul,
+  tok_el_var,
+  tok_at_unknown,
+  tok_at_accept,
+  tok_at_accept_charset,
+  tok_at_accesskey,
+  tok_at_action,
+  tok_at_align,
+  tok_at_alink,
+  tok_at_alt,
+  tok_at_archive,
+  tok_at_axis,
+  tok_at_background,
+  tok_at_abbr,
+  tok_at_bgcolor,
+  tok_at_border,
+  tok_at_cellpadding,
+  tok_at_cellspacing,
+  tok_at_char,
+  tok_at_charoff,
+  tok_at_charset,
+  tok_at_checked,
+  tok_at_cite,
+  tok_at_class,
+  tok_at_classid,
+  tok_at_clear,
+  tok_at_code,
+  tok_at_codebase,
+  tok_at_codetype,
+  tok_at_color,
+  tok_at_cols,
+  tok_at_colspan,
+  tok_at_compact,
+  tok_at_content,
+  tok_at_coords,
+  tok_at_data,
+  tok_at_datetime,
+  tok_at_declare,
+  tok_at_defer,
+  tok_at_dir,
+  tok_at_disabled,
+  tok_at_enctype,
+  tok_at_face,
+  tok_at_for,
+  tok_at_frame,
+  tok_at_frameborder,
+  tok_at_headers,
+  tok_at_height,
+  tok_at_href,
+  tok_at_hreflang,
+  tok_at_hspace,
+  tok_at_http_equiv,
+  tok_at_id,
+  tok_at_ismap,
+  tok_at_label,
+  tok_at_lang,
+  tok_at_language,
+  tok_at_link,
+  tok_at_longdesc,
+  tok_at_marginheight,
+  tok_at_marginwidth,
+  tok_at_maxlength,
+  tok_at_media,
+  tok_at_method,
+  tok_at_multiple,
+  tok_at_name,
+  tok_at_nohref,
+  tok_at_noresize,
+  tok_at_noshade,
+  tok_at_nowrap,
+  tok_at_object,
+  tok_at_onblur,
+  tok_at_onchange,
+  tok_at_onclick,
+  tok_at_ondblclick,
+  tok_at_onfocus,
+  tok_at_onkeydown,
+  tok_at_onkeypress,
+  tok_at_onkeyup,
+  tok_at_onload,
+  tok_at_onmousedown,
+  tok_at_onmousemove,
+  tok_at_onmouseout,
+  tok_at_onmouseover,
+  tok_at_onmouseup,
+  tok_at_onreset,
+  tok_at_onselect,
+  tok_at_onsubmit,
+  tok_at_onunload,
+  tok_at_profile,
+  tok_at_prompt,
+  tok_at_readonly,
+  tok_at_rel,
+  tok_at_rev,
+  tok_at_rows,
+  tok_at_rowspan,
+  tok_at_rules,
+  tok_at_scheme,
+  tok_at_scope,
+  tok_at_scrolling,
+  tok_at_selected,
+  tok_at_shape,
+  tok_at_size,
+  tok_at_span,
+  tok_at_src,
+  tok_at_standby,
+  tok_at_start,
+  tok_at_style,
+  tok_at_summary,
+  tok_at_tabindex,
+  tok_at_target,
+  tok_at_text,
+  tok_at_title,
+  tok_at_type,
+  tok_at_usemap,
+  tok_at_valign,
+  tok_at_value,
+  tok_at_valuetype,
+  tok_at_version,
+  tok_at_vlink,
+  tok_at_vspace,
+  tok_at_width,
+  tok_max
+} toktype_t;
+
+/*
+ * A token: its type (a toktype_t value or a character code) and the
+ * heap-allocated text it was scanned from.  is_tag and is_close are not
+ * referenced by the code visible here.
+ */
+typedef struct {
+  int type;
+  int is_tag;
+  int is_close;
+  char *lexeme;
+} token_t;
+
+/* Provided by the generated scanner. */
+extern int yylex(void);
+extern char *yytext;
+
+#endif /* HC_H */
@@ -0,0 +1,258 @@
+/*
+ * Scanner for hc, the HTML cleaner.
+ *
+ * This flex scanner is intended to be compiled case insensitive.
+ *
+ * Start conditions:
+ *   INITIAL  outside a tag; '<' switches to ELM
+ *   ELM      right after '<', where an element name is expected
+ *   ATT      after the element name, up to the closing '>'
+ *
+ * A character that no other rule claims inside a tag is returned as its
+ * own code, converted through unsigned char so that bytes above 0x7f do
+ * not come out negative where char is signed.
+ */
+
+%{
+
+#include <stdio.h>
+#include <string.h>
+#include "hc.h"
+
+%}
+
+wsp        [ \t\n\r\v\t]
+notwsp     [^ \t\n\r\v\t]
+ctrl       [\x0-\x1f]
+notctrl    [^\x0-\x1f]
+special    ["'<>/=&]
+notspecial [^"'<>/=&]
+elname     [A-Za-z0-9]+
+attrname   [^"'<>/=&\x0-\x1f\t\n\r\v\t ]
+endnm      [^A-Za-z_\-0-9]
+%x ELM ATT
+
+%%
+
+[<]                         { BEGIN(ELM);
+                              return '<'; }
+{notspecial}+               { return tok_text; }
+<ELM>a/{endnm}              { BEGIN(ATT); return tok_el_a; }
+<ELM>abbr/{endnm}           { BEGIN(ATT); return tok_el_abbr; }
+<ELM>acronym/{endnm}        { BEGIN(ATT); return tok_el_acronym; }
+<ELM>address/{endnm}        { BEGIN(ATT); return tok_el_address; }
+<ELM>applet/{endnm}         { BEGIN(ATT); return tok_el_applet; }
+<ELM>area/{endnm}           { BEGIN(ATT); return tok_el_area; }
+<ELM>b/{endnm}              { BEGIN(ATT); return tok_el_b; }
+<ELM>base/{endnm}           { BEGIN(ATT); return tok_el_base; }
+<ELM>basefont/{endnm}       { BEGIN(ATT); return tok_el_basefont; }
+<ELM>bdo/{endnm}            { BEGIN(ATT); return tok_el_bdo; }
+<ELM>big/{endnm}            { BEGIN(ATT); return tok_el_big; }
+<ELM>blockquote/{endnm}     { BEGIN(ATT); return tok_el_blockquote; }
+<ELM>body/{endnm}           { BEGIN(ATT); return tok_el_body; }
+<ELM>br/{endnm}             { BEGIN(ATT); return tok_el_br; }
+<ELM>button/{endnm}         { BEGIN(ATT); return tok_el_button; }
+<ELM>caption/{endnm}        { BEGIN(ATT); return tok_el_caption; }
+<ELM>center/{endnm}         { BEGIN(ATT); return tok_el_center; }
+<ELM>cite/{endnm}           { BEGIN(ATT); return tok_el_cite; }
+<ELM>code/{endnm}           { BEGIN(ATT); return tok_el_code; }
+<ELM>col/{endnm}            { BEGIN(ATT); return tok_el_col; }
+<ELM>colgroup/{endnm}       { BEGIN(ATT); return tok_el_colgroup; }
+<ELM>dd/{endnm}             { BEGIN(ATT); return tok_el_dd; }
+<ELM>del/{endnm}            { BEGIN(ATT); return tok_el_del; }
+<ELM>dfn/{endnm}            { BEGIN(ATT); return tok_el_dfn; }
+<ELM>dir/{endnm}            { BEGIN(ATT); return tok_el_dir; }
+<ELM>div/{endnm}            { BEGIN(ATT); return tok_el_div; }
+<ELM>dl/{endnm}             { BEGIN(ATT); return tok_el_dl; }
+<ELM>dt/{endnm}             { BEGIN(ATT); return tok_el_dt; }
+<ELM>em/{endnm}             { BEGIN(ATT); return tok_el_em; }
+<ELM>fieldset/{endnm}       { BEGIN(ATT); return tok_el_fieldset; }
+<ELM>font/{endnm}           { BEGIN(ATT); return tok_el_font; }
+<ELM>form/{endnm}           { BEGIN(ATT); return tok_el_form; }
+<ELM>frame/{endnm}          { BEGIN(ATT); return tok_el_frame; }
+<ELM>frameset/{endnm}       { BEGIN(ATT); return tok_el_frameset; }
+<ELM>h1/{endnm}             { BEGIN(ATT); return tok_el_h1; }
+<ELM>h2/{endnm}             { BEGIN(ATT); return tok_el_h2; }
+<ELM>h3/{endnm}             { BEGIN(ATT); return tok_el_h3; }
+<ELM>h4/{endnm}             { BEGIN(ATT); return tok_el_h4; }
+<ELM>h5/{endnm}             { BEGIN(ATT); return tok_el_h5; }
+<ELM>h6/{endnm}             { BEGIN(ATT); return tok_el_h6; }
+<ELM>head/{endnm}           { BEGIN(ATT); return tok_el_head; }
+<ELM>hr/{endnm}             { BEGIN(ATT); return tok_el_hr; }
+<ELM>html/{endnm}           { BEGIN(ATT); return tok_el_html; }
+<ELM>i/{endnm}              { BEGIN(ATT); return tok_el_i; }
+<ELM>iframe/{endnm}         { BEGIN(ATT); return tok_el_iframe; }
+<ELM>img/{endnm}            { BEGIN(ATT); return tok_el_img; }
+<ELM>input/{endnm}          { BEGIN(ATT); return tok_el_input; }
+<ELM>ins/{endnm}            { BEGIN(ATT); return tok_el_ins; }
+<ELM>kbd/{endnm}            { BEGIN(ATT); return tok_el_kbd; }
+<ELM>label/{endnm}          { BEGIN(ATT); return tok_el_label; }
+<ELM>legend/{endnm}         { BEGIN(ATT); return tok_el_legend; }
+<ELM>li/{endnm}             { BEGIN(ATT); return tok_el_li; }
+<ELM>link/{endnm}           { BEGIN(ATT); return tok_el_link; }
+<ELM>map/{endnm}            { BEGIN(ATT); return tok_el_map; }
+<ELM>menu/{endnm}           { BEGIN(ATT); return tok_el_menu; }
+<ELM>meta/{endnm}           { BEGIN(ATT); return tok_el_meta; }
+<ELM>noframes/{endnm}       { BEGIN(ATT); return tok_el_noframes; }
+<ELM>noscript/{endnm}       { BEGIN(ATT); return tok_el_noscript; }
+<ELM>object/{endnm}         { BEGIN(ATT); return tok_el_object; }
+<ELM>ol/{endnm}             { BEGIN(ATT); return tok_el_ol; }
+<ELM>optgroup/{endnm}       { BEGIN(ATT); return tok_el_optgroup; }
+<ELM>option/{endnm}         { BEGIN(ATT); return tok_el_option; }
+<ELM>p/{endnm}              { BEGIN(ATT); return tok_el_p; }
+<ELM>param/{endnm}          { BEGIN(ATT); return tok_el_param; }
+<ELM>pre/{endnm}            { BEGIN(ATT); return tok_el_pre; }
+<ELM>q/{endnm}              { BEGIN(ATT); return tok_el_q; }
+<ELM>samp/{endnm}           { BEGIN(ATT); return tok_el_samp; }
+<ELM>script/{endnm}         { BEGIN(ATT); return tok_el_script; }
+<ELM>select/{endnm}         { BEGIN(ATT); return tok_el_select; }
+<ELM>small/{endnm}          { BEGIN(ATT); return tok_el_small; }
+<ELM>span/{endnm}           { BEGIN(ATT); return tok_el_span; }
+<ELM>strike/{endnm}         { BEGIN(ATT); return tok_el_strike; }
+<ELM>strong/{endnm}         { BEGIN(ATT); return tok_el_strong; }
+<ELM>style/{endnm}          { BEGIN(ATT); return tok_el_style; }
+<ELM>sub/{endnm}            { BEGIN(ATT); return tok_el_sub; }
+<ELM>sup/{endnm}            { BEGIN(ATT); return tok_el_sup; }
+<ELM>table/{endnm}          { BEGIN(ATT); return tok_el_table; }
+<ELM>tbody/{endnm}          { BEGIN(ATT); return tok_el_tbody; }
+<ELM>td/{endnm}             { BEGIN(ATT); return tok_el_td; }
+<ELM>textarea/{endnm}       { BEGIN(ATT); return tok_el_textarea; }
+<ELM>tfoot/{endnm}          { BEGIN(ATT); return tok_el_tfoot; }
+<ELM>th/{endnm}             { BEGIN(ATT); return tok_el_th; }
+<ELM>thead/{endnm}          { BEGIN(ATT); return tok_el_thead; }
+<ELM>title/{endnm}          { BEGIN(ATT); return tok_el_title; }
+<ELM>tr/{endnm}             { BEGIN(ATT); return tok_el_tr; }
+<ELM>tt/{endnm}             { BEGIN(ATT); return tok_el_tt; }
+<ELM>u/{endnm}              { BEGIN(ATT); return tok_el_u; }
+<ELM>ul/{endnm}             { BEGIN(ATT); return tok_el_ul; }
+<ELM>var/{endnm}            { BEGIN(ATT); return tok_el_var; }
+<ELM>{elname}               { BEGIN(ATT); return tok_el_unknown; }
+<ELM>[>]                    { /* tag closed before any name was seen */
+                              BEGIN(INITIAL); return '>'; }
+<ELM>.                      { return (unsigned char) yytext[0]; }
+
+<ATT>accept/{endnm}         { return tok_at_accept; }
+<ATT>accept-charset/{endnm} { return tok_at_accept_charset; }
+<ATT>accesskey/{endnm}      { return tok_at_accesskey; }
+<ATT>action/{endnm}         { return tok_at_action; }
+<ATT>align/{endnm}          { return tok_at_align; }
+<ATT>alink/{endnm}          { return tok_at_alink; }
+<ATT>alt/{endnm}            { return tok_at_alt; }
+<ATT>archive/{endnm}        { return tok_at_archive; }
+<ATT>axis/{endnm}           { return tok_at_axis; }
+<ATT>background/{endnm}     { return tok_at_background; }
+<ATT>abbr/{endnm}           { return tok_at_abbr; }
+<ATT>bgcolor/{endnm}        { return tok_at_bgcolor; }
+<ATT>border/{endnm}         { return tok_at_border; }
+<ATT>cellpadding/{endnm}    { return tok_at_cellpadding; }
+<ATT>cellspacing/{endnm}    { return tok_at_cellspacing; }
+<ATT>char/{endnm}           { return tok_at_char; }
+<ATT>charoff/{endnm}        { return tok_at_charoff; }
+<ATT>charset/{endnm}        { return tok_at_charset; }
+<ATT>checked/{endnm}        { return tok_at_checked; }
+<ATT>cite/{endnm}           { return tok_at_cite; }
+<ATT>class/{endnm}          { return tok_at_class; }
+<ATT>classid/{endnm}        { return tok_at_classid; }
+<ATT>clear/{endnm}          { return tok_at_clear; }
+<ATT>code/{endnm}           { return tok_at_code; }
+<ATT>codebase/{endnm}       { return tok_at_codebase; }
+<ATT>codetype/{endnm}       { return tok_at_codetype; }
+<ATT>color/{endnm}          { return tok_at_color; }
+<ATT>cols/{endnm}           { return tok_at_cols; }
+<ATT>colspan/{endnm}        { return tok_at_colspan; }
+<ATT>compact/{endnm}        { return tok_at_compact; }
+<ATT>content/{endnm}        { return tok_at_content; }
+<ATT>coords/{endnm}         { return tok_at_coords; }
+<ATT>data/{endnm}           { return tok_at_data; }
+<ATT>datetime/{endnm}       { return tok_at_datetime; }
+<ATT>declare/{endnm}        { return tok_at_declare; }
+<ATT>defer/{endnm}          { return tok_at_defer; }
+<ATT>dir/{endnm}            { return tok_at_dir; }
+<ATT>disabled/{endnm}       { return tok_at_disabled; }
+<ATT>enctype/{endnm}        { return tok_at_enctype; }
+<ATT>face/{endnm}           { return tok_at_face; }
+<ATT>for/{endnm}            { return tok_at_for; }
+<ATT>frame/{endnm}          { return tok_at_frame; }
+<ATT>frameborder/{endnm}    { return tok_at_frameborder; }
+<ATT>headers/{endnm}        { return tok_at_headers; }
+<ATT>height/{endnm}         { return tok_at_height; }
+<ATT>href/{endnm}           { return tok_at_href; }
+<ATT>hreflang/{endnm}       { return tok_at_hreflang; }
+<ATT>hspace/{endnm}         { return tok_at_hspace; }
+<ATT>http-equiv/{endnm}     { return tok_at_http_equiv; }
+<ATT>id/{endnm}             { return tok_at_id; }
+<ATT>ismap/{endnm}          { return tok_at_ismap; }
+<ATT>label/{endnm}          { return tok_at_label; }
+<ATT>lang/{endnm}           { return tok_at_lang; }
+<ATT>language/{endnm}       { return tok_at_language; }
+<ATT>link/{endnm}           { return tok_at_link; }
+<ATT>longdesc/{endnm}       { return tok_at_longdesc; }
+<ATT>marginheight/{endnm}   { return tok_at_marginheight; }
+<ATT>marginwidth/{endnm}    { return tok_at_marginwidth; }
+<ATT>maxlength/{endnm}      { return tok_at_maxlength; }
+<ATT>media/{endnm}          { return tok_at_media; }
+<ATT>method/{endnm}         { return tok_at_method; }
+<ATT>multiple/{endnm}       { return tok_at_multiple; }
+<ATT>name/{endnm}           { return tok_at_name; }
+<ATT>nohref/{endnm}         { return tok_at_nohref; }
+<ATT>noresize/{endnm}       { return tok_at_noresize; }
+<ATT>noshade/{endnm}        { return tok_at_noshade; }
+<ATT>nowrap/{endnm}         { return tok_at_nowrap; }
+<ATT>object/{endnm}         { return tok_at_object; }
+<ATT>onblur/{endnm}         { return tok_at_onblur; }
+<ATT>onchange/{endnm}       { return tok_at_onchange; }
+<ATT>onclick/{endnm}        { return tok_at_onclick; }
+<ATT>ondblclick/{endnm}     { return tok_at_ondblclick; }
+<ATT>onfocus/{endnm}        { return tok_at_onfocus; }
+<ATT>onkeydown/{endnm}      { return tok_at_onkeydown; }
+<ATT>onkeypress/{endnm}     { return tok_at_onkeypress; }
+<ATT>onkeyup/{endnm}        { return tok_at_onkeyup; }
+<ATT>onload/{endnm}         { return tok_at_onload; }
+<ATT>onmousedown/{endnm}    { return tok_at_onmousedown; }
+<ATT>onmousemove/{endnm}    { return tok_at_onmousemove; }
+<ATT>onmouseout/{endnm}     { return tok_at_onmouseout; }
+<ATT>onmouseover/{endnm}    { return tok_at_onmouseover; }
+<ATT>onmouseup/{endnm}      { return tok_at_onmouseup; }
+<ATT>onreset/{endnm}        { return tok_at_onreset; }
+<ATT>onselect/{endnm}       { return tok_at_onselect; }
+<ATT>onsubmit/{endnm}       { return tok_at_onsubmit; }
+<ATT>onunload/{endnm}       { return tok_at_onunload; }
+<ATT>profile/{endnm}        { return tok_at_profile; }
+<ATT>prompt/{endnm}         { return tok_at_prompt; }
+<ATT>readonly/{endnm}       { return tok_at_readonly; }
+<ATT>rel/{endnm}            { return tok_at_rel; }
+<ATT>rev/{endnm}            { return tok_at_rev; }
+<ATT>rows/{endnm}           { return tok_at_rows; }
+<ATT>rowspan/{endnm}        { return tok_at_rowspan; }
+<ATT>rules/{endnm}          { return tok_at_rules; }
+<ATT>scheme/{endnm}         { return tok_at_scheme; }
+<ATT>scope/{endnm}          { return tok_at_scope; }
+<ATT>scrolling/{endnm}      { return tok_at_scrolling; }
+<ATT>selected/{endnm}       { return tok_at_selected; }
+<ATT>shape/{endnm}          { return tok_at_shape; }
+<ATT>size/{endnm}           { return tok_at_size; }
+<ATT>span/{endnm}           { return tok_at_span; }
+<ATT>src/{endnm}            { return tok_at_src; }
+<ATT>standby/{endnm}        { return tok_at_standby; }
+<ATT>start/{endnm}          { return tok_at_start; }
+<ATT>style/{endnm}          { return tok_at_style; }
+<ATT>summary/{endnm}        { return tok_at_summary; }
+<ATT>tabindex/{endnm}       { return tok_at_tabindex; }
+<ATT>target/{endnm}         { return tok_at_target; }
+<ATT>text/{endnm}           { return tok_at_text; }
+<ATT>title/{endnm}          { return tok_at_title; }
+<ATT>type/{endnm}           { return tok_at_type; }
+<ATT>usemap/{endnm}         { return tok_at_usemap; }
+<ATT>valign/{endnm}         { return tok_at_valign; }
+<ATT>value/{endnm}          { return tok_at_value; }
+<ATT>valuetype/{endnm}      { return tok_at_valuetype; }
+<ATT>version/{endnm}        { return tok_at_version; }
+<ATT>vlink/{endnm}          { return tok_at_vlink; }
+<ATT>vspace/{endnm}         { return tok_at_vspace; }
+<ATT>width/{endnm}          { return tok_at_width; }
+<ATT>{attrname}             { return tok_at_unknown; }
+
+<ATT>[>]                    { BEGIN(INITIAL); return '>'; }
+<ATT>.                      { return (unsigned char) yytext[0]; }
+
+%%