3 ** tek.lib.luahtml - Lua+HTML parser/decoding library
4 ** Written by Timm S. Mueller <tmueller at neoscientists.org>
5 ** See copyright notice in COPYRIGHT
/* topfile(L): uses luaL_checkudata to validate that argument 1 is a userdata
 * of type LUA_FILEHANDLE and yields it as a FILE ** (the handle slot). */
16 #define topfile(L) ((FILE **)luaL_checkudata(L, 1, LUA_FILEHANDLE))
/*
 * tofile: fetch the FILE * behind the file userdata at argument 1.
 * The only failure path visible here raises a Lua error saying that a
 * closed file is being used. The test guarding that error and the final
 * return statement are on lines missing from this excerpt
 * (presumably a NULL check on *f followed by returning *f -- TODO confirm).
 */
19 static FILE *tofile (lua_State *L) {
20 FILE **f = topfile(L);
22 luaL_error(L, "attempt to use a closed file");
/*
 * encodeutf8: write the code point c to buf as a UTF-8 style byte sequence.
 *
 * The visible branches produce sequences of 2, 3, 4, 5 and 6 bytes: one lead
 * byte (0xc0, 0xe0, 0xf0, 0xf8 or 0xfc plus the top bits of c) followed by
 * continuation bytes of the form 0x80 plus six payload bits each.
 * Most range tests that select a branch, and the single byte case, are on
 * lines missing from this excerpt.
 *
 * The return statement is not shown either. outchar passes the result on
 * as its own return value and readparsed subtracts the buffer start from
 * it to get a length, so the result is evidently the position just past the
 * last byte written.
 *
 * NOTE(review): 5 and 6 byte forms are outside the range permitted by
 * RFC 3629 (which stops at U+10FFFF, 4 bytes) -- confirm they are wanted.
 */
27 static unsigned char *encodeutf8(unsigned char *buf, int c)
/* 2 byte form: lead byte carries the bits above the low 6 */
35 *buf++ = 0xc0 + (c >> 6);
36 *buf++ = 0x80 + (c & 0x3f);
/* 3 byte form: 4 + 6 + 6 payload bits */
40 *buf++ = 0xe0 + (c >> 12);
41 *buf++ = 0x80 + ((c & 0xfff) >> 6);
42 *buf++ = 0x80 + (c & 0x3f);
/* 4 byte form: 3 + 6 + 6 + 6 payload bits */
46 *buf++ = 0xf0 + (c >> 18);
47 *buf++ = 0x80 + ((c & 0x3ffff) >> 12);
48 *buf++ = 0x80 + ((c & 0xfff) >> 6);
49 *buf++ = 0x80 + (c & 0x3f);
/* 5 byte form, used for values below 67108864 (2 to the power 26) */
51 else if (c < 67108864)
53 *buf++ = 0xf8 + (c >> 24);
54 *buf++ = 0x80 + ((c & 0xffffff) >> 18);
55 *buf++ = 0x80 + ((c & 0x3ffff) >> 12);
56 *buf++ = 0x80 + ((c & 0xfff) >> 6);
57 *buf++ = 0x80 + (c & 0x3f);
/* 6 byte form for everything larger */
61 *buf++ = 0xfc + (c >> 30);
62 *buf++ = 0x80 + ((c & 0x3fffffff) >> 24);
63 *buf++ = 0x80 + ((c & 0xffffff) >> 18);
64 *buf++ = 0x80 + ((c & 0x3ffff) >> 12);
65 *buf++ = 0x80 + ((c & 0xfff) >> 6);
66 *buf++ = 0x80 + (c & 0x3f);
/* Members of struct utf8reader that are visible in this excerpt. The lines
 * opening and closing the struct are not shown, nor are the declarations of
 * the file and srclen members that other functions in this listing use. */
/* byte source callback; load and encodeform set it to readfile or readstring: */
74 int (*readchar)(struct utf8reader *);
/* decoder state: readutf8 compares accu against min and against the range
 * 55296..57343. The roles of numa and bufc are not visible here -- TODO confirm: */
75 int accu, numa, min, bufc;
/* input bytes when reading from a Lua string (assigned from lua_tolstring): */
76 const unsigned char *src;
/* readstring: readchar callback installed by load and encodeform when the
 * input is a Lua string rather than a file. Its body is not part of this
 * excerpt; presumably it hands out the next byte of src and a negative
 * value once srclen bytes are consumed -- TODO confirm. */
83 static int readstring(struct utf8reader *rd)
/* readfile: readchar callback for file input. Returns the next byte from
 * rd->file as delivered by fgetc, which yields EOF at end of file or on a
 * read error. */
92 static int readfile(struct utf8reader *rd)
94 return fgetc(rd->file);
/*
 * readutf8: pull bytes through rd->readchar and assemble them into one code
 * point. Callers in this listing loop for as long as the result is not
 * negative. Most of the decoding logic is on lines missing from this
 * excerpt; only the checks below are visible, and what happens when they
 * match is not shown.
 */
98 static int readutf8(struct utf8reader *rd)
109 c = rd->readchar(rd);
/* bytes 254 and 255 never occur in well formed UTF-8: */
114 if (c == 254 || c == 255)
/* accumulated value is zero, below the minimum recorded in rd->min
 * (presumably an overlong encoding -- TODO confirm), or inside the
 * surrogate range 0xD800..0xDFFF: */
135 if (rd->accu == 0 || rd->accu < rd->min ||
136 (rd->accu >= 55296 && rd->accu <= 57343))
/* States of the HTML+Lua parser driven by readparsed. PARSER_UNDEF is -1 and
 * is the initial state set by load; the remaining enumerators count up from
 * 0. In this listing PARSER_HTML, PARSER_CODE and PARSER_VAR are stored in
 * the current state, and outchar treats HTML and CODE specially. The exact
 * meaning of OPEN1, OPEN2 and CLOSE is not visible here -- presumably they
 * track partially read open/close markers (TODO confirm). */
190 typedef enum { PARSER_UNDEF = -1, PARSER_HTML, PARSER_OPEN1, PARSER_OPEN2,
191 PARSER_CODE, PARSER_VAR, PARSER_CLOSE } parser_state_t;
/*
 * outchar: append code point c to buf in a form that suits the parser state
 * and return a pointer into buf after the output (sprintf length added to
 * buf, or the result of encodeutf8).
 *
 * Several lines of this function, including the plain ASCII path and the
 * conditions directly in front of the encodeutf8 call and the luaL_error
 * call, are missing from this excerpt.
 */
194 static unsigned char *outchar(lua_State *L, unsigned char *buf, parser_state_t state, int c)
196 if (state == PARSER_HTML)
/* In HTML text, non-ASCII characters and both square brackets are written
 * as decimal character references. Presumably the brackets are escaped
 * because readparsed wraps HTML text in a Lua long bracket string
 * -- TODO confirm. */
198 if (c > 127 || c == '[' || c == ']')
199 return buf + sprintf((char *) buf, "&#%02d;", c);
201 else if (state == PARSER_CODE)
/* inside Lua code the character is passed through as UTF-8: */
204 return encodeutf8(buf, c);
/* raises a Lua error; luaL_error does not return to the caller: */
207 luaL_error(L, "Non-ASCII character outside code or HTML context");
/* Members of struct readdata, the state shared between load and the
 * readparsed callback. The lines opening and closing the struct, and the
 * declaration of the buf pointer announced by the second comment, are not
 * included in this excerpt. buf0 has a fixed size of 256 bytes and is also
 * used as the output area for the chunks readparsed returns. */
216 /* buffer including " " .. outfunc .. "(": */
217 unsigned char buf0[256];
218 /* pointer into buf0 past outfunc: */
220 /* html+lua parser state: */
221 parser_state_t state;
222 /* utf8 reader state: */
223 struct utf8reader utf8;
/*
 * readparsed: reader callback handed to lua_load by load().
 * udata is the struct readdata prepared there. Each call pulls code points
 * from the UTF-8 reader and returns the next piece of generated Lua source,
 * storing its length in *sz.
 *
 * rd->buf0 starts with the prefix built by load (a space, the outfunc name
 * and an opening parenthesis) and rd->buf points just past that prefix, so
 * rd->buf - rd->buf0 is the prefix length. Returned pieces live either at
 * rd->buf0 (prefix included) or at rd->buf (prefix excluded).
 *
 * The switch and case lines that choose between the fragments below, and
 * several assignments to *sz, are missing from this excerpt. The notes
 * describe only what the visible statements do.
 */
227 static const char *readparsed(lua_State *L, void *udata, size_t *sz)
229 struct readdata *rd = udata;
230 parser_state_t news = rd->state;
/* consume code points until the reader reports a negative value: */
233 while ((c = readutf8(&rd->utf8)) >= 0)
243 rd->state = PARSER_HTML;
/* return the prefix, the two bytes at rd->buf (written on a line not shown,
 * presumably the long bracket opener -- TODO confirm) and the character: */
246 *sz = outchar(L, rd->buf + 2, rd->state, c) - rd->buf0;
247 return (char *) rd->buf0;
266 return (char *) rd->buf;
/* entering a variable expression as the very first thing in the input:
 * only the prefix needs to be emitted: */
271 if (rd->state == PARSER_UNDEF)
273 rd->state = PARSER_VAR;
274 *sz = rd->buf - rd->buf0;
275 return (char *) rd->buf0;
/* otherwise emit the 3 byte closing sequence below followed by a copy of the
 * prefix, which ends the pending call and starts a new outfunc call: */
277 rd->state = PARSER_VAR;
278 strcpy((char *) rd->buf, "]])");
279 memcpy(rd->buf + 3, rd->buf0, rd->buf - rd->buf0);
280 *sz = 3 + rd->buf - rd->buf0;
281 return (char *) rd->buf;
284 if (rd->state == PARSER_UNDEF)
285 rd->state = PARSER_CODE;
288 rd->state = PARSER_CODE;
295 return (char *) rd->buf;
/* leaving code: switch back to HTML and return prefix plus two more bytes: */
312 if (rd->state == PARSER_CODE)
314 rd->state = PARSER_HTML;
317 *sz = rd->buf + 2 - rd->buf0;
318 return (char *) rd->buf0;
/* leaving a variable expression: emit the 10 byte tail below, then a copy
 * of the prefix, then two opening square brackets: */
320 rd->state = PARSER_HTML;
321 strcpy((char *) rd->buf, " or \"nil\")");
322 memcpy(rd->buf + 10, rd->buf0, rd->buf - rd->buf0);
323 len = 10 + rd->buf - rd->buf0;
324 rd->buf[len++] = '[';
325 rd->buf[len++] = '[';
327 return (char *) rd->buf;
332 return (char *) rd->buf;
/* ordinary character: encode it at rd->buf and return just that: */
335 *sz = outchar(L, rd->buf, rd->state, c) - rd->buf;
336 return (char *) rd->buf;
/* presumably reached once the read loop has ended; news still holds the
 * state the call started in -- TODO confirm against the full source: */
339 rd->state = PARSER_UNDEF;
340 if (news == PARSER_HTML)
/*
 * load: Lua C function that loads a Lua+HTML source as a Lua chunk.
 *   argument 1: file userdata (read via tofile/readfile) or a string
 *               (read via readstring)
 *   argument 2: name of the output function, used to build the prefix
 *               stored in rd.buf0 (a space, the name, an opening parenthesis)
 *   argument 3: chunk name handed to lua_load
 * The source is translated on the fly by the readparsed reader callback.
 * The result handling after lua_load is only partly visible; the remaining
 * comment indicates that a failure leaves nil and a message on the stack.
 *
 * NOTE(review): rd.buf0 is 256 bytes and is filled with strcpy/strcat
 * without a length check, and lua_tostring yields NULL when the value is
 * neither a string nor a number. Unless lines missing from this excerpt
 * validate argument 2, a long or non-string outfunc would overflow the
 * buffer or pass NULL to strcat -- confirm against the full source.
 *
 * The lua_load call takes four arguments, which matches the Lua 5.1 API.
 */
350 static int load(lua_State *L)
353 const char *outfunc = lua_tostring(L, 2);
354 const char *chunkname = lua_tostring(L, 3);
/* choose the byte source depending on the type of argument 1: */
357 if (lua_isuserdata(L, 1))
359 rd.utf8.file = tofile(L);
360 rd.utf8.readchar = readfile;
364 rd.utf8.src = (unsigned char *) lua_tolstring(L, 1, &rd.utf8.srclen);
365 rd.utf8.readchar = readstring;
/* build the call prefix and remember where it ends: */
372 rd.state = PARSER_UNDEF;
373 strcpy((char *) rd.buf0, " ");
374 strcat((char *) rd.buf0, outfunc);
375 strcat((char *) rd.buf0, "(");
376 rd.buf = rd.buf0 + strlen((char *) rd.buf0);
378 res = lua_load(L, readparsed, &rd, chunkname);
384 /* nil, message on stack */
389 /*****************************************************************************/
/*
 * encodeform: Lua C function. Argument 1 is either a file userdata (read
 * through tofile/readfile) or a string (read through readstring). The input
 * is decoded with readutf8 and appended to a luaL_Buffer, with some
 * characters replaced:
 *   - four special characters are replaced by fixed strings whose length
 *     arguments are 6, 5, 4 and 4 (see the note below);
 *   - code points 91 and 93 (the square brackets) and everything above 126
 *     are written as decimal character references with at least 3 digits.
 * The case labels selecting the four fixed strings, the handling of all
 * other characters, the size of sbuf and the return value are on lines not
 * included in this excerpt.
 *
 * NOTE(review): the four string literals passed to luaL_addlstring look
 * damaged in this listing. The first one is not a valid C literal and none
 * of them is as long as its length argument. The lengths 6/5/4/4 suggest
 * that the HTML entities for the double quote, ampersand, less-than and
 * greater-than characters were decoded by whatever produced this listing.
 * Verify against the pristine source before building from this text.
 *
 * NOTE(review): sprintf writes into sbuf without a bound; a large code point
 * needs up to 14 bytes including the terminator -- confirm sbuf is big enough.
 */
392 static int encodeform(lua_State *L)
399 if (lua_isuserdata(L, 1))
401 rd.utf8.file = tofile(L);
402 rd.utf8.readchar = readfile;
406 rd.utf8.src = (unsigned char *) lua_tolstring(L, 1, &rd.utf8.srclen);
407 rd.utf8.readchar = readstring;
410 luaL_buffinit(L, &b);
416 while ((c = readutf8(&rd.utf8)) >= 0)
421 luaL_addlstring(&b, """, 6);
424 luaL_addlstring(&b, "&", 5);
427 luaL_addlstring(&b, "<", 4);
430 luaL_addlstring(&b, ">", 4);
433 if (c == 91 || c == 93 || c > 126)
435 sprintf(sbuf, "&#%03d;", c);
436 luaL_addstring(&b, sbuf);
448 /*****************************************************************************/
/* Function table registered by luaopen_tek_lib_luahtml. Only the encodeform
 * entry is visible in this excerpt; the other entries (presumably including
 * load) and the terminating entry are on lines not shown -- TODO confirm. */
451 static const luaL_Reg lib[] =
454 { "encodeform", encodeform },
/* Module entry point: registers the functions of the lib table under the
 * module name tek.lib.luahtml using luaL_register (a Lua 5.1 auxiliary
 * library function). The return statement is not part of this excerpt. */
459 int luaopen_tek_lib_luahtml(lua_State *L)
461 luaL_register(L, "tek.lib.luahtml", lib);