bvv2cal
author Timm S. Mueller <tmueller@schulze-mueller.de>
Fri, 12 Nov 2010 09:23:02 +0100
changeset 1 522bf1c3dd7c
parent 0 c7642a4f51a5
child 2 f4034a59feaa
permissions -rwxr-xr-x
Times are now in GMT (according to RFC 2445); German umlauts are converted to
ASCII equivalents ae, oe, ue, ... as a concession to some calendar tools
     1 #!/usr/bin/env lua
     2 --
     3 --	$ bvvcal [bezirksname] [startdatum] [enddatum]
     4 --
     5 --	bezirksname : Default "friedrichshain-kreuzberg"
     6 --	startdatum : in deutscher Notation (z.B. 31.12.2010), Default: heute
     7 --	enddatum : Default: startdatum
     8 --
     9 --	Konvertiert den Berliner BVV-Sitzungskalender 
    10 --	ins iCalendar 2.0 (vCal) Format
    11 --
    12 --	Benötigt: 
    13 --	- htmltidy : http://tidy.sourceforge.net/
    14 --	- wget : http://www.gnu.org/software/wget/
    15 --	- Lua 5.1.x : http://www.lua.org/
    16 --	- LuaExpat : http://www.keplerproject.org/luaexpat/
    17 --
    18 --	Revisionen:
    19 --	- 0.2 : Uhrzeiten waren lokal, nicht GMT. Korrigiert.
    20 --	- 0.1 : initial release
    21 --
    22 --	Autor: Timm S. Müller <timm-pirat@schulze-mueller.de>
    23 --
    24 
    25 local lxp = require "lxp"
    26 
    27 local district = arg[1] or "friedrichshain-kreuzberg"
    28 local startdate = arg[2] or os.date("%d.%m.%Y")
    29 local enddate = arg[3] or startdate
    30 local uid_uri = district .. ".bvv.berlin.piratenpartei.de"
    31 
    32 ----- --- --- -- - -  -   -    -
    33 
    34 --	Encoding ist laut RFC 2445 UTF-8, dies ist ein Zugeständnis
    35 --	an Kalendertools, die das nicht mitbekommen haben:
    36 
    37 local umlconv = { 
    38 	["ä"] = "ae", ["Ä"] = "Ae", 
    39 	["ö"] = "oe", ["Ö"] = "Oe",
    40 	["ü"] = "ue", ["Ü"] = "Ue",
    41 	["ß"] = "ss"
    42 }
    43 
    44 local state = "waitcontent"
    45 local record, cell, result = { }, { }, { }
    46 local lnr = 1
    47 local parser = lxp.new {
    48 	StartElement = function(parser, tagname, attr)
    49 		if state == "waitcontent" then
    50 			if tagname == "div" and attr.id == "allrisContent" then
    51 				state = "waittab"
    52 			end
    53 		elseif state == "waittab" then
    54 			if tagname == "table" and attr.class == "tl1" then
    55 				state = "waitrow"
    56 			end
    57 		elseif state == "waitrow" then
    58 			if tagname == "tr" then
    59 				state = "waitcell"
    60 			end
    61 		end
    62 	end,
    63 	EndElement = function(parser, tagname)
    64 		if state == "waitrow" or state == "waittable" then
    65 			if tagname == "table" then
    66 				state = "end"
    67 			end
    68 		elseif state == "waitcell" then
    69 			if tagname == "tr" then
    70 				local day = table.remove(record, 1)
    71 				local sdate = table.remove(record, 1)
    72 				local d, m, y = (sdate or ""):match("^%s*(%d+)%.(%d+)%.(%d+)")
    73 				local time = table.remove(record, 1)
    74 				if d and m and y and time then
    75 					local sH, sM, eH, eM = time:match("^%s*(%d+):(%d+)%s*%-%s*(%d+):(%d+)")
    76 					if not sH then
    77 						sH, sM = time:match("^%s*(%d+):(%d+)")
    78 					end
    79 					if sH then
    80 						-- convert dates to GMT
    81 						local startdate = os.date("!%s", os.time { day=d, month=m, year=y, hour=sH, min=sM })
    82 						local enddate = eH and os.date("!%s", os.time { day=d, month=m, year=y, hour=eH, min=eM })
    83 						local what = table.concat(record, " "):match("^%s*(.-)%s*$")
    84 						what = what:gsub("([\128-\255].)", umlconv)
    85 						table.insert(result, { startdate, enddate, what })
    86 					end
    87 				end
    88 				state = "waitrow"
    89 				record = { }
    90 			elseif tagname == "td" then
    91 				table.insert(record, table.concat(cell))
    92 				cell = { }
    93 			end
    94 		end
    95 	end,
    96 	CharacterData = function(parser, s)
    97 		if state == "waitcell" then
    98 			table.insert(cell, s)
    99 		end
   100 	end
   101 }
   102 
   103 parser:setencoding("ISO-8859-1")
   104 
   105 ----- --- --- -- - -  -   -    -
   106 
   107 local cmd = ("wget --quiet --post-data 'kaldatvon=%s&kaldatbis=%s' 'http://www.berlin.de/ba-%s/bvv-online/si010.asp' -O - | tidy -latin1 -asxml -i -w 0 2>/dev/null -q"):format(startdate, enddate, district)
   108 -- print(cmd)
   109 local f = io.popen(cmd)
   110 for line in f:lines() do
   111 -- 	print(lnr .. " : " .. line)
   112 	parser:parse(line)
   113 	lnr = lnr + 1
   114 end
   115 
   116 ----- --- --- -- - -  -   -    -
   117 
   118 print "BEGIN:VCALENDAR"
   119 print "PRODID:BVV2CAL"
   120 print "VERSION:2.0"
   121 print ""
   122 for i = 1, #result do
   123 	local r = result[i]
   124 	print "BEGIN:VEVENT"
   125 	local startdate = os.date("%Y%m%dT%H%M", r[1])
   126 	print("UID:" .. startdate .. "." .. uid_uri)
   127 	print("SUMMARY:" .. r[3])
   128 	print("DTSTART:" .. startdate .. "00Z")
   129 	if r[2] then
   130 		local enddate = r[2] and os.date("%Y%m%dT%H%M", r[2])
   131 		print("DTEND:" .. enddate .. "00Z")
   132 	end
   133 	print "END:VEVENT"
   134 	print ""
   135 end
   136 print "END:VCALENDAR"