bvv2cal
author Timm S. Mueller <tmueller@schulze-mueller.de>
Fri, 12 Nov 2010 10:56:02 +0100
changeset 2 f4034a59feaa
parent 1 522bf1c3dd7c
permissions -rwxr-xr-x
Dates in daylight saving time were still incorrect, fixed
     1 #!/usr/bin/env lua
     2 --
     3 --	$ bvv2cal [bezirksname] [startdatum] [enddatum]
     4 --
     5 --	bezirksname : Default "friedrichshain-kreuzberg"
     6 --	startdatum : in deutscher Notation (z.B. 31.12.2010), Default: heute
     7 --	enddatum : Default: startdatum
     8 --
     9 --	Konvertiert den Berliner BVV-Sitzungskalender 
    10 --	ins iCalendar 2.0 (vCal) Format
    11 --
    12 --	Benötigt: 
    13 --	- htmltidy : http://tidy.sourceforge.net/
    14 --	- wget : http://www.gnu.org/software/wget/
    15 --	- Lua 5.1.x : http://www.lua.org/
    16 --	- LuaExpat : http://www.keplerproject.org/luaexpat/
    17 --
    18 --	Revisionen:
    19 --	- 0.2 : Uhrzeiten waren lokal, nicht GMT. Korrigiert.
    20 --	- 0.1 : initial release
    21 --
    22 --	Autor: Timm S. Müller <timm-pirat@schulze-mueller.de>
    23 --
    24 
    25 local lxp = require "lxp"
    26 
    27 local district = arg[1] or "friedrichshain-kreuzberg"
    28 local startdate = arg[2] or os.date("%d.%m.%Y")
    29 local enddate = arg[3] or startdate
    30 local uid_uri = district .. ".bvv.berlin.piratenpartei.de"
    31 
    32 ----- --- --- -- - -  -   -    -
    33 
    34 --	Encoding ist laut RFC 2445 UTF-8, dies ist ein Zugeständnis
    35 --	an Kalendertools, die das nicht mitbekommen haben:
    36 
    37 local umlconv = { 
    38 	["ä"] = "ae", ["Ä"] = "Ae", 
    39 	["ö"] = "oe", ["Ö"] = "Oe",
    40 	["ü"] = "ue", ["Ü"] = "Ue",
    41 	["ß"] = "ss"
    42 }
    43 
    44 local function local2gmt(y, m, d, hour, min)
    45 	return os.date("%s", os.time { day=d, month=m, year=y, hour=hour, min=min })
    46 end
    47 
    48 
    49 local state = "waitcontent"
    50 local record, cell, result = { }, { }, { }
    51 local lnr = 1
    52 local parser = lxp.new {
    53 	StartElement = function(parser, tagname, attr)
    54 		if state == "waitcontent" then
    55 			if tagname == "div" and attr.id == "allrisContent" then
    56 				state = "waittab"
    57 			end
    58 		elseif state == "waittab" then
    59 			if tagname == "table" and attr.class == "tl1" then
    60 				state = "waitrow"
    61 			end
    62 		elseif state == "waitrow" then
    63 			if tagname == "tr" then
    64 				state = "waitcell"
    65 			end
    66 		end
    67 	end,
    68 	EndElement = function(parser, tagname)
    69 		if state == "waitrow" or state == "waittable" then
    70 			if tagname == "table" then
    71 				state = "end"
    72 			end
    73 		elseif state == "waitcell" then
    74 			if tagname == "tr" then
    75 				local day = table.remove(record, 1)
    76 				local sdate = table.remove(record, 1)
    77 				local d, m, y = (sdate or ""):match("^%s*(%d+)%.(%d+)%.(%d+)")
    78 				local time = table.remove(record, 1)
    79 				if d and m and y and time then
    80 					local sH, sM, eH, eM = time:match("^%s*(%d+):(%d+)%s*%-%s*(%d+):(%d+)")
    81 					if not sH then
    82 						sH, sM = time:match("^%s*(%d+):(%d+)")
    83 					end
    84 					if sH then
    85 						-- convert dates to GMT
    86 						local startdate = local2gmt(y, m, d, sH, sM)
    87 						local enddate = eH and local2gmt(y, m, d, eH, eM)
    88 						local what = table.concat(record, " "):match("^%s*(.-)%s*$")
    89 						what = what:gsub("([\128-\255].)", umlconv)
    90 						table.insert(result, { startdate, enddate, what })
    91 					end
    92 				end
    93 				state = "waitrow"
    94 				record = { }
    95 			elseif tagname == "td" then
    96 				table.insert(record, table.concat(cell))
    97 				cell = { }
    98 			end
    99 		end
   100 	end,
   101 	CharacterData = function(parser, s)
   102 		if state == "waitcell" then
   103 			table.insert(cell, s)
   104 		end
   105 	end
   106 }
   107 
   108 parser:setencoding("ISO-8859-1")
   109 
   110 ----- --- --- -- - -  -   -    -
   111 
   112 local cmd = ("wget --quiet --post-data 'kaldatvon=%s&kaldatbis=%s' 'http://www.berlin.de/ba-%s/bvv-online/si010.asp' -O - | tidy -latin1 -asxml -i -w 0 2>/dev/null -q"):format(startdate, enddate, district)
   113 -- print(cmd)
   114 local f = io.popen(cmd)
   115 for line in f:lines() do
   116 -- 	print(lnr .. " : " .. line)
   117 	parser:parse(line)
   118 	lnr = lnr + 1
   119 end
   120 
   121 ----- --- --- -- - -  -   -    -
   122 
   123 print "BEGIN:VCALENDAR"
   124 print "PRODID:BVV2CAL"
   125 print "VERSION:2.0"
   126 print ""
   127 for i = 1, #result do
   128 	local r = result[i]
   129 	print "BEGIN:VEVENT"
   130 	local startdate = os.date("!%Y%m%dT%H%M", r[1])
   131 	print("UID:" .. startdate .. "." .. uid_uri)
   132 	print("SUMMARY:" .. r[3])
   133 	print("DTSTART:" .. startdate .. "00Z")
   134 	if r[2] then
   135 		local enddate = r[2] and os.date("!%Y%m%dT%H%M", r[2])
   136 		print("DTEND:" .. enddate .. "00Z")
   137 	end
   138 	print "END:VEVENT"
   139 	print ""
   140 end
   141 print "END:VCALENDAR"