Lua Project List To Xml


This sample program reads the list of projects using Lua from the Lua website and outputs the list as a valid XML document. It requires GNU wget to be installed on the system (it is available for major Unix and Windows systems, and is installed by default in most Linux distributions).

WARNING: The program will read and interpret HTML. Future changes in the format of the list *WILL* break the parser.

#!/usr/bin/env lua
-- Download the "uses" page with wget, load it into `s`, and normalise the
-- HTML so the entry parser further down can rely on simple patterns.
local fname = "uses.html"

-- os.execute reports success as 0 (Lua 5.1 and earlier) or true (Lua 5.2+).
-- Anything else means wget failed or is not installed, in which case the
-- local file would be empty or stale and must not be parsed.
local status = os.execute("wget -q -O " .. fname .. " http://www.lua.org/uses.html")
if status ~= 0 and status ~= true then
  io.stderr:write("Error downloading '" .. fname .. "' with wget.\n")
  os.exit(1)
end

local fp = io.open(fname, "r")
if fp == nil then
  -- Diagnostics go to stderr: stdout carries the generated XML document.
  io.stderr:write("Error opening file '" .. fname .. "'.\n")
  os.exit(1)
end
local s = fp:read("*a")
fp:close()

-- Collapse the page to one line and remove optional spaces around tags.
s = string.gsub(s, "\n", " ")
s = string.gsub(s, " *< *", " <")
s = string.gsub(s, " *> *", "> ")
s = string.gsub(s, "> *<", "><")
-- Put all the tag names in lowercase.
s = string.gsub(s, "(<[^ >]+)", string.lower)
-- Remove images, scripts, etc.
s = string.gsub(s, "<img[^>]*>", "")
s = string.gsub(s, "<script[^>]*>.-</script>", "")
-- "Normalize" links for future use: whatever attributes precede the href
-- (and whatever letter case the attribute name uses) every link ends up
-- starting with exactly `<a href=`.
s = string.gsub(s, "<a[^>]*[Hh][Rr][Ee][Ff] *= *", "<a href=")
-- Emit the XML document: one <use> element for every project entry found in
-- the normalised page text `s`.
print("<?xml version=\"1.0\" encoding=\"iso-8859-1\" ?>")
print("<luauses>")

-- string.gfind (Lua 5.0) was renamed to string.gmatch in Lua 5.1; use
-- whichever one this interpreter provides.
local gmatch = string.gmatch or string.gfind

-- Escape the characters that are special in XML character data.
local function xml_escape(text)
  text = string.gsub(text, "&", "&amp;")
  text = string.gsub(text, "<", "&lt;")
  text = string.gsub(text, ">", "&gt;")
  return text
end

-- Remove every HTML tag from `text`, keeping only the text between tags.
local function strip_tags(text)
  return (string.gsub(text, "</?.->", ""))
end

-- Return the first two captures of `pattern` in `text` (nil when the
-- pattern does not match), dropping the match positions.
local function capture(text, pattern)
  local _, _, first, second = string.find(text, pattern)
  return first, second
end

for entry in gmatch(s, "<h3>.-<hr>") do
  -- Current data format (without spaces and line-breaks):
  -- <h3>
  -- <a NAME="1" HREF="APPURL">APPNAM</a>
  -- <br><small><em>USER</em></small>
  -- </h3>
  -- DESCR [can have html here]
  -- <p> Contact: <a HREF="EMAIL">CONTACT</a>
  -- <hr>

  -- Every field is local to the entry and starts empty, so a malformed
  -- entry yields empty elements instead of reusing the previous entry's
  -- values or failing on a nil concatenation.
  local appnam, appurl = "", ""
  local user, desc, name, email = "", "", "", ""

  -- Application name and URL come from the link inside the heading.
  local app = capture(entry, "<h3>(.-)</h3>")
  if app then
    app = string.gsub(app, "</?em>", "")
    app = string.gsub(app, "<br>", "")
    local url, title = capture(app, "<a href=\"([^\"> ]*)\"[^>]*>([^<]*)<")
    if url == nil then
      -- The anchor has no href (name-only anchor): keep just its text.
      title = capture(app, "<a[^>]*>([^<]*)</a>")
      url = ""
    end
    appurl = url
    appnam = title or ""
  end

  -- The user/organisation is the <small> part of the heading, minus markup.
  local small = capture(entry, "<small>(.-)</small>")
  if small then
    user = strip_tags(small)
  end

  -- Everything between the heading and the rule is the description; a
  -- trailing "Contact:" paragraph is split off into name and e-mail.
  local body = capture(entry, "</h3>(.-)<hr>")
  if body then
    local cont = capture(body, "<p> *Contact: *(.*)")
    if cont then
      body = string.gsub(body, "<p> *Contact:(.*)", "")
      cont = string.gsub(cont, "<p> *Contact: *", "")
      local addr, who = capture(cont, "<a href=\"([^ \"]+)\"[^>]*>([^<]+)<")
      if who then
        name = who
        email = string.gsub(addr, "mailto:/?/?", "")
      else
        -- No usable link: fall back to the contact text without its tags.
        name = strip_tags(cont)
      end
    end
    desc = body
  end

  -- All values are escaped on output so the document stays well-formed
  -- even when names or URLs contain '&', '<' or '>'.
  print(" <use>")
  print(" <app>" .. xml_escape(appnam) .. "</app>")
  print(" <url>" .. xml_escape(appurl) .. "</url>")
  print(" <user>" .. xml_escape(user) .. "</user>")
  print(" <desc>" .. xml_escape(desc) .. "</desc>")
  print(" <contact>" .. xml_escape(name) .. "</contact>")
  print(" <email>" .. xml_escape(email) .. "</email>")
  print(" </use>")
end
print("</luauses>")

-- AlexandreErwinIttner


RecentChanges · preferences
edit · history
Last edited May 28, 2007 4:29 pm GMT (diff)

AltStyle によって変換されたページ (->オリジナル) /