Module:URLutil
Appearance
From Wikipedia, the free encyclopedia
You might want to create a documentation page for this Scribunto module.
Editors can experiment in this module's sandbox (create | mirror) and testcases (create) pages.
Add categories to the /doc subpage. Subpages of this module.
Editors can experiment in this module's sandbox (create | mirror) and testcases (create) pages.
Add categories to the /doc subpage. Subpages of this module.
-- NOTE(review): Failsafe.failsafe() compares version strings byte-wise
-- against `serial`; confirm that requested versions use the same date
-- format as the value below.
local URLutil = { suite  = "URLutil",
                  serial = "2022年04月05日",
                  item   = 10859193 }
--[=[
Utilities for URL etc. on www.
* decode()
* encode()
* getAuthority()
* getFragment()
* getHost()
* getLocation()
* getNormalized()
* getPath()
* getPort()
* getQuery()
* getQueryTable()
* getRelativePath()
* getScheme()
* getSortkey()
* getTLD()
* getTop2domain()
* getTop3domain()
* isAuthority()
* isDomain()
* isDomainExample()
* isDomainInt()
* isHost()
* isHostPathResource()
* isIP()
* isIPlocal()
* isIPv4()
* isIPv6()
* isMailAddress()
* isMailLink()
* isProtocolDialog()
* isProtocolWiki()
* isResourceURL()
* isSuspiciousURL()
* isUnescapedURL()
* isWebURL()
* wikiEscapeURL()
* failsafe()
Only [[dotted decimal]] notation for IPv4 expected.
Does not support dotted hexadecimal, dotted octal, or single-number formats.
IPv6 URL (bracketed) not yet implemented; might need wikisyntax escaping anyway.
]=]
local Failsafe = URLutil



-- Characters that must stay percent-encoded, per URL component:
--   F = fragment, P = path, Q = query, X = any component.
-- The backslash is written as "\\": a bare "\]" silently yields "]" in
-- Lua 5.1, which dropped the backslash from every set.
local decodeComponentProtect = { F = "\"#%<>[\\]^`{|}",
                                 P = "\"#%<>[\\]^`{|}/?",
                                 Q = "\"#%<>[\\]^`{|}&=+;,",
                                 X = "\"#%<>[\\]^`{|}&=+;,/?" }



local decodeComponentEscape = function ( averse, adapt )
    -- Tell whether a character code needs to remain percent-encoded
    -- Precondition:
    --     averse  -- string, key into decodeComponentProtect (F, P, Q, X)
    --     adapt   -- number, character code, 0...255
    -- Postcondition:
    --     Returns truthy value if the character is to be kept escaped
    -- Space is 0x20 (32); control characters and DEL are kept escaped, too.
    return adapt <= 32  or
           adapt == 127  or
           decodeComponentProtect[ averse ]:find( string.char( adapt ),
                                                  1,
                                                  true )
end -- decodeComponentEscape()



local decodeComponentML = function ( ask )
    -- Resolve numeric character entities within a URL
    -- Precondition:
    --     ask  -- string
    -- Postcondition:
    --     Returns string; every &#...; or &#x...; is replaced by
    --       * a decimal entity, if code >= 128
    --       * a percent escape, if decodeComponentEscape() demands that
    --       * the plain character otherwise
    local i = 1
    local j, n, s
    while ( i ) do
        -- decimal as well as hexadecimal notation, one or more digits
        i = ask:find( "&#[xX]?%x+;", i )
        if i then
            j = ask:find( ";", i + 2, true )
            s = ask:sub( i + 2,  j - 1 ):upper()
            n = s:byte( 1, 1 )
            if n == 88 then    -- 'X'
                n = tonumber( s:sub( 2 ),  16 )
            elseif s:match( "^%d+$" ) then
                n = tonumber( s )
            else
                -- hexadecimal letters without leading x: no valid entity
                n = false
            end
            if n then
                if n >= 128 then
                    s = string.format( "&#%d;", n )
                elseif decodeComponentEscape( "X", n ) then
                    s = string.format( "%%%02X", n )
                else
                    s = string.format( "%c", n )
                end
                ask = ask:sub( 1,  i - 1 )  ..  s  ..  ask:sub( j + 1 )
            end
            i = i + 1
        end
    end -- while i
    return ask
end -- decodeComponentML()



local decodeComponentPercent = function ( ask, averse )
    -- Decode percent escapes %20...%7F where it is safe to do so
    -- Precondition:
    --     ask     -- string, URL component
    --     averse  -- string, key into decodeComponentProtect (F, P, Q)
    -- Postcondition:
    --     Returns string; escapes which have to be kept
    --     get an upcased second hexadecimal digit
    local i = 1
    local j, k, m, n
    while ( i ) do
        i = ask:find( "%%[2-7]%x", i )
        if i then
            j = i + 1                  -- first hex digit
            k = j + 1                  -- second hex digit
            n = ask:byte( k, k )
            k = k + 1                  -- first character behind escape
            m = ( n > 96 )             -- lowercase a...f
            if m then
                n = n - 32
                m = n                  -- upcased second digit
            end
            if n > 57 then
                n = n - 55             -- A...F
            else
                n = n - 48             -- 0...9
            end
            n = ( ask:byte( j, j ) - 48 ) * 16   +   n
            if n == 39  and  ask:sub( i + 3,  i + 5 ) == "%27" then
                -- run of escaped apostrophes; decoding would create ''
                j = i + 6
                while ( ask:sub( j,  j + 2 ) == "%27" ) do
                    j = j + 3
                end -- while "%27"
            elseif decodeComponentEscape( averse, n ) then
                if m then
                    ask = string.format( "%s%c%s",
                                         ask:sub( 1, j ),
                                         m,
                                         ask:sub( k ) )
                end
            elseif i == 1 then
                ask = string.format( "%c%s",  n,  ask:sub( k ) )
            else
                ask = string.format( "%s%c%s",
                                     ask:sub( 1,  i - 1 ),
                                     n,
                                     ask:sub( k ) )
            end
            i = j
        end
    end -- while i
    return ask
end -- decodeComponentPercent()



local getTopDomain = function ( url, mode )
    -- Retrieve the last two or three labels of the host
    -- Precondition:
    --     url   -- string
    --     mode  -- number, 2 or 3
    -- Postcondition:
    --     Returns string, or false
    local r = URLutil.getHost( url )
    if r then
        local pattern = "[%w%%%-]+%.%a[%w%-]*%a)$"
        if mode == 3 then
            pattern = "[%w%%%-]+%." .. pattern
        end
        r = mw.ustring.match( "." .. r,  "%.(" .. pattern )
        if not r then
            r = false
        end
    else
        r = false
    end
    return r
end -- getTopDomain()



local getHash = function ( url )
    -- Find the position of the '#' which starts the fragment
    -- Precondition:
    --     url  -- string
    -- Postcondition:
    --     Returns number, or nil if there is no fragment
    -- A '#' which is part of a numeric character entity (&#91; &#x5B;)
    -- does not start a fragment and is skipped.
    local r = url:find( "#", 1, true )
    while r do
        local entity = false
        if r > 1  and  url:byte( r - 1 ) == 38 then    -- '&'
            local s = url:sub( r + 1 )
            entity = s:match( "^%d+;" )  or  s:match( "^[xX]%x+;" )
        end
        if entity then
            r = url:find( "#",  r + 1,  true )
        else
            break
        end
    end -- while r
    return r
end -- getHash()



URLutil.decode = function ( url, enctype )
    -- Decode an URL-encoded string, and make it safe for wikitext
    -- Precondition:
    --     url      -- string
    --     enctype  -- string or nil; passed on to mw.uri.decode()
    -- Postcondition:
    --     Returns string; HTML-encoded, with [ | ] as numeric entities
    local r, s
    if type( enctype ) == "string" then
        s = mw.text.trim( enctype )
        if s == "" then
            s = nil
        else
            s = s:upper()
        end
    end
    r = mw.text.encode( mw.uri.decode( url, s ) )
    if r:find( "[%[|%]]" ) then
        -- replacing a character by itself would be a no-op;
        -- numeric entities keep these from acting as wikisyntax
        r = r:gsub( "%[", "&#91;" )
             :gsub( "|", "&#124;" )
             :gsub( "%]", "&#93;" )
    end
    return r
end -- URLutil.decode()



URLutil.encode = function ( url, enctype )
    -- URL-encode a string
    -- Precondition:
    --     url      -- string
    --     enctype  -- string or nil; passed on to mw.uri.encode()
    -- Postcondition:
    --     Returns string; a leading * : ; is percent-encoded as well
    local k, r, s
    if type( enctype ) == "string" then
        s = mw.text.trim( enctype )
        if s == "" then
            s = nil
        else
            s = s:upper()
        end
    end
    r = mw.uri.encode( url, s )
    k = r:byte( 1, 1 )
    if -- k == 35  or      -- #
       k == 42  or      -- *
       k == 58  or      -- :
       k == 59 then     -- ;
        r = string.format( "%%%X%s",  k,  r:sub( 2 ) )
    end
    if r:find( "[%[|%]]" ) then
        -- '%' needs to be doubled within a gsub() replacement string,
        -- otherwise "%5" is an invalid capture reference
        r = r:gsub( "%[", "%%5B" )
             :gsub( "|", "%%7C" )
             :gsub( "%]", "%%5D" )
    end
    return r
end -- URLutil.encode()



URLutil.getAuthority = function ( url )
    -- Retrieve host, with port if any, in lowercase
    -- Precondition:
    --     url  -- string
    -- Postcondition:
    --     Returns string, or nil if no valid authority found,
    --     or false if url is not a string
    local r
    if type( url ) == "string" then
        local colon, host, port
        local pattern = "^%s*%w*:?//([%w%.%%_-]+)(:?)([%d]*)/"
        local s = mw.text.decode( url )
        local i = s:find( "#", 6, true )
        if i then
            s = s:sub( 1,  i - 1 )  ..  "/"
        else
            s = s .. "/"
        end
        host, colon, port = mw.ustring.match( s, pattern )
        if URLutil.isHost( host ) then
            host = mw.ustring.lower( host )
            if colon == ":" then
                if port:find( "^[1-9]" ) then
                    r = ( host .. ":" .. port )
                end
            elseif #port == 0 then
                r = host
            end
        end
    else
        r = false
    end
    return r
end -- URLutil.getAuthority()



URLutil.getFragment = function ( url, decode )
    -- Retrieve the fragment, without leading '#'
    -- Precondition:
    --     url     -- string
    --     decode  -- string or nil; "%" or "WIKI" request decoding
    -- Postcondition:
    --     Returns string, or false if no fragment,
    --     or nil if url is not a string
    local r
    if type( url ) == "string" then
        local i = getHash( url )
        if i then
            r = mw.text.trim( url:sub( i ) ):sub( 2 )
            if type( decode ) == "string" then
                local encoding = mw.text.trim( decode )
                local launch
                if encoding == "%" then
                    launch = true
                elseif encoding == "WIKI" then
                    -- anchor encoding: .XX for %XX, _ for space
                    r = r:gsub( "%.(%x%x)", "%%%1" )
                         :gsub( "_", " " )
                    launch = true
                end
                if launch then
                    r = mw.uri.decode( r, "PATH" )
                end
            end
        else
            r = false
        end
    else
        r = nil
    end
    return r
end -- URLutil.getFragment()



URLutil.getHost = function ( url )
    -- Retrieve the host, without port
    -- Precondition:
    --     url  -- string
    -- Postcondition:
    --     Returns string, or the falsy result of getAuthority()
    local r = URLutil.getAuthority( url )
    if r then
        r = mw.ustring.match( r, "^([%w%.%%_%-]+):?[%d]*$" )
    end
    return r
end -- URLutil.getHost()



URLutil.getLocation = function ( url )
    -- Retrieve the URL without fragment
    -- Precondition:
    --     url  -- string
    -- Postcondition:
    --     Returns string, or false if nothing left,
    --     or nil if url is not a string
    local r
    if type( url ) == "string" then
        r = mw.text.trim( url )
        if r == "" then
            r = false
        else
            local i = getHash( r )
            if i then
                if i == 1 then
                    r = false
                else
                    r = r:sub( 1,  i - 1 )
                end
            end
        end
    else
        r = nil
    end
    return r
end -- URLutil.getLocation()



URLutil.getNormalized = function ( url )
    -- Normalize an URL: entities resolved, safe percent escapes decoded,
    -- scheme and authority in lowercase, wikisyntax characters escaped
    -- Precondition:
    --     url  -- string
    -- Postcondition:
    --     Returns string, or false
    local r
    if type( url ) == "string" then
        r = mw.text.trim( url )
        if r == "" then
            r = false
        else
            r = decodeComponentML( r )
        end
    else
        r = false
    end
    if r then
        local k = r:find( "//", 1, true )
        if k then
            local j = r:find( "/",  k + 2,  true )    -- start of path
            local sF, sP, sQ
            if r:find( "%%[2-7]%x" ) then
                -- split into components, each with own decoding rules
                local i = getHash( r )
                if i then
                    sF = r:sub( i + 1 )
                    r  = r:sub( 1,  i - 1 )
                    if sF == "" then
                        sF = false
                    else
                        sF = decodeComponentPercent( sF, "F" )
                    end
                end
                i = r:find( "?", 1, true )
                if i then
                    sQ = r:sub( i )
                    r  = r:sub( 1,  i - 1 )
                    sQ = decodeComponentPercent( sQ, "Q" )
                end
                if j then
                    if #r > j then
                        sP = r:sub( j + 1 )
                        sP = decodeComponentPercent( sP, "P" )
                    end
                    r = r:sub( 1,  j - 1 )
                end
            elseif j then
                local n = #r
                if r:byte( n, n ) == 35 then    -- '#'
                    -- drop empty fragment
                    n = n - 1
                    r = r:sub( 1, n )
                end
                if n > j then
                    sP = r:sub( j + 1 )
                end
                r = r:sub( 1,  j - 1 )
            end
            r = mw.ustring.lower( r ) .. "/"
            if sP then
                r = r .. sP
            end
            if sQ then
                r = r .. sQ
            end
            if sF then
                r = string.format( "%s#%s", r, sF )
            end
        end
        r = r:gsub( " ", "%%20" )
             :gsub( "%[", "%%5B" )
             :gsub( "|", "%%7C" )
             :gsub( "%]", "%%5D" )
             :gsub( "%<", "%%3C" )
             :gsub( "%>", "%%3E" )
    end
    return r
end -- URLutil.getNormalized()



URLutil.getPath = function ( url )
    -- Retrieve the path, without query and fragment
    -- Precondition:
    --     url  -- string
    -- Postcondition:
    --     Returns string, or the falsy result of getRelativePath()
    local r = URLutil.getRelativePath( url )
    if r then
        local s = r:match( "^([^%?]*)%?" )
        if s then
            r = s
        end
        s = r:match( "^([^#]*)#" )
        if s then
            r = s
        end
    end
    return r
end -- URLutil.getPath()



URLutil.getPort = function ( url )
    -- Retrieve the port
    -- Precondition:
    --     url  -- string
    -- Postcondition:
    --     Returns number, or false if no port given,
    --     or the falsy result of getAuthority()
    local r = URLutil.getAuthority( url )
    if r then
        r = r:match( ":([1-9][0-9]*)$" )
        if r then
            r = tonumber( r )
        else
            r = false
        end
    end
    return r
end -- URLutil.getPort()



URLutil.getQuery = function ( url, key, separator )
    -- Retrieve the query, or the value of one query parameter
    -- Precondition:
    --     url        -- string
    --     key        -- string or nil, name of the parameter
    --     separator  -- string or nil, one of & ; , /   (default: &)
    -- Postcondition:
    --     Returns string, or false if not found,
    --     or nil if url is not a string
    local r = URLutil.getLocation( url )
    if r then
        r = r:match( "^[^%?]*%?(.+)$" )
        if r  and  type( key ) == "string" then
            local single = mw.text.trim( key )
            if single ~= "" then
                local sep = "&"
                local s, scan
                if type( separator ) == "string" then
                    s = mw.text.trim( separator )
                    if s:match( "^[&;,/]$" ) then
                        sep = s
                    end
                end
                -- the key is data, not a pattern: escape all punctuation
                single = single:gsub( "%p", "%%%0" )
                s      = string.format( "%s%s%s", sep, r, sep )
                scan   = string.format( "%s%s=([^%s]*)%s",
                                        sep, single, sep, sep )
                r = s:match( scan )
            end
        end
        if not r then
            r = false
        end
    end
    return r
end -- URLutil.getQuery()



URLutil.getQueryTable = function ( url, separator )
    -- Retrieve all query parameters
    -- Precondition:
    --     url        -- string
    --     separator  -- string or nil, one of & ; , /   (default: &)
    -- Postcondition:
    --     Returns table, mapping name to value,
    --     or to false if the item has no '=' behind its first character;
    --     or the falsy result of getQuery()
    local r = URLutil.getQuery( url )
    if r then
        local sep = "&"
        local parts, s, set
        if type( separator ) == "string" then
            s = mw.text.trim( separator )
            if s:match( "^[&;,/]$" ) then
                sep = s
            end
        end
        parts = mw.text.split( r, sep, true )
        r     = { }
        for i = 1, #parts do
            s = parts[ i ]
            if s:find( "=", 2, true ) then
                s, set = s:match( "^([^=]+)=(.*)$" )
                if s then
                    r[ s ] = set
                end
            else
                r[ s ] = false
            end
        end -- for i
    end
    return r
end -- URLutil.getQueryTable()



URLutil.getRelativePath = function ( url )
    -- Retrieve everything behind the authority
    -- Precondition:
    --     url  -- string
    -- Postcondition:
    --     Returns string, or false if not available,
    --     or nil if url is not a string
    local r
    if type( url ) == "string" then
        local s = url:match( "^%s*[a-zA-Z]*://(.*)$" )
        if s then
            s = s:match( "[^/]+(/.*)$" )
        else
            local x
            x, s = url:match( "^%s*(/?)(/.*)$" )
            if x == "/" then
                -- protocol-relative: skip authority
                s = s:match( "/[^/]+(/.*)$" )
            end
        end
        if s then
            r = mw.text.trim( s )
        elseif URLutil.isResourceURL( url ) then
            r = "/"
        else
            r = false
        end
    else
        r = nil
    end
    return r
end -- URLutil.getRelativePath()



URLutil.getScheme = function ( url )
    -- Retrieve the scheme, like "http://" or "//"
    -- Precondition:
    --     url  -- string
    -- Postcondition:
    --     Returns string in lowercase, or false if no scheme found,
    --     or nil if url is not a string
    local r
    if type( url ) == "string" then
        local pattern = "^%s*([a-zA-Z]*)(:?)(//)"
        local prot, colon, slashes = url:match( pattern )
        r = false
        if slashes == "//" then
            if colon == ":" then
                if #prot > 2 then
                    r = prot:lower() .. "://"
                end
            elseif #prot == 0 then
                r = "//"
            end
        end
    else
        r = nil
    end
    return r
end -- URLutil.getScheme()



URLutil.getSortkey = function ( url )
    -- Build a sort key: domain labels in reverse order, port, scheme, path
    -- Precondition:
    --     url  -- string
    -- Postcondition:
    --     Returns string with sort key, or url unchanged if not applicable
    local r = url
    if type( url ) == "string" then
        local i = url:find( "//", 1, true )
        if i then
            local scheme
            if url:match( "^%s*//" ) then
                -- protocol-relative; string positions start at 1, never 0
                scheme = ""
            else
                scheme = url:match( "^%s*([a-zA-Z]*)://" )
            end
            if scheme then
                local s = url:sub( i + 2 )
                local comps, site, m, suffix
                scheme = scheme:lower()
                i = s:find( "/", 1, true )
                if i  and  i > 1 then
                    suffix = s:sub( i + 1 )    -- mw.uri.encode()
                    s      = s:sub( 1,  i - 1 )
                    suffix = suffix:gsub( "#", " " )
                else
                    suffix = ""
                end
                -- capture digits only; "%d" below requires a number
                site, m = s:match( "^(.+):(%d+)$" )
                if m then
                    m = tonumber( m )
                else
                    site = s
                    m    = 0
                end
                comps = mw.text.split( site:lower(), ".", true )
                r = "///"
                for k = #comps, 2, -1 do
                    r = string.format( "%s%s.", r, comps[ k ] )
                end -- for --k
                r = string.format( "%s%s %d %s: %s",
                                   r, comps[ 1 ], m, scheme, suffix )
            end
        end
    end
    return r
end -- URLutil.getSortkey()



URLutil.getTLD = function ( url )
    -- Retrieve the top level domain
    -- Precondition:
    --     url  -- string
    -- Postcondition:
    --     Returns string, or false if not found,
    --     or the falsy result of getHost()
    local r = URLutil.getHost( url )
    if r then
        r = mw.ustring.match( r, "%w+%.(%a[%w%-]*%a)$" )
        if not r then
            r = false
        end
    end
    return r
end -- URLutil.getTLD()



URLutil.getTop2domain = function ( url )
    -- Retrieve the last two labels of the host, or false
    return getTopDomain( url, 2 )
end -- URLutil.getTop2domain()



URLutil.getTop3domain = function ( url )
    -- Retrieve the last three labels of the host, or false
    return getTopDomain( url, 3 )
end -- URLutil.getTop3domain()



URLutil.isAuthority = function ( s )
    -- Is this a host, optionally followed by :port ?
    -- Precondition:
    --     s  -- string
    -- Postcondition:
    --     Returns result of isHost() if the port is acceptable,
    --     false otherwise, or nil if s is not a string
    local r
    if type( s ) == "string" then
        local pattern = "^%s*([%w%.%%_-]+)(:?)(%d*)%s*$"
        local host, colon, port = mw.ustring.match( s, pattern )
        if not host then
            r = false
        elseif colon == ":" then
            if not port:match( "^[1-9][0-9]*$" ) then
                r = false
            end
        elseif port ~= "" then
            r = false
        end
        if r == nil then
            -- a rejected port must not be overruled by the host check
            r = URLutil.isHost( host )
        end
    else
        r = nil
    end
    return r
end -- URLutil.isAuthority()



URLutil.isDomain = function ( s )
    -- Is this a syntactically valid domain name?
    -- Precondition:
    --     s  -- string
    -- Postcondition:
    --     Returns true or false, or nil if s is not a string
    local r
    if type( s ) == "string" then
        local scan = "^%s*([%w%.%%_-]*%w)%.(%a[%w-]*%a)%s*$"
        local scope
        r = false
        s, scope = mw.ustring.match( s, scan )
        if type( s ) == "string"  and  mw.ustring.find( s, "^%w" ) then
            -- no empty label permitted
            r = not mw.ustring.find( s, "..", 1, true )
        end
    else
        r = nil
    end
    return r
end -- URLutil.isDomain()



URLutil.isDomainExample = function ( url )
    -- RFC 2606: example.com example.net example.org example.edu
    -- Precondition:
    --     url  -- string
    -- Postcondition:
    --     Returns true or false
    local r = getTopDomain( url, 2 )
    if r then
        local s = r:lower():match( "^example%.([a-z][a-z][a-z])$" )
        if s then
            r = ( s == "com"  or
                  s == "edu"  or
                  s == "net"  or
                  s == "org" )
        else
            r = false
        end
    end
    return r
end -- URLutil.isDomainExample()



URLutil.isDomainInt = function ( url )
    -- Internationalized Domain Name (Punycode)
    -- Precondition:
    --     url  -- string
    -- Postcondition:
    --     Returns true or false, or the falsy result of getHost()
    local r = URLutil.getHost( url )
    if r then
        if r:match( "^[!-~]+$" ) then
            -- ASCII only; look for punycode label
            local s = "." .. r
            if s:find( ".xn--", 1, true ) then
                r = true
            else
                r = false
            end
        else
            r = true
        end
    end
    return r
end -- URLutil.isDomainInt()



URLutil.isHost = function ( s )
    -- Is this a domain name or an IP address?
    -- Returns true, 4, 6, or false; nil if s is not a string
    return URLutil.isDomain( s ) or URLutil.isIP( s )
end -- URLutil.isHost()



URLutil.isHostPathResource = function ( s )
    -- Is this a resource URL, even if the scheme and // are omitted?
    -- Precondition:
    --     s  -- string
    -- Postcondition:
    --     Returns true or false
    local r = URLutil.isResourceURL( s )
    if not r  and  type( s ) == "string" then
        r = URLutil.isResourceURL( "//" .. mw.text.trim( s ) )
    end
    return r
end -- URLutil.isHostPathResource()



URLutil.isIP = function ( s )
    -- Returns 4 or 6 for a valid IP address, false otherwise
    return URLutil.isIPv4( s ) and 4 or URLutil.isIPv6( s ) and 6
end -- URLutil.isIP()



URLutil.isIPlocal = function ( s )
    -- IPv4 according to RFC 1918, RFC 1122; even any 0.0.0.0 (RFC 5735)
    -- Precondition:
    --     s  -- string
    -- Postcondition:
    --     Returns true for 0.*.*.*  10.*.*.*  127.*.*.*  169.254.*.*
    --                      172.(16...31).*.*  192.168.*.*
    --             false otherwise, also if s is not a string
    local r = false
    if type( s ) == "string" then
        local num = s:match( "^ *([01][0-9]*)%." )
        if num then
            local second = s:match( "^ *[0-9]+%.([0-9]+)%." )
            num = tonumber( num )
            if second then
                second = tonumber( second )
            end
            if num == 0 then
                r = s:match( "^ *0+%.[0-9]+%.[0-9]+%.[0-9]+ *$" )
            elseif num == 10  or  num == 127 then
                -- loopback; private/local host: 127.0.0.1
                r = URLutil.isIPv4( s )
            elseif num == 169 then
                -- 169.254.*.*
                if second == 254 then
                    r = URLutil.isIPv4( s )
                end
            elseif num == 172 then
                -- 172.(16...31).*.*
                if second  and  second >= 16  and  second <= 31 then
                    r = URLutil.isIPv4( s )
                end
            elseif num == 192 then
                -- 192.168.*.*
                if second == 168 then
                    r = URLutil.isIPv4( s )
                end
            end
        end
    end
    if r then
        r = true
    else
        r = false
    end
    return r
end -- URLutil.isIPlocal()



URLutil.isIPv4 = function ( s )
    -- Is this an IPv4 address in dotted decimal notation?
    -- Precondition:
    --     s  -- string
    -- Postcondition:
    --     Returns true or false
    local function legal( n )
              return ( tonumber( n ) < 256 )
          end
    local r = false
    if type( s ) == "string" then
        local p1, p2, p3, p4 = s:match( "^%s*([1-9][0-9]?[0-9]?)%.([12]?[0-9]?[0-9])%.([12]?[0-9]?[0-9])%.([12]?[0-9]?[0-9])%s*$" )
        if p1 and p2 and p3 and p4 then
            r = legal( p1 ) and legal( p2 ) and legal( p3 ) and legal( p4 )
        end
    end
    return r
end -- URLutil.isIPv4()



URLutil.isIPv6 = function ( s )
    -- Is this an IPv6 address (not bracketed)?
    -- Precondition:
    --     s  -- string
    -- Postcondition:
    --     Returns true or false
    local dcolon, groups
    if type( s ) ~= "string" then
        return false
    end
    -- trim first; surrounding whitespace is tolerated as in isIPv4()
    s = mw.text.trim( s )
    if s:len() == 0
       or s:find( "[^:%x]" ) -- only colon and hex digits are legal chars
       or s:find( "^:[^:]" ) -- can begin or end with :: but not with single :
       or s:find( "[^:]:$" )
       or s:find( ":::" )
    then
        return false
    end
    s, dcolon = s:gsub( "::", ":" )
    if dcolon > 1 then
        return false
    end -- at most one ::
    s = s:gsub( "^:?", ":" ) -- prepend : if needed, upper
    s, groups = s:gsub( ":%x%x?%x?%x?", "" ) -- remove valid groups, and count them
    return ( ( dcolon == 1 and groups < 8 ) or
             ( dcolon == 0 and groups == 8 ) )
           and ( s:len() == 0 or ( dcolon == 1 and s == ":" ) ) -- might be one dangling : if original ended with ::
end -- URLutil.isIPv6()



URLutil.isMailAddress = function ( s )
    -- Is this a syntactically valid e-mail address?
    -- Precondition:
    --     s  -- string
    -- Postcondition:
    --     Returns true or false
    if type( s ) == "string" then
        s = mw.ustring.match( s, "^%s*[%w%.%%_-]+@([%w%.%%-]+)%s*$" )
        return URLutil.isDomain( s )  or  false
    end
    return false
end -- URLutil.isMailAddress()



URLutil.isMailLink = function ( s )
    -- Is this a mailto: link with valid e-mail address?
    -- Precondition:
    --     s  -- string
    -- Postcondition:
    --     Returns true or false
    if type( s ) == "string" then
        local addr
        s, addr = mw.ustring.match( s, "^%s*([Mm][Aa][Ii][Ll][Tt][Oo]):(%S[%w%.%%_-]*@[%w%.%%-]+)%s*$" )
        if type( s ) == "string" then
            if s:lower() == "mailto" then
                return URLutil.isMailAddress( addr )
            end
        end
    end
    return false
end -- URLutil.isMailLink()



local function isProtocolAccepted( prot, supplied )
    -- Is the protocol among a list of permitted schemes?
    -- Precondition:
    --     prot      -- string, like "http" "http:" "http://" or "//"
    --     supplied  -- string, space separated schemes,
    --                  with leading and trailing space
    -- Postcondition:
    --     Returns true or false
    if type( prot ) == "string" then
        local scheme, colon, slashes =
            mw.ustring.match( prot, "^%s*([a-zA-Z]*)(:?)(/?/?)%s*$" )
        if scheme  and  slashes ~= "/" then
            if scheme == "" then
                if colon ~= ":"  and  slashes == "//" then
                    return true
                end
            elseif colon == ":"  or  slashes == "" then
                local s = supplied:match( " " .. scheme:lower() .. " " )
                if type( s ) == "string" then
                    return true
                end
            end
        end
    end
    return false
end -- isProtocolAccepted()



URLutil.isProtocolDialog = function ( prot )
    -- Is this a protocol which starts a dialog application?
    return isProtocolAccepted( prot, " mailto irc ircs ssh telnet " )
end -- URLutil.isProtocolDialog()



URLutil.isProtocolWiki = function ( prot )
    -- Is this one of the listed resource protocols?
    return isProtocolAccepted( prot,
                               " ftp ftps git http https nntp sftp svn worldwind " )
end -- URLutil.isProtocolWiki()



URLutil.isResourceURL = function ( url )
    -- Is this an URL of a resource: // http:// https:// ftp:// sftp:// ?
    -- Precondition:
    --     url  -- string
    -- Postcondition:
    --     Returns true or false
    local scheme = URLutil.getScheme( url )
    if scheme then
        local s = " // http:// https:// ftp:// sftp:// "
        s = s:find( string.format( " %s ", scheme ),  1,  true )
        if s then
            if URLutil.getAuthority( url ) then
                if not url:match( "%S%s+%S" ) then
                    local s1, s2 = url:match( "^([^#]+)(#.*)$" )
                    if s2 then
                        -- a fragment requires a slash behind the host
                        if url:match( "^%s*[a-zA-Z]*:?//(.+)/" ) then
                            return true
                        end
                    else
                        return true
                    end
                end
            end
        end
    end
    return false
end -- URLutil.isResourceURL()



URLutil.isSuspiciousURL = function ( url )
    -- Is this no resource URL, or does it contain dubious characters?
    -- Precondition:
    --     url  -- string
    -- Postcondition:
    --     Returns true or false
    if URLutil.isResourceURL( url ) then
        local s   = URLutil.getAuthority( url )
        local pat = "[%[|%]" ..
                    mw.ustring.char( 34,
                                     8201, 45, 8207,
                                     8234, 45, 8239,
                                     8288 )
                    .. "]"
        -- pat holds multi-byte code points and ranges of them;
        -- it needs to be matched by code points, not bytes
        if s:find( "@", 1, true )
           or url:find( "''", 1, true )
           or mw.ustring.find( url, pat )
           or url:find( "[%.,]$" ) then
            return true
        end
        -- TODO  zero width character ??
        return false
    end
    return true
end -- URLutil.isSuspiciousURL()



URLutil.isUnescapedURL = function ( url, trailing )
    -- Is this a web URL containing [ | ] ?
    -- Precondition:
    --     url       -- string
    --     trailing  -- if a string, the result is always false
    -- Postcondition:
    --     Returns true or false
    if type( trailing ) ~= "string" then
        if URLutil.isWebURL( url ) then
            if url:match( "[%[|%]]" ) then
                return true
            end
        end
    end
    return false
end -- URLutil.isUnescapedURL()



URLutil.isWebURL = function ( url )
    -- Is this an URL with scheme and authority,
    -- without inner whitespace and without '' ?
    -- Precondition:
    --     url  -- string
    -- Postcondition:
    --     Returns true or false
    if URLutil.getScheme( url ) and URLutil.getAuthority( url ) then
        if not url:find( "%S%s+%S" )  and
           not url:find( "''", 1, true ) then
            return true
        end
    end
    return false
end -- URLutil.isWebURL()



URLutil.wikiEscapeURL = function ( url )
    -- Escape [ | ] by numeric entities, to be used in wikitext
    -- Precondition:
    --     url  -- string
    -- Postcondition:
    --     Returns string
    if url:find( "[%[|%]]" ) then
        -- replacing a character by itself would be a no-op
        url = url:gsub( "%[", "&#91;" )
                 :gsub( "|", "&#124;" )
                 :gsub( "%]", "&#93;" )
    end
    return url
end -- URLutil.wikiEscapeURL()



Failsafe.failsafe = function ( atleast )
    -- Retrieve versioning and check for compliance
    -- Precondition:
    --     atleast  -- string, with required version
    --                         or wikidata|item|~|@ or false
    -- Postcondition:
    --     Returns  string  -- with queried version/item, also if problem
    --              false   -- if appropriate
    -- 2020-08-17
    local since  = atleast
    local last   = ( since == "~" )
    local linked = ( since == "@" )
    local link   = ( since == "item" )
    local r
    if last  or  link  or  linked  or  since == "wikidata" then
        local item = Failsafe.item
        since = false
        if type( item ) == "number"  and  item > 0 then
            local suited = string.format( "Q%d", item )
            if link then
                r = suited
            else
                local entity = mw.wikibase.getEntity( suited )
                if type( entity ) == "table" then
                    local seek = Failsafe.serialProperty or "P348"
                    local vsn  = entity:formatPropertyValues( seek )
                    if type( vsn ) == "table"  and
                       type( vsn.value ) == "string"  and
                       vsn.value ~= "" then
                        if last  and  vsn.value == Failsafe.serial then
                            r = false
                        elseif linked then
                            if mw.title.getCurrentTitle().prefixedText
                               ==  mw.wikibase.getSitelink( suited ) then
                                r = false
                            else
                                r = suited
                            end
                        else
                            r = vsn.value
                        end
                    end
                end
            end
        end
    end
    if type( r ) == "nil" then
        -- byte-wise comparison of version strings
        if not since  or  since <= Failsafe.serial then
            r = Failsafe.serial
        else
            r = false
        end
    end
    return r
end -- Failsafe.failsafe()



local function Template( frame, action, amount )
    -- Run actual code from template transclusion
    -- Precondition:
    --     frame   -- object
    --     action  -- string, with function name
    --     amount  -- number, of args if > 1
    -- Postcondition:
    --     Return string or not
    local n = amount or 1
    local v = { }
    local r, s
    for i = 1, n do
        s = frame.args[ i ]
        if s then
            s = mw.text.trim( s )
            if s ~= "" then
                v[ i ] = s
            end
        end
    end -- for i
    if v[ 1 ] then
        r = URLutil[ action ]( v[ 1 ], v[ 2 ], v[ 3 ] )
    end
    return r
end -- Template()



-- Export
local p = { }

-- Functions whose result is returned as it is, or "" if falsy;
-- the number tells how many template parameters are passed on.
local exportValue = { decode          = 2,
                      encode          = 2,
                      getAuthority    = 1,
                      getHost         = 1,
                      getLocation     = 1,
                      getNormalized   = 1,
                      getPath         = 1,
                      getPort         = 1,
                      getRelativePath = 1,
                      getScheme       = 1,
                      getSortkey      = 1,
                      getTLD          = 1,
                      getTop2domain   = 1,
                      getTop3domain   = 1,
                      isIP            = 1,
                      wikiEscapeURL   = 1 }

-- Functions whose result is returned as "1" if truthy, or "" otherwise.
local exportFlag = { isAuthority        = 1,
                     isDomain           = 1,
                     isDomainExample    = 1,
                     isDomainInt        = 1,
                     isHost             = 1,
                     isHostPathResource = 1,
                     isIPlocal          = 1,
                     isIPv4             = 1,
                     isIPv6             = 1,
                     isMailAddress      = 1,
                     isMailLink         = 1,
                     isProtocolDialog   = 1,
                     isProtocolWiki     = 1,
                     isResourceURL      = 1,
                     isSuspiciousURL    = 1,
                     isUnescapedURL     = 2,
                     isWebURL           = 1 }

for action, amount in pairs( exportValue ) do
    p[ action ] = function ( frame )
                      return Template( frame, action, amount )  or  ""
                  end
end -- for action, amount

for action, amount in pairs( exportFlag ) do
    p[ action ] = function ( frame )
                      return Template( frame, action, amount ) and "1" or ""
                  end
end -- for action, amount

function p.getFragment( frame )
    -- Fragment with leading '#', or ""
    local r = Template( frame, "getFragment", 2 )
    if r then
        r = "#" .. r
    else
        r = ""
    end
    return r
end

function p.getQuery( frame )
    -- Entire query with leading '?', or value of parameter 2, or ""
    local r = Template( frame, "getQuery", 3 )
    if r then
        local key = frame.args[ 2 ]
        if key then
            key = mw.text.trim( key )
            if key == "" then
                key = nil
            end
        end
        if not key then
            r = "?" .. r
        end
    else
        r = ""
    end
    return r
end

p.failsafe = function ( frame )
    -- Versioning interface
    local s = type( frame )
    local since
    if s == "table" then
        since = frame.args[ 1 ]
    elseif s == "string" then
        since = frame
    end
    if since then
        since = mw.text.trim( since )
        if since == "" then
            since = false
        end
    end
    return Failsafe.failsafe( since )  or  ""
end

function p.URLutil()
    -- Access to the library from other Lua modules
    return URLutil
end

return p