Module:OsmPageTitleParser
Jump to navigation
Jump to search
This documentation is transcluded from Module:OsmPageTitleParser/doc. (Edit | history)
Note to editors: Please don't categorize this template by editing it directly. Instead, place the category in its documentation page, in its "includeonly" section.
Note to editors: Please don't categorize this template by editing it directly. Instead, place the category in its documentation page, in its "includeonly" section.
This module parses a title object into a language, a key, and an optional tag value. It is designed to be used by other modules. See also the testcases code.
2 tests failed.
Text | Expected | Actual | |
---|---|---|---|
Key:something:* | {key="something:",language="en"} | {language="en",key="something:*"} |
Text | Expected | Actual | |
---|---|---|---|
Key:something | {key="something",language="en"} | {language="en",key="something"} | |
Key:some thing | {key="some_thing",language="en"} | {language="en",key="some_thing"} | |
Key:some_thing | {key="some_thing",language="en"} | {language="en",key="some_thing"} | |
Key:some:thing | {key="some:thing",language="en"} | {language="en",key="some:thing"} | |
KEY::some:thing | {key=":some:thing",language="en"} | {language="en",key=":some:thing"} | |
Key:some:thing:aa | {key="some:thing:aa",language="en"} | {language="en",key="some:thing:aa"} | |
Key:some:thing:aa: | {key="some:thing:aa:",language="en"} | {language="en",key="some:thing:aa:"} |
Text | Expected | Actual | |
---|---|---|---|
cnr:Key:name | {key="name",language="cnr"} | {language="cnr",key="name"} | |
cnr:Key:name:cnr | {key="name:cnr",language="cnr"} | {language="cnr",key="name:cnr"} | |
ko:key:some:thing | {key="some:thing",language="ko"} | {language="ko",key="some:thing"} | |
kO:key:some:thing:o | {key="some:thing:o",language="ko"} | {language="ko",key="some:thing:o"} | |
kO:key:so me:thing:o: | {key="so_me:thing:o:",language="ko"} | {language="ko",key="so_me:thing:o:"} | |
No:key:abc | {key="abc",language="no"} | {language="no",key="abc"} |
Text | Expected | Actual | |
---|---|---|---|
ru:Key:something | {key="something",language="ru"} | {language="ru",key="something"} | |
rU:KEY:some:thing:o | {key="some:thing:o",language="ru"} | {language="ru",key="some:thing:o"} | |
rU:keY:so me:thing:o: | {key="so_me:thing:o:",language="ru"} | {language="ru",key="so_me:thing:o:"} |
Text | Expected | Actual | |
---|---|---|---|
en | |||
EN | |||
fr | FR: | FR: | |
Fr | FR: | FR: | |
PT | Pt: | Pt: | |
(nil) |
Text | Expected | Actual | |
---|---|---|---|
bh:Main | {language="bh",_parseFailed=true} | {language="bh",_parseFailed=true} | |
ca-valencia:Main | {language="ca-valencia",_parseFailed=true} | {language="ca-valencia",_parseFailed=true} | |
cnr:Main | {language="cnr",_parseFailed=true} | {language="cnr",_parseFailed=true} | |
gcf:Main | {language="gcf",_parseFailed=true} | {language="gcf",_parseFailed=true} | |
gsw:Main | {language="gsw",_parseFailed=true} | {language="gsw",_parseFailed=true} | |
kbp:Main | {language="kbp",_parseFailed=true} | {language="kbp",_parseFailed=true} | |
kfa:Main | {language="kfa",_parseFailed=true} | {language="kfa",_parseFailed=true} | |
mrw:Main | {language="mrw",_parseFailed=true} | {language="mrw",_parseFailed=true} | |
rcf:Main | {language="rcf",_parseFailed=true} | {language="rcf",_parseFailed=true} | |
sr-cyrl:Main | {language="sr-cyrl",_parseFailed=true} | {language="sr-cyrl",_parseFailed=true} | |
sr-latn:Main | {language="sr-latn",_parseFailed=true} | {language="sr-latn",_parseFailed=true} | |
sxu:Main | {language="sxu",_parseFailed=true} | {language="sxu",_parseFailed=true} | |
swg:Main | {language="swg",_parseFailed=true} | {language="swg",_parseFailed=true} | |
sxu:Main | {language="sxu",_parseFailed=true} | {language="sxu",_parseFailed=true} | |
trp:Main | {language="trp",_parseFailed=true} | {language="trp",_parseFailed=true} | |
tzm:Main | {language="tzm",_parseFailed=true} | {language="tzm",_parseFailed=true} | |
zgh:Main | {language="zgh",_parseFailed=true} | {language="zgh",_parseFailed=true} | |
Nds:Main | {language="nds",_parseFailed=true} | {language="nds",_parseFailed=true} | |
Gcf:Test | {language="gcf",_parseFailed=true} | {language="gcf",_parseFailed=true} | |
Zh-hant:Relation | {language="zh-hant",_parseFailed=true} | {language="zh-hant",_parseFailed=true} | |
Pt-br:Main | {language="pt-br",_parseFailed=true} | {language="pt-br",_parseFailed=true} | |
No:Main | {language="no",_parseFailed=true} | {language="no",_parseFailed=true} |
Text | Expected | Actual | |
---|---|---|---|
POI:Scotiabank | {language="en",_parseFailed=true} | {language="en",_parseFailed=true} | |
Switzerland:Berne | {language="en",_parseFailed=true} | {language="en",_parseFailed=true} |
Text | Expected | Actual | |
---|---|---|---|
akey=avalue | {k="akey",v="avalue"} | {k="akey",v="avalue"} | |
akey | {k="akey"} | {k="akey"} | |
akey= | {k="akey",v=""} | {k="akey",v=""} | |
akey=ava=lue | {k="akey",v="ava=lue"} | {k="akey",v="ava=lue"} | |
akey==ava=lue= | {k="akey",v="=ava=lue="} | {k="akey",v="=ava=lue="} | |
{k=""} | {k=""} | ||
(nil) | {} | {} |
Text | Expected | Actual | |
---|---|---|---|
Tag:something=abc | {value="abc",key="something",language="en"} | {value="abc",key="something",language="en"} | |
Tag:some:thing=abc:xyz | {value="abc:xyz",key="some:thing",language="en"} | {value="abc:xyz",key="some:thing",language="en"} | |
TAG::some:thing=aa=bb=c | {value="aa=bb=c",key=":some:thing",language="en"} | {value="aa=bb=c",key=":some:thing",language="en"} | |
taG:some:thing:aa bb=yy | {value="yy",key="some:thing:aa_bb",language="en"} | {value="yy",key="some:thing:aa_bb",language="en"} | |
Tag:some:thing:aa:=a b | {value="a_b",key="some:thing:aa:",language="en"} | {value="a_b",key="some:thing:aa:",language="en"} |
Text | Expected | Actual | |
---|---|---|---|
cnr:Tag:name:cnr=Studentska | {value="Studentska",key="name:cnr",language="cnr"} | {value="Studentska",key="name:cnr",language="cnr"} | |
ko:key:some:thing=abc x:yz | {value="abc_x:yz",key="some:thing",language="ko"} | {value="abc_x:yz",key="some:thing",language="ko"} | |
kO:key:some:thing:o=:a: | {value=":a:",key="some:thing:o",language="ko"} | {value=":a:",key="some:thing:o",language="ko"} | |
kO:key:so me:thing:o:=* | {value="*",key="so_me:thing:o:",language="ko"} | {value="*",key="so_me:thing:o:",language="ko"} |
Text | Expected | Actual | |
---|---|---|---|
ru:Key:something=abc | {value="abc",key="something",language="ru"} | {value="abc",key="something",language="ru"} | |
rU:KEY:some:thing:o=a=b:c | {value="a=b:c",key="some:thing:o",language="ru"} | {value="a=b:c",key="some:thing:o",language="ru"} | |
rU:keY:so me:thing:o:=== | {value="==",key="so_me:thing:o:",language="ru"} | {value="==",key="so_me:thing:o:",language="ru"} | |
RU:Moscow | {language="ru",_parseFailed=true} | {language="ru",_parseFailed=true} |
Text | Expected | Actual | |
---|---|---|---|
Talk:Main | {language="en",_parseFailed=true} | {language="en",_parseFailed=true} | |
Talk:Pt:Creating an Account | {language="pt",_parseFailed=true} | {language="pt",_parseFailed=true} | |
Talk:Tag:phone=tag | {value="tag",key="phone",language="en",_parseFailed=true} | {value="tag",key="phone",language="en"} | |
JA talk:Bus routes in Kanagawa | {language="ja",_parseFailed=true} | {language="ja",_parseFailed=true} | |
Talk:POI:The Church of Jesus Christ of Latter-day Saints | {language="en",_parseFailed=true} | {language="en",_parseFailed=true} |
Text | Expected | Actual | |
---|---|---|---|
something | {language="en",_parseFailed=true} | {language="en",_parseFailed=true} | |
FR:something | {language="fr",_parseFailed=true} | {language="fr",_parseFailed=true} | |
ko:something | {language="ko",_parseFailed=true} | {language="ko",_parseFailed=true} | |
some:thing | {language="en",_parseFailed=true} | {language="en",_parseFailed=true} | |
FR:some:thing | {language="fr",_parseFailed=true} | {language="fr",_parseFailed=true} | |
KO:some:thing | {language="ko",_parseFailed=true} | {language="ko",_parseFailed=true} | |
{_parseFailed=true} | {_parseFailed=true} |
-- Module table: the public functions below are attached to it, and it is returned at the end of the file.
local p = {}
-- Shared constants. This module reads data.nsToLangCodeMap (namespace id -> language code)
-- and data.customLangCodes (looked up by lower-cased language code).
local data = mw.loadData('Module:OSM Constants')
-- Module_talk:OsmPageTitleParser/testcases has many test cases showing how to use this module
-- Simple debugging:
-- =p.parseTitle(mw.title.new('Key:test')).key
-- Convenience wrapper around p.parseTitleToObj: parses the given title object
-- into a fresh table and returns that table.
-- The table is empty when title is nil; otherwise it carries at least .language,
-- plus .key (and optionally .value) when the title was parsed successfully.
function p.parseTitle(title)
    local parsed = {}
    p.parseTitleToObj(parsed, title)
    return parsed
end
-- Parses a title of the form (lang-code:)?(tag|key):(tagkey)(=tagvalue)?
-- into a language object, a key string and an optional value string.
-- The parsing is done by hand because Lua patterns are too limited for this.
-- The actual "tag"/"key" prefix is matched case-insensitively and then discarded.
-- Spaces in the title text are replaced with underscores before parsing.
-- Params:
--   result - table that receives the output fields (.language, .key, .value)
--   title  - title object; only its .namespace and .text fields are read
-- result.language is always set unless title is nil; result.key and result.value
-- are only set when the title was successfully parsed. result.value stays unset
-- when there is no equal sign.
-- Returns true if parsed, false otherwise
function p.parseTitleToObj(result, title)
    if not title then
        return false
    end

    -- Round the namespace id down to an even number so that a talk page is
    -- treated like its corresponding main page.
    local nsId = title.namespace
    nsId = nsId - nsId % 2

    -- If this is one of the known language namespaces, the language is fixed
    -- here and no further language code is accepted from the title text.
    local language
    local nsLangCode = data.nsToLangCodeMap[nsId]
    if nsLangCode then
        language = mw.getLanguage(nsLangCode)
    end

    local normalizedText = mw.ustring.gsub(title.text, ' ', '_')

    local prefix
    local keyParts = {}
    for _, part in ipairs(mw.text.split(normalizedText, ':', true)) do
        if prefix then
            -- everything after the prefix belongs to the key(=value) string
            keyParts[#keyParts + 1] = part
        else
            -- this could be the language code or the prefix (tag or key)
            local lowered = string.lower(part)
            if lowered == 'tag' or lowered == 'key' then
                prefix = lowered
            elseif not language and (data.customLangCodes[lowered] or mw.language.isSupportedLanguage(lowered)) then
                -- mw.getLanguage will create an object even if the language is
                -- not supported, hence the explicit check above
                language = mw.getLanguage(lowered)
            else
                -- unrecognized, there was no tag or key as first or second part
                break
            end
        end
    end

    -- Join the parts back with ':' and split into key and (optional) value.
    local tagkey, tagvalue
    local parsed = false
    if #keyParts > 0 then
        tagkey, tagvalue = p.splitKeyValue(table.concat(keyParts, ':'))
        if tagkey or tagvalue then
            parsed = true
        end
    end

    -- Fall back to the wiki content language when none was detected.
    result.language = language or mw.language.getContentLanguage()

    if not parsed then
        return false
    end

    result.key = tagkey
    if tagvalue then
        result.value = tagvalue
    end
    return true
end
-- Given a key=value string, split it at the FIRST equal sign and return both parts.
-- Params: keyvalue - string such as "akey=avalue" (may be nil)
-- Returns: tagkey, tagvalue
--   * no equal sign           -> keyvalue, nil
--   * "key=" (nothing after)  -> "key", ""
--   * later equal signs stay in the value ("a=b=c" -> "a", "b=c")
--   * string starts with '='  -> nil, nil (there is no key to return)
--   * keyvalue is nil         -> nothing is returned
function p.splitKeyValue(keyvalue)
    if not keyvalue then return end
    local tagkey, tagvalue
    -- plain (non-pattern) search for the first '='
    local eqlSignPos = mw.ustring.find(keyvalue, '=', 1, true)
    if not eqlSignPos then
        tagkey = keyvalue
    elseif eqlSignPos > 1 then
        -- only split when there is at least one character before the '='
        tagkey = mw.ustring.sub(keyvalue, 1, eqlSignPos - 1)
        tagvalue = mw.ustring.sub(keyvalue, eqlSignPos + 1)
    end
    return tagkey, tagvalue
end
-- Given a language code, returns the proper title prefix (including the trailing colon):
--   * empty string for nil, an empty code, or English ('en')
--   * the code in all caps for languages listed in data.nsToLangCodeMap
--   * the code with its first letter capitalized for everything else
-- The comparison is case-insensitive: the code is lower-cased first.
function p.langPrefix(langCode)
    local code = ''
    if langCode then
        code = mw.ustring.lower(langCode)
    end
    if code == '' or code == 'en' then
        return ''
    end

    -- languages that appear in the namespace map get an upper-case prefix
    for _, nsLangCode in pairs(data.nsToLangCodeMap) do
        if nsLangCode == code then
            return string.upper(code) .. ':'
        end
    end

    local contentLanguage = mw.getContentLanguage()
    return contentLanguage:ucfirst(code) .. ':'
end
-- Frame entry point: returns the language code detected for the page title
-- given as the first frame argument.
-- Params: frame - frame object; frame.args[1] is the title text
-- Returns: the .code of the parsed language object, or nil when no language
-- could be determined (p.parseTitle leaves .language unset when it is given a
-- nil title, which happens when mw.title.new cannot build a title from the text).
function p.languageCodeInTitle(frame)
    local title = mw.title.new(frame.args[1])
    local language = p.parseTitle(title).language
    -- Guard against indexing nil: without a title there is no language object.
    if not language then
        return nil
    end
    return language.code
end
-- Frame entry point: returns the key parsed from the page title given as the
-- first frame argument, or nil when the title could not be parsed.
function p.keyInTitle(frame)
    local parsed = p.parseTitle(mw.title.new(frame.args[1]))
    return parsed.key
end
-- Frame entry point: returns the tag value parsed from the page title given as
-- the first frame argument, or nil when the title has no value part or could
-- not be parsed.
function p.valueInTitle(frame)
    local parsed = p.parseTitle(mw.title.new(frame.args[1]))
    return parsed.value
end
-- Frame entry point: builds a label from the page title given as the first
-- frame argument. Returns "key=value" when the title has a value part,
-- otherwise just the key (nil when the title could not be parsed).
function p.dataItemLabelFromTitle(frame)
    local parsed = p.parseTitle(mw.title.new(frame.args[1]))
    local value = parsed.value
    if not value then
        return parsed.key
    end
    return parsed.key .. "=" .. value
end
-- Export the module table with all public functions.
return p