Module:DPLlua: Difference between revisions

From Ato Wiki
en>Alistair3149
(Created page with "-- <nowiki> local dpl = {} dpl.pipe = '¦' local dataContentMarker = '`#@@#`' local usesInclude = {} -- Custom function for splitting a string because mw.text.split() is waa...")
 
starcitizen>Alistair3149
m (1 revision imported)
(No difference)

Revision as of 14:54, 17 March 2020

Documentation for this module may be created at Module:DPLlua/doc

-- <nowiki>
-- Module table; it is returned at the end of the file and carries the public API (dpl.ask)
local dpl = {}

-- Exposed on the module table but not read anywhere in this file. It holds the same
-- character that formatDpl substitutes for '|' inside parameter values.
dpl.pipe = '¦'
-- Marker placed on both sides of every included value in the generated tablerow setting
-- (see formatDpl), so that toTable can find those values again and escape them
local dataContentMarker = '`#@@#`'
-- Queue of booleans: formatDpl appends one entry per query (true when the query had an
-- include parameter) and toTable removes the first entry for each result it parses
local usesInclude = {}

-- Custom function for splitting a string because mw.text.split() is waaay too slow.
-- Works on bytes (string.find / string.sub), not on Unicode code points.
-- @param str      string to split
-- @param pattern  separator: a Lua pattern, or a literal string when plain is true
-- @param plain    when true the separator is matched literally
-- @return sequence with the pieces found between separators; it always holds at
--         least one element (the whole string when the separator never matches)
local function split( str, pattern, plain )
	local res = {}
	local length = string.len( str )
	local startIndex = 1 -- nil once the whole string has been consumed

	while startIndex do
		local i, j = string.find( str, pattern, startIndex, plain )
		if not i then
			-- No separator left: the remainder is the last piece
			res[#res + 1] = string.sub( str, startIndex )
			startIndex = nil
		elseif j < i then
			-- The separator matched the empty string. Emit one byte and step past it,
			-- otherwise startIndex would never advance and the loop would never end.
			res[#res + 1] = string.sub( str, startIndex, i )
			startIndex = i < length and i + 1 or nil
		else
			res[#res + 1] = string.sub( str, startIndex, i - 1 )
			startIndex = j + 1
		end
	end

	return res
end

-- Hand-rolled for speed as well: returns str without its leading and trailing whitespace
local function trim( str )
	local stripped = string.match( str, '^%s*(.-)%s*$' )
	return stripped
end

-- Wikitext characters that get replaced by an HTML entity in escape()
local escapeChars = {
	['{'] = '&#123;',
	['}'] = '&#125;',
	['['] = '&#91;',
	[']'] = '&#93;',
	['|'] = '&#124;',
	['-'] = '&#8208;',
}
-- Replaces wikitext syntax characters in str with HTML entities.
-- @param str  string to escape
-- @return the escaped string
local function escape( str )
	-- ❴ (U+2774, bytes \226\157\180) and ❵ (U+2775, bytes \226\157\181) are treated like
	-- { and } (Wtf dpl...). They are 3 bytes long, so they can't go inside a [] pattern:
	-- a byte class would also rewrite those bytes inside unrelated UTF-8 characters and
	-- corrupt them. Ustring would fix this but it's way too slow, so the two sequences
	-- are matched as whole literals instead.
	str = string.gsub( str, '\226\157\180', '&#123;' )
	str = string.gsub( str, '\226\157\181', '&#125;' )
	str = string.gsub( str, '[{}%[%]|%-]', escapeChars )
	return str
end

-- Lookup from numeric HTML entity back to the literal character it stands for
local unEscapeChars = {
	['&#8208;'] = '-',
	['&#124;'] = '|',
	['&#93;'] = ']',
	['&#91;'] = '[',
	['&#125;'] = '}',
	['&#123;'] = '{'
}
-- Turns the entities listed in unEscapeChars back into their characters.
-- Any other decimal entity has no entry in the table, so gsub leaves it as it is.
local function unEscape( str )
	local restored = string.gsub( str, '&#%d+;', unEscapeChars )
	return restored
end

-- Deletes every output-formatting setting from a query table (in place), leaving
-- the remaining settings untouched. Returns nothing.
local function removeFormattingSettings( query )
	local formattingKeys = {
		'mode', 'table', 'tablerow', 'tablesortcol',
		'headingmode', 'headingcount',
		'listattr', 'itemattr', 'hlistattr', 'hitemattr',
		'userdateformat', 'shownamespace', 'escapelinks',
		'titlemaxlength', 'replaceintitle',
		'columns', 'rows', 'rowsize', 'rowcolformat',
		'resultsheader', 'resultsfooter',
		'oneresultheader', 'oneresultfooter',
		'noresultsheader', 'suppresserrors', 'noresultsfooter',
		'format'
	}

	for index = 1, #formattingKeys do
		query[ formattingKeys[index] ] = nil
	end
end

-- Rewrites the value of an include setting and counts its entries.
-- @param query  the include value: a comma separated list of entries
-- @return the rewritten include value (entries joined with ',' again)
-- @return the number of comma separated entries plus the number of ':' found after
--         the template name of every template entry; formatDpl emits that many
--         cells in its tablerow setting
local function formatInclude( query )
	query = split( query, ',', true )
	local count = #query

	for i = 1, #query do
		if query[i]:match( '%b{}' ) then -- Check if we are including a template
			-- NOTE(review): ¦ is more than one byte long when this file is UTF-8 encoded, so
			-- the [] class below matches its bytes one by one; a template name containing
			-- one of those bytes would presumably be cut short. Left as is.
			local templateName, params = query[i]:match( '{(.-)[¦|}]([^,]*)' )
			if params:find( '%S' ) then
				-- A former `params:gsub( '^:%-', '' )` call stood here. Its result was thrown
				-- away and strings are immutable, so it had no effect and has been removed.
				query[i] = string.format( '{%s}%s', templateName, params )
				-- gsub's second return value is the number of ':' found
				local _, colons = params:gsub( ':', '' )
				count = count + colons
			else
				query[i] = string.format( '{%s¦DPLlua helper}', templateName ) -- Use a helper template to get all the parameters of our included template
			end
		end
	end

	return table.concat( query, ',' ), count
end

-- Turns one query table into the wikitext of one or more {{#dpl:}} calls.
-- The count and offset settings are taken out of the query table (it is modified in
-- place) and default to 500 and 0. Whether the query had an include setting is
-- appended to the usesInclude queue.
-- @param query  table with the dpl settings
-- @return the generated wikitext; several calls are simply concatenated
local function formatDpl( query )
	local remaining = query.count or 500
	local position = query.offset or 0
	local hasInclude = false
	local chunks = {}

	-- Both values are written into every generated call explicitly, so they must not
	-- show up a second time among the generic parameters
	query.count = nil
	query.offset = nil

	-- Without an include setting the format parameter is used: per the original author,
	-- table format requires an include statement, and this is also a lot faster than
	-- adding an empty include statement
	local plainTemplate =
[=[
{{#dpl:
|noresultsheader = @@
|count=%s
|offset=%s
|%s
|format=,¦-¦[[%%PAGE%%¦]],,
}}]=]

	-- With an include setting table format is used, so that the individual results can
	-- be told apart when more than one item is included
	local includeTemplate =
[=[
{{#dpl:
|noresultsheader = @@
|count=%s
|offset=%s
|%s
|table=,
|tablerow=%s
}}]=]

	-- One call per block of 500 results; their outputs are combined again later
	for _ = 1, math.ceil( remaining / 500 ) do
		local params = {}
		local includeCount = 0

		for key, value in pairs( query ) do
			if key == 'include' then
				value, includeCount = formatInclude( value )
				hasInclude = true
			end
			local text = tostring( value ):gsub( '|', '¦' )
			table.insert( params, key .. '=' .. text )
		end

		local chunkSize = remaining
		if remaining > 500 then
			chunkSize = 500
		end
		local paramText = table.concat( params, '\n|' )

		if hasInclude then
			-- One marked cell per included value
			local cell = dataContentMarker .. '%%' .. dataContentMarker .. ','
			local rowFormat = string.rep( cell, includeCount )
			table.insert( chunks, string.format( includeTemplate, chunkSize, position, paramText, rowFormat ) )
		else
			table.insert( chunks, string.format( plainTemplate, chunkSize, position, paramText ) )
		end

		remaining = remaining - 500
		position = position + 500
	end

	table.insert( usesInclude, hasInclude )

	return table.concat( chunks )
end

-- Parses the preprocessed output belonging to one query into a Lua table.
-- The first entry of the usesInclude queue is removed and decides how the text is
-- parsed, so results have to be handed over in the order formatDpl produced them.
-- @param query  the expanded wikitext of one query (string)
-- @return a table whose array part holds, per page found, either the title (no include
--         setting) or a table { title = ..., include = { ... } } (with include setting).
--         Its error field holds the '<p>Extension:DynamicPageList ...</p>' text if such
--         a paragraph was present in the output.
local function toTable( query )
	local _usesInclude = table.remove( usesInclude, 1 )
	local res = {}
	
	-- Cut the error paragraph out of the text and keep it in res.error
	-- (every match overwrites the field, so the last one found is the one kept)
	query = query:gsub( '<p>Extension:DynamicPageList .-</p>', function(item) res.error = item; return '' end)
	
	if not query:match( '^@@' ) then -- @@ is used when no result is found
		if _usesInclude then
			-- Swap nowiki strip markers for their unstripped content wrapped in <nowiki> tags
			query = query:gsub( '\127\'"`UNIQ%-%-nowiki%-%x+%-QINU`"\'\127', function(item) return '<nowiki>' .. mw.text.unstripNoWiki( item ) .. '</nowiki>' end )
			-- Escape everything between two data markers (the markers themselves are dropped),
			-- so that the characters escape() handles cannot be taken for the table syntax
			-- that is split on below
			query = query:gsub( dataContentMarker..'(.-)'..dataContentMarker, escape )
			query = query:gsub( '{|.-|%-', '') -- Remove the header of the table
			-- Replace the footer of the table with a row indicator. This effectively
			-- combines the output of multiple dpl queries when count > 500
			query = query:gsub( '|}', '|-' )
		end
	
		query = trim( query )
		query = split( query, '|-', true ) -- Results of the returned pages are separated by |-
	
		for _, v in ipairs( query ) do
			if v:match( '%S' ) then -- Skip pieces that only consist of whitespace
				v = trim( v )
				-- The title is whatever stands between the leading '|[[' and the next '|'
				local title = v:match( '^|%[%[(.-)|' )
				-- NOTE(review): this is nil when the row holds fewer than three '|'; split() below
				-- would then raise an error in the include case - confirm that cannot happen
				local dataList = v:match( '^|.-|.-|(.*)' ) -- This is everything after the title
	
				if not _usesInclude then
					if title and title ~= '' then
						table.insert( res, title )
					end
				else
					-- When multiple includes are used (e.g. include={Template1},{Template2}) its results are separated by a pipe
					dataList = split( dataList, '|', true )
					local _dataList = {}
	
					for _, dataItem in ipairs( dataList ) do
						-- Undo the escaping that was applied to the marked content above
						dataItem = unEscape( dataItem )
						-- When we include an entire template we use the %ARGS% parameter supplied by dpl.
						-- However all | characters are replaced with §, e.g.:
						-- §nameLessParam
						-- §param = text [[wowee§link text]]
						-- §param2 = text {{something§something else}}
						dataItem = dataItem:gsub( '%b{}', function(x) return x:gsub( '§', '|' ) end ) -- Restore pipe characters inside links and templates
						dataItem = dataItem:gsub( '%b[]', function(x) return x:gsub( '§', '|' ) end )
						dataItem = trim( dataItem )
	
						if dataItem:match( '§' ) then -- Check if we included a template
							dataItem = split( dataItem, '§', true )
							local _dataItem = {}
	
							for i, item in ipairs( dataItem ) do
								if i ~= 1 then -- skip first item as it is a false empty string created by splitting on § when the string started with a §
									if item:find( '=' ) then -- Check if the parameter is named or unnamed
										-- Name and value are both stored as strings with surrounding whitespace removed
										local param, value = item:match( '^%s*(.-)%s*=%s*(.-)%s*$' )
										_dataItem[ param ] = value
									else
										table.insert( _dataItem, trim( item ) )
									end
								end
							end
	
							-- From here on the item is a table of parameters instead of a string
							dataItem = _dataItem
						end
	
						table.insert( _dataList, dataItem )
					end
	
					-- Rows without a usable title are dropped
					if title and title ~= '' then
						table.insert( res, { title=title, include=_dataList } )
					end
				end
			end
		end
	end

	return res
end

-- Accepts a series of tables, each containing the settings for one dpl query, and
-- returns one result table per query (see toTable), in the same order.
-- All queries are expanded with a single preprocess call; combining multiple dpl
-- queries yields better performance than doing them sequentially.
-- Every result table gets a time field holding the os.clock() time that the shared
-- preprocess call took.
function dpl.ask( ... )
	local separator = '$@µ@$'
	local settings = { ... }
	local wikitext = {}

	for index = 1, #settings do
		local query = settings[index]
		removeFormattingSettings( query )
		wikitext[index] = formatDpl( query )
	end

	local joined = table.concat( wikitext, separator )
	local startedAt = os.clock()
	local expanded = mw.getCurrentFrame():preprocess( joined )
	local elapsed = os.clock() - startedAt

	-- The separator passes through preprocessing, so it marks where each query's output ends
	local results = split( expanded, separator, true )

	for index = 1, #results do
		local parsed = toTable( results[index] )
		parsed.time = elapsed
		results[index] = parsed
	end

	return unpack( results )
end

return dpl
-- </nowiki>