Module:Excerpt

This module is rated as beta, and is ready for widespread use. It is still new and should be used with some caution to ensure the results are as expected.
This Lua module is used on approximately 3,300 pages and changes may be widely noticed. Test changes in the module's /sandbox or /testcases subpages. Consider discussing changes on the talk page before implementing them.
Transclusion count updated automatically (see documentation).
This module depends on the following other modules:
Module:Redirect
Usage

{{#invoke:Excerpt|lead}}
Main documentation: {{Transclude lead excerpt/doc}}
Transcludes the lead of an article as an excerpt.
{{#invoke:Excerpt|linked}}
Main documentation: {{Transclude linked excerpt/doc}}
Transcludes as an excerpt the lead of an article selected randomly from wikilinks on a page.
{{#invoke:Excerpt|listitem}}
Main documentation: {{Transclude list item excerpt/doc}}
Transcludes as an excerpt the lead of an article selected randomly from list items on a page.
{{#invoke:Excerpt|random}}
Main documentation: {{Transclude random excerpt/doc}}
Transcludes as an excerpt the lead of an article selected randomly from the parameters.
{{#invoke:Excerpt|selected}}
Main documentation: {{Transclude selected excerpt/doc}}
Transcludes the lead of a selected article as an excerpt.
{{#invoke:Excerpt|excerpt}}
Main documentation: {{Excerpt/doc}}
Transcludes part of an article into another article.
Note

Before saving a change to this module, please preview with:
-- Get localized data
-- The functions below read these fields from it: d.error, d.unwantedInlineTemplates,
-- d.fileNamespaces, d.captionParams and d.wantedBlockTemplates.
local d = require("Module:Excerpt/i18n")

-- Module table (presumably filled with the exported entry points and returned
-- at the end of the file, which is outside this view -- confirm).
local p = {}

-- Debugging helper.
-- `msg` is looked up in the localized table d.error (falling back to `msg`
-- itself, then to an empty string) and formatted with the arguments `a` and `b`.
-- When the module-level `errors` flag is set, the formatted message is raised
-- as a Lua error attributed to the caller of err (level 2); otherwise the
-- problem is silently swallowed and an empty string is returned.
local errors
local function err(msg, a, b)
	local template = d.error[msg] or msg or ''
	local message = mw.ustring.format(template, a, b)
	if not errors then
		return ""
	end
	error(message, 2)
end

-- Truthiness test for template-style values.
-- nil, false, "" and the strings "0", "false" and "no" count as falsy;
-- every other value (including the number 0) counts as truthy.
local function is(value)
	if not value then
		return false
	end
	local falsyStrings = { [""] = true, ["0"] = true, ["false"] = true, ["no"] = true }
	return not falsyStrings[value]
end

-- Match `text` against a list of alternative patterns.
-- Each candidate pattern is built as pre .. list[i] .. post and tried in list
-- order, starting the search at `init`. The captures of the first pattern that
-- matches are returned; nil is returned when none of them matches.
local function matchAny(text, pre, list, post, init)
	for index = 1, #list do
		local pattern = pre .. list[index] .. post
		local captures = { mw.ustring.match(text, pattern, init) }
		if captures[1] then
			return unpack(captures)
		end
	end
	return nil
end

-- gsub callback that removes unwanted templates and pseudo-templates such as #tag:ref and DEFAULTSORT.
-- `t` is one balanced {...} match. The return value follows the gsub callback
-- convention: "" deletes the match, a string replaces it, nil leaves it untouched.
local function stripTemplate(t)
	-- A template listed in d.unwantedInlineTemplates is deleted outright
	local unwanted = matchAny(t, "^{{%s*", d.unwantedInlineTemplates, "%s*%f[|}]")
	if unwanted then
		return ""
	end

	-- A wanted template may still emit a reference: drop its bare |shortref and |ref flags
	local cleaned = mw.ustring.gsub(t, "|%s*shortref%s*%f[|}]", "")
	cleaned = mw.ustring.gsub(cleaned, "|%s*ref%s*%f[|}]", "")

	-- Recurse into the template's interior (everything after the opening "{{") to purge nested unwanted templates
	local opening = mw.ustring.sub(cleaned, 1, 2)
	local interior = mw.ustring.gsub(mw.ustring.sub(cleaned, 3), "%b{}", stripTemplate)
	cleaned = opening .. interior

	-- {{Audio|Foo.ogg|Bar}} → Bar (keep only the text parameter)
	cleaned = mw.ustring.gsub(cleaned, "^{{%s*[Aa]udio.-|.-|(.-)%f[|}].*", "%1")

	-- {{Nihongo foot|English|英語|eigo}} → English (keep only the first parameter)
	cleaned = mw.ustring.gsub(cleaned, "^{{%s*[Nn]ihongo[ _]+foot%s*|(.-)%f[|}].*", "%1")

	if cleaned == t then
		return nil -- nothing changed: tell gsub to keep the existing string
	end
	return cleaned
end

-- Read the wikitext of a page, following a redirect if there is one.
-- For a file, pass ":File:Foo" or "File:Foo"; the description page is what gets read.
-- Returns two values: the content of the page and its prefixed title (the
-- redirect target's title when a redirect was followed). Returns false, false
-- when no title object can be built from `page`.
-- The `frame` parameter is accepted but not used by this function.
local function getContent(page, frame)
	local title = mw.title.new(page)
	if not title then
		return false, false
	end

	-- redirectTarget is falsy for a non-redirect, in which case the title itself is used
	title = title.redirectTarget or title

	return title:getContent(), title.prefixedText
end

-- Check image for suitability
-- `image` is wikitext containing a file name such as "[[File:Foo.png|thumb]]".
-- Returns true only if the name is in one of d.fileNamespaces, has an image
-- extension, and its (preprocessed) description does not mention "non-free";
-- returns false in every other case.
local function checkImage(image)
	local page = matchAny(image, "", d.fileNamespaces, "%s*:[^|%]]*") -- match File:(name) or Image:(name)
	if not page then return false end

	-- Limit to image types: .gif, .jpg, .jpeg, .png, .svg, .tiff, .xcf (exclude .ogg audio etc.)
	if not matchAny(page, "%.", {"[Gg][Ii][Ff]", "[Jj][Pp][Ee]?[Gg]", "[Pp][Nn][Gg]", "[Ss][Vv][Gg]", "[Tt][Ii][Ff][Ff]", "[Xx][Cc][Ff]"}, "%s*$") then
		return false
	end

	local fileDescription, fileTitle = getContent(page) -- get file description and title after following any redirect
	if fileDescription and fileDescription ~= "" then -- found description on local wiki
		if mw.ustring.match(fileDescription, "[Nn]on%-free") then return false end
		fileDescription = mw.ustring.gsub(fileDescription, "%b{}", stripTemplate) -- remove DEFAULTSORT etc. to avoid side effects of frame:preprocess
	elseif not fileTitle then
		return false
	else
		-- no local description: try commons by transcluding the file page
		fileDescription = "{{" .. fileTitle .. "}}"
	end

	-- Keep the frame in a local: the previous `frame = frame or ...` assigned an
	-- undeclared global, which leaks state and fails under strict-globals checking.
	local frame = mw.getCurrentFrame()
	fileDescription = frame:preprocess(fileDescription)

	return ( fileDescription and fileDescription ~= "" and not mw.ustring.match(fileDescription, "[Nn]on%-free") ) and true or false -- hide non-free image
end

-- Extract a [[File:...]] or [[Image:...]] link from `text`.
-- With a truthy `start` the link must begin the string; otherwise it may occur anywhere.
-- Returns the balanced [...] link plus any trailing whitespace, or nil if none is found.
local function parseImage(text, start)
	local anchor = start and "^" or "" -- "^" restricts the search to the start of the string
	local candidate = matchAny(text, anchor .. "%[%[%s*", d.fileNamespaces, "%s*:.*") -- [[File: or [[Image: ...
	if not candidate then
		return nil
	end
	-- %b[] takes the balanced brackets, so wikilinks nested in the caption stay inside the link
	local link = mw.ustring.match(candidate, "%b[]%s*")
	return link
end

-- Cut a caption at its terminator.
-- The caption ends at the first | (end of parameter) or } (end of infobox) that
-- is not inside a balanced [...] or {...} span. Returns nil for a nil caption,
-- and the whole caption when no terminator is found.
local function parseCaption(caption)
	if not caption then return nil end

	local total = mw.ustring.len(caption)
	local beyond = total + 1 -- sentinel position meaning "not found"
	local cursor = 1

	while cursor <= total do
		local linkFrom, linkTo = mw.ustring.find(caption, "%b[]", cursor)
		local braceFrom, braceTo = mw.ustring.find(caption, "%b{}", cursor)
		local stopAt = mw.ustring.find(caption, "[|}]", cursor) or beyond
		-- substitute the sentinel so the comparisons below never see nil
		linkFrom = linkFrom or beyond
		braceFrom = braceFrom or beyond

		if linkFrom < braceFrom and linkFrom < stopAt then
			cursor = linkTo + 1 -- a wikilink comes first: jump over it
		elseif braceFrom < stopAt then
			cursor = braceTo + 1 -- a template comes first: jump over it
		else
			-- the terminator precedes any further wikilink or template
			return mw.ustring.sub(caption, 1, stopAt - 1)
		end
	end

	return caption -- ran off the end without meeting a terminator
end

-- Attempt to construct a [[File:...]] block from {{infobox ... |image= ...}}
-- `text` is the wikitext of one template. Image-bearing parameters are collected
-- into `images`, keyed by the character position at which each parameter value
-- starts; captions, alt texts and image sizes are then attached to the nearest
-- image found by walking backwards from their own position.
-- Returns nil when the text has no "|" or no "=", or when no image parameter is
-- found; otherwise returns a sequence of image tokens in order of appearance,
-- each with line feeds removed and a single "\n" appended.
local function argImage(text)
	local token = nil -- NOTE(review): never read; the loop at the end declares its own local `token`
	local hasNamedArgs = mw.ustring.find(text, "|") and mw.ustring.find(text, "=")
	if not hasNamedArgs then return nil end -- filter out any template that obviously doesn't contain an image

	-- ensure image map is captured
	-- (the <!--imagemap--> marker is the one emitted by convertImageMap; here it becomes a named parameter)
	text = mw.ustring.gsub(text, '<!%-%-imagemap%-%->', '|imagemap=')

	-- find all images
	local hasImages = false
	local images = {}
	local captureFrom = 1
	-- Pass 1: any parameter whose name contains "image" (case-insensitive)
	while captureFrom < mw.ustring.len(text) do
		-- `image` captures everything from the value to the end of the text; it is trimmed when the token is built
		local argname, position, image = mw.ustring.match(text, "|%s*([^=|]-[Ii][Mm][Aa][Gg][Ee][^=|]-)%s*=%s*()(.*)", captureFrom)
		if image then -- ImageCaption=, image_size=, image_upright=, etc. do not introduce an image
			local lcArgName = mw.ustring.lower(argname)
			if mw.ustring.find(lcArgName, "caption")
			 or mw.ustring.find(lcArgName, "size")
			 or mw.ustring.find(lcArgName, "upright") then
				image = nil
			end
		end
		if image then
			hasImages = true
			images[position] = image
			captureFrom = position
		else
			captureFrom = mw.ustring.len(text) -- stop scanning (this also ends the pass when a caption/size/upright parameter was rejected)
		end
	end
	-- Pass 2: any parameter whose name contains "photo" (case-insensitive)
	captureFrom = 1
	while captureFrom < mw.ustring.len(text) do
		local position, image = mw.ustring.match(text, "|%s*[^=|]-[Pp][Hh][Oo][Tt][Oo][^=|]-%s*=%s*()(.*)", captureFrom)
		if image then
			hasImages = true
			images[position] = image
			captureFrom = position
		else
			captureFrom = mw.ustring.len(text)
		end
	end
	-- Pass 3: any parameter whose value ends in a dot followed by 3 or 4 letters (looks like a file name)
	captureFrom = 1
	while captureFrom < mw.ustring.len(text) do
		local position, image = mw.ustring.match(text, "|%s*[^=|{}]-%s*=%s*()%[?%[?([^|{}]*%.%a%a%a%a?)%s*%f[|}]", captureFrom)
		if image then
			hasImages = true
			if not images[position] then -- do not overwrite an image already recorded at this position by passes 1-2
				images[position] = image
			end
			captureFrom = position
		else
			captureFrom = mw.ustring.len(text)
		end
	end

	if not hasImages then return nil end

	-- find all captions
	local captions = {}
	captureFrom = 1
	while captureFrom < mw.ustring.len(text) do
		local position, caption = matchAny(text, "|%s*", d.captionParams, "%s*=%s*()([^\n]+)", captureFrom)
		if caption then
			-- extend caption to parse "| caption = Foo {{Template\n on\n multiple lines}} Bar\n"
			local bracedCaption = mw.ustring.match(text, "^[^\n]-%b{}[^\n]+", position)
			if bracedCaption and bracedCaption ~= "" then caption = bracedCaption end
			caption = mw.text.trim(caption)
			local captionStart = mw.ustring.sub(caption, 1, 1)
			if captionStart == '|' or captionStart == '}' then caption = nil end -- the parameter was empty: what we captured is the next parameter or the end of the template
		end
		if caption then
			-- find nearest image, and use same index for captions table
			local i = position
			while i > 0 and not images[i] do
				i = i - 1
				if images[i] then
					if not captions[i] then -- the first caption found for an image wins
						captions[i] = parseCaption(caption)
					end
				end
			end
			captureFrom = position
		else
			captureFrom = mw.ustring.len(text)
		end
	end

	-- find all alt text
	local altTexts = {}
	for position, altText in mw.ustring.gmatch(text, "|%s*[Aa][Ll][Tt]%s*=%s*()([^\n]*)") do
		if altText then

			-- altText is terminated by }} or |, but first skip any matched [[...]] and {{...}}
			local lookFrom = math.max( -- find position after whichever comes last: start of string, end of last ]] or end of last }}
			 mw.ustring.match(altText, ".*{%b{}}()") or 1, -- if multiple {{...}}, .* consumes all but one, leaving the last for %b
			 mw.ustring.match(altText, ".*%[%b[]%]()") or 1)

			local length = mw.ustring.len(altText)
			local afterText = math.min( -- find position after whichever comes first: end of string, }} or |
			 mw.ustring.match(altText, "()}}", lookFrom) or length+1,
			 mw.ustring.match(altText, "()|", lookFrom) or length+1)
			altText = mw.ustring.sub(altText, 1, afterText-1) -- chop off |... or }}... which is not part of [[...]] or {{...}}

			altText = mw.text.trim(altText)
			local altTextStart = mw.ustring.sub(altText, 1, 1)
			if altTextStart == '|' or altTextStart == '}' then altText = nil end
		end
		if altText then
			-- find nearest image, and use same index for altTexts table
			local i = position
			while i > 0 and not images[i] do
				i = i - 1
				if images[i] then
					if not altTexts[i] then
						altTexts[i] = altText
					end
				end
			end
		end
	end

	-- find all image sizes
	local imageSizes = {}
	for position, imageSizeMatch in mw.ustring.gmatch(text, "|%s*[Ii][Mm][Aa][Gg][Ee][ _]?[Ss][Ii][Zz][Ee]%s*=%s*()([^}|\n]*)") do
		-- NOTE(review): imageSizeMatch is already the text after "=", so this second
		-- match only succeeds when the value itself contains "="; as written a plain
		-- size such as "250px" yields nil and is dropped. Confirm whether that is intended.
		local imageSize = mw.ustring.match(imageSizeMatch, "=%s*([^}|\n]*)")
		if imageSize then
			imageSize = mw.text.trim(imageSize )
			local imageSizeStart = mw.ustring.sub(imageSize, 1, 1)
			if imageSizeStart == '|' or imageSizeStart == '}' then imageSize = nil end
		end
		if imageSize then
			-- find nearest image, and use same index for imageSizes table
			local i = position
			while i > 0 and not images[i] do
				i = i - 1
				if images[i] then
					if not imageSizes[i] then
						imageSizes[i] = imageSize
					end
				end
			end
		end
	end

	-- sort the keys of the images table (in a table sequence), so that images can be iterated over in order
	local keys = {}
	for key, val in pairs(images) do
		table.insert(keys, key)
	end
	table.sort(keys)

	 -- add in relevant optional parameters for each image: caption, alt text and image size
	local imageTokens = {}
	for _, index in ipairs(keys) do
		local image = images[index]
		local token = parseImage(image, true) -- look for image=[[File:...]] etc.
		if not token then -- the value is a bare file name: build the [[File:...]] link ourselves
			image = mw.ustring.match(image, "^[^}|\n]*") -- remove later arguments
			token = "[[" -- Add File: unless name already begins File: or Image:
			if not matchAny(image, "^", d.fileNamespaces, "%s*:") then
				token = token .. "File:"
			end
			token = token .. image
			local caption = captions[index]
			if caption and mw.ustring.match(caption, "%S") then token = token .. "|" .. caption end
			local alt = altTexts[index]
			if alt then token = token .. "|alt=" .. alt end
			local image_size = imageSizes[index]
			if image_size and mw.ustring.match(image_size, "%S") then token = token .. "|" .. image_size end
			token = token .. "]]"
		end
		token = mw.ustring.gsub(token, "\n","") .. "\n" -- one token per line
		table.insert(imageTokens, token)
	end
	return imageTokens
end

-- gsub callback that turns an <imagemap>...</imagemap> block into a standard image link.
-- The first File:/Image: line of the block becomes "<!--imagemap-->[[File:...]]";
-- the whole block is removed ("" is returned) when no such line can be found.
local function convertImageMap(imagemap)
	local image = matchAny(imagemap, "[>\n]%s*", d.fileNamespaces, "[^\n]*")
	if not image then
		return "" -- no image could be extracted: drop the entire block
	end
	-- strip the leading ">" or line feed (and following whitespace) that anchored the match
	local fileLink = mw.ustring.gsub(image, "[>\n]%s*", "", 1)
	return "<!--imagemap-->[[" .. fileLink .. "]]"
end

-- Turn a comma-separated list of numbers and min-max ranges into a table of flags,
-- e.g. "1,3-5" → {[1]=true, [3]=true, [4]=true, [5]=true}.
-- List items that are neither a number nor a range are ignored.
local function numberFlags(str)
	local flags = {}
	for _, item in ipairs(mw.text.split(str, ",")) do -- "1,3-5" → "1", then "3-5"
		local first, last = mw.ustring.match(item, "^%s*(%d+)%s*%-%s*(%d+)%s*$") -- "3-5" → first=3 last=5
		if not last then
			first, last = mw.ustring.match(item, "^%s*((%d+))%s*$") -- "1" → first=1 last=1
		end
		if last then
			for number = first, last do
				flags[number] = true
			end
		end
	end
	return flags
end

-- Groups of image options used by modifyImage: when the option being applied
-- belongs to one of these groups, every member of that group is first removed
-- from the image link before the new option is inserted.
local imageArgGroups = {
	{"thumb", "thumbnail", "frame", "framed", "frameless"},
	{"right", "left", "center", "none"},
	{"baseline", "middle", "sub", "super", "text-top", "text-bottom", "top", "bottom"}
}

-- Apply file arguments to an image link.
--   @param image : wikitext of the image, e.g. "[[File:Foo.png|right|Caption]]"
--   @param fileArgs : "|"-separated options such as "left|border" or "upright=0.75"; nil leaves the image unchanged
--   @return the image wikitext with each option inserted as the first parameter,
--           after removing any existing option it conflicts with (same name, or
--           same group in imageArgGroups)
local function modifyImage(image, fileArgs)
	if not fileArgs then return image end

	-- Option names are spliced into patterns below, so magic characters must be
	-- escaped: unescaped, the "-" in "text-top" acts as a lazy quantifier and the
	-- option is never matched.
	local function escapePattern(s)
		return (mw.ustring.gsub(s, "([%^%$%(%)%%%.%[%]%*%+%-%?])", "%%%1"))
	end

	for _, filearg in ipairs(mw.text.split(fileArgs, "|")) do -- handle fileArgs=left|border etc.
		local fa = mw.ustring.gsub(filearg, "=.*", "") -- "upright=0.75" → "upright"
		local group = {fa} -- group of "border" is ["border"]...
		for _, g in ipairs(imageArgGroups) do
			for _, a in ipairs(g) do
				if fa == a then group = g end -- ...but group of "left" is ["right", "left", "center", "none"]
			end
		end
		for _, a in ipairs(group) do
			local name = escapePattern(a)
			image = mw.ustring.gsub(image, "|%s*" .. name .. "%f[%A]%s*=[^|%]]*", "") -- remove "|upright=0.75" etc.
			image = mw.ustring.gsub(image, "|%s*" .. name .. "%s*([|%]])", "%1") -- replace "|left|" by "|" etc.
		end

		-- "%" is special in a gsub replacement string, so double it before inserting the option
		local insertion = mw.ustring.gsub(filearg, "%%", "%%%%")
		image = mw.ustring.gsub(image, "([|%]])", "|" .. insertion .. "%1", 1) -- replace "|" by "|left|" etc.
	end
	return image
end

-- a basic parser to trim down extracted wikitext
--   @param text : Wikitext to be processed
--   @param options : A table of options...
--          options.paraflags : Which number paragraphs to keep, as either a string (e.g. `1,3-5`) or a table (e.g. `{1=true,2=false,3=true,4=true,5=true}`. If not present, all paragraphs will be kept.
--          options.fileflags : table of which files to keep, as either a string (e.g. `1,3-5`) or a table (e.g. `{1=true,2=false,3=true,4=true,5=true}`
--          options.fileargs : args for the [[File:]] syntax, such as `left`
--          options.files : if it contains anything other than digits, whitespace, "-" and ",", it is treated as a file name and that file is added first
--          options.keepTables : if truthy, {| ... |} tables found before the lead are kept
--   @param filesOnly : If set, only return the files and not the prose
--   @return two strings: the concatenated [[File:...]] entries, then the trimmed prose
--   Note: options.paraflags and options.fileflags are converted to tables in place,
--   and options.paraflags is reset to {} when filesOnly is set.
local function parse(text, options, filesOnly)
	local allParagraphs = true -- keep all paragraphs?
	if options.paraflags then
		if type(options.paraflags) ~= "table" then options.paraflags = numberFlags(options.paraflags) end
		for _, v in pairs(options.paraflags) do
			if v then allParagraphs = false end -- if any para specifically requested, don't keep all
		end
	end
	if filesOnly then
		allParagraphs = false
		options.paraflags = {}
	end

	local maxfile = 0 -- for efficiency, stop checking images after this many have been found
	if options.fileflags then
		if type(options.fileflags) ~= "table" then options.fileflags = numberFlags(options.fileflags) end
		for k, v in pairs(options.fileflags) do
			if v and k > maxfile then maxfile = k end -- set maxfile = highest key in fileflags
		end
	end

	local fileArgs = options.fileargs and mw.text.trim(options.fileargs)
	if fileArgs == '' then fileArgs = nil end

	local leadStart = nil -- have we found some text yet?
	local t = "" -- the stripped down output text
	local fileText = "" -- output text with concatenated [[File:Foo|...]]\n entries
	local files = 0 -- how many images so far
	local paras = 0 -- how many paragraphs so far
	local startLine = true -- at the start of a line (no non-spaces found since last \n)?

	text = mw.ustring.gsub(text,"^%s*","") -- remove initial white space

	-- Add named files
	local f = options.files
	if f and mw.ustring.match(f, "[^%d%s%-,]") then -- filename rather than number list
		f = mw.ustring.gsub(f, "^%s*File%s*:%s*", "", 1)
		f = mw.ustring.gsub(f, "^%s*Image%s*:%s*", "", 1)
		f = "[[File:" .. f .. "]]"
		f = modifyImage(f, "thumb")
		f = modifyImage(f, fileArgs)
		if checkImage(f) then fileText = fileText .. f .. "\n" end
	end

	repeat -- loop around parsing a template, image or paragraph
		local token = mw.ustring.match(text, "^%b{}%s*") or false -- {{Template}} or {| Table |}
		if not leadStart and not token then token = mw.ustring.match(text, "^%b<>%s*%b{}%s*") end -- allow <tag>{{template}} before lead has started

		local line = mw.ustring.match(text, "[^\n]*")
		if token and line and mw.ustring.len(token) < mw.ustring.len(line) then -- template is followed by text (but it may just be other templates)
			line = mw.ustring.gsub(line, "%b{}", "") -- remove all templates from this line
			line = mw.ustring.gsub(line, "%b<>", "") -- remove all HTML tags from this line
			-- if anything is left, other than an incomplete further template or an image, keep the template: it counts as part of the line
			if mw.ustring.find(line, "%S") and not matchAny(line, "^%s*", { "{{", "%[%[%s*[Ff]ile:", "%[%[%s*[Ii]mage:" }, "") then
				token = nil
			end
		end

		if token then -- found a template which is not the prefix to a line of text

			if leadStart then -- lead has already started, so keep the template within the text, unless it's a whole line (navbox etc.)
				if not filesOnly and not startLine then t = t .. token end

			elseif matchAny(token, "{{%s*", d.wantedBlockTemplates, "%s*%f[|}]") then
				t = t .. token -- keep wanted block templates

			elseif is(options.keepTables) and mw.ustring.sub(token, 1, 2) == '{|' then
				t = t .. token -- keep tables

			elseif files < maxfile then -- discard template, but if we are still collecting images...
				local images = argImage(token) or {} -- look for |image= etc.
				-- Fall back to an embedded [[File:...]] when no image parameter was found.
				-- (The previous test `if not images` could never be true after `or {}`,
				-- so this fallback was unreachable.)
				if #images == 0 then
					local image = parseImage(token, false) -- look for embedded [[File:...]]
					if image then table.insert(images, image) end
				end
				for _, image in ipairs(images) do
					if files < maxfile and checkImage(image) then -- if image is found and qualifies (not a sound file, non-free, etc.)
						files = files + 1 -- count the file, whether displaying it or not
						if options.fileflags and options.fileflags[files] then -- if displaying this image
							image = modifyImage(image, "thumb")
							image = modifyImage(image, fileArgs)
							fileText = fileText .. image
						end
					end
				end
			end
		else -- the next token in text is not a template
			token = parseImage(text, true)
			if token then -- the next token in text looks like an image
				if files < maxfile and checkImage(token) then -- if more images are wanted and this is a wanted image
					files = files + 1
					if options.fileflags and options.fileflags[files] then
						local image = token -- copy token for manipulation by adding |right etc. without changing the original
						image = modifyImage(image, fileArgs)
						fileText = fileText .. image
					end
				end
			else -- got a paragraph, which ends at a file, image, blank line or end of text
				local afterEnd = mw.ustring.len(text) + 1
				local blankPosition = mw.ustring.find(text, "\n%s*\n") or afterEnd -- position of next paragraph delimiter (or end of text)
				local endPosition = math.min( -- find position of whichever comes first: [[File:, [[Image: or paragraph delimiter
				 mw.ustring.find(text, "%[%[%s*[Ff]ile%s*:") or afterEnd,
				 mw.ustring.find(text, "%[%[%s*[Ii]mage%s*:") or afterEnd,
				 blankPosition)
				token = mw.ustring.sub(text, 1, endPosition-1)
				if blankPosition < afterEnd and blankPosition == endPosition then -- paragraph ends with a blank line
					token = token .. mw.ustring.match(text, "\n%s*\n", blankPosition)
				end
				local isHatnote = not(leadStart) and mw.ustring.sub(token, 1, 1) == ':'
				if not isHatnote then
					leadStart = leadStart or mw.ustring.len(t) + 1 -- we got a paragraph, so mark the start of the lead section
					paras = paras + 1
					if allParagraphs or (options.paraflags and options.paraflags[paras]) then t = t .. token end -- add if this paragraph wanted
				end
			end -- of "else got a paragraph"
		end -- of "else not a template"

		if token then text = mw.ustring.sub(text, mw.ustring.len(token)+1) end -- remove parsed token from remaining text
		startLine = mw.ustring.find(token, "\n%s*$") -- will the next token be the first non-space on a line?
	until not text or text == "" or not token or token == "" -- loop until all text parsed

	text = mw.ustring.gsub(t, "\n+$", "") -- remove trailing line feeds, so "{{Transclude text excerpt|Foo}} more" flows on one line
	return fileText, text
end

-- Strip wikitext that should not appear in an excerpt.
--   @param text : raw wikitext of the page or section
--   @param options : options.keepSubsections (truthy keeps everything from the first ==Heading== on)
--                    options.keepRefs (truthy keeps <ref> tags and reference templates)
--   @return the cleaned wikitext
-- The substitutions are applied strictly in the order written below.
local function cleanupText(text, options)
	-- Run one substitution on the working text; the replacement defaults to deletion
	local function substitute(pattern, replacement)
		text = mw.ustring.gsub(text, pattern, replacement or "")
	end

	substitute("<!%-%-.-%-%->") -- HTML comments
	substitute("<[Nn][Oo][Ii][Nn][Cc][Ll][Uu][Dd][Ee]>.-</[Nn][Oo][Ii][Nn][Cc][Ll][Uu][Dd][Ee]>") -- noinclude bits

	-- onlyinclude handling is only attempted when the word occurs at all (avoids expensive searches)
	if mw.ustring.find(text, "[Oo][Nn][Ll][Yy][Ii][Nn][Cc][Ll][Uu][Dd][Ee]") then
		substitute("</[Oo][Nn][Ll][Yy][Ii][Nn][Cc][Ll][Uu][Dd][Ee]>.-<[Oo][Nn][Ll][Yy][Ii][Nn][Cc][Ll][Uu][Dd][Ee]>") -- text between onlyinclude sections
		substitute("^.-<[Oo][Nn][Ll][Yy][Ii][Nn][Cc][Ll][Uu][Dd][Ee]>") -- text before the first onlyinclude section
		substitute("</[Oo][Nn][Ll][Yy][Ii][Nn][Cc][Ll][Uu][Dd][Ee]>.*") -- text after the last onlyinclude section
	end

	local keepSubsections = is(options.keepSubsections)
	if not keepSubsections then
		substitute("\n==.*") -- first ==Heading== and everything after it
		substitute("^==.*") -- ...even when the lead is empty
	end

	local keepRefs = is(options.keepRefs)
	if not keepRefs then
		substitute("<%s*[Rr][Ee][Ff][^>]-/%s*>") -- self-closing refs (cited elsewhere)
		substitute("<%s*[Rr][Ee][Ff].->.-<%s*/%s*[Rr][Ee][Ff]%s*>") -- full refs
		substitute("%b{}", stripTemplate) -- unwanted templates such as references
	end

	substitute("<%s*[Ss][Cc][Oo][Rr][Ee].->.-<%s*/%s*[Ss][Cc][Oo][Rr][Ee]%s*>") -- musical scores
	substitute("<%s*[Ii][Mm][Aa][Gg][Ee][Mm][Aa][Pp].->.-<%s*/%s*[Ii][Mm][Aa][Gg][Ee][Mm][Aa][Pp]%s*>", convertImageMap) -- imagemaps become standard images
	substitute("%s*{{%s*[Tt][Oo][Cc].-}}") -- most common tables of contents
	substitute("%s*__[A-Z]*TOC__") -- TOC behavior switches
	substitute("\n%s*{{%s*[Pp]p%-.-}}", "\n") -- protection templates
	substitute("%s*{{[^{|}]*[Ss]idebar%s*}}") -- most sidebars
	substitute("%s*{{[^{|}]*%-[Ss]tub%s*}}") -- most stub templates
	substitute("%s*%[%[%s*:?[Cc]ategory:.-%]%]") -- categories
	substitute("^:[^\n]+\n") -- DIY hatnote indented with a colon

	return text
end

-- Parse a ==Section== from a page
--   @param text : wikitext of the page
--   @param section : section title, possibly URI-encoded (e.g. %26 for &)
--   @param mainOnly : if truthy, stop at the next heading of any level; otherwise
--                     keep subsections and stop at the next heading of the same or a higher level
--   @return the section's content, or nil if there is no such section
local function getSection(text, section, mainOnly)
	local escapedSection = mw.ustring.gsub(mw.uri.decode(section), "([%^%$%(%)%%%.%[%]%*%+%-%?])", "%%%1") -- %26 → & etc, then ^ → %^ etc.
	-- The heading pattern is anchored on a preceding line feed, so one is prepended
	-- to let a heading on the very first line of the text match as well.
	local level, content = mw.ustring.match("\n" .. text .. "\n", "\n(==+)%s*" .. escapedSection .. "%s*==.-\n(.*)")
	if not content then return nil end -- no such section
	local nextSection
	if mainOnly then
		nextSection = "\n==.*" -- Main part of section terminates at any level of header
	else
		nextSection = "\n==" .. mw.ustring.rep("=?", #level - 2) .. "[^=].*" -- "===" → "\n===?[^=].*", matching "==" or "===" but not "===="
	end
	content = mw.ustring.gsub(content, nextSection, "") -- remove later sections with headings at this level or higher
	return content
end

-- Balance the <tag> and </tag> tags in `text`.
-- When there are more opening tags than closing tags, the surplus opening tags
-- at the end are removed; when there are more closing tags, the surplus closing
-- tags at the beginning are removed. Returns the adjusted text.
local function fixTags(text, tag)
	local openPattern = "<%s*" .. tag .. "%f[^%w_].->"
	local closePattern = "<%s*/" .. tag .. "%f[^%w_].->"

	-- Count the matches of a pattern in the current text
	local function occurrences(pattern)
		local count = 0
		for _ in mw.ustring.gmatch(text, pattern) do
			count = count + 1
		end
		return count
	end

	local opened = occurrences(openPattern)
	local closed = occurrences(closePattern)

	if opened > closed then
		-- keep the first `closed` opening tags and delete the rest
		local seen = 0
		text = mw.ustring.gsub(text, openPattern, function()
			seen = seen + 1
			if seen > closed then
				return ""
			end
			return nil -- nil tells gsub to leave this match alone
		end)
	elseif closed > opened then
		-- delete only the first (closed - opened) closing tags
		text = mw.ustring.gsub(text, closePattern, "", closed - opened)
	end

	return text
end

-- Main function returns a string value: text of the lead of a page
--   @param pageNames : sequence of page specifications; each is page, [[page]] or
--                      [[page|text]], optionally followed by |opt1=...|opt2=...
--                      and optionally carrying a #Section. When several are given,
--                      one is picked at random; names that cannot be read are
--                      removed from the table and another is tried.
--   @param options : global options (fragment, nostubs, paragraphs/files flags, more,
--                    list, showall, nobold, ...); per-page options override them
--   @return the excerpt wikitext, or the result of err() when nothing can be read
local function main(pageNames, options)
	if not pageNames or #pageNames < 1 then return err("No page names given") end
	local pageName
	local text
	local pageCount = #pageNames
	local firstPage = pageNames[1] or "(nil)" -- save for the error message, as the name may be removed from the list
	local gotOptions
	local pageOptionsString
	local section

	-- read the page, or a random one if multiple pages were provided
	if pageCount > 1 then math.randomseed(os.time()) end
	while not text and pageCount > 0 do
		local pageNumber = 1
		if pageCount > 1 then pageNumber = math.random(pageCount) end -- pick a random title
		pageName = pageNames[pageNumber]
		if pageName and pageName ~= "" then
			-- We have page or [[page]] or [[page|text]], possibly followed by |opt1|opt2...
			local pn
			pn, gotOptions, pageOptionsString = mw.ustring.match(pageName, "^%s*(%[%b[]%])%s*(|?)(.*)")
			if pn then
				pageName = mw.ustring.match(pn, "%[%[([^|%]]*)") -- turn [[page|text]] into page, discarding text
			else -- we have page or page|opt...
				pageName, gotOptions, pageOptionsString = mw.ustring.match(pageName, "%s*([^|]*[^|%s])%s*(|?)(.*)")
			end

			if pageName and pageName ~= "" then
				local base
				base, section = mw.ustring.match(pageName, "(.-)#(.*)") -- split Page#Section
				pageName = base or pageName

				-- declared local: this name was previously assigned as an undeclared global
				local normalisedPageName
				text, normalisedPageName = getContent(pageName)
				-- bail out before the title is used any further (it was previously
				-- handed to #lst even when no title could be resolved)
				if not normalisedPageName then
					return err("No title for page name " .. pageName)
				end
				pageName = normalisedPageName

				if is(options.fragment) then
					local frame = mw.getCurrentFrame()
					text = frame:callParserFunction('#lst', pageName, options.fragment)
				end
				if text and options.nostubs then
					local isStub = mw.ustring.find(text, "%s*{{[^{|}]*%-[Ss]tub%s*}}")
					if isStub then text = nil end
				end
				if not section then
					section = mw.ustring.match(pageName, ".-#(.*)") -- parse redirect to Page#Section
				end
				if text and section and section ~= "" then text = getSection(text, section) end
			end
		end
		if not text then table.remove(pageNames, pageNumber) end -- this one didn't work; try another
		pageCount = pageCount - 1 -- ensure that we exit the loop after at most #pageNames iterations
	end
	if not text then return err("Cannot read a valid page: first name is " .. firstPage) end

	text = cleanupText(text, options)

	local pageOptions = {} -- pageOptions (even if value is "") have priority over global options
	for k, v in pairs(options) do pageOptions[k] = v end
	if gotOptions and gotOptions ~= "" then
		for _, t in pairs(mw.text.split(pageOptionsString, "|")) do
			local k, v = mw.ustring.match(t, "%s*([^=]-)%s*=(.-)%s*$")
			-- an option without "=" yields k == nil; skip it rather than raise "table index is nil"
			if k then pageOptions[k] = v end
		end
		pageOptions.paraflags = numberFlags(pageOptions["paragraphs"] or "") -- parse paragraphs, e.g. "1,3-5" → {"1","3-5"}
		pageOptions.fileflags = numberFlags(pageOptions["files"] or "") -- parse file numbers
		if pageOptions.more and pageOptions.more == "" then pageOptions.more = "Read more..." end -- more= is short for this default text
	end

	local fileText
	fileText, text = parse(text, pageOptions)

	-- replace the bold title or synonym near the start of the article by a wikilink to the article
	local lang = mw.language.getContentLanguage()
	local pos = mw.ustring.find(text, "'''" .. lang:ucfirst(pageName) .. "'''", 1, true) -- look for "'''Foo''' is..." (uc) or "A '''foo''' is..." (lc)
	 or mw.ustring.find(text, "'''" .. lang:lcfirst(pageName) .. "'''", 1, true) -- plain search: special characters in pageName represent themselves
	if pos then
		local len = mw.ustring.len(pageName)
		text = mw.ustring.sub(text, 1, pos + 2) .. "[[" .. mw.ustring.sub(text, pos + 3, pos + len + 2) .. "]]" .. mw.ustring.sub(text, pos + len + 3, -1) -- link it
	else -- look for anything unlinked in bold, assumed to be a synonym of the title (e.g. a person's birth name)
		text = mw.ustring.gsub(text, "()'''(.-'*)'''", function(a, b)
			if a < 100 and not mw.ustring.find(b, "%[") then ---if early in article and not wikilinked
				return "'''[[" .. pageName .. "|" .. b .. "]]'''" -- replace '''Foo''' by '''[[pageName|Foo]]'''
			else
				return nil -- instruct gsub to make no change
			end
		 end, 1) -- "end" here terminates the anonymous replacement function(a, b) passed to gsub
	end

	-- remove '''bold text''' if requested
	if is(pageOptions.nobold) then text = mw.ustring.gsub(text, "'''", "") end

	text = fileText .. text

	-- Seek and destroy unterminated templates and wikilinks
	repeat -- hide matched {{template}}s including nested templates
		local t = text
		text = mw.ustring.gsub(text, "{(%b{})}", "\27{\27%1\27}\27") -- {{sometemplate}} → E{Esometemplate}E}E where E represents escape
		text = mw.ustring.gsub(text, "(< *math[^>]*>[^<]-)}}(.-< */math *>)", "%1}\27}\27%2") -- <math>\{sqrt\{hat{x}}</math> → <math>\{sqrt\{hat{x}E}E</math>
	until text == t
	repeat -- do similar for [[wikilink]]s
		local t = text
		text = mw.ustring.gsub(text, "%[(%b[])%]", "\27[\27%1\27]\27")
	until text == t

	-- these two calls resolve to the byte-oriented string.gsub; every character they match on is ASCII
	text = text.gsub(text, "([{}%[%]])%1[^\27].*", "") -- remove unmatched {{, }}, [[ or ]] and everything thereafter, avoiding ]E]E etc.
	text = text.gsub(text, "([{}%[%]])%1$", "") -- remove unmatched {{, }}, [[ or ]] at end of text
	text = mw.ustring.gsub(text, "\27", "") -- unhide matched pairs: E{E{ → {{, ]E]E → ]], etc.

	-- Ensure div tags match
	text = fixTags(text, "div")

	if pageOptions.more then text = text .. " '''[[" .. pageName .. "|" .. pageOptions.more .. "]]'''" end -- wikilink to article for more info

	if pageOptions.list and not pageOptions.showall then -- add a collapsed list of pages which might appear
		local listtext = pageOptions.list
		if listtext == "" then listtext = "Other articles" end
		text = text .. "{{collapse top|title={{resize|85%|" ..listtext .. "}}|bg=fff}}{{hlist"
		for _, p in pairs(pageNames) do
			if mw.ustring.match(p, "%S") then text = text .. "|[[" .. mw.text.trim(p) .. "]]" end
		end
		text = text .. "}}\n{{collapse bottom}}"
	end

	return text
end

-- Shared template invocation code for the lead, linked, listitem, random and selected entry points
-- frame:    frame object supplied by #invoke; its own arguments take priority over those of
--           the calling template (the parent frame)
-- template: one of "lead", "linked", "listitem", "random" or "selected"
-- Returns preprocessed wikitext, or a tracking-category link when no text was produced on a
-- content page, or the result of err() when the arguments cannot be used
local function invoke(frame, template)
	-- args = { 1,2,... = page names, paragraphs = list e.g. "1,3-5", files = list, more = text}
	local args = {} -- args[k] = frame.args[k] or frame:getParent().args[k] for all k in either (numeric or not)
	local parent = frame:getParent() -- nil when the frame has no parent frame
	if parent then
		for k, v in pairs(parent.args) do args[k] = v end
	end
	for k, v in pairs(frame.args) do args[k] = v end -- args from a Lua call have priority over parent args from template
	errors = args["errors"] -- set the module level flag used in local function err

	local articleCount = #args -- must be 1 except with selected=Foo and Foo=Somepage
	if articleCount < 1 and not (template == "selected" and args[template] and args[args[template]]) then
		return err("No articles provided")
	end

	local pageNames = {}
	if template == "lead" then
		pageNames = { args[1] }
	elseif template == "linked" or template == "listitem" then
		-- Read named page and find its wikilinks
		local page = args[1]
		-- #args can be positive while args[1] is missing (numbered parameters with a gap);
		-- bail out here rather than concatenating nil into the messages below
		if not page then return err("No articles provided") end
		local text, title = getContent(page)
		if not title then
			return err("No title for page name " .. page)
		elseif not text then
			return err("No content for page name " .. page)
		end
		if args["section"] then -- check relevant section only
			text = getSection(text, args["section"], args["sectiononly"])
			if not text then return err("No section " .. args["section"] .. " in page " .. page) end
		end
		-- replace annotated links with real links
		text = mw.ustring.gsub(text, "{{%s*[Aa]nnotated[ _]link%s*|%s*(.-)%s*}}", "[[%1]]")
		if template == "linked" then
			for linkTarget in mw.ustring.gmatch(text, "%[%[%s*([^%]|\n]*)") do table.insert(pageNames, linkTarget) end
		else -- listitem: first wikilink on a line beginning *, :#, etc. except in "See also" or later section
			text = mw.ustring.gsub(text, "\n== *See also.*", "")
			for linkTarget in mw.ustring.gmatch(text, "\n:*[%*#][^\n]-%[%[%s*([^%]|\n]*)") do table.insert(pageNames, linkTarget) end
		end
	elseif template == "random" then
		-- accept any number of page names.  If more than one, we'll pick one randomly
		-- pairs() visits keys in an unspecified order, so sort the numeric keys first;
		-- this keeps the page list (and therefore showall output) in parameter order
		local numericKeys = {}
		for key in pairs(args) do
			if type(key) == 'number' then numericKeys[#numericKeys + 1] = key end
		end
		table.sort(numericKeys)
		for _, key in ipairs(numericKeys) do
			pageNames[#pageNames + 1] = args[key]
		end
	elseif template == "selected" then
		local articleKey = args[template]
		local articleNumber = tonumber(articleKey)
		-- normalise article number into the range 1..#args; skipped when there are no
		-- numbered articles, because a modulo by zero would discard a usable named key
		if articleNumber and articleCount > 0 then
			articleKey = articleNumber % articleCount
			if articleKey == 0 then articleKey = articleCount end
		end
		pageNames = { args[articleKey] }
	end

	local options = args -- pick up miscellaneous options: more, errors, fileargs (same table as args, not a copy)
	options.paraflags = numberFlags(args["paragraphs"] or "") -- parse paragraphs, e.g. "1,3-5" → {"1","3-5"}
	options.fileflags = numberFlags(args["files"] or "") -- parse file numbers
	if options.more == "" then options.more = "Read more..." end -- more= is short for this default text

	local text = ""
	if options.showall then
		-- one excerpt per page, joined by the showall value (or a default rule when it is blank)
		local separator = ""
		for _, pageName in ipairs(pageNames) do
			local pageText = main({ pageName }, options)
			if pageText ~= "" then
				text = text .. separator .. pageText
				separator = options.showall
				if separator == "" then separator = "{{clear}}{{hr}}" end
			end
		end
	else
		text = main(pageNames, options)
	end

	if text == "" and d.brokenCategory and d.brokenCategory ~= "" and mw.title.getCurrentTitle().isContentPage then
		return "[[Category:" .. d.brokenCategory .. "]]"
	else
		return frame:preprocess(text)
	end
end

-- Pure-Lua implementation of {{Excerpt}}, kept in Lua for reduced Post-expand include size
-- frame: frame object from #invoke; its arguments override those of the calling template
-- Returns the complete excerpt block (styles, wrapper tags, hatnote, text, category) as one string
local function excerpt(frame)
	-- Merge both argument sets into one plain table; the parent is copied first so #invoke arguments win
	local args = {}
	for name, value in pairs(frame:getParent().args) do args[name] = value end
	for name, value in pairs(frame.args) do args[name] = value end

	-- Wrapper element name, defaulting to div
	local tag = 'div'
	if is(args.tag) then tag = args.tag end

	-- Source article: named parameter, else first positional, else the literal placeholder
	local article
	if is(args.article) then
		article = args.article
	else
		article = args[1] or '{{{1}}}'
	end

	-- Source section: named parameter, else second positional (may be nil)
	local section = args[2]
	if is(args.section) then section = args.section end

	local openTag = '<' .. tag
	local closeTag = '</' .. tag .. '>'

	local parts = {}
	local function emit(piece) parts[#parts + 1] = piece end

	emit(frame:extensionTag{ name = 'templatestyles', args = {src='Excerpt/styles.css'} })
	emit(openTag .. ' class="excerpt-block">')
	if is(args.indicator) then emit(openTag .. ' class="excerpt-indicator">') end

	-- Hatnote naming the source, with an edit link, unless suppressed with nohat
	if not is(args.nohat) then
		local scope = is(args.indicator) and '' or ' section'
		local linkTarget, linkLabel = article, article
		if is(section) then
			linkTarget = linkTarget .. '#' .. frame:callParserFunction( 'urlencode', section, 'WIKI' )
			linkLabel = linkLabel .. frame:callParserFunction( '#tag:nowiki', ' § ' ) .. section
		end
		-- fall back to the current page when the article name does not yield a title object
		local title = mw.title.new(article) or mw.title.getCurrentTitle()
		local hatnoteText = table.concat({
			'This', scope, ' is an excerpt from ',
			'[[', linkTarget, '|', linkLabel, ']]',
			"''", '<span class="mw-editsection-like plainlinks"><span>[ </span>[',
			title:fullUrl('action=edit'), ' edit',
			']<span> ]</span></span>', "''",
		})
		emit(require('Module:Hatnote')._hatnote(hatnoteText, {selfref=true}) or err("Error generating hatnote"))
	end

	emit(openTag .. ' class="excerpt">\n')
	if article == '{{{1}}}' then
		emit(err("No article provided"))
	else
		-- turn template arguments into module options; options is the args table itself,
		-- so every other argument (fragment included) is already present on it
		local options = args
		options.paraflags = args.paragraphs
		options.fileflags = args.files or 1
		options.nobold = 1
		options.keepTables = args.tables or 1
		options.keepRefs = args.references or 1
		options.keepSubsections = args.subsections

		local text = main({ article .. '#' .. (section or '') }, options)

		local broken = text == "" and d.brokenCategory and d.brokenCategory ~= "" and mw.title.getCurrentTitle().isContentPage
		if broken then
			emit("[[Category:" .. d.brokenCategory .. "]]")
		else
			emit(frame:preprocess(text) or err("Error processing text"))
		end
	end
	emit(closeTag) -- closes the excerpt element
	if is(args.indicator) then emit(closeTag) end -- closes the excerpt-indicator element
	emit(closeTag) -- closes the excerpt-block element

	if mw.title.getCurrentTitle().isContentPage then
		emit('[[Category:Articles with excerpts]]')
	end

	return table.concat(parts)
end

-- Entry points for template callers using #invoke:
-- Each name listed is exported as p[name](frame) and calls invoke(frame, name)
do
	local invokeModes = {
		"lead", -- {{Transclude lead excerpt}} reads the first and only article
		"linked", -- {{Transclude linked excerpt}} reads a randomly selected article linked from the given page
		"listitem", -- {{Transclude list item excerpt}} reads a randomly selected article listed on the given page
		"random", -- {{Transclude random excerpt}} reads any article (default for invoke with one argument)
		"selected", -- {{Transclude selected excerpt}} reads the article whose key is in the selected= parameter
	}
	for _, mode in ipairs(invokeModes) do
		p[mode] = function(frame) return invoke(frame, mode) end
	end
end
-- {{Excerpt}} transcludes part of an article into another article
p.excerpt = function(frame) return excerpt(frame) end

-- Entry points for other Lua modules
-- Each wrapper forwards its arguments unchanged to the module-local function it is named after
-- and returns whatever that function returns
function p.getContent(page, frame) return getContent(page, frame) end
-- sectiononly is optional; it is passed on as getSection's third argument, the same position
-- the template code uses for its sectiononly option
function p.getsection(text, section, sectiononly) return getSection(text, section, sectiononly) end
function p.parse(text, options, filesOnly) return parse(text, options, filesOnly) end
function p.argimage(text) return argImage(text) end
function p.checkimage(image) return checkImage(image) end
function p.parseimage(text, start) return parseImage(text, start) end
function p.cleanupText(text, options) return cleanupText(text, options) end
function p.main(pageNames, options) return main(pageNames, options) end
function p.numberflags(str) return numberFlags(str) end

return p
v t e Portal templates
Visual overview of template usage
Layout and formatting	{{Box-header}} color palette versions plain round square watch {{Box-header colour}}¹ {{Box-footer}} {{Flex columns}} {{Plain navboxes}}
Content transclusion	{{Transclude lead excerpt}}² {{Transclude random excerpt}}² {{Transclude selected excerpt}}² {{Transclude linked excerpt}}² {{Transclude list item excerpt}}² {{Transclude selected current events}}³ {{Transclude selected recent additions}}⁴ {{Transclude DYK}}⁹ {{Excerpt}}
Content slideshows	{{Random slideshow}}⁵ {{Transclude files as random slideshow}}⁵ {{Transclude excerpts as random slideshow}}⁶ {{Transclude linked excerpts as random slideshow}}⁶ {{Transclude list item excerpts as random slideshow}}⁶
Content randomisation	{{Random quotation}} {{Random portal component}}⁷
General	{{Portals}} {{Portals browsebar}} {{Portal description}} {{Wikimedia for portals}} {{Portal navbar no header2}} {{Portal information sidebar}}
Linking templates	{{Portal}} {{Portal-inline}}¹¹ {{Portal bar}}
Talk pages / Maintenance	{{Portal talk}} {{WikiProject Portals}}⁸ {{Portal maintenance status}}⁸ {{Portal suggestions}}
Images	{{Portal image banner}}¹⁰ {{Random slideshow}}⁵ {{Portal dynamic image}} {{Transclude files as random slideshow}}⁵
Modules	¹Module:Box-header ²Module:Excerpt ³Module:Selected current events ⁴Module:Selected recent additions ⁵Module:Random slideshow ⁶Module:Excerpt slideshow ⁷Module:Random portal component ⁸Module:Portal maintenance status ⁹Module:Transclude DYK ¹⁰Module:Portal image banner ¹¹Module:Portal-inline
Usage	Wikipedia:Portal Wikipedia:WikiProject Portals/Components
For more information, see: Wikipedia:WikiProject Portals · See also: Category:Template-Class Portal pages and Category:Wikipedia Portal templates
Anonymous

Search

Module:Excerpt

Namespaces

More

Page actions

Usage

Note

See also

Navigation

Navigation

Help

Translate

Wiki tools

Wiki tools

Anonymous

Search

Module:Excerpt

Usage

Note

See also

Navigation

Wiki tools

Page tools

Other projects

Hidden categories