|
| 1 | +--- jog.lua – walk the pandoc AST with context, and with inplace modification. |
| 2 | +--- |
| 3 | +--- Copyright: © 2024 Albert Krewinkel, Carlos Scheidegger |
| 4 | +--- License: MIT – see LICENSE for details |
| 5 | + |
| 6 | +local pandoc = require 'pandoc' |
| 7 | +local List = require 'pandoc.List' |
| 8 | + |
local debug_getmetatable = debug.getmetatable

--- Determine the type name of a value; like pandoc.utils.type, but faster.
--
-- @param x  any Lua value
-- @return the `__name` field of the value's metatable if that field is
--         set to a truthy value, otherwise the result of the builtin
--         `type` function.
local function ptype (x)
  local metatable = debug_getmetatable(x)
  local name = metatable and metatable.__name
  if name then
    return name
  end
  return type(x)
end
| 21 | + |
--- Set of type names (as returned by `ptype`) that are list types.
local listy_type = {
  ['List'] = true,
  ['Inlines'] = true,
  ['Blocks'] = true,
}
| 28 | + |
--- Call a filter function on an element.
--
-- @param fn       filter function, or nil if the filter has no function
--                 for this element
-- @param element  the element to pass to `fn`
-- @param context  second argument handed to `fn`
-- @return the first value returned by `fn`, or `element` itself when `fn`
--         is nil or returned nil
-- @return the second value returned by `fn` (only when `fn` is not nil)
local function run_filter_function (fn, element, context)
  if fn == nil then
    return element
  end

  local replacement, keep_going = fn(element, context)
  if replacement ~= nil then
    return replacement, keep_going
  end
  -- a nil result means "leave the element unchanged"
  return element, keep_going
end
| 41 | + |
--- Set of Block and Inline tags that are leaf nodes, i.e., elements that
--- have no nested items to traverse.
local leaf_node_tags = {}
for _, tag in ipairs{
  'Code',
  'CodeBlock',
  'HorizontalRule',
  'LineBreak',
  'Math',
  'RawBlock',
  'RawInline',
  'Space',
  'SoftBreak',
  'Str',
} do
  leaf_node_tags[tag] = true
end
| 55 | + |
--- Set of Block and Inline tags whose nested items live in the `.content`
--- field only.
local content_only_node_tags = {}
for _, tag in ipairs{
  -- Blocks with Blocks content
  'BlockQuote', 'Div',
  -- Blocks with Inlines content
  'Header', 'Para', 'Plain',
  -- Blocks with List content
  'LineBlock', 'BulletList', 'OrderedList', 'DefinitionList',
  -- Inlines with Inlines content
  'Cite', 'Emph', 'Link', 'Quoted', 'SmallCaps', 'Span', 'Strikeout',
  'Strong', 'Subscript', 'Superscript', 'Underline',
  -- Inline with Blocks content
  'Note',
} do
  content_only_node_tags[tag] = true
end
| 85 | + |
--- Apply the filter on the nodes below the given element.
--
-- Every child is replaced, in place, by whatever `jogger` returns for it.
-- The filter function for `element` itself is not called here.
--
-- @param element  the value whose children are to be traversed
-- @param tp       type name of `element` (see `ptype`); computed here
--                 when nil
-- @param jogger   function applied to each child; its return value
--                 replaces that child
-- @return `element`, modified in place
-- Raises an error if none of the branches below knows the element's type.
local function recurse (element, tp, jogger)
  tp = tp or ptype(element)
  local tag = element.tag
  if leaf_node_tags[tag] then
    -- do nothing, cannot traverse any deeper
  elseif tp == 'table' then
    -- plain Lua table: process every value, keeping its key
    for key, value in pairs(element) do
      element[key] = jogger(value)
    end
  elseif content_only_node_tags[tag] or
         tp == 'Cell' or tp == 'pandoc Cell' then
    element.content = jogger(element.content)
  elseif tag == 'Image' then
    element.caption = jogger(element.caption)
  elseif tag == 'Table' then
    element.caption = jogger(element.caption)
    element.head = jogger(element.head)
    element.bodies = jogger(element.bodies)
    element.foot = jogger(element.foot)
  elseif tag == 'Caption' then
    element.long = jogger(element.long)
    -- `short` is optional; leave a missing value as it is
    element.short = element.short and jogger(element.short)
  elseif tag == 'Figure' then
    element.caption = jogger(element.caption)
    element.content = jogger(element.content)
  elseif tp == 'Meta' then
    for key, value in pairs(element) do
      element[key] = jogger(value)
    end
  elseif tp == 'Row' or tp == 'pandoc Row' then
    element.cells = jogger(element.cells)
  elseif tp == 'pandoc TableHead' or tp == 'pandoc TableFoot' or
         tp == 'TableHead' or tp == 'TableFoot' then
    element.rows = jogger(element.rows)
  elseif tp == 'Blocks' or tp == 'Inlines' then
    -- Filter every item first, then write the results back into `element`
    -- itself, splicing in any item that turned into a list.
    local expected_itemtype = tp == 'Inlines' and 'Inline' or 'Block'
    -- index of the last slot of `element` that has been written
    local pos = 0
    local filtered_index = 1
    local filtered_items = element:map(function (x)
      return jogger(x)
    end)
    local item = filtered_items[filtered_index]
    local itemtype
    while item ~= nil do
      itemtype = ptype(item)
      if itemtype ~= tp and itemtype ~= expected_itemtype then
        -- neither the list type nor the list's item type. Try to convert.
        item = pandoc[tp](item)
        itemtype = tp
      end
      if itemtype == tp then
        -- the item is a list of the same type: copy its items one by one
        local sublist_index = 1
        local sublistitem = item[sublist_index]
        while sublistitem ~= nil do
          pos = pos + 1
          element[pos] = sublistitem
          sublist_index = sublist_index + 1
          sublistitem = item[sublist_index]
        end
      else
        -- not actually a sublist, just an element
        pos = pos + 1
        element[pos] = item
      end
      filtered_index = filtered_index + 1
      item = filtered_items[filtered_index]
    end
    -- unset remaining indices if the new list is shorter than the old
    pos = pos + 1
    while element[pos] do
      element[pos] = nil
      pos = pos + 1
    end
  elseif tp == 'List' then
    -- generic list: replace each item; stops at the first nil or false item
    local i, item = 1, element[1]
    while item do
      element[i] = jogger(item)
      i, item = i+1, element[i+1]
    end
  elseif tp == 'Caption' then
    -- same handling as the `tag == 'Caption'` branch above, but selected
    -- via the type name
    element.long = jogger(element.long)
    element.short = element.short and jogger(element.short)
  elseif tp == 'Pandoc' then
    element.meta = jogger(element.meta)
    element.blocks = jogger(element.blocks)
  else
    error("Don't know how to traverse " .. (element.t or tp))
  end
  return element
end
| 177 | + |
--- Set of type names (as returned by `ptype`) whose values are neither
--- filtered nor traversed.
local non_joggable_types = {}
for _, name in ipairs{'Attr', 'boolean', 'nil', 'number', 'string'} do
  non_joggable_types[name] = true
end
| 185 | + |
--- Look up the filter function that applies to an element.
--
-- @param element  the element to be filtered
-- @param filter   table of filter functions, indexed by tag or type name
-- @param tp       type name of `element` (see `ptype`)
-- @return the matching function from `filter`, or nil if there is none or
--         if values of this type are never filtered
local function get_filter_function(element, filter, tp)
  if tp == 'table' or non_joggable_types[tp] then
    return nil
  end
  if tp == 'Block' or tp == 'Inline' then
    -- a function for the specific tag wins over the generic
    -- `Block`/`Inline` function
    return filter[element.tag] or filter[tp]
  end
  return filter[tp]
end
| 197 | + |
--- Create the traversal function for a filter.
--
-- @param filter   table of filter functions; if `filter.traverse` is
--                 `'topdown'`, an element is filtered before its
--                 children, otherwise after them
-- @param context  list used as a stack of the elements currently being
--                 processed, or nil to disable context tracking
-- @return function taking an element and returning the filtered result
local function make_jogger (filter, context)
  local is_topdown = filter.traverse == 'topdown'
  local jogger

  jogger = function (element)
    if context then
      -- the element itself is the last item of the context while it is
      -- being processed
      context:insert(element)
    end
    local tp = ptype(element)
    local result, continue = nil, true
    if non_joggable_types[tp] then
      result = element
    elseif tp == 'table' then
      -- plain tables have no filter function, but may contain elements
      result = recurse(element, tp, jogger)
    else
      local fn = get_filter_function(element, filter, tp)
      if is_topdown then
        result, continue = run_filter_function(fn, element, context)
        -- The result might have a different type than the element, so the
        -- type is determined anew. Results that cannot be traversed (e.g.
        -- a string) are passed through unchanged, as in a bottom-up
        -- traversal, instead of raising an error in `recurse`.
        local result_tp = ptype(result)
        if continue ~= false and not non_joggable_types[result_tp] then
          result = recurse(result, result_tp, jogger)
        end
      else
        element = recurse(element, tp, jogger)
        result = run_filter_function(fn, element, context)
      end
    end

    if context then
      context:remove() -- remove this element from the context
    end
    return result
  end
  return jogger
end
| 233 | + |
--- Maps the plain names of table-related element types to the type names
--- that carry a `pandoc ` prefix.
local element_name_map = {}
for _, name in ipairs{'Cell', 'Row', 'TableHead', 'TableFoot'} do
  element_name_map[name] = 'pandoc ' .. name
end
| 240 | + |
--- Function to traverse the pandoc AST with context.
--
-- @param element  the AST element (or list of elements) to traverse;
--                 modified in place
-- @param filter   table of filter functions indexed by tag or type name.
--                 `filter.traverse == 'topdown'` selects a top-down
--                 traversal; a truthy `filter.context` makes the list of
--                 elements currently being processed available to the
--                 filter functions as second argument. Note that entries
--                 with a `pandoc ` prefix are added to this table.
-- @return the filtered element
local function jog(element, filter)
  local context = filter.context and List{} or nil

  -- Table elements have a `pandoc ` prefix in the name
  for from, to in pairs(element_name_map) do
    filter[to] = filter[from]
  end

  -- Check if we can just call Pandoc and Meta and be done. The shortcut
  -- calls Meta before Pandoc and does not maintain the context, so it is
  -- only equivalent to the full traversal for bottom-up filters that do
  -- not request a context.
  if ptype(element) == 'Pandoc'
     and context == nil
     and filter.traverse ~= 'topdown' then
    local must_recurse = false
    for name in pairs(filter) do
      -- non-string keys cannot name an element type; skip them instead of
      -- failing on the method call
      if type(name) == 'string' and name:match'^[A-Z]'
         and name ~= 'Pandoc' and name ~= 'Meta' then
        must_recurse = true
        break
      end
    end
    if not must_recurse then
      element.meta = run_filter_function(filter.Meta, element.meta, context)
      element = run_filter_function(filter.Pandoc, element, context)
      return element
    end
  end

  -- Create and call traversal function
  local jog_internal = make_jogger(filter, context)
  return jog_internal(element)
end
| 270 | + |
--- Add `jog` as a method to all pandoc AST elements
-- This uses undocumented features and might break!
--
-- @param funname  name under which the method is installed; defaults to
--                 `'jog'`
local function add_method(funname)
  funname = funname or 'jog'

  -- Create one value of each kind so that the respective metatables
  -- exist in the registry.
  pandoc.Space()          -- init metatable 'Inline'
  pandoc.HorizontalRule() -- init metatable 'Block'
  pandoc.Meta{}           -- init metatable 'Meta'
  pandoc.Pandoc{}         -- init metatable 'Pandoc'
  pandoc.Blocks{}         -- init metatable 'Blocks'
  pandoc.Inlines{}        -- init metatable 'Inlines'
  pandoc.Caption{}        -- init metatable 'Caption'
  pandoc.Cell{}           -- init metatable 'Cell'
  pandoc.Row{}            -- init metatable 'Row'
  pandoc.TableHead{}      -- init metatable 'TableHead'
  pandoc.TableFoot{}      -- init metatable 'TableFoot'

  local registry = debug.getregistry()

  -- These metatables keep their methods in a separate `methods` table.
  local names_with_methods_table = {
    'Block', 'Inline', 'Pandoc',
    'pandoc Cell', 'pandoc Row', 'pandoc TableHead', 'pandoc TableFoot',
    'Caption', 'Cell', 'Row', 'TableHead', 'TableFoot',
  }
  for _, name in ipairs(names_with_methods_table) do
    local metatable = registry[name]
    if metatable then
      metatable.methods[funname] = jog
    end
  end

  -- For list types and Meta the function is stored directly in the
  -- registry entry.
  for name in pairs(listy_type) do
    local metatable = registry[name]
    if metatable then
      metatable[funname] = jog
    end
  end
  local meta_metatable = registry['Meta']
  if meta_metatable then
    meta_metatable[funname] = jog
  end
end
| 307 | + |
--- Module table. Calling the module itself is the same as calling `jog`.
local M = setmetatable(
  {
    jog = jog,
    add_method = add_method,
  },
  {
    __call = function (_, ...)
      return jog(...)
    end
  }
)

return M
0 commit comments