Skip to content

Commit 52fb5fa

Browse files
thibaultcha authored and agentzh committed
bugfix: ngx.re: fixed a split() edge-case when using control characters in the regex.
Signed-off-by: Yichun Zhang (agentzh) <[email protected]>
1 parent 8a9cd48 commit 52fb5fa

File tree

2 files changed

+76
-2
lines changed

2 files changed

+76
-2
lines changed

lib/ngx/re.lua

Lines changed: 12 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -50,7 +50,7 @@ local _M = { version = base.version }
5050
local function re_split_helper(subj, compiled, compile_once, flags, ctx)
5151
local rc
5252
do
53-
local pos = math_max(ctx.pos - 1, 0)
53+
local pos = math_max(ctx.pos, 0)
5454

5555
rc = C.ngx_http_lua_ffi_exec_regex(compiled, flags, subj, #subj, pos)
5656
end
@@ -87,11 +87,18 @@ local function re_split_helper(subj, compiled, compile_once, flags, ctx)
8787
return nil, nil, nil
8888
end
8989

90+
if from == to then
91+
-- empty match, skip to next char
92+
ctx.pos = to + 1
93+
94+
else
95+
ctx.pos = to
96+
end
97+
9098
-- convert to Lua string indexes
9199

92100
from = from + 1
93101
to = to + 1
94-
ctx.pos = to + 1
95102

96103
-- retrieve the first sub-match capture if any
97104

@@ -153,6 +160,9 @@ function _M.split(subj, regex, opts, ctx, max, res)
153160
local res_idx = 0
154161
local last_empty_match
155162

163+
-- update to split_helper PCRE indexes
164+
ctx.pos = sub_idx - 1
165+
156166
-- splitting: with and without a max limiter
157167

158168
if max > 0 then

t/re-split.t

Lines changed: 64 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1181,3 +1181,67 @@ len: 5
11811181
qr/\[TRACE \d+/
11821182
--- no_error_log
11831183
[error]
1184+
1185+
1186+
1187+
=== TEST 36: split by unit separator 1/2 (GH issue lua-nginx-module #1217)
1188+
--- http_config eval: $::HttpConfig
1189+
--- config
1190+
location /re {
1191+
content_by_lua_block {
1192+
local ngx_re = require "ngx.re"
1193+
1194+
local subjs = {
1195+
"1\x1fT\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f15",
1196+
"1\x1fT\x1fT\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f15",
1197+
"1\x1fT\x1fT\x1fT\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f15",
1198+
}
1199+
1200+
for _, subj in ipairs(subjs) do
1201+
local col_list = ngx_re.split(subj, "\\x1f")
1202+
ngx.say(#col_list, " ", table.concat(col_list, "|"))
1203+
end
1204+
}
1205+
}
1206+
--- request
1207+
GET /re
1208+
--- response_body
1209+
15 1|T|||||||||||||15
1210+
15 1|T|T||||||||||||15
1211+
15 1|T|T|T|||||||||||15
1212+
--- error_log eval
1213+
qr/\[TRACE \d+/
1214+
--- no_error_log
1215+
[error]
1216+
1217+
1218+
1219+
=== TEST 37: split by unit separator 2/2 (with ctx.pos)
1220+
--- http_config eval: $::HttpConfig
1221+
--- config
1222+
location /re {
1223+
content_by_lua_block {
1224+
local ngx_re = require "ngx.re"
1225+
1226+
local subjs = {
1227+
"1\x1fT\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f15",
1228+
"1\x1fT\x1fT\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f15",
1229+
"1\x1fT\x1fT\x1fT\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f15",
1230+
}
1231+
1232+
for _, subj in ipairs(subjs) do
1233+
local col_list = ngx_re.split(subj, "\\x1f", nil, { pos = 6 })
1234+
ngx.say(#col_list, " ", table.concat(col_list, "|"))
1235+
end
1236+
}
1237+
}
1238+
--- request
1239+
GET /re
1240+
--- response_body
1241+
12 |||||||||||15
1242+
13 ||||||||||||15
1243+
13 |T|||||||||||15
1244+
--- error_log eval
1245+
qr/\[TRACE \d+/
1246+
--- no_error_log
1247+
[error]

0 commit comments

Comments
 (0)