Skip to content

Commit ed80629

Browse files
committed
add streaming mode
1 parent 6808ba0 commit ed80629

File tree

1 file changed

+123
-30
lines changed

1 file changed

+123
-30
lines changed

js/check.js

Lines changed: 123 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,10 @@
11
#!/usr/bin/env node
22

33
const { text } = require('node:stream/consumers')
4+
const readline = require('node:readline')
5+
const fs = require('node:fs')
46

5-
const { ArgumentDefaultsHelpFormatter, ArgumentParser, FileType } = require('argparse')
7+
const { ArgumentParser, FileType } = require('argparse')
68

79
const adblockRust = require('./index.js')
810
const adblockRustPackage = require('./../package.json')
@@ -70,69 +72,160 @@ const chromiumRequestTypeMapping = {
7072
'XSL stylesheet': 'xslt'
7173
}
7274
/* eslint-enable quote-props */
75+
const chromiumRequestTypes = Object.keys(chromiumRequestTypeMapping)
76+
const requestTypeOptions = filterListRequestTypes.concat(chromiumRequestTypes)
77+
requestTypeOptions.sort()
7378

7479
const parser = new ArgumentParser({
7580
add_help: true,
76-
formatter_class: ArgumentDefaultsHelpFormatter,
7781
description: 'Check whether a URL would be blocked by given filter list rules'
7882
})
7983
parser.add_argument('-v', '--version', {
8084
action: 'version',
8185
version: adblockRustPackage.version
8286
})
87+
88+
parser.add_argument('--requests', {
89+
type: FileType('r'),
90+
default: process.stdin,
91+
help: 'Path to a file of requests to check filter list rules against (or, ' +
92+
'by default, STDIN). This input should be lines of JSON documents, ' +
93+
'one document per line. This JSON text must have the following keys: ' +
94+
'"url", "context", and "type", which corresponds to the --url, ' +
95+
'--context, and --type arguments.'
96+
})
97+
8398
parser.add_argument('--url', {
84-
required: true,
8599
type: URL,
86100
help: 'The full URL to check against the provided filter lists.'
87101
})
88-
parser.add_argument('--context-url', {
89-
required: true,
102+
parser.add_argument('--context', {
90103
type: URL,
91104
help: 'The security context the request occurred in, as a full URL'
92105
})
93-
parser.add_argument('--rule-files', {
94-
required: true,
106+
parser.add_argument('--type', {
107+
help: 'The type of the request, using either i. the types defined by ' +
108+
'filter list projects (which are all in lowercase, e.g., "xhr" or ' +
109+
'"stylesheet"), or ii. the types defined in the Chromium source ' +
110+
'(which start with an uppercase character, e.g., "XMLHttpRequest" or ' +
111+
'"CSS stylesheet")',
112+
choices: requestTypeOptions
113+
})
114+
115+
parser.add_argument('--rules', {
95116
type: FileType('r'),
96117
nargs: '*',
97118
help: 'One or more paths to files of filter list rules to check the ' +
98-
'request against'
119+
'request against. By default uses bundled old-and-outdated versions ' +
120+
'of easylist and easyprivacy'
99121
})
100122
parser.add_argument('--verbose', {
101123
default: false,
102124
action: 'store_true',
103125
help: 'Print information about what rule(s) the request matched.'
104126
})
105127

106-
const requestTypeGroup = parser.add_mutually_exclusive_group(true)
107-
requestTypeGroup.add_argument('--type', {
108-
help: 'The type of the request, using the types defined by ' +
109-
'filter list projects',
110-
choices: filterListRequestTypes
111-
})
112-
requestTypeGroup.add_argument('--chromium-type', {
113-
help: 'The type of the request, using the types defined by chromium',
114-
choices: Object.keys(chromiumRequestTypeMapping)
115-
})
128+
/**
 * Check one request against an adblock engine.
 *
 * @param {object} engine - Object exposing `check(url, context, type, debug)`.
 * @param {URL|string} request - The request URL; stringified before use.
 * @param {string} requestType - Either a filter-list request type, or a key
 *   of `chromiumRequestTypeMapping`, which is translated to its mapped value.
 * @param {URL|string} requestContext - URL of the context the request
 *   occurred in; stringified before use.
 * @returns {*} Whatever `engine.check` returns, or `null` if the check threw
 *   (the failure is reported on stderr).
 */
const checkRequest = (engine, request, requestType, requestContext) => {
  // Only translate *own* keys of the mapping. A plain `mapping[type]` lookup
  // also resolves inherited names such as "constructor" or "toString", which
  // would hand a function to the engine instead of a request type string.
  const isChromiumType = Object.prototype.hasOwnProperty.call(
    chromiumRequestTypeMapping, requestType)
  const requestTypeUnified =
    (isChromiumType && chromiumRequestTypeMapping[requestType]) || requestType
  try {
    return engine.check(
      request.toString(),
      requestContext.toString(),
      requestTypeUnified,
      true
    )
  } catch (e) {
    // Report and signal failure with null so callers decide how to proceed
    // (exit immediately, or keep streaming and fail at the end).
    console.error(`Error checking request: url:${request}, ` +
      `context:${requestContext}, type:${requestTypeUnified}`)
    console.error('adblock-rust error: ' + e.toString())
    return null
  }
}
116144

117145
;(async () => {
118146
const args = parser.parse_args()
119147

120148
const filterSet = new adblockRust.FilterSet(true)
121-
for (const aRuleFile of args.rule_files) {
122-
const rulesText = await text(aRuleFile)
149+
let ruleStreams
150+
if (args.rules) {
151+
ruleStreams = args.rules
152+
} else {
153+
const defaultLists = [
154+
'./data/easylist.to/easylist/easylist.txt',
155+
'./data/easylist.to/easylist/easyprivacy.txt'
156+
]
157+
ruleStreams = defaultLists.map((x) => fs.createReadStream(x, {}))
158+
}
159+
160+
for (const aRuleStream of ruleStreams) {
161+
const rulesText = await text(aRuleStream)
123162
filterSet.addFilters(rulesText.split('\n'))
124163
}
125164

126165
const engine = new adblockRust.Engine(filterSet, true)
127-
const result = engine.check(
128-
args.url.toString(),
129-
args.context_url.toString(),
130-
args.type || chromiumRequestTypeMapping[args.chromium_type],
131-
true
132-
)
133-
134-
if (args.verbose) {
135-
console.log(result)
166+
const checkRequestFunc = checkRequest.bind(undefined, engine)
167+
168+
// This code can either be invoked to consider one request, using command
169+
// line flags, or read request descriptions from a handle. If
170+
// any of the following arguments were provided, then we assume we're in
171+
// "arguments" mode, otherwise we stream request descriptions from the
172+
// --requests argument.
173+
const requestDescArgs = ['url', 'context', 'type']
174+
const numRequestDescArgs = requestDescArgs.reduce((accumulator, curValue) => {
175+
return (args[curValue] !== undefined) ? accumulator + 1 : accumulator
176+
}, 0)
177+
const isReadingRequestFromArgs = (numRequestDescArgs > 0)
178+
179+
if (isReadingRequestFromArgs) {
180+
if (numRequestDescArgs < requestDescArgs.length) {
181+
throw new Error(
182+
'--url, --context, and --type must be either all provided, or none of ' +
183+
'them provided.')
184+
}
185+
const result = checkRequestFunc(args.url, args.type, args.context)
186+
if (result === null) {
187+
process.exit(1)
188+
}
189+
const resultMatched = result.matched
190+
console.log(args.verbose ? result : resultMatched)
191+
process.exit(0)
136192
}
137-
process.exit(result.matched ? 0 : 1)
193+
194+
// Otherwise, we're in "streaming" mode, and we read requests off whatever
195+
// was provided in --requests (which is either the path to a file, or
196+
// stdin).
197+
const readlineInterface = readline.createInterface({
198+
input: args.requests,
199+
terminal: false
200+
})
201+
let anyErrors = false
202+
readlineInterface.on('line', (line) => {
203+
let requestData
204+
try {
205+
requestData = JSON.parse(line)
206+
} catch (e) {
207+
const msg = 'Invalid JSON in requests input: ' + line
208+
throw new Error(msg)
209+
}
210+
211+
if (requestData.url === undefined ||
212+
requestData.type === undefined ||
213+
requestData.context === undefined) {
214+
throw new Error('Request description does not include all three ' +
215+
'required keys, "url", "type", "context".\n' + line)
216+
}
217+
218+
const result = checkRequestFunc(
219+
requestData.url, requestData.type, requestData.context)
220+
if (result === null) {
221+
anyErrors = true
222+
} else {
223+
const resultMatched = result.matched
224+
console.log(args.verbose ? JSON.stringify(result) : resultMatched)
225+
}
226+
})
227+
228+
readlineInterface.on('close', () => {
229+
process.exit(anyErrors === true ? 1 : 0)
230+
})
138231
})()

0 commit comments

Comments
 (0)