@@ -44,6 +44,8 @@ export class BedrockRuntimeServiceExtension implements ServiceExtension {
     switch (request.commandName) {
       case 'Converse':
         return this.requestPreSpanHookConverse(request, config, diag);
+      case 'InvokeModel':
+        return this.requestPreSpanHookInvokeModel(request, config, diag);
     }
 
     return {
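For orientation, this is roughly the `NormalizedRequest` the new `InvokeModel` branch receives. The shape follows the `NormalizedRequest` type used elsewhere in this instrumentation; the model id and body values are illustrative, not taken from the diff:

```ts
// Hypothetical input to requestPreSpanHook for an InvokeModel call.
// commandInput mirrors the AWS SDK's InvokeModelCommand input.
const exampleRequest = {
  serviceName: 'BedrockRuntime',
  commandName: 'InvokeModel',
  commandInput: {
    modelId: 'amazon.titan-text-express-v1', // illustrative
    body: JSON.stringify({
      inputText: 'Hello, Bedrock!',
      textGenerationConfig: { temperature: 0.5, topP: 0.9, maxTokenCount: 128 },
    }),
  },
};
```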
@@ -94,6 +96,168 @@ export class BedrockRuntimeServiceExtension implements ServiceExtension {
     };
   }
 
+  private requestPreSpanHookInvokeModel(
+    request: NormalizedRequest,
+    config: AwsSdkInstrumentationConfig,
+    diag: DiagLogger
+  ): RequestMetadata {
+    let spanName: string | undefined;
+    const spanAttributes: Attributes = {
+      [ATTR_GEN_AI_SYSTEM]: GEN_AI_SYSTEM_VALUE_AWS_BEDROCK,
+      // add operation name for InvokeModel API
+    };
+
+    const modelId = request.commandInput?.modelId;
+    if (modelId) {
+      spanAttributes[ATTR_GEN_AI_REQUEST_MODEL] = modelId;
+    }
+
+    // Each model family encodes its inference parameters differently, so the
+    // request body is parsed per family, keyed off the model id. Guarding on
+    // modelId avoids a TypeError when a body is present without one.
+    if (modelId && request.commandInput?.body) {
+      const requestBody = JSON.parse(request.commandInput.body);
+      if (modelId.includes('amazon.titan')) {
+        if (requestBody.textGenerationConfig?.temperature !== undefined) {
+          spanAttributes[ATTR_GEN_AI_REQUEST_TEMPERATURE] =
+            requestBody.textGenerationConfig.temperature;
+        }
+        if (requestBody.textGenerationConfig?.topP !== undefined) {
+          spanAttributes[ATTR_GEN_AI_REQUEST_TOP_P] =
+            requestBody.textGenerationConfig.topP;
+        }
+        if (requestBody.textGenerationConfig?.maxTokenCount !== undefined) {
+          spanAttributes[ATTR_GEN_AI_REQUEST_MAX_TOKENS] =
+            requestBody.textGenerationConfig.maxTokenCount;
+        }
+        if (requestBody.textGenerationConfig?.stopSequences !== undefined) {
+          spanAttributes[ATTR_GEN_AI_REQUEST_STOP_SEQUENCES] =
+            requestBody.textGenerationConfig.stopSequences;
+        }
+      } else if (modelId.includes('amazon.nova')) {
+        if (requestBody.inferenceConfig?.temperature !== undefined) {
+          spanAttributes[ATTR_GEN_AI_REQUEST_TEMPERATURE] =
+            requestBody.inferenceConfig.temperature;
+        }
+        if (requestBody.inferenceConfig?.top_p !== undefined) {
+          spanAttributes[ATTR_GEN_AI_REQUEST_TOP_P] =
+            requestBody.inferenceConfig.top_p;
+        }
+        if (requestBody.inferenceConfig?.max_new_tokens !== undefined) {
+          spanAttributes[ATTR_GEN_AI_REQUEST_MAX_TOKENS] =
+            requestBody.inferenceConfig.max_new_tokens;
+        }
+        if (requestBody.inferenceConfig?.stopSequences !== undefined) {
+          spanAttributes[ATTR_GEN_AI_REQUEST_STOP_SEQUENCES] =
+            requestBody.inferenceConfig.stopSequences;
+        }
+      } else if (modelId.includes('anthropic.claude')) {
+        if (requestBody.max_tokens !== undefined) {
+          spanAttributes[ATTR_GEN_AI_REQUEST_MAX_TOKENS] =
+            requestBody.max_tokens;
+        }
+        if (requestBody.temperature !== undefined) {
+          spanAttributes[ATTR_GEN_AI_REQUEST_TEMPERATURE] =
+            requestBody.temperature;
+        }
+        if (requestBody.top_p !== undefined) {
+          spanAttributes[ATTR_GEN_AI_REQUEST_TOP_P] = requestBody.top_p;
+        }
+        if (requestBody.stop_sequences !== undefined) {
+          spanAttributes[ATTR_GEN_AI_REQUEST_STOP_SEQUENCES] =
+            requestBody.stop_sequences;
+        }
+      } else if (modelId.includes('meta.llama')) {
+        if (requestBody.max_gen_len !== undefined) {
+          spanAttributes[ATTR_GEN_AI_REQUEST_MAX_TOKENS] =
+            requestBody.max_gen_len;
+        }
+        if (requestBody.temperature !== undefined) {
+          spanAttributes[ATTR_GEN_AI_REQUEST_TEMPERATURE] =
+            requestBody.temperature;
+        }
+        if (requestBody.top_p !== undefined) {
+          spanAttributes[ATTR_GEN_AI_REQUEST_TOP_P] = requestBody.top_p;
+        }
+        // The request body for Meta Llama models has no stop_sequences field.
+      } else if (modelId.includes('cohere.command-r')) {
+        if (requestBody.max_tokens !== undefined) {
+          spanAttributes[ATTR_GEN_AI_REQUEST_MAX_TOKENS] =
+            requestBody.max_tokens;
+        }
+        if (requestBody.temperature !== undefined) {
+          spanAttributes[ATTR_GEN_AI_REQUEST_TEMPERATURE] =
+            requestBody.temperature;
+        }
+        if (requestBody.p !== undefined) {
+          spanAttributes[ATTR_GEN_AI_REQUEST_TOP_P] = requestBody.p;
+        }
+        if (requestBody.message !== undefined) {
+          // NOTE: We approximate the token count since this value is not directly available in the body.
+          // According to the Bedrock docs, (total_chars / 6) approximates the token count for pricing.
+          // https://docs.aws.amazon.com/bedrock/latest/userguide/model-customization-prepare.html
+          spanAttributes[ATTR_GEN_AI_USAGE_INPUT_TOKENS] = Math.ceil(
+            requestBody.message.length / 6
+          );
+        }
+        if (requestBody.stop_sequences !== undefined) {
+          spanAttributes[ATTR_GEN_AI_REQUEST_STOP_SEQUENCES] =
+            requestBody.stop_sequences;
+        }
+      } else if (modelId.includes('cohere.command')) {
+        if (requestBody.max_tokens !== undefined) {
+          spanAttributes[ATTR_GEN_AI_REQUEST_MAX_TOKENS] =
+            requestBody.max_tokens;
+        }
+        if (requestBody.temperature !== undefined) {
+          spanAttributes[ATTR_GEN_AI_REQUEST_TEMPERATURE] =
+            requestBody.temperature;
+        }
+        if (requestBody.p !== undefined) {
+          spanAttributes[ATTR_GEN_AI_REQUEST_TOP_P] = requestBody.p;
+        }
+        if (requestBody.prompt !== undefined) {
+          // NOTE: We approximate the token count since this value is not directly available in the body.
+          // According to the Bedrock docs, (total_chars / 6) approximates the token count for pricing.
+          // https://docs.aws.amazon.com/bedrock/latest/userguide/model-customization-prepare.html
+          spanAttributes[ATTR_GEN_AI_USAGE_INPUT_TOKENS] = Math.ceil(
+            requestBody.prompt.length / 6
+          );
+        }
+        if (requestBody.stop_sequences !== undefined) {
+          spanAttributes[ATTR_GEN_AI_REQUEST_STOP_SEQUENCES] =
+            requestBody.stop_sequences;
+        }
+      } else if (modelId.includes('mistral')) {
+        if (requestBody.prompt !== undefined) {
+          // NOTE: We approximate the token count since this value is not directly available in the body.
+          // According to the Bedrock docs, (total_chars / 6) approximates the token count for pricing.
+          // https://docs.aws.amazon.com/bedrock/latest/userguide/model-customization-prepare.html
+          spanAttributes[ATTR_GEN_AI_USAGE_INPUT_TOKENS] = Math.ceil(
+            requestBody.prompt.length / 6
+          );
+        }
+        if (requestBody.max_tokens !== undefined) {
+          spanAttributes[ATTR_GEN_AI_REQUEST_MAX_TOKENS] =
+            requestBody.max_tokens;
+        }
+        if (requestBody.temperature !== undefined) {
+          spanAttributes[ATTR_GEN_AI_REQUEST_TEMPERATURE] =
+            requestBody.temperature;
+        }
+        if (requestBody.top_p !== undefined) {
+          spanAttributes[ATTR_GEN_AI_REQUEST_TOP_P] = requestBody.top_p;
+        }
+        if (requestBody.stop !== undefined) {
+          spanAttributes[ATTR_GEN_AI_REQUEST_STOP_SEQUENCES] = requestBody.stop;
+        }
+      }
+    }
+
+    return {
+      spanName,
+      isIncoming: false,
+      spanAttributes,
+    };
+  }
+
   responseHook(
     response: NormalizedResponse,
     span: Span,
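As a sketch of what the request hook extracts, assuming the attribute constants resolve to the usual `gen_ai.*` semantic-convention key names, a Titan-style body maps onto span attributes like this (values invented for the example):

```ts
// Illustrative Titan request body and the attributes the hook above derives.
const body = {
  inputText: 'Write a haiku about tracing.',
  textGenerationConfig: {
    temperature: 0.7,
    topP: 0.9,
    maxTokenCount: 200,
    stopSequences: ['\n\n'],
  },
};
// Expected span attributes (assuming standard gen_ai.* key names):
//   gen_ai.request.temperature    -> 0.7
//   gen_ai.request.top_p          -> 0.9
//   gen_ai.request.max_tokens     -> 200
//   gen_ai.request.stop_sequences -> ['\n\n']
```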
@@ -107,6 +271,8 @@ export class BedrockRuntimeServiceExtension implements ServiceExtension {
     switch (response.request.commandName) {
      case 'Converse':
        return this.responseHookConverse(response, span, tracer, config);
+      case 'InvokeModel':
+        return this.responseHookInvokeModel(response, span, tracer, config);
     }
   }
 
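The chars/6 token approximation used for the Cohere and Mistral prompt fields above, and again for their response text below, can be checked by hand:

```ts
// Bedrock's pricing guidance approximates tokens as total_chars / 6;
// the instrumentation rounds up.
const prompt = 'Tell me a two-line story.'; // 25 characters
const approxTokens = Math.ceil(prompt.length / 6); // ceil(25 / 6) === 5
```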
@@ -131,4 +297,137 @@ export class BedrockRuntimeServiceExtension implements ServiceExtension {
       span.setAttribute(ATTR_GEN_AI_RESPONSE_FINISH_REASONS, [stopReason]);
     }
   }
+
+  private responseHookInvokeModel(
+    response: NormalizedResponse,
+    span: Span,
+    tracer: Tracer,
+    config: AwsSdkInstrumentationConfig
+  ) {
+    const currentModelId = response.request.commandInput?.modelId;
+    // The response body arrives as bytes; decode it to text before parsing.
+    // Guarding on currentModelId avoids a TypeError when it is absent.
+    if (currentModelId && response.data?.body) {
+      const decodedResponseBody = new TextDecoder().decode(response.data.body);
+      const responseBody = JSON.parse(decodedResponseBody);
+      if (currentModelId.includes('amazon.titan')) {
+        if (responseBody.inputTextTokenCount !== undefined) {
+          span.setAttribute(
+            ATTR_GEN_AI_USAGE_INPUT_TOKENS,
+            responseBody.inputTextTokenCount
+          );
+        }
+        if (responseBody.results?.[0]?.tokenCount !== undefined) {
+          span.setAttribute(
+            ATTR_GEN_AI_USAGE_OUTPUT_TOKENS,
+            responseBody.results[0].tokenCount
+          );
+        }
+        if (responseBody.results?.[0]?.completionReason !== undefined) {
+          span.setAttribute(ATTR_GEN_AI_RESPONSE_FINISH_REASONS, [
+            responseBody.results[0].completionReason,
+          ]);
+        }
+      } else if (currentModelId.includes('amazon.nova')) {
+        if (responseBody.usage !== undefined) {
+          if (responseBody.usage.inputTokens !== undefined) {
+            span.setAttribute(
+              ATTR_GEN_AI_USAGE_INPUT_TOKENS,
+              responseBody.usage.inputTokens
+            );
+          }
+          if (responseBody.usage.outputTokens !== undefined) {
+            span.setAttribute(
+              ATTR_GEN_AI_USAGE_OUTPUT_TOKENS,
+              responseBody.usage.outputTokens
+            );
+          }
+        }
+        if (responseBody.stopReason !== undefined) {
+          span.setAttribute(ATTR_GEN_AI_RESPONSE_FINISH_REASONS, [
+            responseBody.stopReason,
+          ]);
+        }
+      } else if (currentModelId.includes('anthropic.claude')) {
+        if (responseBody.usage?.input_tokens !== undefined) {
+          span.setAttribute(
+            ATTR_GEN_AI_USAGE_INPUT_TOKENS,
+            responseBody.usage.input_tokens
+          );
+        }
+        if (responseBody.usage?.output_tokens !== undefined) {
+          span.setAttribute(
+            ATTR_GEN_AI_USAGE_OUTPUT_TOKENS,
+            responseBody.usage.output_tokens
+          );
+        }
+        if (responseBody.stop_reason !== undefined) {
+          span.setAttribute(ATTR_GEN_AI_RESPONSE_FINISH_REASONS, [
+            responseBody.stop_reason,
+          ]);
+        }
+      } else if (currentModelId.includes('meta.llama')) {
+        if (responseBody.prompt_token_count !== undefined) {
+          span.setAttribute(
+            ATTR_GEN_AI_USAGE_INPUT_TOKENS,
+            responseBody.prompt_token_count
+          );
+        }
+        if (responseBody.generation_token_count !== undefined) {
+          span.setAttribute(
+            ATTR_GEN_AI_USAGE_OUTPUT_TOKENS,
+            responseBody.generation_token_count
+          );
+        }
+        if (responseBody.stop_reason !== undefined) {
+          span.setAttribute(ATTR_GEN_AI_RESPONSE_FINISH_REASONS, [
+            responseBody.stop_reason,
+          ]);
+        }
+      } else if (currentModelId.includes('cohere.command-r')) {
+        if (responseBody.text !== undefined) {
+          // NOTE: We approximate the token count since this value is not directly available in the body.
+          // According to the Bedrock docs, (total_chars / 6) approximates the token count for pricing.
+          // https://docs.aws.amazon.com/bedrock/latest/userguide/model-customization-prepare.html
+          span.setAttribute(
+            ATTR_GEN_AI_USAGE_OUTPUT_TOKENS,
+            Math.ceil(responseBody.text.length / 6)
+          );
+        }
+        if (responseBody.finish_reason !== undefined) {
+          span.setAttribute(ATTR_GEN_AI_RESPONSE_FINISH_REASONS, [
+            responseBody.finish_reason,
+          ]);
+        }
+      } else if (currentModelId.includes('cohere.command')) {
+        if (responseBody.generations?.[0]?.text !== undefined) {
+          // NOTE: We approximate the token count since this value is not directly available in the body.
+          // According to the Bedrock docs, (total_chars / 6) approximates the token count for pricing.
+          // https://docs.aws.amazon.com/bedrock/latest/userguide/model-customization-prepare.html
+          span.setAttribute(
+            ATTR_GEN_AI_USAGE_OUTPUT_TOKENS,
+            Math.ceil(responseBody.generations[0].text.length / 6)
+          );
+        }
+        if (responseBody.generations?.[0]?.finish_reason !== undefined) {
+          span.setAttribute(ATTR_GEN_AI_RESPONSE_FINISH_REASONS, [
+            responseBody.generations[0].finish_reason,
+          ]);
+        }
+      } else if (currentModelId.includes('mistral')) {
+        if (responseBody.outputs?.[0]?.text !== undefined) {
+          // NOTE: We approximate the token count since this value is not directly available in the body.
+          // According to the Bedrock docs, (total_chars / 6) approximates the token count for pricing.
+          // https://docs.aws.amazon.com/bedrock/latest/userguide/model-customization-prepare.html
+          span.setAttribute(
+            ATTR_GEN_AI_USAGE_OUTPUT_TOKENS,
+            Math.ceil(responseBody.outputs[0].text.length / 6)
+          );
+        }
+        if (responseBody.outputs?.[0]?.stop_reason !== undefined) {
+          span.setAttribute(ATTR_GEN_AI_RESPONSE_FINISH_REASONS, [
+            responseBody.outputs[0].stop_reason,
+          ]);
+        }
+      }
+    }
+  }
 }
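To illustrate the response side, here is a plausible `anthropic.claude` response body after the `TextDecoder` + `JSON.parse` step, and the attributes the hook would set; the values are invented for the example:

```ts
// Hypothetical decoded Claude response body (values invented).
const responseBody = {
  content: [{ type: 'text', text: 'Hi there!' }],
  stop_reason: 'end_turn',
  usage: { input_tokens: 12, output_tokens: 6 },
};
// The hook above would then set (assuming standard gen_ai.* key names):
//   span.setAttribute('gen_ai.usage.input_tokens', 12);
//   span.setAttribute('gen_ai.usage.output_tokens', 6);
//   span.setAttribute('gen_ai.response.finish_reasons', ['end_turn']);
```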