Extremely high calculated token count for VLM (Qwen 2.5 VL)

Reproduction
from openai import OpenAI
import base64
from qwen_vl_utils import process_vision_info
from transformers import Qwen2_5_VLProcessor

processor = Qwen2_5_VLProcessor.from_pretrained("Qwen/Qwen2.5-VL-3B-Instruct")

# Load the test image and base64-encode it for the data-URI payload
local_img = "imgs/zulassung.png"
with open(local_img, "rb") as f:
    image = base64.b64encode(f.read()).decode("utf-8")
# init the client but point it to TGI
client = OpenAI(base_url="http://localhost:8080/v1", api_key="-")
# OpenAI-style message payload sent to TGI
msgs = [
    {
        "role": "user",
        "content": [
            # {"type": "text", "text": "What's in this image?"},
            {
                "type": "image_url",
                "image_url": {"url": f"data:image/png;base64,{image}"},
            },
        ],
    },
]
# qwen_vl_utils-style message payload used for local token counting; here
# image_url is a plain string instead of the OpenAI {"url": ...} object
msgs_qwen = [
    {
        "role": "user",
        "content": [
            {"type": "text", "text": "What's in this image?"},
            {
                "type": "image_url",
                "image_url": f"data:image/png;base64,{image}",
            },
        ],
    },
]
chat_template = processor.apply_chat_template(msgs_qwen, add_generation_prompt=True)
image_inputs, _ = process_vision_info(msgs_qwen)
model_inputs = processor(
    text=[chat_template],
    images=image_inputs,
    return_tensors="pt",
)
# Count the tokens produced by the local processor
text_tokens = model_inputs["input_ids"].shape[-1]
print(model_inputs["pixel_values"].shape)
print(f"Input tokens: {text_tokens}")
chat_completion = client.chat.completions.create(
    model="tgi",
    messages=msgs,
    stream=False,
)
print(chat_completion)
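# Hedged sketch: non-streaming responses from TGI's OpenAI-compatible endpoint
# should include a `usage` field (availability may depend on the TGI version);
# comparing prompt_tokens against the local count above makes the gap concrete
if chat_completion.usage is not None:
    print(f"TGI prompt tokens: {chat_completion.usage.prompt_tokens}")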
Expected behavior
Local and TGI inference token counts should be the same, or at least similar. In this case, the discrepancy is approximately 4x.
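For reference, a rough estimate of the expected image token count (a minimal sketch, assuming Qwen2.5-VL's defaults of patch_size=14 and merge_size=2, i.e. one image token per 28x28 pixel area; smart_resize in qwen_vl_utils rounds both sides to multiples of 28 and may rescale further to respect its min/max pixel limits):

from PIL import Image

img = Image.open("imgs/zulassung.png")
w, h = img.size
approx_tokens = round(w / 28) * round(h / 28)
print(f"~{approx_tokens} image tokens expected for a {w}x{h} image")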
System Info
TGI 3.2.3, Docker, CUDA