Skip to content

chore: Bump to CUDA 12.8 and TRT 10.8 for Blackwell support #3405

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 7 commits into from
Feb 28, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 5 additions & 5 deletions .github/scripts/generate-release-matrix.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,16 +5,16 @@
import sys

RELEASE_CUDA_VERSION = {
"wheel": ["cu124"],
"tarball": ["cu124"],
"wheel": ["cu128"],
"tarball": ["cu128"],
}
RELEASE_PYTHON_VERSION = {
"wheel": ["3.8", "3.9", "3.10", "3.11", "3.12"],
"tarball": ["3.10"],
"wheel": ["3.9", "3.10", "3.11", "3.12"],
"tarball": ["3.11"],
}

CXX11_TARBALL_CONTAINER_IMAGE = {
"cu124": "pytorch/libtorch-cxx11-builder:cuda12.4-main",
"cu128": "pytorch/libtorch-cxx11-builder:cuda12.8-main",
}


Expand Down
34 changes: 13 additions & 21 deletions .github/scripts/generate-tensorrt-test-matrix.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,31 +11,23 @@
# channel: nightly - used when the future TensorRT version test workflow is triggered from the main branch or a personal branch
# channel: test - used when the future TensorRT version test workflow is triggered from a release branch (release/2.5, etc.)
CUDA_VERSIONS_DICT = {
"nightly": ["cu126"],
"test": ["cu124", "cu126"],
"release": ["cu124", "cu126"],
"nightly": ["cu128"],
"test": ["cu118", "cu126", "cu128"],
"release": ["cu118", "cu126", "cu128"],
}

# Update the Python versions you want to test against the future TensorRT version here.
# channel: nightly - used when the future TensorRT version test workflow is triggered from the main branch or a personal branch
# channel: test - used when the future TensorRT version test workflow is triggered from a release branch (release/2.5, etc.)
PYTHON_VERSIONS_DICT = {
"nightly": ["3.9"],
"nightly": ["3.11"],
"test": ["3.9", "3.10", "3.11", "3.12"],
"release": ["3.9", "3.10", "3.11", "3.12"],
}

# Update the future TensorRT versions you want to test here.
TENSORRT_VERSIONS_DICT = {
"windows": {
"10.4.0": {
"urls": "https://developer.nvidia.com/downloads/compute/machine-learning/tensorrt/10.4.0/zip/TensorRT-10.4.0.26.Windows.win10.cuda-12.6.zip",
"strip_prefix": "TensorRT-10.4.0.26",
},
"10.5.0": {
"urls": "https://developer.nvidia.com/downloads/compute/machine-learning/tensorrt/10.5.0/zip/TensorRT-10.5.0.18.Windows.win10.cuda-12.6.zip",
"strip_prefix": "TensorRT-10.5.0.18",
},
"10.6.0": {
"urls": "https://developer.nvidia.com/downloads/compute/machine-learning/tensorrt/10.6.0/zip/TensorRT-10.6.0.26.Windows.win10.cuda-12.6.zip",
"strip_prefix": "TensorRT-10.6.0.26",
Expand All @@ -44,16 +36,12 @@
"urls": "https://developer.nvidia.com/downloads/compute/machine-learning/tensorrt/10.7.0/zip/TensorRT-10.7.0.23.Windows.win10.cuda-12.6.zip",
"strip_prefix": "TensorRT-10.7.0.23",
},
"10.8.0": {
"urls": "https://developer.nvidia.com/downloads/compute/machine-learning/tensorrt/10.8.0/zip/TensorRT-10.8.0.43.Windows.win10.cuda-12.8.zip",
"strip_prefix": "TensorRT-10.8.0.43",
},
},
"linux": {
"10.4.0": {
"urls": "https://developer.nvidia.com/downloads/compute/machine-learning/tensorrt/10.4.0/tars/TensorRT-10.4.0.26.Linux.x86_64-gnu.cuda-12.6.tar.gz",
"strip_prefix": "TensorRT-10.4.0.26",
},
"10.5.0": {
"urls": "https://developer.nvidia.com/downloads/compute/machine-learning/tensorrt/10.5.0/tars/TensorRT-10.5.0.18.Linux.x86_64-gnu.cuda-12.6.tar.gz",
"strip_prefix": "TensorRT-10.5.0.18",
},
"10.6.0": {
"urls": "https://developer.nvidia.com/downloads/compute/machine-learning/tensorrt/10.6.0/tars/TensorRT-10.6.0.26.Linux.x86_64-gnu.cuda-12.6.tar.gz",
"strip_prefix": "TensorRT-10.6.0.26",
Expand All @@ -62,6 +50,10 @@
"urls": "https://developer.nvidia.com/downloads/compute/machine-learning/tensorrt/10.7.0/tars/TensorRT-10.7.0.23.Linux.x86_64-gnu.cuda-12.6.tar.gz",
"strip_prefix": "TensorRT-10.7.0.23",
},
"10.8.0": {
"urls": "https://developer.nvidia.com/downloads/compute/machine-learning/tensorrt/10.8.0/tars/TensorRT-10.8.0.43.Linux.x86_64-gnu.cuda-12.8.tar.gz",
"strip_prefix": "TensorRT-10.8.0.43",
},
},
}

Expand All @@ -87,7 +79,7 @@ def check_file_availability(url: str) -> bool:
# calculate the next minor version
minor = int(list(TENSORRT_VERSIONS_DICT["linux"].keys())[-1].split(".")[1]) + 1
trt_version = f"{major}.{minor}.0"
for patch in range(patch_from, 50):
for patch in range(patch_from, 80):
for cuda_minor in range(4, 11):
trt_linux_release_url_candidate = f"https://developer.nvidia.com/downloads/compute/machine-learning/tensorrt/{trt_version}/tars/TensorRT-{trt_version}.{patch}.Linux.x86_64-gnu.cuda-12.{cuda_minor}.tar.gz"
if check_file_availability(trt_linux_release_url_candidate):
Expand Down
16 changes: 5 additions & 11 deletions .github/scripts/generate_binary_build_matrix.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,23 +24,16 @@
"release": ["3.9", "3.10", "3.11", "3.12"],
}
CUDA_ARCHES_DICT = {
"nightly": ["11.8", "12.4", "12.6"],
"test": ["11.8", "12.1", "12.4"],
"release": ["11.8", "12.1", "12.4"],
"nightly": ["11.8", "12.6", "12.8"],
"test": ["11.8", "12.6", "12.8"],
"release": ["11.8", "12.6", "12.8"],
}
ROCM_ARCHES_DICT = {
"nightly": ["6.1", "6.2"],
"test": ["6.1", "6.2"],
"release": ["6.1", "6.2"],
}

CUDA_CUDDN_VERSIONS = {
"11.8": {"cuda": "11.8.0", "cudnn": "9"},
"12.1": {"cuda": "12.1.1", "cudnn": "9"},
"12.4": {"cuda": "12.4.1", "cudnn": "9"},
"12.6": {"cuda": "12.6.2", "cudnn": "9"},
}

PACKAGE_TYPES = ["wheel", "conda", "libtorch"]
PRE_CXX11_ABI = "pre-cxx11"
CXX11_ABI = "cxx11-abi"
Expand Down Expand Up @@ -151,6 +144,7 @@ def initialize_globals(channel: str, build_python_only: bool) -> None:
"12.1": "pytorch/manylinux2_28-builder:cuda12.1",
"12.4": "pytorch/manylinux2_28-builder:cuda12.4",
"12.6": "pytorch/manylinux2_28-builder:cuda12.6",
"12.8": "pytorch/manylinux2_28-builder:cuda12.8",
**{
gpu_arch: f"pytorch/manylinux2_28-builder:rocm{gpu_arch}"
for gpu_arch in ROCM_ARCHES
Expand Down Expand Up @@ -278,7 +272,7 @@ def get_wheel_install_command(
return f"{WHL_INSTALL_BASE} {PACKAGES_TO_INSTALL_WHL} --index-url {get_base_download_url_for_repo('whl', channel, gpu_arch_type, desired_cuda)}_pypi_pkg" # noqa: E501
else:
raise ValueError(
"Split build is not supported for this configuration. It is only supported for CUDA 11.8, 12.4, 12.6 on Linux nightly builds." # noqa: E501
"Split build is not supported for this configuration. It is only supported for CUDA 11.8, 12.4, 12.6, 12.8 on Linux nightly builds." # noqa: E501
)
if (
channel == RELEASE
Expand Down
3 changes: 2 additions & 1 deletion .github/workflows/build-test-linux.yml
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ jobs:
test-infra-ref: main
with-rocm: false
with-cpu: false
python-versions: '["3.11"]'

filter-matrix:
needs: [generate-matrix]
Expand All @@ -32,7 +33,7 @@ jobs:
steps:
- uses: actions/setup-python@v5
with:
python-version: '3.10'
python-version: '3.11'
- uses: actions/checkout@v4
with:
repository: pytorch/tensorrt
Expand Down
3 changes: 2 additions & 1 deletion .github/workflows/build-test-tensorrt-linux.yml
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ jobs:
test-infra-ref: main
with-rocm: false
with-cpu: false
python-versions: '["3.11"]'

generate-tensorrt-matrix:
needs: [generate-matrix]
Expand All @@ -29,7 +30,7 @@ jobs:
steps:
- uses: actions/setup-python@v5
with:
python-version: '3.10'
python-version: '3.11'
- uses: actions/checkout@v4
with:
repository: pytorch/tensorrt
Expand Down
3 changes: 2 additions & 1 deletion .github/workflows/build-test-tensorrt-windows.yml
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ jobs:
test-infra-ref: main
with-rocm: false
with-cpu: false
python-versions: '["3.11"]'

generate-tensorrt-matrix:
needs: [generate-matrix]
Expand All @@ -29,7 +30,7 @@ jobs:
steps:
- uses: actions/setup-python@v5
with:
python-version: '3.10'
python-version: '3.11'
- uses: actions/checkout@v4
with:
repository: pytorch/tensorrt
Expand Down
1 change: 1 addition & 0 deletions .github/workflows/build-test-windows.yml
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ jobs:
test-infra-ref: main
with-rocm: false
with-cpu: false
python-versions: '["3.11"]'

substitute-runner:
needs: generate-matrix
Expand Down
8 changes: 4 additions & 4 deletions .github/workflows/docgen.yml
Original file line number Diff line number Diff line change
Expand Up @@ -14,12 +14,12 @@ jobs:
if: ${{ ! contains(github.actor, 'pytorchbot') }}
environment: pytorchbot-env
container:
image: docker.io/pytorch/manylinux2_28-builder:cuda12.6
image: docker.io/pytorch/manylinux2_28-builder:cuda12.8
options: --gpus all
env:
CUDA_HOME: /usr/local/cuda-12.6
VERSION_SUFFIX: cu126
CU_VERSION: cu126
CUDA_HOME: /usr/local/cuda-12.8
VERSION_SUFFIX: cu128
CU_VERSION: cu128
CHANNEL: nightly
CI_BUILD: 1
steps:
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/generate_binary_build_matrix.yml
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,7 @@ jobs:
steps:
- uses: actions/setup-python@v5
with:
python-version: '3.10'
python-version: '3.11'
- name: Checkout test-infra repository
uses: actions/checkout@v4
with:
Expand Down
5 changes: 3 additions & 2 deletions .github/workflows/release-linux.yml
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ jobs:
test-infra-ref: main
with-rocm: false
with-cpu: false
python-versions: '["3.11"]'

generate-release-tarball-matrix:
needs: [generate-matrix]
Expand All @@ -33,7 +34,7 @@ jobs:
steps:
- uses: actions/setup-python@v5
with:
python-version: '3.10'
python-version: '3.11'
- uses: actions/checkout@v4
with:
repository: pytorch/tensorrt
Expand Down Expand Up @@ -83,7 +84,7 @@ jobs:
steps:
- uses: actions/setup-python@v5
with:
python-version: '3.10'
python-version: '3.11'
- uses: actions/checkout@v4
with:
repository: pytorch/tensorrt
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/release-wheel-linux.yml
Original file line number Diff line number Diff line change
Expand Up @@ -241,7 +241,7 @@ jobs:
name: ${{ env.ARTIFACT_NAME }}
path: ${{ inputs.repository }}/release/wheel/
- name: Upload pre-cxx11 tarball to GitHub
if: ${{ inputs.cxx11-tarball-release != 'true' && env.PYTHON_VERSION == '3.10' }}
if: ${{ inputs.cxx11-tarball-release != 'true' && env.PYTHON_VERSION == '3.11' }}
continue-on-error: true
uses: actions/upload-artifact@v4
with:
Expand Down
3 changes: 2 additions & 1 deletion .github/workflows/release-windows.yml
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ jobs:
test-infra-ref: main
with-rocm: false
with-cpu: false
python-versions: '["3.11"]'

generate-release-matrix:
needs: [generate-matrix]
Expand All @@ -33,7 +34,7 @@ jobs:
steps:
- uses: actions/setup-python@v5
with:
python-version: '3.10'
python-version: '3.11'
- uses: actions/checkout@v4
with:
repository: pytorch/tensorrt
Expand Down
18 changes: 9 additions & 9 deletions MODULE.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -36,13 +36,13 @@ new_local_repository = use_repo_rule("@bazel_tools//tools/build_defs/repo:local.
new_local_repository(
name = "cuda",
build_file = "@//third_party/cuda:BUILD",
path = "/usr/local/cuda-12.6/",
path = "/usr/local/cuda-12.8/",
)

new_local_repository(
name = "cuda_win",
build_file = "@//third_party/cuda:BUILD",
path = "C:/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v12.6/",
path = "C:/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v12.8/",
)

http_archive = use_repo_rule("@bazel_tools//tools/build_defs/repo:http.bzl", "http_archive")
Expand All @@ -55,21 +55,21 @@ http_archive(
name = "libtorch",
build_file = "@//third_party/libtorch:BUILD",
strip_prefix = "libtorch",
urls = ["https://download.pytorch.org/libtorch/nightly/cu126/libtorch-cxx11-abi-shared-with-deps-latest.zip"],
urls = ["https://download.pytorch.org/libtorch/nightly/cu128/libtorch-cxx11-abi-shared-with-deps-latest.zip"],
)

http_archive(
name = "libtorch_pre_cxx11_abi",
build_file = "@//third_party/libtorch:BUILD",
strip_prefix = "libtorch",
urls = ["https://download.pytorch.org/libtorch/nightly/cu126/libtorch-shared-with-deps-latest.zip"],
urls = ["https://download.pytorch.org/libtorch/nightly/cu128/libtorch-shared-with-deps-latest.zip"],
)

http_archive(
name = "libtorch_win",
build_file = "@//third_party/libtorch:BUILD",
strip_prefix = "libtorch",
urls = ["https://download.pytorch.org/libtorch/nightly/cu126/libtorch-win-shared-with-deps-latest.zip"],
urls = ["https://download.pytorch.org/libtorch/nightly/cu128/libtorch-win-shared-with-deps-latest.zip"],
)

# Download these tarballs manually from the NVIDIA website
Expand All @@ -79,18 +79,18 @@ http_archive(
http_archive(
name = "tensorrt",
build_file = "@//third_party/tensorrt/archive:BUILD",
strip_prefix = "TensorRT-10.7.0.23",
strip_prefix = "TensorRT-10.8.0.43",
urls = [
"https://developer.nvidia.com/downloads/compute/machine-learning/tensorrt/10.7.0/tars/TensorRT-10.7.0.23.Linux.x86_64-gnu.cuda-12.6.tar.gz",
"https://developer.nvidia.com/downloads/compute/machine-learning/tensorrt/10.8.0/tars/TensorRT-10.8.0.43.Linux.x86_64-gnu.cuda-12.8.tar.gz",
],
)

http_archive(
name = "tensorrt_win",
build_file = "@//third_party/tensorrt/archive:BUILD",
strip_prefix = "TensorRT-10.7.0.23",
strip_prefix = "TensorRT-10.8.0.43",
urls = [
"https://developer.nvidia.com/downloads/compute/machine-learning/tensorrt/10.7.0/zip/TensorRT-10.7.0.23.Windows.win10.cuda-12.6.zip",
"https://developer.nvidia.com/downloads/compute/machine-learning/tensorrt/10.8.0/zip/TensorRT-10.8.0.43.Windows.win10.cuda-12.8.zip",
],
)

Expand Down
8 changes: 4 additions & 4 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ Torch-TensorRT
[![Documentation](https://img.shields.io/badge/docs-master-brightgreen)](https://nvidia.github.io/Torch-TensorRT/)
[![pytorch](https://img.shields.io/badge/PyTorch-2.4-green)](https://www.python.org/downloads/release/python-31013/)
[![cuda](https://img.shields.io/badge/CUDA-12.4-green)](https://developer.nvidia.com/cuda-downloads)
[![trt](https://img.shields.io/badge/TensorRT-10.7.0-green)](https://github.com/nvidia/tensorrt-llm)
[![trt](https://img.shields.io/badge/TensorRT-10.8.0-green)](https://github.com/nvidia/tensorrt-llm)
[![license](https://img.shields.io/badge/license-BSD--3--Clause-blue)](./LICENSE)
[![linux_tests](https://github.com/pytorch/TensorRT/actions/workflows/build-test-linux.yml/badge.svg)](https://github.com/pytorch/TensorRT/actions/workflows/build-test-linux.yml)
[![windows_tests](https://github.com/pytorch/TensorRT/actions/workflows/build-test-windows.yml/badge.svg)](https://github.com/pytorch/TensorRT/actions/workflows/build-test-windows.yml)
Expand Down Expand Up @@ -117,9 +117,9 @@ auto results = trt_mod.forward({input_tensor});
The following dependencies were used to verify the test cases. Torch-TensorRT can work with other versions, but the tests are not guaranteed to pass.

- Bazel 6.3.2
- Libtorch 2.5.0.dev (latest nightly) (built with CUDA 12.4)
- CUDA 12.4
- TensorRT 10.7.0.23
- Libtorch 2.7.0.dev (latest nightly) (built with CUDA 12.8)
- CUDA 12.8
- TensorRT 10.8.0.43

## Deprecation Policy

Expand Down
4 changes: 2 additions & 2 deletions dev_dep_versions.yml
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
__cuda_version__: "12.6"
__tensorrt_version__: "10.7.0.post1"
__cuda_version__: "12.8"
__tensorrt_version__: "10.8.0"
7 changes: 4 additions & 3 deletions docker/Dockerfile
Original file line number Diff line number Diff line change
@@ -1,15 +1,16 @@
# syntax=docker/dockerfile:1

# Base image starts with CUDA
ARG BASE_IMG=nvidia/cuda:12.4.1-devel-ubuntu22.04
#TODO: cuda version
ARG BASE_IMG=nvidia/cuda:12.8.0-devel-ubuntu22.04
FROM ${BASE_IMG} as base
ENV BASE_IMG=nvidia/cuda:12.4.1-devel-ubuntu22.04
ENV BASE_IMG=nvidia/cuda:12.8.0-devel-ubuntu22.04

ARG TENSORRT_VERSION
ENV TENSORRT_VERSION=${TENSORRT_VERSION}
RUN test -n "$TENSORRT_VERSION" || (echo "No tensorrt version specified, please use --build-arg TENSORRT_VERSION=x.y to specify a version." && exit 1)

ARG PYTHON_VERSION=3.10
ARG PYTHON_VERSION=3.11
ENV PYTHON_VERSION=${PYTHON_VERSION}

ARG USE_PRE_CXX11_ABI
Expand Down
Loading