diff --git a/docker/Dockerfile b/docker/Dockerfile
index d235408c4b..a1ea5871a4 100644
--- a/docker/Dockerfile
+++ b/docker/Dockerfile
@@ -1,34 +1,58 @@
-ARG BASE=22.04
-ARG BASE_IMG=nvcr.io/nvidia/tensorrt:${BASE}-py3
+# Base image starts with CUDA
+ARG BASE_IMG=nvidia/cuda:11.7.1-devel-ubuntu18.04
 FROM ${BASE_IMG} as base
 
-FROM base as torch-tensorrt-builder-base
+# Install basic dependencies
+RUN apt-get update
+RUN apt install -y build-essential manpages-dev wget zlib1g software-properties-common git
+RUN add-apt-repository ppa:deadsnakes/ppa
+RUN apt install -y python3.8 python3.8-distutils python3.8-dev
+RUN wget https://bootstrap.pypa.io/get-pip.py
+RUN ln -s /usr/bin/python3.8 /usr/bin/python
+RUN python get-pip.py
+RUN pip3 install wheel
+
+# Install Pytorch
+RUN pip3 install torch==2.0.0.dev20230103+cu117 torchvision==0.15.0.dev20230103+cu117 --extra-index-url https://download.pytorch.org/whl/nightly/cu117
+
+# Install CUDNN + TensorRT
+RUN wget https://developer.download.nvidia.com/compute/cuda/repos/ubuntu1804/x86_64/cuda-ubuntu1804.pin
+RUN mv cuda-ubuntu1804.pin /etc/apt/preferences.d/cuda-repository-pin-600
+RUN apt-key adv --fetch-keys https://developer.download.nvidia.com/compute/cuda/repos/ubuntu1804/x86_64/7fa2af80.pub
+RUN apt-key adv --keyserver keyserver.ubuntu.com --recv-keys 536F8F1DE80F6A35
+RUN apt-key adv --keyserver keyserver.ubuntu.com --recv-keys A4B469963BF863CC
+RUN add-apt-repository "deb https://developer.download.nvidia.com/compute/cuda/repos/ubuntu1804/x86_64/ /"
+RUN apt-get update
+RUN apt-get install -y libcudnn8=8.5.0* libcudnn8-dev=8.5.0*
+
+RUN apt-key adv --fetch-keys https://developer.download.nvidia.com/compute/cuda/repos/ubuntu1804/x86_64/3bf863cc.pub
+RUN add-apt-repository "deb https://developer.download.nvidia.com/compute/cuda/repos/ubuntu1804/x86_64/ /"
+RUN apt-get update
+
+RUN apt-get install -y libnvinfer8=8.5.1* libnvinfer-plugin8=8.5.1* libnvinfer-dev=8.5.1* libnvinfer-plugin-dev=8.5.1* libnvonnxparsers8=8.5.1-1* libnvonnxparsers-dev=8.5.1-1* libnvparsers8=8.5.1-1* libnvparsers-dev=8.5.1-1*
+
+# Setup Bazel
+ARG BAZEL_VERSION=5.2.0
+RUN wget -q https://github.com/bazelbuild/bazel/releases/download/${BAZEL_VERSION}/bazel-${BAZEL_VERSION}-linux-x86_64 -O /usr/bin/bazel \
+    && chmod a+x /usr/bin/bazel
+
+# Remove cuda symlink to avoid bazel circular symlink errors
+RUN rm /usr/local/cuda-11.7/cuda-11.7
 
-# Removing any bazel or torch-tensorrt pre-installed from the base image
-RUN rm -rf /opt/pytorch/torch_tensorrt /usr/bin/bazel
+# Build Torch-TensorRT in an auxiliary container
+FROM base as torch-tensorrt-builder-base
 
 ARG ARCH="x86_64"
 ARG TARGETARCH="amd64"
-ARG BAZEL_VERSION=5.2.0
-
-RUN [[ "$TARGETARCH" == "amd64" ]] && ARCH="x86_64" || ARCH="${TARGETARCH}" \
-    && wget -q https://github.com/bazelbuild/bazel/releases/download/${BAZEL_VERSION}/bazel-${BAZEL_VERSION}-linux-${ARCH} -O /usr/bin/bazel \
-    && chmod a+x /usr/bin/bazel
-# Workaround for bazel expecting both static and shared versions, we only use shared libraries inside container
-RUN touch /usr/lib/$HOSTTYPE-linux-gnu/libnvinfer_static.a
-
-RUN rm -rf /usr/local/cuda/lib* /usr/local/cuda/include \
-    && ln -sf /usr/local/cuda/targets/$HOSTTYPE-linux/lib /usr/local/cuda/lib64 \
-    && ln -sf /usr/local/cuda/targets/$HOSTTYPE-linux/include /usr/local/cuda/include
+RUN apt-get install -y python3-setuptools
+RUN apt-key adv --fetch-keys https://developer.download.nvidia.com/compute/cuda/repos/ubuntu1804/x86_64/3bf863cc.pub
+RUN apt-get update
 
 RUN apt-get update && apt-get install -y --no-install-recommends locales ninja-build && rm -rf /var/lib/apt/lists/* && locale-gen en_US.UTF-8
 
 FROM torch-tensorrt-builder-base as torch-tensorrt-builder
 
-# Removing any bazel or torch-tensorrt pre-installed from the base image
-RUN rm -rf /opt/pytorch/torch_tensorrt
-
 COPY . /workspace/torch_tensorrt/src
 WORKDIR /workspace/torch_tensorrt/src
 RUN cp ./docker/WORKSPACE.docker WORKSPACE
@@ -36,27 +60,20 @@ RUN cp ./docker/WORKSPACE.docker WORKSPACE
 # This script builds both libtorchtrt bin/lib/include tarball and the Python wheel, in dist/
 RUN ./docker/dist-build.sh
 
+# Copy and install Torch-TRT into the main container
 FROM base as torch-tensorrt
 
-# Removing any bazel or torch-tensorrt pre-installed from the base image
-RUN rm -rf /opt/pytorch/torch_tensorrt
-
-# copy source repo
-COPY . /workspace/torch_tensorrt
+COPY . /opt/torch_tensorrt
 COPY --from=torch-tensorrt-builder /workspace/torch_tensorrt/src/py/dist/ .
 
-RUN pip3 install ipywidgets --trusted-host pypi.org --trusted-host pypi.python.org --trusted-host=files.pythonhosted.org
-RUN jupyter nbextension enable --py widgetsnbextension
-
+RUN cp /opt/torch_tensorrt/docker/WORKSPACE.docker /opt/torch_tensorrt/WORKSPACE
 RUN pip3 install *.whl && rm -fr /workspace/torch_tensorrt/py/dist/* *.whl
 
-ENV LD_LIBRARY_PATH /usr/local/lib/python3.8/dist-packages/torch/lib:/usr/local/lib/python3.8/dist-packages/torch_tensorrt/lib:${LD_LIBRARY_PATH}
+# Install native tensorrt python package required by torch_tensorrt whl file
+RUN pip install tensorrt==8.5.1.7
+
+WORKDIR /opt/torch_tensorrt
+ENV LD_LIBRARY_PATH /usr/local/lib/python3.8/dist-packages/torch/lib:/usr/local/lib/python3.8/dist-packages/torch_tensorrt/lib:/usr/lib/x86_64-linux-gnu:${LD_LIBRARY_PATH}
 ENV PATH /usr/local/lib/python3.8/dist-packages/torch_tensorrt/bin:${PATH}
-#
-WORKDIR /workspace
-RUN mv /workspace/torch_tensorrt /opt/torch_tensorrt
-RUN cp /opt/torch_tensorrt/docker/WORKSPACE.docker /opt/torch_tensorrt/WORKSPACE
-RUN mkdir torch_tensorrt
-RUN ln -s /opt/torch_tensorrt/notebooks /workspace/torch_tensorrt/notebooks
-CMD /bin/bash
+CMD /bin/bash
\ No newline at end of file
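The rewritten `Dockerfile` above pins the whole toolchain: CUDA 11.7, Python 3.8, the cu117 nightly Torch wheels, cuDNN 8.5, TensorRT 8.5.1 and Bazel 5.2.0. As a quick sanity check of a built image, something like the following sketch (not part of the diff; the expected values are simply the versions pinned above) can be run inside the container:
```
# Confirm the pinned toolchain from inside torch_tensorrt:latest
python -c "import torch; print(torch.__version__)"          # 2.0.0.dev20230103+cu117
python -c "import tensorrt; print(tensorrt.__version__)"    # 8.5.1.7, installed in the final stage
python -c "import torch_tensorrt; print(torch_tensorrt.__version__)"
bazel --version                                             # bazel 5.2.0
```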
diff --git a/docker/Dockerfile.ngc b/docker/Dockerfile.ngc
deleted file mode 100644
index f145fa8ff2..0000000000
--- a/docker/Dockerfile.ngc
+++ /dev/null
@@ -1,63 +0,0 @@
-ARG BASE=22.04
-ARG BASE_IMG=nvcr.io/nvidia/pytorch:${BASE}-py3
-FROM ${BASE_IMG} as base
-
-FROM base as torch-tensorrt-builder-base
-
-# Removing any bazel or torch-tensorrt pre-installed from the base image
-RUN rm -rf /opt/pytorch/torch_tensorrt /usr/bin/bazel
-
-ARG ARCH="x86_64"
-ARG TARGETARCH="amd64"
-ARG BAZEL_VERSION=5.2.0
-
-RUN [[ "$TARGETARCH" == "amd64" ]] && ARCH="x86_64" || ARCH="${TARGETARCH}" \
-    && wget -q https://github.com/bazelbuild/bazel/releases/download/${BAZEL_VERSION}/bazel-${BAZEL_VERSION}-linux-${ARCH} -O /usr/bin/bazel \
-    && chmod a+x /usr/bin/bazel
-
-# Workaround for bazel expecting both static and shared versions, we only use shared libraries inside container
-RUN touch /usr/lib/$HOSTTYPE-linux-gnu/libnvinfer_static.a
-
-RUN rm -rf /usr/local/cuda/lib* /usr/local/cuda/include \
-    && ln -sf /usr/local/cuda/targets/$HOSTTYPE-linux/lib /usr/local/cuda/lib64 \
-    && ln -sf /usr/local/cuda/targets/$HOSTTYPE-linux/include /usr/local/cuda/include
-
-RUN apt-get update && apt-get install -y --no-install-recommends locales ninja-build && rm -rf /var/lib/apt/lists/* && locale-gen en_US.UTF-8
-
-FROM torch-tensorrt-builder-base as torch-tensorrt-builder
-
-# Removing any bazel or torch-tensorrt pre-installed from the base image
-RUN rm -rf /opt/pytorch/torch_tensorrt
-
-COPY . /workspace/torch_tensorrt/src
-WORKDIR /workspace/torch_tensorrt/src
-RUN cp ./docker/WORKSPACE.docker WORKSPACE
-
-# This script builds both libtorchtrt bin/lib/include tarball and the Pythin wheel, in dist/
-RUN USE_CXX11=1 ./docker/dist-build.sh
-
-FROM base as torch-tensorrt
-
-# Removing any bazel or torch-tensorrt pre-installed from the base image
-RUN rm -rf /opt/pytorch/torch_tensorrt
-
-# copy source repo
-COPY . /workspace/torch_tensorrt
-COPY --from=torch-tensorrt-builder /workspace/torch_tensorrt/src/py/dist/ .
-RUN conda init bash
-
-RUN pip3 install ipywidgets --trusted-host pypi.org --trusted-host pypi.python.org --trusted-host=files.pythonhosted.org
-RUN jupyter nbextension enable --py widgetsnbextension
-
-RUN pip3 install *.whl && rm -fr /workspace/torch_tensorrt/py/dist/* *.whl
-
-ENV LD_LIBRARY_PATH /opt/conda/lib/python3.8/site-packages/torch/lib:/opt/conda/lib/python3.8/site-packages/torch_tensorrt/lib:${LD_LIBRARY_PATH}
-ENV PATH /opt/conda/lib/python3.8/site-packages/torch_tensorrt/bin:${PATH}
-#
-WORKDIR /workspace
-RUN mv /workspace/torch_tensorrt /opt/pytorch/torch_tensorrt
-RUN cp /opt/pytorch/torch_tensorrt/docker/WORKSPACE.docker /opt/pytorch/torch_tensorrt/WORKSPACE
-RUN mkdir torch_tensorrt
-RUN ln -s /opt/pytorch/torch_tensorrt/notebooks /workspace/torch_tensorrt/notebooks
-
-CMD /bin/bash
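With `Dockerfile.ngc` removed, the NGC path reduces to pulling the prebuilt container (see the new "Pytorch NGC containers" section in the README diff below). A sketch of that flow, reusing the `nvcr.io/nvidia/pytorch` image scheme from the deleted file; `23.XX` is a placeholder, not a real tag:
```
# Pull and start a Pytorch NGC container, which ships Torch-TensorRT preinstalled
# (replace 23.XX with an actual monthly release tag)
docker pull nvcr.io/nvidia/pytorch:23.XX-py3
docker run --gpus all -it --rm nvcr.io/nvidia/pytorch:23.XX-py3
```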
diff --git a/docker/README.md b/docker/README.md
index 47c23c926b..9ca93749a5 100644
--- a/docker/README.md
+++ b/docker/README.md
@@ -1,19 +1,41 @@
 # Building a Torch-TensorRT container
 
-### Install Docker and NVIDIA Container Toolkit
+* Use `Dockerfile` to build a container which provides the exact development environment that our master branch is usually tested against.
 
-https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/install-guide.html
+* `Dockerfile` currently uses the exact library versions (Torch, CUDA, CUDNN, TensorRT) listed in dependencies to build Torch-TensorRT.
 
-### Build Container
+* This `Dockerfile` installs the `pre-cxx11-abi` version of Pytorch and builds Torch-TRT using the `pre-cxx11-abi` libtorch as well.
+Note: To install the `cxx11_abi` version of Torch-TensorRT, enable the `USE_CXX11=1` flag so that `dist-build.sh` can build it accordingly.
+
+### Dependencies
+
+* Install nvidia-docker by following https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/install-guide.html#docker
+
+### Instructions
 
 > From root of Torch-TensorRT repo
 
+Build:
+```
+DOCKER_BUILDKIT=1 docker build -f docker/Dockerfile -t torch_tensorrt:latest .
 ```
-# Build:
-DOCKER_BUILDKIT=1 docker build --build-arg BASE={TensorRT Base Container Version} -f docker/Dockerfile -t torch_tensorrt1.0:latest .
-
-# Run:
-docker run --gpus all -it \
-    --shm-size=8gb --env="DISPLAY" --volume="/tmp/.X11-unix:/tmp/.X11-unix:rw" \
-    --name=torch_tensorrt1.0 --ipc=host --net=host torch_tensorrt1.0:latest
+Run:
 ```
+nvidia-docker run --gpus all -it --shm-size=8gb --env="DISPLAY" --volume="/tmp/.X11-unix:/tmp/.X11-unix:rw" --name=torch_tensorrt --ipc=host --net=host torch_tensorrt:latest
+```
+
+Test:
+
+You can run any converter test to verify that Torch-TRT was built successfully inside the container. Once you launch the container, you can run
+```
+bazel test //tests/core/conversion/converters:test_activation --compilation_mode=opt --test_output=summary --config use_precompiled_torchtrt --config pre_cxx11_abi
+```
+
+* `--config use_precompiled_torchtrt` : Instructs bazel to use the pre-installed Torch-TRT library when testing an application.
+* `--config pre_cxx11_abi` : Ensures `bazel test` uses the `pre_cxx11_abi` version of `libtorch`. Use the flag that matches the ABI of your Torch-TensorRT installation.
+
+### Pytorch NGC containers
+
+We also ship Torch-TensorRT in Pytorch NGC containers. Release notes for these containers can be found here. Check out the `release/ngc/23.XX` branch of Torch-TensorRT for the source code that ships with the `23.XX` release of the Pytorch NGC container.
\ No newline at end of file
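The README's note about `USE_CXX11=1` leaves the mechanics implicit. One way to produce a `cxx11_abi` image, shown here as a sketch that mirrors the build line of the deleted `Dockerfile.ngc` rather than a documented option of the new `Dockerfile`, is to edit the builder stage and rebuild:
```
# In docker/Dockerfile (torch-tensorrt-builder stage), change
#   RUN ./docker/dist-build.sh
# to the form the deleted Dockerfile.ngc used:
#   RUN USE_CXX11=1 ./docker/dist-build.sh
# then rebuild as usual:
DOCKER_BUILDKIT=1 docker build -f docker/Dockerfile -t torch_tensorrt:cxx11 .
```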
diff --git a/docker/dist-test.sh b/docker/dist-test.sh
deleted file mode 100755
index 29e84ef0d8..0000000000
--- a/docker/dist-test.sh
+++ /dev/null
@@ -1,8 +0,0 @@
-#!/bin/bash
-
-pip3 install timm --trusted-host pypi.org --trusted-host pypi.python.org --trusted-host=files.pythonhosted.org
-# Build and run unit tests
-cd tests/modules && python3 ./hub.py
-cd ../..
-
-bazel test //tests:tests //tests:python_api_tests --compilation_mode=opt --jobs=4 --define=torchtrt_src=prebuilt
diff --git a/docker/mha.patch b/docker/mha.patch
deleted file mode 100644
index cc7aa22db1..0000000000
--- a/docker/mha.patch
+++ /dev/null
@@ -1,19 +0,0 @@
---- torch/nn/functional.py	2021-10-01 16:53:42.827338664 -0700
-+++ functional.py	2021-10-01 16:53:34.639338618 -0700
-@@ -4975,7 +4975,7 @@
-         f"was expecting embedding dimension of {embed_dim_to_check}, but got {embed_dim}"
-     if isinstance(embed_dim, torch.Tensor):
-         # embed_dim can be a tensor when JIT tracing
--        head_dim = embed_dim.div(num_heads, rounding_mode='trunc')
-+        head_dim = int(embed_dim.div(num_heads, rounding_mode='trunc'))
-     else:
-         head_dim = embed_dim // num_heads
-     assert head_dim * num_heads == embed_dim, f"embed_dim {embed_dim} not divisible by num_heads {num_heads}"
-@@ -5044,6 +5044,7 @@
-     #
-     # reshape q, k, v for multihead attention and make em batch first
-     #
-+    bsz = int(bsz)
-     q = q.contiguous().view(tgt_len, bsz * num_heads, head_dim).transpose(0, 1)
-     if static_k is None:
-         k = k.contiguous().view(k.shape[0], bsz * num_heads, head_dim).transpose(0, 1)
diff --git a/docker/qat.patch b/docker/qat.patch
deleted file mode 100644
index cb39878201..0000000000
--- a/docker/qat.patch
+++ /dev/null
@@ -1,11 +0,0 @@
---- /opt/conda/lib/python3.8/site-packages/pytorch_quantization/nn/modules/tensor_quantizer.py	2021-08-16 22:50:37.000000000 +0000
-+++ tensor_quantizer.py	2021-10-19 20:41:54.288077426 +0000
-@@ -291,7 +291,7 @@
-         quant_dim = list(amax.shape).index(list(amax_sequeeze.shape)[0])
-         scale = amax_sequeeze / bound
-         outputs = torch.fake_quantize_per_channel_affine(
--            inputs, scale.data, torch.zeros_like(scale, dtype=torch.long).data, quant_dim,
-+            inputs, scale.data, torch.zeros_like(scale, dtype=torch.int32).data, quant_dim,
-             -bound - 1 if not self._unsigned else 0, bound)
-
-        return outputs
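With `dist-test.sh` deleted, `docker/` no longer carries its own test entry point; the `bazel test` command documented in the README is the replacement. An equivalent manual sequence inside the new container might look like the sketch below, assuming `tests/modules/hub.py` still stages the test models as it did in the deleted script, and substituting the README's bazel configs for the script's old `--define=torchtrt_src=prebuilt`:
```
# Stage test models, then run the test suites against the installed wheel
pip3 install timm
(cd tests/modules && python3 ./hub.py)
bazel test //tests:tests //tests:python_api_tests --compilation_mode=opt --jobs=4 --config use_precompiled_torchtrt --config pre_cxx11_abi
```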
diff --git a/docker/setup_nox.sh b/docker/setup_nox.sh
deleted file mode 100644
index 1f6d6e2006..0000000000
--- a/docker/setup_nox.sh
+++ /dev/null
@@ -1,21 +0,0 @@
-#!/bin/bash
-set -o nounset
-set -o errexit
-set -o pipefail
-set -e
-
-post=${1:-""}
-
-# fetch bazel executable
-BAZEL_VERSION=5.2.0
-ARCH=$(uname -m)
-if [[ "$ARCH" == "aarch64" ]]; then ARCH="arm64"; fi
-wget -q https://github.com/bazelbuild/bazel/releases/download/${BAZEL_VERSION}/bazel-${BAZEL_VERSION}-linux-${ARCH} -O /usr/bin/bazel
-chmod a+x /usr/bin/bazel
-export NVIDIA_TF32_OVERRIDE=0
-
-cd /opt/pytorch/torch_tensorrt
-cp /opt/pytorch/torch_tensorrt/docker/WORKSPACE.docker /opt/pytorch/torch_tensorrt/WORKSPACE
-
-pip install --user --upgrade nox
-TOP_DIR=/opt/pytorch/torch_tensorrt nox