
TensorFlow with CUDA or Python might rebuild more than necessary instead of re-using the bazel cache #16585

Closed
@lissyx

Description


Context: for DeepSpeech, we perform TensorFlow builds and then keep the cache in a tar archive (capturing the whole home directory of the build user). We then untar it, and the DeepSpeech build run through bazel build picks up the proper cached items, so it does not rebuild anything.

Recently, we started to see increased (2.5x) build times on CUDA-enabled builds. Debugging with Bazel showed that it was rebuilding because the actionKey computed for stream_executor_impl was different. By instrumenting Bazel to get more information, I could track the different actionKey down to its cause: the ordering of the CUDA includes had changed. The list itself contained exactly the same content, just in a different order.
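To make that concrete, here is a toy Python sketch (a stand-in for illustration only, not Bazel's actual actionKey computation; the paths are made up) showing how digesting the same list of includes in a different order yields a different key:

import hashlib

def toy_action_key(include_paths):
    # Stand-in for Bazel's actionKey: digest the inputs in the order they
    # are listed. Identical content in a different order gives a new key.
    h = hashlib.sha256()
    for path in include_paths:
        h.update(path.encode("utf-8"))
    return h.hexdigest()

includes = ["cuda/include/cublas_v2.h", "cuda/include/cuda.h",
            "cuda/include/cudnn.h"]
print(toy_action_key(includes))
print(toy_action_key(list(reversed(includes))))  # same files, different key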

Those includes are symlinks, and they are generated by genrules. This is all taken care of by _create_local_cuda_repository:

def _create_local_cuda_repository(repository_ctx):
  """Creates the repository containing files set up to build with CUDA."""
  cuda_config = _get_cuda_config(repository_ctx)
  cudnn_header_dir = _find_cudnn_header_dir(repository_ctx,
                                            cuda_config.cudnn_install_basedir)

  # Set up symbolic links for the cuda toolkit by creating genrules to do
  # symlinking. We create one genrule for each directory we want to track under
  # cuda_toolkit_path
  cuda_toolkit_path = cuda_config.cuda_toolkit_path
  cuda_include_path = cuda_toolkit_path + "/include"
  genrules = [symlink_genrule_for_dir(repository_ctx,
      cuda_include_path, "cuda/include", "cuda-include")]
  genrules.append(symlink_genrule_for_dir(repository_ctx,
      cuda_toolkit_path + "/nvvm", "cuda/nvvm", "cuda-nvvm"))
  genrules.append(symlink_genrule_for_dir(repository_ctx,
      cuda_toolkit_path + "/extras/CUPTI/include",
      "cuda/extras/CUPTI/include", "cuda-extras"))

  cuda_libs = _find_libs(repository_ctx, cuda_config)
  cuda_lib_src = []
  cuda_lib_dest = []
  for lib in cuda_libs.values():
    cuda_lib_src.append(lib.path)
    cuda_lib_dest.append("cuda/lib/" + lib.file_name)
  genrules.append(symlink_genrule_for_dir(repository_ctx, None, "", "cuda-lib",
                                          cuda_lib_src, cuda_lib_dest))

  # Set up the symbolic links for cudnn if cudnn was not installed to
  # CUDA_TOOLKIT_PATH.
  included_files = _read_dir(repository_ctx, cuda_include_path).replace(
      cuda_include_path, '').splitlines()
  if '/cudnn.h' not in included_files:
    genrules.append(symlink_genrule_for_dir(repository_ctx, None,
        "cuda/include/", "cudnn-include", [cudnn_header_dir + "/cudnn.h"],
        ["cudnn.h"]))
  else:
    genrules.append(
        'filegroup(\n' +
        '    name = "cudnn-include",\n' +
        '    srcs = [],\n' +
        ')\n'
    )

  # Set up BUILD file for cuda/
  _tpl(repository_ctx, "cuda:build_defs.bzl",
       {
           "%{cuda_is_configured}": "True",
           "%{cuda_extra_copts}": _compute_cuda_extra_copts(
               repository_ctx, cuda_config.compute_capabilities),
       })
  _tpl(repository_ctx, "cuda:BUILD",
       {
           "%{cuda_driver_lib}": cuda_libs["cuda"].file_name,
           "%{cudart_static_lib}": cuda_libs["cudart_static"].file_name,
           "%{cudart_static_linkopt}": _cudart_static_linkopt(
               cuda_config.cpu_value),
           "%{cudart_lib}": cuda_libs["cudart"].file_name,
           "%{cublas_lib}": cuda_libs["cublas"].file_name,
           "%{cusolver_lib}": cuda_libs["cusolver"].file_name,
           "%{cudnn_lib}": cuda_libs["cudnn"].file_name,
           "%{cufft_lib}": cuda_libs["cufft"].file_name,
           "%{curand_lib}": cuda_libs["curand"].file_name,
           "%{cupti_lib}": cuda_libs["cupti"].file_name,
           "%{cuda_include_genrules}": "\n".join(genrules),
           "%{cuda_headers}": ('":cuda-include",\n' +
                               '        ":cudnn-include",')
       })

  is_cuda_clang = _use_cuda_clang(repository_ctx)
  should_download_clang = is_cuda_clang and _flag_enabled(
      repository_ctx, _TF_DOWNLOAD_CLANG)
  if should_download_clang:
    download_clang(repository_ctx, "crosstool/extra_tools")

  # Set up crosstool/
  cc = find_cc(repository_ctx)
  cc_fullpath = cc if not should_download_clang else "crosstool/" + cc
  host_compiler_includes = _host_compiler_includes(repository_ctx, cc_fullpath)
  cuda_defines = {
      "%{cuda_include_path}": _cuda_include_path(repository_ctx,
                                                 cuda_config),
      "%{host_compiler_includes}": host_compiler_includes,
  }
  if is_cuda_clang:
    cuda_defines["%{clang_path}"] = cc
    _tpl(repository_ctx, "crosstool:BUILD", {"%{linker_files}": ":empty"})
    _tpl(repository_ctx, "crosstool:CROSSTOOL_clang", cuda_defines,
         out="crosstool/CROSSTOOL")
    repository_ctx.file(
        "crosstool/clang/bin/crosstool_wrapper_driver_is_not_gcc", "")
  else:
    nvcc_path = str(repository_ctx.path("%s/bin/nvcc%s" %
        (cuda_config.cuda_toolkit_path,
         ".exe" if cuda_config.cpu_value == "Windows" else "")))
    _tpl(repository_ctx, "crosstool:BUILD",
         {"%{linker_files}": ":crosstool_wrapper_driver_is_not_gcc"})
    _tpl(repository_ctx, "crosstool:CROSSTOOL_nvcc", cuda_defines,
         out="crosstool/CROSSTOOL")
    _tpl(repository_ctx,
         "crosstool:clang/bin/crosstool_wrapper_driver_is_not_gcc",
         {
             "%{cpu_compiler}": str(cc),
             "%{cuda_version}": cuda_config.cuda_version,
             "%{nvcc_path}": nvcc_path,
             "%{gcc_host_compiler_path}": str(cc),
             "%{cuda_compute_capabilities}": ", ".join(
                 ["\"%s\"" % c for c in cuda_config.compute_capabilities]),
         })

  # Set up cuda_config.h, which is used by
  # tensorflow/stream_executor/dso_loader.cc.
  _tpl(repository_ctx, "cuda:cuda_config.h",
       {
           "%{cuda_version}": cuda_config.cuda_version,
           "%{cudnn_version}": cuda_config.cudnn_version,
           "%{cuda_compute_capabilities}": ",".join(
               ["CudaVersion(\"%s\")" % c
                for c in cuda_config.compute_capabilities]),
           "%{cuda_toolkit_path}": cuda_config.cuda_toolkit_path,
       }, "cuda/cuda/cuda_config.h")
This generates the shell scripts for the genrules that actually perform the symlinking. Checking those shell scripts revealed exactly the same content, but in a different order.
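For illustration, a generated genrule looks roughly like the following (hypothetical paths, heavily abbreviated, and the exact command text may differ from what symlink_genrule_for_dir really emits):

genrule(
    name = "cuda-include",
    outs = [
        "cuda/include/cublas_v2.h",
        "cuda/include/cuda.h",
    ],
    cmd = """ln -s "/usr/local/cuda/include/cublas_v2.h" "$(@D)/cuda/include/cublas_v2.h" && \
ln -s "/usr/local/cuda/include/cuda.h" "$(@D)/cuda/include/cuda.h" """,
)

Both the outs list and the chained cmd are emitted in the order the headers were discovered, so a reordering changes the generated BUILD text, and with it the actionKey, even though the resulting tree is identical.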

Checking more carefully, one will see that the headers are discovered by the _read_dir function:

  find_result = _execute(
      repository_ctx, ["find", src_dir, "-follow", "-type", "f"],
      empty_stdout_fine=True)
  result = find_result.stdout

It directly takes the output of find as-is, which is dependent on the ordering provided by the readdir syscall.
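This behavior is easy to reproduce outside Bazel, for instance with Python (the path here is hypothetical):

import os

# Like `find`, os.listdir returns entries in raw readdir order, which is
# filesystem-dependent and not guaranteed stable across machines or
# across a tar/untar round-trip.
entries = os.listdir("/usr/local/cuda/include")
print(entries)          # order may vary
print(sorted(entries))  # deterministic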

In our case, the ordering on the filesystem before creating the tar archive and after untarring it would differ.

One simple fix is to force an ordering on the list of headers; this way we are sure the order is always the same and we do not depend on whatever readdir happens to return.
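A minimal sketch of that fix, applied to the _read_dir snippet above (keeping the find invocation unchanged):

  find_result = _execute(
      repository_ctx, ["find", src_dir, "-follow", "-type", "f"],
      empty_stdout_fine=True)
  # Sort find's output so the genrule outs/cmd, and therefore the
  # actionKey, no longer depend on readdir ordering.
  result = "\n".join(sorted(find_result.stdout.splitlines()))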

In the past, Bazel forced an ordering on the elements considered when computing the actionKey, which might have kept this issue hidden; that behavior was removed in 0.3.0: bazelbuild/bazel@9dc3211
