Open
Description
Bug Description
__________________________________________________________________________________________________________________________________________________ TestAutomaticPlugin.test_mul_plugin_float_0 __________________________________________________________________________________________________________________________________________________
a = (<dynamo.automatic_plugin.test_automatic_plugin.TestAutomaticPlugin testMethod=test_mul_plugin_float_0>,), kw = {}
@wraps(func)
def standalone_func(*a, **kw):
> return func(*(a + p.args), **p.kwargs, **kw)
../.venv/lib/python3.9/site-packages/parameterized/parameterized.py:620:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
automatic_plugin/test_automatic_plugin.py:79: in test_mul_plugin_float
self.run_test(elementwise_mul(), inputs)
conversion/harness.py:482: in run_test
super().run_test(
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <dynamo.automatic_plugin.test_automatic_plugin.TestAutomaticPlugin testMethod=test_mul_plugin_float_0>, mod = GraphModule()
inputs = [tensor([[1., 4., 1., ..., 3., 0., 0.],
[0., 4., 1., ..., 0., 4., 0.],
[3., 2., 1., ..., 3., 4., 3...., ..., 0., 2., 2.],
[2., 4., 1., ..., 3., 4., 1.],
[2., 4., 3., ..., 2., 0., 1.]], device='cuda:0')]
interpreter = <torch_tensorrt.dynamo.conversion._TRTInterpreter.TRTInterpreter object at 0x72c4196865b0>, rtol = 0.005, atol = 0.005, check_dtype = True
pyt_inputs = [tensor([[1., 4., 1., ..., 3., 0., 0.],
[0., 4., 1., ..., 0., 4., 0.],
[3., 2., 1., ..., 3., 4., 3...., ..., 0., 2., 2.],
[2., 4., 1., ..., 3., 4., 1.],
[2., 4., 3., ..., 2., 0., 1.]], device='cuda:0')]
rt_cls = <class 'torch_tensorrt.dynamo.runtime._PythonTorchTensorRTModule.PythonTorchTensorRTModule'>
def run_test(
self,
mod,
inputs,
interpreter,
rtol=RTOL,
atol=ATOL,
check_dtype=True,
pyt_inputs=None,
rt_cls=PythonTorchTensorRTModule,
):
with torch.no_grad():
cuda_inputs = []
for i in inputs:
cuda_inputs.append(i.cuda())
start = time.perf_counter()
interpreter_result = interpreter.run()
sec = time.perf_counter() - start
_LOGGER.info(f"Interpreter run time(s): {sec}")
trt_mod = rt_cls(
serialized_engine=interpreter_result.serialized_engine,
input_binding_names=list(interpreter_result.input_names),
output_binding_names=list(interpreter_result.output_names),
name="test_engine",
requires_output_allocator=interpreter_result.requires_output_allocator,
)
mod = mod.cuda()
if pyt_inputs is not None:
pyt_inputs_cuda = [
i.cuda() if isinstance(i, torch.Tensor) else i for i in pyt_inputs
]
ref_outputs = mod(*pyt_inputs_cuda)
else:
ref_outputs = mod(*cuda_inputs)
torch.cuda.synchronize()
start_event = torch.cuda.Event(enable_timing=True)
end_event = torch.cuda.Event(enable_timing=True)
start_event.record()
outputs = trt_mod(*cuda_inputs)
end_event.record()
torch.cuda.synchronize()
_LOGGER.info(
f"TRT run time(s)= {(start_event.elapsed_time(end_event) * 1.0e-3)}"
)
if type(outputs) not in (list, tuple):
outputs = [outputs]
if type(ref_outputs) not in (
list,
tuple,
torch.return_types.max,
torch.return_types.min,
):
ref_outputs = [ref_outputs]
for out, ref in zip(outputs, ref_outputs):
if not isinstance(ref, torch.Tensor):
if len(out.shape) == 0:
ref = torch.tensor(ref)
else:
ref = torch.tensor([ref])
ref = ref.cpu() # to_dtype test has cases with gpu output
> torch.testing.assert_close(
out.cpu(),
ref,
rtol=rtol,
atol=atol,
equal_nan=True,
check_dtype=check_dtype,
)
E AssertionError: Tensor-likes are not close!
E
E Mismatched elements: 4096 / 4096 (100.0%)
E Greatest absolute difference: 16.200000762939453 at index (0, 3) (up to 0.005 allowed)
E Greatest relative difference: inf at index (0, 2) (up to 0.005 allowed)
E
E To execute this test, run the following from the base repo dir:
E python test_automatic_plugin.py TestAutomaticPlugin.test_mul_plugin_float_0
E
E This message can be suppressed by setting PYTORCH_PRINT_REPRO_ON_FAILURE=0
conversion/harness.py:248: AssertionError
__________________________________________________________________________________________________________________________________________________ TestAutomaticPlugin.test_mul_plugin_float_1 __________________________________________________________________________________________________________________________________________________
a = (<dynamo.automatic_plugin.test_automatic_plugin.TestAutomaticPlugin testMethod=test_mul_plugin_float_1>,), kw = {}
@wraps(func)
def standalone_func(*a, **kw):
> return func(*(a + p.args), **p.kwargs, **kw)
../.venv/lib/python3.9/site-packages/parameterized/parameterized.py:620:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
automatic_plugin/test_automatic_plugin.py:79: in test_mul_plugin_float
self.run_test(elementwise_mul(), inputs)
conversion/harness.py:482: in run_test
super().run_test(
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <dynamo.automatic_plugin.test_automatic_plugin.TestAutomaticPlugin testMethod=test_mul_plugin_float_1>, mod = GraphModule()
inputs = [tensor([[1, 4, 1, ..., 3, 2, 4],
[4, 4, 1, ..., 1, 0, 0],
[2, 0, 2, ..., 3, 2, 3],
...,
....., 2, 1, 1],
[0, 3, 3, ..., 2, 1, 0],
[4, 4, 4, ..., 2, 4, 3]], device='cuda:0', dtype=torch.int32)]
interpreter = <torch_tensorrt.dynamo.conversion._TRTInterpreter.TRTInterpreter object at 0x72c45aa30fa0>, rtol = 0.005, atol = 0.005, check_dtype = True
pyt_inputs = [tensor([[1, 4, 1, ..., 3, 2, 4],
[4, 4, 1, ..., 1, 0, 0],
[2, 0, 2, ..., 3, 2, 3],
...,
....., 2, 1, 1],
[0, 3, 3, ..., 2, 1, 0],
[4, 4, 4, ..., 2, 4, 3]], device='cuda:0', dtype=torch.int32)]
rt_cls = <class 'torch_tensorrt.dynamo.runtime._PythonTorchTensorRTModule.PythonTorchTensorRTModule'>
def run_test(
self,
mod,
inputs,
interpreter,
rtol=RTOL,
atol=ATOL,
check_dtype=True,
pyt_inputs=None,
rt_cls=PythonTorchTensorRTModule,
):
with torch.no_grad():
cuda_inputs = []
for i in inputs:
cuda_inputs.append(i.cuda())
start = time.perf_counter()
interpreter_result = interpreter.run()
sec = time.perf_counter() - start
_LOGGER.info(f"Interpreter run time(s): {sec}")
trt_mod = rt_cls(
serialized_engine=interpreter_result.serialized_engine,
input_binding_names=list(interpreter_result.input_names),
output_binding_names=list(interpreter_result.output_names),
name="test_engine",
requires_output_allocator=interpreter_result.requires_output_allocator,
)
mod = mod.cuda()
if pyt_inputs is not None:
pyt_inputs_cuda = [
i.cuda() if isinstance(i, torch.Tensor) else i for i in pyt_inputs
]
ref_outputs = mod(*pyt_inputs_cuda)
else:
ref_outputs = mod(*cuda_inputs)
torch.cuda.synchronize()
start_event = torch.cuda.Event(enable_timing=True)
end_event = torch.cuda.Event(enable_timing=True)
start_event.record()
outputs = trt_mod(*cuda_inputs)
end_event.record()
torch.cuda.synchronize()
_LOGGER.info(
f"TRT run time(s)= {(start_event.elapsed_time(end_event) * 1.0e-3)}"
)
if type(outputs) not in (list, tuple):
outputs = [outputs]
if type(ref_outputs) not in (
list,
tuple,
torch.return_types.max,
torch.return_types.min,
):
ref_outputs = [ref_outputs]
for out, ref in zip(outputs, ref_outputs):
if not isinstance(ref, torch.Tensor):
if len(out.shape) == 0:
ref = torch.tensor(ref)
else:
ref = torch.tensor([ref])
ref = ref.cpu() # to_dtype test has cases with gpu output
> torch.testing.assert_close(
out.cpu(),
ref,
rtol=rtol,
atol=atol,
equal_nan=True,
check_dtype=check_dtype,
)
E AssertionError: Tensor-likes are not close!
E
E Mismatched elements: 41785 / 65536 (63.8%)
E Greatest absolute difference: 16 at index (0, 3) (up to 0.005 allowed)
E Greatest relative difference: 1.0 at index (0, 0) (up to 0.005 allowed)
E
E To execute this test, run the following from the base repo dir:
E python test_automatic_plugin.py TestAutomaticPlugin.test_mul_plugin_float_1
E
E This message can be suppressed by setting PYTORCH_PRINT_REPRO_ON_FAILURE=0
conversion/harness.py:248: AssertionError
===========================================
To Reproduce
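Steps to reproduce the behavior:
1. From the base repo dir, run `python test_automatic_plugin.py TestAutomaticPlugin.test_mul_plugin_float_0` (and likewise `TestAutomaticPlugin.test_mul_plugin_float_1`), as printed in the failure output above.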
Expected behavior
Environment
Build information about Torch-TensorRT can be found by turning on debug messages
- Torch-TensorRT Version (e.g. 1.0.0):
- PyTorch Version (e.g. 1.0):
- CPU Architecture:
- OS (e.g., Linux):
- How you installed PyTorch (`conda`, `pip`, `libtorch`, source):
- Build command you used (if compiling from source):
- Are you using local sources or building from archives:
- Python version:
- CUDA version:
- GPU models and configuration:
- Any other relevant information: