Skip to content

Commit fe61881

Browse files
authored
Move most xfails to entries in tank/all_models.csv and temporarily remove multiprocessing and TF gpu support. (huggingface#602)
* Move most xfails to entries in tank/all_models.csv
* Enable usage of pytest without specifying tank/test_models.py
* Add dict_configs.py to gitignore
* Pin versions for runtimes and torch-mlir for setup
1 parent 09c45bf commit fe61881

File tree

13 files changed

+134
-168
lines changed

13 files changed

+134
-168
lines changed

.github/workflows/nightly.yml

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -122,7 +122,7 @@ jobs:
122122
continue-on-error: true
123123
run: |
124124
cd $GITHUB_WORKSPACE
125-
USE_IREE=1 VENV_DIR=iree.venv ./setup_venv.sh
125+
USE_IREE=1 VENV_DIR=iree.venv NIGHTLY=1 ./setup_venv.sh
126126
source iree.venv/bin/activate
127127
package_version="$(printf '%(%Y%m%d)T.${{ github.run_number }}')"
128128
SHARK_PACKAGE_VERSION=${package_version} \
@@ -146,7 +146,7 @@ jobs:
146146
if: ${{ matrix.backend == 'SHARK' }}
147147
run: |
148148
cd $GITHUB_WORKSPACE
149-
./setup_venv.sh
149+
NIGHTLY=1 ./setup_venv.sh
150150
source shark.venv/bin/activate
151151
package_version="$(printf '%(%Y%m%d)T.${{ github.run_number }}')"
152152
SHARK_PACKAGE_VERSION=${package_version} \

.github/workflows/test-models.yml

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -123,7 +123,7 @@ jobs:
123123
echo "VULKAN SDK PATH with setup: $VULKAN_SDK"
124124
echo $PATH
125125
pip list | grep -E "torch|iree"
126-
pytest --ci --ci_sha=${SHORT_SHA} --local_tank_cache="/Volumes/builder/anush/shark_cache" tank/test_models.py -k vulkan --update_tank
126+
pytest -s --ci --ci_sha=${SHORT_SHA} --local_tank_cache="/Volumes/builder/anush/shark_cache" tank/test_models.py -k vulkan --update_tank
127127
128128
- name: Validate Vulkan Models (a100)
129129
if: matrix.suite == 'vulkan' && matrix.os != 'MacStudio'

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -164,6 +164,7 @@ cython_debug/
164164
shark_tmp/
165165
*.vmfb
166166
.use-iree
167+
tank/dict_configs.py
167168

168169
# ORT related artefacts
169170
cache_models/

build_tools/shark_versions.txt

Lines changed: 8 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,8 @@
1+
# IREE Compiler/ Runtime Version:
2+
20221207.350
3+
# SHARK Compiler/ Runtime Version:
4+
20221207.236
5+
# Torch-MLIR Version for IREE:
6+
20221207.680
7+
# Torch-MLIR Version for SHARK:
8+
20221207.680

conftest.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -38,7 +38,7 @@ def pytest_addoption(parser):
3838
)
3939
parser.addoption(
4040
"--update_tank",
41-
action="store_true",
41+
action="store_false",
4242
default="False",
4343
help="Update local shark tank with latest artifacts.",
4444
)

pytest.ini

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1,3 +1,3 @@
11
[pytest]
22
addopts = --verbose -p no:warnings
3-
norecursedirs = inference tank/tflite
3+
norecursedirs = inference tank/tflite examples benchmarks shark

setup.py

Lines changed: 9 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -6,12 +6,18 @@
66
with open("README.md", "r", encoding="utf-8") as fh:
77
long_description = fh.read()
88

9+
with open("build_tools/shark_versions.txt", "r") as sv:
10+
lines = [line.rstrip() for line in sv]
11+
TM_VERSION = lines[7]
12+
IREE_VERSION = lines[3]
13+
14+
915
PACKAGE_VERSION = os.environ.get("SHARK_PACKAGE_VERSION") or "0.0.4"
1016
backend_deps = []
1117
if "NO_BACKEND" in os.environ.keys():
1218
backend_deps = [
13-
"iree-compiler>=20221022.190",
14-
"iree-runtime>=20221022.190",
19+
f"iree-compiler=={IREE_VERSION}",
20+
f"iree-runtime>={IREE_VERSION}",
1521
]
1622

1723
setup(
@@ -37,7 +43,7 @@
3743
install_requires=[
3844
"numpy",
3945
"PyYAML",
40-
"torch-mlir>=20221021.633",
46+
f"torch-mlir=={TM_VERSION}",
4147
]
4248
+ backend_deps,
4349
)

setup_venv.sh

Lines changed: 21 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -76,12 +76,15 @@ fi
7676
$PYTHON -m pip install --upgrade pip || die "Could not upgrade pip"
7777
$PYTHON -m pip install --upgrade -r "$TD/requirements.txt"
7878
if [ "$torch_mlir_bin" = true ]; then
79+
TM_VERSION=$(sed '8q;d' build_tools/shark_versions.txt)
7980
if [[ $(uname -s) = 'Darwin' ]]; then
8081
echo "MacOS detected. Installing torch-mlir from .whl, to avoid dependency problems with torch."
81-
$PYTHON -m pip install --pre --no-cache-dir torch-mlir -f https://llvm.github.io/torch-mlir/package-index/ -f https://download.pytorch.org/whl/nightly/torch/
82-
else
82+
$PYTHON -m pip install --pre --no-cache-dir torch-mlir==${TM_VERSION} -f https://llvm.github.io/torch-mlir/package-index/ -f https://download.pytorch.org/whl/nightly/torch/
83+
elif [[ ! -z "${NIGHTLY}" ]]; then
8384
$PYTHON -m pip install --pre torch-mlir -f https://llvm.github.io/torch-mlir/package-index/
84-
if [ $? -eq 0 ];then
85+
else
86+
$PYTHON -m pip install --pre torch-mlir==${TM_VERSION} -f https://llvm.github.io/torch-mlir/package-index/
87+
if [ $? -eq 0 ]; then
8588
echo "Successfully Installed torch-mlir"
8689
else
8790
echo "Could not install torch-mlir" >&2
@@ -96,13 +99,17 @@ fi
9699
if [[ -z "${USE_IREE}" ]]; then
97100
rm .use-iree
98101
RUNTIME="https://nod-ai.github.io/SHARK-Runtime/pip-release-links.html"
102+
RUNTIME_VERSION=$(sed '4q;d' build_tools/shark_versions.txt)
103+
TM_VERSION=$(sed '8q;d' build_tools/shark_versions.txt)
99104
else
100105
touch ./.use-iree
101106
RUNTIME="https://iree-org.github.io/iree/pip-release-links.html"
107+
RUNTIME_VERSION=$(sed '2q;d' build_tools/shark_versions.txt)
108+
TM_VERSION=$(sed '6q;d' build_tools/shark_versions.txt)
102109
fi
103110
if [[ -z "${NO_BACKEND}" ]]; then
104111
echo "Installing ${RUNTIME}..."
105-
$PYTHON -m pip install --upgrade --find-links ${RUNTIME} iree-compiler iree-runtime
112+
$PYTHON -m pip install --upgrade --find-links ${RUNTIME} iree-compiler==${RUNTIME_VERSION} iree-runtime==${RUNTIME_VERSION}
106113
else
107114
echo "Not installing a backend, please make sure to add your backend to PYTHONPATH"
108115
fi
@@ -122,6 +129,7 @@ fi
122129

123130
$PYTHON -m pip install --no-warn-conflicts -e . -f https://llvm.github.io/torch-mlir/package-index/ -f ${RUNTIME} -f https://download.pytorch.org/whl/nightly/torch/
124131

132+
125133
if [[ $(uname -s) = 'Linux' && ! -z "${BENCHMARK}" ]]; then
126134
$PYTHON -m pip uninstall -y torch torchvision
127135
$PYTHON -m pip install --pre torch torchvision --extra-index-url https://download.pytorch.org/whl/nightly/cu117
@@ -142,6 +150,15 @@ if [[ ! -z "${ONNX}" ]]; then
142150
fi
143151
fi
144152

153+
if [[ ! -z "${NIGHTLY}" ]]; then
154+
$PYTHON -m pip install --upgrade --pre iree-compiler iree-runtime torch-mlir -f https://llvm.github.io/torch-mlir/package-index/ -f $RUNTIME -f https://download.pytorch.org/whl/nightly/torch/
155+
if [ $? -eq 0 ];then
156+
echo "Successfully Installed latest packages for nightly job."
157+
else
158+
echo "Could not install latest IREE and Torch-MLIR." >&2
159+
fi
160+
fi
161+
145162
if [[ -z "${CONDA_PREFIX}" ]]; then
146163
echo "${Green}Before running examples activate venv with:"
147164
echo " ${Green}source $VENV_DIR/bin/activate"

shark/parser.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -108,7 +108,7 @@ def dir_file(path):
108108
parser.add_argument(
109109
"--enable_conv_transform",
110110
default=False,
111-
action="store",
111+
action="store_false",
112112
help="Enables the --iree-flow-enable-conv-nchw-to-nhwc-transform flag.",
113113
)
114114

shark/shark_benchmark_runner.py

Lines changed: 14 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -100,6 +100,7 @@ def setup_cl(self, input_tensors):
100100
def benchmark_frontend(self, modelname):
101101
if self.mlir_dialect in ["linalg", "torch"]:
102102
return self.benchmark_torch(modelname)
103+
103104
elif self.mlir_dialect in ["mhlo", "tf"]:
104105
return self.benchmark_tf(modelname)
105106

@@ -138,9 +139,21 @@ def benchmark_torch(self, modelname):
138139

139140
def benchmark_tf(self, modelname):
140141
import tensorflow as tf
142+
143+
visible_default = tf.config.list_physical_devices("GPU")
144+
try:
145+
tf.config.set_visible_devices([], "GPU")
146+
visible_devices = tf.config.get_visible_devices()
147+
for device in visible_devices:
148+
assert device.device_type != "GPU"
149+
except:
150+
# Invalid device or cannot modify virtual devices once initialized.
151+
pass
152+
141153
from tank.model_utils_tf import get_tf_model
142154

143-
tf_device = "/GPU:0" if self.device == "cuda" else "/CPU:0"
155+
# tf_device = "/GPU:0" if self.device == "cuda" else "/CPU:0"
156+
tf_device = "/CPU:0"
144157
with tf.device(tf_device):
145158
model, input, = get_tf_model(
146159
modelname

shark/shark_downloader.py

Lines changed: 1 addition & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -118,9 +118,7 @@ def check_dir_exists(model_name, frontend="torch", dynamic=""):
118118
and os.path.isfile(os.path.join(model_dir, "golden_out.npz"))
119119
and os.path.isfile(os.path.join(model_dir, "hash.npy"))
120120
):
121-
print(
122-
f"""Using cached models from {WORKDIR}..."""
123-
)
121+
print(f"""Using cached models from {WORKDIR}...""")
124122
return True
125123
return False
126124

tank/all_models.csv

Lines changed: 34 additions & 35 deletions
Original file line number | Diff line number | Diff line change
@@ -1,35 +1,34 @@
1-
resnet50,mhlo,tf,1e-2,1e-3,default,nhcw-nhwc
2-
albert-base-v2,mhlo,tf,1e-2,1e-2,default,None
3-
roberta-base,mhlo,tf,1e-02,1e-3,default,nhcw-nhwc
4-
bert-base-uncased,mhlo,tf,1e-2,1e-3,default,None
5-
camembert-base,mhlo,tf,1e-2,1e-3,default,None
6-
dbmdz/convbert-base-turkish-cased,mhlo,tf,1e-2,1e-3,default,nhcw-nhwc
7-
distilbert-base-uncased,mhlo,tf,1e-2,1e-3,default,None
8-
facebook/convnext-tiny-224,mhlo,tf,1e-2,1e-3,tf_vit,nhcw-nhwc,
9-
funnel-transformer/small,mhlo,tf,1e-2,1e-3,default,None
10-
google/electra-small-discriminator,mhlo,tf,1e-2,1e-3,default,None
11-
google/mobilebert-uncased,mhlo,tf,1e-2,1e-3,default,None
12-
google/vit-base-patch16-224,mhlo,tf,1e-2,1e-3,tf_vit,nhcw-nhwc
13-
hf-internal-testing/tiny-random-flaubert,mhlo,tf,1e-2,1e-3,default,None
14-
microsoft/MiniLM-L12-H384-uncased,mhlo,tf,1e-2,1e-3,tf_hf,None
15-
microsoft/layoutlm-base-uncased,mhlo,tf,1e-2,1e-3,default,None
16-
microsoft/mpnet-base,mhlo,tf,1e-2,1e-2,default,None
17-
albert-base-v2,linalg,torch,1e-2,1e-3,default,None
18-
alexnet,linalg,torch,1e-2,1e-3,default,None
19-
bert-base-cased,linalg,torch,1e-2,1e-3,default,None
20-
bert-base-uncased,linalg,torch,1e-2,1e-3,default,None
21-
facebook/deit-small-distilled-patch16-224,linalg,torch,1e-2,1e-3,default,nhcw-nhwc
22-
google/vit-base-patch16-224,linalg,torch,1e-2,1e-3,default,nhcw-nhwc
23-
microsoft/beit-base-patch16-224-pt22k-ft22k,linalg,torch,1e-2,1e-3,default,nhcw-nhwc
24-
microsoft/MiniLM-L12-H384-uncased,linalg,torch,1e-2,1e-3,default,None
25-
microsoft/resnet-50,linalg,torch,1e-2,1e-3,default,nhcw-nhwc
26-
google/mobilebert-uncased,linalg,torch,1e-2,1e-3,default,None
27-
mobilenet_v3_small,linalg,torch,1e-1,1e-2,default,nhcw-nhwc
28-
nvidia/mit-b0,linalg,torch,1e-2,1e-3,default,None
29-
resnet101,linalg,torch,1e-2,1e-3,default,nhcw-nhwc
30-
resnet18,linalg,torch,1e-2,1e-3,default,None
31-
resnet50,linalg,torch,1e-2,1e-3,default,nhcw-nhwc
32-
squeezenet1_0,linalg,torch,1e-2,1e-3,default,nhcw-nhwc
33-
wide_resnet50_2,linalg,torch,1e-2,1e-3,default,nhcw-nhwc
34-
efficientnet-v2-s,mhlo,tf,1e-02,1e-3,default,nhcw-nhwc
35-
mnasnet1_0,linalg,torch,1e-2,1e-3,default,nhcw-nhwc
1+
resnet50,mhlo,tf,1e-2,1e-3,default,nhcw-nhwc,False,False,True,"Vulkan Numerical Error: mostly conv"
2+
albert-base-v2,mhlo,tf,1e-2,1e-2,default,None,False,False,False,""
3+
roberta-base,mhlo,tf,1e-02,1e-3,default,nhcw-nhwc,False,False,False,""
4+
bert-base-uncased,mhlo,tf,1e-2,1e-3,default,None,False,False,False,""
5+
camembert-base,mhlo,tf,1e-2,1e-3,default,None,False,False,False,""
6+
dbmdz/convbert-base-turkish-cased,mhlo,tf,1e-2,1e-3,default,nhcw-nhwc,True,False,True,"https://github.com/iree-org/iree/issues/9971"
7+
distilbert-base-uncased,mhlo,tf,1e-2,1e-3,default,None,False,False,False,""
8+
facebook/convnext-tiny-224,mhlo,tf,1e-2,1e-3,tf_vit,nhcw-nhwc,False,True,True,"https://github.com/nod-ai/SHARK/issues/311 & https://github.com/nod-ai/SHARK/issues/342"
9+
funnel-transformer/small,mhlo,tf,1e-2,1e-3,default,None,False,True,True,"https://github.com/nod-ai/SHARK/issues/201"
10+
google/electra-small-discriminator,mhlo,tf,1e-2,1e-3,default,None,False,False,False,""
11+
google/mobilebert-uncased,mhlo,tf,1e-2,1e-3,default,None,False,False,False,""
12+
google/vit-base-patch16-224,mhlo,tf,1e-2,1e-3,tf_vit,nhcw-nhwc,False,False,True,"Vulkan Numerical Error (mostly conv)"
13+
microsoft/MiniLM-L12-H384-uncased,mhlo,tf,1e-2,1e-3,tf_hf,None,False,False,False,""
14+
microsoft/layoutlm-base-uncased,mhlo,tf,1e-2,1e-3,default,None,False,False,False,""
15+
microsoft/mpnet-base,mhlo,tf,1e-2,1e-2,default,None,False,False,False,""
16+
albert-base-v2,linalg,torch,1e-2,1e-3,default,None,False,False,False,""
17+
alexnet,linalg,torch,1e-2,1e-3,default,None,False,False,True,"Assertion Error: Zeros Output"
18+
bert-base-cased,linalg,torch,1e-2,1e-3,default,None,False,False,False,""
19+
bert-base-uncased,linalg,torch,1e-2,1e-3,default,None,False,False,False,""
20+
facebook/deit-small-distilled-patch16-224,linalg,torch,1e-2,1e-3,default,nhcw-nhwc,False,True,False,"Fails during iree-compile."
21+
google/vit-base-patch16-224,linalg,torch,1e-2,1e-3,default,nhcw-nhwc,False,True,False,"https://github.com/nod-ai/SHARK/issues/311"
22+
microsoft/beit-base-patch16-224-pt22k-ft22k,linalg,torch,1e-2,1e-3,default,nhcw-nhwc,False,True,False,"https://github.com/nod-ai/SHARK/issues/390"
23+
microsoft/MiniLM-L12-H384-uncased,linalg,torch,1e-2,1e-3,default,None,False,False,True,""
24+
microsoft/resnet-50,linalg,torch,1e-2,1e-3,default,nhcw-nhwc,False,False,True,"Vulkan Numerical Error (mostly conv)"
25+
google/mobilebert-uncased,linalg,torch,1e-2,1e-3,default,None,False,False,True,"https://github.com/nod-ai/SHARK/issues/344"
26+
mobilenet_v3_small,linalg,torch,1e-1,1e-2,default,nhcw-nhwc,False,True,True,"https://github.com/nod-ai/SHARK/issues/388"
27+
nvidia/mit-b0,linalg,torch,1e-2,1e-3,default,None,True,True,True,"https://github.com/nod-ai/SHARK/issues/343"
28+
resnet101,linalg,torch,1e-2,1e-3,default,nhcw-nhwc,False,False,True,"Vulkan Numerical Error (mostly conv)"
29+
resnet18,linalg,torch,1e-2,1e-3,default,None,True,True,True,""
30+
resnet50,linalg,torch,1e-2,1e-3,default,nhcw-nhwc,False,False,True,"Vulkan Numerical Error (mostly conv)"
31+
squeezenet1_0,linalg,torch,1e-2,1e-3,default,nhcw-nhwc,False,False,True,"https://github.com/nod-ai/SHARK/issues/388"
32+
wide_resnet50_2,linalg,torch,1e-2,1e-3,default,nhcw-nhwc,False,False,True,"Vulkan Numerical Error (mostly conv)"
33+
efficientnet-v2-s,mhlo,tf,1e-02,1e-3,default,nhcw-nhwc,False,False,True,"https://github.com/nod-ai/SHARK/issues/575"
34+
mnasnet1_0,linalg,torch,1e-2,1e-3,default,nhcw-nhwc,False,False,True,"https://github.com/nod-ai/SHARK/issues/388"

0 commit comments

Comments
 (0)