Improve docs for the TF ResNet50 example (#1472)

RobertLucian · web-flow · commit 72624c9360c0 · 2020-10-21T21:29:37.000+03:00
diff --git a/build/lint.sh b/build/lint.sh
@@ -82,6 +82,7 @@ output=$(cd "$ROOT" && find . -type f \
 ! -name "go.*" \
 ! -name "*.md" \
 ! -name ".*" \
+! -name "*.bin" \
 ! -name "Dockerfile" \
 -exec grep -L "Copyright 2020 Cortex Labs, Inc" {} \;)
 if [[ $output ]]; then
@@ -101,6 +102,7 @@ if [ "$is_release_branch" = "true" ]; then
   ! -path "./bin/*" \
   ! -path "./.git/*" \
   ! -name ".*" \
+  ! -name "*.bin" \
   -exec grep -R -A 5 -e "CORTEX_VERSION" {} \;)
   output=$(echo "$output" | grep -e "master" || true)
   if [[ $output ]]; then
@@ -119,6 +121,7 @@ if [ "$is_release_branch" = "true" ]; then
   ! -path "./bin/*" \
   ! -path "./.git/*" \
   ! -name ".*" \
+  ! -name "*.bin" \
   -exec grep -l "WARNING: you are on the master branch" {} \;)
   if [[ $output ]]; then
     echo "file(s) have the master version warning:"
@@ -132,6 +135,7 @@ if [ "$is_release_branch" = "true" ]; then
   ! -name "*.json" \
   ! -name "*.txt" \
   ! -name ".*" \
+  ! -name "*.bin" \
   -exec grep -L -e "this is an example for cortex release ${git_branch} and may not deploy correctly on other releases of cortex" {} \;)
   if [[ $output ]]; then
     echo "examples file(s) are missing appropriate version comment:"
@@ -148,6 +152,7 @@ else
   ! -name "*.json" \
   ! -name "*.txt" \
   ! -name ".*" \
+  ! -name "*.bin" \
   -exec grep -L "WARNING: you are on the master branch, please refer to the docs on the branch that matches your \`cortex version\`" {} \;)
   if [[ $output ]]; then
     echo "docs file(s) are missing appropriate version comment:"
@@ -162,6 +167,7 @@ else
   ! -name "*.json" \
   ! -name "*.txt" \
   ! -name ".*" \
+  ! -name "*.bin" \
   -exec grep -L "WARNING: you are on the master branch; please refer to examples on the branch corresponding to your \`cortex version\` (e\.g\. for version [0-9]*\.[0-9]*\.\*, run \`git checkout -b [0-9]*\.[0-9]*\` or switch to the \`[0-9]*\.[0-9]*\` branch on GitHub)" {} \;)
   if [[ $output ]]; then
     echo "example file(s) are missing version appropriate comment:"
@@ -178,6 +184,7 @@ output=$(cd "$ROOT" && find . -type f \
 ! -path "./bin/*" \
 ! -path "./.git/*" \
 ! -name ".*" \
+! -name "*.bin" \
 -exec egrep -l " +$" {} \;)
 if [[ $output ]]; then
   echo "File(s) have lines with trailing whitespace:"
@@ -193,6 +200,7 @@ output=$(cd "$ROOT" && find . -type f \
 ! -path "./bin/*" \
 ! -path "./.git/*" \
 ! -name ".*" \
+! -name "*.bin" \
 -print0 | \
 xargs -0 -L1 bash -c 'test "$(tail -c 1 "$0")" && echo "No new line at end of $0"' || true)
 if [[ $output ]]; then
@@ -208,6 +216,7 @@ output=$(cd "$ROOT" && find . -type f \
 ! -path "./bin/*" \
 ! -path "./.git/*" \
 ! -name ".*" \
+! -name "*.bin" \
 -print0 | \
 xargs -0 -L1 bash -c 'test "$(tail -c 2 "$0")" || echo "Multiple new lines at end of $0"' || true)
 if [[ $output ]]; then
@@ -223,6 +232,7 @@ output=$(cd "$ROOT" && find . -type f \
 ! -path "./bin/*" \
 ! -path "./.git/*" \
 ! -name ".*" \
+! -name "*.bin" \
 -print0 | \
 xargs -0 -L1 bash -c 'test "$(head -c 1 "$0")" || echo "New line at beginning of $0"' || true)
 if [[ $output ]]; then
diff --git a/examples/tensorflow/image-classifier-resnet50/README.md b/examples/tensorflow/image-classifier-resnet50/README.md
@@ -55,6 +55,16 @@ Then, deploy each API one at a time and check the results:
 1. Running `python ../../utils/throughput_test.py -i 30 -p 4 -t 24` with the [cortex_gpu.yaml](cortex_gpu.yaml) API running on an `g4dn.xlarge` instance will get **~125 inferences/sec** with an average latency of **85 ms**. Optimizing the model with TensorRT to use FP16 on TF-serving only seems to achieve a 10% performance improvement - one thing to consider is that the TensorRT engines hadn't been built beforehand, so this might have affected the results negatively.
 1. Running `python ../../utils/throughput_test.py -i 30 -p 4 -t 60` with the [cortex_gpu_server_side_batching.yaml](cortex_gpu_batch_sized.yaml) API running on an `g4dn.xlarge` instance will get **~186 inferences/sec** with an average latency of **500 ms**. This achieves a 49% higher throughput than the [cortex_gpu.yaml](cortex_gpu.yaml) API, at the expense of increased latency.
 
+As an alternative to [throughput_test.py](../../utils/throughput_test.py), the `ab` (ApacheBench) utility can also be used to benchmark the API. This has the advantage that it's not as taxing on your local machine, but the disadvantage that it doesn't implement a cooldown period. You can run `ab` like this:
+
+```bash
+# for making octet-stream requests, which is the default for the throughput_test script
+ab -n <number-of-requests> -c <concurrency-level> -p sample.bin -T 'application/octet-stream' -rks 120 $ENDPOINT
+
+# for making json requests, which will have lower performance because the API has to download the image every time
+ab -n <number-of-requests> -c <concurrency-level> -p sample.json -T 'application/json' -rks 120 $ENDPOINT
+```
+
 *Note: `inf1.xlarge` isn't used because the major bottleneck with `inf` instances for this example is with the CPU, and `inf1.2xlarge` has twice the amount of CPU cores for same number of Inferentia ASICs (which is 1), which translates to almost double the throughput.*
 
 ## Exporting SavedModels
diff --git a/examples/tensorflow/image-classifier-resnet50/sample.bin b/examples/tensorflow/image-classifier-resnet50/sample.bin