Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 3 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# installTensorFlowTX2
April 1, 2017
Install TensorFlow v1.0.1 on NVIDIA Jetson TX2 Development Kit
Install TensorFlow v1.2.0 on NVIDIA Jetson TX2 Development Kit

Jetson TX2 is flashed with JetPack 3.0 which installs:
* L4T 27.1 an Ubuntu 16.04 64-bit variant (aarch64)
Expand All @@ -16,6 +16,7 @@ Note: This procedure was derived from these discussion threads:
<li>https://github.com/tensorflow/tensorflow/issues/851</li>
<li>http://stackoverflow.com/questions/39783919/tensorflow-on-nvidia-tx1/</li>
<li>https://devtalk.nvidia.com/default/topic/1000717/tensorflow-on-jetson-tx2/</li>
<li>https://github.com/tensorflow/tensorflow/issues/9697</li>
</ul>

TensorFlow should be built in the following order:
Expand All @@ -27,7 +28,7 @@ Installs Java and other dependencies needed. Also builds:
Builds version 0.4.5. Includes patches for compiling under aarch64.

#### cloneTensorFlow.sh
Git clones v1.0.1 from the TensorFlow repository and patches the source code for aarch64
Git clones v1.2.0 from the TensorFlow repository and patches the source code for aarch64

#### setTensorFlowEV.sh
Sets up the TensorFlow environment variables. This script will ask for the default python library path. There are many settings to chose from, the script picks the usual suspects. Uses python 2.7.
Expand Down
2 changes: 1 addition & 1 deletion cloneTensorFlow.sh
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ INSTALL_DIR=$PWD
cd $HOME
git clone https://github.com/tensorflow/tensorflow.git
cd tensorflow
git checkout v1.0.1
git checkout v1.2.0
patch -p1 < $INSTALL_DIR/patches/tensorflow.patch


Expand Down
2 changes: 1 addition & 1 deletion packageTensorFlow.sh
Original file line number Diff line number Diff line change
Expand Up @@ -2,5 +2,5 @@
# NVIDIA Jetson TX1
cd $HOME/tensorflow
bazel-bin/tensorflow/tools/pip_package/build_pip_package /tmp/tensorflow_pkg
mv /tmp/tensorflow_pkg/tensorflow-1.0.1-cp27-cp27mu-linux_aarch64.whl $HOME/
mv /tmp/tensorflow_pkg/tensorflow-1.2.0-cp27-cp27mu-linux_aarch64.whl $HOME/

95 changes: 20 additions & 75 deletions patches/tensorflow.patch
Original file line number Diff line number Diff line change
@@ -1,75 +1,20 @@
diff --git a/tensorflow/core/kernels/BUILD b/tensorflow/core/kernels/BUILD
index 2e04827..9d81923 100644
--- a/tensorflow/core/kernels/BUILD
+++ b/tensorflow/core/kernels/BUILD
@@ -1184,7 +1184,7 @@ tf_kernel_libraries(
"segment_reduction_ops",
"scan_ops",
"sequence_ops",
- "sparse_matmul_op",
+ #DC "sparse_matmul_op",
],
deps = [
":bounds_check",
diff --git a/tensorflow/core/kernels/cwise_op_gpu_select.cu.cc b/tensorflow/core/kernels/cwise_op_gpu_select.cu.cc
index 02058a8..880a0c3 100644
--- a/tensorflow/core/kernels/cwise_op_gpu_select.cu.cc
+++ b/tensorflow/core/kernels/cwise_op_gpu_select.cu.cc
@@ -43,8 +43,14 @@ struct BatchSelectFunctor<GPUDevice, T> {
const int all_but_batch = then_flat_outer_dims.dimension(1);

#if !defined(EIGEN_HAS_INDEX_LIST)
- Eigen::array<int, 2> broadcast_dims{{ 1, all_but_batch }};
- Eigen::Tensor<int, 2>::Dimensions reshape_dims{{ batch, 1 }};
+ // Eigen::array<int, 2> broadcast_dims{{ 1, all_but_batch }};
+ Eigen::array<int, 2> broadcast_dims;
+ broadcast_dims[0] = 1;
+ broadcast_dims[1] = all_but_batch;
+ // Eigen::Tensor<int, 2>::Dimensions reshape_dims{{ batch, 1 }};
+ Eigen::Tensor<int, 2>::Dimensions reshape_dims;
+ reshape_dims[0] = batch;
+ reshape_dims[1] = 1;
#else
Eigen::IndexList<Eigen::type2index<1>, int> broadcast_dims;
broadcast_dims.set(1, all_but_batch);
diff --git a/tensorflow/core/kernels/sparse_tensor_dense_matmul_op_gpu.cu.cc b/tensorflow/core/kernels/sparse_tensor_dense_matmul_op_gpu.cu.cc
index a177696..28d2f59 100644
--- a/tensorflow/core/kernels/sparse_tensor_dense_matmul_op_gpu.cu.cc
+++ b/tensorflow/core/kernels/sparse_tensor_dense_matmul_op_gpu.cu.cc
@@ -104,9 +104,17 @@ struct SparseTensorDenseMatMulFunctor<GPUDevice, T, ADJ_A, ADJ_B> {
int n = (ADJ_B) ? b.dimension(0) : b.dimension(1);

#if !defined(EIGEN_HAS_INDEX_LIST)
- Eigen::Tensor<int, 2>::Dimensions matrix_1_by_nnz{{ 1, nnz }};
- Eigen::array<int, 2> n_by_1{{ n, 1 }};
- Eigen::array<int, 1> reduce_on_rows{{ 0 }};
+ // Eigen::Tensor<int, 2>::Dimensions matrix_1_by_nnz{{ 1, nnz }};
+ Eigen::Tensor<int, 2>::Dimensions matrix_1_by_nnz;
+ matrix_1_by_nnz[0] = 1;
+ matrix_1_by_nnz[1] = nnz;
+ // Eigen::array<int, 2> n_by_1{{ n, 1 }};
+ Eigen::array<int, 2> n_by_1;
+ n_by_1[0] = n;
+ n_by_1[1] = 1;
+ // Eigen::array<int, 1> reduce_on_rows{{ 0 }};
+ Eigen::array<int, 1> reduce_on_rows;
+ reduce_on_rows[0]= 0;
#else
Eigen::IndexList<Eigen::type2index<1>, int> matrix_1_by_nnz;
matrix_1_by_nnz.set(1, nnz);
diff --git a/tensorflow/stream_executor/cuda/cuda_gpu_executor.cc b/tensorflow/stream_executor/cuda/cuda_gpu_executor.cc
index b2da109..8ee1f3a 100644
--- a/tensorflow/stream_executor/cuda/cuda_gpu_executor.cc
+++ b/tensorflow/stream_executor/cuda/cuda_gpu_executor.cc
@@ -870,7 +870,10 @@ CudaContext* CUDAExecutor::cuda_context() { return context_; }
// For anything more complicated/prod-focused than this, you'll likely want to
// turn to gsys' topology modeling.
static int TryToReadNumaNode(const string &pci_bus_id, int device_ordinal) {
-#if defined(__APPLE__)
+#ifdef __aarch64__
+ LOG(INFO) << "ARM64 does not support NUMA - returning NUMA node zero";
+ return 0;
+#elif defined(__APPLE__)
LOG(INFO) << "OS X does not support NUMA - returning NUMA node zero";
return 0;
#elif defined(PLATFORM_WINDOWS)
diff --git a/a/tensorflow/workspace.bzl b/b/tensorflow/workspace.bzl
index 2a206b0..d785473 100644
--- a/a/tensorflow/workspace.bzl
+++ b/b/tensorflow/workspace.bzl
@@ -150,11 +150,11 @@ def tf_workspace(path_prefix="", tf_repo_name=""):
native.new_http_archive(
name = "eigen_archive",
urls = [
- "http://mirror.bazel.build/bitbucket.org/eigen/eigen/get/f3a22f35b044.tar.gz",
- "https://bitbucket.org/eigen/eigen/get/f3a22f35b044.tar.gz",
+ "http://mirror.bazel.build/bitbucket.org/eigen/eigen/get/d781c1de9834.tar.gz",
+ "https://bitbucket.org/eigen/eigen/get/d781c1de9834.tar.gz",
],
- sha256 = "ca7beac153d4059c02c8fc59816c82d54ea47fe58365e8aded4082ded0b820c4",
- strip_prefix = "eigen-eigen-f3a22f35b044",
+ sha256 = "a34b208da6ec18fa8da963369e166e4a368612c14d956dd2f9d7072904675d9b",
+ strip_prefix = "eigen-eigen-d781c1de9834",
build_file = str(Label("//third_party:eigen.BUILD")),
)