stan-dev
diff --git a/Jenkinsfile b/Jenkinsfile
Lines changed: 3 additions & 3 deletions
diff --git a/stan/math/fwd/mat/fun/dot_product.hpp b/stan/math/fwd/mat/fun/dot_product.hpp
Lines changed: 54 additions & 0 deletions
diff --git a/stan/math/opencl/copy.hpp b/stan/math/opencl/copy.hpp
Lines changed: 75 additions & 4 deletions
@@ -11,8 +11,8 @@ def runTests(String testPath) {
 def runTestsWin(String testPath) {
     withEnv(['PATH+TBB=./lib/tbb']) {
        bat "echo $PATH"
-       bat "runTests.py -j${env.PARALLEL} ${testPath} --make-only"
-       try { bat "runTests.py -j${env.PARALLEL} ${testPath}" }
+       bat "runTests.py -j12 ${testPath} --make-only"  // NOTE(review): -j12 is hard-coded; the removed line used ${env.PARALLEL} - confirm intended
+       try { bat "runTests.py -j12 ${testPath}" }
        finally { junit 'test/**/*.xml' }
     }
 }
@@ -160,7 +160,7 @@ pipeline {
                         deleteDir()
                         unstash 'MathSetup'
                         sh "echo CXX=${MPICXX} >> make/local"
-                        sh "echo CXX_TYPE=gcc >> make/local"                        
+                        sh "echo CXX_TYPE=gcc >> make/local"
                         sh "echo STAN_MPI=true >> make/local"
                         runTests("test/unit")
                     }
 
@@ -157,6 +157,60 @@ inline fvar<T> dot_product(const std::vector<fvar<T> >& v1,
   return ret;
 }
 
+/**
+ * Return dot product of two fvar sequences over their first length elements.
+ *
+ * @tparam T type of scalar within fvar
+ * @param v1 pointer to first sequence (at least length elements)
+ * @param v2 pointer to second sequence (at least length elements)
+ * @param length number of elements to multiply from each sequence
+ * @return dot product of sequences up to length
+ */
+template <typename T>
+inline fvar<T> dot_product(const fvar<T>* v1, const fvar<T>* v2,
+                           size_type length) {
+  fvar<T> y = 0;
+  for (size_type i = 0; i < length; ++i)  // index type matches length
+    y += v1[i] * v2[i];
+  return y;
+}
+
+/**
+ * Return dot product of a double sequence and an fvar sequence.
+ *
+ * @tparam T type of scalar within fvar
+ * @param v1 pointer to first sequence (doubles, at least length elements)
+ * @param v2 pointer to second sequence (fvars, at least length elements)
+ * @param length number of elements to multiply from each sequence
+ * @return dot product of sequences up to length
+ */
+template <typename T>
+inline fvar<T> dot_product(const double* v1, const fvar<T>* v2,
+                           size_type length) {
+  fvar<T> y = 0;
+  for (size_type i = 0; i < length; ++i)  // index type matches length
+    y += v1[i] * v2[i];
+  return y;
+}
+
+/**
+ * Return dot product of an fvar sequence and a double sequence.
+ *
+ * @tparam T type of scalar within fvar
+ * @param v1 pointer to first sequence (fvars, at least length elements)
+ * @param v2 pointer to second sequence (doubles, at least length elements)
+ * @param length number of elements to multiply from each sequence
+ * @return dot product of sequences up to length
+ */
+template <typename T>
+inline fvar<T> dot_product(const fvar<T>* v1, const double* v2,
+                           size_type length) {
+  fvar<T> y = 0;
+  for (size_type i = 0; i < length; ++i)  // index type matches length
+    y += v1[i] * v2[i];
+  return y;
+}
+
 }  // namespace math
 }  // namespace stan
 #endif
@@ -36,7 +36,24 @@ namespace math {
 template <typename Mat, typename Mat_scalar = scalar_type_t<Mat>,
           require_eigen_t<Mat>...>
 inline matrix_cl<Mat_scalar> to_matrix_cl(Mat&& src) {
-  return matrix_cl<Mat_scalar>(src);
+  // Uploads src into a newly constructed src.rows() x src.cols() matrix_cl.
+  matrix_cl<Mat_scalar> dst(src.rows(), src.cols());
+  if (src.size() == 0) {
+    return dst;
+  }
+  try {
+    cl::Event transfer_event;
+    cl::CommandQueue& queue = opencl_context.queue();
+    // Blocking write: src.eval() may yield a temporary that only lives until
+    // the end of this statement, so the copy has to finish before then.
+    queue.enqueueWriteBuffer(dst.buffer(), true, 0,
+                             sizeof(Mat_scalar) * src.size(),
+                             src.eval().data(), nullptr, &transfer_event);
+    dst.add_write_event(transfer_event);
+  } catch (const cl::Error& e) {
+    check_opencl_error("copy Eigen->(OpenCL)", e);
+  }
+  return dst;
 }
 
 /**
@@ -50,7 +67,24 @@ inline matrix_cl<Mat_scalar> to_matrix_cl(Mat&& src) {
 template <typename Vec, typename Vec_scalar = scalar_type_t<Vec>,
           require_std_vector_t<Vec>...>
 inline matrix_cl<Vec_scalar> to_matrix_cl(Vec&& src) {
-  return matrix_cl<Vec_scalar>(src);
+  // Uploads src into a newly constructed src.size() x 1 matrix_cl.
+  matrix_cl<Vec_scalar> dst(src.size(), 1);
+  if (src.size() == 0) {
+    return dst;
+  }
+  try {
+    cl::Event transfer_event;
+    cl::CommandQueue& queue = opencl_context.queue();
+    // Blocking write: the host data is fully copied before this call
+    // returns, so the caller may release src immediately afterwards.
+    queue.enqueueWriteBuffer(dst.buffer(), true, 0,
+                             sizeof(Vec_scalar) * src.size(), src.data(),
+                             nullptr, &transfer_event);
+    dst.add_write_event(transfer_event);
+  } catch (const cl::Error& e) {
+    check_opencl_error("copy std::vector->(OpenCL)", e);
+  }
+  return dst;
 }
 
 /**
@@ -181,7 +215,28 @@ inline matrix_cl<Vec_scalar> packed_copy(Vec&& src, int rows) {
  */
 template <typename T, typename = require_arithmetic_t<T>>
 inline matrix_cl<T> copy_cl(const matrix_cl<T>& src) {
-  return matrix_cl<T>(src);
+  matrix_cl<T> result(src.rows(), src.cols(), src.view());
+  if (src.size() == 0) {
+    return result;
+  }
+  try {
+    // Buffer-to-buffer copy of sizeof(T) * src.size() bytes from src into
+    // result (see the matrix_cl(matrix_cl&) constructor for explanation).
+    // The copy waits on result.read_write_events() and src.write_events(),
+    // and is then registered as a write on result and a read on src.
+    cl::CommandQueue queue = opencl_context.queue();
+    const std::vector<cl::Event> wait_for
+        = vec_concat(result.read_write_events(), src.write_events());
+    const auto byte_count = sizeof(T) * src.size();
+    cl::Event done;
+    queue.enqueueCopyBuffer(src.buffer(), result.buffer(), 0, 0, byte_count,
+                            &wait_for, &done);
+    result.add_write_event(done);
+    src.add_read_event(done);
+  } catch (const cl::Error& e) {
+    check_opencl_error("copy_cl (OpenCL)->(OpenCL)", e);
+  }
+  return result;
 }
 
 /**
@@ -218,7 +273,23 @@ inline T from_matrix_cl_error_code(const matrix_cl<T>& src) {
  */
 template <typename T, typename = require_arithmetic_t<std::decay_t<T>>>
 inline matrix_cl<std::decay_t<T>> to_matrix_cl(T&& src) {
-  return matrix_cl<std::decay_t<T>>(src);
+  // dst is constructed as 1x1 here, so its size needs no further checking.
+  matrix_cl<std::decay_t<T>> dst(1, 1);
+  try {
+    cl::Event copy_event;
+    const cl::CommandQueue queue = opencl_context.queue();
+    // Blocks when opencl_context.in_order() holds or src is an rvalue (a
+    // temporary that may not outlive the transfer); otherwise the write is
+    // asynchronous and the caller must keep src alive until it completes.
+    queue.enqueueWriteBuffer(
+        dst.buffer(),
+        opencl_context.in_order() || std::is_rvalue_reference<T&&>::value, 0,
+        sizeof(std::decay_t<T>), &src, &dst.write_events(), &copy_event);
+    dst.add_write_event(copy_event);
+  } catch (const cl::Error& e) {
+    check_opencl_error("to_matrix_cl scalar->(OpenCL)", e);
+  }
+  return dst;
 }
 
 }  // namespace math
Original file line number	Diff line number	Diff line change
`@@ -11,8 +11,8 @@ def runTests(String testPath) {`
`11`	`11`	`def runTestsWin(String testPath) {`
`12`	`12`	`withEnv(['PATH+TBB=./lib/tbb']) {`
`13`	`13`	`bat "echo $PATH"`
`14`		`- bat "runTests.py -j${env.PARALLEL} ${testPath} --make-only"`
`15`		`- try { bat "runTests.py -j${env.PARALLEL} ${testPath}" }`
	`14`	`+ bat "runTests.py -j12 ${testPath} --make-only"`
	`15`	`+ try { bat "runTests.py -j12 ${testPath}" }`
`16`	`16`	`finally { junit 'test/**/*.xml' }`
`17`	`17`	`}`
`18`	`18`	`}`
`@@ -160,7 +160,7 @@ pipeline {`
`160`	`160`	`deleteDir()`
`161`	`161`	`unstash 'MathSetup'`
`162`	`162`	`sh "echo CXX=${MPICXX} >> make/local"`
`163`		`- sh "echo CXX_TYPE=gcc >> make/local"`
	`163`	`+ sh "echo CXX_TYPE=gcc >> make/local"`
`164`	`164`	`sh "echo STAN_MPI=true >> make/local"`
`165`	`165`	`runTests("test/unit")`
`166`	`166`	`}`