db/d73/dot_8cc_source.html

 /*

 Copyright (C) 2009-2018 VZLU Prague

 This file is part of Octave.

 Octave is free software: you can redistribute it and/or modify it
 under the terms of the GNU General Public License as published by
 the Free Software Foundation, either version 3 of the License, or
 (at your option) any later version.

 Octave is distributed in the hope that it will be useful, but
 WITHOUT ANY WARRANTY; without even the implied warranty of
 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 GNU General Public License for more details.

 You should have received a copy of the GNU General Public License
 along with Octave; see the file COPYING.  If not, see
 <https://www.gnu.org/licenses/>.

 */

 #if defined (HAVE_CONFIG_H)
 #  include "config.h"
 #endif

 #include "lo-blas-proto.h"
 #include "mx-base.h"
 #include "error.h"
 #include "defun.h"
 #include "parse.h"

 static void
 get_red_dims (const dim_vector& x, const dim_vector& y, int dim,
               dim_vector& z, F77_INT& m, F77_INT& n, F77_INT& k)
 {
   int nd = x.ndims ();
   assert (nd == y.ndims ());
   z = dim_vector::alloc (nd);
   octave_idx_type tmp_m = 1;
   octave_idx_type tmp_n = 1;
   octave_idx_type tmp_k = 1;
   for (int i = 0; i < nd; i++)
     {
       if (i < dim)
         {
           z(i) = x(i);
           tmp_m *= x(i);
         }
       else if (i > dim)
         {
           z(i) = x(i);
           tmp_n *= x(i);
         }
       else
         {
           tmp_k = x(i);
           z(i) = 1;
         }
     }

   m = octave::to_f77_int (tmp_m);
   n = octave::to_f77_int (tmp_n);
   k = octave::to_f77_int (tmp_k);
 }

 DEFUN (dot, args, ,
        doc: /* -*- texinfo -*-
 @deftypefn {} {} dot (@var{x}, @var{y}, @var{dim})
 Compute the dot product of two vectors.

 If @var{x} and @var{y} are matrices, calculate the dot products along the
 first non-singleton dimension.

 If the optional argument @var{dim} is given, calculate the dot products
 along this dimension.

 This is equivalent to
 @code{sum (conj (@var{X}) .* @var{Y}, @var{dim})},
 but avoids forming a temporary array and is faster.  When @var{X} and
 @var{Y} are column vectors, the result is equivalent to
 @code{@var{X}' * @var{Y}}.
 @seealso{cross, divergence}
 @end deftypefn */)
 {
   const int nargin = args.length ();

   if (nargin < 2 || nargin > 3)
     print_usage ();

   octave_value xval = args(0);
   octave_value yval = args(1);

   if (! (xval.isnumeric () && yval.isnumeric ()))
     error ("dot: X and Y must be numeric");

   dim_vector xdims = xval.dims ();
   dim_vector ydims = yval.dims ();

   bool same_shape = (xdims == ydims);

   // Without an explicit DIM, two vectors whose sizes differ are both
   // changed to column vectors before the sizes are compared again.
   if (! same_shape && nargin == 2 && xdims.isvector () && ydims.isvector ())
     {
       xdims = xdims.redim (1);
       xval = xval.reshape (xdims);
       ydims = ydims.redim (1);
       yval = yval.reshape (ydims);
       same_shape = (xdims == ydims);
     }

   if (! same_shape)
     error ("dot: sizes of X and Y must match");

   // The user-supplied DIM is one-based; internally a zero-based index
   // is used.
   const int dim = (nargin == 2
                    ? xdims.first_non_singleton ()
                    : args(2).int_value (true) - 1);

   if (dim < 0)
     error ("dot: DIM must be a valid dimension");

   const bool any_complex = xval.iscomplex () || yval.iscomplex ();
   const bool any_single = xval.is_single_type () || yval.is_single_type ();
   const bool both_float = xval.isfloat () && yval.isfloat ();

   octave_value retval;
   dim_vector zdims;
   F77_INT m, n, k;

   if (any_complex && any_single)
     {
       // Single-precision complex operands.
       FloatComplexNDArray xa = xval.float_complex_array_value ();
       FloatComplexNDArray ya = yval.float_complex_array_value ();
       get_red_dims (xdims, ydims, dim, zdims, m, n, k);
       FloatComplexNDArray za (zdims);

       F77_XFCN (cdotc3, CDOTC3, (m, n, k,
                                  F77_CONST_CMPLX_ARG (xa.data ()),
                                  F77_CONST_CMPLX_ARG (ya.data ()),
                                  F77_CMPLX_ARG (za.fortran_vec ())));
       retval = za;
     }
   else if (any_complex)
     {
       // Double-precision complex operands.
       ComplexNDArray xa = xval.complex_array_value ();
       ComplexNDArray ya = yval.complex_array_value ();
       get_red_dims (xdims, ydims, dim, zdims, m, n, k);
       ComplexNDArray za (zdims);

       F77_XFCN (zdotc3, ZDOTC3, (m, n, k,
                                  F77_CONST_DBLE_CMPLX_ARG (xa.data ()),
                                  F77_CONST_DBLE_CMPLX_ARG (ya.data ()),
                                  F77_DBLE_CMPLX_ARG (za.fortran_vec ())));
       retval = za;
     }
   else if (both_float && any_single)
     {
       // Single-precision real operands.
       FloatNDArray xa = xval.float_array_value ();
       FloatNDArray ya = yval.float_array_value ();
       get_red_dims (xdims, ydims, dim, zdims, m, n, k);
       FloatNDArray za (zdims);

       F77_XFCN (sdot3, SDOT3, (m, n, k, xa.data (), ya.data (),
                                za.fortran_vec ()));
       retval = za;
     }
   else if (both_float)
     {
       // Double-precision real operands.
       NDArray xa = xval.array_value ();
       NDArray ya = yval.array_value ();
       get_red_dims (xdims, ydims, dim, zdims, m, n, k);
       NDArray za (zdims);

       F77_XFCN (ddot3, DDOT3, (m, n, k, xa.data (), ya.data (),
                                za.fortran_vec ()));
       retval = za;
     }
   else
     {
       // Non-optimized evaluation: element-wise product followed by a
       // call to "sum" along the (one-based) dimension.
       octave_value_list sum_args;
       sum_args(1) = dim + 1;
       sum_args(0) = do_binary_op (octave_value::op_el_mul, xval, yval);

       octave_value_list sum_out = octave::feval ("sum", sum_args, 1);
       if (! sum_out.empty ())
         retval = sum_out(0);
     }

   return retval;
 }

 /*
 %!assert (dot ([1, 2], [2, 3]), 8)

 %!test
 %! x = [2, 1; 2, 1];
 %! y = [-0.5, 2; 0.5, -2];
 %! assert (dot (x, y), [0 0]);
 %! assert (dot (single (x), single (y)), single ([0 0]));

 %!test
 %! x = [1+i, 3-i; 1-i, 3-i];
 %! assert (dot (x, x), [4, 20]);
 %! assert (dot (single (x), single (x)), single ([4, 20]));

 %!test
 %! x = int8 ([1 2]);
 %! y = int8 ([2 3]);
 %! assert (dot (x, y), 8);

 %!test
 %! x = int8 ([1 2; 3 4]);
 %! y = int8 ([5 6; 7 8]);
 %! assert (dot (x, y), [26 44]);
 %! assert (dot (x, y, 2), [17; 53]);
 %! assert (dot (x, y, 3), [5 12; 21 32]);

 ## Test input validation
 %!error dot ()
 %!error dot (1)
 %!error dot (1,2,3,4)
 %!error <X and Y must be numeric> dot ({1,2}, [3,4])
 %!error <X and Y must be numeric> dot ([1,2], {3,4})
 %!error <sizes of X and Y must match> dot ([1 2], [1 2 3])
 %!error <sizes of X and Y must match> dot ([1 2]', [1 2 3]')
 %!error <sizes of X and Y must match> dot (ones (2,2), ones (2,3))
 %!error <DIM must be a valid dimension> dot ([1 2], [1 2], 0)
 */

 // Block-wise matrix product kernel used by do_blkmm.  Only declared
 // here; the explicit specializations below supply the implementation
 // for each supported array type by calling the matching Fortran
 // routine with the sizes M, N, K and the block count NP.
 template <typename T>
 void
 blkmm_internal (const T& x, const T& y, T& z,
                 F77_INT m, F77_INT n, F77_INT k, F77_INT np);

 // Single-precision complex specialization: hands the raw buffers of
 // X, Y and Z to the Fortran routine CMATM3.
 template <>
 void
 blkmm_internal (const FloatComplexNDArray& x, const FloatComplexNDArray& y,
                 FloatComplexNDArray& z,
                 F77_INT m, F77_INT n, F77_INT k, F77_INT np)
 {
   const FloatComplexNDArray::element_type *xdata = x.data ();
   const FloatComplexNDArray::element_type *ydata = y.data ();
   FloatComplexNDArray::element_type *zdata = z.fortran_vec ();

   F77_XFCN (cmatm3, CMATM3, (m, n, k, np,
                              F77_CONST_CMPLX_ARG (xdata),
                              F77_CONST_CMPLX_ARG (ydata),
                              F77_CMPLX_ARG (zdata)));
 }

 // Double-precision complex specialization: hands the raw buffers of
 // X, Y and Z to the Fortran routine ZMATM3.
 template <>
 void
 blkmm_internal (const ComplexNDArray& x, const ComplexNDArray& y,
                 ComplexNDArray& z,
                 F77_INT m, F77_INT n, F77_INT k, F77_INT np)
 {
   const ComplexNDArray::element_type *xdata = x.data ();
   const ComplexNDArray::element_type *ydata = y.data ();
   ComplexNDArray::element_type *zdata = z.fortran_vec ();

   F77_XFCN (zmatm3, ZMATM3, (m, n, k, np,
                              F77_CONST_DBLE_CMPLX_ARG (xdata),
                              F77_CONST_DBLE_CMPLX_ARG (ydata),
                              F77_DBLE_CMPLX_ARG (zdata)));
 }

 // Single-precision real specialization: hands the raw buffers of
 // X, Y and Z to the Fortran routine SMATM3.
 template <>
 void
 blkmm_internal (const FloatNDArray& x, const FloatNDArray& y, FloatNDArray& z,
                 F77_INT m, F77_INT n, F77_INT k, F77_INT np)
 {
   const float *xdata = x.data ();
   const float *ydata = y.data ();
   float *zdata = z.fortran_vec ();

   F77_XFCN (smatm3, SMATM3, (m, n, k, np, xdata, ydata, zdata));
 }

 // Double-precision real specialization: hands the raw buffers of
 // X, Y and Z to the Fortran routine DMATM3.
 template <>
 void
 blkmm_internal (const NDArray& x, const NDArray& y, NDArray& z,
                 F77_INT m, F77_INT n, F77_INT k, F77_INT np)
 {
   const double *xdata = x.data ();
   const double *ydata = y.data ();
   double *zdata = z.fortran_vec ();

   F77_XFCN (dmatm3, DMATM3, (m, n, k, np, xdata, ydata, zdata));
 }

 static void
 get_blkmm_dims (const dim_vector& dimx, const dim_vector& dimy,
                 F77_INT& m, F77_INT& n, F77_INT& k, F77_INT& np,
                 dim_vector& dimz)
 {
   int nd = dimx.ndims ();

   m = octave::to_f77_int (dimx(0));
   k = octave::to_f77_int (dimx(1));
   n = octave::to_f77_int (dimy(1));

   octave_idx_type tmp_np = 1;

   bool match = dimy(0) == k && nd == dimy.ndims ();

   dimz = dim_vector::alloc (nd);

   dimz(0) = m;
   dimz(1) = n;
   for (int i = 2; match && i < nd; i++)
     {
       match = match && dimx(i) == dimy(i);
       dimz(i) = dimx(i);
       tmp_np *= dimz(i);
     }

   np = octave::to_f77_int (tmp_np);

   if (! match)
     error ("blkmm: A and B dimensions don't match: (%s) and (%s)",
            dimx.str ().c_str (), dimy.str ().c_str ());
 }

 // Extract both operands as arrays of type T, check their dimensions
 // with get_blkmm_dims and return the block-wise matrix product.
 //
 // XOV and YOV are the operands; an error is raised by get_blkmm_dims
 // when their dimensions are incompatible.  The returned array has the
 // dimensions computed by get_blkmm_dims.
 template <typename T>
 T
 do_blkmm (const octave_value& xov, const octave_value& yov)
 {
   const T x = octave_value_extract<T> (xov);
   const T y = octave_value_extract<T> (yov);
   F77_INT m, n, k, np;
   dim_vector dimz;

   get_blkmm_dims (x.dims (), y.dims (), m, n, k, np, dimz);

   // Z is constructed without an explicit fill value.
   T z (dimz);

   if (k == 0)
     {
       // The inner dimension is empty, so every block product is a sum
       // over no terms, i.e. all zeros.  The Fortran kernel is skipped
       // because it would pass K as a leading dimension, and the
       // reference BLAS documents leading dimensions as being at least 1.
       z.fill (typename T::element_type ());
     }
   else if (m != 0 && n != 0)
     {
       // With M or N equal to zero the result is empty and there is
       // nothing to compute.
       blkmm_internal<T> (x, y, z, m, n, k, np);
     }

   return z;
 }

 DEFUN (blkmm, args, ,
        doc: /* -*- texinfo -*-
 @deftypefn {} {} blkmm (@var{A}, @var{B})
 Compute products of matrix blocks.

 The blocks are given as 2-dimensional subarrays of the arrays @var{A},
 @var{B}.  The size of @var{A} must have the form @code{[m,k,@dots{}]} and
 size of @var{B} must be @code{[k,n,@dots{}]}.  The result is then of size
 @code{[m,n,@dots{}]} and is computed as follows:

 @example
 @group
 for i = 1:prod (size (@var{A})(3:end))
   @var{C}(:,:,i) = @var{A}(:,:,i) * @var{B}(:,:,i)
 endfor
 @end group
 @end example
 @end deftypefn */)
 {
   if (args.length () != 2)
     print_usage ();

   octave_value aval = args(0);
   octave_value bval = args(1);

   if (! (aval.isnumeric () && bval.isnumeric ()))
     error ("blkmm: A and B must be numeric");

   // The result type is complex if either operand is complex, and
   // single precision if either operand is single precision.
   const bool any_complex = aval.iscomplex () || bval.iscomplex ();
   const bool any_single = aval.is_single_type () || bval.is_single_type ();

   octave_value retval;

   if (any_complex && any_single)
     retval = do_blkmm<FloatComplexNDArray> (aval, bval);
   else if (any_complex)
     retval = do_blkmm<ComplexNDArray> (aval, bval);
   else if (any_single)
     retval = do_blkmm<FloatNDArray> (aval, bval);
   else
     retval = do_blkmm<NDArray> (aval, bval);

   return retval;
 }

 /*
 %!test
 %! x(:,:,1) = [1 2; 3 4];
 %! x(:,:,2) = [1 1; 1 1];
 %! z(:,:,1) = [7 10; 15 22];
 %! z(:,:,2) = [2 2; 2 2];
 %! assert (blkmm (x,x), z);
 %! assert (blkmm (single (x), single (x)), single (z));
 %! assert (blkmm (x, single (x)), single (z));

 %!test
 %! x(:,:,1) = [1 2; 3 4];
 %! x(:,:,2) = [1i 1i; 1i 1i];
 %! z(:,:,1) = [7 10; 15 22];
 %! z(:,:,2) = [-2 -2; -2 -2];
 %! assert (blkmm (x,x), z);
 %! assert (blkmm (single (x), single (x)), single (z));
 %! assert (blkmm (x, single (x)), single (z));

 %!test <*54261>
 %! x = ones (0, 3, 3);
 %! y = ones (3, 5, 3);
 %! z = blkmm (x,y);
 %! assert (size (z), [0, 5, 3]);
 %! x = ones (1, 3, 3);
 %! y = ones (3, 0, 3);
 %! z = blkmm (x,y);
 %! assert (size (z), [1, 0, 3]);

 ## Test input validation
 %!error blkmm ()
 %!error blkmm (1)
 %!error blkmm (1,2,3)
 %!error <A and B must be numeric> blkmm ({1,2}, [3,4])
 %!error <A and B must be numeric> blkmm ([3,4], {1,2})
 %!error <A and B dimensions don't match> blkmm (ones (2,2), ones (3,3))
 */
octave::feval
OCTINTERP_API octave_value_list feval(const std::string &name, const octave_value_list &args=octave_value_list(), int nargout=0)

dim_vector::str
std::string str(char sep='x') const
Definition: dim-vector.cc:73

get_red_dims
static void get_red_dims(const dim_vector &x, const dim_vector &y, int dim, dim_vector &z, F77_INT &m, F77_INT &n, F77_INT &k)
Definition: dot.cc:34

octave_value
Definition: ov.h:75

cmatm3
subroutine cmatm3(m, n, k, np, a, b, c)
Definition: cmatm3.f:22

ComplexNDArray
Definition: CNDArray.h:33

cdotc3
subroutine cdotc3(m, n, k, a, b, c)
Definition: cdotc3.f:22

print_usage
OCTINTERP_API void print_usage(void)
Definition: defun.cc:54

F77_DBLE_CMPLX_ARG
#define F77_DBLE_CMPLX_ARG(x)
Definition: f77-fcn.h:315

parse.h

k
for large enough k
Definition: lu.cc:617

Array::fortran_vec
const T * fortran_vec(void) const
Definition: Array.h:584

DEFUN
#define DEFUN(name, args_name, nargout_name, doc)
Macro to define a builtin function.
Definition: defun.h:53

error
void error(const char *fmt,...)
Definition: error.cc:578

smatm3
subroutine smatm3(m, n, k, np, a, b, c)
Definition: smatm3.f:22

octave_value_list
Definition: ovl.h:39

octave_value::float_array_value
FloatNDArray float_array_value(bool frc_str_conv=false) const
Definition: ov.h:843

F77_XFCN
#define F77_XFCN(f, F, args)
Definition: f77-fcn.h:41

octave_value::op_el_mul
Definition: ov.h:108

get_blkmm_dims
static void get_blkmm_dims(const dim_vector &dimx, const dim_vector &dimy, F77_INT &m, F77_INT &n, F77_INT &k, F77_INT &np, dim_vector &dimz)
Definition: dot.cc:280

error.h

dmatm3
subroutine dmatm3(m, n, k, np, a, b, c)
Definition: dmatm3.f:22

do_binary_op
OCTINTERP_API octave_value do_binary_op(octave::type_info &ti, octave_value::binary_op op, const octave_value &a, const octave_value &b)

octave_value::is_single_type
bool is_single_type(void) const
Definition: ov.h:651

dim_vector::first_non_singleton
int first_non_singleton(int def=0) const
Definition: dim-vector.h:475

octave_value::dims
dim_vector dims(void) const
Definition: ov.h:469

mx-base.h

tmp
double tmp
Definition: data.cc:6252

retval
octave_value retval
Definition: data.cc:6246

FloatNDArray
Definition: fNDArray.h:35

zmatm3
subroutine zmatm3(m, n, k, np, a, b, c)
Definition: zmatm3.f:22

dim_vector::alloc
static dim_vector alloc(int n)
Definition: dim-vector.h:264

sdot3
subroutine sdot3(m, n, k, a, b, c)
Definition: sdot3.f:22

dim_vector::redim
dim_vector redim(int n) const
Force certain dimensionality, preserving numel ().
Definition: dim-vector.cc:233

octave_value::isfloat
bool isfloat(void) const
Definition: ov.h:654

octave_value::float_complex_array_value
FloatComplexNDArray float_complex_array_value(bool frc_str_conv=false) const
Definition: ov.h:863

lo-blas-proto.h

octave_value::reshape
octave_value reshape(const dim_vector &dv) const
Definition: ov.h:502

defun.h

F77_CMPLX_ARG
#define F77_CMPLX_ARG(x)
Definition: f77-fcn.h:309

F77_CONST_DBLE_CMPLX_ARG
#define F77_CONST_DBLE_CMPLX_ARG(x)
Definition: f77-fcn.h:318

do_blkmm
T do_blkmm(const octave_value &xov, const octave_value &yov)
Definition: dot.cc:314

dot
double dot(const ColumnVector &v1, const ColumnVector &v2)
Definition: graphics.cc:5483

F77_INT
octave_f77_int_type F77_INT
Definition: f77-fcn.h:305

blkmm_internal
void blkmm_internal(const FloatComplexNDArray &x, const FloatComplexNDArray &y, FloatComplexNDArray &z, F77_INT m, F77_INT n, F77_INT k, F77_INT np)
Definition: dot.cc:237

dim_vector::isvector
bool isvector(void) const
Definition: dim-vector.h:422

y
the element is set to zero In other the statement xample y
Definition: data.cc:5264

FloatComplexNDArray
Definition: fCNDArray.h:33

NDArray
Definition: dNDArray.h:35

octave_idx_type

nargin
args.length() nargin
Definition: file-io.cc:589

F77_CONST_CMPLX_ARG
#define F77_CONST_CMPLX_ARG(x)
Definition: f77-fcn.h:312

octave_value::iscomplex
bool iscomplex(void) const
Definition: ov.h:710

i
for i
Definition: data.cc:5264

dim_vector::ndims
octave_idx_type ndims(void) const
Number of dimensions.
Definition: dim-vector.h:295

octave_value::complex_array_value
ComplexNDArray complex_array_value(bool frc_str_conv=false) const
Definition: ov.h:859

dim_vector
Vector representing the dimensions (size) of an Array.
Definition: dim-vector.h:87

ddot3
subroutine ddot3(m, n, k, a, b, c)
Definition: ddot3.f:22

octave_value::array_value
NDArray array_value(bool frc_str_conv=false) const
Definition: ov.h:840

octave_value::isnumeric
bool isnumeric(void) const
Definition: ov.h:723

x
F77_RET_T const F77_REAL const F77_REAL F77_REAL &F77_RET_T const F77_DBLE const F77_DBLE F77_DBLE &F77_RET_T const F77_DBLE F77_DBLE &F77_RET_T const F77_REAL F77_REAL &F77_RET_T const F77_DBLE * x
Definition: lo-slatec-proto.h:55

zdotc3
subroutine zdotc3(m, n, k, a, b, c)
Definition: zdotc3.f:22