Skip to content

Commit d2b8761

Browse files
committed
Merge pull request #338 from cuda/memory
Cleaning up native memory usage and other minor issues
2 parents 4569f51 + e1e2bb2 commit d2b8761

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

50 files changed

+2844
-8745
lines changed

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -21,6 +21,7 @@ coverage.*
2121
*.psess
2222
*.vsp
2323
*.vspx
24+
*.lnt
2425

2526
# Caches
2627
_ReSharper*

MathNet.Numerics.NativeProviders.sln

Lines changed: 44 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -26,6 +26,8 @@ Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "OpenBLAS", "src\NativeProvi
2626
EndProject
2727
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "UnitTests-OpenBLAS", "src\UnitTests\UnitTests-OpenBLAS.csproj", "{96B903EF-3EE1-4569-803C-0482D2F5ED37}"
2828
EndProject
29+
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "TestData", "src\TestData\TestData.csproj", "{AF3253C9-4DB5-45A0-98CF-C105FDA9DA47}"
30+
EndProject
2931
Global
3032
GlobalSection(SolutionConfigurationPlatforms) = preSolution
3133
Debug|Any CPU = Debug|Any CPU
@@ -344,6 +346,48 @@ Global
344346
{96B903EF-3EE1-4569-803C-0482D2F5ED37}.Release-Signed|Mixed Platforms.Build.0 = Release|Any CPU
345347
{96B903EF-3EE1-4569-803C-0482D2F5ED37}.Release-Signed|Win32.ActiveCfg = Release|Any CPU
346348
{96B903EF-3EE1-4569-803C-0482D2F5ED37}.Release-Signed|x64.ActiveCfg = Release|Any CPU
349+
{AF3253C9-4DB5-45A0-98CF-C105FDA9DA47}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
350+
{AF3253C9-4DB5-45A0-98CF-C105FDA9DA47}.Debug|Any CPU.Build.0 = Debug|Any CPU
351+
{AF3253C9-4DB5-45A0-98CF-C105FDA9DA47}.Debug|Mixed Platforms.ActiveCfg = Debug|Any CPU
352+
{AF3253C9-4DB5-45A0-98CF-C105FDA9DA47}.Debug|Mixed Platforms.Build.0 = Debug|Any CPU
353+
{AF3253C9-4DB5-45A0-98CF-C105FDA9DA47}.Debug|Win32.ActiveCfg = Debug|Any CPU
354+
{AF3253C9-4DB5-45A0-98CF-C105FDA9DA47}.Debug|x64.ActiveCfg = Debug|Any CPU
355+
{AF3253C9-4DB5-45A0-98CF-C105FDA9DA47}.Release|Any CPU.ActiveCfg = Release|Any CPU
356+
{AF3253C9-4DB5-45A0-98CF-C105FDA9DA47}.Release|Any CPU.Build.0 = Release|Any CPU
357+
{AF3253C9-4DB5-45A0-98CF-C105FDA9DA47}.Release|Mixed Platforms.ActiveCfg = Release|Any CPU
358+
{AF3253C9-4DB5-45A0-98CF-C105FDA9DA47}.Release|Mixed Platforms.Build.0 = Release|Any CPU
359+
{AF3253C9-4DB5-45A0-98CF-C105FDA9DA47}.Release|Win32.ActiveCfg = Release|Any CPU
360+
{AF3253C9-4DB5-45A0-98CF-C105FDA9DA47}.Release|x64.ActiveCfg = Release|Any CPU
361+
{AF3253C9-4DB5-45A0-98CF-C105FDA9DA47}.Release-CUDA|Any CPU.ActiveCfg = Release-Signed|Any CPU
362+
{AF3253C9-4DB5-45A0-98CF-C105FDA9DA47}.Release-CUDA|Any CPU.Build.0 = Release-Signed|Any CPU
363+
{AF3253C9-4DB5-45A0-98CF-C105FDA9DA47}.Release-CUDA|Mixed Platforms.ActiveCfg = Release-Signed|Any CPU
364+
{AF3253C9-4DB5-45A0-98CF-C105FDA9DA47}.Release-CUDA|Mixed Platforms.Build.0 = Release-Signed|Any CPU
365+
{AF3253C9-4DB5-45A0-98CF-C105FDA9DA47}.Release-CUDA|Win32.ActiveCfg = Release-Signed|Any CPU
366+
{AF3253C9-4DB5-45A0-98CF-C105FDA9DA47}.Release-CUDA|Win32.Build.0 = Release-Signed|Any CPU
367+
{AF3253C9-4DB5-45A0-98CF-C105FDA9DA47}.Release-CUDA|x64.ActiveCfg = Release-Signed|Any CPU
368+
{AF3253C9-4DB5-45A0-98CF-C105FDA9DA47}.Release-CUDA|x64.Build.0 = Release-Signed|Any CPU
369+
{AF3253C9-4DB5-45A0-98CF-C105FDA9DA47}.Release-MKL|Any CPU.ActiveCfg = Release-Signed|Any CPU
370+
{AF3253C9-4DB5-45A0-98CF-C105FDA9DA47}.Release-MKL|Any CPU.Build.0 = Release-Signed|Any CPU
371+
{AF3253C9-4DB5-45A0-98CF-C105FDA9DA47}.Release-MKL|Mixed Platforms.ActiveCfg = Release-Signed|Any CPU
372+
{AF3253C9-4DB5-45A0-98CF-C105FDA9DA47}.Release-MKL|Mixed Platforms.Build.0 = Release-Signed|Any CPU
373+
{AF3253C9-4DB5-45A0-98CF-C105FDA9DA47}.Release-MKL|Win32.ActiveCfg = Release-Signed|Any CPU
374+
{AF3253C9-4DB5-45A0-98CF-C105FDA9DA47}.Release-MKL|Win32.Build.0 = Release-Signed|Any CPU
375+
{AF3253C9-4DB5-45A0-98CF-C105FDA9DA47}.Release-MKL|x64.ActiveCfg = Release-Signed|Any CPU
376+
{AF3253C9-4DB5-45A0-98CF-C105FDA9DA47}.Release-MKL|x64.Build.0 = Release-Signed|Any CPU
377+
{AF3253C9-4DB5-45A0-98CF-C105FDA9DA47}.Release-OpenBLAS|Any CPU.ActiveCfg = Release-Signed|Any CPU
378+
{AF3253C9-4DB5-45A0-98CF-C105FDA9DA47}.Release-OpenBLAS|Any CPU.Build.0 = Release-Signed|Any CPU
379+
{AF3253C9-4DB5-45A0-98CF-C105FDA9DA47}.Release-OpenBLAS|Mixed Platforms.ActiveCfg = Release-Signed|Any CPU
380+
{AF3253C9-4DB5-45A0-98CF-C105FDA9DA47}.Release-OpenBLAS|Mixed Platforms.Build.0 = Release-Signed|Any CPU
381+
{AF3253C9-4DB5-45A0-98CF-C105FDA9DA47}.Release-OpenBLAS|Win32.ActiveCfg = Release-Signed|Any CPU
382+
{AF3253C9-4DB5-45A0-98CF-C105FDA9DA47}.Release-OpenBLAS|Win32.Build.0 = Release-Signed|Any CPU
383+
{AF3253C9-4DB5-45A0-98CF-C105FDA9DA47}.Release-OpenBLAS|x64.ActiveCfg = Release-Signed|Any CPU
384+
{AF3253C9-4DB5-45A0-98CF-C105FDA9DA47}.Release-OpenBLAS|x64.Build.0 = Release-Signed|Any CPU
385+
{AF3253C9-4DB5-45A0-98CF-C105FDA9DA47}.Release-Signed|Any CPU.ActiveCfg = Release-Signed|Any CPU
386+
{AF3253C9-4DB5-45A0-98CF-C105FDA9DA47}.Release-Signed|Any CPU.Build.0 = Release-Signed|Any CPU
387+
{AF3253C9-4DB5-45A0-98CF-C105FDA9DA47}.Release-Signed|Mixed Platforms.ActiveCfg = Release-Signed|Any CPU
388+
{AF3253C9-4DB5-45A0-98CF-C105FDA9DA47}.Release-Signed|Mixed Platforms.Build.0 = Release-Signed|Any CPU
389+
{AF3253C9-4DB5-45A0-98CF-C105FDA9DA47}.Release-Signed|Win32.ActiveCfg = Release-Signed|Any CPU
390+
{AF3253C9-4DB5-45A0-98CF-C105FDA9DA47}.Release-Signed|x64.ActiveCfg = Release-Signed|Any CPU
347391
EndGlobalSection
348392
GlobalSection(SolutionProperties) = preSolution
349393
HideSolutionNode = FALSE

src/NativeProviders/Common/blas.c

Lines changed: 89 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,89 @@
1+
#include "wrapper_common.h"
2+
#include "blas.h"
3+
4+
/* Open a C-linkage region when this file is compiled as C++.
 * #ifdef is used instead of #if so that a plain C build, where __cplusplus
 * is not defined, does not evaluate an undefined macro. */
#ifdef __cplusplus
extern "C" {
#endif
7+
/**
 * Single-precision AXPY: forwards to cblas_saxpy, which per the BLAS
 * specification updates y in place as y := alpha * x + y.
 *
 * @param n      element count handed to the CBLAS routine
 * @param alpha  scalar multiplier applied to x
 * @param x      input vector, read with a stride of 1
 * @param y      vector updated in place, accessed with a stride of 1
 */
DLLEXPORT void s_axpy(const blas_int n, const float alpha, const float x[], float y[]) {
    const int unit_stride = 1; /* both vectors are passed as contiguous */

    cblas_saxpy(n, alpha, x, unit_stride, y, unit_stride);
}
10+
11+
/**
 * Double-precision AXPY: forwards to cblas_daxpy, which per the BLAS
 * specification updates y in place as y := alpha * x + y.
 *
 * @param n      element count handed to the CBLAS routine
 * @param alpha  scalar multiplier applied to x
 * @param x      input vector, read with a stride of 1
 * @param y      vector updated in place, accessed with a stride of 1
 */
DLLEXPORT void d_axpy(const blas_int n, const double alpha, const double x[], double y[]) {
    const int unit_stride = 1; /* both vectors are passed as contiguous */

    cblas_daxpy(n, alpha, x, unit_stride, y, unit_stride);
}
14+
15+
/**
 * Single-precision complex AXPY: forwards to cblas_caxpy
 * (BLAS: y := alpha * x + y) with a stride of 1 for both vectors.
 *
 * The scalar and both vectors are handed to CBLAS as float pointers.
 * NOTE(review): this presumes blas_complex_float is stored as consecutive
 * float components; the type is declared in a header not shown here.
 *
 * @param n      element count handed to the CBLAS routine
 * @param alpha  complex scalar, passed by address
 * @param x      input vector
 * @param y      vector updated in place
 */
DLLEXPORT void c_axpy(const blas_int n, const blas_complex_float alpha, const blas_complex_float x[], blas_complex_float y[]) {
    const int unit_stride = 1;
    float *scalar = (float*)&alpha;
    float *source = (float*)x;
    float *target = (float*)y;

    cblas_caxpy(n, scalar, source, unit_stride, target, unit_stride);
}
18+
19+
/**
 * Double-precision complex AXPY: forwards to cblas_zaxpy
 * (BLAS: y := alpha * x + y) with a stride of 1 for both vectors.
 *
 * The scalar and both vectors are handed to CBLAS as double pointers.
 * NOTE(review): this presumes blas_complex_double is stored as consecutive
 * double components; the type is declared in a header not shown here.
 *
 * @param n      element count handed to the CBLAS routine
 * @param alpha  complex scalar, passed by address
 * @param x      input vector
 * @param y      vector updated in place
 */
DLLEXPORT void z_axpy(const blas_int n, const blas_complex_double alpha, const blas_complex_double x[], blas_complex_double y[]) {
    const int unit_stride = 1;
    double *scalar = (double*)&alpha;
    double *source = (double*)x;
    double *target = (double*)y;

    cblas_zaxpy(n, scalar, source, unit_stride, target, unit_stride);
}
22+
23+
/**
 * Single-precision vector scaling: forwards to cblas_sscal, which per the
 * BLAS specification overwrites x with alpha * x.
 *
 * @param n      element count handed to the CBLAS routine
 * @param alpha  scale factor
 * @param x      vector scaled in place, accessed with a stride of 1
 */
DLLEXPORT void s_scale(const blas_int n, const float alpha, float x[]) {
    const int unit_stride = 1; /* x is passed as contiguous */

    cblas_sscal(n, alpha, x, unit_stride);
}
26+
27+
/**
 * Double-precision vector scaling: forwards to cblas_dscal, which per the
 * BLAS specification overwrites x with alpha * x.
 *
 * @param n      element count handed to the CBLAS routine
 * @param alpha  scale factor
 * @param x      vector scaled in place, accessed with a stride of 1
 */
DLLEXPORT void d_scale(const blas_int n, const double alpha, double x[]) {
    const int unit_stride = 1; /* x is passed as contiguous */

    cblas_dscal(n, alpha, x, unit_stride);
}
30+
31+
/**
 * Single-precision complex vector scaling: forwards to cblas_cscal
 * (BLAS: x := alpha * x) with a stride of 1.
 *
 * The scalar and the vector are handed to CBLAS as float pointers.
 * NOTE(review): this presumes blas_complex_float is stored as consecutive
 * float components; the type is declared in a header not shown here.
 *
 * @param n      element count handed to the CBLAS routine
 * @param alpha  complex scale factor, passed by address
 * @param x      vector scaled in place
 */
DLLEXPORT void c_scale(const blas_int n, const blas_complex_float alpha, blas_complex_float x[]) {
    const int unit_stride = 1;
    float *scalar = (float*)&alpha;
    float *values = (float*)x;

    cblas_cscal(n, scalar, values, unit_stride);
}
34+
35+
/**
 * Double-precision complex vector scaling: forwards to cblas_zscal
 * (BLAS: x := alpha * x) with a stride of 1.
 *
 * The scalar and the vector are handed to CBLAS as double pointers.
 * NOTE(review): this presumes blas_complex_double is stored as consecutive
 * double components; the type is declared in a header not shown here.
 *
 * @param n      element count handed to the CBLAS routine
 * @param alpha  complex scale factor, passed by address
 * @param x      vector scaled in place
 */
DLLEXPORT void z_scale(const blas_int n, const blas_complex_double alpha, blas_complex_double x[]) {
    const int unit_stride = 1;
    double *scalar = (double*)&alpha;
    double *values = (double*)x;

    cblas_zscal(n, scalar, values, unit_stride);
}
38+
39+
/**
 * Single-precision dot product: returns the value produced by cblas_sdot
 * for x and y, both read with a stride of 1.
 *
 * @param n  element count handed to the CBLAS routine
 * @param x  first input vector
 * @param y  second input vector
 * @return   the result of cblas_sdot
 */
DLLEXPORT float s_dot_product(const blas_int n, const float x[], const float y[]) {
    const int unit_stride = 1;
    const float result = cblas_sdot(n, x, unit_stride, y, unit_stride);

    return result;
}
42+
43+
/**
 * Double-precision dot product: returns the value produced by cblas_ddot
 * for x and y, both read with a stride of 1.
 *
 * @param n  element count handed to the CBLAS routine
 * @param x  first input vector
 * @param y  second input vector
 * @return   the result of cblas_ddot
 */
DLLEXPORT double d_dot_product(const blas_int n, const double x[], const double y[]) {
    const int unit_stride = 1;
    const double result = cblas_ddot(n, x, unit_stride, y, unit_stride);

    return result;
}
46+
47+
/**
 * Single-precision complex dot product via cblas_cdotu_sub (the
 * unconjugated BLAS variant), with a stride of 1 for both vectors.
 *
 * CBLAS writes the result through an output pointer; this wrapper collects
 * it in a local and returns it by value.
 *
 * @param n  element count handed to the CBLAS routine
 * @param x  first input vector, passed to CBLAS as a float pointer
 * @param y  second input vector, passed to CBLAS as a float pointer
 * @return   the value written by cblas_cdotu_sub
 */
DLLEXPORT blas_complex_float c_dot_product(const blas_int n, const blas_complex_float x[], const blas_complex_float y[]) {
    const int unit_stride = 1;
    blas_complex_float result;

    cblas_cdotu_sub(n, (float*)x, unit_stride, (float*)y, unit_stride, &result);
    return result;
}
52+
53+
/**
 * Double-precision complex dot product via cblas_zdotu_sub (the
 * unconjugated BLAS variant), with a stride of 1 for both vectors.
 *
 * CBLAS writes the result through an output pointer; this wrapper collects
 * it in a local and returns it by value.
 *
 * @param n  element count handed to the CBLAS routine
 * @param x  first input vector, passed to CBLAS as a double pointer
 * @param y  second input vector, passed to CBLAS as a double pointer
 * @return   the value written by cblas_zdotu_sub
 */
DLLEXPORT blas_complex_double z_dot_product(const blas_int n, const blas_complex_double x[], const blas_complex_double y[]) {
    const int unit_stride = 1;
    blas_complex_double result;

    cblas_zdotu_sub(n, (double*)x, unit_stride, (double*)y, unit_stride, &result);
    return result;
}
58+
59+
/**
 * Single-precision general matrix multiply: forwards to cblas_sgemm in
 * column-major layout (BLAS GEMM: c := alpha * op(x) * op(y) + beta * c).
 *
 * Leading dimensions are derived from the transpose flags: a non-transposed
 * x uses m, otherwise k; a non-transposed y uses k, otherwise n. The output
 * c always uses m.
 *
 * @param transA  transpose option applied to x
 * @param transB  transpose option applied to y
 * @param m       GEMM dimension m, also the leading dimension of c
 * @param n       GEMM dimension n
 * @param k       GEMM dimension k
 * @param alpha   scalar applied to the product
 * @param x       left operand
 * @param y       right operand
 * @param beta    scalar applied to the existing contents of c
 * @param c       output matrix, updated in place
 */
DLLEXPORT void s_matrix_multiply(CBLAS_TRANSPOSE transA, CBLAS_TRANSPOSE transB, const blas_int m, const blas_int n, const blas_int k, const float alpha, const float x[], const float y[], const float beta, float c[]) {
    blas_int lda = k;
    blas_int ldb = n;
    const blas_int ldc = m;

    if (transA == CblasNoTrans) {
        lda = m;
    }
    if (transB == CblasNoTrans) {
        ldb = k;
    }

    cblas_sgemm(CblasColMajor, transA, transB, m, n, k, alpha, x, lda, y, ldb, beta, c, ldc);
}
65+
66+
/**
 * Double-precision general matrix multiply: forwards to cblas_dgemm in
 * column-major layout (BLAS GEMM: c := alpha * op(x) * op(y) + beta * c).
 *
 * Leading dimensions are derived from the transpose flags: a non-transposed
 * x uses m, otherwise k; a non-transposed y uses k, otherwise n. The output
 * c always uses m.
 *
 * @param transA  transpose option applied to x
 * @param transB  transpose option applied to y
 * @param m       GEMM dimension m, also the leading dimension of c
 * @param n       GEMM dimension n
 * @param k       GEMM dimension k
 * @param alpha   scalar applied to the product
 * @param x       left operand
 * @param y       right operand
 * @param beta    scalar applied to the existing contents of c
 * @param c       output matrix, updated in place
 */
DLLEXPORT void d_matrix_multiply(CBLAS_TRANSPOSE transA, CBLAS_TRANSPOSE transB, const blas_int m, const blas_int n, const blas_int k, const double alpha, const double x[], const double y[], const double beta, double c[]) {
    blas_int lda = k;
    blas_int ldb = n;
    const blas_int ldc = m;

    if (transA == CblasNoTrans) {
        lda = m;
    }
    if (transB == CblasNoTrans) {
        ldb = k;
    }

    cblas_dgemm(CblasColMajor, transA, transB, m, n, k, alpha, x, lda, y, ldb, beta, c, ldc);
}
72+
73+
/**
 * Single-precision complex general matrix multiply: forwards to cblas_cgemm
 * in column-major layout (BLAS GEMM: c := alpha * op(x) * op(y) + beta * c).
 *
 * Leading dimensions are derived from the transpose flags: a non-transposed
 * x uses m, otherwise k; a non-transposed y uses k, otherwise n. The output
 * c always uses m. Scalars are passed by address and all complex data is
 * handed to CBLAS as float pointers.
 *
 * @param transA  transpose option applied to x
 * @param transB  transpose option applied to y
 * @param m       GEMM dimension m, also the leading dimension of c
 * @param n       GEMM dimension n
 * @param k       GEMM dimension k
 * @param alpha   complex scalar applied to the product
 * @param x       left operand
 * @param y       right operand
 * @param beta    complex scalar applied to the existing contents of c
 * @param c       output matrix, updated in place
 */
DLLEXPORT void c_matrix_multiply(CBLAS_TRANSPOSE transA, CBLAS_TRANSPOSE transB, const blas_int m, const blas_int n, const blas_int k, const blas_complex_float alpha, const blas_complex_float x[], const blas_complex_float y[], const blas_complex_float beta, blas_complex_float c[]) {
    blas_int lda = k;
    blas_int ldb = n;
    const blas_int ldc = m;

    if (transA == CblasNoTrans) {
        lda = m;
    }
    if (transB == CblasNoTrans) {
        ldb = k;
    }

    cblas_cgemm(CblasColMajor, transA, transB, m, n, k,
                (float*)&alpha, (float*)x, lda, (float*)y, ldb,
                (float*)&beta, (float*)c, ldc);
}
79+
80+
/**
 * Double-precision complex general matrix multiply: forwards to cblas_zgemm
 * in column-major layout (BLAS GEMM: c := alpha * op(x) * op(y) + beta * c).
 *
 * Leading dimensions are derived from the transpose flags: a non-transposed
 * x uses m, otherwise k; a non-transposed y uses k, otherwise n. The output
 * c always uses m. Scalars are passed by address and all complex data is
 * handed to CBLAS as double pointers.
 *
 * @param transA  transpose option applied to x
 * @param transB  transpose option applied to y
 * @param m       GEMM dimension m, also the leading dimension of c
 * @param n       GEMM dimension n
 * @param k       GEMM dimension k
 * @param alpha   complex scalar applied to the product
 * @param x       left operand
 * @param y       right operand
 * @param beta    complex scalar applied to the existing contents of c
 * @param c       output matrix, updated in place
 */
DLLEXPORT void z_matrix_multiply(CBLAS_TRANSPOSE transA, CBLAS_TRANSPOSE transB, const blas_int m, const blas_int n, const blas_int k, const blas_complex_double alpha, const blas_complex_double x[], const blas_complex_double y[], const blas_complex_double beta, blas_complex_double c[]) {
    blas_int lda = k;
    blas_int ldb = n;
    const blas_int ldc = m;

    if (transA == CblasNoTrans) {
        lda = m;
    }
    if (transB == CblasNoTrans) {
        ldb = k;
    }

    cblas_zgemm(CblasColMajor, transA, transB, m, n, k,
                (double*)&alpha, (double*)x, lda, (double*)y, ldb,
                (double*)&beta, (double*)c, ldc);
}
86+
87+
/* Close the C-linkage region opened near the top of the file.
 * #ifdef is used instead of #if so that a plain C build, where __cplusplus
 * is not defined, does not evaluate an undefined macro. */
#ifdef __cplusplus
}
#endif

0 commit comments

Comments
 (0)