Skip to content

Patches I used for issues #1622 and #1623

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 7 commits on Jun 18, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 7 additions & 2 deletions common_x86_64.h
Original file line number Diff line number Diff line change
Expand Up @@ -60,8 +60,13 @@
#endif
*/

#define MB
#define WMB
#ifdef __GNUC__
#define MB do { __asm__ __volatile__("": : :"memory"); } while (0)
#define WMB do { __asm__ __volatile__("": : :"memory"); } while (0)
#else
#define MB do {} while (0)
#define WMB do {} while (0)
#endif

static void __inline blas_lock(volatile BLASULONG *address){

Expand Down
15 changes: 6 additions & 9 deletions driver/level3/level3_thread.c
Original file line number Diff line number Diff line change
Expand Up @@ -91,11 +91,7 @@
#endif

typedef struct {
#if __STDC_VERSION__ >= 201112L
_Atomic
#else
volatile
#endif
BLASLONG working[MAX_CPU_NUMBER][CACHE_LINE_SIZE * DIVIDE_RATE];
} job_t;

Expand Down Expand Up @@ -351,7 +347,7 @@ static int inner_thread(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n,
/* Make sure if no one is using workspace */
START_RPCC();
for (i = 0; i < args -> nthreads; i++)
while (job[mypos].working[i][CACHE_LINE_SIZE * bufferside]) {YIELDING;};
while (job[mypos].working[i][CACHE_LINE_SIZE * bufferside]) {YIELDING;MB;};
STOP_RPCC(waiting1);

#if defined(FUSED_GEMM) && !defined(TIMING)
Expand Down Expand Up @@ -413,7 +409,7 @@ static int inner_thread(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n,

/* Wait until other region of B is initialized */
START_RPCC();
while(job[current].working[mypos][CACHE_LINE_SIZE * bufferside] == 0) {YIELDING;};
while(job[current].working[mypos][CACHE_LINE_SIZE * bufferside] == 0) {YIELDING;MB;};
STOP_RPCC(waiting2);

/* Apply kernel with local region of A and part of other region of B */
Expand All @@ -431,6 +427,7 @@ static int inner_thread(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n,
/* Clear synchronization flag if this thread is done with other region of B */
if (m_to - m_from == min_i) {
job[current].working[mypos][CACHE_LINE_SIZE * bufferside] &= 0;
WMB;
}
}
} while (current != mypos);
Expand Down Expand Up @@ -492,7 +489,7 @@ static int inner_thread(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n,
START_RPCC();
for (i = 0; i < args -> nthreads; i++) {
for (js = 0; js < DIVIDE_RATE; js++) {
while (job[mypos].working[i][CACHE_LINE_SIZE * js] ) {YIELDING;};
while (job[mypos].working[i][CACHE_LINE_SIZE * js] ) {YIELDING;MB;};
}
}
STOP_RPCC(waiting3);
Expand Down Expand Up @@ -658,8 +655,8 @@ static int gemm_driver(blas_arg_t *args, BLASLONG *range_m, BLASLONG
}

/* Clear synchronization flags */
for (i = 0; i < MAX_CPU_NUMBER; i++) {
for (j = 0; j < MAX_CPU_NUMBER; j++) {
for (i = 0; i < nthreads; i++) {
for (j = 0; j < nthreads; j++) {
for (k = 0; k < DIVIDE_RATE; k++) {
job[i].working[j][CACHE_LINE_SIZE * k] = 0;
}
Expand Down
4 changes: 2 additions & 2 deletions param.h
Original file line number Diff line number Diff line change
Expand Up @@ -1507,7 +1507,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

#define SYMV_P 8

#define SWITCH_RATIO 4
#define SWITCH_RATIO 32

#ifdef ARCH_X86

Expand Down Expand Up @@ -1626,7 +1626,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

#define SYMV_P 8

#define SWITCH_RATIO 4
#define SWITCH_RATIO 32

#ifdef ARCH_X86

Expand Down