How to multiply two integer square matrices using MSVC inline assembly in C++

138 views Asked by At

I wrote this code in C++ with inline assembly _asm for square matrix multiplication.

#include <iostream>
using namespace std;

// Attempts to multiply the 2x2 integer matrices A and B into C with MSVC
// inline assembly, then prints C one row per line.
//
// NOTE(review): as written the assembly does not compute a matrix product.
// The inner loop advances the A, B and C pointers together, so each pass adds
// the product of same-position elements of A and B into the same position of
// C, and the pointer adjustments after the inner loop move the A pointer far
// outside the array. The comments below describe what each step really does.
int main() {
    int n = 2;
    int A[2][2] = { {1, 2}, {3, 4} };
    int B[2][2] = { {5, 6}, {7, 8} };
    int C[2][2] = { {0, 0}, {0, 0} };
    _asm {
        ; esi, edi and edx are byte pointers into A, B and C
        ; ecx is the counter consumed by the loop instructions
        mov ecx, n
        lea esi, A
        lea edi, B
        lea edx, C
        outer_loop :
            push ecx ; save the outer counter
            mov ecx, n
            inner_loop :
                push ecx
                mov eax, [esi] ; load the int at the A pointer
                mov ebx, [edi] ; load the int at the B pointer
                imul eax, ebx ; eax = product of the two loaded values
                add [edx], eax ; add the product to the int at the C pointer
                ; all three pointers step by one int on every pass, so the
                ; n passes touch n different elements of C instead of
                ; accumulating n products into one element
                add esi, 4 ; increment A pointer
                add edi, 4 ; increment B pointer
                add edx, 4 ; increment C pointer
                pop ecx
                loop inner_loop ; decrement ecx and repeat while it is nonzero
            ; ecx is 0 here because loop only falls through at zero
            ; esi already points one row further along after the inner loop
            ; and a row is n * 4 = 8 bytes, so adding 400 leaves A entirely
            add esi, 400 ; moves the A pointer 400 bytes forward
            sub esi, ecx ; subtracts 0, no effect
            ; eax = (ecx * 4 - n) * 4, which is -4 * n with ecx = 0
            mov eax, ecx
            imul eax, 4
            sub eax, n
            imul eax, 4
            lea edi, [edi + eax] ; rewinds the B pointer by n ints, back to where it was before the inner loop
            pop ecx ; restore the outer counter
            loop outer_loop ; repeat n times in total
    }
    // Print C row by row, elements separated by spaces.
    cout << "Resultant matrix:" << endl;
    for (int i = 0; i < n; i++) {
        for (int j = 0; j < n; j++) {
            cout << C[i][j] << " ";
        }
        cout << endl;
    }
    return 0;
}

It doesn’t give any errors, but the resultant matrix C isn’t correct. It gives me these elements:

5 12
0 0

I tried to change the offsets, but it didn't help.

1

There is 1 answer

8
Roman On

This is a simple variant based on your code:

#include <iostream>
using namespace std;

// Multiplies the square integer matrices A and B into C using MSVC x86 inline
// assembly, then prints C one row per line.
//
// The loops run in i-k-j order: for every A[i][k] the whole row k of B is
// scanned and A[i][k] * B[k][j] is accumulated into C[i][j]. All row strides
// are derived from n, so the assembly keeps working when n and the array
// extents are changed together. n must be at least 1, because the loop
// instruction decrements ecx before testing it for zero.
int main() {
    int n = 2;
    int A[2][2] = { {1, 2}, {3, 4} };
    int B[2][2] = { {5, 6}, {7, 8} };
    int C[2][2] = { {0, 0}, {0, 0} };
    _asm {
            ; Built-in two dimensional arrays are laid out contiguously in
            ; row-major order, so stepping a pointer by one int visits
            ; A[0][0] .. A[0][n-1], A[1][0] .. A[n-1][n-1] in turn, and the
            ; address one past the end of row i is the address of row i+1.
            ;
            ; Register roles:
            ;   esi - pointer to A[i][k]
            ;   edi - pointer to B[k][j]
            ;   edx - pointer to C[i][j]
            ;   ebx - value of A[i][k] during the j loop, afterwards &C[i][n]
            ;   ecx - counter for the i and k loops, end-of-row pointer in the j loop

            lea esi, A;         esi = &A[0][0]
            lea edx, C;         edx = &C[0][0]
            mov ecx, n;         i loop counter

idx_loop :  push ecx;           save the i counter
            lea edi, B;         edi = &B[0][0], B is rescanned for every row i
            mov ecx, n;         k loop counter

kdx_loop :  push ecx;           save the k counter
            push edx;           save &C[i][0] to rewind the row after the j loop
            mov ecx, n
            lea ecx, [edx + ecx*4]; ecx = &C[i][n], end marker for the j loop
            mov ebx, [esi];     ebx = A[i][k]

jdx_loop :  mov eax, [edi];     eax = B[k][j]
            imul eax, ebx;      eax = A[i][k] * B[k][j]
            add [edx], eax;     C[i][j] += eax
            add edi, 4;         edi = &B[k][j+1]
            add edx, 4;         edx = &C[i][j+1]
            cmp ecx, edx
            jne jdx_loop;       repeat until edx reaches &C[i][n]

            add esi, 4;         esi = &A[i][k+1]
            mov ebx, edx;       ebx = &C[i][n], which is also &C[i+1][0]
            pop edx;            edx = &C[i][0] again for the next k
            pop ecx;            restore the k counter
            loop kdx_loop;      repeat for all k

            mov edx, ebx;       edx = &C[i+1][0]
            pop ecx;            restore the i counter
            loop idx_loop;      repeat for all i
    }
    // Print C row by row, elements separated by spaces.
    cout << "Resultant matrix:" << endl;
    for (int i = 0; i < n; i++) {
        for (int j = 0; j < n; j++) {
            cout << C[i][j] << " ";
        }
        cout << endl;
    }
    return 0;
}

Also take a look at this to understand why the order of the loops is swapped.