; Instruction forms with zero, one, two and three explicit operands.
; Intel operand order: the destination is written first, and in the
; forms below it is also read as an input.
ret ; return
inc dest ; dest++
add dest, src ; dest += src
vfmadd132ss dest, src2, src3 ; dest = dest * src3 + src2
; The same opcode bytes (89 d0) at three operand sizes; only the prefix byte differs.
89 d0 mov eax, edx ; no prefix: 32-bit operands
48 89 d0 mov rax, rdx ; 48 (REX.W) prefix: 64-bit operands
66 89 d0 mov ax, dx ; 66 (operand-size) prefix: 16-bit operands
rax / eax / ax ... rdi, rsi, rbx, rcx, rdx, rbp, rsp, r8-r15
xmm0-15, ymm0-15, zmm0-15 (or 31) ...
; Immediates and displacements follow the opcode in little-endian byte order.
; Square brackets mark the 32-bit displacement field of a memory operand.
b9 ef be ad de mov ecx, 0xdeadbeef ; b9 = mov ecx, imm32
48 b8 ef cd ab 89 67 45 23 01 mov rax, 0x0123456789abcdef ; REX.W + b8 = mov rax, imm64
4c 8b 05[04 f3 ff 7f] mov r8, QWORD PTR [global] ; ModRM 05 = RIP-relative disp32
01 84 93[10 33 fe 7f] add DWORD PTR array[rbx + 4 * rdx], eax ; ModRM 84 = SIB + disp32; SIB 93 = scale 4, index rdx, base rbx
// Step-by-step expansion of a single read-modify-write add to memory.
// NOTE(review): a, b and d presumably stand for eax, rbx and rdx — confirm.
addr = &array[b + d * 4]; // form the effective address (base + index * scale)
tmp = *addr; // load the current value
tmp += a; // add the register operand
*addr = tmp; // store the result back to the same location
/* Element-wise maximum over two arrays of exactly 65536 doubles:
 * x[i] is overwritten with y[i] wherever y[i] compares greater. */
void maxArray(double *x, const double *y)
{
    const double *const stop = y + 65536;

    while (y != stop)
    {
        if (*y > *x)
        {
            *x = *y;
        }
        ++x;
        ++y;
    }
}
Compiler Explorer
def test(array):
total = num_clipped = clipped_total = 0
for i in array:
total += i
if i < 128:
num_clipped += 1
clipped_total += i
return total / len(array), clipped_total / num_clipped
; Head of the maxArray loop shown as raw machine-code bytes followed by the disassembly.
31 c0 xor eax, eax ; zero the byte offset
f2 0f 10 04 06 movsd xmm0, QWORD PTR [rsi+rax] ; load one double
66 0f 2e 04 07 ucomisd xmm0, QWORD PTR [rdi+rax] ; compare with the double at the same offset
76 05 jbe skip ; rel8 = 05: jumps over the next 5 bytes when taken
f2 0f ... movsd QWORD PTR... ; remaining bytes and operands are elided in this listing
;-----------------------------------------------------------------------
; void maxArray(double *x, const double *y)
; For each of 65536 doubles: x[i] = y[i] wherever y[i] > x[i].
; In:    rdi = x, rsi = y  (presumably System V AMD64 argument order -- confirm)
; Clobb: rax, xmm0, flags
;-----------------------------------------------------------------------
maxArray:
        xor     eax, eax                        ; rax = byte offset into both arrays
.forLoop:
        movsd   xmm0, QWORD PTR [rsi+rax]       ; xmm0 = y[i]
        ucomisd xmm0, QWORD PTR [rdi+rax]       ; compare y[i] with x[i]
        jbe     .skipIf                         ; y[i] <= x[i] (or unordered): leave x[i] alone
        movsd   QWORD PTR [rdi+rax], xmm0       ; x[i] = y[i]
.skipIf:
        add     rax, 8                          ; advance by one double
        cmp     rax, 524288                     ; 65536 elements * 8 bytes
        jne     .forLoop
        ret
// The maxArray assembly restated as primitive register/memory operations.
// rd64(a) and wr64(a, v) stand for a 64-bit load from / store to address a.
// NOTE(review): the "// µ" and "// macro" markers below look truncated
// (presumably micro-/macro-fusion annotations) — confirm against the original.
clear(rax)
tmp = rsi + rax; xmm0 = rd64(tmp) // µ
tmp = rdi + rax; tmp = rd64(tmp);
flags = compare(xmm0, tmp)
if (flags.be) goto skipIf
tmp = rdi + rax; wr64(tmp, xmm0)
rax = rax + 8
flags = compare(rax, 524288);
if (flags.ne) goto forLoop // macro
// ret: step rsp past the 8-byte return address and jump to the address that was stored there
rsp = rsp + 8; goto rd64(rsp - 8)
// Operation stream along the predicted path; tNN labels presumably are hex code offsets.
// First pass: the skip branch is predicted taken, so the store at t14 is not in the stream.
t00: clear(rax)
t08: tmp = rsi + rax; xmm0 = rd64(tmp)
t0d: tmp = rdi + rax; tmp = rd64(tmp); flags = compare(xmm0, tmp)
t12: if (flags.be) goto t19 // predicted taken
t19: rax = rax + 8
t1d: flags = compare(rax, 524288) if (flags.ne) goto t08// predicted taken
// Second pass: the skip branch is predicted not taken, so the store at t14 is included.
t08: tmp = rsi + rax; xmm0 = rd64(tmp)
t0d: tmp = rdi + rax; tmp = rd64(tmp); flags = compare(xmm0, tmp)
t12: if (flags.be) goto t19 // predicted not taken
t14: tmp = rdi + rax; wr64(tmp, xmm0)
t19: rax = rax + 8
// Four consecutive loop iterations written out as one flat stream.
// Address arithmetic is folded into the rd64 operands; no store appears in any iteration.
// Every iteration reuses the same register names (rax, xmm0, flags).
xmm0 = rd64(rsi + rax)
flags = compare(xmm0, rd64(rdi + rax))
if (flags.be) goto t19
rax = rax + 8
flags = compare(rax, 524288) if (flags.ne) goto t08
xmm0 = rd64(rsi + rax)
flags = compare(xmm0, rd64(rdi + rax))
if (flags.be) goto t19
rax = rax + 8
flags = compare(rax, 524288) if (flags.ne) goto t08
xmm0 = rd64(rsi + rax)
flags = compare(xmm0, rd64(rdi + rax))
if (flags.be) goto t19
rax = rax + 8
flags = compare(rax, 524288) if (flags.ne) goto t08
xmm0 = rd64(rsi + rax)
flags = compare(xmm0, rd64(rdi + rax))
if (flags.be) goto t19
rax = rax + 8
flags = compare(rax, 524288) if (flags.ne) goto t08
// Two iterations with every register write given a fresh numbered name
// (rax_1 -> rax_2 -> rax_3, flags_1 .. flags_4), so each value has one producer.
// The only value carried from one iteration to the next is the rax chain.
xmm0_1 = rd64(rsi + rax_1)
flags_1 = compare(xmm0_1, rd64(rdi + rax_1))
if (flags_1.be) goto t19
rax_2 = rax_1 + 8
flags_2 = compare(rax_2, 524288) if (flags_2.ne) goto t08
xmm0_2 = rd64(rsi + rax_2)
flags_3 = compare(xmm0_2, rd64(rdi + rax_2))
if (flags_3.be) goto t19
rax_3 = rax_2 + 8
flags_4 = compare(rax_3, 524288) if (flags_4.ne) goto t08
// NOTE(review): the ten lines below repeat the sequence above verbatim — presumably a duplicated slide; confirm.
xmm0_1 = rd64(rsi + rax_1)
flags_1 = compare(xmm0_1, rd64(rdi + rax_1))
if (flags_1.be) goto t19
rax_2 = rax_1 + 8
flags_2 = compare(rax_2, 524288) if (flags_2.ne) goto t08
xmm0_2 = rd64(rsi + rax_2)
flags_3 = compare(xmm0_2, rd64(rdi + rax_2))
if (flags_3.be) goto t19
rax_3 = rax_2 + 8
flags_4 = compare(rax_3, 524288) if (flags_4.ne) goto t08
Port 0 | Port 1 | Port 2 | Port 3 | Port 4 | Port 5 | Port 6 | Port 7 |
---|---|---|---|---|---|---|---|
ALU 1 | ALU 1 | load & addr | load & addr | store | ALU 1 | ALU 1 | addr |
vec str 3 |
vec alu 3 |
vector permute |
branch 1-2 |
||||
FPU 4 |
FPU 4 |
x87 FPU | |||||
branch 1-2 |
vec mul 5 |
PCLMUL 7 |
|||||
divide & sqrt |