/* Preamble: LIBFFI_ASM is defined before the project headers are pulled in
   so they see it.  Each preprocessor directive has to sit on its own line
   and start with '#', and the quoted include needs plain ASCII quotes.  */
#define LIBFFI_ASM
#include <fficonfig.h>
#include <ffi.h>
#include <ffi_cfi.h>
#include "asmnames.h"

#if defined(HAVE_AS_CFI_PSEUDO_OP)
        .cfi_sections   .debug_frame
#endif

/* SEH(...) wraps unwind directives: with X86_WIN64 defined it expands to its
   arguments, otherwise to nothing.  arg0..arg3 name the registers that carry
   the first four integer arguments in each configuration.  */
#ifdef X86_WIN64
#define SEH(...) __VA_ARGS__
#define arg0    rcx
#define arg1    rdx
#define arg2    r8
#define arg3    r9
#else
#define SEH(...)
#define arg0    rdi
#define arg1    rsi
#define arg2    rdx
#define arg3    rcx
#endif

/* This macro allows the safe creation of jump tables without an
   actual table.  The entry points into the table are all 8 bytes.
   The use of ORG asserts that we're at the correct location.

   E(BASE, X) is placed in front of table entry number X; BASE names the
   start of the table.

   NOTE(review): in MASM a ';' starts a comment, so the ORG half of the
   second definition is presumably never assembled here and only the
   ALIGN 8 takes effect -- confirm before relying on the ORG check.  */

/* ??? The clang assembler doesn't handle .org with symbolic expressions.  */
#if defined(__clang__) || defined(__APPLE__) || (defined (__sun__) && defined(__svr4__))
# define E(BASE, X) ALIGN 8
#else
# define E(BASE, X) ALIGN 8; ORG BASE + (X) * 8
#endif

/* Everything from here on is assembled into the code segment.  */
.CODE
/* External routines used below: abort is called from the jump-table slots
   that have no store sequence and from the out-of-range path of
   ffi_call_win64; ffi_closure_win64_inner is called from
   ffi_closure_win64.  */
extern PLT(C(abort)):near
extern C(ffi_closure_win64_inner):near

/* ffi_call_win64 (void *stack, struct win64_call_frame *frame, void *r10)

   Bit o trickiness here -- FRAME is the base of the stack frame
   for this function.  This has been allocated by ffi_call.  We also
   deallocate some of the stack that has been alloca'd.

   In:   arg0 = stack   installed as RSP; its first four 8-byte slots are
                        loaded into both the integer and the xmm argument
                        registers before the call
         arg1 = frame   installed as RBP (layout below)
         arg2 = r10     copied into R10 before the call

   Frame slots as used by this routine (byte offsets from FRAME):
       +0   written   caller's RBP
       +8   written   return address of this invocation
      +16   read      address of the function to call
      +24   read      32-bit FFI_TYPE_* code; index into the jump table
      +32   read      address the returned value is stored to

   After the call, the code at +24 selects an 8-byte-aligned table entry
   that stores RAX or XMM0 through the pointer at +32 and returns with
   "leave; ret".  Codes above FFI_TYPE_SMALL_STRUCT_4B, and the slots for
   FFI_TYPE_DOUBLE + 1 and FFI_TYPE_COMPLEX, call abort.  */

        ALIGN   8
        PUBLIC  C(ffi_call_win64)

        ; SEH(.safesh ffi_call_win64)
C(ffi_call_win64) proc SEH(frame)
        cfi_startproc
        /* Set up the local stack frame and install it in rbp/rsp.  */
        mov     RAX, [RSP]                      ; rax = return address
        mov     [arg1], RBP                     ; frame+0 = caller rbp
        mov     [arg1 + 8], RAX                 ; frame+8 = return address
        mov     RBP, arg1                       ; rbp = frame
        cfi_def_cfa(rbp, 16)
        cfi_rel_offset(rbp, 0)
        SEH(.pushreg rbp)
        SEH(.setframe rbp, 0)
        SEH(.endprolog)
        mov     RSP, arg0                       ; rsp = prepared argument area

        mov     R10, arg2

        /* Load all slots into both general and xmm registers.  */
        mov     RCX, [RSP]
        movsd   XMM0, qword ptr [RSP]
        mov     RDX, [RSP + 8]
        movsd   XMM1, qword ptr [RSP + 8]
        mov     R8, [RSP + 16]
        movsd   XMM2, qword ptr [RSP + 16]
        mov     R9, [RSP + 24]
        movsd   XMM3, qword ptr [RSP + 24]

        CALL    qword ptr [RBP + 16]            ; call the target function

        mov     ECX, [RBP + 24]                 ; type code, zero-extended into rcx
        mov     R8, [RBP + 32]                  ; r8 = where the result goes
        LEA     R10, ffi_call_win64_tab
        CMP     ECX, FFI_TYPE_SMALL_STRUCT_4B
        LEA     R10, [R10 + RCX*8]              ; lea keeps the flags set by cmp
        JA      L99                             ; unsigned: code past the last slot
        JMP     R10

/* Below, we're space constrained most of the time.  Thus we eschew the
   modern "mov, pop, ret" sequence (5 bytes) for "leave, ret" (2 bytes).  */
epilogue macro
        LEAVE
        cfi_remember_state
        cfi_def_cfa(rsp, 8)
        cfi_restore(rbp)
        RET
        cfi_restore_state
endm

        /* Jump table.  The label and every E() marker must be on a line of
           their own: each E() aligns the entry that follows it to the next
           8-byte slot, so entry N starts at ffi_call_win64_tab + N*8.  */
        ALIGN 8
ffi_call_win64_tab LABEL NEAR
E(0b, FFI_TYPE_VOID)
        epilogue
E(0b, FFI_TYPE_INT)
        movsxd  rax, eax
        mov     qword ptr [r8], rax
        epilogue
E(0b, FFI_TYPE_FLOAT)
        movss   dword ptr [r8], xmm0
        epilogue
E(0b, FFI_TYPE_DOUBLE)
        movsd   qword ptr [r8], xmm0
        epilogue
/* FFI_TYPE_LONGDOUBLE may be FFI_TYPE_DOUBLE but we need a different value
   here.  The E() below must stay outside this comment, otherwise this slot
   is not aligned and index FFI_TYPE_DOUBLE + 1 lands inside the previous
   entry.  */
E(0b, FFI_TYPE_DOUBLE + 1)
        call    PLT(C(abort))
E(0b, FFI_TYPE_UINT8)
        movzx   eax, al
        mov     qword ptr [r8], rax
        epilogue
E(0b, FFI_TYPE_SINT8)
        movsx   rax, al
        jmp     L98                             ; share the 64-bit store below
E(0b, FFI_TYPE_UINT16)
        movzx   eax, ax
        mov     qword ptr [r8], rax
        epilogue
E(0b, FFI_TYPE_SINT16)
        movsx   rax, ax
        jmp     L98                             ; share the 64-bit store below
E(0b, FFI_TYPE_UINT32)
        mov     eax, eax                        ; clears the upper half of rax
        mov     qword ptr [r8], rax
        epilogue
E(0b, FFI_TYPE_SINT32)
        movsxd  rax, eax
        mov     qword ptr [r8], rax
        epilogue
E(0b, FFI_TYPE_UINT64)
L98 LABEL near
        mov     qword ptr [r8], rax
        epilogue
E(0b, FFI_TYPE_SINT64)
        mov     qword ptr [r8], rax
        epilogue
E(0b, FFI_TYPE_STRUCT)
        epilogue
E(0b, FFI_TYPE_POINTER)
        mov     qword ptr [r8], rax
        epilogue
E(0b, FFI_TYPE_COMPLEX)
        call    PLT(C(abort))
E(0b, FFI_TYPE_SMALL_STRUCT_1B)
        mov     byte ptr [r8], al
        epilogue
E(0b, FFI_TYPE_SMALL_STRUCT_2B)
        mov     word ptr [r8], ax
        epilogue
E(0b, FFI_TYPE_SMALL_STRUCT_4B)
        mov     dword ptr [r8], eax
        epilogue

        /* Reached for any type code above FFI_TYPE_SMALL_STRUCT_4B.  */
        align   8
L99 LABEL near
        call    PLT(C(abort))

        epilogue

        cfi_endproc
C(ffi_call_win64) endp

/* Closure stack frame, 88 bytes in total:
   32 bytes of outgoing register stack space, 8 bytes of alignment,
   16 bytes of result, 32 bytes of xmm registers.

   ffi_clo_FS     whole frame size
   ffi_clo_OFF_R  offset of the result area from RSP
   ffi_clo_OFF_X  offset of the saved xmm argument registers from RSP  */
#define ffi_clo_FS      (32+8+16+32)
#define ffi_clo_OFF_R   (32+8)
#define ffi_clo_OFF_X   (32+8+16)

/* ffi_go_closure_win64

   Entry point that expects R10 to point at a closure object whose cif
   pointer is at offset 8 and whose function pointer is at offset 16.
   It stores the four integer argument registers into the 32 bytes above
   the return address, sets up rcx/rdx/r8 and then continues in
   ffi_closure_win64 at ffi_closure_win64_2.

   In:   r10 = closure object
         rcx, rdx, r8, r9 = incoming integer arguments
   Out:  does not return here; tail-jumps into ffi_closure_win64.  */

        ALIGN   8
        PUBLIC  C(ffi_go_closure_win64)

C(ffi_go_closure_win64) PROC
        cfi_startproc

        /* Save all integer arguments into the incoming reg stack space.  */
        mov     [rsp + 8], rcx
        mov     [rsp + 16], rdx
        mov     [rsp + 24], r8
        mov     [rsp + 32], r9

        /* Arguments for the shared tail.  */
        mov     rcx, [r10 + 8]                  ; load cif
        mov     rdx, [r10 + 16]                 ; load fun
        mov     r8, r10                         ; closure is user_data
        jmp     ffi_closure_win64_2

        cfi_endproc
C(ffi_go_closure_win64) ENDP

/* ffi_closure_win64

   Closure entry point.  On entry R10 addresses the closure; the cif, fun
   and user_data pointers are read at FFI_TRAMPOLINE_SIZE + 0, + 8 and + 16
   from it.  The integer argument registers are stored into the 32 bytes
   above the return address, the four xmm argument registers into the local
   frame, and ffi_closure_win64_inner is called with

       rcx = cif, rdx = fun, r8 = user_data, r9 = address of the result area.

   On return the first 8 bytes of the result area are loaded into both RAX
   and XMM0.

   ffi_closure_win64_2 is a second entry used by ffi_go_closure_win64, which
   arrives with rcx/rdx/r8 already set up and the integer arguments already
   stored.

   The PUBLIC directive and the PROC FRAME line must be separate
   statements.  */

        align   8
        PUBLIC  C(ffi_closure_win64)

C(ffi_closure_win64) PROC FRAME
        cfi_startproc
        /* Save all integer arguments into the incoming reg stack space.  */
        mov     qword ptr [rsp + 8], rcx
        mov     qword ptr [rsp + 16], rdx
        mov     qword ptr [rsp + 24], r8
        mov     qword ptr [rsp + 32], r9

        mov     rcx, qword ptr [FFI_TRAMPOLINE_SIZE + r10]      ; load cif
        mov     rdx, qword ptr [FFI_TRAMPOLINE_SIZE + 8 + r10]  ; load fun
        mov     r8, qword ptr [FFI_TRAMPOLINE_SIZE+16+r10]      ; load user_data

ffi_closure_win64_2 LABEL near
        sub     rsp, ffi_clo_FS                 ; allocate the closure frame
        cfi_adjust_cfa_offset(ffi_clo_FS)
        SEH(.allocstack ffi_clo_FS)
        SEH(.endprolog)

        /* Save all sse arguments into the stack frame.  */
        movsd   qword ptr [ffi_clo_OFF_X + rsp], xmm0
        movsd   qword ptr [ffi_clo_OFF_X+8+rsp], xmm1
        movsd   qword ptr [ffi_clo_OFF_X+16+rsp], xmm2
        movsd   qword ptr [ffi_clo_OFF_X+24+rsp], xmm3

        lea     r9, [ffi_clo_OFF_R + rsp]       ; r9 = address of result area
        call    C(ffi_closure_win64_inner)

        /* Load the result into both possible result registers.  */
        mov     rax, qword ptr [ffi_clo_OFF_R + rsp]
        movsd   xmm0, qword ptr [rsp + ffi_clo_OFF_R]

        add     rsp, ffi_clo_FS                 ; release the closure frame
        cfi_adjust_cfa_offset(-ffi_clo_FS)
        ret

        cfi_endproc
C(ffi_closure_win64) endp

/* On ELF/Linux builds emit an empty .note.GNU-stack section.  */
#if defined __ELF__ && defined __linux__
        .section        .note.GNU-stack,"",@progbits
#endif

/* Close the code segment and end the assembly source.  */
_text ends
end