C++ Virtual Table - Explained thru Assemblies

Categories: cpp

C++ Virtual Table

Code

C++ Sources

#include <string>
#include <iostream>

// Polymorphic base class: two virtual member functions plus two int data
// members with default member initializers.
// NOTE: Base declares no virtual destructor, so destroying a derived object
// through a Base* (as main() does) is undefined behavior in standard C++.
class Base {
public:
    // Virtual; DerivedC overrides this one. Returns the literal "Base".
    virtual const char* Name() const {
        return "Base";
    }

    // Virtual; not overridden by DerivedC. Returns the literal "Base".
    virtual const char* BaseName() const {
        return "Base";
    }

    // Data members; the compiled constructor stores them at offsets 8 and 12,
    // right after the vtable pointer at offset 0.
    int a = 1;
    int t = 2;
};

// Derived class: overrides Name(), inherits BaseName() unchanged, and adds
// two data members of its own after the Base subobject.
class DerivedC : public Base {
public:
    // Overrides Base::Name(); returns the literal "Derived".
    virtual const char* Name() const override {
        return "Derived";
    }

    // Data members; the compiled constructor stores them at offsets 16 and 24.
    int b = 3;
    double c = 4.0;
};

// Allocates a DerivedC, calls both virtual functions through a Base* (so each
// call is dispatched through the object's vtable pointer), then deletes the
// object through that same Base*.
int main() {
    Base* base = new DerivedC;

    base->Name();
    base->BaseName();

    // NOTE: Base has no virtual destructor, so deleting a DerivedC through a
    // Base* is undefined behavior in standard C++. In the compiled output the
    // size passed to operator delete is 16, not the 32 bytes that were
    // allocated.
    delete base;
    return 0;
}

Compiled Assemblies

# String literal "Base"; both Base::Name() and Base::BaseName() return its address.
.LC0:
        .string "Base"
# const char* Base::Name() const
# In:  %rdi = this (spilled to the stack, otherwise unused)
# Out: %eax = address of .LC0
Base::Name() const:
        pushq   %rbp
        movq    %rsp, %rbp
        # Leaf function: this is stored below %rsp without adjusting %rsp.
        movq    %rdi, -8(%rbp)
        # Return value: the address of the string, not the string itself.
        movl    $.LC0, %eax
        popq    %rbp
        ret
# const char* Base::BaseName() const
# In:  %rdi = this (spilled to the stack, otherwise unused)
# Out: %eax = address of .LC0
# The instruction sequence is identical to Base::Name() const.
Base::BaseName() const:
        pushq   %rbp
        movq    %rsp, %rbp
        movq    %rdi, -8(%rbp)
        # Return value: address of the "Base" string.
        movl    $.LC0, %eax
        popq    %rbp
        ret
# String literal "Derived"; DerivedC::Name() returns its address.
.LC1:
        .string "Derived"
# const char* DerivedC::Name() const
# In:  %rdi = this (spilled to the stack, otherwise unused)
# Out: %eax = address of .LC1
DerivedC::Name() const:
        pushq   %rbp
        movq    %rsp, %rbp
        movq    %rdi, -8(%rbp)
        # Only difference from Base::Name(): the returned address is .LC1.
        movl    $.LC1, %eax
        popq    %rbp
        ret
# Base::Base() -- default constructor.
# In: %rdi = this
# Writes: (this)   = vtable for Base + 16 (address of the first function slot)
#         8(this)  = 1  (int a)
#         12(this) = 2  (int t)
Base::Base() [base object constructor]:
        pushq   %rbp
        movq    %rsp, %rbp
        movq    %rdi, -8(%rbp)
        # The 32-bit write to %edx zero-extends into %rdx, which is stored below.
        movl    $vtable for Base+16, %edx
        movq    -8(%rbp), %rax
        # Install the vtable pointer at offset 0 of the object.
        movq    %rdx, (%rax)
        movq    -8(%rbp), %rax
        # a = 1
        movl    $1, 8(%rax)
        movq    -8(%rbp), %rax
        # t = 2
        movl    $2, 12(%rax)
        nop
        popq    %rbp
        ret
# DerivedC::DerivedC() -- default constructor.
# In: %rdi = this
# Runs Base::Base() on the same address first, then overwrites the vtable
# pointer with DerivedC's and initializes DerivedC's own members:
#         (this)   = vtable for DerivedC + 16
#         16(this) = 3    (int b)
#         24(this) = 4.0  (double c, loaded from .LC2)
DerivedC::DerivedC() [base object constructor]:
        pushq   %rbp
        movq    %rsp, %rbp
        # Reserve stack space for the spilled this; the frame stays a multiple of 16.
        subq    $16, %rsp
        movq    %rdi, -8(%rbp)
        movq    -8(%rbp), %rax
        # Pass this as the first argument to the base-class constructor.
        movq    %rax, %rdi
        call    Base::Base() [base object constructor]
        # Replace the vtable pointer that Base::Base() just stored.
        movl    $vtable for DerivedC+16, %edx
        movq    -8(%rbp), %rax
        movq    %rdx, (%rax)
        movq    -8(%rbp), %rax
        # b = 3
        movl    $3, 16(%rax)
        movq    -8(%rbp), %rax
        # c = 4.0 (the 8-byte constant lives at .LC2)
        movsd   .LC2(%rip), %xmm0
        movsd   %xmm0, 24(%rax)
        nop
        leave
        ret
# int main()
# Out: %eax = 0
# Stack slot: -24(%rbp) holds the local variable base.
# %rbx carries the pointer returned by operator new across the constructor
# call, so it is pushed in the prologue and popped in the epilogue.
main:
        pushq   %rbp
        movq    %rsp, %rbp
        pushq   %rbx
        subq    $24, %rsp
        # new DerivedC: allocate 32 bytes (object spans offsets 0..31).
        movl    $32, %edi
        call    operator new(unsigned long)
        movq    %rax, %rbx
        # Construct the object in the freshly allocated memory (this = %rbx).
        movq    %rbx, %rdi
        call    DerivedC::DerivedC() [complete object constructor]
        # base = pointer to the new object
        movq    %rbx, -24(%rbp)
        # base->Name(): load the vtable pointer from the object, then the
        # first function slot, and call it with this in %rdi.
        movq    -24(%rbp), %rax
        movq    (%rax), %rax
        movq    (%rax), %rax
        movq    -24(%rbp), %rdx
        movq    %rdx, %rdi
        call    *%rax
        # base->BaseName(): same sequence, but the slot is 8 bytes further in.
        movq    -24(%rbp), %rax
        movq    (%rax), %rax
        addq    $8, %rax
        movq    (%rax), %rax
        movq    -24(%rbp), %rdx
        movq    %rdx, %rdi
        call    *%rax
        # delete base: no destructor call is emitted; the size argument is 16,
        # which matches Base's layout (vtable pointer + two ints), not the
        # 32 bytes allocated above.
        movq    -24(%rbp), %rax
        movl    $16, %esi
        movq    %rax, %rdi
        call    operator delete(void*, unsigned long)
        # return 0
        movl    $0, %eax
        addq    $24, %rsp
        popq    %rbx
        popq    %rbp
        ret
# Virtual table for DerivedC. Each .quad is 8 bytes, so the "+16" address that
# the constructor stores in the object points at the third entry below.
vtable for DerivedC:
        # NOTE(review): presumably the Itanium C++ ABI offset-to-top field -- confirm.
        .quad   0
        .quad   typeinfo for DerivedC
        # Slot 0 (vtable+16): Name(), overridden by DerivedC.
        .quad   DerivedC::Name() const
        # Slot 1 (vtable+24): BaseName(), inherited from Base.
        .quad   Base::BaseName() const
# Virtual table for Base. Each .quad is 8 bytes, so the "+16" address that
# Base::Base() stores in the object points at the third entry below.
vtable for Base:
        # NOTE(review): presumably the Itanium C++ ABI offset-to-top field -- confirm.
        .quad   0
        .quad   typeinfo for Base
        # Slot 0 (vtable+16): Name()
        .quad   Base::Name() const
        # Slot 1 (vtable+24): BaseName()
        .quad   Base::BaseName() const
# Type information records referenced from the second entry of each vtable.
# DerivedC's record additionally points at the record of its base class.
typeinfo for DerivedC:
        .quad   vtable for __cxxabiv1::__si_class_type_info+16
        .quad   typeinfo name for DerivedC
        .quad   typeinfo for Base
# Name string for DerivedC: length prefix 8 followed by the identifier.
typeinfo name for DerivedC:
        .string "8DerivedC"
typeinfo for Base:
        .quad   vtable for __cxxabiv1::__class_type_info+16
        .quad   typeinfo name for Base
# Name string for Base: length prefix 4 followed by the identifier.
typeinfo name for Base:
        .string "4Base"
# 8-byte constant loaded by DerivedC::DerivedC() for double c = 4.0.
# Low 32 bits come first: 0, then 1074790400 (0x40100000); together they form
# 0x4010000000000000, the IEEE-754 double encoding of 4.0.
.LC2:
        .long   0
        .long   1074790400

Analysis

Base::Base()

Default constructor for Base.

Base::Base() [base object constructor]:
        pushq   %rbp
        movq    %rsp, %rbp
        movq    %rdi, -8(%rbp)
        movl    $vtable for Base+16, %edx
        movq    -8(%rbp), %rax
        movq    %rdx, (%rax)
        movq    -8(%rbp), %rax
        movl    $1, 8(%rax)
        movq    -8(%rbp), %rax
        movl    $2, 12(%rax)
        nop
        popq    %rbp
        ret
  1. Init Stack Frame

            pushq   %rbp
            movq    %rsp, %rbp
    
  2. Save the 1st Passed-In Parameter to Stack -8(%rbp): Address to this in %rdi

            movq    %rdi, -8(%rbp)
    
  3. Move the Virtual Table Address to %edx

            movl    $vtable for Base+16, %edx
    
            ...
    
    vtable for Base:
            .quad   0
            .quad   typeinfo for Base
            .quad   Base::Name() const      ; <-- %edx is pointed to here
            .quad   Base::BaseName() const
    

    Each .quad is 8 bytes of data, so the offset "+16" skips the first two entries and %edx points to the third line in vtable for Base (the first virtual function slot).

  4. Save the Address of Virtual Table to this

            movq    -8(%rbp), %rax
            movq    %rdx, (%rax)
    

    The 1st instruction loads the address of this into %rax.
    The 2nd instruction stores %rdx (the earlier 32-bit write to %edx zero-extended the vtable address into the full %rdx) to the location pointed to by %rax (that’s this).

  5. Assign the Member Variables

            movq    -8(%rbp), %rax
            movl    $1, 8(%rax)
    
            movq    -8(%rbp), %rax
            movl    $2, 12(%rax)
    

    As in step 4, %rax holds the address of this, and the immediates $1 and $2 are stored at offsets +8 and +12 from %rax (this), right after the 8-byte vtable pointer.

  6. Restore the Stack Frame and Return

            popq    %rbp
            ret
    

Layout in Memory

this    ->      vtable for Base + 16
this+8  ->      1 (int a)
this+12 ->      2 (int t)

DerivedC::DerivedC()

Default constructor for DerivedC.

DerivedC::DerivedC() [base object constructor]:
        pushq   %rbp
        movq    %rsp, %rbp
        subq    $16, %rsp
        movq    %rdi, -8(%rbp)
        movq    -8(%rbp), %rax
        movq    %rax, %rdi
        call    Base::Base() [base object constructor]
        movl    $vtable for DerivedC+16, %edx
        movq    -8(%rbp), %rax
        movq    %rdx, (%rax)
        movq    -8(%rbp), %rax
        movl    $3, 16(%rax)
        movq    -8(%rbp), %rax
        movsd   .LC2(%rip), %xmm0
        movsd   %xmm0, 24(%rax)
        nop
        leave
        ret
  1. Init Stack Frame

            pushq   %rbp
            movq    %rsp, %rbp
    
            subq    $16, %rsp
    

    The last instruction reserves 16 bytes on the stack.

    The x86-64 System V ABI requires the stack to be 16-byte aligned at every call, so %rsp is decreased by 16 bytes even though only 8 bytes are used.

  2. Save the 1st Passed-In Parameter this to Stack

            movq    %rdi, -8(%rbp)
    
  3. Set the 1st Parameter (this) for Function Call to Base::Base() and Call It

            movq    -8(%rbp), %rax
            movq    %rax, %rdi
    
            call    Base::Base() [base object constructor]
    
  4. Save Virtual Table Address to this

            movl    $vtable for DerivedC+16, %edx
            movq    -8(%rbp), %rax
            movq    %rdx, (%rax)
       
            ...
    
    vtable for DerivedC:
            .quad   0
            .quad   typeinfo for DerivedC
            .quad   DerivedC::Name() const      ; <-- %edx is pointed to here 
            .quad   Base::BaseName() const
    
  5. Assign the Member Variables

            movq    -8(%rbp), %rax
            movl    $3, 16(%rax)
            movq    -8(%rbp), %rax
            movsd   .LC2(%rip), %xmm0
            movsd   %xmm0, 24(%rax)
    
  6. Restore the Stack Frame and Return

            leave
            ret
    

    leave = movq %rbp, %rsp followed by popq %rbp

Layout in Memory

this    ->      vtable for DerivedC + 16
this+8  ->      1 (int a from Base)
this+12 ->      2 (int t from Base)
this+16 ->      3 (int b)
this+24 ->      4.0 (double c)

Base::Name(), Base::BaseName()

virtual const char* Name() const {
    return "Base";
}

virtual const char* BaseName() const {
    return "Base";
}
.LC0:
        .string "Base"

Base::Name() const:
        pushq   %rbp
        movq    %rsp, %rbp
        movq    %rdi, -8(%rbp)
        movl    $.LC0, %eax
        popq    %rbp
        ret

Base::BaseName() const:
        pushq   %rbp
        movq    %rsp, %rbp
        movq    %rdi, -8(%rbp)
        movl    $.LC0, %eax
        popq    %rbp
        ret

They are the same.

Apart from instructions about Stack Frame, there are only 2 instructions left:

        movq    %rdi, -8(%rbp)
        movl    $.LC0, %eax
  1. Save the 1st Passed-In Parameter this to Stack
  2. Move $.LC0 (the address of the string "Base") to %eax as the Return Value

DerivedC::Name()

virtual const char* Name() const override {
    return "Derived";
}
.LC1:
        .string "Derived"

DerivedC::Name() const:
        pushq   %rbp
        movq    %rsp, %rbp
        movq    %rdi, -8(%rbp)
        movl    $.LC1, %eax
        popq    %rbp
        ret

This is the same as Base::Name() except the return value.

main()

int main() {
    Base* base = new DerivedC;

    base->Name();
    base->BaseName();

    delete base;
    return 0;
}
main:
        pushq   %rbp
        movq    %rsp, %rbp
        pushq   %rbx
        subq    $24, %rsp

        movl    $32, %edi
        call    operator new(unsigned long)
        movq    %rax, %rbx
        movq    %rbx, %rdi
        call    DerivedC::DerivedC() [complete object constructor]
        movq    %rbx, -24(%rbp)

        movq    -24(%rbp), %rax
        movq    (%rax), %rax
        movq    (%rax), %rax
        movq    -24(%rbp), %rdx
        movq    %rdx, %rdi
        call    *%rax

        movq    -24(%rbp), %rax
        movq    (%rax), %rax
        addq    $8, %rax
        movq    (%rax), %rax
        movq    -24(%rbp), %rdx
        movq    %rdx, %rdi
        call    *%rax

        movq    -24(%rbp), %rax
        movl    $16, %esi
        movq    %rax, %rdi
        call    operator delete(void*, unsigned long)

        movl    $0, %eax
        addq    $24, %rsp
        popq    %rbx
        popq    %rbp
        ret
  1. Init Stack Frame

        pushq   %rbp
        movq    %rsp, %rbp
        pushq   %rbx
    
        subq    $24, %rsp
    
  2. Allocate DerivedC

    new DerivedC;
    
        movl    $32, %edi
        call    operator new(unsigned long)
        movq    %rax, %rbx
    

    DerivedC occupies 32 bytes (8-byte vtable pointer, int a, int t, int b, 4 bytes of padding, then the 8-byte double c), so we pass $32 into operator new(unsigned long) to allocate the memory.

    The allocated memory address is returned thru %rax, and it’s moved to %rbx by the last instruction.

  3. Construct DerivedC

    new DerivedC;
    
        movq    %rbx, %rdi
        call    DerivedC::DerivedC() [complete object constructor]
    

    Move the allocated memory address from %rbx to %rdi as the 1st parameter that will be passed into DerivedC::DerivedC().

  4. Assign the pointer to DerivedC to Base* base

    Base* base = new DerivedC;
    
        movq    %rbx, -24(%rbp)
    

    base is stored in stack: -24(%rbp).

  5. Call base->Name() Overridden Virtual Function

    base->Name();
    
        movq    -24(%rbp), %rax
        movq    (%rax), %rax
        movq    (%rax), %rax
    
        movq    -24(%rbp), %rdx
        movq    %rdx, %rdi
    
        call    *%rax
    
    1. Move Base* base to %rax.
      Now %rax holds the address of the allocated DerivedC object, which is what base points to.

    2. Load (%rax) (which is *base) to %rax.
      As the memory layout of DerivedC is:

      this (base) ->      vtable for DerivedC + 16
      this+8      ->      1 (int a from Base)
      this+12     ->      2 (int t from Base)
      this+16     ->      3 (int b)
      this+24     ->      4.0 (double c)
      

      Memory at base also stores the address to vtable for DerivedC + 16.
      So, *base is the address to vtable for DerivedC + 16.

      vtable for DerivedC:
              .quad   0
              .quad   typeinfo for DerivedC
              .quad   DerivedC::Name() const      ; Here is *base
              .quad   Base::BaseName() const
      

      Now, %rax holds the address of the vtable slot that contains the address of DerivedC::Name() const.

    3. Load (%rax) again to %rax.
      Now, %rax holds the address of the method DerivedC::Name().

    4. Call It thru Method Address

              call    *%rax
      
  6. Call base->BaseName() Non-Overridden Virtual Function
    This is similar to the previous step.
    The only differences are the slot we visit in the virtual table and the method address stored in that slot:

    1. Before loading the method address, it adds $8 to the vtable address so that %rax points to the 2nd slot in the vtable (each slot is 8 bytes).

              movq    -24(%rbp), %rax
              movq    (%rax), %rax
      
              addq    $8, %rax    ; we visit the 2nd method in vtable
      
              movq    (%rax), %rax
              movq    -24(%rbp), %rdx
              movq    %rdx, %rdi
              call    *%rax
      
    2. Also, because BaseName() isn’t overridden by DerivedC, the virtual table for DerivedC stores the address of Base::BaseName() in that slot.

          vtable for DerivedC:
              .quad   0
              .quad   typeinfo for DerivedC
              .quad   DerivedC::Name() const
              .quad   Base::BaseName() const
      
  7. Delete pointer

        movq    -24(%rbp), %rax
        movl    $16, %esi
        movq    %rax, %rdi
        call    operator delete(void*, unsigned long)
    
    1. Move Base* base to %rax
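    2. Set 2nd Parameter unsigned long to $16. Note that 16 is the size of Base (the static type of the pointer), not the 32 bytes of DerivedC: Base has no virtual destructor, so the delete is sized by the pointer's static type, and deleting a DerivedC through a Base* this way is undefined behavior in standard C++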
    3. Set 1st Parameter void* to %rax (Base* base)
    4. Call delete
  8. Set Return Value

    return 0;
    
        movl    $0, %eax
    
  9. Restore Stack Frame and Return

        addq    $24, %rsp
        popq    %rbx
        popq    %rbp
        ret