现代cpu与奔腾4级别的差距?


用汇编语言实现整数乘法,并把它循环执行很多次(0FFFFFFFFh 次),以此测量程序花费的时间.
实现方法有两种:一种用移位指令shl(配合add),另一种用标准乘法指令mul.

实验结果如下:


                        shl       mul
4GHz的奔腾4机器         6078ms    20718ms
3.3GHz的i5-4590        7300ms     8400ms

可见shl指令性能主要看cpu频率,而现在cpu的mul指令性能似乎完全能和shl媲美了.

我想问:

  1. 现在的cpu在这道程序的shl指令上好像体现不出什么优势?
  2. mul指令按传统说法要比普通指令慢得多得多,为何这里的i5的mul指令性能基本与shl无差异?

只要给个概念性的回答即可


代码如下,用于参考


 ; Comparing Multiplications         (CompareMult.asm)

; This program compares the execution times of two approaches to 
; integer multiplication: Binary shifting versus the MUL instruction.

INCLUDE Irvine32.inc

; Iteration count for each timed loop; both mult_by_shifting and
; mult_by_MUL load it into ECX as the LOOP counter.
LOOP_COUNT = 0FFFFFFFFh

.data
; Operand that main loads into EAX before each timed call.
intval DWORD 5
; Value returned by GetMseconds at the start of the current measurement.
startTime DWORD ?

.code
main PROC

; Times mult_by_shifting and then mult_by_MUL, printing the elapsed
; time of each run on its own line.  Each measurement brackets a single
; call with two GetMseconds calls and displays stop - start via WriteDec.
; NOTE(review): the elapsed value is presumably in milliseconds, and the
; plain subtraction assumes the GetMseconds counter does not wrap during
; a run -- confirm against the Irvine32 library documentation.

; First approach:

    call    GetMseconds ; get start time (result is taken from EAX)
    mov startTime,eax

    mov eax,intval  ; multiply now
    call    mult_by_shifting

    call    GetMseconds ; get stop time
    sub eax,startTime   ; EAX = stop time - start time
    call    WriteDec        ; display elapsed time
    call    Crlf

; Second approach:

    call    GetMseconds ; get start time (result is taken from EAX)
    mov startTime,eax

    mov eax,intval  ; same operand as the first run
    call    mult_by_MUL

    call    GetMseconds ; get stop time
    sub eax,startTime   ; EAX = stop time - start time
    call    WriteDec        ; display elapsed time
    call    Crlf

    exit
main ENDP


;---------------------------------
mult_by_shifting PROC
;
; Benchmark loop: multiplies EAX by 36 using SHL and ADD,
;    LOOP_COUNT times.  Each product is discarded (EAX is
;    restored every iteration); only the elapsed time matters.
; 36*x is formed as (x SHL 5) + (x SHL 2) = 32*x + 4*x.
; Receives: EAX = value to multiply
; Returns:  nothing; EAX and EBX hold their entry values
; Clobbers: ECX (0 on exit), flags
; The timed loop body is intentionally left as-is, including
; the legacy LOOP instruction, so timings stay comparable
; with mult_by_MUL.
;---------------------------------

    push    ebx         ; EBX is scratch below; keep the caller's value
    mov ecx,LOOP_COUNT

L1: push    eax         ; save original EAX
    mov ebx,eax
    shl eax,5           ; EAX = x * 32
    shl ebx,2           ; EBX = x * 4
    add eax,ebx         ; EAX = x * 36
    pop eax         ; restore EAX
    loop    L1          ; decrement ECX, repeat while ECX != 0

    pop ebx         ; restore the caller's EBX
    ret
mult_by_shifting ENDP


;---------------------------------
mult_by_MUL PROC
;
; Benchmark loop: multiplies EAX by 36 using MUL,
;    LOOP_COUNT times.  Each product is discarded (EAX is
;    restored every iteration); only the elapsed time matters.
; Receives: EAX = value to multiply
; Returns:  nothing; EAX and EBX hold their entry values
; Clobbers: EDX (high half of the last product), ECX (0 on exit),
;           flags
; The timed loop body is intentionally left as-is, including
; the legacy LOOP instruction, so timings stay comparable
; with mult_by_shifting.
;---------------------------------

    push    ebx         ; EBX holds the multiplier below; keep the caller's value
    mov ecx,LOOP_COUNT

L1: push    eax         ; save original EAX
    mov ebx,36
    mul ebx             ; EDX:EAX = EAX * 36
    pop eax         ; restore EAX
    loop    L1          ; decrement ECX, repeat while ECX != 0

    pop ebx         ; restore the caller's EBX
    ret
mult_by_MUL ENDP

END main

cpu asm 指令

adghfs 10 years, 4 months ago

Your Answer