Board index » off-topic » Re: I need the fastest routine
Sasa Zeman
Delphi Developer |
Re: I need the fastest routine2008-07-20 11:57:40 PM off-topic16 Content-Type: text/plain; charset=iso-8859-1 Content-Transfer-Encoding: 8Bit MinMaxTest6.dpr Content-Type: text/x-objcsrc; name="MinMaxTest6.dpr" Content-Transfer-Encoding: 8Bit Content-Disposition: attachment; filename="MinMaxTest6.dpr" program MinMaxTest6; {$APPTYPE CONSOLE} { Modified MinMaxTest5 by Sasa Zeman Few major chages are made: - Simplifying testing code - Putting sleep(10) before starting test - Random number test - Priority set to maximum during test } uses SysUtils, Windows, SZTimer; // SZTimer can be found at www.szutils.net Delphi section // It is RDTSC and GetTickCount based timer and also have // procedures to set priority to maximum and back to original. // These procedures are used making equal environment for all tested // functions. // If there is no need for these functions, simply remarking // the lines is enough const MinInt = -MaxInt - 1; type TMinMaxArray = Procedure (aArray: Array of Integer; out aMax, aMin : integer); var A: array of Integer; procedure MinMaxArray( const aArray : Array of Integer; out aMax, aMin : integer ); var MaxArray, MinArray : array[boolean] of integer; i : integer; begin aMax :=0; aMin :=0; i:=high( aArray ); if i>=0 then begin MaxArray[True] := aArray[0]; MinArray[True] := aArray[0]; while i>0 do begin MaxArray[ aArray[i]>MaxArray[True] ] := aArray[i]; MinArray[ aArray[i] < MinArray[True] ] := aArray[i]; dec(i); end; aMax := MaxArray[True]; aMin := MinArray[True]; end; end; procedure MinMaxArray0(const aArray: array of Integer; out aMax, aMin: Integer); var I: Integer; P: PInteger; begin aMax := MinInt; aMin := MaxInt; P :=@aArray[0]; for I := 0 to High(aArray) do begin if P^>aMax then aMax := P^; if P^ < aMin then aMin := P^; Inc(P); end; end; procedure MinMaxArray0_1(const aArray: array of Integer; out aMax, aMin: Integer); TYPE tIa = PACKED ARRAY [0..7] OF INTEGER ; tpIa = ^tIa ; var I: Integer; pIa : tPIa ; iMin,iMax : INTEGER ; rm : Integer ; begin iMax := MinInt; iMin := MaxInt; pIa :=@aArray[0]; rm := High(aArray) MOD 8 ; for I := 0 to High(aArray) DIV 8 do begin if ( PIa^[0]>iMax ) OR ( PIa^[1]>iMax ) OR ( PIa^[2]>iMax ) OR ( PIa^[3]>iMax ) OR ( PIa^[4]>iMax ) OR ( PIa^[5]>iMax ) OR ( PIa^[6]>iMax ) OR ( PIa^[7]>iMax ) then BEGIN IF PIa^[0]>iMax THEN iMax := PIa^[0]; IF PIa^[1]>iMax THEN iMax := PIa^[1]; IF PIa^[2]>iMax THEN iMax := PIa^[2]; IF PIa^[3]>iMax THEN iMax := PIa^[3]; IF PIa^[4]>iMax THEN iMax := PIa^[4]; IF PIa^[5]>iMax THEN iMax := PIa^[5]; IF PIa^[6]>iMax THEN iMax := PIa^[6]; IF PIa^[7]>iMax THEN iMax := PIa^[7]; END ; if ( PIa^[0] < iMin ) OR ( PIa^[1] < iMin ) OR ( PIa^[2] < iMin ) OR ( PIa^[3] < iMin ) OR ( PIa^[4] < iMin ) OR ( PIa^[5] < iMin ) OR ( PIa^[6] < iMin ) OR ( PIa^[7] < iMin ) then BEGIN IF PIa^[0] < iMin THEN iMin := PIa^[0]; IF PIa^[1] < iMin THEN iMin := PIa^[1]; IF PIa^[2] < iMin THEN iMin := PIa^[2]; IF PIa^[3] < iMin THEN iMin := PIa^[3]; IF PIa^[4] < iMin THEN iMin := PIa^[4]; IF PIa^[5] < iMin THEN iMin := PIa^[5]; IF PIa^[6] < iMin THEN iMin := PIa^[6]; IF PIa^[7] < iMin THEN iMin := PIa^[7]; END ; Inc(PIa); end; for I := 0 to rm do begin IF PIa^[I] < iMin THEN iMin := PIa^[I]; IF PIa^[I]>iMax THEN iMax := PIa^[I]; end; aMax := iMin; aMin := iMax; end; procedure MinMaxArray1(const aArray: array of Integer; out aMax, aMin: Integer); var I: Integer; begin aMax := MinInt; aMin := MaxInt; for I := 0 to High(aArray) do begin if aArray[I]>aMax then aMax := aArray[I]; if aArray[I] < aMin then aMin := aArray[I]; end; end; procedure MinMaxArray2(const aArray: array of Integer; out aMax, aMin: Integer); var I: Integer; Value: Integer; begin aMax := MinInt; aMin := MaxInt; for I := 0 to High(aArray) do begin Value := aArray[I]; if Value>aMax then aMax := Value; if Value < aMin then aMin := Value; end; end; procedure MinMaxArray3(const aArray : Array of Integer; out aMax, aMin : integer ); var i, m, mmod, n: Integer; begin aMax := 0; aMin := 0; if Length(aArray)>0 then begin aMin := aArray[0]; aMax := aArray[0]; m := Length(aArray) div 4; mmod := Length(aArray) mod 4; n := 0; for i := 0 to m -1 do begin if (aArray[n] < aMin) or (aArray[n+1] < aMin) or (aArray[n+2] < aMin) or (aArray[n+3] < aMin) then begin if aArray[n] < aMin then aMin := aArray[n]; if aArray[n+1] < aMin then aMin := aArray[n+1]; if aArray[n+2] < aMin then aMin := aArray[n+2]; if aArray[n+3] < aMin then aMin := aArray[n+3]; end; if (aArray[n]>aMax) or (aArray[n+1]>aMax) or (aArray[n+2]>aMax) or (aArray[n+3]>aMax) then begin if aArray[n]>aMax then aMax := aArray[n]; if aArray[n+1]>aMax then aMax := aArray[n+1]; if aArray[n+2]>aMax then aMax := aArray[n+2]; if aArray[n+3]>aMax then aMax := aArray[n+3]; end; Inc(n, 4); end; for i := 0 to mmod-1 do begin if aArray[i+n] < aMin then aMin := aArray[i+n]; if aArray[i+n]>aMax then aMax := aArray[i+n]; end; end; end; procedure MinMaxArray4(const aArray: array of Integer; out aMax, aMin: Integer); var I: Integer; Value: Integer; iMin, iMax: Integer; begin iMax := MinInt; iMin := MaxInt; for I := 0 to High(aArray) do begin Value := aArray[I]; if Value>iMax then iMax := Value; if Value < iMin then iMin := Value; end; aMax := iMax; aMin := iMin; end; procedure MinMaxArray5(const aArray: array of Integer; out aMax, aMin: Integer); var LInd, LTempMin, LTempMax, LArrVal: Integer; begin LTempMin := MaxInt; LTempMax := MinInt; for LInd := 0 to High(aArray) do begin {Get the array value} LArrVal := aArray[LInd]; {Update the minimum} LTempMin := LArrVal + ((-Ord(LTempMin < LArrVal)) and (LTempMin - LArrVal)); {Update the maximum} LTempMax := LArrVal + ((-Ord(LTempMax>LArrVal)) and (LTempMax - LArrVal)); end; aMin := LTempMin; aMax := LTempMax; end; procedure MinMaxArray6(const aArray : Array of Integer; out aMax, aMin : integer ); var i, m, mmod, n, iMax, iMin: Integer; begin aMax := 0; aMin := 0; if Length(aArray)>0 then begin iMin := aArray[0]; iMax := aArray[0]; m := Length(aArray) div 4; mmod := Length(aArray) mod 4; n := 0; for i := 0 to m -1 do begin if (aArray[n] < iMin) or (aArray[n+1] < iMin) or (aArray[n+2] < iMin) or (aArray[n+3] < iMin) then begin if aArray[n] < iMin then iMin := aArray[n]; if aArray[n+1] < iMin then iMin := aArray[n+1]; if aArray[n+2] < iMin then iMin := aArray[n+2]; if aArray[n+3] < iMin then iMin := aArray[n+3]; end; if (aArray[n]>iMax) or (aArray[n+1]>iMax) or (aArray[n+2]>iMax) or (aArray[n+3]>iMax) then begin if aArray[n]>iMax then iMax := aArray[n]; if aArray[n+1]>iMax then iMax := aArray[n+1]; if aArray[n+2]>iMax then iMax := aArray[n+2]; if aArray[n+3]>iMax then iMax := aArray[n+3]; end; Inc(n, 4); end; for i := 0 to mmod-1 do begin if aArray[i+n] < iMin then iMin := aArray[i+n]; if aArray[i+n]>iMax then iMax := aArray[i+n]; end; aMax := iMax; aMin := iMin; end; end; //------------------------------------- // By Sasa Zeman //------------------------------------- procedure MinMaxArraySZ0(const aArray: array of Integer; out aMax, aMin: Integer); var I: Integer; P: PInteger; v: integer; begin aMax := MinInt; aMin := MaxInt; P :=@aArray[0]; for I := 0 to High(aArray) do begin v:=p^; if V>aMax then aMax := V; if V < aMin then aMin := V; Inc(P); end; end; procedure MinMaxArraySZ1(const aArray: array of Integer; out aMax, aMin: Integer); var I: Integer; begin aMax := MinInt; aMin := MaxInt; for I := 0 to High(aArray) do begin if aArray[i]>aMax then aMax := aArray[i]; if aArray[i] < aMin then aMin := aArray[i]; end; end; procedure MinMaxArraySZ2(const aArray: array of Integer; out aMax, aMin: Integer); var I: Integer; begin aMax := MinInt; aMin := MaxInt; i:=0; while i<= High(aArray) do begin if aArray[i]>aMax then aMax := aArray[i]; if aArray[i] < aMin then aMin := aArray[i]; inc(i) end; end; procedure MinMaxArraySZ3(const aArray: array of Integer; out aMax, aMin: Integer); var v: integer; I: Integer; begin aMax := MinInt; aMin := MaxInt; i:=0; while i<=High(aArray) do begin v:=aArray[i]; if v>aMax then aMax := v; if v < aMin then aMin := v; inc(i) end; end; procedure MinMaxArraySZ4(const aArray: array of Integer; out aMax, aMin: Integer); var i,l,l1: Integer; vMax, vMin: integer; begin vMax := MinInt; vMin := MaxInt; l:= High(aArray); l1:=(l shr 2) shl 2; i:=0; while i <= l1 do begin begin if aArray[i ]>vMax then vMax := aArray[i ]; if aArray[i+1]>vMax then vMax := aArray[i+1]; if aArray[i+2]>vMax then vMax := aArray[i+2]; if aArray[i+3]>vMax then vMax := aArray[i+3]; end; if aArray[i ] < vMin then vMin := aArray[i ]; if aArray[i+1] < vMin then vMin := aArray[i+1]; if aArray[i+2] < vMin then vMin := aArray[i+2]; if aArray[i+3] < vMin then vMin := aArray[i+3]; inc(i,4) end; while i<=l do begin if aArray[i]>aMax then aMax := aArray[i]; if aArray[i] < aMin then aMin := aArray[i]; inc(i) end; aMax := vMax; aMin := vMin; end; procedure MinMaxArraySZ5(const aArray: array of Integer; out aMax, aMin: Integer); var i,l,l1: Integer; vMax, vMin: integer; begin vMax := MinInt; vMin := MaxInt; l:= High(aArray); l1:= (l shr 3) shl 3; i:=0; while i <= l1 do begin if aArray[i ]>vMax then vMax := aArray[i ]; if aArray[i+1]>vMax then vMax := aArray[i+1]; if aArray[i+2]>vMax then vMax := aArray[i+2]; if aArray[i+3]>vMax then vMax := aArray[i+3]; if aArray[i+4]>vMax then vMax := aArray[i+4]; if aArray[i+5]>vMax then vMax := aArray[i+5]; if aArray[i+6]>vMax then vMax := aArray[i+6]; if aArray[i+7]>vMax then vMax := aArray[i+7]; if aArray[i ] < vMin then vMin := aArray[i ]; if aArray[i+1] < vMin then vMin := aArray[i+1]; if aArray[i+2] < vMin then vMin := aArray[i+2]; if aArray[i+3] < vMin then vMin := aArray[i+3]; if aArray[i+4] < vMin then vMin := aArray[i+4]; if aArray[i+5] < vMin then vMin := aArray[i+5]; if aArray[i+6] < vMin then vMin := aArray[i+6]; if aArray[i+7] < vMin then vMin := aArray[i+7]; inc(i,8) end; while i<=l do begin if aArray[i]>aMax then aMax := aArray[i]; if aArray[i] < aMin then aMin := aArray[i]; inc(i) end; aMax := vMax; aMin := vMin; end; procedure MinMaxArraySZ6(const aArray: array of Integer; out aMax, aMin: Integer); var i,l,l1: Integer; vMax, vMin: integer; begin vMax := MinInt; vMin := MaxInt; l:= High(aArray); l1:= (l shr 4) shl 4; i:=0; while i <= l1 do begin if aArray[i ]>vMax then vMax := aArray[i ]; if aArray[i+ 1]>vMax then vMax := aArray[i+ 1]; if aArray[i+ 2]>vMax then vMax := aArray[i+ 2]; if aArray[i+ 3]>vMax then vMax := aArray[i+ 3]; if aArray[i+ 4]>vMax then vMax := aArray[i+ 4]; if aArray[i+ 5]>vMax then vMax := aArray[i+ 5]; if aArray[i+ 6]>vMax then vMax := aArray[i+ 6]; if aArray[i+ 7]>vMax then vMax := aArray[i+ 7]; if aArray[i+ 8]>vMax then vMax := aArray[i+ 8]; if aArray[i+ 9]>vMax then vMax := aArray[i+ 9]; if aArray[i+10]>vMax then vMax := aArray[i+10]; if aArray[i+11]>vMax then vMax := aArray[i+11]; if aArray[i+12]>vMax then vMax := aArray[i+12]; if aArray[i+13]>vMax then vMax := aArray[i+13]; if aArray[i+14]>vMax then vMax := aArray[i+14]; if aArray[i+15]>vMax then vMax := aArray[i+15]; if aArray[i ] < vMin then vMin := aArray[i ]; if aArray[i+ 1] < vMin then vMin := aArray[i+ 1]; if aArray[i+ 2] < vMin then vMin := aArray[i+ 2]; if aArray[i+ 3] < vMin then vMin := aArray[i+ 3]; if aArray[i+ 4] < vMin then vMin := aArray[i+ 4]; if aArray[i+ 5] < vMin then vMin := aArray[i+ 5]; if aArray[i+ 6] < vMin then vMin := aArray[i+ 6]; if aArray[i+ 7] < vMin then vMin := aArray[i+ 7]; if aArray[i+ 8] < vMin then vMin := aArray[i+ 8]; if aArray[i+ 9] < vMin then vMin := aArray[i+ 9]; if aArray[i+10] < vMin then vMin := aArray[i+10]; if aArray[i+11] < vMin then vMin := aArray[i+11]; if aArray[i+12] < vMin then vMin := aArray[i+12]; if aArray[i+13] < vMin then vMin := aArray[i+13]; if aArray[i+14] < vMin then vMin := aArray[i+14]; if aArray[i+15] < vMin then vMin := aArray[i+15]; inc(i,16) end; while i<=l do begin if aArray[i]>aMax then aMax := aArray[i]; if aArray[i] < aMin then aMin := aArray[i]; inc(i) end; aMax := vMax; aMin := vMin; end; //------------------------------------- procedure MinMaxArrayGS1(const AArray: array of Integer; out AMax, AMin: integer); asm mov [ebp-$08],ecx mov [ebp-$04],eax shl edx,$02 // a better rangering operation sub edx,$1C add [ebp-$04],edx mov [ebp-$0C],1 mov ecx,0 mov edx,MaxInt @loop: cmp ecx,[eax+$00] jl @parseMax cmp ecx,[eax+$04] jl @parseMax cmp ecx,[eax+$08] jl @parseMax cmp ecx,[eax+$0C] jl @parseMax cmp ecx,[eax+$10] jl @parseMax cmp ecx,[eax+$14] jl @parseMax cmp ecx,[eax+$18] jl @parseMax cmp ecx,[eax+$1C] jl @parseMax jmp @checkMin @parseMax: cmp ecx,[eax+$00] cmovl ecx,[eax+$00] cmp ecx,[eax+$04] cmovl ecx,[eax+$04] cmp ecx,[eax+$08] cmovl ecx,[eax+$08] cmp ecx,[eax+$0C] cmovl ecx,[eax+$0C] cmp ecx,[eax+$10] cmovl ecx,[eax+$10] cmp ecx,[eax+$14] cmovl ecx,[eax+$14] cmp ecx,[eax+$18] cmovl ecx,[eax+$18] cmp ecx,[eax+$1C] cmovl ecx,[eax+$1C] @checkMin: cmp edx,[eax+$00] jg @parseMin cmp edx,[eax+$04] jg @parseMin cmp edx,[eax+$08] jg @parseMin cmp edx,[eax+$0C] jg @parseMin cmp edx,[eax+$10] jg @parseMin cmp edx,[eax+$14] jg @parseMin cmp edx,[eax+$18] jg @parseMin cmp edx,[eax+$1C] jg @parseMin jmp @loopfooter @parseMin: cmp edx,[eax+$00] cmovg edx,[eax+$00] cmp edx,[eax+$04] cmovg edx,[eax+$04] cmp edx,[eax+$08] cmovg edx,[eax+$08] cmp edx,[eax+$0C] cmovg edx,[eax+$0C] cmp edx,[eax+$10] cmovg edx,[eax+$10] cmp edx,[eax+$14] cmovg edx,[eax+$14] cmp edx,[eax+$18] cmovg edx,[eax+$18] cmp edx,[eax+$1C] cmovg edx,[eax+$1C] @loopfooter: add eax,$20 cmp eax,[ebp-$04] jl @loop mov eax,[ebp-$04] dec [ebp-$0C] jz @loop push edx mov edx,ecx mov ecx,[ebp-$08] mov [ecx],edx pop edx mov ecx,edx // I really don't know what I've doing here to change // it for [esp+$0C] on the last code .. rsrsrsr mov edx,[esp+$08] mov [edx],ecx end; procedure MinMaxArrayGS2(const AArray: array of Integer; out AMax, AMin: integer); asm mov [ebp-$08],ecx mov [ebp-$04],eax shl edx,$02 sub edx,$3C add [ebp-$04],edx mov [ebp-$0C],1 mov ecx,0 mov edx,MaxInt @loop: cmp ecx,[eax+$00] jl @parseMax cmp ecx,[eax+$04] jl @parseMax cmp ecx,[eax+$08] jl @parseMax cmp ecx,[eax+$0C] jl @parseMax cmp ecx,[eax+$10] jl @parseMax cmp ecx,[eax+$14] jl @parseMax cmp ecx,[eax+$18] jl @parseMax cmp ecx,[eax+$1C] jl @parseMax cmp ecx,[eax+$20] jl @parseMax cmp ecx,[eax+$24] jl @parseMax cmp ecx,[eax+$28] jl @parseMax cmp ecx,[eax+$2C] jl @parseMax cmp ecx,[eax+$30] jl @parseMax cmp ecx,[eax+$34] jl @parseMax cmp ecx,[eax+$38] jl @parseMax cmp ecx,[eax+$3C] jl @parseMax jmp @checkMin @parseMax: cmp ecx,[eax+$00] cmovl ecx,[eax+$00] cmp ecx,[eax+$04] cmovl ecx,[eax+$04] cmp ecx,[eax+$08] cmovl ecx,[eax+$08] cmp ecx,[eax+$0C] cmovl ecx,[eax+$0C] cmp ecx,[eax+$10] cmovl ecx,[eax+$10] cmp ecx,[eax+$14] cmovl ecx,[eax+$14] cmp ecx,[eax+$18] cmovl ecx,[eax+$18] cmp ecx,[eax+$1C] cmovl ecx,[eax+$1C] cmp ecx,[eax+$20] cmovl ecx,[eax+$20] cmp ecx,[eax+$24] cmovl ecx,[eax+$24] cmp ecx,[eax+$28] cmovl ecx,[eax+$28] cmp ecx,[eax+$2C] cmovl ecx,[eax+$2C] cmp ecx,[eax+$30] cmovl ecx,[eax+$30] cmp ecx,[eax+$34] cmovl ecx,[eax+$34] cmp ecx,[eax+$38] cmovl ecx,[eax+$38] cmp ecx,[eax+$3C] cmovl ecx,[eax+$3C] @checkMin: cmp edx,[eax+$00] jg @parseMin cmp edx,[eax+$04] jg @parseMin cmp edx,[eax+$08] jg @parseMin cmp edx,[eax+$0C] jg @parseMin cmp edx,[eax+$10] jg @parseMin cmp edx,[eax+$14] jg @parseMin cmp edx,[eax+$18] jg @parseMin cmp edx,[eax+$1C] jg @parseMin cmp edx,[eax+$20] jg @parseMin cmp edx,[eax+$24] jg @parseMin cmp edx,[eax+$28] jg @parseMin cmp edx,[eax+$2C] jg @parseMin cmp edx,[eax+$30] jg @parseMin cmp edx,[eax+$34] jg @parseMin cmp edx,[eax+$38] jg @parseMin cmp edx,[eax+$3C] jg @parseMin jmp @loopfooter @parseMin: cmp edx,[eax+$00] cmovg edx,[eax+$00] cmp edx,[eax+$04] cmovg edx,[eax+$04] cmp edx,[eax+$08] cmovg edx,[eax+$08] cmp edx,[eax+$0C] cmovg edx,[eax+$0C] cmp edx,[eax+$10] cmovg edx,[eax+$10] cmp edx,[eax+$14] cmovg edx,[eax+$14] cmp edx,[eax+$18] cmovg edx,[eax+$18] cmp edx,[eax+$1C] cmovg edx,[eax+$1C] cmp edx,[eax+$20] cmovg edx,[eax+$20] cmp edx,[eax+$24] cmovg edx,[eax+$24] cmp edx,[eax+$28] cmovg edx,[eax+$28] cmp edx,[eax+$2C] cmovg edx,[eax+$2C] cmp edx,[eax+$30] cmovg edx,[eax+$30] cmp edx,[eax+$34] cmovg edx,[eax+$34] cmp edx,[eax+$38] cmovg edx,[eax+$38] cmp edx,[eax+$3C] cmovg edx,[eax+$3C] @loopfooter: add eax,$40 cmp eax,[ebp-$04] jl @loop mov eax,[ebp-$04] dec [ebp-$0C] jz @loop push edx mov edx,ecx mov ecx,[ebp-$08] mov [ecx],edx pop edx mov ecx,edx mov edx,[esp+$08] mov [edx],ecx end; procedure MinMaxArrayHS1(const aArray:array of integer; out aMax, aMin:integer); asm // in the hope to use a little bit better U- and V-pipeline // push ebx push esi push edi push ecx push ebp mov ebx, [eax] mov edi, ebx inc edx test edx, edx jle @@Output mov ebp, edx // ebp := edx mod 4 and ebp, 7 shr edx, 3 // edx := edx div 4 test edx, edx jle @@RestMod4 mov esi, [eax] @@LpMainBegin: db $0F,$18,$40,$40 { prefetchnta [eax+64] } mov ecx, [eax+4] cmp esi, edi db $0F,$4C,$FE { cmovl edi, esi } cmp ebx, esi db $0F,$4C,$DE { cmovl ebx, esi } mov esi, [eax+8] cmp ecx, edi db $0F,$4C,$F9 { cmovl edi, ecx } cmp ebx, ecx db $0F,$4C,$D9 { cmovl ebx, ecx } mov ecx, [eax+12] cmp esi, edi db $0F,$4C,$FE { cmovl edi, esi } cmp ebx, esi db $0F,$4C,$DE { cmovl ebx, esi } mov esi, [eax+16] cmp ecx, edi db $0F,$4C,$F9 { cmovl edi, ecx } cmp ebx, ecx db $0F,$4C,$D9 { cmovl ebx, ecx } mov ecx, [eax+20] cmp esi, edi db $0F,$4C,$FE { cmovl edi, esi } cmp ebx, esi db $0F,$4C,$DE { cmovl ebx, esi } mov esi, [eax+24] cmp ecx, edi db $0F,$4C,$F9 { cmovl edi, ecx } cmp ebx, ecx db $0F,$4C,$D9 { cmovl ebx, ecx } mov ecx, [eax+28] add eax, 32 cmp esi, edi db $0F,$4C,$FE { cmovl edi, esi } cmp ebx, esi db $0F,$4C,$DE { cmovl ebx, esi } mov esi, [eax] cmp ecx, edi db $0F,$4C,$F9 { cmovl edi, ecx } cmp ebx, ecx db $0F,$4C,$D9 { cmovl ebx, ecx } @@LpMainEnd: dec edx jnz @@LpMainBegin @@RestMod4: test ebp, ebp jle @@Output @@LpRestBegin: mov esi, [eax] cmp esi, edi db $0F,$4C,$FE { cmovl edi, esi } cmp ebx, esi db $0F,$4C,$DE { cmovl ebx, esi } @@LpRestEnd: add eax, 4 dec ebp jnz @@LpRestBegin @@Output: pop ebp pop ecx mov eax, [ebp+$08] mov [ecx], ebx // Output aMin // mov [eax], edi // Output aMax // pop edi pop esi pop ebx end; procedure MinMaxArrayHS2(const aArray:array of integer; out aMax, aMin:integer); var i,ix,iy,mi,ma:integer; begin ma := aArray[Low(aArray)]; mi := ma; ix:= ((High(aArray)+1) div 8); iy:=0; for i := Low(aArray) + 1 to ix-1 do begin asm // dependent on compiler // { two times faster as without prefetchnta at PIII } db $0F,$18,$44,$B0,$60 // dependent on compiler // { prefetchnta [eax+esi*4+96] because *) } end; // dependent on compiler // test with Delphi 5 // if (aArray[iy ]<mi) then mi:=aArray[iy ] else // *) mov edi, [eax+esi*4] see above // if (aArray[iy ]>ma) then ma:=aArray[iy ]; if (aArray[iy+1]>ma) then ma:=aArray[iy+1] else if (aArray[iy+1]<mi) then mi:=aArray[iy+1]; if (aArray[iy+2]<mi) then mi:=aArray[iy+2] else if (aArray[iy+2]>ma) then ma:=aArray[iy+2]; if (aArray[iy+3]>ma) then ma:=aArray[iy+3] else if (aArray[iy+3]<mi) then mi:=aArray[iy+3]; if (aArray[iy+4]<mi) then mi:=aArray[iy+4] else if (aArray[iy+4]>ma) then ma:=aArray[iy+4]; if (aArray[iy+5]>ma) then ma:=aArray[iy+5] else if (aArray[iy+5]<mi) then mi:=aArray[iy+5]; if (aArray[iy+6]<mi) then mi:=aArray[iy+6] else if (aArray[iy+6]>ma) then ma:=aArray[iy+6]; if (aArray[iy+7]>ma) then ma:=aArray[iy+7] else if (aArray[iy+7]<mi) then mi:=aArray[iy+7]; inc(iy,8); end; for i := ix*8 to High(aArray) do begin if (aArray[i]<mi) then mi:=aArray[i]; if (aArray[i]>ma) then ma:=aArray[i]; end; aMax := ma; aMin := mi; end; procedure MinMaxArrayO1(const aArray:array of integer; out aMax, aMin:integer); var // averaged 10 clocks per roundtrip = 10% faster than above i,mi,ma:integer; begin ma := aArray[Low(aArray)]; mi := ma; for i := Low(aArray) + 1 to High(aArray) do begin if (aArray[i]<mi) then mi:=aArray[i]; if (aArray[i]>ma) then ma:=aArray[i]; end; aMax := ma; aMin := mi; end; procedure MinMaxArrayCD1( const aArray : Array of Integer; out aMax, aMin : integer ); var MaxArray, MinArray : array[boolean] of integer; i : integer; begin aMax :=0; aMin :=0; i:=high( aArray ); if i>=0 then begin MaxArray[True] := aArray[0]; MinArray[True] := aArray[0]; while i>0 do begin MaxArray[ aArray[i]>MaxArray[True] ] := aArray[i]; MinArray[ aArray[i] < MinArray[True] ] := aArray[i]; dec(i); end; aMax := MaxArray[True]; aMin := MinArray[True]; end; end; procedure MinMaxArrayCD2(const aArray:array of integer; out aMax, aMin:integer); var // averaged 10 clocks per roundtrip = 10% faster than above i,mi,ma:integer; begin ma := aArray[Low(aArray)]; mi := ma; for i := Low(aArray) + 1 to High(aArray) do begin if (aArray[i]<mi) then mi:=aArray[i]; if (aArray[i]>ma) then ma:=aArray[i]; end; aMax := ma; aMin := mi; end; procedure MinMaxArrayHS3(const aArray:array of integer; out aMax, aMin:integer); asm push ebx push esi push edi mov ebx, ecx mov ecx, [eax] mov edi, ecx inc edx test edx, edx jle @@Output @@LpBegin: mov esi, [eax] @@CheckMin: cmp esi, edi jnl @@CheckMax mov edi, esi // Store in EDI if < // @@CheckMax: cmp ecx, esi jnl @@LpEnd mov ecx, esi // Store in ECX if>// @@LpEnd: add eax, 4 dec edx jnz @@LpBegin @@Output: mov eax, [ebp+$08] mov [ebx], ecx // Output aMin // mov [eax], edi // Output aMax // pop edi pop esi pop ebx end; //version 2 (cmov and unrolling in basm): //========================================================== procedure MinMaxArrayHS4(const aArray:array of integer; out aMax, aMin:integer); asm push ebx push esi push edi push ebp mov ebx, ecx mov ecx, [eax] mov edi, ecx inc edx test edx, edx jle @@Output @@LpBeginA: mov esi, [eax] mov ebp, [eax+4] @@CheckMinA: cmp esi, edi jnl @@CheckMaxA mov edi, esi // Store in EDI if < // @@CheckMaxA: cmp ecx, esi jnl @@LpEndA mov ecx, esi // Store in ECX if>// @@LpEndA: dec edx jz @@Output @@CheckMinB: cmp ebp, edi jnl @@CheckMaxB mov edi, ebp // Store in EDI if < // @@CheckMaxB: cmp ecx, ebp jnl @@LpEndB mov ecx, ebp // Store in ECX if>// @@LpEndB: add eax, 8 dec edx jnz @@LpBeginA @@Output: pop ebp mov eax, [ebp+$08] mov [ebx], ecx // Output aMin // mov [eax], edi // Output aMax // pop edi pop esi pop ebx end; //version 3: //========================================================== procedure MinMaxArrayHS5(const aArray:array of integer; out aMax, aMin:integer); asm push ebx push esi push edi push ebp mov ebx, ecx mov ecx, [eax] mov edi, ecx inc edx test edx, edx jle @@Output mov ebp, edx // ebp := edx mod 4 and ebp, 3 shr edx, 2 // edx := edx div 4 test edx, edx jle @@RestMod4 @@LpMainBegin: mov esi, [eax] cmp esi, edi db $0F,$4C,$FE { cmovl edi, esi } cmp ecx, esi db $0F,$4C,$CE { cmovl ecx, esi } mov esi, [eax+4] cmp esi, edi db $0F,$4C,$FE { cmovl edi, esi } cmp ecx, esi db $0F,$4C,$CE { cmovl ecx, esi } mov esi, [eax+8] cmp esi, edi db $0F,$4C,$FE { cmovl edi, esi } cmp ecx, esi db $0F,$4C,$CE { cmovl ecx, esi } mov esi, [eax+12] cmp esi, edi db $0F,$4C,$FE { cmovl edi, esi } cmp ecx, esi db $0F,$4C,$CE { cmovl ecx, esi } @@LpMainEnd: add eax, 16 dec edx jnz @@LpMainBegin @@RestMod4: test ebp, ebp jle @@Output @@LpRestBegin: mov esi, [eax] cmp esi, edi db $0F,$4C,$FE { cmovl edi, esi } cmp ecx, esi db $0F,$4C,$CE { cmovl ecx, esi } @@LpRestEnd: add eax, 4 dec ebp jnz @@LpRestBegin @@Output: pop ebp mov eax, [ebp+$08] mov [ebx], ecx // Output aMin // mov [eax], edi // Output aMax // pop edi pop esi pop ebx end; procedure MinMaxArrayHS6(const aArray:array of integer; out aMax, aMin:integer); asm // in the hope to use a little bit better U- and V-pipeline // push ebx push esi push edi push ecx push ebp mov ebx, [eax] mov edi, ebx inc edx test edx, edx jle @@Output mov ebp, edx // ebp := edx mod 4 and ebp, 7 shr edx, 3 // edx := edx div 4 test edx, edx jle @@RestMod4 mov esi, [eax] @@LpMainBegin: mov ecx, [eax+4] cmp esi, edi db $0F,$4C,$FE { cmovl edi, esi } cmp ebx, esi db $0F,$4C,$DE { cmovl ebx, esi } mov esi, [eax+8] cmp ecx, edi db $0F,$4C,$F9 { cmovl edi, ecx } cmp ebx, ecx db $0F,$4C,$D9 { cmovl ebx, ecx } mov ecx, [eax+12] cmp esi, edi db $0F,$4C,$FE { cmovl edi, esi } cmp ebx, esi db $0F,$4C,$DE { cmovl ebx, esi } mov esi, [eax+16] cmp ecx, edi db $0F,$4C,$F9 { cmovl edi, ecx } cmp ebx, ecx db $0F,$4C,$D9 { cmovl ebx, ecx } mov ecx, [eax+20] cmp esi, edi db $0F,$4C,$FE { cmovl edi, esi } cmp ebx, esi db $0F,$4C,$DE { cmovl ebx, esi } mov esi, [eax+24] cmp ecx, edi db $0F,$4C,$F9 { cmovl edi, ecx } cmp ebx, ecx db $0F,$4C,$D9 { cmovl ebx, ecx } mov ecx, [eax+28] add eax, 32 cmp esi, edi db $0F,$4C,$FE { cmovl edi, esi } cmp ebx, esi db $0F,$4C,$DE { cmovl ebx, esi } mov esi, [eax] cmp ecx, edi db $0F,$4C,$F9 { cmovl edi, ecx } cmp ebx, ecx db $0F,$4C,$D9 { cmovl ebx, ecx } @@LpMainEnd: dec edx jnz @@LpMainBegin @@RestMod4: test ebp, ebp jle @@Output @@LpRestBegin: mov esi, [eax] cmp esi, edi db $0F,$4C,$FE { cmovl edi, esi } cmp ebx, esi db $0F,$4C,$DE { cmovl ebx, esi } @@LpRestEnd: add eax, 4 dec ebp jnz @@LpRestBegin @@Output: pop ebp pop ecx mov eax, [ebp+$08] mov [ecx], ebx // Output aMin // mov [eax], edi // Output aMax // pop edi pop esi pop ebx end; function RDTSC: Int64; asm RDTSC end; var PerfStart, PefrStop, PerfFrq: Int64; procedure QueryPerformanceStart; begin QueryPerformanceCounter(PerfStart); end; function QueryPerformanceStop: Double; begin QueryPerformanceCounter(PefrStop); QueryPerformanceFrequency(PerfFrq); Result := (PefrStop - PerfStart)/PerfFrq; end; procedure DoTest ( proc: TMinMaxArray; const OutText: string); const Times = 100; var Min: Integer; Max: Integer; I: Integer; Time: Double; begin sleep(100); QueryPerformanceStart; for I := 1 to Times do proc(A, Max, Min); Time := QueryPerformanceStop; Writeln(Format('%10.6fs %d %d %s', [Time, Min, Max, Outtext])); end; var i: integer; begin Randomize; try SetLength(A, 1000 * 1000); for I := 0 to High(A) do //A[I] := I; //Random(10 * 1000 * 1000); A[I] := Random(10 * 1000 * 1000); SZBeforeBenchmark; writeln ('1 mil. random numbers test'); // DoTest(@MinMaxArray ,'Dummy! - the same as original'); writeln; DoTest(@MinMaxArray ,'Original'); DoTest(@MinMaxArrayO1 ,'Original - optimized'); DoTest(@MinMaxArray0 ,'Using pointers'); DoTest(@MinMaxArray0_1 ,'Using pointers (8/loop) !!! BUG !!!'); DoTest(@MinMaxArray1 ,'Using indices'); DoTest(@MinMaxArray2 ,'Reading array value once'); DoTest(@MinMaxArray3 ,'Nenad Trkulja - loop unrolling'); DoTest(@MinMaxArray4 ,'Hubert Seidel - local min and max'); DoTest(@MinMaxArray5 ,'Pierre le Riche - boolean trick'); DoTest(@MinMaxArray6 ,'Trkulja/Seidel - loop unrolling and local min and max'); DoTest(@MinMaxArrayGS1 ,'Gilberto Saraiva 8 ASM'); DoTest(@MinMaxArrayGS2 ,'Gilberto Saraiva 16 ASM'); DoTest(@MinMaxArrayHS1 ,'Hubert Seidel ASM'); DoTest(@MinMaxArrayHS2 ,'Hubert Seidel 8 PAS + ASM !!! BUG !!!'); DoTest(@MinMaxArrayHS3 ,'Hubert Seidel ASM V1' ); DoTest(@MinMaxArrayHS4 ,'Hubert Seidel ASM V2' ); DoTest(@MinMaxArrayHS5 ,'Hubert Seidel ASM V3' ); DoTest(@MinMaxArrayHS6 ,'Hubert Seidel ASM Rollup 8' ); DoTest(@MinMaxArrayCD1 ,'Clement Doss PAS 1'); DoTest(@MinMaxArrayCD2 ,'Clement Doss PAS 2'); DoTest(@MinMaxArraySZ0 ,'Sasa Zeman - pointers PAS'); DoTest(@MinMaxArraySZ1 ,'Sasa Zeman - indices (1) for PAS'); DoTest(@MinMaxArraySZ2 ,'Sasa Zeman - indices (2) while PAS'); DoTest(@MinMaxArraySZ3 ,'Sasa Zeman - indices (3) while PAS'); DoTest(@MinMaxArraySZ4 ,'Sasa Zeman - indices (4) while local 4 PAS'); DoTest(@MinMaxArraySZ5 ,'Sasa Zeman - indices (5) while local 8 PAS'); DoTest(@MinMaxArraySZ6 ,'Sasa Zeman - indices (6) while local 16 PAS'); writeln; SZAfterBenchmark; Readln; except on E:Exception do Writeln(E.Classname, ': ', E.Message); end; end. |