Board index » off-topic » Re: I need the fastest routine

Re: I need the fastest routine


2008-07-20 11:57:40 PM
off-topic16
Content-Type: text/plain; charset=iso-8859-1
Content-Transfer-Encoding: 8Bit
MinMaxTest6.dpr
Content-Type: text/x-objcsrc; name="MinMaxTest6.dpr"
Content-Transfer-Encoding: 8Bit
Content-Disposition: attachment; filename="MinMaxTest6.dpr"
program MinMaxTest6;
{$APPTYPE CONSOLE}
{
Modified MinMaxTest5 by Sasa Zeman
Few major chages are made:
- Simplifying testing code
- Putting sleep(10) before starting test
- Random number test
- Priority set to maximum during test
}
uses
SysUtils, Windows, SZTimer;
// SZTimer can be found at www.szutils.net Delphi section
// It is RDTSC and GetTickCount based timer and also have
// procedures to set priority to maximum and back to original.
// These procedures are used making equal environment for all tested
// functions.
// If there is no need for these functions, simply remarking
// the lines is enough
const
MinInt = -MaxInt - 1;
type
TMinMaxArray = Procedure (aArray: Array of Integer; out aMax, aMin : integer);
var
A: array of Integer;
procedure MinMaxArray( const aArray : Array of Integer; out aMax, aMin : integer );
var
MaxArray,
MinArray : array[boolean] of integer;
i : integer;
begin
aMax :=0;
aMin :=0;
i:=high( aArray );
if i>=0 then begin
MaxArray[True] := aArray[0];
MinArray[True] := aArray[0];
while i>0 do begin
MaxArray[ aArray[i]>MaxArray[True] ] := aArray[i];
MinArray[ aArray[i] < MinArray[True] ] := aArray[i];
dec(i);
end;
aMax := MaxArray[True];
aMin := MinArray[True];
end;
end;
procedure MinMaxArray0(const aArray: array of Integer;
out aMax, aMin: Integer);
var
I: Integer;
P: PInteger;
begin
aMax := MinInt;
aMin := MaxInt;
P :=@aArray[0];
for I := 0 to High(aArray) do
begin
if P^>aMax then
aMax := P^;
if P^ < aMin then
aMin := P^;
Inc(P);
end;
end;
procedure MinMaxArray0_1(const aArray: array of Integer;
out aMax, aMin: Integer);
TYPE
tIa = PACKED ARRAY [0..7] OF INTEGER ;
tpIa = ^tIa ;
var
I: Integer;
pIa : tPIa ;
iMin,iMax : INTEGER ;
rm : Integer ;
begin
iMax := MinInt;
iMin := MaxInt;
pIa :=@aArray[0];
rm := High(aArray) MOD 8 ;
for I := 0 to High(aArray) DIV 8 do
begin
if ( PIa^[0]>iMax ) OR ( PIa^[1]>iMax ) OR ( PIa^[2]>iMax ) OR ( PIa^[3]>iMax ) OR ( PIa^[4]>iMax ) OR ( PIa^[5]>iMax ) OR ( PIa^[6]>iMax ) OR ( PIa^[7]>iMax ) then BEGIN
IF PIa^[0]>iMax THEN
iMax := PIa^[0];
IF PIa^[1]>iMax THEN
iMax := PIa^[1];
IF PIa^[2]>iMax THEN
iMax := PIa^[2];
IF PIa^[3]>iMax THEN
iMax := PIa^[3];
IF PIa^[4]>iMax THEN
iMax := PIa^[4];
IF PIa^[5]>iMax THEN
iMax := PIa^[5];
IF PIa^[6]>iMax THEN
iMax := PIa^[6];
IF PIa^[7]>iMax THEN
iMax := PIa^[7];
END ;
if ( PIa^[0] < iMin ) OR ( PIa^[1] < iMin ) OR ( PIa^[2] < iMin ) OR ( PIa^[3] < iMin ) OR ( PIa^[4] < iMin ) OR ( PIa^[5] < iMin ) OR ( PIa^[6] < iMin ) OR ( PIa^[7] < iMin ) then BEGIN
IF PIa^[0] < iMin THEN
iMin := PIa^[0];
IF PIa^[1] < iMin THEN
iMin := PIa^[1];
IF PIa^[2] < iMin THEN
iMin := PIa^[2];
IF PIa^[3] < iMin THEN
iMin := PIa^[3];
IF PIa^[4] < iMin THEN
iMin := PIa^[4];
IF PIa^[5] < iMin THEN
iMin := PIa^[5];
IF PIa^[6] < iMin THEN
iMin := PIa^[6];
IF PIa^[7] < iMin THEN
iMin := PIa^[7];
END ;
Inc(PIa);
end;
for I := 0 to rm do
begin
IF PIa^[I] < iMin THEN
iMin := PIa^[I];
IF PIa^[I]>iMax THEN
iMax := PIa^[I];
end;
aMax := iMin;
aMin := iMax;
end;
procedure MinMaxArray1(const aArray: array of Integer;
out aMax, aMin: Integer);
var
I: Integer;
begin
aMax := MinInt;
aMin := MaxInt;
for I := 0 to High(aArray) do
begin
if aArray[I]>aMax then
aMax := aArray[I];
if aArray[I] < aMin then
aMin := aArray[I];
end;
end;
procedure MinMaxArray2(const aArray: array of Integer;
out aMax, aMin: Integer);
var
I: Integer;
Value: Integer;
begin
aMax := MinInt;
aMin := MaxInt;
for I := 0 to High(aArray) do
begin
Value := aArray[I];
if Value>aMax then
aMax := Value;
if Value < aMin then
aMin := Value;
end;
end;
procedure MinMaxArray3(const aArray : Array of Integer; out aMax, aMin : integer );
var
i, m, mmod, n: Integer;
begin
aMax := 0;
aMin := 0;
if Length(aArray)>0 then
begin
aMin := aArray[0];
aMax := aArray[0];
m := Length(aArray) div 4;
mmod := Length(aArray) mod 4;
n := 0;
for i := 0 to m -1 do
begin
if (aArray[n] < aMin) or (aArray[n+1] < aMin) or
(aArray[n+2] < aMin) or (aArray[n+3] < aMin) then
begin
if aArray[n] < aMin then
aMin := aArray[n];
if aArray[n+1] < aMin then
aMin := aArray[n+1];
if aArray[n+2] < aMin then
aMin := aArray[n+2];
if aArray[n+3] < aMin then
aMin := aArray[n+3];
end;
if (aArray[n]>aMax) or (aArray[n+1]>aMax) or
(aArray[n+2]>aMax) or (aArray[n+3]>aMax) then
begin
if aArray[n]>aMax then
aMax := aArray[n];
if aArray[n+1]>aMax then
aMax := aArray[n+1];
if aArray[n+2]>aMax then
aMax := aArray[n+2];
if aArray[n+3]>aMax then
aMax := aArray[n+3];
end;
Inc(n, 4);
end;
for i := 0 to mmod-1 do
begin
if aArray[i+n] < aMin then
aMin := aArray[i+n];
if aArray[i+n]>aMax then
aMax := aArray[i+n];
end;
end;
end;
procedure MinMaxArray4(const aArray: array of Integer;
out aMax, aMin: Integer);
var
I: Integer;
Value: Integer;
iMin, iMax: Integer;
begin
iMax := MinInt;
iMin := MaxInt;
for I := 0 to High(aArray) do
begin
Value := aArray[I];
if Value>iMax then
iMax := Value;
if Value < iMin then
iMin := Value;
end;
aMax := iMax;
aMin := iMin;
end;
procedure MinMaxArray5(const aArray: array of Integer; out aMax, aMin: Integer);
var
LInd, LTempMin, LTempMax, LArrVal: Integer;
begin
LTempMin := MaxInt;
LTempMax := MinInt;
for LInd := 0 to High(aArray) do
begin
{Get the array value}
LArrVal := aArray[LInd];
{Update the minimum}
LTempMin := LArrVal + ((-Ord(LTempMin < LArrVal)) and (LTempMin - LArrVal));
{Update the maximum}
LTempMax := LArrVal + ((-Ord(LTempMax>LArrVal)) and (LTempMax - LArrVal));
end;
aMin := LTempMin;
aMax := LTempMax;
end;
procedure MinMaxArray6(const aArray : Array of Integer; out aMax, aMin : integer );
var
i, m, mmod, n, iMax, iMin: Integer;
begin
aMax := 0;
aMin := 0;
if Length(aArray)>0 then
begin
iMin := aArray[0];
iMax := aArray[0];
m := Length(aArray) div 4;
mmod := Length(aArray) mod 4;
n := 0;
for i := 0 to m -1 do
begin
if (aArray[n] < iMin) or (aArray[n+1] < iMin) or
(aArray[n+2] < iMin) or (aArray[n+3] < iMin) then
begin
if aArray[n] < iMin then
iMin := aArray[n];
if aArray[n+1] < iMin then
iMin := aArray[n+1];
if aArray[n+2] < iMin then
iMin := aArray[n+2];
if aArray[n+3] < iMin then
iMin := aArray[n+3];
end;
if (aArray[n]>iMax) or (aArray[n+1]>iMax) or
(aArray[n+2]>iMax) or (aArray[n+3]>iMax) then
begin
if aArray[n]>iMax then
iMax := aArray[n];
if aArray[n+1]>iMax then
iMax := aArray[n+1];
if aArray[n+2]>iMax then
iMax := aArray[n+2];
if aArray[n+3]>iMax then
iMax := aArray[n+3];
end;
Inc(n, 4);
end;
for i := 0 to mmod-1 do
begin
if aArray[i+n] < iMin then
iMin := aArray[i+n];
if aArray[i+n]>iMax then
iMax := aArray[i+n];
end;
aMax := iMax;
aMin := iMin;
end;
end;
//-------------------------------------
// By Sasa Zeman
//-------------------------------------
procedure MinMaxArraySZ0(const aArray: array of Integer;
out aMax, aMin: Integer);
var
I: Integer;
P: PInteger;
v: integer;
begin
aMax := MinInt;
aMin := MaxInt;
P :=@aArray[0];
for I := 0 to High(aArray) do
begin
v:=p^;
if V>aMax then
aMax := V;
if V < aMin then
aMin := V;
Inc(P);
end;
end;
procedure MinMaxArraySZ1(const aArray: array of Integer;
out aMax, aMin: Integer);
var
I: Integer;
begin
aMax := MinInt;
aMin := MaxInt;
for I := 0 to High(aArray) do
begin
if aArray[i]>aMax then aMax := aArray[i];
if aArray[i] < aMin then aMin := aArray[i];
end;
end;
procedure MinMaxArraySZ2(const aArray: array of Integer;
out aMax, aMin: Integer);
var
I: Integer;
begin
aMax := MinInt;
aMin := MaxInt;
i:=0;
while i<= High(aArray) do
begin
if aArray[i]>aMax then aMax := aArray[i];
if aArray[i] < aMin then aMin := aArray[i];
inc(i)
end;
end;
procedure MinMaxArraySZ3(const aArray: array of Integer;
out aMax, aMin: Integer);
var
v: integer;
I: Integer;
begin
aMax := MinInt;
aMin := MaxInt;
i:=0;
while i<=High(aArray) do
begin
v:=aArray[i];
if v>aMax then aMax := v;
if v < aMin then aMin := v;
inc(i)
end;
end;
procedure MinMaxArraySZ4(const aArray: array of Integer;
out aMax, aMin: Integer);
var
i,l,l1: Integer;
vMax, vMin: integer;
begin
vMax := MinInt;
vMin := MaxInt;
l:= High(aArray);
l1:=(l shr 2) shl 2;
i:=0;
while i <= l1 do
begin
begin
if aArray[i ]>vMax then vMax := aArray[i ];
if aArray[i+1]>vMax then vMax := aArray[i+1];
if aArray[i+2]>vMax then vMax := aArray[i+2];
if aArray[i+3]>vMax then vMax := aArray[i+3];
end;
if aArray[i ] < vMin then vMin := aArray[i ];
if aArray[i+1] < vMin then vMin := aArray[i+1];
if aArray[i+2] < vMin then vMin := aArray[i+2];
if aArray[i+3] < vMin then vMin := aArray[i+3];
inc(i,4)
end;
while i<=l do
begin
if aArray[i]>aMax then aMax := aArray[i];
if aArray[i] < aMin then aMin := aArray[i];
inc(i)
end;
aMax := vMax;
aMin := vMin;
end;
procedure MinMaxArraySZ5(const aArray: array of Integer;
out aMax, aMin: Integer);
var
i,l,l1: Integer;
vMax, vMin: integer;
begin
vMax := MinInt;
vMin := MaxInt;
l:= High(aArray);
l1:= (l shr 3) shl 3;
i:=0;
while i <= l1 do
begin
if aArray[i ]>vMax then vMax := aArray[i ];
if aArray[i+1]>vMax then vMax := aArray[i+1];
if aArray[i+2]>vMax then vMax := aArray[i+2];
if aArray[i+3]>vMax then vMax := aArray[i+3];
if aArray[i+4]>vMax then vMax := aArray[i+4];
if aArray[i+5]>vMax then vMax := aArray[i+5];
if aArray[i+6]>vMax then vMax := aArray[i+6];
if aArray[i+7]>vMax then vMax := aArray[i+7];
if aArray[i ] < vMin then vMin := aArray[i ];
if aArray[i+1] < vMin then vMin := aArray[i+1];
if aArray[i+2] < vMin then vMin := aArray[i+2];
if aArray[i+3] < vMin then vMin := aArray[i+3];
if aArray[i+4] < vMin then vMin := aArray[i+4];
if aArray[i+5] < vMin then vMin := aArray[i+5];
if aArray[i+6] < vMin then vMin := aArray[i+6];
if aArray[i+7] < vMin then vMin := aArray[i+7];
inc(i,8)
end;
while i<=l do
begin
if aArray[i]>aMax then aMax := aArray[i];
if aArray[i] < aMin then aMin := aArray[i];
inc(i)
end;
aMax := vMax;
aMin := vMin;
end;
procedure MinMaxArraySZ6(const aArray: array of Integer;
out aMax, aMin: Integer);
var
i,l,l1: Integer;
vMax, vMin: integer;
begin
vMax := MinInt;
vMin := MaxInt;
l:= High(aArray);
l1:= (l shr 4) shl 4;
i:=0;
while i <= l1 do
begin
if aArray[i ]>vMax then vMax := aArray[i ];
if aArray[i+ 1]>vMax then vMax := aArray[i+ 1];
if aArray[i+ 2]>vMax then vMax := aArray[i+ 2];
if aArray[i+ 3]>vMax then vMax := aArray[i+ 3];
if aArray[i+ 4]>vMax then vMax := aArray[i+ 4];
if aArray[i+ 5]>vMax then vMax := aArray[i+ 5];
if aArray[i+ 6]>vMax then vMax := aArray[i+ 6];
if aArray[i+ 7]>vMax then vMax := aArray[i+ 7];
if aArray[i+ 8]>vMax then vMax := aArray[i+ 8];
if aArray[i+ 9]>vMax then vMax := aArray[i+ 9];
if aArray[i+10]>vMax then vMax := aArray[i+10];
if aArray[i+11]>vMax then vMax := aArray[i+11];
if aArray[i+12]>vMax then vMax := aArray[i+12];
if aArray[i+13]>vMax then vMax := aArray[i+13];
if aArray[i+14]>vMax then vMax := aArray[i+14];
if aArray[i+15]>vMax then vMax := aArray[i+15];
if aArray[i ] < vMin then vMin := aArray[i ];
if aArray[i+ 1] < vMin then vMin := aArray[i+ 1];
if aArray[i+ 2] < vMin then vMin := aArray[i+ 2];
if aArray[i+ 3] < vMin then vMin := aArray[i+ 3];
if aArray[i+ 4] < vMin then vMin := aArray[i+ 4];
if aArray[i+ 5] < vMin then vMin := aArray[i+ 5];
if aArray[i+ 6] < vMin then vMin := aArray[i+ 6];
if aArray[i+ 7] < vMin then vMin := aArray[i+ 7];
if aArray[i+ 8] < vMin then vMin := aArray[i+ 8];
if aArray[i+ 9] < vMin then vMin := aArray[i+ 9];
if aArray[i+10] < vMin then vMin := aArray[i+10];
if aArray[i+11] < vMin then vMin := aArray[i+11];
if aArray[i+12] < vMin then vMin := aArray[i+12];
if aArray[i+13] < vMin then vMin := aArray[i+13];
if aArray[i+14] < vMin then vMin := aArray[i+14];
if aArray[i+15] < vMin then vMin := aArray[i+15];
inc(i,16)
end;
while i<=l do
begin
if aArray[i]>aMax then aMax := aArray[i];
if aArray[i] < aMin then aMin := aArray[i];
inc(i)
end;
aMax := vMax;
aMin := vMin;
end;
//-------------------------------------
procedure MinMaxArrayGS1(const AArray: array of Integer;
out AMax, AMin: integer);
asm
mov [ebp-$08],ecx
mov [ebp-$04],eax
shl edx,$02
// a better rangering operation
sub edx,$1C
add [ebp-$04],edx
mov [ebp-$0C],1
mov ecx,0
mov edx,MaxInt
@loop:
cmp ecx,[eax+$00] jl @parseMax
cmp ecx,[eax+$04] jl @parseMax
cmp ecx,[eax+$08] jl @parseMax
cmp ecx,[eax+$0C] jl @parseMax
cmp ecx,[eax+$10] jl @parseMax
cmp ecx,[eax+$14] jl @parseMax
cmp ecx,[eax+$18] jl @parseMax
cmp ecx,[eax+$1C] jl @parseMax
jmp @checkMin
@parseMax:
cmp ecx,[eax+$00] cmovl ecx,[eax+$00]
cmp ecx,[eax+$04] cmovl ecx,[eax+$04]
cmp ecx,[eax+$08] cmovl ecx,[eax+$08]
cmp ecx,[eax+$0C] cmovl ecx,[eax+$0C]
cmp ecx,[eax+$10] cmovl ecx,[eax+$10]
cmp ecx,[eax+$14] cmovl ecx,[eax+$14]
cmp ecx,[eax+$18] cmovl ecx,[eax+$18]
cmp ecx,[eax+$1C] cmovl ecx,[eax+$1C]
@checkMin:
cmp edx,[eax+$00] jg @parseMin
cmp edx,[eax+$04] jg @parseMin
cmp edx,[eax+$08] jg @parseMin
cmp edx,[eax+$0C] jg @parseMin
cmp edx,[eax+$10] jg @parseMin
cmp edx,[eax+$14] jg @parseMin
cmp edx,[eax+$18] jg @parseMin
cmp edx,[eax+$1C] jg @parseMin
jmp @loopfooter
@parseMin:
cmp edx,[eax+$00] cmovg edx,[eax+$00]
cmp edx,[eax+$04] cmovg edx,[eax+$04]
cmp edx,[eax+$08] cmovg edx,[eax+$08]
cmp edx,[eax+$0C] cmovg edx,[eax+$0C]
cmp edx,[eax+$10] cmovg edx,[eax+$10]
cmp edx,[eax+$14] cmovg edx,[eax+$14]
cmp edx,[eax+$18] cmovg edx,[eax+$18]
cmp edx,[eax+$1C] cmovg edx,[eax+$1C]
@loopfooter:
add eax,$20
cmp eax,[ebp-$04]
jl @loop
mov eax,[ebp-$04]
dec [ebp-$0C]
jz @loop
push edx
mov edx,ecx
mov ecx,[ebp-$08]
mov [ecx],edx
pop edx
mov ecx,edx
// I really don't know what I've doing here to change
// it for [esp+$0C] on the last code .. rsrsrsr
mov edx,[esp+$08]
mov [edx],ecx
end;
procedure MinMaxArrayGS2(const AArray: array of Integer;
out AMax, AMin: integer);
asm
mov [ebp-$08],ecx
mov [ebp-$04],eax
shl edx,$02
sub edx,$3C
add [ebp-$04],edx
mov [ebp-$0C],1
mov ecx,0
mov edx,MaxInt
@loop:
cmp ecx,[eax+$00] jl @parseMax
cmp ecx,[eax+$04] jl @parseMax
cmp ecx,[eax+$08] jl @parseMax
cmp ecx,[eax+$0C] jl @parseMax
cmp ecx,[eax+$10] jl @parseMax
cmp ecx,[eax+$14] jl @parseMax
cmp ecx,[eax+$18] jl @parseMax
cmp ecx,[eax+$1C] jl @parseMax
cmp ecx,[eax+$20] jl @parseMax
cmp ecx,[eax+$24] jl @parseMax
cmp ecx,[eax+$28] jl @parseMax
cmp ecx,[eax+$2C] jl @parseMax
cmp ecx,[eax+$30] jl @parseMax
cmp ecx,[eax+$34] jl @parseMax
cmp ecx,[eax+$38] jl @parseMax
cmp ecx,[eax+$3C] jl @parseMax
jmp @checkMin
@parseMax:
cmp ecx,[eax+$00] cmovl ecx,[eax+$00]
cmp ecx,[eax+$04] cmovl ecx,[eax+$04]
cmp ecx,[eax+$08] cmovl ecx,[eax+$08]
cmp ecx,[eax+$0C] cmovl ecx,[eax+$0C]
cmp ecx,[eax+$10] cmovl ecx,[eax+$10]
cmp ecx,[eax+$14] cmovl ecx,[eax+$14]
cmp ecx,[eax+$18] cmovl ecx,[eax+$18]
cmp ecx,[eax+$1C] cmovl ecx,[eax+$1C]
cmp ecx,[eax+$20] cmovl ecx,[eax+$20]
cmp ecx,[eax+$24] cmovl ecx,[eax+$24]
cmp ecx,[eax+$28] cmovl ecx,[eax+$28]
cmp ecx,[eax+$2C] cmovl ecx,[eax+$2C]
cmp ecx,[eax+$30] cmovl ecx,[eax+$30]
cmp ecx,[eax+$34] cmovl ecx,[eax+$34]
cmp ecx,[eax+$38] cmovl ecx,[eax+$38]
cmp ecx,[eax+$3C] cmovl ecx,[eax+$3C]
@checkMin:
cmp edx,[eax+$00] jg @parseMin
cmp edx,[eax+$04] jg @parseMin
cmp edx,[eax+$08] jg @parseMin
cmp edx,[eax+$0C] jg @parseMin
cmp edx,[eax+$10] jg @parseMin
cmp edx,[eax+$14] jg @parseMin
cmp edx,[eax+$18] jg @parseMin
cmp edx,[eax+$1C] jg @parseMin
cmp edx,[eax+$20] jg @parseMin
cmp edx,[eax+$24] jg @parseMin
cmp edx,[eax+$28] jg @parseMin
cmp edx,[eax+$2C] jg @parseMin
cmp edx,[eax+$30] jg @parseMin
cmp edx,[eax+$34] jg @parseMin
cmp edx,[eax+$38] jg @parseMin
cmp edx,[eax+$3C] jg @parseMin
jmp @loopfooter
@parseMin:
cmp edx,[eax+$00] cmovg edx,[eax+$00]
cmp edx,[eax+$04] cmovg edx,[eax+$04]
cmp edx,[eax+$08] cmovg edx,[eax+$08]
cmp edx,[eax+$0C] cmovg edx,[eax+$0C]
cmp edx,[eax+$10] cmovg edx,[eax+$10]
cmp edx,[eax+$14] cmovg edx,[eax+$14]
cmp edx,[eax+$18] cmovg edx,[eax+$18]
cmp edx,[eax+$1C] cmovg edx,[eax+$1C]
cmp edx,[eax+$20] cmovg edx,[eax+$20]
cmp edx,[eax+$24] cmovg edx,[eax+$24]
cmp edx,[eax+$28] cmovg edx,[eax+$28]
cmp edx,[eax+$2C] cmovg edx,[eax+$2C]
cmp edx,[eax+$30] cmovg edx,[eax+$30]
cmp edx,[eax+$34] cmovg edx,[eax+$34]
cmp edx,[eax+$38] cmovg edx,[eax+$38]
cmp edx,[eax+$3C] cmovg edx,[eax+$3C]
@loopfooter:
add eax,$40
cmp eax,[ebp-$04]
jl @loop
mov eax,[ebp-$04]
dec [ebp-$0C]
jz @loop
push edx
mov edx,ecx
mov ecx,[ebp-$08]
mov [ecx],edx
pop edx
mov ecx,edx
mov edx,[esp+$08]
mov [edx],ecx
end;
procedure MinMaxArrayHS1(const aArray:array of integer; out aMax,
aMin:integer);
asm // in the hope to use a little bit better U- and V-pipeline //
push ebx
push esi
push edi
push ecx
push ebp
mov ebx, [eax]
mov edi, ebx
inc edx
test edx, edx
jle @@Output
mov ebp, edx // ebp := edx mod 4
and ebp, 7
shr edx, 3 // edx := edx div 4
test edx, edx
jle @@RestMod4
mov esi, [eax]
@@LpMainBegin:
db $0F,$18,$40,$40 { prefetchnta [eax+64] }
mov ecx, [eax+4]
cmp esi, edi
db $0F,$4C,$FE { cmovl edi, esi }
cmp ebx, esi
db $0F,$4C,$DE { cmovl ebx, esi }
mov esi, [eax+8]
cmp ecx, edi
db $0F,$4C,$F9 { cmovl edi, ecx }
cmp ebx, ecx
db $0F,$4C,$D9 { cmovl ebx, ecx }
mov ecx, [eax+12]
cmp esi, edi
db $0F,$4C,$FE { cmovl edi, esi }
cmp ebx, esi
db $0F,$4C,$DE { cmovl ebx, esi }
mov esi, [eax+16]
cmp ecx, edi
db $0F,$4C,$F9 { cmovl edi, ecx }
cmp ebx, ecx
db $0F,$4C,$D9 { cmovl ebx, ecx }
mov ecx, [eax+20]
cmp esi, edi
db $0F,$4C,$FE { cmovl edi, esi }
cmp ebx, esi
db $0F,$4C,$DE { cmovl ebx, esi }
mov esi, [eax+24]
cmp ecx, edi
db $0F,$4C,$F9 { cmovl edi, ecx }
cmp ebx, ecx
db $0F,$4C,$D9 { cmovl ebx, ecx }
mov ecx, [eax+28]
add eax, 32
cmp esi, edi
db $0F,$4C,$FE { cmovl edi, esi }
cmp ebx, esi
db $0F,$4C,$DE { cmovl ebx, esi }
mov esi, [eax]
cmp ecx, edi
db $0F,$4C,$F9 { cmovl edi, ecx }
cmp ebx, ecx
db $0F,$4C,$D9 { cmovl ebx, ecx }
@@LpMainEnd:
dec edx
jnz @@LpMainBegin
@@RestMod4:
test ebp, ebp
jle @@Output
@@LpRestBegin:
mov esi, [eax]
cmp esi, edi
db $0F,$4C,$FE { cmovl edi, esi }
cmp ebx, esi
db $0F,$4C,$DE { cmovl ebx, esi }
@@LpRestEnd:
add eax, 4
dec ebp
jnz @@LpRestBegin
@@Output:
pop ebp
pop ecx
mov eax, [ebp+$08]
mov [ecx], ebx // Output aMin //
mov [eax], edi // Output aMax //
pop edi
pop esi
pop ebx
end;
procedure MinMaxArrayHS2(const aArray:array of integer; out aMax,
aMin:integer);
var
i,ix,iy,mi,ma:integer;
begin
ma := aArray[Low(aArray)];
mi := ma;
ix:= ((High(aArray)+1) div 8);
iy:=0;
for i := Low(aArray) + 1 to ix-1 do
begin
asm // dependent on compiler // { two times faster as without prefetchnta at PIII }
db $0F,$18,$44,$B0,$60 // dependent on compiler // { prefetchnta [eax+esi*4+96] because *) }
end; // dependent on compiler // test with Delphi 5
//
if (aArray[iy ]<mi) then mi:=aArray[iy ] else // *) mov edi, [eax+esi*4] see above //
if (aArray[iy ]>ma) then ma:=aArray[iy ];
if (aArray[iy+1]>ma) then ma:=aArray[iy+1] else
if (aArray[iy+1]<mi) then mi:=aArray[iy+1];
if (aArray[iy+2]<mi) then mi:=aArray[iy+2] else
if (aArray[iy+2]>ma) then ma:=aArray[iy+2];
if (aArray[iy+3]>ma) then ma:=aArray[iy+3] else
if (aArray[iy+3]<mi) then mi:=aArray[iy+3];
if (aArray[iy+4]<mi) then mi:=aArray[iy+4] else
if (aArray[iy+4]>ma) then ma:=aArray[iy+4];
if (aArray[iy+5]>ma) then ma:=aArray[iy+5] else
if (aArray[iy+5]<mi) then mi:=aArray[iy+5];
if (aArray[iy+6]<mi) then mi:=aArray[iy+6] else
if (aArray[iy+6]>ma) then ma:=aArray[iy+6];
if (aArray[iy+7]>ma) then ma:=aArray[iy+7] else
if (aArray[iy+7]<mi) then mi:=aArray[iy+7];
inc(iy,8);
end;
for i := ix*8 to High(aArray) do
begin
if (aArray[i]<mi) then mi:=aArray[i];
if (aArray[i]>ma) then ma:=aArray[i];
end;
aMax := ma;
aMin := mi;
end;
procedure MinMaxArrayO1(const aArray:array of integer; out aMax,
aMin:integer);
var // averaged 10 clocks per roundtrip = 10% faster than above
i,mi,ma:integer;
begin
ma := aArray[Low(aArray)];
mi := ma;
for i := Low(aArray) + 1 to High(aArray) do
begin
if (aArray[i]<mi) then mi:=aArray[i];
if (aArray[i]>ma) then ma:=aArray[i];
end;
aMax := ma;
aMin := mi;
end;
procedure MinMaxArrayCD1( const aArray : Array of Integer; out aMax, aMin :
integer );
var
MaxArray,
MinArray : array[boolean] of integer;
i : integer;
begin
aMax :=0;
aMin :=0;
i:=high( aArray );
if i>=0 then begin
MaxArray[True] := aArray[0];
MinArray[True] := aArray[0];
while i>0 do begin
MaxArray[ aArray[i]>MaxArray[True] ] := aArray[i];
MinArray[ aArray[i] < MinArray[True] ] := aArray[i];
dec(i);
end;
aMax := MaxArray[True];
aMin := MinArray[True];
end;
end;
procedure MinMaxArrayCD2(const aArray:array of integer; out aMax,
aMin:integer);
var // averaged 10 clocks per roundtrip = 10% faster than above
i,mi,ma:integer;
begin
ma := aArray[Low(aArray)];
mi := ma;
for i := Low(aArray) + 1 to High(aArray) do
begin
if (aArray[i]<mi) then mi:=aArray[i];
if (aArray[i]>ma) then ma:=aArray[i];
end;
aMax := ma;
aMin := mi;
end;
procedure MinMaxArrayHS3(const aArray:array of integer; out aMax,
aMin:integer);
asm
push ebx
push esi
push edi
mov ebx, ecx
mov ecx, [eax]
mov edi, ecx
inc edx
test edx, edx
jle @@Output
@@LpBegin:
mov esi, [eax]
@@CheckMin:
cmp esi, edi
jnl @@CheckMax
mov edi, esi // Store in EDI if < //
@@CheckMax:
cmp ecx, esi
jnl @@LpEnd
mov ecx, esi // Store in ECX if>//
@@LpEnd:
add eax, 4
dec edx
jnz @@LpBegin
@@Output:
mov eax, [ebp+$08]
mov [ebx], ecx // Output aMin //
mov [eax], edi // Output aMax //
pop edi
pop esi
pop ebx
end;
//version 2 (cmov and unrolling in basm):
//==========================================================
procedure MinMaxArrayHS4(const aArray:array of integer; out aMax,
aMin:integer);
asm
push ebx
push esi
push edi
push ebp
mov ebx, ecx
mov ecx, [eax]
mov edi, ecx
inc edx
test edx, edx
jle @@Output
@@LpBeginA:
mov esi, [eax]
mov ebp, [eax+4]
@@CheckMinA:
cmp esi, edi
jnl @@CheckMaxA
mov edi, esi // Store in EDI if < //
@@CheckMaxA:
cmp ecx, esi
jnl @@LpEndA
mov ecx, esi // Store in ECX if>//
@@LpEndA:
dec edx
jz @@Output
@@CheckMinB:
cmp ebp, edi
jnl @@CheckMaxB
mov edi, ebp // Store in EDI if < //
@@CheckMaxB:
cmp ecx, ebp
jnl @@LpEndB
mov ecx, ebp // Store in ECX if>//
@@LpEndB:
add eax, 8
dec edx
jnz @@LpBeginA
@@Output:
pop ebp
mov eax, [ebp+$08]
mov [ebx], ecx // Output aMin //
mov [eax], edi // Output aMax //
pop edi
pop esi
pop ebx
end;
//version 3:
//==========================================================
procedure MinMaxArrayHS5(const aArray:array of integer; out aMax,
aMin:integer);
asm
push ebx
push esi
push edi
push ebp
mov ebx, ecx
mov ecx, [eax]
mov edi, ecx
inc edx
test edx, edx
jle @@Output
mov ebp, edx // ebp := edx mod 4
and ebp, 3
shr edx, 2 // edx := edx div 4
test edx, edx
jle @@RestMod4
@@LpMainBegin:
mov esi, [eax]
cmp esi, edi
db $0F,$4C,$FE { cmovl edi, esi }
cmp ecx, esi
db $0F,$4C,$CE { cmovl ecx, esi }
mov esi, [eax+4]
cmp esi, edi
db $0F,$4C,$FE { cmovl edi, esi }
cmp ecx, esi
db $0F,$4C,$CE { cmovl ecx, esi }
mov esi, [eax+8]
cmp esi, edi
db $0F,$4C,$FE { cmovl edi, esi }
cmp ecx, esi
db $0F,$4C,$CE { cmovl ecx, esi }
mov esi, [eax+12]
cmp esi, edi
db $0F,$4C,$FE { cmovl edi, esi }
cmp ecx, esi
db $0F,$4C,$CE { cmovl ecx, esi }
@@LpMainEnd:
add eax, 16
dec edx
jnz @@LpMainBegin
@@RestMod4:
test ebp, ebp
jle @@Output
@@LpRestBegin:
mov esi, [eax]
cmp esi, edi
db $0F,$4C,$FE { cmovl edi, esi }
cmp ecx, esi
db $0F,$4C,$CE { cmovl ecx, esi }
@@LpRestEnd:
add eax, 4
dec ebp
jnz @@LpRestBegin
@@Output:
pop ebp
mov eax, [ebp+$08]
mov [ebx], ecx // Output aMin //
mov [eax], edi // Output aMax //
pop edi
pop esi
pop ebx
end;
procedure MinMaxArrayHS6(const aArray:array of integer; out aMax,
aMin:integer);
asm // in the hope to use a little bit better U- and V-pipeline //
push ebx
push esi
push edi
push ecx
push ebp
mov ebx, [eax]
mov edi, ebx
inc edx
test edx, edx
jle @@Output
mov ebp, edx // ebp := edx mod 4
and ebp, 7
shr edx, 3 // edx := edx div 4
test edx, edx
jle @@RestMod4
mov esi, [eax]
@@LpMainBegin:
mov ecx, [eax+4]
cmp esi, edi
db $0F,$4C,$FE { cmovl edi, esi }
cmp ebx, esi
db $0F,$4C,$DE { cmovl ebx, esi }
mov esi, [eax+8]
cmp ecx, edi
db $0F,$4C,$F9 { cmovl edi, ecx }
cmp ebx, ecx
db $0F,$4C,$D9 { cmovl ebx, ecx }
mov ecx, [eax+12]
cmp esi, edi
db $0F,$4C,$FE { cmovl edi, esi }
cmp ebx, esi
db $0F,$4C,$DE { cmovl ebx, esi }
mov esi, [eax+16]
cmp ecx, edi
db $0F,$4C,$F9 { cmovl edi, ecx }
cmp ebx, ecx
db $0F,$4C,$D9 { cmovl ebx, ecx }
mov ecx, [eax+20]
cmp esi, edi
db $0F,$4C,$FE { cmovl edi, esi }
cmp ebx, esi
db $0F,$4C,$DE { cmovl ebx, esi }
mov esi, [eax+24]
cmp ecx, edi
db $0F,$4C,$F9 { cmovl edi, ecx }
cmp ebx, ecx
db $0F,$4C,$D9 { cmovl ebx, ecx }
mov ecx, [eax+28]
add eax, 32
cmp esi, edi
db $0F,$4C,$FE { cmovl edi, esi }
cmp ebx, esi
db $0F,$4C,$DE { cmovl ebx, esi }
mov esi, [eax]
cmp ecx, edi
db $0F,$4C,$F9 { cmovl edi, ecx }
cmp ebx, ecx
db $0F,$4C,$D9 { cmovl ebx, ecx }
@@LpMainEnd:
dec edx
jnz @@LpMainBegin
@@RestMod4:
test ebp, ebp
jle @@Output
@@LpRestBegin:
mov esi, [eax]
cmp esi, edi
db $0F,$4C,$FE { cmovl edi, esi }
cmp ebx, esi
db $0F,$4C,$DE { cmovl ebx, esi }
@@LpRestEnd:
add eax, 4
dec ebp
jnz @@LpRestBegin
@@Output:
pop ebp
pop ecx
mov eax, [ebp+$08]
mov [ecx], ebx // Output aMin //
mov [eax], edi // Output aMax //
pop edi
pop esi
pop ebx
end;
function RDTSC: Int64;
asm
RDTSC
end;
var
PerfStart, PefrStop, PerfFrq: Int64;
procedure QueryPerformanceStart;
begin
QueryPerformanceCounter(PerfStart);
end;
function QueryPerformanceStop: Double;
begin
QueryPerformanceCounter(PefrStop);
QueryPerformanceFrequency(PerfFrq);
Result := (PefrStop - PerfStart)/PerfFrq;
end;
procedure DoTest ( proc: TMinMaxArray; const OutText: string);
const
Times = 100;
var
Min: Integer;
Max: Integer;
I: Integer;
Time: Double;
begin
sleep(100);
QueryPerformanceStart;
for I := 1 to Times do
proc(A, Max, Min);
Time := QueryPerformanceStop;
Writeln(Format('%10.6fs %d %d %s', [Time, Min, Max, Outtext]));
end;
var
i: integer;
begin
Randomize;
try
SetLength(A, 1000 * 1000);
for I := 0 to High(A) do
//A[I] := I; //Random(10 * 1000 * 1000);
A[I] := Random(10 * 1000 * 1000);
SZBeforeBenchmark;
writeln ('1 mil. random numbers test');
// DoTest(@MinMaxArray ,'Dummy! - the same as original');
writeln;
DoTest(@MinMaxArray ,'Original');
DoTest(@MinMaxArrayO1 ,'Original - optimized');
DoTest(@MinMaxArray0 ,'Using pointers');
DoTest(@MinMaxArray0_1 ,'Using pointers (8/loop) !!! BUG !!!');
DoTest(@MinMaxArray1 ,'Using indices');
DoTest(@MinMaxArray2 ,'Reading array value once');
DoTest(@MinMaxArray3 ,'Nenad Trkulja - loop unrolling');
DoTest(@MinMaxArray4 ,'Hubert Seidel - local min and max');
DoTest(@MinMaxArray5 ,'Pierre le Riche - boolean trick');
DoTest(@MinMaxArray6 ,'Trkulja/Seidel - loop unrolling and local min and max');
DoTest(@MinMaxArrayGS1 ,'Gilberto Saraiva 8 ASM');
DoTest(@MinMaxArrayGS2 ,'Gilberto Saraiva 16 ASM');
DoTest(@MinMaxArrayHS1 ,'Hubert Seidel ASM');
DoTest(@MinMaxArrayHS2 ,'Hubert Seidel 8 PAS + ASM !!! BUG !!!');
DoTest(@MinMaxArrayHS3 ,'Hubert Seidel ASM V1' );
DoTest(@MinMaxArrayHS4 ,'Hubert Seidel ASM V2' );
DoTest(@MinMaxArrayHS5 ,'Hubert Seidel ASM V3' );
DoTest(@MinMaxArrayHS6 ,'Hubert Seidel ASM Rollup 8' );
DoTest(@MinMaxArrayCD1 ,'Clement Doss PAS 1');
DoTest(@MinMaxArrayCD2 ,'Clement Doss PAS 2');
DoTest(@MinMaxArraySZ0 ,'Sasa Zeman - pointers PAS');
DoTest(@MinMaxArraySZ1 ,'Sasa Zeman - indices (1) for PAS');
DoTest(@MinMaxArraySZ2 ,'Sasa Zeman - indices (2) while PAS');
DoTest(@MinMaxArraySZ3 ,'Sasa Zeman - indices (3) while PAS');
DoTest(@MinMaxArraySZ4 ,'Sasa Zeman - indices (4) while local 4 PAS');
DoTest(@MinMaxArraySZ5 ,'Sasa Zeman - indices (5) while local 8 PAS');
DoTest(@MinMaxArraySZ6 ,'Sasa Zeman - indices (6) while local 16 PAS');
writeln;
SZAfterBenchmark;
Readln;
except
on E:Exception do
Writeln(E.Classname, ': ', E.Message);
end;
end.
 
 

Re:Re: I need the fastest routine

Hello Sasa,
"Sasa Zeman" < XXXX@XXXXX.COM >schrieb im Newsbeitrag
Quote
MinMaxTest6.dpr
I can't send with attachments :-(
Outlook Express konnte den Beitrag nicht bereitstellen. Betreff 'Re: I need
the fastest routine', Konto: 'news.t-online.de', Server: 'news.t-online.de',
Protokoll: NNTP, Serverantwort: '441 437 Binary in non-binary group ',
Port: 119, Secure (SSL): Nein, Serverfehler: 441, Fehlernummer: 0x800CCCA9
)
MinMaxTest7 (split functions into separate libraries) with some increments
based on MinMaxTest6.
download possible www.hubert-seidel.eu/downloads/minmaxarray.zip
mfg.
Herby
--
www.hubert-seidel.de
 

Re:Re: I need the fastest routine

Hallo,
"Sasa Zeman" < XXXX@XXXXX.COM >schrieb im Newsbeitrag
Quote
I find a little spare time to make change a bit on testing code to make it
simple and put some contribution code. Functions I added under my name are
nothing new, only optimized code of existed ideas in pure Pascal, forcing
compiler (with optimization turn on) to make almost ideal ASM code.
thank you for code ( news: XXXX@XXXXX.COM )
I splitt the functions in several libraries for better overview and so on.
Bacause i can't send attachments, here is the download-link:
www.hubert-seidel.eu/downloads/minmaxarray.zip
It based on you code, but test with 4 array-types.
Quote
Few major chages are made:

- Simplifying testing code
- Putting sleep(10) before starting test
- Random number test
- Priority set to maximum during test to allow equal testing environment
for
all functions
(priority change functions are from my SZTimer, can be found at
www.szutils.net, Delphi section)
Here my major changes:
Modified MinMaxTest6 by www.hubert-seidel.eu /.de
- Splitting into libraries
- zero, step up, step down and random test
- change out to var for compatibility D5 down to D2
(i test with D2, D3 and D5)
Currently Test7 and Test10 can't compile with D2/D3
because Length(aArray) is used
- dynamic test-list ->libs.txt <-
Quote
Results on 1 mil. random numbers test (Celeron 2.8):

1,309029s 13 9999999 Original
0,195020s 13 9999999 Original - optimized
0,266579s 13 9999999 Using pointers
0,217537s 960051513 0 Using pointers (8/loop) !!! BUG !!!
0,235024s 13 9999999 Using indices
0,257107s 13 9999999 Reading array value once
0,213859s 13 9999999 Nenad Trkulja - loop unrolling
0,198597s 13 9999999 Hubert Seidel - local min and max
0,838087s 13 9999999 Pierre le Riche - boolean trick
0,152449s 13 9999999 Trkulja/Seidel - loop unrolling and local min and
max
0,132889s 13 9999999 Gilberto Saraiva 8 ASM
0,122806s 13 9999999 Gilberto Saraiva 16 ASM
0,377528s 13 9999999 Hubert Seidel ASM
0,135966s 13 9999991 Hubert Seidel 8 PAS + ASM !!! BUG !!!
0,206225s 13 9999999 Hubert Seidel ASM V1
0,189716s 13 9999999 Hubert Seidel ASM V2
0,374263s 13 9999999 Hubert Seidel ASM V3
0,386615s 13 9999999 Hubert Seidel ASM Rollup 8
1,308774s 13 9999999 Clement Doss PAS 1
0,215284s 13 9999999 Clement Doss PAS 2
0,282143s 13 9999999 Sasa Zeman - pointers PAS
0,256864s 13 9999999 Sasa Zeman - indices (1) for PAS
0,261010s 13 9999999 Sasa Zeman - indices (2) while PAS
0,199161s 13 9999999 Sasa Zeman - indices (3) while PAS
0,144271s 13 9999999 Sasa Zeman - indices (4) while local 4 PAS
0,137221s 13 9999999 Sasa Zeman - indices (5) while local 8 PAS
0,123888s 13 9999999 Sasa Zeman - indices (6) while local 16 PAS
I fixed the bug in "Hubert Seidel 8 PAS + ASM" (i removed "+1")
Resuklts on PIII 500:
1000000 zero test
7,093170s 0 0 Original
2,566660s 0 0 Original - optimized
2,690189s 0 0 Using pointers
2,565663s 4000014 0 Using pointers (8/loop) err[100]
2,690388s 0 0 Using indices
2,819175s 0 0 Reading array value once
2,572062s 0 0 Nenad Trkulja - loop unrolling
2,566906s 0 0 Hubert Seidel - local min and max
7,219565s 0 0 Pierre le Riche - boolean trick
2,566333s 0 0 Trkulja/Seidel - loop unrolling and local min and max
2,565259s 0 0 Gilberto Saraiva 8 ASM err[7]
1,546467s 0 0 Gilberto Saraiva 16 ASM err[15]
1,115325s 0 0 Hubert Seidel ASM
1,052107s 0 0 Hubert Seidel 8 PAS + ASM
2,566583s 0 0 Hubert Seidel ASM V1
2,565895s 0 0 Hubert Seidel ASM V2
2,566376s 0 0 Hubert Seidel ASM V3
2,566293s 0 0 Hubert Seidel ASM Rollup 8
7,092724s 0 0 Clement Doss PAS 1
2,568177s 0 0 Clement Doss PAS 2
2,690229s 0 0 Sasa Zeman - pointers PAS
2,689724s 0 0 Sasa Zeman - indices (1) for PAS
2,819314s 0 0 Sasa Zeman - indices (2) while PAS
1000000 step up test
7,092395s 0 999999 Original
2,575710s 0 999999 Original - optimized
2,795169s 0 999999 Using pointers
2,567649s 29332688 0 Using pointers (8/loop) err[100]
2,691912s 0 999999 Using indices
2,947185s 0 999999 Reading array value once
3,072719s 0 999999 Nenad Trkulja - loop unrolling
2,570522s 0 999999 Hubert Seidel - local min and max
7,218081s 0 999999 Pierre le Riche - boolean trick
2,566796s 0 999999 Trkulja/Seidel - loop unrolling and local min and max
2,592084s 0 999999 Gilberto Saraiva 8 ASM err[7]
2,688290s 0 999999 Gilberto Saraiva 16 ASM err[15]
1,411727s 0 999999 Hubert Seidel ASM
0,895835s 0 999999 Hubert Seidel 8 PAS + ASM
2,572231s 0 999999 Hubert Seidel ASM V1
2,571806s 0 999999 Hubert Seidel ASM V2
2,567874s 0 999999 Hubert Seidel ASM V3
2,566216s 0 999999 Hubert Seidel ASM Rollup 8
7,092070s 0 999999 Clement Doss PAS 1
2,570038s 0 999999 Clement Doss PAS 2
2,721547s 0 999999 Sasa Zeman - pointers PAS
3,068285s 0 999999 Sasa Zeman - indices (1) for PAS
2,820808s 0 999999 Sasa Zeman - indices (2) while PAS
1000000 step down test
7,092890s -999999 0 Original
2,570853s -999999 0 Original - optimized
2,948346s -999999 0 Using pointers
2,565726s 7503436 -999999 Using pointers (8/loop) err[100]
3,063204s -999999 0 Using indices
3,070483s -999999 0 Reading array value once
3,073319s -999999 0 Nenad Trkulja - loop unrolling
2,576264s -999999 0 Hubert Seidel - local min and max
7,217981s -999999 0 Pierre le Riche - boolean trick
2,567039s -999999 0 Trkulja/Seidel - loop unrolling and local min and
max
2,566024s -999999 0 Gilberto Saraiva 8 ASM err[7]
1,859491s -999999 0 Gilberto Saraiva 16 ASM err[15]
1,115347s -999999 0 Hubert Seidel ASM
0,785001s -999999 0 Hubert Seidel 8 PAS + ASM
2,576771s -999999 0 Hubert Seidel ASM V1
2,566619s -999999 0 Hubert Seidel ASM V2
2,566368s -999999 0 Hubert Seidel ASM V3
2,566014s -999999 0 Hubert Seidel ASM Rollup 8
7,092303s -999999 0 Clement Doss PAS 1
2,568726s -999999 0 Clement Doss PAS 2
2,948634s -999999 0 Sasa Zeman - pointers PAS
2,951441s -999999 0 Sasa Zeman - indices (1) for PAS
3,074999s -999999 0 Sasa Zeman - indices (2) while PAS
1000000 random numbers test
7,092420s 1 9999988 Original
2,565828s 1 9999988 Original - optimized
2,689518s 1 9999988 Using pointers
2,565154s 1936028704 1 Using pointers (8/loop) err[100]
2,689524s 1 9999988 Using indices
2,818907s 1 9999988 Reading array value once
2,567847s 1 9999988 Nenad Trkulja - loop unrolling
2,565662s 1 9999988 Hubert Seidel - local min and max
7,218498s 1 9999988 Pierre le Riche - boolean trick
2,565023s 1 9999988 Trkulja/Seidel - loop unrolling and local min and
max
2,565435s 1 9999988 Gilberto Saraiva 8 ASM err[7]
1,548365s 1 9999988 Gilberto Saraiva 16 ASM err[15]
1,412842s 1 9999988 Hubert Seidel ASM
1,064787s 1 9999988 Hubert Seidel 8 PAS + ASM
2,565831s 1 9999988 Hubert Seidel ASM V1
2,565403s 1 9999988 Hubert Seidel ASM V2
2,568214s 1 9999988 Hubert Seidel ASM V3
2,565673s 1 9999988 Hubert Seidel ASM Rollup 8
7,092523s 1 9999988 Clement Doss PAS 1
2,567520s 1 9999988 Clement Doss PAS 2
2,689362s 1 9999988 Sasa Zeman - pointers PAS
2,689979s 1 9999988 Sasa Zeman - indices (1) for PAS
2,818905s 1 9999988 Sasa Zeman - indices (2) while PAS
mfg.
Herby
--
www.hubert-seidel.de
 

{smallsort}

Re:Re: I need the fastest routine

Hubert Seidel wrote:
Quote
I can't send with attachments :-(
Outlook Express konnte den Beitrag nicht bereitstellen. Betreff 'Re:
I need the fastest routine', Konto: 'news.t-online.de', Server:
'news.t-online.de', Protokoll: NNTP, Serverantwort: '441 437 Binary
in non-binary group ', Port: 119, Secure (SSL): Nein, Serverfehler:
441, Fehlernummer: 0x800CCCA9 )

You seem to be posting on the news server of t-online you should be
able to post attachments when you connect directly to the CodeGear news
server
--
Pieter
 

Re:Re: I need the fastest routine

Update:
"Hubert Seidel" < XXXX@XXXXX.COM >schrieb im Newsbeitrag
1000000 zero test
7,102748s 0 0 Original
2,566444s 0 0 Original - optimized
2,690325s 0 0 Using pointers
2,565527s 4000014 0 Using pointers (8/loop) err[100]
2,690412s 0 0 Using indices
2,819295s 0 0 Reading array value once
2,572471s 0 0 Nenad Trkulja - loop unrolling
2,566473s 0 0 Hubert Seidel - local min and max
7,218965s 0 0 Pierre le Riche - boolean trick
2,565781s 0 0 Trkulja/Seidel - loop unrolling and local min and max
2,565414s 0 0 Gilberto Saraiva 8 ASM err[7]
1,546369s 0 0 Gilberto Saraiva 16 ASM err[15]
1,115225s 0 0 Hubert Seidel ASM
1,052090s 0 0 Hubert Seidel 8 PAS + ASM
2,566464s 0 0 Hubert Seidel ASM V1
2,565362s 0 0 Hubert Seidel ASM V2
2,566528s 0 0 Hubert Seidel ASM V3
2,565972s 0 0 Hubert Seidel ASM Rollup 8
7,093431s 0 0 Clement Doss PAS 1
2,568016s 0 0 Clement Doss PAS 2
2,689874s 0 0 Sasa Zeman - pointers PAS
2,689800s 0 0 Sasa Zeman - indices (1) for PAS
2,823381s 0 0 Sasa Zeman - indices (2) while PAS
2,569007s 0 0 Sasa Zeman - indices (3) while PAS
2,564868s 0 0 Sasa Zeman - indices (4) while local 4 PAS ERR[99]
2,565255s 0 0 Sasa Zeman - indices (5) while local 8 PAS err[100]
1,738057s 0 0 Sasa Zeman - indices (6) while local 16 PAS err[100]
1000000 step up test
7,093093s 0 999999 Original
2,576095s 0 999999 Original - optimized
2,795489s 0 999999 Using pointers
2,567761s 29332704 0 Using pointers (8/loop) err[100]
2,691677s 0 999999 Using indices
2,947683s 0 999999 Reading array value once
3,069451s 0 999999 Nenad Trkulja - loop unrolling
2,571347s 0 999999 Hubert Seidel - local min and max
7,218614s 0 999999 Pierre le Riche - boolean trick
2,566943s 0 999999 Trkulja/Seidel - loop unrolling and local min and max
2,592080s 0 999999 Gilberto Saraiva 8 ASM err[7]
2,688291s 0 999999 Gilberto Saraiva 16 ASM err[15]
1,410791s 0 999999 Hubert Seidel ASM
0,896118s 0 999999 Hubert Seidel 8 PAS + ASM
2,572244s 0 999999 Hubert Seidel ASM V1
2,571485s 0 999999 Hubert Seidel ASM V2
2,568159s 0 999999 Hubert Seidel ASM V3
2,566707s 0 999999 Hubert Seidel ASM Rollup 8
7,093056s 0 999999 Clement Doss PAS 1
2,570183s 0 999999 Clement Doss PAS 2
2,721485s 0 999999 Sasa Zeman - pointers PAS
3,068733s 0 999999 Sasa Zeman - indices (1) for PAS
2,820990s 0 999999 Sasa Zeman - indices (2) while PAS
2,818436s 0 999999 Sasa Zeman - indices (3) while PAS
2,566127s 0 999999 Sasa Zeman - indices (4) while local 4 PAS ERR[99]
2,566121s 0 999999 Sasa Zeman - indices (5) while local 8 PAS err[100]
2,679838s 0 999999 Sasa Zeman - indices (6) while local 16 PAS err[100]
1000000 step down test
7,093432s -999999 0 Original
2,570947s -999999 0 Original - optimized
2,948684s -999999 0 Using pointers
2,565713s 7292626 -999999 Using pointers (8/loop) err[100]
3,063325s -999999 0 Using indices
3,070498s -999999 0 Reading array value once
3,078024s -999999 0 Nenad Trkulja - loop unrolling
2,575822s -999999 0 Hubert Seidel - local min and max
7,219278s -999999 0 Pierre le Riche - boolean trick
2,566944s -999999 0 Trkulja/Seidel - loop unrolling and local min and
max
2,566854s -999999 0 Gilberto Saraiva 8 ASM err[7]
1,859365s -999999 0 Gilberto Saraiva 16 ASM err[15]
1,115631s -999999 0 Hubert Seidel ASM
0,785062s -999999 0 Hubert Seidel 8 PAS + ASM
2,576785s -999999 0 Hubert Seidel ASM V1
2,566235s -999999 0 Hubert Seidel ASM V2
2,566733s -999999 0 Hubert Seidel ASM V3
2,566627s -999999 0 Hubert Seidel ASM Rollup 8
7,093052s -999999 0 Clement Doss PAS 1
2,569069s -999999 0 Clement Doss PAS 2
2,948386s -999999 0 Sasa Zeman - pointers PAS
2,948909s -999999 0 Sasa Zeman - indices (1) for PAS
3,075596s -999999 0 Sasa Zeman - indices (2) while PAS
2,658051s -999999 0 Sasa Zeman - indices (3) while PAS
2,565431s -999999 0 Sasa Zeman - indices (4) while local 4 PAS ERR[99]
2,566894s -999999 0 Sasa Zeman - indices (5) while local 8 PAS err[100]
1,803834s -999999 0 Sasa Zeman - indices (6) while local 16 PAS err[100]
1000000 random numbers test
7,093128s 4 9999991 Original
2,566270s 4 9999991 Original - optimized
2,690469s 4 9999991 Using pointers
2,565803s 1936028704 1 Using pointers (8/loop) err[100]
2,690444s 4 9999991 Using indices
2,819783s 4 9999991 Reading array value once
2,568683s 4 9999991 Nenad Trkulja - loop unrolling
2,566701s 4 9999991 Hubert Seidel - local min and max
7,219323s 4 9999991 Pierre le Riche - boolean trick
2,565796s 4 9999991 Trkulja/Seidel - loop unrolling and local min and
max
2,566250s 4 9999991 Gilberto Saraiva 8 ASM err[7]
1,548898s 4 9999991 Gilberto Saraiva 16 ASM err[15]
1,409447s 4 9999991 Hubert Seidel ASM
1,064731s 4 9999991 Hubert Seidel 8 PAS + ASM
2,566945s 4 9999991 Hubert Seidel ASM V1
2,566666s 4 9999991 Hubert Seidel ASM V2
2,568260s 4 9999991 Hubert Seidel ASM V3
2,566547s 4 9999991 Hubert Seidel ASM Rollup 8
7,096827s 4 9999991 Clement Doss PAS 1
2,568431s 4 9999991 Clement Doss PAS 2
2,690517s 4 9999991 Sasa Zeman - pointers PAS
2,689937s 4 9999991 Sasa Zeman - indices (1) for PAS
2,819498s 4 9999991 Sasa Zeman - indices (2) while PAS
2,569263s 4 9999991 Sasa Zeman - indices (3) while PAS
2,565807s 4 9999991 Sasa Zeman - indices (4) while local 4 PAS ERR[99]
2,566909s 4 9999991 Sasa Zeman - indices (5) while local 8 PAS err[100]
1,738044s 4 9999991 Sasa Zeman - indices (6) while local 16 PAS err[100]
if err[n] at the end of the line, the function don't work correctly by
array-size n.
(it will test 1 to 100 at every start)
Only "Gilberto Saraiva 8 ASM" and "Gilberto Saraiva 16 ASM" are known
"error".
(You had better if all functions give always the right results)
mfg.
Herby
--
www.hubert-seidel.de
 

Re:Re: I need the fastest routine

Philipp Pammler wrote:
Quote
i unno if you still need sum numbers :)
Thank you. It is interesting only informative. I have no time for further
development nor to improve functions.
Quote
started from outside the ide
cpu: 4*2.50 GHz (Q9300)
ram: 8192 MB (DDR2 800MHz)
os: vista64 business sp1
This is with or witout maximum priority set (functions from SZTimer)?
If test is provided with maximim priority, problem can be in measuring time
for any function, especially outside (the first and the last).
If compiled and run with setting maximum priority, diffrence is probably
related with different CPUs. BTW, I compiled it with D7, started outside
the ide and os is W2K. Results are prosted in BASM group.
 

Re:Re: I need the fastest routine

Sasa Zeman wrote:
Quote
MinMaxTest6.dpr
Thanks for that, but it fails to compile - it needs SZtimer. It would be
better to include this in the Zip file rather than make people go and
chase around for it.
W h y i s i t a l l d o u b l e - s p a c e d v e r t i ca l l
y?
David
 

Re:Re: I need the fastest routine

Sasa Zeman wrote:
Quote
David J Taylor wrote:

>Thanks for that, but it fails to compile - it needs SZtimer. It
>would be better to include this in the Zip file rather than make
>people go and chase around for it.

You are welcome.

I simply have had no time for that, but have nothing agains to be
included in the zip. I simply use SZTimer all the time when need
small interval benchmarking.

>W h y i s i t a l l d o u b l e - s p a c e d v e r t i ca l l
>y?

It seems the source was vasted by KNode parser (Linux news reader
clien).
Thanks, Sasa. I did get the extra unit, and the source code compiles and
runs correctly. Sorry to hear about the Linux newsreader problems.
Cheers,
David
 

Re:Re: I need the fastest routine

Hubert Seidel wrote:
Quote
thank you for code ( news: XXXX@XXXXX.COM )
You are welcome.
Quote
I splitt the functions in several libraries for better overview and so on.
I suggest to avoid dlls and only include separated files instead (unique
name for each testing function). Thus also can be created an existed
function pointer list to avoid manually starting each test.
Quote
2,565807s 4 9999991 Sasa Zeman - indices (4) while local 4 PAS ERR[99]
2,566909s 4 9999991 Sasa Zeman - indices (5) while local 8 PAS
err[100]
1,738044s 4 9999991 Sasa Zeman - indices (6) while local 16 PAS
err[100]
It seems I posted an old, incorrect version. Since it uses local vMin
and vMax values, thus last loop makes the problem in upper functions:
if aArray[i]>aMax then aMax := aArray[i];
if aArray[i] < aMin then aMin := aArray[i];
Should be:
if aArray[i]>vMax then vMax := aArray[i];
if aArray[i] < vMin then vMin := aArray[i];
Please change it.
Also you can change sleep(100) to sleep(10).
Quote
if err[n] at the end of the line, the function don't work correctly by
array-size n.
(it will test 1 to 100 at every start)
Covering all different cases is very good idea.
 

Re:Re: I need the fastest routine

Hubert Seidel wrote:
Quote
if err[n] at the end of the line, the function don't work correctly by
array-size n.
(it will test 1 to 100 at every start)
Only "Gilberto Saraiva 8 ASM" and "Gilberto Saraiva 16 ASM" are known
"error".
(You had better if all functions give always the right results)
Hello Hubert,
very good test you have made, thanks,
I'll see what can be done to fix that as soon as possible.
^^
--
[]'s
Gilberto Saraiva
gsaraiva.projects.pro.br/
 

Re:Re: I need the fastest routine

Quote
2,565807s 4 9999991 Sasa Zeman - indices (4) while local 4 PAS ERR[99]
2,566909s 4 9999991 Sasa Zeman - indices (5) while local 8 PAS err[100]
1,738044s 4 9999991 Sasa Zeman - indices (6) while local 16 PAS err[100]
As well:
i:=0;
while i <= l1 do
Should be:
i:=0;
while i < l1 do
 

Re:Re: I need the fastest routine

David J Taylor wrote:
Quote
Thanks for that, but it fails to compile - it needs SZtimer.  It would be
better to include this in the Zip file rather than make people go and
chase around for it.
You are welcome.
I simply have had no time for that, but have nothing agains to be included
in the zip. I simply use SZTimer all the time when need small interval
benchmarking.
?
Quote
W h y ?i s ?i t ?a l l ?d o u b l e ?  s p a c e d ?v e r t i ca l l
y?
It seems the source was vasted by KNode parser (Linux news reader clien).
 

Re:Re: I need the fastest routine

Quote
Hubert Seidel wrote:

>if err[n] at the end of the line, the function don't work correctly
>by array-size n.
>(it will test 1 to 100 at every start)
>Only "Gilberto Saraiva 8 ASM" and "Gilberto Saraiva 16 ASM" are
>known "error".
>(You had better if all functions give always the right results)
I think now the code run for all arrays ranges, but 16 steps per loop
overhead the process for sequential calls and can be a bad code. I
guess 8steps are the most powerfull for any situations.
procedure MinMaxArray8steps(const AArray: array of Integer;
out AMax, AMin: integer);
asm
cmp edx,0
jl @exit
mov [ebp-$0C],edx
mov [ebp-$08],ecx
mov [ebp-$04],eax
shl edx,$02
cmp [ebp-$0C],$07
jl @restInit
sub edx,$1C
@restInit:
add [ebp-$04],edx
mov ecx,0
mov edx,MaxInt
cmp [ebp-$0C],$07
jl @restLoop
@loop:
cmp ecx,[eax+$00] jl @parseMax
cmp ecx,[eax+$04] jl @parseMax
cmp ecx,[eax+$08] jl @parseMax
cmp ecx,[eax+$0C] jl @parseMax
cmp ecx,[eax+$10] jl @parseMax
cmp ecx,[eax+$14] jl @parseMax
cmp ecx,[eax+$18] jl @parseMax
cmp ecx,[eax+$1C] jl @parseMax
jmp @checkMin
@parseMax:
cmp ecx,[eax+$00] cmovl ecx,[eax+$00]
cmp ecx,[eax+$04] cmovl ecx,[eax+$04]
cmp ecx,[eax+$08] cmovl ecx,[eax+$08]
cmp ecx,[eax+$0C] cmovl ecx,[eax+$0C]
cmp ecx,[eax+$10] cmovl ecx,[eax+$10]
cmp ecx,[eax+$14] cmovl ecx,[eax+$14]
cmp ecx,[eax+$18] cmovl ecx,[eax+$18]
cmp ecx,[eax+$1C] cmovl ecx,[eax+$1C]
@checkMin:
cmp edx,[eax+$00] jg @parseMin
cmp edx,[eax+$04] jg @parseMin
cmp edx,[eax+$08] jg @parseMin
cmp edx,[eax+$0C] jg @parseMin
cmp edx,[eax+$10] jg @parseMin
cmp edx,[eax+$14] jg @parseMin
cmp edx,[eax+$18] jg @parseMin
cmp edx,[eax+$1C] jg @parseMin
jmp @loopfooter
@parseMin:
cmp edx,[eax+$00] cmovg edx,[eax+$00]
cmp edx,[eax+$04] cmovg edx,[eax+$04]
cmp edx,[eax+$08] cmovg edx,[eax+$08]
cmp edx,[eax+$0C] cmovg edx,[eax+$0C]
cmp edx,[eax+$10] cmovg edx,[eax+$10]
cmp edx,[eax+$14] cmovg edx,[eax+$14]
cmp edx,[eax+$18] cmovg edx,[eax+$18]
cmp edx,[eax+$1C] cmovg edx,[eax+$1C]
@loopfooter:
add eax,$20
cmp eax,[ebp-$04]
jl @loop
sub eax,$20
@restLoop:
cmp ecx,[eax] cmovl ecx,[eax]
cmp edx,[eax] cmovg edx,[eax]
add eax,$04
cmp eax,[ebp-$04]
jng @restLoop
push edx
mov edx,ecx
mov ecx,[ebp-$08]
mov [ecx],edx
pop edx
mov ecx,edx
mov edx,[esp+$08]
mov [edx],ecx
@exit:
end;
procedure MinMaxArray16steps(const AArray: array of Integer;
out AMax, AMin: integer);
asm
cmp edx,0
jl @exit
mov [ebp-$0C],edx
mov [ebp-$08],ecx
mov [ebp-$04],eax
shl edx,$02
cmp [ebp-$0C],$0F
jl @restInit
sub edx,$3C
@restInit:
add [ebp-$04],edx
mov ecx,0
mov edx,MaxInt
cmp [ebp-$0C],$0F
jl @restLoop
@loop:
cmp ecx,[eax+$00] jl @parseMax
cmp ecx,[eax+$04] jl @parseMax
cmp ecx,[eax+$08] jl @parseMax
cmp ecx,[eax+$0C] jl @parseMax
cmp ecx,[eax+$10] jl @parseMax
cmp ecx,[eax+$14] jl @parseMax
cmp ecx,[eax+$18] jl @parseMax
cmp ecx,[eax+$1C] jl @parseMax
cmp ecx,[eax+$20] jl @parseMax
cmp ecx,[eax+$24] jl @parseMax
cmp ecx,[eax+$28] jl @parseMax
cmp ecx,[eax+$2C] jl @parseMax
cmp ecx,[eax+$30] jl @parseMax
cmp ecx,[eax+$34] jl @parseMax
cmp ecx,[eax+$38] jl @parseMax
cmp ecx,[eax+$3C] jl @parseMax
jmp @checkMin
@parseMax:
cmp ecx,[eax+$00] cmovl ecx,[eax+$00]
cmp ecx,[eax+$04] cmovl ecx,[eax+$04]
cmp ecx,[eax+$08] cmovl ecx,[eax+$08]
cmp ecx,[eax+$0C] cmovl ecx,[eax+$0C]
cmp ecx,[eax+$10] cmovl ecx,[eax+$10]
cmp ecx,[eax+$14] cmovl ecx,[eax+$14]
cmp ecx,[eax+$18] cmovl ecx,[eax+$18]
cmp ecx,[eax+$1C] cmovl ecx,[eax+$1C]
cmp ecx,[eax+$20] cmovl ecx,[eax+$20]
cmp ecx,[eax+$24] cmovl ecx,[eax+$24]
cmp ecx,[eax+$28] cmovl ecx,[eax+$28]
cmp ecx,[eax+$2C] cmovl ecx,[eax+$2C]
cmp ecx,[eax+$30] cmovl ecx,[eax+$30]
cmp ecx,[eax+$34] cmovl ecx,[eax+$34]
cmp ecx,[eax+$38] cmovl ecx,[eax+$38]
cmp ecx,[eax+$3C] cmovl ecx,[eax+$3C]
@checkMin:
cmp edx,[eax+$00] jg @parseMin
cmp edx,[eax+$04] jg @parseMin
cmp edx,[eax+$08] jg @parseMin
cmp edx,[eax+$0C] jg @parseMin
cmp edx,[eax+$10] jg @parseMin
cmp edx,[eax+$14] jg @parseMin
cmp edx,[eax+$18] jg @parseMin
cmp edx,[eax+$1C] jg @parseMin
cmp edx,[eax+$20] jg @parseMin
cmp edx,[eax+$24] jg @parseMin
cmp edx,[eax+$28] jg @parseMin
cmp edx,[eax+$2C] jg @parseMin
cmp edx,[eax+$30] jg @parseMin
cmp edx,[eax+$34] jg @parseMin
cmp edx,[eax+$38] jg @parseMin
cmp edx,[eax+$3C] jg @parseMin
jmp @loopfooter
@parseMin:
cmp edx,[eax+$00] cmovg edx,[eax+$00]
cmp edx,[eax+$04] cmovg edx,[eax+$04]
cmp edx,[eax+$08] cmovg edx,[eax+$08]
cmp edx,[eax+$0C] cmovg edx,[eax+$0C]
cmp edx,[eax+$10] cmovg edx,[eax+$10]
cmp edx,[eax+$14] cmovg edx,[eax+$14]
cmp edx,[eax+$18] cmovg edx,[eax+$18]
cmp edx,[eax+$1C] cmovg edx,[eax+$1C]
cmp edx,[eax+$20] cmovg edx,[eax+$20]
cmp edx,[eax+$24] cmovg edx,[eax+$24]
cmp edx,[eax+$28] cmovg edx,[eax+$28]
cmp edx,[eax+$2C] cmovg edx,[eax+$2C]
cmp edx,[eax+$30] cmovg edx,[eax+$30]
cmp edx,[eax+$34] cmovg edx,[eax+$34]
cmp edx,[eax+$38] cmovg edx,[eax+$38]
cmp edx,[eax+$3C] cmovg edx,[eax+$3C]
@loopfooter:
add eax,$40
cmp eax,[ebp-$04]
jl @loop
sub eax,$40
@restLoop:
cmp ecx,[eax] cmovl ecx,[eax]
cmp edx,[eax] cmovg edx,[eax]
add eax,$04
cmp eax,[ebp-$04]
jng @restLoop
push edx
mov edx,ecx
mov ecx,[ebp-$08]
mov [ecx],edx
pop edx
mov ecx,edx
mov edx,[esp+$08]
mov [edx],ecx
@exit:
end;
--
[]'s
Gilberto Saraiva
gsaraiva.projects.pro.br/
 

Re:Re: I need the fastest routine

Hello Sasa,
"Sasa Zeman" < XXXX@XXXXX.COM >schrieb im Newsbeitrag
Quote
>2,565807s 4 9999991 Sasa Zeman - indices (4) while local 4 PAS ERR[99]
>2,566909s 4 9999991 Sasa Zeman - indices (5) while local 8 PAS
err[100]
>1,738044s 4 9999991 Sasa Zeman - indices (6) while local 16 PAS
err[100]

As well:
i:=0;
while i <= l1 do

Should be:
i:=0;
while i < l1 do
sorry, but after this change it isn't better:
2,565688s 1 9999989 Sasa Zeman - indices (4) while local 4 PAS ERR[100]
2,567285s 1 9999989 Sasa Zeman - indices (5) while local 8 PAS ERR[100]
1,738547s 1 9999989 Sasa Zeman - indices (6) while local 16 PAS ERR[100]
mfg.
Herby
www.hubert-seidel.de
 

Re:Re: I need the fastest routine

Hello Stig,
"Stig Johansen" < XXXX@XXXXX.COM >schrieb im Newsbeitrag
Quote
"Sasa Zeman" < XXXX@XXXXX.COM >wrote in message
news: XXXX@XXXXX.COM ...
>0,217537s 960051513 0 Using pointers (8/loop) !!! BUG !!!
BUG !! - we can't have that.
...
IF PIa^[I]>iMax THEN
iMax := PIa^[I];
end;
aMax := iMin;
aMin := iMax;
end;
..............................
This version is also little buggy, but i found the mistake
and fix them (Test28/Using pointers (16/loop)):
aMax := iMax;
aMin := iMin;
this works fine!
(you can find the result for this code at "Using pointers (16/loop)")
Here are the latest test-results (PIII 500):
1000000 zero test
7,094087s 0 0 Original
2,567256s 0 0 Original - optimized
2,691150s 0 0 Using pointers
2,567331s 4000014 0 Using pointers (8/loop) err[100]
2,690514s 0 0 Using indices
2,818878s 0 0 Reading array value once
2,572338s 0 0 Nenad Trkulja - loop unrolling
2,566876s 0 0 Hubert Seidel - local min and max
7,219392s 0 0 Pierre le Riche - boolean trick
2,566428s 0 0 Trkulja/Seidel - loop unrolling and local min and max
2,572008s 0 0 Gilberto Saraiva 8 ASM err[7]
1,546508s 0 0 Gilberto Saraiva 16 ASM err[15]
1,115688s 0 0 Hubert Seidel ASM
1,052188s 0 0 Hubert Seidel 8 PAS + ASM
2,566636s 0 0 Hubert Seidel ASM V1
2,565747s 0 0 Hubert Seidel ASM V2
2,567003s 0 0 Hubert Seidel ASM V3
2,566405s 0 0 Hubert Seidel ASM Rollup 8
7,092683s 0 0 Clement Doss PAS 1
2,568045s 0 0 Clement Doss PAS 2
2,689924s 0 0 Sasa Zeman - pointers PAS
2,690525s 0 0 Sasa Zeman - indices (1) for PAS
2,819353s 0 0 Sasa Zeman - indices (2) while PAS
2,568586s 0 0 Sasa Zeman - indices (3) while PAS
2,565917s 0 0 Sasa Zeman - indices (4) while local 4 PAS ERR[100]
2,565427s 0 0 Sasa Zeman - indices (5) while local 8 PAS ERR[100]
1,738126s 0 0 Sasa Zeman - indices (6) while local 16 PAS ERR[100]
1,546470s 0 0 Using pointers (16/loop)
1000000 step up test
7,092537s 0 999999 Original
2,575568s 0 999999 Original - optimized
2,794888s 0 999999 Using pointers
2,567521s 29332704 0 Using pointers (8/loop) err[100]
2,690970s 0 999999 Using indices
2,947357s 0 999999 Reading array value once
3,068873s 0 999999 Nenad Trkulja - loop unrolling
2,570664s 0 999999 Hubert Seidel - local min and max
7,218536s 0 999999 Pierre le Riche - boolean trick
2,566987s 0 999999 Trkulja/Seidel - loop unrolling and local min and max
2,591244s 0 999999 Gilberto Saraiva 8 ASM err[7]
2,688183s 0 999999 Gilberto Saraiva 16 ASM err[15]
1,411768s 0 999999 Hubert Seidel ASM
0,896376s 0 999999 Hubert Seidel 8 PAS + ASM
2,572246s 0 999999 Hubert Seidel ASM V1
2,571414s 0 999999 Hubert Seidel ASM V2
2,567900s 0 999999 Hubert Seidel ASM V3
2,566636s 0 999999 Hubert Seidel ASM Rollup 8
7,091933s 0 999999 Clement Doss PAS 1
2,569673s 0 999999 Clement Doss PAS 2
2,720919s 0 999999 Sasa Zeman - pointers PAS
3,068359s 0 999999 Sasa Zeman - indices (1) for PAS
2,820381s 0 999999 Sasa Zeman - indices (2) while PAS
2,817882s 0 999999 Sasa Zeman - indices (3) while PAS
2,576657s 0 999995 Sasa Zeman - indices (4) while local 4 PAS ERR[100]
2,565242s 0 999991 Sasa Zeman - indices (5) while local 8 PAS ERR[100]
2,680530s 0 999983 Sasa Zeman - indices (6) while local 16 PAS ERR[100]
2,315212s 0 999999 Using pointers (16/loop)
1000000 step down test
7,093732s -999999 0 Original
2,571139s -999999 0 Original - optimized
2,949233s -999999 0 Using pointers
2,566151s 9995562 -999999 Using pointers (8/loop) err[100]
3,064079s -999999 0 Using indices
3,070529s -999999 0 Reading array value once
3,075098s -999999 0 Nenad Trkulja - loop unrolling
2,576464s -999999 0 Hubert Seidel - local min and max
7,220073s -999999 0 Pierre le Riche - boolean trick
2,567230s -999999 0 Trkulja/Seidel - loop unrolling and local min and
max
2,567215s -999999 0 Gilberto Saraiva 8 ASM err[7]
1,860273s -999999 0 Gilberto Saraiva 16 ASM err[15]
1,115463s -999999 0 Hubert Seidel ASM
0,785151s -999999 0 Hubert Seidel 8 PAS + ASM
2,577515s -999999 0 Hubert Seidel ASM V1
2,567811s -999999 0 Hubert Seidel ASM V2
2,567038s -999999 0 Hubert Seidel ASM V3
2,568046s -999999 0 Hubert Seidel ASM Rollup 8
7,093718s -999999 0 Clement Doss PAS 1
2,569505s -999999 0 Clement Doss PAS 2
2,949095s -999999 0 Sasa Zeman - pointers PAS
2,948932s -999999 0 Sasa Zeman - indices (1) for PAS
3,075656s -999999 0 Sasa Zeman - indices (2) while PAS
2,657536s -999999 0 Sasa Zeman - indices (3) while PAS
2,565744s -999995 0 Sasa Zeman - indices (4) while local 4 PAS ERR[100]
2,567140s -999991 0 Sasa Zeman - indices (5) while local 8 PAS ERR[100]
1,804088s -999983 0 Sasa Zeman - indices (6) while local 16 PAS ERR[100]
1,671787s -999999 0 Using pointers (16/loop)
1000000 random numbers test
7,092953s 7 9999983 Original
2,566348s 7 9999983 Original - optimized
2,689810s 7 9999983 Using pointers
2,565887s 1936028704 1 Using pointers (8/loop) err[100]
2,690485s 7 9999983 Using indices
2,819341s 7 9999983 Reading array value once
2,568643s 7 9999983 Nenad Trkulja - loop unrolling
2,566842s 7 9999983 Hubert Seidel - local min and max
7,229466s 7 9999983 Pierre le Riche - boolean trick
2,565998s 7 9999983 Trkulja/Seidel - loop unrolling and local min and
max
2,565791s 7 9999983 Gilberto Saraiva 8 ASM err[7]
1,548736s 7 9999983 Gilberto Saraiva 16 ASM err[15]
1,409777s 7 9999983 Hubert Seidel ASM
1,064794s 7 9999983 Hubert Seidel 8 PAS + ASM
2,566759s 7 9999983 Hubert Seidel ASM V1
2,566054s 7 9999983 Hubert Seidel ASM V2
2,567975s 7 9999983 Hubert Seidel ASM V3
2,566388s 7 9999983 Hubert Seidel ASM Rollup 8
7,092997s 7 9999983 Clement Doss PAS 1
2,567977s 7 9999983 Clement Doss PAS 2
2,690408s 7 9999983 Sasa Zeman - pointers PAS
2,690195s 7 9999983 Sasa Zeman - indices (1) for PAS
2,819143s 7 9999983 Sasa Zeman - indices (2) while PAS
2,569021s 7 9999983 Sasa Zeman - indices (3) while PAS
2,565611s 7 9999983 Sasa Zeman - indices (4) while local 4 PAS ERR[100]
2,566667s 7 9999983 Sasa Zeman - indices (5) while local 8 PAS ERR[100]
1,738324s 7 9999983 Sasa Zeman - indices (6) while local 16 PAS ERR[100]
1,549664s 7 9999983 Using pointers (16/loop)
you can download the latest sources version from:
www.hubert-seidel.eu/downloads/minmaxarray.zip
mfg.
Herby
--
www.hubert-seidel.de