; Copyright (C) 2000 Jens Franke
; This file is part of mpqs4linux, distributed under the terms of the 
; GNU General Public Licence and WITHOUT ANY WARRANTY.

; You should have received a copy of the GNU General Public License along
; with this program; see the file COPYING.  If not, write to the Free
; Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
; 02111-1307, USA.

; Modified for NASM or YASM using Intel Syntax integrated with Microsoft
; VC++ 7.1 by Brian Gladman in April 2004.  Assembler commands:
;
; nasm -Xvc -f win32 -O3 -o "$(TargetDir)\$(InputName).obj" "$(InputPath)"
; yasm -Xvc -f win32 -o "$(TargetDir)\$(InputName).obj" "$(InputPath)"

; The function we want to write.
; void asm_sieve(FB_ptr,sieve_interval,ub,log_ptr,root_ptr,max_prime)

; List of stack arguments

; Incoming stack arguments of asm_sieve, addressed through the frame
; pointer set up in the prologue (saved ebp at [ebp], return address at
; [ebp+4], so the first argument is at [ebp+8]).  The code uses several
; of these slots as working variables and overwrites them while sieving.
%define FB_ptr				[ebp+ 8]
%define sieve_interval		[ebp+12]
%define ub					[ebp+16]
%define log_ptr				[ebp+20]
%define root_ptr			[ebp+24]
%define max_prime			[ebp+28]

; Local variables in the area reserved by "sub esp,stack_offset":
;   sieve_ub    = sieve_interval + ub (address just past the sieve area)
;   sieve_ub1   = sieve_ub - 3*P, loop bound for the looped small-prime case
;   prime_bound = ub/CountA rounded up, threshold for the current prime class
%define sieve_ub			[ebp- 4]
%define sieve_ub1			[ebp- 8]
%define prime_bound			[ebp-12]

; stack_offset: bytes of local storage (three dwords, see above).
; root_size:    size in bytes of one stored root offset.
; root_inc:     bytes per prime in the root array (two roots per prime).
stack_offset	equ		12
root_size		equ		 4
root_inc		equ	root_size + root_size

; SwitchRegs: exchange the roles of the two register pairs used for sieving.
; targ0/targ1 name the registers holding the current sieve addresses of the
; first and second root; targ2/targ3 name the registers that receive the
; next addresses.  Each invocation swaps (ecx,edx) with (edi,esi) between
; those roles and toggles the assembly-time flag CNTD.  No code is emitted.
%macro SwitchRegs 0
%if	CNTD == 1
; Current addresses now live in ecx/edx, next addresses go to edi/esi.
%define targ0	ecx
%define targ1	edx
%define	targ2	edi
%define targ3	esi
%assign	CNTD	  0
%else
; Current addresses now live in edi/esi, next addresses go to ecx/edx.
%define targ2	ecx
%define targ3	edx
%define targ0	edi
%define targ1	esi
%assign CNTD	  1
%endif
%endmacro

; InitRegs: reset the register-role assignment to its starting state.
; With CNTD forced to 0, SwitchRegs takes its second branch, so afterwards
; targ0=edi, targ1=esi, targ2=ecx, targ3=edx and CNTD=1.  No code is emitted.
%macro InitRegs 0
%assign	CNTD	0
	SwitchRegs
%endmacro

; Sieve2: unconditionally sieve two consecutive hits for each root.
; In:  targ0/targ1 = current sieve addresses of the two roots, ebx = prime P,
;      al = value added at the first root's positions, ah = value added at
;      the second root's positions.
; Out: targ2/targ3 = targ0+2P / targ1+2P; the register roles are then
;      swapped, so these become the new targ0/targ1.
; No bounds check is made here; the caller must ensure the addresses
; targ0, targ0+P, targ1 and targ1+P all lie inside the sieve area.
%macro	Sieve2 0
	add		[targ0],al
	lea		targ2,[targ0+ebx*2]	; next address for root 1 (lea leaves flags alone)
	lea		targ3,[targ1+ebx*2]	; next address for root 2
	add		[targ1],ah
	add		[targ0+ebx],al
	add		[targ1+ebx],ah
	SwitchRegs
%endmacro

; Sieve1: unconditionally sieve one hit for each root.
; In:  targ0/targ1 = current sieve addresses of the two roots, ebx = prime P,
;      al/ah = values added at the first/second root's position.
; Out: targ2/targ3 = targ0+P / targ1+P; the register roles are then swapped,
;      so these become the new targ0/targ1.
; No bounds check is made here; the caller must ensure both addresses lie
; inside the sieve area.
%macro Sieve1 0
	add		[targ0],al
	lea		targ2,[targ0+ebx]	; next address for root 1
	lea		targ3,[targ1+ebx]	; next address for root 2
	add		[targ1],ah
	SwitchRegs
%endmacro

; New_Roots reg_a, reg_b, label
; Store the roots for the next sieve interval and continue at the label.
;   parameter 1, parameter 2: registers holding the first sieve addresses
;                             at or beyond sieve_ub for the two roots
;   parameter 3:              label to jump to afterwards
; Both addresses are converted to offsets by subtracting sieve_ub, written
; to the two root slots at root_ptr, and root_ptr is advanced by root_inc.
; Clobbers eax, ebx, flags and the two registers passed as parameters, so
; the prime in ebx and the log values in al/ah are lost.
%macro New_Roots 3
	mov		ebx,root_ptr
	mov		eax,sieve_ub
	sub		%1,eax
	sub		%2,eax
	mov		[ebx],%1
	lea		eax,[root_inc+ebx]	; address of the next prime's root pair
	mov		[root_size+ebx],%2
	mov		root_ptr,eax
	jmp		%3
%endmacro

; Finish_Sieving label
; Perform the last, bounds-checked sieve step for the current prime, store
; the roots for the next interval via New_Roots and jump to the label given
; as parameter 1.  Three cases:
;   - targ0 is already at or beyond sieve_ub: nothing more is sieved and
;     targ0/targ1 are stored (first local label);
;   - only targ0 is still inside: it is sieved once and targ1 together with
;     targ0+P are stored, in that order (second local label);
;   - both are still inside: both are sieved once and targ0+P/targ1+P are
;     stored (fall-through path).
; The first case stores targ1 without testing it, so the code relies on
; targ1 being at or beyond sieve_ub whenever targ0 is.
; Does not call SwitchRegs, so the register roles are unchanged afterwards.
%macro Finish_Sieving 1
	cmp		sieve_ub,targ0
	jbe		%%1
	add		[targ0],al
	lea		targ2,[targ0+ebx]
	cmp		sieve_ub,targ1
	jbe		%%2
	add		[targ1],ah
	lea		targ3,[targ1+ebx]
	New_Roots	targ2, targ3, %1
%%1:New_Roots	targ0, targ1, %1
%%2:New_Roots	targ1, targ2, %1
%endmacro

; Write assembly code which does the following:
; We start each outer loop with a pointer, located in FB_ptr, to a prime P in
; the factorbase. If CountA=CountA_UB-1, the code tests the condition
; P<ub/CountA (rounded up). If this condition holds, a loop is entered
; which does the sieving for this prime. Otherwise, we jump
; to assembly code written for CountA-1.
;
; For CountA<CountA_UB-1, the behavior is very much the same. However,
; the sieving is implemented as a fixed sequence of instructions, rather
; than by a loop. This fixed sequence of instructions works if
; ub/(CountA+1)<P<ub/CountA.
;
; The case CountA=0, corresponding to primes P with
;              ub <= P <= infinity,
; is not coded as a macro because it requires somewhat different
; consideration, both from the point of view of ensuring correct
; behaviour and from the point of view of code optimization.

; Sieve_With_Some_FB_Primes (no parameters)
; Emits the sieving code for one class of factor base primes, selected by
; the assembly-time counter CountA: all consecutive primes P, starting at
; FB_ptr, with P < prime_bound, where prime_bound = ub/CountA rounded up.
; For every such prime the two roots are read from root_ptr, the log value
; from log_ptr, all hits inside the sieve area are added, and the roots for
; the next interval are written back (via Finish_Sieving / New_Roots, which
; also advances root_ptr).  FB_ptr and log_ptr are advanced here.
;
; Register use per prime: ebx = P, al = log value for the first root,
; ah = log value for the second root (0 when both roots coincide, so a
; double root is only counted once), edi/esi/ecx/edx = sieve addresses.
;
; Assembly-time side effects: each expansion decrements CountA and updates
; CountB/CountA_odd so that the next expansion handles the next larger
; class of primes.  CountA must be nonzero (it is used as a divisor).
%macro Sieve_With_Some_FB_Primes 0
	mov		eax,ub
	xor		edx,edx				; div uses edx:eax as dividend
	mov		ecx,CountA
	div		ecx
	xor		ecx,ecx
	test	edx,edx
	setnz	cl
	add		eax,ecx				; round the quotient up

; If the division produced a remainder, this means that the CountA-fold
; of the previous value of eax was somewhat less than the subsieve size.

	mov		prime_bound,eax
%%1:mov		esi,FB_ptr			; Load pointer into FB
	mov		edi,log_ptr			; Load pointer into log table
	mov		ebx,[esi]			; Load prime P
	lea		ecx,[esi+4]			; Prepare replacing FBptr by next value
	cmp		prime_bound,ebx		; Does P satisfy our condition?
	mov		al,[edi]			; Log value --> al
	jbe		%%6					; If P is too large, goto next label
	mov		FB_ptr,ecx			; actually replace FBptr by next value
	inc		edi					; prepare incrementing the pointer into the log table
	mov		ecx,root_ptr		; points to offset of first root from sieve start
	mov		log_ptr,edi			; Actually increment pointer into the log table
	mov		edi,[ecx]			; now edi= offset of first root
	mov		esi,[root_size+ecx]	; now esi= offset of second root
								; root_ptr itself is advanced later by New_Roots
	xor		ah,ah
	cmp		edi,esi
	mov		ecx,sieve_interval	; mov leaves the flags of the cmp intact
	sete	ah					; ah = 1 if both roots coincide
	add		edi,ecx				; Shift both roots into the memory
	dec		ah					; ah = 0 for a double root, 0xFF otherwise
	add		esi,ecx				; that was reserved for sieving
	and		ah,al				; ah = 0 for a double root, log value otherwise
	InitRegs

%if CountA == CountA_UB - 1

; Code the sieving with respect to the small primes as a loop.
; Note that havoc will result if CountA_UB<5.

	lea		ecx,[ebx+ebx*2]		; 3*P
	mov		edx,sieve_ub
	sub		edx,ecx
	mov		sieve_ub1,edx		; loop while four more hits of root 2 fit
%%2:Sieve2
	Sieve2
	cmp		sieve_ub1,targ1
	ja		%%2
	cmp		sieve_ub,targ1
	ja		%%3
	Finish_Sieving %%1
%%3:Sieve1
	cmp		sieve_ub,targ1
	ja		%%4
	Finish_Sieving %%1
%%4:Sieve1
	cmp		sieve_ub,targ1
	ja		%%5
	Finish_Sieving %%1
%%5:Sieve1

%else

%rep CountB		; Not considering a very small prime. Create code
	Sieve2		; for a fixed number of repetitions.
%endrep

%if CountA_odd == 1 
	Sieve1
%endif

%endif
	Finish_Sieving %%1

; Prepare the assembly-time counters for the next expansion, which has to
; emit one guaranteed hit per root less than this one.

%if CountA_odd == 1 
%assign CountA_odd	0
%else
%assign CountB		CountB - 1
%assign CountA_odd	1
%endif
%assign CountA	CountA - 1

%%6:

%endmacro

; Initial assembly-time state consumed by Sieve_With_Some_FB_Primes.
; CountA_UB:  the expansion with CountA == CountA_UB-1 is coded as a loop.
; CountA:     divisor for the prime bound of the next expansion.
; CountB:     number of Sieve2 repetitions emitted by a non-loop expansion.
; CountA_odd: 1 if a non-loop expansion emits one additional Sieve1.
; CountC:     not referenced anywhere else in the visible source.
%assign CountA_UB	5
%assign CountA		4
%assign CountB		2
%assign CountA_odd	0
%assign CountC		1

	section	.text
	align	4
	global	_asm_sieve

;-----------------------------------------------------------------------
; void asm_sieve(FB_ptr,sieve_interval,ub,log_ptr,root_ptr,max_prime)
; Target: 32-bit x86, arguments on the stack, plain "ret" (the caller
;         removes the arguments).
; In:     FB_ptr         pointer to the next factor base prime (dwords)
;         sieve_interval start address of the sieve area (bytes)
;         ub             length of the sieve area in bytes
;         log_ptr        pointer to the log byte of the next prime
;         root_ptr       pointer to the root pair of the next prime
;                        (two dword offsets per prime)
;         max_prime      sieving stops at the first prime >= max_prime
; Out:    none.  The log values are added into the sieve area and the
;         root pairs are replaced by the offsets valid for the next
;         interval (old offset plus the skipped multiples of P minus ub).
; Clobb:  none; all general registers are saved with pushad/popad.
; Stack:  stack_offset bytes of locals below the saved ebp.
; Note:   the small-prime macros do not test max_prime; only the final
;         loop for primes >= ub does.
;-----------------------------------------------------------------------
_asm_sieve:
	push	ebp
	mov		ebp,esp
	sub		esp,stack_offset
	pushad
	mov		eax,sieve_interval
	mov		ebx,ub
	add		ebx,eax
	mov		sieve_ub,ebx		; address just past the sieve area

; One expansion per prime class, CountA = 4, 3, 2, 1.

	Sieve_With_Some_FB_Primes
	Sieve_With_Some_FB_Primes
	Sieve_With_Some_FB_Primes
	Sieve_With_Some_FB_Primes

; Finally, the primes which are larger than the subsieve size.
; We assume that in most cases the prime is not used for sieving
; and store the following things in registers:
; FB_ptr:		ebx
; root_ptr:		esi
; root2_ptr:	edi
; esi is the read pointer and edi the write pointer into the root array;
; both start at root_ptr and advance by root_inc on every path, so the
; roots are updated in place.

	mov		ebx,FB_ptr
	mov		esi,root_ptr
	mov		edi,esi
.1:	mov		eax,[ebx]			; eax = prime P
	cmp		max_prime,eax
	jbe		.4					; P >= max_prime: done
	mov		ecx,[esi]			; first root
	lea		ebx,[ebx+4]			; advance FB pointer, P stays at [ebx-4]
	sub		ecx,ub				; borrow <=> first root lies in the interval
	mov		edx,[root_size+esi]	; second root (mov keeps the flags)
	jb		.2

; No hit for the first root; the second root is not tested on this path.
; Both roots are just reduced by ub.

	sub		edx,ub
	inc		dword log_ptr
	mov		[edi],ecx
	lea		esi,[root_inc+esi]
	mov		[root_size+edi],edx
	lea		edi,[root_inc+edi]
	jmp		.1

; The first root hits: add the log value at sieve_interval + root, the
; root for the next interval is root - ub + P.

.2:	mov		eax,log_ptr
	inc		dword log_ptr
	mov		edx,ecx
	mov		al,[eax]			; al = log value of P
	add		ecx,sieve_ub		; (root-ub) + (sieve_interval+ub)
	add		edx,[ebx-4]			; edx = root - ub + P
	add		[ecx],al
	mov		ecx,[root_size+esi]
	sub		ecx,ub				; borrow <=> second root hits as well
	lea		esi,[root_inc+esi]	; lea keeps the flags for the jb below
	jb		.3

; Only the first root was advanced by P; the two values are stored in
; exchanged slots.

	mov		[root_size+edi],edx
	mov		[edi],ecx
	lea		edi,[root_inc+edi]
	jmp		.1

; Both roots hit: sieve the second one as well and advance it by P.

.3:	mov		[edi],edx
	mov		edx,ecx
	add		ecx,sieve_ub
	add		edx,[ebx-4]
	add		[ecx],al
	mov		[root_size+edi],edx
	lea		edi,[root_inc+edi]
	jmp		.1
.4:	popad
	mov		esp,ebp
	pop		ebp
	ret
