Using Auto-parallelization

Auto-parallelization automatically detects code sequences that can be correctly and profitably executed concurrently, in separate threads. For example, the iterations of the first loop below can be computed in two threads, as the Thread 1 and Thread 2 loops that follow it show:

! Serial form: a single thread walks the whole iteration range 1..100.
do i = 1, 100
   a(i) = a(i) + b(i) * c(i)
end do

Thread 1

! Lower part of the iteration range (1..50).
do i = 1, 50
   a(i) = a(i) + b(i) * c(i)
end do

Thread 2

! Upper part of the iteration range. It starts at 51, not 50, so that it does
! not overlap the 1..50 range handled by thread 1: if both threads updated
! a(50), that element would be accumulated twice and written concurrently.
do i = 51, 100
   a(i) = a(i) + b(i) * c(i)
end do

Auto-parallelization Examples

Use auto-parallelization for the subroutine daxpy.

Subroutine daxpy

! Serial source of daxpy: for every element, scales a(i) by s and adds b(i),
! storing the result back into a(i).
subroutine daxpy
  implicit none
  integer, parameter :: n = 1000
  double precision :: a(n), b(n), s
  integer :: i

  ! Iteration i reads and writes only element i of a and reads element i of b.
  do i = 1, n
    a(i) = a(i) * s + b(i)
  end do
end subroutine daxpy

 

Parent Thread Code

! Parent-thread version of daxpy. The loop itself lives in the external
! routine daxpy_par_loop0; this routine only chooses how that routine is
! invoked, based on the result of kmpc_ok_to_fork().
subroutine daxpy
  implicit none
  integer, parameter :: n = 1000
  double precision :: a(n), b(n), s
  integer :: kmpc_tid
  logical, external :: kmpc_ok_to_fork
  external :: daxpy_par_loop0

  if ( kmpc_ok_to_fork() .eqv. .true. ) then
    ! Test passed: pass the loop routine and its data to kmpc_fork_call.
    call kmpc_fork_call( daxpy_par_loop0, a, b, s )
  else
    ! Test failed: call the loop routine directly, bracketed by the
    ! serialized-parallel begin/end calls.
    call kmpc_serialized_parallel( kmpc_tid )
    call daxpy_par_loop0( kmpc_tid, a, b, s )
    call kmpc_end_serialized_parallel( kmpc_tid )
  end if
end subroutine daxpy

In the above example, work threshold and runtime dependency tests are combined in this if statement:
if ( kmpc_ok_to_fork() .eqv. .true. )

Child Thread Code

! Child-thread version of the daxpy loop. It is named daxpy_par_loop0 because
! that is the name the parent thread code declares as external and calls with
! the argument list (kmpc_tid, a, b, s); naming it daxpy would also clash with
! the argument-less parent routine of that name.
!
! Arguments:
!   kmpc_tid  passed through to the kmpc_for_static_* calls
!             (NOTE(review): presumably a thread identifier -- confirm)
!   a         updated in place: a(i) = a(i) * s + b(i)
!   b, s      read only
subroutine daxpy_par_loop0(kmpc_tid, a, b, s)
  implicit none
  integer, parameter :: n = 1000
  integer :: kmpc_tid
  double precision, intent(inout) :: a(n)
  double precision, intent(in) :: b(n), s
  integer :: lb, ub, inc, i

  ! lb, ub and inc are not assigned anywhere else in this routine, so
  ! kmpc_for_static_init is expected to fill in this thread's loop bounds.
  call kmpc_for_static_init( kmpc_tid, lb, ub, inc )
  do i = lb, ub, inc
    a(i) = a(i) * s + b(i)
  end do
  call kmpc_for_static_fini( kmpc_tid )
end subroutine daxpy_par_loop0

 

Using /Qpar_report

! Example with two loops, labelled by the reason each one is of interest:
! a call to an external function inside the loop body, and a loop whose
! iterations depend on one another.
!
! Comments use "!" rather than a "C" in column 1: the statements here do not
! start in column 7, so this is free-form source, where "C" does not begin a
! comment.
subroutine daxpy()
  implicit none
  integer, parameter :: n = 1000
  real, external :: foo      ! default real, as implicit typing gave it before
  integer, pointer :: q(:)
  integer :: i

  ! Assumed side effects: foo is external, so its body is not visible here.
  do i = 1, n
    q(i) = q(i) + foo()
  end do

  ! Real dependency: iteration i reads q(i-1), which iteration i-1 writes.
  do i = 1, n
    q(i) = q(i-1) + i
  end do
end subroutine daxpy

 

Using Auto-parallelization Directive

! Example of a loop preceded by the PARALLEL directive.
subroutine daxpy
  implicit none
  integer, parameter :: n = 1000
  double precision :: a(n)
  integer :: x(n)
  integer :: i

  ! Each iteration writes a(i) but reads a through the index array x, so the
  ! element it reads depends on the run-time contents of x. The directive is
  ! kept directly in front of the loop it applies to.
  ! NOTE(review): presumably the directive asserts that the loop is safe to
  ! parallelize -- confirm against the compiler documentation.
!DIR$ PARALLEL
  do i = 1, n
    a(i) = a(x(i)) * 2
  end do
end subroutine daxpy