program test

  implicit none

! Shows the influence of the cache on code performance.
!
! Fortran stores arrays in column-major order: the FIRST index is the one
! that is contiguous in memory.  The same update
!     a(i,j) = a(i,j) + b(i,j) + c(i,j) + alpha
! is timed twice over the whole matrix:
!   "bad"  nest : outer loop on i, inner loop on j -> the inner loop jumps
!                 from column to column (strided memory access)
!   "good" nest : outer loop on j, inner loop on i -> the inner loop walks
!                 down a column (contiguous memory access, best)

! module load intel
! ifort 1-loop-interch.f90 -O0 -g -o loop-interch
! ifort 1-loop-interch.f90 -o loop-interch
! ifort 1-loop-interch.f90 -O3 -o loop-interch
! ./loop-interch
! time ./loop-interch

! ifort 1-loop-interch.f90 -O0 -g -pg -o loop-interch
! gprof ./loop-interch gmon.out > loop.gprof
! gprof -l ./loop-interch gmon.out > loop.gprof

! #########################################################

! HOMEPC
! gfortran 1-loop-interch.f90 -O0 -g -o loop-interch
! gfortran 1-loop-interch.f90 -o loop-interch
! gfortran 1-loop-interch.f90 -O3 -o loop-interch
! time ./loop-interch

! gfortran 1-loop-interch.f90 -O0 -g -pg -o loop-interch
! gprof ./loop-interch gmon.out > loop.gprof
! gprof -l ./loop-interch gmon.out > loop.gprof

  ! 64-bit integer kind for the clock counters: with wide arguments
  ! system_clock typically reports a much finer tick and a count_max large
  ! enough that a wrap-around during the run is not a practical concern.
  integer, parameter :: i8 = selected_int_kind(18)

  integer, parameter :: n     = 7000   ! matrices are n x n
  integer, parameter :: niter = 2      ! repetitions of each timed sweep

  real, allocatable :: a(:,:), b(:,:), c(:,:)
  real :: alpha
  integer :: i, j, t
  integer(i8) :: t1, t2, tr            ! start count, end count, ticks/second

  alpha = sqrt(1.)

  allocate(a(n, n), b(n, n), c(n, n))

  a = 0.
  b = 1.
  c = 2.

!!!!!!!!!!!!!!!!!!!!!!!!! BAD START !!!!!!!!!!!!!!!!!!!!!!!

  write(*,*) "bad start - index=i,j"
  call system_clock(t1, tr)

  do t = 1, niter

     ! Inner loop runs over the SECOND index: consecutive iterations touch
     ! elements that are n reals apart in memory.
     do i = 1, n
        do j = 1, n
           a(i,j) = a(i,j) + b(i,j) + c(i,j) + alpha
        enddo
     enddo

  enddo
  call system_clock(t2)
  write(*,*) "bad end - time ", real(t2 - t1) / real(tr)
!!!!!!!!!!!!!!!!!!!!!!!!! BAD END !!!!!!!!!!!!!!!!!!!!!!!

  ! Never true at run time (alpha is 1); it only makes the arrays look
  ! "used" so the compiler does not discard the timed loops above.
  if (alpha < -1.) write(*,*) a, b, c

  a = 0.
  b = 1.
  c = 2.

!!!!!!!!!!!!!!!!!!!!!!!!! GOOD START !!!!!!!!!!!!!!!!!!!!!!!
  write(*,*) "good start - loop interchange"
  call system_clock(t1, tr)

  do t = 1, niter

     ! Inner loop runs over the FIRST index: consecutive iterations touch
     ! adjacent elements in memory.
     do j = 1, n
        do i = 1, n
           a(i,j) = a(i,j) + b(i,j) + c(i,j) + alpha
        enddo
     enddo

  enddo
  call system_clock(t2)
  write(*,*) "good end - time ", real(t2 - t1) / real(tr)
!!!!!!!!!!!!!!!!!!!!!!!!! GOOD END !!!!!!!!!!!!!!!!!!!!!!!

  ! Same never-true guard as after the first section, so both timed
  ! sections are protected identically against dead-code elimination.
  if (alpha < -1.) write(*,*) a, b, c

  deallocate(a, b, c)

end program test
