#include "mpi.h"
#include <stdio.h>
#include <stdlib.h>
#include <math.h>

#define N 8


/*
 * Print msg to standard output from rank 0 of comm only.
 *
 * Every rank of comm may call this; ranks other than 0 print nothing.
 */
void Print(char *msg, MPI_Comm comm)
{
  int rank;

  MPI_Comm_rank(comm, &rank);

  /* Only one rank emits the message so it appears once. */
  if (rank != 0)
    {
      return;
    }

  printf("%s", msg);
}


/*
 * Print the m-by-n array mat to standard output, one output line per
 * second index.
 *
 * Line j contains mat[0][j], mat[1][j], ..., mat[m-1][j], each followed
 * by a single space, and is terminated by a newline.  The array is thus
 * shown with its two indices swapped relative to its storage order.
 */
void PrintLocalMat(int m, int n, int mat[][n])
{
  for (int col = 0; col < n; col++)
    {
      for (int row = 0; row < m; row++)
        {
          printf("%d ", mat[row][col]);
        }

      printf("\n");
    }
}

/*
 * Print the m-by-n array held by every rank of comm, in rank order.
 *
 * Rank 0 prints its own array with PrintLocalMat, then receives the
 * array of rank 1, 2, ... in turn (m*n ints, tag 0) and prints each
 * one as it arrives.  Every other rank sends its array to rank 0.
 * All ranks of comm must call this function with the same m and n.
 */
void PrintMat(int m, int n, int mat[][n], MPI_Comm comm)
{
  int nranks, rank;

  MPI_Comm_size(comm, &nranks);
  MPI_Comm_rank(comm, &rank);

  /* Non-root ranks only ship their data to the printing rank. */
  if (rank != 0)
    {
      MPI_Send(mat, m*n, MPI_INT, 0, 0, comm);
      return;
    }

  /* Root: print the local part first, then each remote part in rank order. */
  int incoming[m][n];
  MPI_Status status;
  int src;

  PrintLocalMat(m, n, mat);

  for (src = 1; src < nranks; src++)
    {
      MPI_Recv(incoming, m*n, MPI_INT, src, 0, comm, &status);
      PrintLocalMat(m, n, incoming);
    }
}

/*
 * Transpose the n-by-n array mat in place.
 *
 * Each element below the diagonal is exchanged with its mirror image
 * above the diagonal; diagonal elements are left alone.
 */
void TransposeBlock(int n, int mat[n][n])
{
  for (int row = 1; row < n; row++)
    {
      for (int col = 0; col < row; col++)
        {
          int held = mat[row][col];

          mat[row][col] = mat[col][row];
          mat[col][row] = held;
        }
    }
}

/*
 * Transpose in place every complete n-by-n block of the m-by-n array mat.
 *
 * The array is treated as a stack of square blocks: rows 0..n-1 form the
 * first block, rows n..2n-1 the second, and so on.  Each block is
 * transposed independently with TransposeBlock.
 *
 * If m is not a multiple of n, the trailing rows that do not make up a
 * full block are left untouched (transposing them would access rows
 * beyond m-1).  If n is not positive the function does nothing.
 */
void TransposeBlocks(int m, int n, int mat[][n])
{
  int i;

  /* With n == 0 the loop counter would never advance. */
  if (n <= 0)
    return;

  /* Stop before a block that would extend past the last row. */
  for (i = 0; i <= m - n; i += n)
    TransposeBlock(n, &mat[i]);
}

/*
 * Transpose an N-by-N integer matrix that is distributed over the ranks
 * of MPI_COMM_WORLD.
 *
 * Each rank owns an N-by-n slice A with n = N / numprocs; entry A[i][j]
 * is set to 1000*(J+1) + i + 1, where J = myid*n + j is the global
 * second index.  MPI_Alltoall sends the k-th group of n rows of A
 * (n*n contiguous ints) to rank k and stores the group received from
 * rank k as the k-th group of n rows of B.  Transposing each n-by-n
 * group of B in place then completes the transpose.
 *
 * The number of processes must divide N; otherwise the program reports
 * the problem on rank 0 and exits with EXIT_FAILURE.
 */
int main(int argc, char *argv[])
{
    int numprocs, myid, n;

    MPI_Init(&argc,&argv);
    MPI_Comm_size(MPI_COMM_WORLD,&numprocs);
    MPI_Comm_rank(MPI_COMM_WORLD,&myid);

    /*
     * The exchange below moves numprocs blocks of n*n ints per rank,
     * which covers the whole N-by-n slice only when numprocs * n == N.
     * This also rejects numprocs > N, where n would be 0.  Every rank
     * sees the same numprocs, so all of them take this branch together.
     */
    if (N % numprocs != 0)
      {
        if (myid == 0)
          fprintf(stderr,
                  "error: matrix size %d is not divisible by the number of processes (%d)\n",
                  N, numprocs);
        MPI_Finalize();
        return EXIT_FAILURE;
      }

    n = N / numprocs;

    int A[N][n];
    int B[N][n];

    int i,j, J;

    /* Fill the local slice; J is the global second index of local column j. */
    for (i=0; i < N; i++)
      for (j=0, J = myid*n; j < n; j++, J++)
        A[i][j] = 1000*(J+1) + i + 1;

    Print("A:\n", MPI_COMM_WORLD);
    PrintMat(N, n, A, MPI_COMM_WORLD);

    /* Swap n-by-n blocks between ranks, then transpose each block locally. */
    MPI_Alltoall(A, n*n, MPI_INT, B, n*n, MPI_INT,  MPI_COMM_WORLD);
    TransposeBlocks(N, n, B);

    Print("B = At:\n", MPI_COMM_WORLD);
    PrintMat(N, n, B, MPI_COMM_WORLD);

    MPI_Finalize();
    return 0;
}
