MPI Java Binding

Open MPI (since version 1.7) provides Java bindings to its native MPI implementation through the Java Native Interface (JNI).

EDIT (December 2022): use the module mpi/openmpi/gcc/openmpi@4.1.1/gcc-11.2.0 instead.

Load the Java MPI binding:

$ module load mpi/openmpi/icc/1.7.5 
$ module load lang/java

For convenience, the mpijavac wrapper compiler is provided for compiling Java-based MPI applications. It ensures that all required MPI libraries and class paths are defined. You can see the actual command line it invokes using the --showme option.
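For example (MyApp.java is only a placeholder; the exact javac command printed by --showme depends on your installation):

$ mpijavac MyApp.java
$ mpijavac --showme MyApp.java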

Once your application has been compiled, you can run it with the standard mpirun command line:

$ mpirun [options] java [your-java-options] my-app-class

For convenience, mpirun has been updated to detect the Java command and ensure that the required MPI libraries and class paths are defined to support execution. You therefore do NOT need to specify the Java library path to the MPI installation, nor the MPI classpath. Any class path definitions required for your application should be specified either on the command line or via the CLASSPATH environment variable. Note that the local directory will be added to the class path if nothing is specified. As always, the java executable, all required libraries, and your application classes must be available on all nodes.
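For example, an application class path can be given either on the java command line or through CLASSPATH (the path and class name below are only illustrative):

$ mpirun -np 4 java -cp /path/to/my/classes MyApp
# or, equivalently
$ export CLASSPATH=/path/to/my/classes
$ mpirun -np 4 java MyApp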

There is a known BUG related to PSM over InfiniBand. There are two workarounds for running MPI applications (see the example after the list):

  • solution 1: mpirun --mca mtl ^psm ...

    This solution may use a lot of memory.

  • solution 2: mpirun --mca pml ob1 ...
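
For example, to run the HelloWorld program below with the ob1 workaround (the process count is illustrative):

$ mpirun --mca pml ob1 -np 4 java HelloWorld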

HelloWorld.java
import mpi.*;

public class HelloWorld {

	public static void main(String[] args) throws Exception {

		MPI.Init(args);                       // initialize the MPI environment

		int me = MPI.COMM_WORLD.getRank();    // rank of this process
		int size = MPI.COMM_WORLD.getSize();  // total number of processes

		System.out.println("Hi from <" + me + "> of " + size);

		MPI.Finalize();                       // shut down MPI
	}
}

Build it

mpijavac HelloWorld.java

Run it

mpirun -np 4 java HelloWorld

Run it on the cluster under SGE

#!/bin/bash 
 
#$ -V
#$ -N test_sge
#$ -pe mpi 16
#$ -l h_vmem=4G 
 
module load lang/java
module load mpi/openmpi/gcc/1.7.5
 
## launch the application
mpirun -np $NSLOTS java -Xmx3G HelloWorld
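Assuming the script above is saved as test_sge.sh (the file name is arbitrary), it is submitted with the standard SGE command:

$ qsub test_sge.sh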

Check the Matrix Multiply example:

MatrixPar.java
import mpi.*;
import static mpi.MPI.slice;
 
/**
 * @author kmazouzi
 *
 * Matrix Multiply using Master/Worker paradigm
 */
public class MatrixPar {
 
	public static void main(String[] args) throws MPIException {
		int N = 400;
		int MASTER = 0;
		int FROM_MASTER = 1;
		int FROM_WORKER = 2;
		int numtasks, /* number of tasks in partition */
		taskid, /* a task identifier */
		numworkers, /* number of worker tasks */
		source, /* task id of message source */
		dest, /* task id of message destination */
 
		averow, extra, /* used to determine rows sent to each worker */
		i, j, k, /* misc */
		count;
		int[] a; /* matrix A to be multiplied */
		int[] b; /* matrix B to be multiplied */
		int[] c; /* result matrix C */
		int[] offset = new int[1];
		int[] rows = new int[1]; /* rows of matrix A sent to each worker */
 
		MPI.Init(args);
 
		if (args.length == 1) {
			N = Integer.parseInt(args[0]);
		}
 
		taskid = MPI.COMM_WORLD.getRank();
		numtasks = MPI.COMM_WORLD.getSize();
		numworkers = numtasks - 1;
 
		int mtype;
		/* *************** Master Task ****************** */
		if (taskid == MASTER) {
 
			/**
			 * The master initializes a, b and c.
			 * WARNING: workers allocate only the parts they need.
			 */
			a = new int[N * N]; /* matrix A to be multiplied */
			b = new int[N * N]; /* matrix B to be multiplied */
			c = new int[N * N]; /* result matrix C */
 
			System.out.println("Perfoming Parellel Matrix Multiply, size = "
					+ N + "x" + N + " . Using  " + numworkers + " worker");
			// Init matrix A,B
			for (i = 0; i < N; i++) {
				for (j = 0; j < N; j++) {
					a[(i * N) + j] = 1;
					b[(i * N) + j] = 2;
				}
			}
 
			long start = System.currentTimeMillis();
 
			// Send matrix data to worker tasks
			/**
			 * A is split by rows and each worker receives one part.
			 * The whole matrix B is sent to every worker.
			 */
			averow = N / numworkers;
			extra = N % numworkers;
			offset[0] = 0;
			mtype = FROM_MASTER;
 
			for (dest = 1; dest <= numworkers; dest++) {
				if (dest <= extra) {
					rows[0] = averow + 1;
				} else {
					rows[0] = averow;
				}
				System.out.println("Send offset : " + offset[0]
						+ ", and number of rows : " + rows[0]
						+ ", to the worker number : " + dest);
				MPI.COMM_WORLD.send(offset, 1, MPI.INT, dest, mtype);
				MPI.COMM_WORLD.send(rows, 1, MPI.INT, dest, mtype);
				count = rows[0] * N;
				MPI.COMM_WORLD.send(slice(a, (offset[0] * N)), count, MPI.INT,
						dest, mtype);
				count = N * N;
				MPI.COMM_WORLD.send(b, count, MPI.INT, dest, mtype);
				offset[0] = offset[0] + rows[0];
			}
 
			// Wait for results from all worker tasks
 
			mtype = FROM_WORKER;
			for (i = 1; i <= numworkers; i++) {
				source = i;
 
				MPI.COMM_WORLD.recv(offset, 1, MPI.INT, source, mtype);
				MPI.COMM_WORLD.recv(rows, 1, MPI.INT, source, mtype);
				count = rows[0] * N;
				MPI.COMM_WORLD.recv(slice(c, offset[0] * N), count, MPI.INT,
						source, mtype);
			}
			long stop = System.currentTimeMillis();
 
			System.out.println("Result of matrix c[0] = " + c[0] + ", c["
					+ (N * N - 2) + "] = " + c[N * N - 2]);
			System.out.println("Time Usage (ms) = " + (stop - start));
 
		}
 
		/*
		 * *************** Worker task ***************
		 * The worker receives matrix B and a part of A, performs the
		 * matrix multiply and sends the result back to the master.
		 */
		if (taskid != MASTER) {
			mtype = FROM_MASTER;
			source = MASTER;
			MPI.COMM_WORLD.recv(offset, 1, MPI.INT, source, mtype);
			MPI.COMM_WORLD.recv(rows, 1, MPI.INT, source, mtype);
			count = rows[0] * N;
			a = new int[count]; /* allocate the local part of A */
			c = new int[count]; /* allocate the local part of C */
			MPI.COMM_WORLD.recv(a, count, MPI.INT, source, mtype);
			count = N * N;
			b = new int[count]; /* allocate B */
			MPI.COMM_WORLD.recv(b, count, MPI.INT, source, mtype);
 
			for (i = 0; i < rows[0]; i++) {
				for (k = 0; k < N; k++) {
					c[(i * N) + k] = 0;
					for (j = 0; j < N; j++) {
						c[(i * N) + k] = c[(i * N) + k] + a[(i * N) + j]
								* b[(j * N) + k];
					}
				}
			}
 
			// send result back to Master
			mtype = FROM_WORKER;
 
			MPI.COMM_WORLD.send(offset, 1, MPI.INT, MASTER, mtype);
			MPI.COMM_WORLD.send(rows, 1, MPI.INT, MASTER, mtype);
			MPI.COMM_WORLD.send(c, rows[0] * N, MPI.INT, MASTER, mtype);
		}
 
		MPI.Finalize();
	}
}
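
Build and run it like the HelloWorld example above. Note that the master/worker scheme needs at least two MPI processes, and the optional argument sets the matrix size N (the values below are only illustrative):

mpijavac MatrixPar.java
mpirun -np 4 java MatrixPar 400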

Documentation