Skip to content

Commit

Permalink
[feat] initial implementation of mua/mus/g/n all float format
Browse files Browse the repository at this point in the history
  • Loading branch information
fangq committed Dec 29, 2023
1 parent 9831c7e commit 1e3b031
Show file tree
Hide file tree
Showing 6 changed files with 144 additions and 169 deletions.
1 change: 1 addition & 0 deletions src/mcx_const.h
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,7 @@
#define MCX_DEBUG_PROGRESS 4 /**< debug flags: 4 - print progress bar */
#define MCX_DEBUG_MOVE_ONLY 8 /**< debug flags: 8 - only save photon trajectory data, disable volume and detphoton output */

#define MEDIA_ASGN_F2H 96 /**< media format: input: float4 {mua,mus,g,n} -> {[half: mua],[half: mus],[half: g],[half: n]} */
#define MEDIA_2LABEL_SPLIT 97 /**< media format: 64bit:{[byte: lower label][byte: upper label][byte*3: reference point][byte*3: normal vector]} */
#define MEDIA_2LABEL_MIX 98 /**< media format: {[int: label1][int: label2][float32: label1 %]} -> 32bit:{[short 0-32767 scaled label1 %],[byte: label2],[byte: label1]} */
#define MEDIA_LABEL_HALF 99 /**< media format: {[float32: mua/mus/g/n][float32: 1/2/3/4][float32: label]} -> 32bit:{[half: mua/mus/g/n][int16: [B15-B16: 0/1/2/3][B1-B14: tissue type]} */
Expand Down
21 changes: 18 additions & 3 deletions src/mcx_core.cu
Original file line number Diff line number Diff line change
Expand Up @@ -580,7 +580,7 @@ __device__ void updateproperty(Medium* prop, unsigned int& mediaid, RandType t[R
} else if (gcfg->mediaformat == MEDIA_MUA_FLOAT) { //< [f0]: single-prec mua every voxel; mus/g/n uses 2nd row in gcfg.prop
prop->mua = fabsf(*((float*)&mediaid));
prop->n = gproperty[!(mediaid & MED_MASK) == 0].w;
} else if (gcfg->mediaformat == MEDIA_AS_F2H || gcfg->mediaformat == MEDIA_AS_HALF) { //< [h1][h0]: h1/h0: single-prec mua/mus for every voxel; g/n uses those in cfg.prop(2,:)
} else if (gcfg->mediaformat == MEDIA_AS_F2H || gcfg->mediaformat == MEDIA_AS_HALF) { //< [h1][h0]: h1/h0: half-prec mua/mus for every voxel; g/n uses those in cfg.prop(2,:)
union {
unsigned int i;
#if ! defined(__CUDACC_VER_MAJOR__) || __CUDACC_VER_MAJOR__ >= 9
Expand All @@ -593,6 +593,21 @@ __device__ void updateproperty(Medium* prop, unsigned int& mediaid, RandType t[R
prop->mua = fabsf(__half2float(val.h[0]));
prop->mus = fabsf(__half2float(val.h[1]));
prop->n = gproperty[!(mediaid & MED_MASK) == 0].w;
} else if (gcfg->mediaformat == MEDIA_ASGN_F2H) { //< [h3][h2][h1][h0]: h3/h2/h1/h0: half-prec n/g/mus/mua for every voxel
union {
unsigned int i[2];
#if ! defined(__CUDACC_VER_MAJOR__) || __CUDACC_VER_MAJOR__ >= 9
__half_raw h[4];
#else
half h[4];
#endif
} val;
val.i[0] = mediaid & MED_MASK;
val.i[1] = media[idx1d + gcfg->dimlen.z];
prop->mua = fabsf(__half2float(val.h[0]));
prop->mus = fabsf(__half2float(val.h[1]));
prop->g = fabsf(__half2float(val.h[2]));
prop->n = fabsf(__half2float(val.h[3]));
} else if (gcfg->mediaformat == MEDIA_2LABEL_MIX) { //< [s1][c1][c0]: s1: (volume fraction of tissue 1)*(2^16-1), c1: tissue 1 label, c0: tissue 0 label
union {
unsigned int i;
Expand Down Expand Up @@ -2917,7 +2932,7 @@ void mcx_run_simulation(Config* cfg, GPUInfo* gpu) {
/**
* Allocate all GPU buffers to store input or output data
*/
if (cfg->mediabyte != MEDIA_2LABEL_SPLIT) {
if (cfg->mediabyte != MEDIA_2LABEL_SPLIT && cfg->mediabyte != MEDIA_ASGN_F2H) {
CUDA_ASSERT(cudaMalloc((void**) &gmedia, sizeof(uint) * (cfg->dim.x * cfg->dim.y * cfg->dim.z)));
} else {
CUDA_ASSERT(cudaMalloc((void**) &gmedia, sizeof(uint) * (2 * cfg->dim.x * cfg->dim.y * cfg->dim.z)));
Expand Down Expand Up @@ -3086,7 +3101,7 @@ void mcx_run_simulation(Config* cfg, GPUInfo* gpu) {

mcx_flush(cfg);

if (cfg->mediabyte != MEDIA_2LABEL_SPLIT) {
if (cfg->mediabyte != MEDIA_2LABEL_SPLIT && cfg->mediabyte != MEDIA_ASGN_F2H) {
CUDA_ASSERT(cudaMemcpy(gmedia, media, sizeof(uint)*cfg->dim.x * cfg->dim.y * cfg->dim.z, cudaMemcpyHostToDevice));
} else {
CUDA_ASSERT(cudaMemcpy(gmedia, media, sizeof(uint) * 2 * cfg->dim.x * cfg->dim.y * cfg->dim.z, cudaMemcpyHostToDevice));
Expand Down
153 changes: 93 additions & 60 deletions src/mcx_utils.c
Original file line number Diff line number Diff line change
Expand Up @@ -210,8 +210,8 @@ const char* srctypeid[] = {"pencil", "isotropic", "cone", "gaussian", "planar",
* User can specify the source type using a string
*/

const unsigned int mediaformatid[] = {1, 2, 4, 97, 98, 99, 100, 101, 102, 103, 104, 0};
const char* mediaformat[] = {"byte", "short", "integer", "svmc", "mixlabel", "labelplus",
const unsigned int mediaformatid[] = {1, 2, 4, 96, 97, 98, 99, 100, 101, 102, 103, 104, 0};
const char* mediaformat[] = {"byte", "short", "integer", "asgn_float", "svmc", "mixlabel", "labelplus",
"muamus_float", "mua_float", "muamus_half", "asgn_byte", "muamus_short", ""
};

Expand Down Expand Up @@ -1546,7 +1546,7 @@ void mcx_preprocess(Config* cfg) {

if (cfg->isrowmajor) {
/*from here on, the array is always col-major*/
if (cfg->mediabyte == MEDIA_2LABEL_SPLIT) {
if (cfg->mediabyte == MEDIA_2LABEL_SPLIT || cfg->mediabyte == MEDIA_ASGN_F2H) {
mcx_convertrow2col64((size_t**) & (cfg->vol), &(cfg->dim));
} else {
mcx_convertrow2col(&(cfg->vol), &(cfg->dim));
Expand Down Expand Up @@ -3200,18 +3200,20 @@ void mcx_loadvolume(char* filename, Config* cfg, int isbuf) {
}

datalen = cfg->dim.x * cfg->dim.y * cfg->dim.z;
cfg->vol = (unsigned int*)malloc(sizeof(unsigned int) * datalen * (1 + (cfg->mediabyte == MEDIA_2LABEL_SPLIT)));
cfg->vol = (unsigned int*)malloc(sizeof(unsigned int) * datalen * (1 + (cfg->mediabyte == MEDIA_2LABEL_SPLIT || cfg->mediabyte == MEDIA_ASGN_F2H)));

if (!isbuf) {
if (cfg->mediabyte == MEDIA_AS_F2H) {
inputvol = (unsigned char*)malloc(sizeof(unsigned char) * (datalen << 3));
} else if (cfg->mediabyte == MEDIA_ASGN_F2H) {
inputvol = (unsigned char*)malloc(sizeof(unsigned char) * (datalen << 4));
} else if (cfg->mediabyte >= 4) {
inputvol = (unsigned char*)(cfg->vol);
} else {
inputvol = (unsigned char*)malloc(sizeof(unsigned char) * cfg->mediabyte * datalen);
}

res = fread(inputvol, sizeof(unsigned char) * ((cfg->mediabyte == MEDIA_AS_F2H || cfg->mediabyte == MEDIA_2LABEL_SPLIT) ? 8 : MIN(cfg->mediabyte, 4)), datalen, fp);
res = fread(inputvol, sizeof(unsigned char) * ((cfg->mediabyte == MEDIA_ASGN_F2H) ? 16 : ((cfg->mediabyte == MEDIA_AS_F2H || cfg->mediabyte == MEDIA_2LABEL_SPLIT) ? 8 : MIN(cfg->mediabyte, 4))), datalen, fp);
fclose(fp);

if (res != datalen) {
Expand Down Expand Up @@ -3253,70 +3255,28 @@ void mcx_loadvolume(char* filename, Config* cfg, int isbuf) {

cfg->vol[i] = f2i.i;
}
} else if (cfg->mediabyte == MEDIA_AS_F2H) {
} else if (cfg->mediabyte == MEDIA_AS_F2H || cfg->mediabyte == MEDIA_ASGN_F2H) {
float* val = (float*)inputvol;
union {
float f[2];
unsigned int i[2];
unsigned short h[2];
} f2h;
unsigned short tmp, m;
float f2h[2];
int offset = (cfg->mediabyte == MEDIA_ASGN_F2H);

for (i = 0; i < datalen; i++) {
f2h.f[0] = val[i << 1] * cfg->unitinmm;
f2h.f[1] = val[(i << 1) + 1] * cfg->unitinmm;
f2h[0] = val[i << (1 + offset)] * cfg->unitinmm; // mua
f2h[1] = val[(i << (1 + offset)) + 1] * cfg->unitinmm; // mus

if (f2h.f[0] != f2h.f[0] || f2h.f[1] != f2h.f[1]) { /*if one of mua/mus is nan in continuous medium, convert to 0-voxel*/
if (f2h[0] != f2h[0] || f2h[1] != f2h[1]) { /*if one of mua/mus is nan in continuous medium, convert to 0-voxel*/
cfg->vol[i] = 0;
continue;
}

/**
float to half conversion
https://stackoverflow.com/questions/3026441/float32-to-float16/5587983#5587983
https://gamedev.stackexchange.com/a/17410 (for denorms)
*/
m = ((f2h.i[0] >> 13) & 0x03ff);
tmp = (f2h.i[0] >> 23) & 0xff; /*exponent*/
tmp = (tmp - 0x70) & ((unsigned int)((int)(0x70 - tmp) >> 4) >> 27);

if (m < 0x10 && tmp == 0) { /*handle denorms - between 2^-24 and 2^-14*/
unsigned short sign = (f2h.i[0] >> 16) & 0x8000;
tmp = ((f2h.i[0] >> 23) & 0xff);
m = (f2h.i[0] >> 12) & 0x07ff;
m |= 0x0800u;
f2h.h[0] = sign | ((m >> (114 - tmp)) + ((m >> (113 - tmp)) & 1));
if (cfg->mediabyte == MEDIA_ASGN_F2H) {
cfg->vol[i] = mcx_float2half2(f2h);
f2h[0] = val[(i << 2) + 2]; // g
f2h[1] = val[(i << 2) + 3]; // n
cfg->vol[i + datalen] = mcx_float2half2(f2h);
} else {
f2h.h[0] = (f2h.i[0] >> 31) << 5;
f2h.h[0] = (f2h.h[0] | tmp) << 10;
f2h.h[0] |= (f2h.i[0] >> 13) & 0x3ff;
cfg->vol[i] = mcx_float2half2(f2h);
}

m = ((f2h.i[1] >> 13) & 0x03ff);
tmp = (f2h.i[1] >> 23) & 0xff; /*exponent*/
tmp = (tmp - 0x70) & ((unsigned int)((int)(0x70 - tmp) >> 4) >> 27);

if (m < 0x10 && tmp == 0) { /*handle denorms - between 2^-24 and 2^-14*/
unsigned short sign = (f2h.i[1] >> 16) & 0x8000;
tmp = ((f2h.i[1] >> 23) & 0xff);
m = (f2h.i[1] >> 12) & 0x07ff;
m |= 0x0800u;
f2h.h[1] = sign | ((m >> (114 - tmp)) + ((m >> (113 - tmp)) & 1));
} else {
f2h.h[1] = (f2h.i[1] >> 31) << 5;
f2h.h[1] = (f2h.h[1] | tmp) << 10;
f2h.h[1] |= (f2h.i[1] >> 13) & 0x3ff;
}

if (f2h.i[0] == 0) { /*avoid being detected as a 0-label voxel, setting mus=EPS_fp16*/
f2h.i[0] = 0x00010000;
}

if (f2h.i[0] == SIGN_BIT) { /*avoid being detected as a 0-label voxel, setting mus=EPS_fp16*/
f2h.i[0] = 0;
}

cfg->vol[i] = f2h.i[0];
}
} else if (cfg->mediabyte == MEDIA_2LABEL_SPLIT) {
memcpy(cfg->vol, inputvol, (datalen << 3));
Expand All @@ -3331,7 +3291,7 @@ void mcx_loadvolume(char* filename, Config* cfg, int isbuf) {
}
}

if (!isbuf && (cfg->mediabyte < 4 || cfg->mediabyte == MEDIA_AS_F2H)) {
if (!isbuf && (cfg->mediabyte < 4 || cfg->mediabyte == MEDIA_AS_F2H || cfg->mediabyte == MEDIA_ASGN_F2H)) {
free(inputvol);
}
}
Expand Down Expand Up @@ -4985,6 +4945,77 @@ int mcx_isbinstr(const char* str) {
return 1;
}

/**
* @brief Function to parse one subfield of the input structure
*
* This function reads in all necessary information from the cfg input structure.
* it can handle single scalar inputs, short vectors (3-4 elem), strings and arrays.
*
* @param[in] root: the cfg input data structure
* @param[in] item: the current element of the cfg input data structure
* @param[in] idx: the index of the current element (starting from 0)
* @param[out] cfg: the simulation configuration structure to store all input read from the parameters
*/

int mcx_float2half2(float input[2]) {
union {
float f[2];
unsigned int i[2];
unsigned short h[2];
} f2h;
unsigned short tmp, m;

f2h.f[0] = input[0];
f2h.f[1] = input[1];

/**
float to half conversion
https://stackoverflow.com/questions/3026441/float32-to-float16/5587983#5587983
https://gamedev.stackexchange.com/a/17410 (for denorms)
*/
m = ((f2h.i[0] >> 13) & 0x03ff);
tmp = (f2h.i[0] >> 23) & 0xff; /*exponent*/
tmp = (tmp - 0x70) & ((unsigned int)((int)(0x70 - tmp) >> 4) >> 27);

if (m < 0x10 && tmp == 0) { /*handle denorms - between 2^-24 and 2^-14*/
unsigned short sign = (f2h.i[0] >> 16) & 0x8000;
tmp = ((f2h.i[0] >> 23) & 0xff);
m = (f2h.i[0] >> 12) & 0x07ff;
m |= 0x0800u;
f2h.h[0] = sign | ((m >> (114 - tmp)) + ((m >> (113 - tmp)) & 1));
} else {
f2h.h[0] = (f2h.i[0] >> 31) << 5;
f2h.h[0] = (f2h.h[0] | tmp) << 10;
f2h.h[0] |= (f2h.i[0] >> 13) & 0x3ff;
}

m = ((f2h.i[1] >> 13) & 0x03ff);
tmp = (f2h.i[1] >> 23) & 0xff; /*exponent*/
tmp = (tmp - 0x70) & ((unsigned int)((int)(0x70 - tmp) >> 4) >> 27);

if (m < 0x10 && tmp == 0) { /*handle denorms - between 2^-24 and 2^-14*/
unsigned short sign = (f2h.i[1] >> 16) & 0x8000;
tmp = ((f2h.i[1] >> 23) & 0xff);
m = (f2h.i[1] >> 12) & 0x07ff;
m |= 0x0800u;
f2h.h[1] = sign | ((m >> (114 - tmp)) + ((m >> (113 - tmp)) & 1));
} else {
f2h.h[1] = (f2h.i[1] >> 31) << 5;
f2h.h[1] = (f2h.h[1] | tmp) << 10;
f2h.h[1] |= (f2h.i[1] >> 13) & 0x3ff;
}

if (f2h.i[0] == 0) { /*avoid being detected as a 0-label voxel, setting mus=EPS_fp16, only happens for _AS, not ASGN as n will never be 0*/
f2h.i[0] = 0x00010000;
}

if (f2h.i[0] == SIGN_BIT) { /*avoid being detected as a 0-label voxel, setting mus=EPS_fp16, , only happens for _AS, not ASGN as n will never be 0*/
f2h.i[0] = 0;
}

return f2h.i[0];
}

#ifndef MCX_CONTAINER

/**
Expand Down Expand Up @@ -5136,6 +5167,8 @@ where possible parameters include (the first value in [*|*] is the default)\n\
1 or byte: 0-128 tissue labels\n\
2 or short: 0-65535 (max to 4000) tissue labels\n\
4 or integer: integer tissue labels \n\
96 or asgn_float: mua/mus/g/n 4xfloat format\n\
{[f:mua][f:mus][f:g][f:n]}\n\
97 or svmc: split-voxel MC 8-byte format\n\
{[n.z][n.y][n.x][p.z][p.y][p.x][upper][lower]}\n\
98 or mixlabel: label1+label2+label1_percentage\n\
Expand Down
1 change: 1 addition & 0 deletions src/mcx_utils.h
Original file line number Diff line number Diff line change
Expand Up @@ -317,6 +317,7 @@ void mcx_loadseedjdat(char* filename, Config* cfg);
void mcx_prep_polarized(Config* cfg);
void mcx_replayinit(Config* cfg, float* detps, int dimdetps[2], int seedbyte);
void mcx_validatecfg(Config* cfg, float* detps, int dimdetps[2], int seedbyte);
int mcx_float2half2(float input[2]);

#ifdef MCX_CONTAINER
#ifdef __cplusplus
Expand Down
69 changes: 15 additions & 54 deletions src/mcxlab.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -505,6 +505,7 @@ void mexFunction(int nlhs, mxArray* plhs[], int nrhs, const mxArray* prhs[]) {
return;
}


/**
* @brief Function to parse one subfield of the input structure
*
Expand Down Expand Up @@ -595,6 +596,8 @@ void mcx_set_field(const mxArray* root, const mxArray* item, int idx, Config* cf
} else if (mxGetNumberOfDimensions(item) == 4) { // if dimension is 4D, 1st dim is the property records: mua/mus/g/n
if ((mxIsUint8(item) || mxIsInt8(item)) && arraydim[0] == 4) { // if 4D byte array has a 1st dim of 4
cfg->mediabyte = MEDIA_ASGN_BYTE;
} else if (mxIsSingle(item) && arraydim[0] == 4) { // if 4D float array has a 1st dim of 4
cfg->mediabyte = MEDIA_ASGN_F2H;
} else if ((mxIsUint8(item) || mxIsInt8(item)) && arraydim[0] == 8) {
cfg->mediabyte = MEDIA_2LABEL_SPLIT;
} else if (mxIsSingle(item) && arraydim[0] == 3) {
Expand Down Expand Up @@ -675,70 +678,28 @@ void mcx_set_field(const mxArray* root, const mxArray* item, int idx, Config* cf

cfg->vol[i] = f2i.i;
}
} else if (cfg->mediabyte == MEDIA_AS_F2H) {
} else if (cfg->mediabyte == MEDIA_AS_F2H || cfg->mediabyte == MEDIA_ASGN_F2H) {
float* val = (float*)mxGetPr(item);
union {
float f[2];
unsigned int i[2];
unsigned short h[2];
} f2h;
unsigned short tmp, m;
float f2h[2];
int offset = (cfg->mediabyte == MEDIA_ASGN_F2H);

for (i = 0; i < dimxyz; i++) {
f2h.f[0] = val[i << 1] * cfg->unitinmm; // mua
f2h.f[1] = val[(i << 1) + 1] * cfg->unitinmm; // mus
f2h[0] = val[i << (1 + offset)] * cfg->unitinmm; // mua
f2h[1] = val[(i << (1 + offset)) + 1] * cfg->unitinmm; // mus

if (f2h.f[0] != f2h.f[0] || f2h.f[1] != f2h.f[1]) { /*if one of mua/mus is nan in continuous medium, convert to 0-voxel*/
if (f2h[0] != f2h[0] || f2h[1] != f2h[1]) { /*if one of mua/mus is nan in continuous medium, convert to 0-voxel*/
cfg->vol[i] = 0;
continue;
}

/**
float to half conversion
https://stackoverflow.com/questions/3026441/float32-to-float16/5587983#5587983
https://gamedev.stackexchange.com/a/17410 (for denorms)
*/
m = ((f2h.i[0] >> 13) & 0x03ff);
tmp = (f2h.i[0] >> 23) & 0xff; /*exponent*/
tmp = (tmp - 0x70) & ((unsigned int)((int)(0x70 - tmp) >> 4) >> 27);

if (m < 0x10 && tmp == 0) { /*handle denorms - between 2^-24 and 2^-14*/
unsigned short sign = (f2h.i[0] >> 16) & 0x8000;
tmp = ((f2h.i[0] >> 23) & 0xff);
m = (f2h.i[0] >> 12) & 0x07ff;
m |= 0x0800u;
f2h.h[0] = sign | ((m >> (114 - tmp)) + ((m >> (113 - tmp)) & 1));
if (cfg->mediabyte == MEDIA_ASGN_F2H) {
cfg->vol[i] = mcx_float2half2(f2h);
f2h[0] = val[(i << 2) + 2]; // g
f2h[1] = val[(i << 2) + 3]; // n
cfg->vol[i + dimxyz] = mcx_float2half2(f2h);
} else {
f2h.h[0] = (f2h.i[0] >> 31) << 5;
f2h.h[0] = (f2h.h[0] | tmp) << 10;
f2h.h[0] |= (f2h.i[0] >> 13) & 0x3ff;
cfg->vol[i] = mcx_float2half2(f2h);
}

m = ((f2h.i[1] >> 13) & 0x03ff);
tmp = (f2h.i[1] >> 23) & 0xff; /*exponent*/
tmp = (tmp - 0x70) & ((unsigned int)((int)(0x70 - tmp) >> 4) >> 27);

if (m < 0x10 && tmp == 0) { /*handle denorms - between 2^-24 and 2^-14*/
unsigned short sign = (f2h.i[1] >> 16) & 0x8000;
tmp = ((f2h.i[1] >> 23) & 0xff);
m = (f2h.i[1] >> 12) & 0x07ff;
m |= 0x0800u;
f2h.h[1] = sign | ((m >> (114 - tmp)) + ((m >> (113 - tmp)) & 1));
} else {
f2h.h[1] = (f2h.i[1] >> 31) << 5;
f2h.h[1] = (f2h.h[1] | tmp) << 10;
f2h.h[1] |= (f2h.i[1] >> 13) & 0x3ff;
}

if (f2h.i[0] == 0) { /*avoid being detected as a 0-label voxel, setting mus=EPS_fp16*/
f2h.i[0] = 0x00010000;
}

if (f2h.i[0] == SIGN_BIT) { /*avoid being detected as a 0-label voxel, setting mus=EPS_fp16*/
f2h.i[0] = 0;
}

cfg->vol[i] = f2h.i[0];
}
} else if (cfg->mediabyte == MEDIA_LABEL_HALF) {
float* val = (float*)mxGetPr(item);
Expand Down
Loading

0 comments on commit 1e3b031

Please sign in to comment.