diff options
Diffstat (limited to 'ggml-opencl-dequant.cl')
-rw-r--r-- | ggml-opencl-dequant.cl | 84 |
1 files changed, 84 insertions, 0 deletions
diff --git a/ggml-opencl-dequant.cl b/ggml-opencl-dequant.cl new file mode 100644 index 0000000..191b2e5 --- /dev/null +++ b/ggml-opencl-dequant.cl @@ -0,0 +1,84 @@ +#define MULTILINE_QUOTE(...) #__VA_ARGS__ +const char * clblast_dequant = MULTILINE_QUOTE( + +struct block_q4_0 +{ + float d; + uchar qs[16]; +}; + +__kernel void dequantize_row_q4_0(__global struct block_q4_0* blocks, __global float* result) { + const uint i = get_global_id(0) / 32; + const uint l = get_local_id(0); + + const float d = blocks[i].d; + + const uchar vi = blocks[i].qs[l]; + + const uint index = i*32 + l*2; + result[index + 0] = ((vi & 0xf) - 8)*d; + result[index + 1] = ((vi >> 4) - 8)*d; +} + +struct block_q4_1 +{ + float d; + float m; + uchar qs[16]; +}; + +__kernel void dequantize_row_q4_1(__global struct block_q4_1* blocks, __global float* result) { + const uint i = get_global_id(0) / 32; + const uint l = get_local_id(0); + + const float d = blocks[i].d; + const float m = blocks[i].m; + + const uchar vi = blocks[i].qs[l]; + + const uint index = i*32 + l*2; + result[index + 0] = (vi & 0xf) * d + m; + result[index + 1] = (vi >> 4) * d + m; +} + +struct block_q4_2 +{ + ushort d; + uchar qs[8]; +}; + +__kernel void dequantize_row_q4_2(__global struct block_q4_2* blocks, __global float* result) { + const uint i = get_global_id(0) / 16; + const uint l = get_local_id(0); + + const float d = vload_half(0, (__global half*) &blocks[i].d);; + + const uchar vi = blocks[i].qs[l]; + + const uint index = i*16 + l*2; + result[index + 0] = ((vi & 0xf) - 8)*d; + result[index + 1] = ((vi >> 4) - 8)*d; +} + +struct block_q4_3 +{ + ushort d; + ushort m; + uchar qs[8]; +}; + +__kernel void dequantize_row_q4_3(__global struct block_q4_3* blocks, __global float* result) { + const uint i = get_global_id(0) / 16; + const uint l = get_local_id(0); + + const float d = vload_half(0, (__global half*) &(blocks[i].d)); + const float m = vload_half(0, (__global half*) &(blocks[i].m)); + + const uchar vi = blocks[i].qs[l]; + + const uint index = i*16 + l*2; + result[index + 0] = (vi & 0xf) * d + m; + result[index + 1] = (vi >> 4) * d + m; +} + +); |