path: root/ggml.h
author     aditya <bluenerd@protonmail.com>  2023-08-10 12:32:35 +0530
committer  aditya <bluenerd@protonmail.com>  2023-08-10 12:32:35 +0530
commit     a9ff78b3f48dc9f81943c41531c4959ce7e2ae9d (patch)
tree       49ee8c3c9148038f04112802265d928ef1aba428 /ggml.h
parent     2516af4cd61f509c995b4f78fdf123cba33f3509 (diff)
parent     916a9acdd0a411426690400ebe2bb7ce840a6bba (diff)
resolve merge conflict
Diffstat (limited to 'ggml.h')
-rw-r--r--  ggml.h  |  274
1 file changed, 235 insertions, 39 deletions
diff --git a/ggml.h b/ggml.h
index 8fe05d3..bdbd128 100644
--- a/ggml.h
+++ b/ggml.h
@@ -183,6 +183,15 @@
# define GGML_API
#endif
+// TODO: support for clang
+#ifdef __GNUC__
+# define GGML_DEPRECATED(func, hint) func __attribute__((deprecated(hint)))
+#elif defined(_MSC_VER)
+# define GGML_DEPRECATED(func, hint) __declspec(deprecated(hint)) func
+#else
+# define GGML_DEPRECATED(func, hint) func
+#endif
+
#include <stdint.h>
#include <stddef.h>
#include <stdbool.h>
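
As a sketch of how the new macro is meant to be used (the function name below is hypothetical, not part of this diff):

    // Marks ggml_old_fn as deprecated at compile time, with a migration hint.
    GGML_DEPRECATED(GGML_API int ggml_old_fn(int x), "use ggml_new_fn instead");
    // On GCC/Clang (__GNUC__) this expands to:
    //   GGML_API int ggml_old_fn(int x) __attribute__((deprecated("use ggml_new_fn instead")));
    // On MSVC it expands to:
    //   __declspec(deprecated("use ggml_new_fn instead")) GGML_API int ggml_old_fn(int x);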
@@ -199,6 +208,7 @@
#define GGML_MAX_CONTEXTS 64
#define GGML_MAX_SRC 6
#define GGML_MAX_NAME 48
+#define GGML_MAX_OP_PARAMS 32
#define GGML_DEFAULT_N_THREADS 4
@@ -207,6 +217,7 @@
#define GGML_UNUSED(x) (void)(x)
+#define GGML_PAD(x, n) (((x) + (n) - 1) & ~((n) - 1))
#define GGML_ASSERT(x) \
do { \
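
GGML_PAD(x, n) rounds x up to the nearest multiple of n; the bit trick assumes n is a power of two. A couple of worked values:

    GGML_PAD(13, 8)  == (13 + 7)  & ~7  == 16
    GGML_PAD(16, 8)  == (16 + 7)  & ~7  == 16
    GGML_PAD(17, 16) == (17 + 15) & ~15 == 32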
@@ -329,16 +340,6 @@ extern "C" {
GGML_OP_ARGMAX,
GGML_OP_REPEAT,
GGML_OP_REPEAT_BACK,
- GGML_OP_ABS,
- GGML_OP_SGN,
- GGML_OP_NEG,
- GGML_OP_STEP,
- GGML_OP_TANH,
- GGML_OP_ELU,
- GGML_OP_RELU,
- GGML_OP_GELU,
- GGML_OP_GELU_QUICK,
- GGML_OP_SILU,
GGML_OP_SILU_BACK,
GGML_OP_NORM, // normalize
GGML_OP_RMS_NORM,
@@ -368,6 +369,8 @@ extern "C" {
GGML_OP_CLAMP,
GGML_OP_CONV_1D,
GGML_OP_CONV_2D,
+ GGML_OP_POOL_1D,
+ GGML_OP_POOL_2D,
GGML_OP_FLASH_ATTN,
GGML_OP_FLASH_FF,
@@ -375,9 +378,15 @@ extern "C" {
GGML_OP_WIN_PART,
GGML_OP_WIN_UNPART,
+ GGML_OP_UNARY,
+
GGML_OP_MAP_UNARY,
GGML_OP_MAP_BINARY,
+ GGML_OP_MAP_CUSTOM1_F32,
+ GGML_OP_MAP_CUSTOM2_F32,
+ GGML_OP_MAP_CUSTOM3_F32,
+
GGML_OP_MAP_CUSTOM1,
GGML_OP_MAP_CUSTOM2,
GGML_OP_MAP_CUSTOM3,
@@ -388,6 +397,24 @@ extern "C" {
GGML_OP_COUNT,
};
+ enum ggml_unary_op {
+ GGML_UNARY_OP_ABS,
+ GGML_UNARY_OP_SGN,
+ GGML_UNARY_OP_NEG,
+ GGML_UNARY_OP_STEP,
+ GGML_UNARY_OP_TANH,
+ GGML_UNARY_OP_ELU,
+ GGML_UNARY_OP_RELU,
+ GGML_UNARY_OP_GELU,
+ GGML_UNARY_OP_GELU_QUICK,
+ GGML_UNARY_OP_SILU,
+ };
+
+ enum ggml_object_type {
+ GGML_OBJECT_TENSOR,
+ GGML_OBJECT_GRAPH,
+ GGML_OBJECT_WORK_BUFFER
+ };
// ggml object
struct ggml_object {
@@ -396,7 +423,9 @@ extern "C" {
struct ggml_object * next;
- char padding[8];
+ enum ggml_object_type type;
+
+ char padding[4];
};
static const size_t GGML_OBJECT_SIZE = sizeof(struct ggml_object);
@@ -416,6 +445,9 @@ extern "C" {
// compute data
enum ggml_op op;
+ // op params - allocated as int32_t for alignment
+ int32_t op_params[GGML_MAX_OP_PARAMS / sizeof(int32_t)];
+
bool is_param;
struct ggml_tensor * grad;
@@ -432,7 +464,7 @@ extern "C" {
void * extra; // extra things e.g. for ggml-cuda.cu
- char padding[8];
+ char padding[4];
};
static const size_t GGML_TENSOR_SIZE = sizeof(struct ggml_tensor);
@@ -453,6 +485,11 @@ extern "C" {
void * abort_callback_data;
};
+ // next prime after GGML_MAX_NODES
+ // #define GGML_GRAPH_HASHTABLE_SIZE 4099
+ // next prime after GGML_MAX_NODES * 2 (nodes + leafs)
+ #define GGML_GRAPH_HASHTABLE_SIZE 8273
+
// computation graph
struct ggml_cgraph {
int n_nodes;
@@ -462,12 +499,16 @@ extern "C" {
struct ggml_tensor * grads[GGML_MAX_NODES];
struct ggml_tensor * leafs[GGML_MAX_NODES];
+ void * visited_hash_table[GGML_GRAPH_HASHTABLE_SIZE];
+
// performance
int perf_runs;
int64_t perf_cycles;
int64_t perf_time_us;
};
+ static const size_t GGML_GRAPH_SIZE = sizeof(struct ggml_cgraph);
+
// scratch buffer
struct ggml_scratch {
size_t offs;
@@ -529,6 +570,7 @@ extern "C" {
GGML_API const char * ggml_type_name(enum ggml_type type);
GGML_API const char * ggml_op_name (enum ggml_op op);
+ GGML_API const char * ggml_op_symbol(enum ggml_op op);
GGML_API size_t ggml_element_size(const struct ggml_tensor * tensor);
@@ -541,6 +583,8 @@ extern "C" {
GGML_API bool ggml_is_contiguous(const struct ggml_tensor * tensor);
GGML_API bool ggml_is_permuted (const struct ggml_tensor * tensor);
+ GGML_API bool ggml_are_same_shape(const struct ggml_tensor * t0, const struct ggml_tensor * t1);
+
// use this to compute the memory overhead of a tensor
GGML_API size_t ggml_tensor_overhead(void);
@@ -552,6 +596,7 @@ extern "C" {
GGML_API size_t ggml_used_mem(const struct ggml_context * ctx);
GGML_API size_t ggml_set_scratch (struct ggml_context * ctx, struct ggml_scratch scratch);
+ GGML_API bool ggml_get_no_alloc(struct ggml_context * ctx);
GGML_API void ggml_set_no_alloc(struct ggml_context * ctx, bool no_alloc);
GGML_API void * ggml_get_mem_buffer (const struct ggml_context * ctx);
@@ -611,9 +656,11 @@ extern "C" {
GGML_API void * ggml_get_data (const struct ggml_tensor * tensor);
GGML_API float * ggml_get_data_f32(const struct ggml_tensor * tensor);
- GGML_API const char * ggml_get_name(const struct ggml_tensor * tensor);
- GGML_API struct ggml_tensor * ggml_set_name(struct ggml_tensor * tensor, const char * name);
- GGML_API struct ggml_tensor * ggml_format_name(struct ggml_tensor * tensor, const char * fmt, ...);
+ GGML_API enum ggml_unary_op ggml_get_unary_op(const struct ggml_tensor * tensor);
+
+ GGML_API const char * ggml_get_name (const struct ggml_tensor * tensor);
+ GGML_API struct ggml_tensor * ggml_set_name ( struct ggml_tensor * tensor, const char * name);
+ GGML_API struct ggml_tensor * ggml_format_name( struct ggml_tensor * tensor, const char * fmt, ...);
//
// operations on tensors with backpropagation
@@ -623,6 +670,11 @@ extern "C" {
struct ggml_context * ctx,
struct ggml_tensor * a);
+ // in-place, returns view(a)
+ GGML_API struct ggml_tensor * ggml_dup_inplace(
+ struct ggml_context * ctx,
+ struct ggml_tensor * a);
+
GGML_API struct ggml_tensor * ggml_add(
struct ggml_context * ctx,
struct ggml_tensor * a,
@@ -847,14 +899,17 @@ extern "C" {
GGML_API struct ggml_tensor * ggml_rms_norm(
struct ggml_context * ctx,
- struct ggml_tensor * a);
+ struct ggml_tensor * a,
+ float eps);
GGML_API struct ggml_tensor * ggml_rms_norm_inplace(
struct ggml_context * ctx,
- struct ggml_tensor * a);
+ struct ggml_tensor * a,
+ float eps);
// a - x
// b - dy
+ // TODO: update with configurable eps
GGML_API struct ggml_tensor * ggml_rms_norm_back(
struct ggml_context * ctx,
struct ggml_tensor * a,
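
For reference, RMS normalization divides each row by its root mean square, with eps guarding against division by zero: y_i = x_i / sqrt((1/n) * sum_j x_j^2 + eps). The eps argument added here exposes what was previously a hard-coded constant inside the implementation.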
@@ -946,11 +1001,22 @@ extern "C" {
struct ggml_tensor * a,
struct ggml_tensor * b);
+ // a -> b, in-place, return view(b)
+ GGML_API struct ggml_tensor * ggml_cpy_inplace(
+ struct ggml_context * ctx,
+ struct ggml_tensor * a,
+ struct ggml_tensor * b);
+
// make contiguous
GGML_API struct ggml_tensor * ggml_cont(
struct ggml_context * ctx,
struct ggml_tensor * a);
+ // make contiguous, in-place
+ GGML_API struct ggml_tensor * ggml_cont_inplace(
+ struct ggml_context * ctx,
+ struct ggml_tensor * a);
+
// return view(a), b specifies the new shape
// TODO: when we start computing gradient, make a copy instead of view
GGML_API struct ggml_tensor * ggml_reshape(
@@ -1119,6 +1185,28 @@ extern "C" {
int mode,
int n_ctx);
+ // custom RoPE
+ GGML_API struct ggml_tensor * ggml_rope_custom(
+ struct ggml_context * ctx,
+ struct ggml_tensor * a,
+ int n_past,
+ int n_dims,
+ int mode,
+ int n_ctx,
+ float freq_base,
+ float freq_scale);
+
+ // in-place, returns view(a)
+ GGML_API struct ggml_tensor * ggml_rope_custom_inplace(
+ struct ggml_context * ctx,
+ struct ggml_tensor * a,
+ int n_past,
+ int n_dims,
+ int mode,
+ int n_ctx,
+ float freq_base,
+ float freq_scale);
+
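
Assuming the usual GGML RoPE formulation, the two new parameters generalize the rotation angles: freq_base replaces the default base of 10000 and freq_scale linearly scales the position, giving theta_i = freq_scale * pos * freq_base^(-2i/n_dims). ggml_rope then behaves like ggml_rope_custom with freq_base = 10000 and freq_scale = 1.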
// rotary position embedding backward, i.e compute dx from dy
// a - dy
GGML_API struct ggml_tensor * ggml_rope_back(
@@ -1126,7 +1214,8 @@ extern "C" {
struct ggml_tensor * a,
int n_past,
int n_dims,
- int mode);
+ int mode,
+ int n_ctx);
// alibi position embedding
// in-place, returns view(a)
@@ -1166,13 +1255,38 @@ extern "C" {
// conv_1d with padding = half
// alias for ggml_conv_1d(a, b, s, a->ne[0]/2, d)
- GGML_API struct ggml_tensor* ggml_conv_1d_ph(
+ GGML_API struct ggml_tensor * ggml_conv_1d_ph(
struct ggml_context * ctx,
struct ggml_tensor * a,
struct ggml_tensor * b,
int s,
int d);
+ enum ggml_op_pool {
+ GGML_OP_POOL_MAX,
+ GGML_OP_POOL_AVG,
+ GGML_OP_POOL_COUNT,
+ };
+
+ GGML_API struct ggml_tensor * ggml_pool_1d(
+ struct ggml_context * ctx,
+ struct ggml_tensor * a,
+ enum ggml_op_pool op,
+ int k0, // kernel size
+ int s0, // stride
+ int p0); // padding
+
+ GGML_API struct ggml_tensor * ggml_pool_2d(
+ struct ggml_context * ctx,
+ struct ggml_tensor * a,
+ enum ggml_op_pool op,
+ int k0,
+ int k1,
+ int s0,
+ int s1,
+ int p0,
+ int p1);
+
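
A minimal usage sketch, assuming an already-initialized context (the function and tensor names are illustrative only); with the standard pooling size arithmetic the pooled dimension shrinks to (n + 2*p - k)/s + 1:

    // Average-pool 8 channels of length 64 with kernel 2, stride 2, no padding.
    // Expected output length along dim 0: (64 + 2*0 - 2)/2 + 1 = 32.
    static struct ggml_tensor * pool_example(struct ggml_context * ctx) {
        struct ggml_tensor * x = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, 64, 8);
        return ggml_pool_1d(ctx, x, GGML_OP_POOL_AVG, /*k0=*/2, /*s0=*/2, /*p0=*/0);
    }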
GGML_API struct ggml_tensor * ggml_flash_attn(
struct ggml_context * ctx,
struct ggml_tensor * q,
@@ -1216,6 +1330,16 @@ extern "C" {
int h0,
int w);
+ GGML_API struct ggml_tensor * ggml_unary(
+ struct ggml_context * ctx,
+ struct ggml_tensor * a,
+ enum ggml_unary_op op);
+
+ GGML_API struct ggml_tensor * ggml_unary_inplace(
+ struct ggml_context * ctx,
+ struct ggml_tensor * a,
+ enum ggml_unary_op op);
+
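
A sketch of the consolidated unary dispatch introduced above (the wrapper name is illustrative):

    // Applies RELU through the generic unary op instead of a dedicated GGML_OP_RELU.
    static struct ggml_tensor * relu_example(struct ggml_context * ctx, struct ggml_tensor * a) {
        return ggml_unary(ctx, a, GGML_UNARY_OP_RELU);
    }

ggml_get_unary_op(), declared earlier in this diff, recovers the GGML_UNARY_OP_* value from a tensor whose op is GGML_OP_UNARY.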
// custom operators
typedef void (*ggml_unary_op_f32_t) (const int, float *, const float *);
@@ -1225,63 +1349,129 @@ extern "C" {
typedef void (*ggml_custom2_op_f32_t)(struct ggml_tensor *, const struct ggml_tensor *, const struct ggml_tensor *);
typedef void (*ggml_custom3_op_f32_t)(struct ggml_tensor *, const struct ggml_tensor *, const struct ggml_tensor *, const struct ggml_tensor *);
- GGML_API struct ggml_tensor * ggml_map_unary_f32(
+ GGML_DEPRECATED(GGML_API struct ggml_tensor * ggml_map_unary_f32(
struct ggml_context * ctx,
struct ggml_tensor * a,
- ggml_unary_op_f32_t fun);
+ ggml_unary_op_f32_t fun),
+ "use ggml_map_custom1 instead");
- GGML_API struct ggml_tensor * ggml_map_unary_inplace_f32(
+ GGML_DEPRECATED(GGML_API struct ggml_tensor * ggml_map_unary_inplace_f32(
struct ggml_context * ctx,
struct ggml_tensor * a,
- ggml_unary_op_f32_t fun);
+ ggml_unary_op_f32_t fun),
+ "use ggml_map_custom1_inplace instead");
- GGML_API struct ggml_tensor * ggml_map_binary_f32(
+ GGML_DEPRECATED(GGML_API struct ggml_tensor * ggml_map_binary_f32(
struct ggml_context * ctx,
struct ggml_tensor * a,
struct ggml_tensor * b,
- ggml_binary_op_f32_t fun);
+ ggml_binary_op_f32_t fun),
+ "use ggml_map_custom2 instead");
- GGML_API struct ggml_tensor * ggml_map_binary_inplace_f32(
+ GGML_DEPRECATED(GGML_API struct ggml_tensor * ggml_map_binary_inplace_f32(
struct ggml_context * ctx,
struct ggml_tensor * a,
struct ggml_tensor * b,
- ggml_binary_op_f32_t fun);
+ ggml_binary_op_f32_t fun),
+ "use ggml_map_custom2_inplace instead");
- GGML_API struct ggml_tensor * ggml_map_custom1_f32(
+ GGML_DEPRECATED(GGML_API struct ggml_tensor * ggml_map_custom1_f32(
struct ggml_context * ctx,
struct ggml_tensor * a,
- ggml_custom1_op_f32_t fun);
+ ggml_custom1_op_f32_t fun),
+ "use ggml_map_custom1 instead");
- GGML_API struct ggml_tensor * ggml_map_custom1_inplace_f32(
+ GGML_DEPRECATED(GGML_API struct ggml_tensor * ggml_map_custom1_inplace_f32(
struct ggml_context * ctx,
struct ggml_tensor * a,
- ggml_custom1_op_f32_t fun);
+ ggml_custom1_op_f32_t fun),
+ "use ggml_map_custom1_inplace instead");
- GGML_API struct ggml_tensor * ggml_map_custom2_f32(
+ GGML_DEPRECATED(GGML_API struct ggml_tensor * ggml_map_custom2_f32(
struct ggml_context * ctx,
struct ggml_tensor * a,
struct ggml_tensor * b,
- ggml_custom2_op_f32_t fun);
+ ggml_custom2_op_f32_t fun),
+ "use ggml_map_custom2 instead");
- GGML_API struct ggml_tensor * ggml_map_custom2_inplace_f32(
+ GGML_DEPRECATED(GGML_API struct ggml_tensor * ggml_map_custom2_inplace_f32(
struct ggml_context * ctx,
struct ggml_tensor * a,
struct ggml_tensor * b,
- ggml_custom2_op_f32_t fun);
+ ggml_custom2_op_f32_t fun),
+ "use ggml_map_custom2_inplace instead");
- GGML_API struct ggml_tensor * ggml_map_custom3_f32(
+ GGML_DEPRECATED(GGML_API struct ggml_tensor * ggml_map_custom3_f32(
struct ggml_context * ctx,
struct ggml_tensor * a,
struct ggml_tensor * b,
struct ggml_tensor * c,
- ggml_custom3_op_f32_t fun);
+ ggml_custom3_op_f32_t fun),
+ "use ggml_map_custom3 instead");
- GGML_API struct ggml_tensor * ggml_map_custom3_inplace_f32(
+ GGML_DEPRECATED(GGML_API struct ggml_tensor * ggml_map_custom3_inplace_f32(
struct ggml_context * ctx,
struct ggml_tensor * a,
struct ggml_tensor * b,
struct ggml_tensor * c,
- ggml_custom3_op_f32_t fun);
+ ggml_custom3_op_f32_t fun),
+ "use ggml_map_custom3_inplace instead");
+
+ // custom operators v2
+
+ typedef void (*ggml_custom1_op_t)(struct ggml_tensor * dst , const struct ggml_tensor * a, int ith, int nth, void * userdata);
+ typedef void (*ggml_custom2_op_t)(struct ggml_tensor * dst , const struct ggml_tensor * a, const struct ggml_tensor * b, int ith, int nth, void * userdata);
+ typedef void (*ggml_custom3_op_t)(struct ggml_tensor * dst , const struct ggml_tensor * a, const struct ggml_tensor * b, const struct ggml_tensor * c, int ith, int nth, void * userdata);
+
+ #define GGML_N_TASKS_MAX -1
+
+ GGML_API struct ggml_tensor * ggml_map_custom1(
+ struct ggml_context * ctx,
+ struct ggml_tensor * a,
+ ggml_custom1_op_t fun,
+ int n_tasks,
+ void * userdata);
+
+ GGML_API struct ggml_tensor * ggml_map_custom1_inplace(
+ struct ggml_context * ctx,
+ struct ggml_tensor * a,
+ ggml_custom1_op_t fun,
+ int n_tasks,
+ void * userdata);
+
+ GGML_API struct ggml_tensor * ggml_map_custom2(
+ struct ggml_context * ctx,
+ struct ggml_tensor * a,
+ struct ggml_tensor * b,
+ ggml_custom2_op_t fun,
+ int n_tasks,
+ void * userdata);
+
+ GGML_API struct ggml_tensor * ggml_map_custom2_inplace(
+ struct ggml_context * ctx,
+ struct ggml_tensor * a,
+ struct ggml_tensor * b,
+ ggml_custom2_op_t fun,
+ int n_tasks,
+ void * userdata);
+
+ GGML_API struct ggml_tensor * ggml_map_custom3(
+ struct ggml_context * ctx,
+ struct ggml_tensor * a,
+ struct ggml_tensor * b,
+ struct ggml_tensor * c,
+ ggml_custom3_op_t fun,
+ int n_tasks,
+ void * userdata);
+
+ GGML_API struct ggml_tensor * ggml_map_custom3_inplace(
+ struct ggml_context * ctx,
+ struct ggml_tensor * a,
+ struct ggml_tensor * b,
+ struct ggml_tensor * c,
+ ggml_custom3_op_t fun,
+ int n_tasks,
+ void * userdata);
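
A minimal sketch of the new callback interface, assuming contiguous F32 tensors (the callback and tensor names are illustrative): ith/nth let the callback split its work across the compute threads, and GGML_N_TASKS_MAX requests one task per thread.

    // Doubles every element of a; elements are interleaved across the nth threads.
    static void scale2_cb(struct ggml_tensor * dst, const struct ggml_tensor * a,
                          int ith, int nth, void * userdata) {
        (void) userdata;
        const int64_t n   = ggml_nelements(dst);
        const float * src = (const float *) a->data;
        float       * out = (float       *) dst->data;
        for (int64_t i = ith; i < n; i += nth) {
            out[i] = 2.0f*src[i];
        }
    }

    // struct ggml_tensor * y = ggml_map_custom1(ctx, x, scale2_cb, GGML_N_TASKS_MAX, NULL);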
// loss function
@@ -1304,11 +1494,17 @@ extern "C" {
struct ggml_context * ctx,
struct ggml_tensor * tensor);
+
GGML_API void ggml_build_forward_expand(struct ggml_cgraph * cgraph, struct ggml_tensor * tensor);
GGML_API struct ggml_cgraph ggml_build_forward (struct ggml_tensor * tensor);
GGML_API struct ggml_cgraph ggml_build_backward(struct ggml_context * ctx, struct ggml_cgraph * gf, bool keep);
+ // graph allocation in a context
+ GGML_API struct ggml_cgraph * ggml_new_graph (struct ggml_context * ctx);
+ GGML_API struct ggml_cgraph * ggml_build_forward_ctx(struct ggml_context * ctx, struct ggml_tensor * tensor);
+ GGML_API size_t ggml_graph_overhead(void);
+
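
A sketch of allocating the graph inside the context rather than on the stack (ctx and result are assumed to already exist; the context should reserve roughly ggml_graph_overhead() bytes for the graph itself):

    struct ggml_cgraph * gf = ggml_new_graph(ctx);
    ggml_build_forward_expand(gf, result);
    // or, equivalently for a single output tensor:
    // struct ggml_cgraph * gf = ggml_build_forward_ctx(ctx, result);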
// ggml_graph_plan() has to be called before ggml_graph_compute()
// when plan.work_size > 0, caller must allocate memory for plan.work_data
GGML_API struct ggml_cplan ggml_graph_plan (struct ggml_cgraph * cgraph, int n_threads /*= GGML_DEFAULT_N_THREADS*/);