MCPcopy
hub / github.com/weaviate/weaviate / TestBatch

Function TestBatch

modules/text2vec-databricks/vectorizer/batch_test.go:29–103  ·  view source on GitHub ↗
(t *testing.T)

Source from the content-addressed store, hash-verified

27)
28
29func TestBatch(t *testing.T) {
30 client := &fakeBatchClient{}
31 cfg := &FakeClassConfig{vectorizePropertyName: false, classConfig: map[string]interface{}{"vectorizeClassName": false}}
32 logger, _ := test.NewNullLogger()
33 cases := []struct {
34 name string
35 objects []*models.Object
36 skip []bool
37 wantErrors map[int]error
38 deadline time.Duration
39 }{
40 {name: "skip all", objects: []*models.Object{{Class: "Car"}}, skip: []bool{true}},
41 {name: "skip first", objects: []*models.Object{{Class: "Car"}, {Class: "Car", Properties: map[string]interface{}{"test": "test"}}}, skip: []bool{true, false}},
42 {name: "one object errors", objects: []*models.Object{{Class: "Car", Properties: map[string]interface{}{"test": "test"}}, {Class: "Car", Properties: map[string]interface{}{"test": "error something"}}}, skip: []bool{false, false}, wantErrors: map[int]error{1: fmt.Errorf("something")}},
43 {name: "first object errors", objects: []*models.Object{{Class: "Car", Properties: map[string]interface{}{"test": "error something"}}, {Class: "Car", Properties: map[string]interface{}{"test": "test"}}}, skip: []bool{false, false}, wantErrors: map[int]error{0: fmt.Errorf("something")}},
44 {name: "vectorize all", objects: []*models.Object{{Class: "Car", Properties: map[string]interface{}{"test": "test"}}, {Class: "Car", Properties: map[string]interface{}{"test": "something"}}}, skip: []bool{false, false}},
45 {name: "multiple vectorizer batches", objects: []*models.Object{
46 {Class: "Car", Properties: map[string]interface{}{"test": "tokens 25"}}, // set limit so next 3 objects are one batch
47 {Class: "Car", Properties: map[string]interface{}{"test": "first object first batch"}},
48 {Class: "Car", Properties: map[string]interface{}{"test": "second object first batch"}},
49 {Class: "Car", Properties: map[string]interface{}{"test": "third object first batch"}},
50 {Class: "Car", Properties: map[string]interface{}{"test": "first object second batch"}}, // rate is 100 again
51 {Class: "Car", Properties: map[string]interface{}{"test": "second object second batch"}},
52 {Class: "Car", Properties: map[string]interface{}{"test": "third object second batch"}},
53 {Class: "Car", Properties: map[string]interface{}{"test": "fourth object second batch"}},
54 }, skip: []bool{false, false, false, false, false, false, false, false}},
55 {name: "multiple vectorizer batches with skips and errors", objects: []*models.Object{
56 {Class: "Car", Properties: map[string]interface{}{"test": "tokens 25"}}, // set limit so next 3 objects are one batch
57 {Class: "Car", Properties: map[string]interface{}{"test": "first object first batch"}},
58 {Class: "Car", Properties: map[string]interface{}{"test": "second object first batch"}},
59 {Class: "Car", Properties: map[string]interface{}{"test": "error something"}},
60 {Class: "Car", Properties: map[string]interface{}{"test": "first object second batch"}}, // rate is 100 again
61 {Class: "Car", Properties: map[string]interface{}{"test": "second object second batch"}},
62 {Class: "Car", Properties: map[string]interface{}{"test": "third object second batch"}},
63 {Class: "Car", Properties: map[string]interface{}{"test": "fourth object second batch"}},
64 }, skip: []bool{false, true, false, false, false, true, false, false}, wantErrors: map[int]error{3: fmt.Errorf("something")}},
65 {name: "skip last item", objects: []*models.Object{
66 {Class: "Car", Properties: map[string]interface{}{"test": "fir test object"}}, // set limit
67 {Class: "Car", Properties: map[string]interface{}{"test": "first object first batch"}},
68 {Class: "Car", Properties: map[string]interface{}{"test": "second object first batch"}},
69 }, skip: []bool{false, false, true}},
70 }
71 for _, tt := range cases {
72 t.Run(tt.name, func(t *testing.T) {
73 v := text2vecbase.New(client,
74 batch.NewBatchVectorizer(client, 50*time.Second, batch.Settings{MaxObjectsPerBatch: 100, MaxTokensPerBatch: func(cfg moduletools.ClassConfig) int { return 500000 }, MaxTimePerBatch: 10},
75 logger, "test"),
76 batch.ReturnBatchTokenizer(1, "", false),
77 )
78 deadline := time.Now().Add(10 * time.Second)
79 if tt.deadline != 0 {
80 deadline = time.Now().Add(tt.deadline)
81 }
82
83 ctx, cancl := context.WithDeadline(context.Background(), deadline)
84 vecs, errs := v.ObjectBatch(
85 ctx, tt.objects, tt.skip, cfg,
86 )

Callers

nothing calls this directly

Calls 10

New (Function) · 0.92
NewBatchVectorizer (Function) · 0.92
ReturnBatchTokenizer (Function) · 0.92
Errorf (Method) · 0.80
Equal (Method) · 0.80
Add (Method) · 0.65
Now (Method) · 0.65
ObjectBatch (Method) · 0.65
Len (Method) · 0.65
Run (Method) · 0.45

Tested by

no test coverage detected

Used in the wild — real call sites across dependent graphs

searching dependent graphs…