validateMutation ensures that the value in the edge is not too big. The challenge here is that the keys in badger have a limitation on their size (< 1<<16, i.e. 64 KB). We need to ensure that no key, either primary or secondary index key, is bigger than that. See here for more details: https://github.com/dgraph-io/projects/issues/73
(ctx context.Context, edges []*pb.DirectedEdge)
| 717 | // We need to ensure that no key, either primary or secondary index key is bigger than that. |
| 718 | // See here for more details: https://github.com/dgraph-io/projects/issues/73 |
| 719 | func validateMutation(ctx context.Context, edges []*pb.DirectedEdge) error { |
| 720 | errValueTooBigForIndex := errors.New("value in the mutation is too large for the index") |
| 721 | |
| 722 | // key = meta data + predicate + actual key, this all needs to fit into 64 KB |
| 723 | // we are keeping 536 bytes aside for meta information we put into the key and we |
| 724 | // use 65000 bytes for the rest, that is predicate and the actual key. |
| 725 | const maxKeySize = 65000 |
| 726 | |
| 727 | for _, e := range edges { |
| 728 | maxSizeForDataKey := maxKeySize - len(e.Attr) |
| 729 | |
| 730 | // seems reasonable to assume, the tokens for indexes won't be bigger than the value itself |
| 731 | if len(e.Value) <= maxSizeForDataKey { |
| 732 | continue |
| 733 | } |
| 734 | pred := x.NamespaceAttr(e.Namespace, e.Attr) |
| 735 | update, ok := schema.State().Get(ctx, pred) |
| 736 | if !ok { |
| 737 | continue |
| 738 | } |
| 739 | // only string type can have large values that could cause us issues later |
| 740 | if update.GetValueType() != pb.Posting_STRING { |
| 741 | continue |
| 742 | } |
| 743 | |
| 744 | storageVal := types.Val{Tid: types.TypeID(e.GetValueType()), Value: e.GetValue()} |
| 745 | schemaVal, err := types.Convert(storageVal, types.TypeID(update.GetValueType())) |
| 746 | if err != nil { |
| 747 | return err |
| 748 | } |
| 749 | |
| 750 | for _, tokenizer := range schema.State().Tokenizer(ctx, pred) { |
| 751 | toks, err := tok.BuildTokens(schemaVal.Value, tok.GetTokenizerForLang(tokenizer, e.Lang)) |
| 752 | if err != nil { |
| 753 | return fmt.Errorf("error while building index tokens: %w", err) |
| 754 | } |
| 755 | |
| 756 | for _, tok := range toks { |
| 757 | if len(tok) > maxSizeForDataKey { |
| 758 | return errValueTooBigForIndex |
| 759 | } |
| 760 | } |
| 761 | } |
| 762 | } |
| 763 | |
| 764 | return nil |
| 765 | } |
| 766 | |
| 767 | // validateCondValue checks that a cond string is a well-formed @if(...) or @filter(...) |
| 768 | // clause with balanced parentheses and no trailing content. This prevents DQL injection |
no test coverage detected