SoundDetection classifies the clip at req.Src and returns scored AudioSet tags.
(ctx context.Context, req *pb.SoundDetectionRequest)
| 79 | |
| 80 | // SoundDetection classifies the clip at req.Src and returns scored AudioSet tags. |
| 81 | func (c *Ced) SoundDetection(ctx context.Context, req *pb.SoundDetectionRequest) (*pb.SoundDetectionResponse, error) { |
| 82 | if c.ctxPtr == 0 { |
| 83 | return nil, errors.New("ced: model not loaded") |
| 84 | } |
| 85 | if req.GetSrc() == "" { |
| 86 | return nil, errors.New("ced: SoundDetectionRequest.src (audio path) is required") |
| 87 | } |
| 88 | topK := req.GetTopK() |
| 89 | if topK <= 0 { |
| 90 | topK = 10 // sensible default for a tagging response |
| 91 | } |
| 92 | |
| 93 | c.engineMu.Lock() |
| 94 | out := cstr(CppClassifyPathJSON(c.ctxPtr, req.GetSrc(), topK)) |
| 95 | lastErr := CppLastError(c.ctxPtr) |
| 96 | c.engineMu.Unlock() |
| 97 | |
| 98 | if out == "" { |
| 99 | return nil, fmt.Errorf("ced: classification failed: %s", lastErr) |
| 100 | } |
| 101 | var tags []jsonTag |
| 102 | if err := json.Unmarshal([]byte(out), &tags); err != nil { |
| 103 | return nil, fmt.Errorf("ced: bad classifier JSON: %w", err) |
| 104 | } |
| 105 | |
| 106 | thr := req.GetThreshold() |
| 107 | resp := &pb.SoundDetectionResponse{} |
| 108 | for _, t := range tags { |
| 109 | if t.Score < thr { |
| 110 | continue |
| 111 | } |
| 112 | resp.Detections = append(resp.Detections, &pb.SoundClass{ |
| 113 | Label: t.Label, Score: t.Score, Index: int32(t.Index), |
| 114 | }) |
| 115 | } |
| 116 | sort.Slice(resp.Detections, func(i, j int) bool { |
| 117 | return resp.Detections[i].Score > resp.Detections[j].Score |
| 118 | }) |
| 119 | return resp, nil |
| 120 | } |
| 121 | |
| 122 | func (c *Ced) Free() error { |
| 123 | c.engineMu.Lock() |