MCPcopy
hub / github.com/showlab/Code2Video / format_evaluation_report

Function format_evaluation_report

src/eval_TQ.py:217–286  ·  view source on GitHub ↗
(results: List[EvaluationResult])

Source from the content-addressed store, hash-verified

215
216
217def format_evaluation_report(results: List[EvaluationResult]) -> str:
218 report = """
219========================================
220SKU EDUCATIONAL VIDEO EVALUATION REPORT
221========================================
222
223"""
224
225 if not results:
226 return report + "No results.\n"
227
228 total_concepts = len(results)
229 successful_unlearning = sum(1 for r in results if r.unlearning_success)
230 gains = [r.learning_gain for r in results]
231 pre_scores = [r.pre_unlearning_score for r in results]
232 post_unlearn_scores = [r.post_unlearning_score for r in results]
233 post_video_scores = [r.post_video_score for r in results]
234
235 def _safe_mean(xs):
236 return float(np.mean(xs)) if len(xs) > 0 else float("nan")
237
238 report += "DETAILED RESULTS BY CONCEPT:\n"
239
240 for result in results:
241 effectiveness_rating = "High" if result.learning_gain > 0.3 else "Medium" if result.learning_gain > 0.1 else "Low"
242 report += f"""
243 CONCEPT: {result.concept}
244 ├── Unlearning Success: {'✓' if result.unlearning_success else '✗'}
245 ├── Pre-unlearning Score: {result.pre_unlearning_score:.3f}
246 ├── Post-unlearning Score: {result.post_unlearning_score:.3f}
247 ├── Post-video Score: {result.post_video_score:.3f}
248 ├── Learning Gain: {result.learning_gain:.3f}
249 └── Video Effectiveness: {effectiveness_rating}
250
251 """
252
253 # statistical significance
254 successful_results = [r for r in results if r.unlearning_success]
255 if len(successful_results) > 1:
256 successful_gains = [r.learning_gain for r in successful_results]
257 t_stat, p_value = stats.ttest_1samp(successful_gains, 0)
258 mu = float(np.mean(successful_gains))
259 sd = float(np.std(successful_gains, ddof=1)) if len(successful_gains) > 1 else 0.0
260 n = len(successful_gains)
261 ci_low = mu - 1.96 * (sd / np.sqrt(n)) if n > 1 and sd > 0 else mu
262 ci_high = mu + 1.96 * (sd / np.sqrt(n)) if n > 1 and sd > 0 else mu
263 d = (mu / sd) if sd > 0 else float("inf")
264
265 report += f"""
266 STATISTICAL ANALYSIS (on successfully unlearned concepts):
267 - Learning Gain Distribution: μ={mu:.3f}, σ={sd:.3f}, n={n}
268 - Significance Test (H0: no learning): t={t_stat:.3f}, p={p_value:.3f}
269 - Effect Size (Cohen's d): {d:.3f}
270 - 95% Confidence Interval: [{ci_low:.3f}, {ci_high:.3f}]
271
272 """
273
274 report += "=" * 50 + "\n\n"

Callers 1

main · Function · 0.85

Calls 1

_safe_mean · Function · 0.85

Tested by

no test coverage detected