| 215 | |
| 216 | |
| 217 | def format_evaluation_report(results: List[EvaluationResult]) -> str: |
| 218 | report = """ |
| 219 | ======================================== |
| 220 | SKU EDUCATIONAL VIDEO EVALUATION REPORT |
| 221 | ======================================== |
| 222 | |
| 223 | """ |
| 224 | |
| 225 | if not results: |
| 226 | return report + "No results.\n" |
| 227 | |
| 228 | total_concepts = len(results) |
| 229 | successful_unlearning = sum(1 for r in results if r.unlearning_success) |
| 230 | gains = [r.learning_gain for r in results] |
| 231 | pre_scores = [r.pre_unlearning_score for r in results] |
| 232 | post_unlearn_scores = [r.post_unlearning_score for r in results] |
| 233 | post_video_scores = [r.post_video_score for r in results] |
| 234 | |
| 235 | def _safe_mean(xs): |
| 236 | return float(np.mean(xs)) if len(xs) > 0 else float("nan") |
| 237 | |
| 238 | report += "DETAILED RESULTS BY CONCEPT:\n" |
| 239 | |
| 240 | for result in results: |
| 241 | effectiveness_rating = "High" if result.learning_gain > 0.3 else "Medium" if result.learning_gain > 0.1 else "Low" |
| 242 | report += f""" |
| 243 | CONCEPT: {result.concept} |
| 244 | ├── Unlearning Success: {'✓' if result.unlearning_success else '✗'} |
| 245 | ├── Pre-unlearning Score: {result.pre_unlearning_score:.3f} |
| 246 | ├── Post-unlearning Score: {result.post_unlearning_score:.3f} |
| 247 | ├── Post-video Score: {result.post_video_score:.3f} |
| 248 | ├── Learning Gain: {result.learning_gain:.3f} |
| 249 | └── Video Effectiveness: {effectiveness_rating} |
| 250 | |
| 251 | """ |
| 252 | |
| 253 | # statistical significance |
| 254 | successful_results = [r for r in results if r.unlearning_success] |
| 255 | if len(successful_results) > 1: |
| 256 | successful_gains = [r.learning_gain for r in successful_results] |
| 257 | t_stat, p_value = stats.ttest_1samp(successful_gains, 0) |
| 258 | mu = float(np.mean(successful_gains)) |
| 259 | sd = float(np.std(successful_gains, ddof=1)) if len(successful_gains) > 1 else 0.0 |
| 260 | n = len(successful_gains) |
| 261 | ci_low = mu - 1.96 * (sd / np.sqrt(n)) if n > 1 and sd > 0 else mu |
| 262 | ci_high = mu + 1.96 * (sd / np.sqrt(n)) if n > 1 and sd > 0 else mu |
| 263 | d = (mu / sd) if sd > 0 else float("inf") |
| 264 | |
| 265 | report += f""" |
| 266 | STATISTICAL ANALYSIS (on successfully unlearned concepts): |
| 267 | - Learning Gain Distribution: μ={mu:.3f}, σ={sd:.3f}, n={n} |
| 268 | - Significance Test (H0: no learning): t={t_stat:.3f}, p={p_value:.3f} |
| 269 | - Effect Size (Cohen's d): {d:.3f} |
| 270 | - 95% Confidence Interval: [{ci_low:.3f}, {ci_high:.3f}] |
| 271 | |
| 272 | """ |
| 273 | |
| 274 | report += "=" * 50 + "\n\n" |