Apply a list of ScoringRules to note inputs and return noteStats augmented with scoring results. This function applies a list of ScoringRules in order. Once each rule has run, a final ratingStatus is set for each note. An additional column is added to capture which rules acted on the note, and any additional columns generated by the ScoringRules are merged with the scored notes to generate the final return value.
( noteStats: pd.DataFrame, rules: List[ScoringRule], statusColumn: str, ruleColumn: str, decidedByColumn: Optional[str] = None, )
| 1644 | |
| 1645 | |
| 1646 | def apply_scoring_rules( |
| 1647 | noteStats: pd.DataFrame, |
| 1648 | rules: List[ScoringRule], |
| 1649 | statusColumn: str, |
| 1650 | ruleColumn: str, |
| 1651 | decidedByColumn: Optional[str] = None, |
| 1652 | ) -> pd.DataFrame: |
| 1653 | """Apply a list of ScoringRules to note inputs and return noteStats augmented with scoring results. |
| 1654 | |
| 1655 | This function applies a list of ScoringRules in order. Once each rule has run |
| 1656 | a final ratingStatus is set for each note. An additional column is added to capture |
| 1657 | which rules acted on the note and any additional columns generated by the ScoringRules |
| 1658 | are merged with the scored notes to generate the final return value. |
| 1659 | |
| 1660 | Args: |
| 1661 | noteStats: attributes, aggregates and raw scoring signals for each note. |
| 1662 | rules: ScoringRules which will be applied in the order given. |
| 1663 | statusColumn: str indicating column where status should be assigned. |
| 1664 | ruleColumn: str indicating column where active rules should be stored. |
| 1665 | decidedByColumn: None or str indicating column where the last rule to act on a note is stored. |
| 1666 | |
| 1667 | Returns: |
| 1668 | noteStats with additional columns representing scoring results. |
| 1669 | """ |
| 1670 | # Initialize empty dataframes to store labels for each note and which rules impacted |
| 1671 | # scoring for each note. |
| 1672 | noteLabels = pd.DataFrame.from_dict( |
| 1673 | {c.noteIdKey: pd.Series([], dtype=np.int64), statusColumn: pd.Series([], dtype=object)} |
| 1674 | ) |
| 1675 | noteRules = pd.DataFrame.from_dict( |
| 1676 | {c.noteIdKey: pd.Series([], dtype=np.int64), ruleColumn: pd.Series([], dtype=object)} |
| 1677 | ) |
| 1678 | noteColumns = pd.DataFrame.from_dict({c.noteIdKey: pd.Series([], dtype=np.int64)}) |
| 1679 | |
| 1680 | # Establish state to enforce rule dependencies. |
| 1681 | ruleIDs: Set[RuleID] = set() |
| 1682 | |
| 1683 | # Successively apply each rule |
| 1684 | for rule in rules: |
| 1685 | with c.time_block(f"Applying scoring rule: {rule.get_name()}"): |
| 1686 | logger.info(f"Applying scoring rule: {rule.get_name()}") |
| 1687 | rule.check_dependencies(ruleIDs) |
| 1688 | assert rule.get_rule_id() not in ruleIDs, f"repeat ruleID: {rule.get_name()}" |
| 1689 | ruleIDs.add(rule.get_rule_id()) |
| 1690 | with c.time_block(f"Calling score_notes: {rule.get_name()}"): |
| 1691 | noteStatusUpdates, additionalColumns = rule.score_notes(noteStats, noteLabels, statusColumn) |
| 1692 | if ( |
| 1693 | additionalColumns is not None |
| 1694 | # This rule updates both status and NmrDueToStableCrhTime (in additional column), they can |
| 1695 | # be on different rows. |
| 1696 | and rule.get_rule_id() != RuleID.NMR_DUE_TO_MIN_STABLE_CRH_TIME |
| 1697 | ): |
| 1698 | assert set(noteStatusUpdates[c.noteIdKey]) == set(additionalColumns[c.noteIdKey]) |
| 1699 | |
| 1700 | # Update noteLabels, which will always hold at most one label per note. |
| 1701 | noteLabels = pd.concat([noteLabels, noteStatusUpdates]).groupby(c.noteIdKey).tail(1) |
| 1702 | # Update note rules to have one row per rule which was active for a note |
| 1703 | noteRules = pd.concat( |
nothing calls this directly
no test coverage detected