(state_db: Path, backup_dir: Path | None = None, dry_run: bool = False)
| 93 | |
| 94 | |
| 95 | def repair_state_db(state_db: Path, backup_dir: Path | None = None, dry_run: bool = False) -> dict[str, Any]: |
| 96 | if not state_db.exists(): |
| 97 | return {"updated_workspace_prefix_user_messages": 0, "removed_adjacent_user_duplicates": 0} |
| 98 | if not dry_run and backup_dir is not None: |
| 99 | _backup_file(state_db, backup_dir) |
| 100 | for suffix in ("-wal", "-shm"): |
| 101 | extra = Path(str(state_db) + suffix) |
| 102 | if extra.exists(): |
| 103 | _backup_file(extra, backup_dir) |
| 104 | |
| 105 | con = sqlite3.connect(state_db) |
| 106 | con.row_factory = sqlite3.Row |
| 107 | updated = 0 |
| 108 | deleted = 0 |
| 109 | affected_sessions: set[str] = set() |
| 110 | try: |
| 111 | rows = con.execute( |
| 112 | "select id, session_id, content from messages " |
| 113 | "where role = 'user' and content like '[Workspace:%' order by session_id, id" |
| 114 | ).fetchall() |
| 115 | duplicate_ids: list[int] = [] |
| 116 | for row in rows: |
| 117 | stripped = strip_workspace_prefix(row["content"]) |
| 118 | if stripped and stripped != row["content"]: |
| 119 | updated += 1 |
| 120 | affected_sessions.add(row["session_id"]) |
| 121 | if not dry_run: |
| 122 | con.execute("update messages set content = ? where id = ?", (stripped, row["id"])) |
| 123 | |
| 124 | for sid_row in con.execute("select distinct session_id from messages order by session_id").fetchall(): |
| 125 | sid = sid_row["session_id"] |
| 126 | previous = None |
| 127 | for row in con.execute("select id, role, content from messages where session_id = ? order by id", (sid,)).fetchall(): |
| 128 | if previous and previous["role"] == "user" and row["role"] == "user": |
| 129 | if normalized_text(previous["content"]) and normalized_text(previous["content"]) == normalized_text(row["content"]): |
| 130 | duplicate_ids.append(row["id"]) |
| 131 | affected_sessions.add(sid) |
| 132 | continue |
| 133 | previous = row |
| 134 | |
| 135 | deleted = len(duplicate_ids) |
| 136 | if not dry_run: |
| 137 | for message_id in duplicate_ids: |
| 138 | con.execute("delete from messages where id = ?", (message_id,)) |
| 139 | for sid in sorted(affected_sessions): |
| 140 | message_count = con.execute("select count(*) from messages where session_id = ?", (sid,)).fetchone()[0] |
| 141 | tool_count = con.execute( |
| 142 | "select count(*) from messages where session_id = ? and role = 'tool'", (sid,) |
| 143 | ).fetchone()[0] |
| 144 | con.execute( |
| 145 | "update sessions set message_count = ?, tool_call_count = ? where id = ?", |
| 146 | (message_count, tool_count, sid), |
| 147 | ) |
| 148 | con.commit() |
| 149 | finally: |
| 150 | con.close() |
| 151 | |
| 152 | return { |
no test coverage detected