MCPcopy
hub / github.com/modelscope/ms-swift / preprocess

Method preprocess

swift/dataset/preprocessor/core.py:384–403  ·  view source on GitHub ↗
(self, row: Dict[str, Any])

Source from the content-addressed store, hash-verified

382 self.columns[key] = 'response'
383
def preprocess(self, row: Dict[str, Any]) -> Optional[Dict[str, Any]]:
    """Convert a query/response style row into a ``messages`` row.

    The ``response``, ``history``, ``query`` and ``system`` keys are popped
    from ``row``. The current ``[query, response]`` pair is appended to the
    history, and the result of ``history_to_messages(history, system)`` is
    stored under the ``messages`` key.

    Args:
        row: The sample to convert. It is modified in place.

    Returns:
        The same ``row`` object with ``messages`` set.

    Raises:
        IndexError: If ``response`` is an empty list/tuple and random
            selection is not enabled.
    """
    response = row.pop('response', None)
    # Sometimes the response is a list of candidates; keep exactly one.
    if isinstance(response, (list, tuple)):
        from transformers.utils import strtobool

        # RANDOM_DATASET_RESPONSE (default 'False') switches between a random
        # candidate and the first one.
        if strtobool(os.environ.get('RANDOM_DATASET_RESPONSE', 'False')):
            response = self.random_state.choice(response)
        else:
            response = response[0]

    # A missing or falsy history is treated as "no previous turns".
    history = row.pop('history', None) or []
    query = row.pop('query', None)
    system = row.pop('system', None)
    if isinstance(history, str):  # e.g. "[['query1', 'response1']]"
        history = ast.literal_eval(history)
    # Work on a fresh list: the stored history may be a tuple (no ``append``),
    # and the caller's own list must not be mutated as a side effect.
    history = list(history)
    history.append([query, response])

    row.update({'messages': history_to_messages(history, system)})
    return row
404
405
406class AlpacaPreprocessor(ResponsePreprocessor):

Callers

nothing calls this directly

Calls 4

history_to_messagesFunction · 0.90
choiceMethod · 0.80
appendMethod · 0.80
updateMethod · 0.45

Tested by

no test coverage detected