MCPcopy
hub / github.com/impira/docquery / pipeline

Function pipeline

src/docquery/transformers_patch.py:81–133  ·  view source on GitHub ↗
(
    task: str = None,
    model: Optional = None,
    tokenizer: Optional[Union[str, PreTrainedTokenizer, PreTrainedTokenizerFast]] = None,
    revision: Optional[str] = None,
    device: Optional[Union[int, str, "torch.device"]] = None,
    **pipeline_kwargs
)

Source from the content-addressed store, hash-verified

79
80
def pipeline(
    task: Optional[str] = None,
    model: Optional = None,
    tokenizer: Optional[Union[str, PreTrainedTokenizer, PreTrainedTokenizerFast]] = None,
    revision: Optional[str] = None,
    device: Optional[Union[int, str, "torch.device"]] = None,
    **pipeline_kwargs
):
    """Build a transformers pipeline, filling in defaults for model, revision, tokenizer and device.

    Args:
        task: Pipeline task name. When ``model`` is omitted, the task is used to
            look up a default model in ``PIPELINE_DEFAULTS``.
        model: Model identifier handed to the ``from_pretrained`` loaders.
        tokenizer: Tokenizer to use. When omitted, one is loaded with
            ``AutoTokenizer.from_pretrained`` for ``model``.
        revision: Model revision. When omitted, ``DEFAULT_REVISIONS`` is
            consulted for ``model``.
        device: Device passed to the underlying pipeline. When omitted, ``0`` is
            used if CUDA is available and ``-1`` otherwise.
        **pipeline_kwargs: Extra keyword arguments forwarded to the config,
            tokenizer and model loaders and to the underlying pipeline factory.

    Returns:
        The object returned by ``transformers_pipeline``.

    Raises:
        ValueError: If ``model`` is not given and no default can be derived
            from ``task``.
    """
    if model is None and task is not None:
        model = PIPELINE_DEFAULTS.get(task)

    if model is None:
        # Fail early with a clear message instead of handing None to
        # AutoConfig.from_pretrained, which produces an obscure error.
        raise ValueError(
            "Cannot build a pipeline: no model was given and no default model "
            f"is registered for task {task!r}."
        )

    if revision is None:
        revision = DEFAULT_REVISIONS.get(model)

    config = AutoConfig.from_pretrained(model, revision=revision, **pipeline_kwargs)

    if tokenizer is None:
        tokenizer = AutoTokenizer.from_pretrained(
            model,
            revision=revision,
            config=config,
            **pipeline_kwargs,
        )

    # We need to explicitly check for the impira/layoutlm-document-qa model because of challenges with
    # registering an existing model "flavor" (layoutlm) within transformers after the fact. There may
    # be a clever way to get around this. Either way, we should be able to remove it once
    # https://github.com/huggingface/transformers/commit/5c4c869014f5839d04c1fd28133045df0c91fd84
    # is officially released.
    #
    # `config.architectures` is optional and may be None, so fall back to an empty tuple.
    architectures = config.architectures or ()
    if "LayoutLMForQuestionAnswering" in architectures:
        model = LayoutLMForQuestionAnswering.from_pretrained(
            model, config=config, revision=revision, **pipeline_kwargs
        )

    if config.model_type == "vision-encoder-decoder":
        # This _should_ be a feature of transformers -- deriving the feature_extractor automatically --
        # but is not at the time of writing, so we do it explicitly.
        pipeline_kwargs["feature_extractor"] = model

    if device is None:
        # This trick merely simplifies the device argument, so that cuda is used by default if it's
        # available, which at the time of writing is not a feature of transformers
        device = 0 if torch.cuda.is_available() else -1

    return transformers_pipeline(
        task,
        revision=revision,
        model=model,
        tokenizer=tokenizer,
        device=device,
        **pipeline_kwargs,
    )

Callers 6

test_impira_datasetFunction · 0.90
test_impira_datasetFunction · 0.90
mainFunction · 0.85

Calls 1

getMethod · 0.80

Tested by 5

test_impira_datasetFunction · 0.72
test_impira_datasetFunction · 0.72