__init__.py 64 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998
  1. # Copyright 2018 The HuggingFace Inc. team.
  2. #
  3. # Licensed under the Apache License, Version 2.0 (the "License");
  4. # you may not use this file except in compliance with the License.
  5. # You may obtain a copy of the License at
  6. #
  7. # http://www.apache.org/licenses/LICENSE-2.0
  8. #
  9. # Unless required by applicable law or agreed to in writing, software
  10. # distributed under the License is distributed on an "AS IS" BASIS,
  11. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. # See the License for the specific language governing permissions and
  13. # limitations under the License.
  14. from __future__ import annotations
  15. import json
  16. import os
  17. import warnings
  18. from pathlib import Path
  19. from typing import TYPE_CHECKING, Any, Optional, Union
  20. from huggingface_hub import is_offline_mode, model_info
  21. from ..configuration_utils import PreTrainedConfig
  22. from ..dynamic_module_utils import get_class_from_dynamic_module
  23. from ..feature_extraction_utils import FeatureExtractionMixin, PreTrainedFeatureExtractor
  24. from ..image_processing_utils import BaseImageProcessor
  25. from ..models.auto.configuration_auto import AutoConfig
  26. from ..models.auto.feature_extraction_auto import FEATURE_EXTRACTOR_MAPPING, AutoFeatureExtractor
  27. from ..models.auto.image_processing_auto import IMAGE_PROCESSOR_MAPPING, AutoImageProcessor
  28. from ..models.auto.modeling_auto import AutoModelForDepthEstimation, AutoModelForImageToImage
  29. from ..models.auto.processing_auto import PROCESSOR_MAPPING, AutoProcessor
  30. from ..models.auto.tokenization_auto import TOKENIZER_MAPPING, AutoTokenizer
  31. from ..processing_utils import ProcessorMixin
  32. from ..tokenization_python import PreTrainedTokenizer
  33. from ..utils import (
  34. CONFIG_NAME,
  35. cached_file,
  36. extract_commit_hash,
  37. find_adapter_config_file,
  38. is_kenlm_available,
  39. is_peft_available,
  40. is_pyctcdecode_available,
  41. is_torch_available,
  42. logging,
  43. )
  44. from .any_to_any import AnyToAnyPipeline
  45. from .audio_classification import AudioClassificationPipeline
  46. from .automatic_speech_recognition import AutomaticSpeechRecognitionPipeline
  47. from .base import (
  48. ArgumentHandler,
  49. CsvPipelineDataFormat,
  50. JsonPipelineDataFormat,
  51. PipedPipelineDataFormat,
  52. Pipeline,
  53. PipelineDataFormat,
  54. PipelineException,
  55. PipelineRegistry,
  56. get_default_model_and_revision,
  57. load_model,
  58. )
  59. from .depth_estimation import DepthEstimationPipeline
  60. from .document_question_answering import DocumentQuestionAnsweringPipeline
  61. from .feature_extraction import FeatureExtractionPipeline
  62. from .fill_mask import FillMaskPipeline
  63. from .image_classification import ImageClassificationPipeline
  64. from .image_feature_extraction import ImageFeatureExtractionPipeline
  65. from .image_segmentation import ImageSegmentationPipeline
  66. from .image_text_to_text import ImageTextToTextPipeline
  67. from .keypoint_matching import KeypointMatchingPipeline
  68. from .mask_generation import MaskGenerationPipeline
  69. from .object_detection import ObjectDetectionPipeline
  70. from .table_question_answering import TableQuestionAnsweringArgumentHandler, TableQuestionAnsweringPipeline
  71. from .text_classification import TextClassificationPipeline
  72. from .text_generation import TextGenerationPipeline
  73. from .text_to_audio import TextToAudioPipeline
  74. from .token_classification import (
  75. AggregationStrategy,
  76. NerPipeline,
  77. TokenClassificationArgumentHandler,
  78. TokenClassificationPipeline,
  79. )
  80. from .video_classification import VideoClassificationPipeline
  81. from .zero_shot_audio_classification import ZeroShotAudioClassificationPipeline
  82. from .zero_shot_classification import ZeroShotClassificationArgumentHandler, ZeroShotClassificationPipeline
  83. from .zero_shot_image_classification import ZeroShotImageClassificationPipeline
  84. from .zero_shot_object_detection import ZeroShotObjectDetectionPipeline
  85. if is_torch_available():
  86. import torch
  87. from ..models.auto.modeling_auto import (
  88. AutoModel,
  89. AutoModelForAudioClassification,
  90. AutoModelForCausalLM,
  91. AutoModelForCTC,
  92. AutoModelForDocumentQuestionAnswering,
  93. AutoModelForImageClassification,
  94. AutoModelForImageSegmentation,
  95. AutoModelForImageTextToText,
  96. AutoModelForKeypointMatching,
  97. AutoModelForMaskedLM,
  98. AutoModelForMaskGeneration,
  99. AutoModelForMultimodalLM,
  100. AutoModelForObjectDetection,
  101. AutoModelForQuestionAnswering,
  102. AutoModelForSemanticSegmentation,
  103. AutoModelForSeq2SeqLM,
  104. AutoModelForSequenceClassification,
  105. AutoModelForSpeechSeq2Seq,
  106. AutoModelForTableQuestionAnswering,
  107. AutoModelForTextToSpectrogram,
  108. AutoModelForTextToWaveform,
  109. AutoModelForTokenClassification,
  110. AutoModelForVideoClassification,
  111. AutoModelForVisualQuestionAnswering,
  112. AutoModelForZeroShotImageClassification,
  113. AutoModelForZeroShotObjectDetection,
  114. )
  115. if TYPE_CHECKING:
  116. from ..modeling_utils import PreTrainedModel
  117. from ..tokenization_utils_tokenizers import PreTrainedTokenizerFast
# Module-level logger obtained from the library's own `logging` utilities.
logger = logging.get_logger(__name__)
# Register all the supported tasks here
# Maps an alternative task name (key) to the canonical task name (value); every value is a key of
# `SUPPORTED_TASKS`.
TASK_ALIASES = {
    "sentiment-analysis": "text-classification",
    "ner": "token-classification",
    "text-to-speech": "text-to-audio",
}
# Table of the built-in pipeline tasks, keyed by canonical task name. Each entry holds:
#   - "impl": the pipeline class implementing the task,
#   - "pt": a tuple of auto-model classes for the task (an empty tuple when torch is unavailable),
#   - "default": the default checkpoint(s); most entries use {"model": (<name>, <revision>)}
#     (presumably a Hub repo id and a short commit hash -- TODO confirm),
#   - "type": one of "audio", "text", "image", "video" or "multimodal".
SUPPORTED_TASKS = {
    "audio-classification": {
        "impl": AudioClassificationPipeline,
        "pt": (AutoModelForAudioClassification,) if is_torch_available() else (),
        "default": {"model": ("superb/wav2vec2-base-superb-ks", "372e048")},
        "type": "audio",
    },
    "automatic-speech-recognition": {
        "impl": AutomaticSpeechRecognitionPipeline,
        "pt": (AutoModelForCTC, AutoModelForSpeechSeq2Seq) if is_torch_available() else (),
        "default": {"model": ("facebook/wav2vec2-base-960h", "22aad52")},
        "type": "multimodal",
    },
    "text-to-audio": {
        "impl": TextToAudioPipeline,
        "pt": (AutoModelForTextToWaveform, AutoModelForTextToSpectrogram) if is_torch_available() else (),
        "default": {"model": ("suno/bark-small", "1dbd7a1")},
        "type": "text",
    },
    "feature-extraction": {
        "impl": FeatureExtractionPipeline,
        "pt": (AutoModel,) if is_torch_available() else (),
        "default": {"model": ("distilbert/distilbert-base-cased", "6ea8117")},
        "type": "text",
    },
    "text-classification": {
        "impl": TextClassificationPipeline,
        "pt": (AutoModelForSequenceClassification,) if is_torch_available() else (),
        "default": {"model": ("distilbert/distilbert-base-uncased-finetuned-sst-2-english", "714eb0f")},
        "type": "text",
    },
    "token-classification": {
        "impl": TokenClassificationPipeline,
        "pt": (AutoModelForTokenClassification,) if is_torch_available() else (),
        "default": {"model": ("dbmdz/bert-large-cased-finetuned-conll03-english", "4c53496")},
        "type": "text",
    },
    "table-question-answering": {
        "impl": TableQuestionAnsweringPipeline,
        "pt": (AutoModelForTableQuestionAnswering,) if is_torch_available() else (),
        "default": {"model": ("google/tapas-base-finetuned-wtq", "e3dde19")},
        "type": "text",
    },
    "document-question-answering": {
        "impl": DocumentQuestionAnsweringPipeline,
        "pt": (AutoModelForDocumentQuestionAnswering,) if is_torch_available() else (),
        "default": {"model": ("impira/layoutlm-document-qa", "beed3c4")},
        "type": "multimodal",
    },
    "fill-mask": {
        "impl": FillMaskPipeline,
        "pt": (AutoModelForMaskedLM,) if is_torch_available() else (),
        "default": {"model": ("distilbert/distilroberta-base", "fb53ab8")},
        "type": "text",
    },
    "text-generation": {
        "impl": TextGenerationPipeline,
        "pt": (AutoModelForCausalLM,) if is_torch_available() else (),
        "default": {"model": ("HuggingFaceTB/SmolLM3-3B", "a07cc9a")},
        "type": "text",
    },
    "zero-shot-classification": {
        "impl": ZeroShotClassificationPipeline,
        "pt": (AutoModelForSequenceClassification,) if is_torch_available() else (),
        "default": {
            "model": ("facebook/bart-large-mnli", "d7645e1"),
            "config": ("facebook/bart-large-mnli", "d7645e1"),
        },
        "type": "text",
    },
    "zero-shot-image-classification": {
        "impl": ZeroShotImageClassificationPipeline,
        "pt": (AutoModelForZeroShotImageClassification,) if is_torch_available() else (),
        "default": {"model": ("openai/clip-vit-base-patch32", "3d74acf")},
        "type": "multimodal",
    },
    "zero-shot-audio-classification": {
        "impl": ZeroShotAudioClassificationPipeline,
        "pt": (AutoModel,) if is_torch_available() else (),
        "default": {"model": ("laion/clap-htsat-fused", "cca9e28")},
        "type": "multimodal",
    },
    "image-classification": {
        "impl": ImageClassificationPipeline,
        "pt": (AutoModelForImageClassification,) if is_torch_available() else (),
        "default": {"model": ("google/vit-base-patch16-224", "3f49326")},
        "type": "image",
    },
    "image-feature-extraction": {
        "impl": ImageFeatureExtractionPipeline,
        "pt": (AutoModel,) if is_torch_available() else (),
        "default": {"model": ("google/vit-base-patch16-224", "3f49326")},
        "type": "image",
    },
    "image-segmentation": {
        "impl": ImageSegmentationPipeline,
        "pt": (AutoModelForImageSegmentation, AutoModelForSemanticSegmentation) if is_torch_available() else (),
        "default": {"model": ("facebook/detr-resnet-50-panoptic", "d53b52a")},
        "type": "multimodal",
    },
    "image-text-to-text": {
        "impl": ImageTextToTextPipeline,
        "pt": (AutoModelForImageTextToText,) if is_torch_available() else (),
        "default": {"model": ("Qwen/Qwen3-VL-2B-Instruct", "8964489")},
        "type": "multimodal",
    },
    "object-detection": {
        "impl": ObjectDetectionPipeline,
        "pt": (AutoModelForObjectDetection,) if is_torch_available() else (),
        "default": {"model": ("facebook/detr-resnet-50", "1d5f47b")},
        "type": "multimodal",
    },
    "zero-shot-object-detection": {
        "impl": ZeroShotObjectDetectionPipeline,
        "pt": (AutoModelForZeroShotObjectDetection,) if is_torch_available() else (),
        "default": {"model": ("google/owlvit-base-patch32", "cbc355f")},
        "type": "multimodal",
    },
    "depth-estimation": {
        "impl": DepthEstimationPipeline,
        "pt": (AutoModelForDepthEstimation,) if is_torch_available() else (),
        "default": {"model": ("Intel/dpt-large", "bc15f29")},
        "type": "image",
    },
    "video-classification": {
        "impl": VideoClassificationPipeline,
        "pt": (AutoModelForVideoClassification,) if is_torch_available() else (),
        "default": {"model": ("MCG-NJU/videomae-base-finetuned-kinetics", "488eb9a")},
        "type": "video",
    },
    "mask-generation": {
        "impl": MaskGenerationPipeline,
        "pt": (AutoModelForMaskGeneration,) if is_torch_available() else (),
        "default": {"model": ("facebook/sam-vit-huge", "87aecf0")},
        "type": "multimodal",
    },
    "keypoint-matching": {
        "impl": KeypointMatchingPipeline,
        "pt": (AutoModelForKeypointMatching,) if is_torch_available() else (),
        "default": {"model": ("magic-leap-community/superglue_outdoor", "f4041f8")},
        "type": "image",
    },
    # NOTE(review): this is the only entry that carries a "tf" key and that nests a {"pt": ...} mapping
    # under "default" -> "model"; every other entry stores a plain (name, revision) tuple there.
    # Confirm that the code consuming "default" handles both shapes.
    "any-to-any": {
        "impl": AnyToAnyPipeline,
        "tf": (),
        "pt": (AutoModelForMultimodalLM,) if is_torch_available() else (),
        "default": {
            "model": {
                "pt": ("google/gemma-3n-E4B-it", "c1221e9"),
            }
        },
        "type": "multimodal",
    },
}
# Registry built from the task table and the alias map; `get_supported_tasks` and `check_task` delegate to it.
PIPELINE_REGISTRY = PipelineRegistry(supported_tasks=SUPPORTED_TASKS, task_aliases=TASK_ALIASES)
  280. def get_supported_tasks() -> list[str]:
  281. """
  282. Returns a list of supported task strings.
  283. """
  284. return PIPELINE_REGISTRY.get_supported_tasks()
  285. def get_task(model: str, token: str | None = None, **deprecated_kwargs) -> str:
  286. if is_offline_mode():
  287. raise RuntimeError("You cannot infer task automatically within `pipeline` when using offline mode")
  288. try:
  289. info = model_info(model, token=token)
  290. except Exception as e:
  291. raise RuntimeError(f"Instantiating a pipeline without a task set raised an error: {e}")
  292. if not info.pipeline_tag:
  293. raise RuntimeError(
  294. f"The model {model} does not seem to have a correct `pipeline_tag` set to infer the task automatically"
  295. )
  296. if getattr(info, "library_name", "transformers") not in {"transformers", "timm"}:
  297. raise RuntimeError(f"This model is meant to be used with {info.library_name} not with transformers")
  298. task = info.pipeline_tag
  299. return task
  300. def check_task(task: str) -> tuple[str, dict, Any]:
  301. """
  302. Checks an incoming task string, to validate it's correct and return the default Pipeline and Model classes, and
  303. default models if they exist.
  304. Args:
  305. task (`str`):
  306. The task defining which pipeline will be returned. Currently accepted tasks are:
  307. - `"audio-classification"`
  308. - `"automatic-speech-recognition"`
  309. - `"conversational"`
  310. - `"depth-estimation"`
  311. - `"document-question-answering"`
  312. - `"feature-extraction"`
  313. - `"fill-mask"`
  314. - `"image-classification"`
  315. - `"image-feature-extraction"`
  316. - `"image-segmentation"`
  317. - `"keypoint-matching"`
  318. - `"object-detection"`
  319. - `"table-question-answering"`
  320. - `"text-classification"` (alias `"sentiment-analysis"` available)
  321. - `"text-generation"`
  322. - `"text-to-audio"` (alias `"text-to-speech"` available)
  323. - `"token-classification"` (alias `"ner"` available)
  324. - `"video-classification"`
  325. - `"zero-shot-classification"`
  326. - `"zero-shot-image-classification"`
  327. - `"zero-shot-object-detection"`
  328. Returns:
  329. (normalized_task: `str`, task_defaults: `dict`, task_options: (`tuple`, None)) The normalized task name
  330. (removed alias and options).
  331. """
  332. return PIPELINE_REGISTRY.check_task(task)
  333. def clean_custom_task(task_info):
  334. import transformers
  335. if "impl" not in task_info:
  336. raise RuntimeError("This model introduces a custom pipeline without specifying its implementation.")
  337. pt_class_names = task_info.get("pt", ())
  338. if isinstance(pt_class_names, str):
  339. pt_class_names = [pt_class_names]
  340. task_info["pt"] = tuple(getattr(transformers, c) for c in pt_class_names)
  341. return task_info, None
  342. # <generated-code>
  343. # fmt: off
  344. # 🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨
  345. # The part of the file below was automatically generated from the code.
  346. # Do NOT edit this part of the file manually as any edits will be overwritten by the generation
  347. # of the file. If any change should be done, please apply the changes to the `pipeline` function
  348. # below and run `python utils/check_pipeline_typing.py --fix_and_overwrite` to update the file.
  349. # 🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨
  350. from typing import Literal, overload
  351. @overload
  352. def pipeline(task: Literal[None], model: str | PreTrainedModel | None = None, config: str | PreTrainedConfig | None = None, tokenizer: str | PreTrainedTokenizer | PreTrainedTokenizerFast | None = None, feature_extractor: str | PreTrainedFeatureExtractor | None = None, image_processor: str | BaseImageProcessor | None = None, processor: str | ProcessorMixin | None = None, revision: str | None = None, use_fast: bool = True, token: str | bool | None = None, device: int | str | torch.device | None = None, device_map: str | dict[str, int | str] | None = None, dtype: str | torch.dtype | None = "auto", trust_remote_code: bool | None = None, model_kwargs: dict[str, Any] | None = None, pipeline_class: Any | None = None, **kwargs: Any) -> Pipeline: ...
  353. @overload
  354. def pipeline(task: Literal["any-to-any"], model: str | PreTrainedModel | None = None, config: str | PreTrainedConfig | None = None, tokenizer: str | PreTrainedTokenizer | PreTrainedTokenizerFast | None = None, feature_extractor: str | PreTrainedFeatureExtractor | None = None, image_processor: str | BaseImageProcessor | None = None, processor: str | ProcessorMixin | None = None, revision: str | None = None, use_fast: bool = True, token: str | bool | None = None, device: int | str | torch.device | None = None, device_map: str | dict[str, int | str] | None = None, dtype: str | torch.dtype | None = "auto", trust_remote_code: bool | None = None, model_kwargs: dict[str, Any] | None = None, pipeline_class: Any | None = None, **kwargs: Any) -> AnyToAnyPipeline: ...
  355. @overload
  356. def pipeline(task: Literal["audio-classification"], model: str | PreTrainedModel | None = None, config: str | PreTrainedConfig | None = None, tokenizer: str | PreTrainedTokenizer | PreTrainedTokenizerFast | None = None, feature_extractor: str | PreTrainedFeatureExtractor | None = None, image_processor: str | BaseImageProcessor | None = None, processor: str | ProcessorMixin | None = None, revision: str | None = None, use_fast: bool = True, token: str | bool | None = None, device: int | str | torch.device | None = None, device_map: str | dict[str, int | str] | None = None, dtype: str | torch.dtype | None = "auto", trust_remote_code: bool | None = None, model_kwargs: dict[str, Any] | None = None, pipeline_class: Any | None = None, **kwargs: Any) -> AudioClassificationPipeline: ...
  357. @overload
  358. def pipeline(task: Literal["automatic-speech-recognition"], model: str | PreTrainedModel | None = None, config: str | PreTrainedConfig | None = None, tokenizer: str | PreTrainedTokenizer | PreTrainedTokenizerFast | None = None, feature_extractor: str | PreTrainedFeatureExtractor | None = None, image_processor: str | BaseImageProcessor | None = None, processor: str | ProcessorMixin | None = None, revision: str | None = None, use_fast: bool = True, token: str | bool | None = None, device: int | str | torch.device | None = None, device_map: str | dict[str, int | str] | None = None, dtype: str | torch.dtype | None = "auto", trust_remote_code: bool | None = None, model_kwargs: dict[str, Any] | None = None, pipeline_class: Any | None = None, **kwargs: Any) -> AutomaticSpeechRecognitionPipeline: ...
  359. @overload
  360. def pipeline(task: Literal["depth-estimation"], model: str | PreTrainedModel | None = None, config: str | PreTrainedConfig | None = None, tokenizer: str | PreTrainedTokenizer | PreTrainedTokenizerFast | None = None, feature_extractor: str | PreTrainedFeatureExtractor | None = None, image_processor: str | BaseImageProcessor | None = None, processor: str | ProcessorMixin | None = None, revision: str | None = None, use_fast: bool = True, token: str | bool | None = None, device: int | str | torch.device | None = None, device_map: str | dict[str, int | str] | None = None, dtype: str | torch.dtype | None = "auto", trust_remote_code: bool | None = None, model_kwargs: dict[str, Any] | None = None, pipeline_class: Any | None = None, **kwargs: Any) -> DepthEstimationPipeline: ...
  361. @overload
  362. def pipeline(task: Literal["document-question-answering"], model: str | PreTrainedModel | None = None, config: str | PreTrainedConfig | None = None, tokenizer: str | PreTrainedTokenizer | PreTrainedTokenizerFast | None = None, feature_extractor: str | PreTrainedFeatureExtractor | None = None, image_processor: str | BaseImageProcessor | None = None, processor: str | ProcessorMixin | None = None, revision: str | None = None, use_fast: bool = True, token: str | bool | None = None, device: int | str | torch.device | None = None, device_map: str | dict[str, int | str] | None = None, dtype: str | torch.dtype | None = "auto", trust_remote_code: bool | None = None, model_kwargs: dict[str, Any] | None = None, pipeline_class: Any | None = None, **kwargs: Any) -> DocumentQuestionAnsweringPipeline: ...
  363. @overload
  364. def pipeline(task: Literal["feature-extraction"], model: str | PreTrainedModel | None = None, config: str | PreTrainedConfig | None = None, tokenizer: str | PreTrainedTokenizer | PreTrainedTokenizerFast | None = None, feature_extractor: str | PreTrainedFeatureExtractor | None = None, image_processor: str | BaseImageProcessor | None = None, processor: str | ProcessorMixin | None = None, revision: str | None = None, use_fast: bool = True, token: str | bool | None = None, device: int | str | torch.device | None = None, device_map: str | dict[str, int | str] | None = None, dtype: str | torch.dtype | None = "auto", trust_remote_code: bool | None = None, model_kwargs: dict[str, Any] | None = None, pipeline_class: Any | None = None, **kwargs: Any) -> FeatureExtractionPipeline: ...
  365. @overload
  366. def pipeline(task: Literal["fill-mask"], model: str | PreTrainedModel | None = None, config: str | PreTrainedConfig | None = None, tokenizer: str | PreTrainedTokenizer | PreTrainedTokenizerFast | None = None, feature_extractor: str | PreTrainedFeatureExtractor | None = None, image_processor: str | BaseImageProcessor | None = None, processor: str | ProcessorMixin | None = None, revision: str | None = None, use_fast: bool = True, token: str | bool | None = None, device: int | str | torch.device | None = None, device_map: str | dict[str, int | str] | None = None, dtype: str | torch.dtype | None = "auto", trust_remote_code: bool | None = None, model_kwargs: dict[str, Any] | None = None, pipeline_class: Any | None = None, **kwargs: Any) -> FillMaskPipeline: ...
  367. @overload
  368. def pipeline(task: Literal["image-classification"], model: str | PreTrainedModel | None = None, config: str | PreTrainedConfig | None = None, tokenizer: str | PreTrainedTokenizer | PreTrainedTokenizerFast | None = None, feature_extractor: str | PreTrainedFeatureExtractor | None = None, image_processor: str | BaseImageProcessor | None = None, processor: str | ProcessorMixin | None = None, revision: str | None = None, use_fast: bool = True, token: str | bool | None = None, device: int | str | torch.device | None = None, device_map: str | dict[str, int | str] | None = None, dtype: str | torch.dtype | None = "auto", trust_remote_code: bool | None = None, model_kwargs: dict[str, Any] | None = None, pipeline_class: Any | None = None, **kwargs: Any) -> ImageClassificationPipeline: ...
  369. @overload
  370. def pipeline(task: Literal["image-feature-extraction"], model: str | PreTrainedModel | None = None, config: str | PreTrainedConfig | None = None, tokenizer: str | PreTrainedTokenizer | PreTrainedTokenizerFast | None = None, feature_extractor: str | PreTrainedFeatureExtractor | None = None, image_processor: str | BaseImageProcessor | None = None, processor: str | ProcessorMixin | None = None, revision: str | None = None, use_fast: bool = True, token: str | bool | None = None, device: int | str | torch.device | None = None, device_map: str | dict[str, int | str] | None = None, dtype: str | torch.dtype | None = "auto", trust_remote_code: bool | None = None, model_kwargs: dict[str, Any] | None = None, pipeline_class: Any | None = None, **kwargs: Any) -> ImageFeatureExtractionPipeline: ...
  371. @overload
  372. def pipeline(task: Literal["image-segmentation"], model: str | PreTrainedModel | None = None, config: str | PreTrainedConfig | None = None, tokenizer: str | PreTrainedTokenizer | PreTrainedTokenizerFast | None = None, feature_extractor: str | PreTrainedFeatureExtractor | None = None, image_processor: str | BaseImageProcessor | None = None, processor: str | ProcessorMixin | None = None, revision: str | None = None, use_fast: bool = True, token: str | bool | None = None, device: int | str | torch.device | None = None, device_map: str | dict[str, int | str] | None = None, dtype: str | torch.dtype | None = "auto", trust_remote_code: bool | None = None, model_kwargs: dict[str, Any] | None = None, pipeline_class: Any | None = None, **kwargs: Any) -> ImageSegmentationPipeline: ...
  373. @overload
  374. def pipeline(task: Literal["image-text-to-text"], model: str | PreTrainedModel | None = None, config: str | PreTrainedConfig | None = None, tokenizer: str | PreTrainedTokenizer | PreTrainedTokenizerFast | None = None, feature_extractor: str | PreTrainedFeatureExtractor | None = None, image_processor: str | BaseImageProcessor | None = None, processor: str | ProcessorMixin | None = None, revision: str | None = None, use_fast: bool = True, token: str | bool | None = None, device: int | str | torch.device | None = None, device_map: str | dict[str, int | str] | None = None, dtype: str | torch.dtype | None = "auto", trust_remote_code: bool | None = None, model_kwargs: dict[str, Any] | None = None, pipeline_class: Any | None = None, **kwargs: Any) -> ImageTextToTextPipeline: ...
  375. @overload
  376. def pipeline(task: Literal["keypoint-matching"], model: str | PreTrainedModel | None = None, config: str | PreTrainedConfig | None = None, tokenizer: str | PreTrainedTokenizer | PreTrainedTokenizerFast | None = None, feature_extractor: str | PreTrainedFeatureExtractor | None = None, image_processor: str | BaseImageProcessor | None = None, processor: str | ProcessorMixin | None = None, revision: str | None = None, use_fast: bool = True, token: str | bool | None = None, device: int | str | torch.device | None = None, device_map: str | dict[str, int | str] | None = None, dtype: str | torch.dtype | None = "auto", trust_remote_code: bool | None = None, model_kwargs: dict[str, Any] | None = None, pipeline_class: Any | None = None, **kwargs: Any) -> KeypointMatchingPipeline: ...
  377. @overload
  378. def pipeline(task: Literal["mask-generation"], model: str | PreTrainedModel | None = None, config: str | PreTrainedConfig | None = None, tokenizer: str | PreTrainedTokenizer | PreTrainedTokenizerFast | None = None, feature_extractor: str | PreTrainedFeatureExtractor | None = None, image_processor: str | BaseImageProcessor | None = None, processor: str | ProcessorMixin | None = None, revision: str | None = None, use_fast: bool = True, token: str | bool | None = None, device: int | str | torch.device | None = None, device_map: str | dict[str, int | str] | None = None, dtype: str | torch.dtype | None = "auto", trust_remote_code: bool | None = None, model_kwargs: dict[str, Any] | None = None, pipeline_class: Any | None = None, **kwargs: Any) -> MaskGenerationPipeline: ...
  379. @overload
  380. def pipeline(task: Literal["object-detection"], model: str | PreTrainedModel | None = None, config: str | PreTrainedConfig | None = None, tokenizer: str | PreTrainedTokenizer | PreTrainedTokenizerFast | None = None, feature_extractor: str | PreTrainedFeatureExtractor | None = None, image_processor: str | BaseImageProcessor | None = None, processor: str | ProcessorMixin | None = None, revision: str | None = None, use_fast: bool = True, token: str | bool | None = None, device: int | str | torch.device | None = None, device_map: str | dict[str, int | str] | None = None, dtype: str | torch.dtype | None = "auto", trust_remote_code: bool | None = None, model_kwargs: dict[str, Any] | None = None, pipeline_class: Any | None = None, **kwargs: Any) -> ObjectDetectionPipeline: ...
  381. @overload
  382. def pipeline(task: Literal["table-question-answering"], model: str | PreTrainedModel | None = None, config: str | PreTrainedConfig | None = None, tokenizer: str | PreTrainedTokenizer | PreTrainedTokenizerFast | None = None, feature_extractor: str | PreTrainedFeatureExtractor | None = None, image_processor: str | BaseImageProcessor | None = None, processor: str | ProcessorMixin | None = None, revision: str | None = None, use_fast: bool = True, token: str | bool | None = None, device: int | str | torch.device | None = None, device_map: str | dict[str, int | str] | None = None, dtype: str | torch.dtype | None = "auto", trust_remote_code: bool | None = None, model_kwargs: dict[str, Any] | None = None, pipeline_class: Any | None = None, **kwargs: Any) -> TableQuestionAnsweringPipeline: ...
  383. @overload
  384. def pipeline(task: Literal["text-classification"], model: str | PreTrainedModel | None = None, config: str | PreTrainedConfig | None = None, tokenizer: str | PreTrainedTokenizer | PreTrainedTokenizerFast | None = None, feature_extractor: str | PreTrainedFeatureExtractor | None = None, image_processor: str | BaseImageProcessor | None = None, processor: str | ProcessorMixin | None = None, revision: str | None = None, use_fast: bool = True, token: str | bool | None = None, device: int | str | torch.device | None = None, device_map: str | dict[str, int | str] | None = None, dtype: str | torch.dtype | None = "auto", trust_remote_code: bool | None = None, model_kwargs: dict[str, Any] | None = None, pipeline_class: Any | None = None, **kwargs: Any) -> TextClassificationPipeline: ...
  385. @overload
  386. def pipeline(task: Literal["text-generation"], model: str | PreTrainedModel | None = None, config: str | PreTrainedConfig | None = None, tokenizer: str | PreTrainedTokenizer | PreTrainedTokenizerFast | None = None, feature_extractor: str | PreTrainedFeatureExtractor | None = None, image_processor: str | BaseImageProcessor | None = None, processor: str | ProcessorMixin | None = None, revision: str | None = None, use_fast: bool = True, token: str | bool | None = None, device: int | str | torch.device | None = None, device_map: str | dict[str, int | str] | None = None, dtype: str | torch.dtype | None = "auto", trust_remote_code: bool | None = None, model_kwargs: dict[str, Any] | None = None, pipeline_class: Any | None = None, **kwargs: Any) -> TextGenerationPipeline: ...
  387. @overload
  388. def pipeline(task: Literal["text-to-audio"], model: str | PreTrainedModel | None = None, config: str | PreTrainedConfig | None = None, tokenizer: str | PreTrainedTokenizer | PreTrainedTokenizerFast | None = None, feature_extractor: str | PreTrainedFeatureExtractor | None = None, image_processor: str | BaseImageProcessor | None = None, processor: str | ProcessorMixin | None = None, revision: str | None = None, use_fast: bool = True, token: str | bool | None = None, device: int | str | torch.device | None = None, device_map: str | dict[str, int | str] | None = None, dtype: str | torch.dtype | None = "auto", trust_remote_code: bool | None = None, model_kwargs: dict[str, Any] | None = None, pipeline_class: Any | None = None, **kwargs: Any) -> TextToAudioPipeline: ...
  389. @overload
  390. def pipeline(task: Literal["token-classification"], model: str | PreTrainedModel | None = None, config: str | PreTrainedConfig | None = None, tokenizer: str | PreTrainedTokenizer | PreTrainedTokenizerFast | None = None, feature_extractor: str | PreTrainedFeatureExtractor | None = None, image_processor: str | BaseImageProcessor | None = None, processor: str | ProcessorMixin | None = None, revision: str | None = None, use_fast: bool = True, token: str | bool | None = None, device: int | str | torch.device | None = None, device_map: str | dict[str, int | str] | None = None, dtype: str | torch.dtype | None = "auto", trust_remote_code: bool | None = None, model_kwargs: dict[str, Any] | None = None, pipeline_class: Any | None = None, **kwargs: Any) -> TokenClassificationPipeline: ...
  391. @overload
  392. def pipeline(task: Literal["video-classification"], model: str | PreTrainedModel | None = None, config: str | PreTrainedConfig | None = None, tokenizer: str | PreTrainedTokenizer | PreTrainedTokenizerFast | None = None, feature_extractor: str | PreTrainedFeatureExtractor | None = None, image_processor: str | BaseImageProcessor | None = None, processor: str | ProcessorMixin | None = None, revision: str | None = None, use_fast: bool = True, token: str | bool | None = None, device: int | str | torch.device | None = None, device_map: str | dict[str, int | str] | None = None, dtype: str | torch.dtype | None = "auto", trust_remote_code: bool | None = None, model_kwargs: dict[str, Any] | None = None, pipeline_class: Any | None = None, **kwargs: Any) -> VideoClassificationPipeline: ...
  393. @overload
  394. def pipeline(task: Literal["zero-shot-audio-classification"], model: str | PreTrainedModel | None = None, config: str | PreTrainedConfig | None = None, tokenizer: str | PreTrainedTokenizer | PreTrainedTokenizerFast | None = None, feature_extractor: str | PreTrainedFeatureExtractor | None = None, image_processor: str | BaseImageProcessor | None = None, processor: str | ProcessorMixin | None = None, revision: str | None = None, use_fast: bool = True, token: str | bool | None = None, device: int | str | torch.device | None = None, device_map: str | dict[str, int | str] | None = None, dtype: str | torch.dtype | None = "auto", trust_remote_code: bool | None = None, model_kwargs: dict[str, Any] | None = None, pipeline_class: Any | None = None, **kwargs: Any) -> ZeroShotAudioClassificationPipeline: ...
  395. @overload
  396. def pipeline(task: Literal["zero-shot-classification"], model: str | PreTrainedModel | None = None, config: str | PreTrainedConfig | None = None, tokenizer: str | PreTrainedTokenizer | PreTrainedTokenizerFast | None = None, feature_extractor: str | PreTrainedFeatureExtractor | None = None, image_processor: str | BaseImageProcessor | None = None, processor: str | ProcessorMixin | None = None, revision: str | None = None, use_fast: bool = True, token: str | bool | None = None, device: int | str | torch.device | None = None, device_map: str | dict[str, int | str] | None = None, dtype: str | torch.dtype | None = "auto", trust_remote_code: bool | None = None, model_kwargs: dict[str, Any] | None = None, pipeline_class: Any | None = None, **kwargs: Any) -> ZeroShotClassificationPipeline: ...
  397. @overload
  398. def pipeline(task: Literal["zero-shot-image-classification"], model: str | PreTrainedModel | None = None, config: str | PreTrainedConfig | None = None, tokenizer: str | PreTrainedTokenizer | PreTrainedTokenizerFast | None = None, feature_extractor: str | PreTrainedFeatureExtractor | None = None, image_processor: str | BaseImageProcessor | None = None, processor: str | ProcessorMixin | None = None, revision: str | None = None, use_fast: bool = True, token: str | bool | None = None, device: int | str | torch.device | None = None, device_map: str | dict[str, int | str] | None = None, dtype: str | torch.dtype | None = "auto", trust_remote_code: bool | None = None, model_kwargs: dict[str, Any] | None = None, pipeline_class: Any | None = None, **kwargs: Any) -> ZeroShotImageClassificationPipeline: ...
  399. @overload
  400. def pipeline(task: Literal["zero-shot-object-detection"], model: str | PreTrainedModel | None = None, config: str | PreTrainedConfig | None = None, tokenizer: str | PreTrainedTokenizer | PreTrainedTokenizerFast | None = None, feature_extractor: str | PreTrainedFeatureExtractor | None = None, image_processor: str | BaseImageProcessor | None = None, processor: str | ProcessorMixin | None = None, revision: str | None = None, use_fast: bool = True, token: str | bool | None = None, device: int | str | torch.device | None = None, device_map: str | dict[str, int | str] | None = None, dtype: str | torch.dtype | None = "auto", trust_remote_code: bool | None = None, model_kwargs: dict[str, Any] | None = None, pipeline_class: Any | None = None, **kwargs: Any) -> ZeroShotObjectDetectionPipeline: ...
  401. # 🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨
  402. # The part of the file above was automatically generated from the code.
  403. # 🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨
  404. # fmt: on
  405. # </generated-code>
  406. def pipeline(
  407. task: str | None = None,
  408. model: str | PreTrainedModel | None = None,
  409. config: str | PreTrainedConfig | None = None,
  410. tokenizer: str | PreTrainedTokenizer | PreTrainedTokenizerFast | None = None,
  411. feature_extractor: str | PreTrainedFeatureExtractor | None = None,
  412. image_processor: str | BaseImageProcessor | None = None,
  413. processor: str | ProcessorMixin | None = None,
  414. revision: str | None = None,
  415. use_fast: bool = True,
  416. token: str | bool | None = None,
  417. device: int | str | torch.device | None = None,
  418. device_map: str | dict[str, int | str] | None = None,
  419. dtype: str | torch.dtype | None = "auto",
  420. trust_remote_code: bool | None = None,
  421. model_kwargs: dict[str, Any] | None = None,
  422. pipeline_class: Any | None = None,
  423. **kwargs: Any,
  424. ) -> Pipeline:
  425. """
  426. Utility factory method to build a [`Pipeline`].
  427. A pipeline consists of:
  428. - One or more components for pre-processing model inputs, such as a [tokenizer](tokenizer),
  429. [image_processor](image_processor), [feature_extractor](feature_extractor), or [processor](processors).
  430. - A [model](model) that generates predictions from the inputs.
  431. - Optional post-processing steps to refine the model's output, which can also be handled by processors.
  432. <Tip>
  433. While there are such optional arguments as `tokenizer`, `feature_extractor`, `image_processor`, and `processor`,
  434. they shouldn't be specified all at once. If these components are not provided, `pipeline` will try to load
  435. required ones automatically. In case you want to provide these components explicitly, please refer to a
  436. specific pipeline in order to get more details regarding what components are required.
  437. </Tip>
  438. Args:
  439. task (`str`):
  440. The task defining which pipeline will be returned. Currently accepted tasks are:
  441. - `"audio-classification"`: will return a [`AudioClassificationPipeline`].
  442. - `"automatic-speech-recognition"`: will return a [`AutomaticSpeechRecognitionPipeline`].
  443. - `"depth-estimation"`: will return a [`DepthEstimationPipeline`].
  444. - `"document-question-answering"`: will return a [`DocumentQuestionAnsweringPipeline`].
  445. - `"feature-extraction"`: will return a [`FeatureExtractionPipeline`].
  446. - `"fill-mask"`: will return a [`FillMaskPipeline`]:.
  447. - `"image-classification"`: will return a [`ImageClassificationPipeline`].
  448. - `"image-feature-extraction"`: will return an [`ImageFeatureExtractionPipeline`].
  449. - `"image-segmentation"`: will return a [`ImageSegmentationPipeline`].
  450. - `"image-text-to-text"`: will return a [`ImageTextToTextPipeline`].
  451. - `"keypoint-matching"`: will return a [`KeypointMatchingPipeline`].
  452. - `"mask-generation"`: will return a [`MaskGenerationPipeline`].
  453. - `"object-detection"`: will return a [`ObjectDetectionPipeline`].
  454. - `"table-question-answering"`: will return a [`TableQuestionAnsweringPipeline`].
  455. - `"text-classification"` (alias `"sentiment-analysis"` available): will return a
  456. [`TextClassificationPipeline`].
  457. - `"text-generation"`: will return a [`TextGenerationPipeline`]:.
  458. - `"text-to-audio"` (alias `"text-to-speech"` available): will return a [`TextToAudioPipeline`]:.
  459. - `"token-classification"` (alias `"ner"` available): will return a [`TokenClassificationPipeline`].
  460. - `"video-classification"`: will return a [`VideoClassificationPipeline`].
  461. - `"zero-shot-classification"`: will return a [`ZeroShotClassificationPipeline`].
  462. - `"zero-shot-image-classification"`: will return a [`ZeroShotImageClassificationPipeline`].
  463. - `"zero-shot-audio-classification"`: will return a [`ZeroShotAudioClassificationPipeline`].
  464. - `"zero-shot-object-detection"`: will return a [`ZeroShotObjectDetectionPipeline`].
  465. model (`str` or [`PreTrainedModel`], *optional*):
  466. The model that will be used by the pipeline to make predictions. This can be a model identifier or an
  467. actual instance of a pretrained model inheriting from [`PreTrainedModel`].
  468. If not provided, the default for the `task` will be loaded.
  469. config (`str` or [`PreTrainedConfig`], *optional*):
  470. The configuration that will be used by the pipeline to instantiate the model. This can be a model
  471. identifier or an actual pretrained model configuration inheriting from [`PreTrainedConfig`].
  472. If not provided, the default configuration file for the requested model will be used. That means that if
  473. `model` is given, its default configuration will be used. However, if `model` is not supplied, this
  474. `task`'s default model's config is used instead.
  475. tokenizer (`str` or [`PreTrainedTokenizer`], *optional*):
  476. The tokenizer that will be used by the pipeline to encode data for the model. This can be a model
  477. identifier or an actual pretrained tokenizer inheriting from [`PreTrainedTokenizer`].
  478. If not provided, the default tokenizer for the given `model` will be loaded (if it is a string). If `model`
  479. is not specified or not a string, then the default tokenizer for `config` is loaded (if it is a string).
  480. However, if `config` is also not given or not a string, then the default tokenizer for the given `task`
  481. will be loaded.
  482. feature_extractor (`str` or [`PreTrainedFeatureExtractor`], *optional*):
  483. The feature extractor that will be used by the pipeline to encode data for the model. This can be a model
  484. identifier or an actual pretrained feature extractor inheriting from [`PreTrainedFeatureExtractor`].
  485. Feature extractors are used for non-NLP models, such as Speech or Vision models as well as multi-modal
  486. models. Multi-modal models will also require a tokenizer to be passed.
  487. If not provided, the default feature extractor for the given `model` will be loaded (if it is a string). If
  488. `model` is not specified or not a string, then the default feature extractor for `config` is loaded (if it
  489. is a string). However, if `config` is also not given or not a string, then the default feature extractor
  490. for the given `task` will be loaded.
  491. image_processor (`str` or [`BaseImageProcessor`], *optional*):
  492. The image processor that will be used by the pipeline to preprocess images for the model. This can be a
  493. model identifier or an actual image processor inheriting from [`BaseImageProcessor`].
  494. Image processors are used for Vision models and multi-modal models that require image inputs. Multi-modal
  495. models will also require a tokenizer to be passed.
  496. If not provided, the default image processor for the given `model` will be loaded (if it is a string). If
  497. `model` is not specified or not a string, then the default image processor for `config` is loaded (if it is
  498. a string).
  499. processor (`str` or [`ProcessorMixin`], *optional*):
  500. The processor that will be used by the pipeline to preprocess data for the model. This can be a model
  501. identifier or an actual processor inheriting from [`ProcessorMixin`].
  502. Processors are used for multi-modal models that require multi-modal inputs, for example, a model that
  503. requires both text and image inputs.
  504. If not provided, the default processor for the given `model` will be loaded (if it is a string). If `model`
  505. is not specified or not a string, then the default processor for `config` is loaded (if it is a string).
  506. revision (`str`, *optional*, defaults to `"main"`):
  507. When passing a task name or a string model identifier: The specific model version to use. It can be a
  508. branch name, a tag name, or a commit id, since we use a git-based system for storing models and other
  509. artifacts on huggingface.co, so `revision` can be any identifier allowed by git.
  510. use_fast (`bool`, *optional*, defaults to `True`):
  511. Whether or not to use a Fast tokenizer if possible (a [`PreTrainedTokenizerFast`]).
  512. token (`str` or *bool*, *optional*):
  513. The token to use as HTTP bearer authorization for remote files. If `True`, will use the token generated
  514. when running `hf auth login`.
  515. device (`int` or `str` or `torch.device`):
  516. Defines the device (*e.g.*, `"cpu"`, `"cuda:1"`, `"mps"`, or a GPU ordinal rank like `1`) on which this
  517. pipeline will be allocated.
  518. device_map (`str` or `dict[str, Union[int, str, torch.device]`, *optional*):
  519. Sent directly as `model_kwargs` (just a simpler shortcut). When `accelerate` library is present, set
  520. `device_map="auto"` to compute the most optimized `device_map` automatically (see
  521. [here](https://huggingface.co/docs/accelerate/main/en/package_reference/big_modeling#accelerate.cpu_offload)
  522. for more information).
  523. <Tip warning={true}>
  524. Do not use `device_map` AND `device` at the same time as they will conflict
  525. </Tip>
  526. dtype (`str` or `torch.dtype`, *optional*):
  527. Sent directly as `model_kwargs` (just a simpler shortcut) to use the available precision for this model
  528. (`torch.float16`, `torch.bfloat16`, ... or `"auto"`).
  529. trust_remote_code (`bool`, *optional*, defaults to `False`):
  530. Whether or not to allow for custom code defined on the Hub in their own modeling, configuration,
  531. tokenization or even pipeline files. This option should only be set to `True` for repositories you trust
  532. and in which you have read the code, as it will execute code present on the Hub on your local machine.
  533. model_kwargs (`dict[str, Any]`, *optional*):
  534. Additional dictionary of keyword arguments passed along to the model's `from_pretrained(...,
  535. **model_kwargs)` function.
  536. kwargs (`dict[str, Any]`, *optional*):
  537. Additional keyword arguments passed along to the specific pipeline init (see the documentation for the
  538. corresponding pipeline class for possible values).
  539. Returns:
  540. [`Pipeline`]: A suitable pipeline for the task.
  541. Examples:
  542. ```python
  543. >>> from transformers import pipeline, AutoModelForTokenClassification, AutoTokenizer
  544. >>> # Sentiment analysis pipeline
  545. >>> analyzer = pipeline("sentiment-analysis")
  546. >>> # Question answering pipeline, specifying the checkpoint identifier
  547. >>> oracle = pipeline(
  548. ... "question-answering", model="distilbert/distilbert-base-cased-distilled-squad", tokenizer="google-bert/bert-base-cased"
  549. ... )
  550. >>> # Named entity recognition pipeline, passing in a specific model and tokenizer
  551. >>> model = AutoModelForTokenClassification.from_pretrained("dbmdz/bert-large-cased-finetuned-conll03-english")
  552. >>> tokenizer = AutoTokenizer.from_pretrained("google-bert/bert-base-cased")
  553. >>> recognizer = pipeline("ner", model=model, tokenizer=tokenizer)
  554. ```"""
  555. if model_kwargs is None:
  556. model_kwargs = {}
  557. code_revision = kwargs.pop("code_revision", None)
  558. commit_hash = kwargs.pop("_commit_hash", None)
  559. local_files_only = kwargs.get("local_files_only", False)
  560. hub_kwargs = {
  561. "revision": revision,
  562. "token": token,
  563. "trust_remote_code": trust_remote_code,
  564. "_commit_hash": commit_hash,
  565. "local_files_only": local_files_only,
  566. }
  567. if task is None and model is None:
  568. raise RuntimeError(
  569. "Impossible to instantiate a pipeline without either a task or a model "
  570. "being specified. "
  571. "Please provide a task class or a model"
  572. )
  573. if model is None and tokenizer is not None:
  574. raise RuntimeError(
  575. "Impossible to instantiate a pipeline with tokenizer specified but not the model as the provided tokenizer"
  576. " may not be compatible with the default model. Please provide a PreTrainedModel class or a"
  577. " path/identifier to a pretrained model when providing tokenizer."
  578. )
  579. if model is None and feature_extractor is not None:
  580. raise RuntimeError(
  581. "Impossible to instantiate a pipeline with feature_extractor specified but not the model as the provided"
  582. " feature_extractor may not be compatible with the default model. Please provide a PreTrainedModel class"
  583. " or a path/identifier to a pretrained model when providing feature_extractor."
  584. )
  585. if isinstance(model, Path):
  586. model = str(model)
  587. if commit_hash is None:
  588. pretrained_model_name_or_path = None
  589. if isinstance(config, str):
  590. pretrained_model_name_or_path = config
  591. elif config is None and isinstance(model, str):
  592. pretrained_model_name_or_path = model
  593. if not isinstance(config, PreTrainedConfig) and pretrained_model_name_or_path is not None:
  594. # We make a call to the config file first (which may be absent) to get the commit hash as soon as possible
  595. resolved_config_file = cached_file(
  596. pretrained_model_name_or_path,
  597. CONFIG_NAME,
  598. _raise_exceptions_for_gated_repo=False,
  599. _raise_exceptions_for_missing_entries=False,
  600. _raise_exceptions_for_connection_errors=False,
  601. cache_dir=model_kwargs.get("cache_dir"),
  602. **hub_kwargs,
  603. )
  604. hub_kwargs["_commit_hash"] = extract_commit_hash(resolved_config_file, commit_hash)
  605. else:
  606. hub_kwargs["_commit_hash"] = getattr(config, "_commit_hash", None)
  607. # Config is the primordial information item.
  608. # Instantiate config if needed
  609. adapter_path = None
  610. if isinstance(config, str):
  611. config = AutoConfig.from_pretrained(
  612. config, _from_pipeline=task, code_revision=code_revision, **hub_kwargs, **model_kwargs
  613. )
  614. hub_kwargs["_commit_hash"] = config._commit_hash
  615. elif config is None and isinstance(model, str):
  616. # Check for an adapter file in the model path if PEFT is available
  617. if is_peft_available():
  618. # `find_adapter_config_file` doesn't accept `trust_remote_code`
  619. _hub_kwargs = {k: v for k, v in hub_kwargs.items() if k != "trust_remote_code"}
  620. maybe_adapter_path = find_adapter_config_file(
  621. model,
  622. token=hub_kwargs["token"],
  623. revision=hub_kwargs["revision"],
  624. _commit_hash=hub_kwargs["_commit_hash"],
  625. )
  626. if maybe_adapter_path is not None:
  627. with open(maybe_adapter_path, "r", encoding="utf-8") as f:
  628. adapter_config = json.load(f)
  629. adapter_path = model
  630. # Only override the model name/path if the current value doesn't point to a
  631. # complete model with an embedded adapter so that local models with embedded
  632. # adapters will load from the local base model rather than pull the base
  633. # model named in the adapter's config from the hub.
  634. if not os.path.exists(model) or not os.path.exists(os.path.join(model, CONFIG_NAME)):
  635. model = adapter_config["base_model_name_or_path"]
  636. config = AutoConfig.from_pretrained(
  637. model, _from_pipeline=task, code_revision=code_revision, **hub_kwargs, **model_kwargs
  638. )
  639. hub_kwargs["_commit_hash"] = config._commit_hash
  640. custom_tasks = {}
  641. if config is not None and len(getattr(config, "custom_pipelines", {})) > 0:
  642. custom_tasks = config.custom_pipelines
  643. if task is None and trust_remote_code is not False:
  644. if len(custom_tasks) == 1:
  645. task = list(custom_tasks.keys())[0]
  646. else:
  647. raise RuntimeError(
  648. "We can't infer the task automatically for this model as there are multiple tasks available. Pick "
  649. f"one in {', '.join(custom_tasks.keys())}"
  650. )
  651. if task is None and model is not None:
  652. if not isinstance(model, str):
  653. raise RuntimeError(
  654. "Inferring the task automatically requires to check the hub with a model_id defined as a `str`. "
  655. f"{model} is not a valid model_id."
  656. )
  657. task = get_task(model, token)
  658. # Retrieve the task
  659. if task in custom_tasks:
  660. targeted_task, task_options = clean_custom_task(custom_tasks[task])
  661. if pipeline_class is None:
  662. if not trust_remote_code:
  663. raise ValueError(
  664. "Loading this pipeline requires you to execute the code in the pipeline file in that"
  665. " repo on your local machine. Make sure you have read the code there to avoid malicious use, then"
  666. " set the option `trust_remote_code=True` to remove this error."
  667. )
  668. class_ref = targeted_task["impl"]
  669. pipeline_class = get_class_from_dynamic_module(
  670. class_ref,
  671. model,
  672. code_revision=code_revision,
  673. **hub_kwargs,
  674. )
  675. else:
  676. normalized_task, targeted_task, task_options = check_task(task)
  677. if pipeline_class is None:
  678. pipeline_class = targeted_task["impl"]
  679. # Use default model/config/tokenizer for the task if no model is provided
  680. if model is None:
  681. model, default_revision = get_default_model_and_revision(targeted_task, task_options)
  682. revision = revision if revision is not None else default_revision
  683. logger.warning(
  684. f"No model was supplied, defaulted to {model} and revision {revision}.\n"
  685. "Using a pipeline without specifying a model name and revision in production is not recommended."
  686. )
  687. hub_kwargs["revision"] = revision
  688. if config is None and isinstance(model, str):
  689. config = AutoConfig.from_pretrained(model, _from_pipeline=task, **hub_kwargs, **model_kwargs)
  690. hub_kwargs["_commit_hash"] = config._commit_hash
  691. if device_map is not None:
  692. if "device_map" in model_kwargs:
  693. raise ValueError(
  694. 'You cannot use both `pipeline(... device_map=..., model_kwargs={"device_map":...})` as those'
  695. " arguments might conflict, use only one.)"
  696. )
  697. if device is not None:
  698. logger.warning(
  699. "Both `device` and `device_map` are specified. `device` will override `device_map`. You"
  700. " will most likely encounter unexpected behavior. Please remove `device` and keep `device_map`."
  701. )
  702. model_kwargs["device_map"] = device_map
  703. # BC for the `torch_dtype` argument
  704. if (torch_dtype := kwargs.get("torch_dtype")) is not None:
  705. logger.warning_once("`torch_dtype` is deprecated! Use `dtype` instead!")
  706. # If both are provided, keep `dtype`
  707. dtype = torch_dtype if dtype == "auto" else dtype
  708. if "torch_dtype" in model_kwargs or "dtype" in model_kwargs:
  709. if "torch_dtype" in model_kwargs:
  710. logger.warning_once("`torch_dtype` is deprecated! Use `dtype` instead!")
  711. # If the user did not explicitly provide `dtype` (i.e. the function default "auto" is still
  712. # present) but a value is supplied inside `model_kwargs`, we silently defer to the latter instead of
  713. # raising. This prevents false positives like providing `dtype` only via `model_kwargs` while the
  714. # top-level argument keeps its default value "auto".
  715. if dtype == "auto":
  716. dtype = None
  717. else:
  718. raise ValueError(
  719. 'You cannot use both `pipeline(... dtype=..., model_kwargs={"dtype":...})` as those'
  720. " arguments might conflict, use only one.)"
  721. )
  722. if dtype is not None:
  723. if isinstance(dtype, str) and hasattr(torch, dtype):
  724. dtype = getattr(torch, dtype)
  725. model_kwargs["dtype"] = dtype
  726. model_name = model if isinstance(model, str) else None
  727. # Load the correct model if possible
  728. if isinstance(model, str):
  729. model_classes = targeted_task["pt"]
  730. model = load_model(
  731. adapter_path if adapter_path is not None else model,
  732. model_classes=model_classes,
  733. config=config,
  734. task=task,
  735. **hub_kwargs,
  736. **model_kwargs,
  737. )
  738. hub_kwargs["_commit_hash"] = model.config._commit_hash
  739. # Check which preprocessing classes the pipeline uses
  740. # None values indicate optional classes that the pipeline can run without, we don't raise errors if loading fails
  741. load_tokenizer = pipeline_class._load_tokenizer
  742. load_feature_extractor = pipeline_class._load_feature_extractor
  743. load_image_processor = pipeline_class._load_image_processor
  744. load_processor = pipeline_class._load_processor
  745. if load_tokenizer or load_tokenizer is None:
  746. try:
  747. # Try to infer tokenizer from model or config name (if provided as str)
  748. if tokenizer is None:
  749. if isinstance(model_name, str):
  750. tokenizer = model_name
  751. elif isinstance(config, str):
  752. tokenizer = config
  753. else:
  754. # Impossible to guess what is the right tokenizer here
  755. raise Exception(
  756. "Impossible to guess which tokenizer to use. "
  757. "Please provide a PreTrainedTokenizer class or a path/identifier to a pretrained tokenizer."
  758. )
  759. # Instantiate tokenizer if needed
  760. if isinstance(tokenizer, (str, tuple)):
  761. if isinstance(tokenizer, tuple):
  762. # For tuple we have (tokenizer name, {kwargs})
  763. use_fast = tokenizer[1].pop("use_fast", use_fast)
  764. tokenizer_identifier = tokenizer[0]
  765. tokenizer_kwargs = tokenizer[1]
  766. else:
  767. tokenizer_identifier = tokenizer
  768. tokenizer_kwargs = model_kwargs.copy()
  769. tokenizer_kwargs.pop("torch_dtype", None), tokenizer_kwargs.pop("dtype", None)
  770. tokenizer = AutoTokenizer.from_pretrained(
  771. tokenizer_identifier, use_fast=use_fast, _from_pipeline=task, **hub_kwargs, **tokenizer_kwargs
  772. )
  773. except Exception as e:
  774. if load_tokenizer:
  775. raise e
  776. else:
  777. tokenizer = None
  778. if load_image_processor or load_image_processor is None:
  779. try:
  780. # Try to infer image processor from model or config name (if provided as str)
  781. if image_processor is None:
  782. if isinstance(model_name, str):
  783. image_processor = model_name
  784. elif isinstance(config, str):
  785. image_processor = config
  786. # Backward compatibility, as `feature_extractor` used to be the name
  787. # for `ImageProcessor`.
  788. elif feature_extractor is not None and isinstance(feature_extractor, BaseImageProcessor):
  789. image_processor = feature_extractor
  790. else:
  791. # Impossible to guess what is the right image_processor here
  792. raise Exception(
  793. "Impossible to guess which image processor to use. "
  794. "Please provide a PreTrainedImageProcessor class or a path/identifier "
  795. "to a pretrained image processor."
  796. )
  797. # Instantiate image_processor if needed
  798. if isinstance(image_processor, (str, tuple)):
  799. image_processor = AutoImageProcessor.from_pretrained(
  800. image_processor, _from_pipeline=task, **hub_kwargs, **model_kwargs
  801. )
  802. except Exception as e:
  803. if load_image_processor:
  804. raise e
  805. else:
  806. image_processor = None
  807. if load_feature_extractor or load_feature_extractor is None:
  808. try:
  809. # Try to infer feature extractor from model or config name (if provided as str)
  810. if feature_extractor is None:
  811. if isinstance(model_name, str):
  812. feature_extractor = model_name
  813. elif isinstance(config, str):
  814. feature_extractor = config
  815. else:
  816. # Impossible to guess what is the right feature_extractor here
  817. raise Exception(
  818. "Impossible to guess which feature extractor to use. "
  819. "Please provide a PreTrainedFeatureExtractor class or a path/identifier "
  820. "to a pretrained feature extractor."
  821. )
  822. # Instantiate feature_extractor if needed
  823. if isinstance(feature_extractor, (str, tuple)):
  824. feature_extractor = AutoFeatureExtractor.from_pretrained(
  825. feature_extractor, _from_pipeline=task, **hub_kwargs, **model_kwargs
  826. )
  827. config_dict, _ = FeatureExtractionMixin.get_feature_extractor_dict(
  828. pretrained_model_name_or_path or model_name,
  829. **hub_kwargs,
  830. )
  831. processor_class = config_dict.get("processor_class", None)
  832. if processor_class is not None and processor_class.endswith("WithLM") and isinstance(model_name, str):
  833. try:
  834. import kenlm # to trigger `ImportError` if not installed
  835. from pyctcdecode import BeamSearchDecoderCTC
  836. if os.path.isdir(model_name) or os.path.isfile(model_name):
  837. decoder = BeamSearchDecoderCTC.load_from_dir(model_name)
  838. else:
  839. language_model_glob = os.path.join(
  840. BeamSearchDecoderCTC._LANGUAGE_MODEL_SERIALIZED_DIRECTORY, "*"
  841. )
  842. alphabet_filename = BeamSearchDecoderCTC._ALPHABET_SERIALIZED_FILENAME
  843. allow_patterns = [language_model_glob, alphabet_filename]
  844. decoder = BeamSearchDecoderCTC.load_from_hf_hub(model_name, allow_patterns=allow_patterns)
  845. kwargs["decoder"] = decoder
  846. except ImportError as e:
  847. logger.warning(
  848. f"Could not load the `decoder` for {model_name}. Defaulting to raw CTC. Error: {e}"
  849. )
  850. if not is_kenlm_available():
  851. logger.warning("Try to install `kenlm`: `pip install kenlm")
  852. if not is_pyctcdecode_available():
  853. logger.warning("Try to install `pyctcdecode`: `pip install pyctcdecode")
  854. except Exception as e:
  855. if load_feature_extractor:
  856. raise e
  857. else:
  858. feature_extractor = None
  859. if load_processor or load_processor is None:
  860. try:
  861. # Try to infer processor from model or config name (if provided as str)
  862. if processor is None:
  863. if isinstance(model_name, str):
  864. processor = model_name
  865. elif isinstance(config, str):
  866. processor = config
  867. else:
  868. # Impossible to guess what is the right processor here
  869. raise Exception(
  870. "Impossible to guess which processor to use. "
  871. "Please provide a processor instance or a path/identifier "
  872. "to a processor."
  873. )
  874. # Instantiate processor if needed
  875. if isinstance(processor, (str, tuple)):
  876. processor = AutoProcessor.from_pretrained(processor, _from_pipeline=task, **hub_kwargs, **model_kwargs)
  877. if not isinstance(processor, ProcessorMixin):
  878. raise TypeError(
  879. "Processor was loaded, but it is not an instance of `ProcessorMixin`. "
  880. f"Got type `{type(processor)}` instead. Please check that you specified "
  881. "correct pipeline task for the model and model has processor implemented and saved."
  882. )
  883. except Exception as e:
  884. if load_processor:
  885. raise e
  886. else:
  887. processor = None
  888. if tokenizer is not None:
  889. kwargs["tokenizer"] = tokenizer
  890. if feature_extractor is not None:
  891. kwargs["feature_extractor"] = feature_extractor
  892. if dtype is not None:
  893. kwargs["dtype"] = dtype
  894. if image_processor is not None:
  895. kwargs["image_processor"] = image_processor
  896. if device is not None:
  897. kwargs["device"] = device
  898. if processor is not None:
  899. kwargs["processor"] = processor
  900. return pipeline_class(model=model, task=task, **kwargs)