Commit 1752c7c
Parent(s): 6e55444

docs: add comments

Signed-off-by: jupyterjazz <[email protected]>
modeling_lora.py CHANGED (+16 -4)
@@ -162,6 +162,16 @@ class LoRAParametrization(nn.Module):
         dropout_p: float,
         alpha: float,
     ):
+        """
+        Registering LoRA adapters to all embedding and linear layers.
+
+        Additionally, we implement a custom forward function for LoRA parametrization.
+        This function modifies the layer's forward pass to optionally use task-specific
+        parameters. When a `task_id` is provided, it employs a LoRA parametrization
+        to modify the original weights according to the specific task. This allows
+        the layer to adapt dynamically to different tasks at runtime. If no `task_id`
+        is specified, the layer uses its original weights.
+        """
         if isinstance(layer, nn.Linear):
             parametrize.register_parametrization(
                 layer,
@@ -312,11 +322,11 @@ class XLMRobertaLoRA(XLMRobertaPreTrainedModel):
         config = XLMRobertaFlashConfig.from_pretrained(
             pretrained_model_name_or_path, *model_args, **kwargs
         )
-        if config.load_trained_adapters:
+        if config.load_trained_adapters:  # checkpoint already contains LoRA adapters
             return super().from_pretrained(
                 pretrained_model_name_or_path, *model_args, **kwargs
             )
-        else:
+        else:  # initializing new adapters
             roberta = XLMRobertaModel.from_pretrained(
                 pretrained_model_name_or_path, *model_args, **kwargs
             )
@@ -358,10 +368,12 @@ class XLMRobertaLoRA(XLMRobertaPreTrainedModel):
         **kwargs,
     ) -> Union[List[torch.Tensor], np.ndarray, torch.Tensor]:
         """
-        Computes sentence embeddings
+        Computes sentence embeddings.
 
+        sentences(`str` or `List[str]`):
+            Sentence or sentences to be encoded
         task_type(`str`, *optional*, defaults to `None`):
-            Specifies the task for which the encoding is intended. If `task_type` is not
+            Specifies the task for which the encoding is intended. If `task_type` is not provided,
             all LoRA adapters are disabled, and the model reverts to its original,
             general-purpose weights.
         """
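
The docstring added in the first hunk describes a task-conditional forward pass for the LoRA parametrization, but the implementation itself is outside this diff. As a rough, minimal sketch of the idea (the class name, tensor shapes, and `current_task` attribute are assumptions for illustration, not the repo's actual API), a parametrization registered with `torch.nn.utils.parametrize` could switch between original and task-adapted weights like this:

```python
import torch
import torch.nn as nn
import torch.nn.utils.parametrize as parametrize


class TaskLoRASketch(nn.Module):
    """Illustrative only: one low-rank (A, B) pair per task, applied to a weight."""

    def __init__(self, fan_in, fan_out, num_tasks, rank=4, alpha=1.0):
        super().__init__()
        self.lora_A = nn.Parameter(torch.zeros(num_tasks, rank, fan_in))
        self.lora_B = nn.Parameter(torch.randn(num_tasks, fan_out, rank) * 0.01)
        self.scaling = alpha / rank
        self.current_task = None  # set externally; plays the role of `task_id`

    def forward(self, original_weight):
        if self.current_task is None:
            # No task specified: the layer uses its original weights.
            return original_weight
        # Task specified: modify the original weights with the task's low-rank update.
        task = self.current_task
        return original_weight + self.scaling * (self.lora_B[task] @ self.lora_A[task])


layer = nn.Linear(16, 16)
lora = TaskLoRASketch(fan_in=16, fan_out=16, num_tasks=2)
parametrize.register_parametrization(layer, "weight", lora)

x = torch.randn(1, 16)
lora.current_task = None   # general-purpose weights
y_plain = layer(x)
lora.current_task = 1      # weights adapted for task 1
y_task = layer(x)
```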
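
As a usage sketch of the two documented entry points, under the assumption that the import path mirrors this file's name (the checkpoint path and the `task_type` value are placeholders, since the diff names neither):

```python
from modeling_lora import XLMRobertaLoRA

# Placeholder path; the diff does not name a checkpoint. If the checkpoint's config
# sets `load_trained_adapters`, trained LoRA adapters are loaded; otherwise new
# adapters are initialized on top of the base XLMRobertaModel weights.
model = XLMRobertaLoRA.from_pretrained("<model-or-path>")

sentences = ["A sentence to embed.", "Another one."]

# With task_type, encoding uses the task-specific LoRA adapters (value is hypothetical).
task_embeddings = model.encode(sentences, task_type="<task-name>")

# Without task_type, all adapters are disabled and the original weights are used.
general_embeddings = model.encode(sentences)
```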