Commit 851aaca
Parent(s): 5418705

refactor: set task in lora class rather than xlm roberta

- modeling_lora.py +43 -7
- modeling_xlm_roberta.py +1 -24
modeling_lora.py CHANGED

@@ -1,18 +1,17 @@
 import math
 import os
+import warnings
 from functools import partial
-from typing import Iterator, Optional, Tuple, Union
+from typing import Iterator, List, Optional, Tuple, Union

+import numpy as np
 import torch
 import torch.nn.utils.parametrize as parametrize
 from torch import nn
 from torch.nn import Parameter
 from transformers import PretrainedConfig

-from .modeling_xlm_roberta import (
-    XLMRobertaFlashConfig,
-    XLMRobertaModel,
-)
+from .modeling_xlm_roberta import XLMRobertaFlashConfig, XLMRobertaModel


 def initialized_weights(
@@ -231,7 +230,6 @@ class XLMRobertaLoRA(XLMRobertaModel):
         # By default, disable LoRA until it's specified which adapter/task to use
         self.current_task = None

-
     @property
     def main_params_trainable(self):
         return self._main_params_trainable
@@ -273,7 +271,8 @@ class XLMRobertaLoRA(XLMRobertaModel):
                 pretrained_model_name_or_path, *model_args, **kwargs
             )
         else:
-
+            dtype = config.torch_dtype if config.torch_dtype else torch.bfloat16
+            torch.set_default_dtype(dtype)
             return cls(config)

     def _register_lora(self, num_adaptations, rank, dropout_p, alpha):
@@ -327,3 +326,40 @@ class XLMRobertaLoRA(XLMRobertaModel):
         ):
             if "lora" in name or self.main_params_trainable:
                 yield name, param
+
+    @torch.inference_mode()
+    def encode(
+        self,
+        *args,
+        task: Optional[str] = None,
+        **kwargs,
+    ) -> Union[List[torch.Tensor], np.ndarray, torch.Tensor]:
+        """
+        Computes sentence embeddings
+
+        task(`str`, *optional*, defaults to None):
+            Specifies the task for which the encoding is intended. This
+            controls the use of specialized LoRA adapters that are tuned for specific tasks.
+            If provided, the corresponding LoRA adapter is enabled, enhancing the model's
+            performance for that task. If `None` or not provided, LoRA is disabled, and the
+            model uses its original, general-purpose weights.
+        """
+        lora_adapter_num = None
+        if self.config.lora_adaptations:
+            if task:
+                if task in self.config.lora_adaptations:
+                    lora_adapter_num = self.config.lora_adaptations.index(task)
+                else:
+                    raise ValueError(
+                        f"Unsupported task '{task}'. "
+                        f"Supported tasks are: {', '.join(self.config.lora_adaptations)}."
+                    )
+            else:
+                warnings.warn(
+                    f"Task-specific embeddings are disabled. To enable, specify the `task` "
+                    f"argument with one of the supported tasks: {', '.join(self.config.lora_adaptations)}",
+                    category=UserWarning,
+                )
+        self.current_task = lora_adapter_num
+
+        return super().encode(*args, **kwargs)
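For context, a minimal usage sketch of the new LoRA-side entry point (hedged: the import path, checkpoint path, and task name below are placeholders; valid task names are whatever the checkpoint's `config.lora_adaptations` lists):

# Sketch only: assumes this module is importable as `modeling_lora` and that a
# local checkpoint carries `lora_adaptations` in its config.
from modeling_lora import XLMRobertaLoRA

model = XLMRobertaLoRA.from_pretrained("path/to/checkpoint")  # placeholder path

# Passing `task` resolves the adapter index, sets `self.current_task`, and then
# delegates to the base XLMRobertaModel.encode(); omitting it keeps LoRA disabled.
embeddings = model.encode(["A sample sentence."], task="retrieval")  # placeholder task name

Note also that when a model is built fresh (the `else` branch of `from_pretrained`), the default dtype now falls back to `config.torch_dtype` or `torch.bfloat16` via `torch.set_default_dtype`, so newly initialized weights are created in that dtype.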
modeling_xlm_roberta.py CHANGED

@@ -452,7 +452,6 @@ class XLMRobertaModel(XLMRobertaPreTrainedModel):
         convert_to_tensor: bool = False,
         device: Optional[torch.device] = None,
         normalize_embeddings: bool = False,
-        task: Optional[str] = None,
         **tokenizer_kwargs,
     ) -> Union[List[torch.Tensor], np.ndarray, torch.Tensor]:
         """
@@ -482,12 +481,6 @@ class XLMRobertaModel(XLMRobertaPreTrainedModel):
                If set to true, returned vectors will have length 1. In that case, the
                faster dot-product (util.dot_score) instead of cosine similarity can
                be used.
-            task(`str`, *optional*, defaults to None):
-                Specifies the task for which the encoding is intended. This
-                controls the use of specialized LoRA adapters that are tuned for specific tasks.
-                If provided, the corresponding LoRA adapter is enabled, enhancing the model's
-                performance for that task. If `None` or not provided, LoRA is disabled, and the
-                model uses its original, general-purpose weights.
             tokenizer_kwargs(`Dict[str, Any]`, *optional*, defaults to {}):
                Keyword arguments for the tokenizer
         Returns:
@@ -525,22 +518,6 @@ class XLMRobertaModel(XLMRobertaPreTrainedModel):
         if device is not None:
             self.to(device)

-        lora_adapter_num = None
-        if self.config.lora_adaptations:
-            if task:
-                if task in self.config.lora_adaptations:
-                    lora_adapter_num = self.config.lora_adaptations.index(task)
-                else:
-                    raise ValueError(
-                        f"Unsupported task '{task}'. "
-                        f"Supported tasks are: {', '.join(self.config.lora_adaptations)}.")
-            else:
-                logger.warning(
-                    f"Task-specific embeddings are disabled. To enable, specify the `task` "
-                    f"argument with one of the supported tasks: {', '.join(self.config.lora_adaptations)}"
-                )
-
-
         permutation = np.argsort([-len(i) for i in sentences])
         inverse_permutation = np.argsort(permutation)
         sentences = [sentences[idx] for idx in permutation]
@@ -570,7 +547,7 @@ class XLMRobertaModel(XLMRobertaPreTrainedModel):
                 return_tensors='pt',
                 **tokenizer_kwargs,
             ).to(self.device)
-            token_embs = self.forward(**encoded_input
+            token_embs = self.forward(**encoded_input)[0]

             # Accumulate in fp32 to avoid overflow
             token_embs = token_embs.float()
|