Create handler.py

d10e47a verified 11 months ago

4.52 kB

	import json
	from transformers import AutoModelForSequenceClassification, AutoTokenizer
	import torch
	from typing import Dict, List, Any

	class EndpointHandler:
	    """Inference handler wrapping a sequence-classification model.

	    The instance is built once from a model directory and is then called
	    with one request payload at a time. Each call returns, for every input
	    text, the softmax score of every label known to the model.
	    """

	    # Tokenizers with no configured limit report a very large sentinel as
	    # ``model_max_length`` (documented by Transformers as int(1e30)). Any
	    # value at or above this threshold is treated as "no limit configured".
	    _NO_LIMIT_THRESHOLD = int(1e20)

	    def __init__(self, path=""):
	        """Load the tokenizer and model and move the model to a device.

	        Args:
	            path (str): Directory containing the model and tokenizer files.
	        """
	        self.tokenizer = AutoTokenizer.from_pretrained(path)
	        self.model = AutoModelForSequenceClassification.from_pretrained(path)

	        # Use the GPU when one is visible to torch, otherwise the CPU.
	        self.device = torch.device(
	            "cuda" if torch.cuda.is_available() else "cpu"
	        )
	        self.model.to(self.device)
	        self.model.eval()  # inference only
	        print("Model and tokenizer loaded successfully.")

	    def __call__(self, data: Dict[str, Any]) -> List[List[Dict[str, Any]]]:
	        """Classify the text(s) found in a request payload.

	        Args:
	            data: Request payload. ``data["inputs"]`` is required and is
	                passed to the tokenizer as-is (a string or a list of
	                strings). A ``"parameters"`` entry may be present but is
	                currently ignored. The payload is not modified.

	        Returns:
	            One entry per input text, in input order. Each entry is a list
	            with one ``{"label": str, "score": float}`` dict per class, in
	            class-index order, e.g.
	            ``[[{"label": "AI", "score": 0.98}, ...]]``. A single string
	            input still yields an outer list of length one. An empty list
	            of inputs yields ``[]``.

	        Raises:
	            ValueError: If ``data`` has no ``"inputs"`` entry (or it is
	                ``None``).
	        """
	        # Read without pop() so the caller's payload is left untouched.
	        inputs = data.get("inputs")
	        if inputs is None:
	            raise ValueError("Missing 'inputs' key in request data")

	        # Nothing to classify: skip the tokenizer and model entirely.
	        if isinstance(inputs, (list, tuple)) and not inputs:
	            return []

	        # Forward the tokenizer's own limit only when it is a real one.
	        # The "no limit" sentinel is replaced by None so the tokenizer
	        # applies its default handling rather than receiving a value that
	        # can reportedly overflow in the fast (Rust-backed) tokenizers.
	        max_length = self.tokenizer.model_max_length
	        if max_length is None or max_length >= self._NO_LIMIT_THRESHOLD:
	            max_length = None

	        encoded = self.tokenizer(
	            inputs,
	            return_tensors="pt",
	            padding=True,
	            truncation=True,
	            max_length=max_length,
	        ).to(self.device)

	        with torch.no_grad():  # no gradients needed for inference
	            logits = self.model(**encoded).logits

	        # Normalise over the class dimension, then convert once to plain
	        # Python floats so the result is JSON-serialisable.
	        score_rows = torch.softmax(logits, dim=-1).tolist()

	        id2label = self.model.config.id2label
	        return [
	            [
	                {"label": id2label[class_idx], "score": score}
	                for class_idx, score in enumerate(row)
	            ]
	            for row in score_rows
	        ]