# Run the meiki tiny ONNX text-detection model on a single image and
# save a copy with the detected text boxes drawn on it.
import cv2
import numpy as np
import onnxruntime as ort
# --- CONFIGURATION ---
# Path to the ONNX text-detection model file (loaded by main()).
MODEL_PATH = "meiki.text.detect.tiny.v0.onnx"
# Image read from disk and scanned for text regions.
INPUT_IMAGE_PATH = "input.jpg"
# Destination for the annotated copy of the input image.
OUTPUT_IMAGE_PATH = "output.tiny.jpg"
# The model expects a 320x320 input image.
MODEL_SIZE = 320
def resize_and_pad(image: np.ndarray, size: int) -> tuple[np.ndarray, float, int, int]:
    """
    Resize a GRAYSCALE image to fit inside a ``size`` x ``size`` square,
    preserving aspect ratio, and pad the remainder with black.

    Args:
        image: 2-D (single-channel) image array.
        size: Side length of the square model input (e.g. 320).

    Returns:
        A 4-tuple of:
        - padded_image: the (size, size) uint8 image ready for the model.
        - ratio: the scale factor applied to the original image.
        - pad_w: horizontal padding (pixels) added on the left edge.
        - pad_h: vertical padding (pixels) added on the top edge.
    """
    # Get the original image dimensions (2-D array: rows, cols).
    original_height, original_width = image.shape
    # Scale so the longer side exactly fits the target square.
    ratio = min(size / original_width, size / original_height)
    # Guard against truncation to a zero-pixel dimension for extreme
    # aspect ratios — cv2.resize rejects empty target sizes.
    new_width = max(1, int(original_width * ratio))
    new_height = max(1, int(original_height * ratio))
    # Resize the image using the calculated ratio.
    resized_image = cv2.resize(image, (new_width, new_height), interpolation=cv2.INTER_LINEAR)
    # Black square canvas; the resized image is pasted into its center.
    padded_image = np.zeros((size, size), dtype=np.uint8)
    # Padding needed to center the resized image.
    pad_w = (size - new_width) // 2
    pad_h = (size - new_height) // 2
    padded_image[pad_h:pad_h + new_height, pad_w:pad_w + new_width] = resized_image
    return padded_image, ratio, pad_w, pad_h
def _prepare_input(original_image: np.ndarray) -> tuple[np.ndarray, float, int, int]:
    """Convert a BGR image into the model's normalized (1, 1, S, S) float32 tensor.

    Returns the tensor plus the (ratio, pad_w, pad_h) needed to map the
    model's box coordinates back into the original image space.
    """
    # The model requires a grayscale image.
    img_gray = cv2.cvtColor(original_image, cv2.COLOR_BGR2GRAY)
    # Fit the image into the model's square input, remembering the transform.
    padded_image, ratio, pad_w, pad_h = resize_and_pad(img_gray, MODEL_SIZE)
    # Normalize pixel values to [0, 1].
    img_normalized = padded_image.astype(np.float32) / 255.0
    # Add batch and channel dimensions: (H, W) -> (1, 1, H, W).
    tensor = np.expand_dims(np.expand_dims(img_normalized, axis=0), axis=0)
    return tensor, ratio, pad_w, pad_h


def _draw_boxes(output_image: np.ndarray, boxes, ratio: float, pad_w: int, pad_h: int) -> None:
    """Map model-space boxes back to original-image coordinates and draw them in red.

    Coordinates are un-padded, un-scaled, then clipped to the image bounds —
    boxes predicted inside the padded border would otherwise produce
    negative or out-of-range pixel positions.
    """
    height, width = output_image.shape[:2]
    for box in boxes:
        x_min, y_min, x_max, y_max = box
        # Step 1: subtract the padding, Step 2: undo the resize ratio.
        final_x_min = int((x_min - pad_w) / ratio)
        final_y_min = int((y_min - pad_h) / ratio)
        final_x_max = int((x_max - pad_w) / ratio)
        final_y_max = int((y_max - pad_h) / ratio)
        # Clip to the original image so every corner is a valid pixel.
        final_x_min = max(0, min(width - 1, final_x_min))
        final_y_min = max(0, min(height - 1, final_y_min))
        final_x_max = max(0, min(width - 1, final_x_max))
        final_y_max = max(0, min(height - 1, final_y_max))
        # Draw a red (BGR: 0,0,255) rectangle, 2px thick.
        cv2.rectangle(output_image, (final_x_min, final_y_min), (final_x_max, final_y_max), (0, 0, 255), 2)


def main():
    """
    Main function to run the inference process: load the model, pre-process
    the input image, run detection, draw the boxes, and save the result.
    """
    # --- 1. Load the Model ---
    try:
        # CPUExecutionProvider for broad compatibility (no GPU required).
        session = ort.InferenceSession(MODEL_PATH, providers=['CPUExecutionProvider'])
        print(f"Successfully loaded model: {MODEL_PATH}")
    except Exception as e:
        print(f"Error: Failed to load the ONNX model. Make sure '{MODEL_PATH}' exists.")
        print(f"Details: {e}")
        return
    # --- 2. Load and Pre-process the Input Image ---
    try:
        original_image = cv2.imread(INPUT_IMAGE_PATH)
        # cv2.imread returns None (no exception) on a missing/unreadable file.
        if original_image is None:
            raise FileNotFoundError(f"Image not found at '{INPUT_IMAGE_PATH}'")
        print(f"Successfully loaded image: {INPUT_IMAGE_PATH}")
    except Exception as e:
        print(f"Error: {e}")
        return
    image_input_tensor, ratio, pad_w, pad_h = _prepare_input(original_image)
    # --- 3. Run Inference ---
    # The model takes a second input with the image size; we pass the padded size.
    sizes_input_tensor = np.array([[MODEL_SIZE, MODEL_SIZE]], dtype=np.int64)
    # Feed tensors by the model's own declared input names.
    input_names = [inp.name for inp in session.get_inputs()]
    inputs = {
        input_names[0]: image_input_tensor,
        input_names[1]: sizes_input_tensor
    }
    # First output is assumed to be the detected boxes (x_min, y_min, x_max, y_max)
    # in padded-image coordinates — NOTE(review): confirm against the model's spec.
    outputs = session.run(None, inputs)
    boxes_from_model = outputs[0]
    print(f"Found {len(boxes_from_model)} potential text boxes.")
    # --- 4. Post-process and Draw Bounding Boxes ---
    output_image = original_image.copy()
    _draw_boxes(output_image, boxes_from_model, ratio, pad_w, pad_h)
    # --- 5. Save the Final Image ---
    cv2.imwrite(OUTPUT_IMAGE_PATH, output_image)
    print(f"Successfully saved result to: {OUTPUT_IMAGE_PATH}")
# Run the full detection pipeline only when executed as a script,
# not when this module is imported.
if __name__ == "__main__":
    main()