|
|
import cv2
|
|
|
import numpy as np
|
|
|
import onnxruntime as ort
|
|
|
|
|
|
|
|
|
MODEL_PATH = "meiki.text.detect.tiny.v0.onnx"
|
|
|
INPUT_IMAGE_PATH = "input.jpg"
|
|
|
OUTPUT_IMAGE_PATH = "output.tiny.jpg"
|
|
|
|
|
|
|
|
|
MODEL_SIZE = 320
|
|
|
|
|
|
def resize_and_pad(image: np.ndarray, size: int):
|
|
|
"""
|
|
|
Resizes a GRAYSCALE image to the model's expected size,
|
|
|
maintaining aspect ratio and padding.
|
|
|
|
|
|
Returns:
|
|
|
- The padded image ready for the model.
|
|
|
- The ratio used to resize the image.
|
|
|
- The padding amounts (width, height).
|
|
|
"""
|
|
|
|
|
|
original_height, original_width = image.shape
|
|
|
|
|
|
|
|
|
ratio = min(size / original_width, size / original_height)
|
|
|
new_width = int(original_width * ratio)
|
|
|
new_height = int(original_height * ratio)
|
|
|
|
|
|
|
|
|
resized_image = cv2.resize(image, (new_width, new_height), interpolation=cv2.INTER_LINEAR)
|
|
|
|
|
|
|
|
|
padded_image = np.zeros((size, size), dtype=np.uint8)
|
|
|
|
|
|
|
|
|
pad_w = (size - new_width) // 2
|
|
|
pad_h = (size - new_height) // 2
|
|
|
|
|
|
|
|
|
padded_image[pad_h:pad_h + new_height, pad_w:pad_w + new_width] = resized_image
|
|
|
|
|
|
return padded_image, ratio, pad_w, pad_h
|
|
|
|
|
|
def main():
|
|
|
"""
|
|
|
Main function to run the inference process.
|
|
|
"""
|
|
|
|
|
|
try:
|
|
|
|
|
|
|
|
|
session = ort.InferenceSession(MODEL_PATH, providers=['CPUExecutionProvider'])
|
|
|
print(f"Successfully loaded model: {MODEL_PATH}")
|
|
|
except Exception as e:
|
|
|
print(f"Error: Failed to load the ONNX model. Make sure '{MODEL_PATH}' exists.")
|
|
|
print(f"Details: {e}")
|
|
|
return
|
|
|
|
|
|
|
|
|
try:
|
|
|
|
|
|
original_image = cv2.imread(INPUT_IMAGE_PATH)
|
|
|
if original_image is None:
|
|
|
raise FileNotFoundError(f"Image not found at '{INPUT_IMAGE_PATH}'")
|
|
|
print(f"Successfully loaded image: {INPUT_IMAGE_PATH}")
|
|
|
except Exception as e:
|
|
|
print(f"Error: {e}")
|
|
|
return
|
|
|
|
|
|
|
|
|
img_gray = cv2.cvtColor(original_image, cv2.COLOR_BGR2GRAY)
|
|
|
|
|
|
|
|
|
padded_image, ratio, pad_w, pad_h = resize_and_pad(img_gray, MODEL_SIZE)
|
|
|
|
|
|
|
|
|
img_normalized = padded_image.astype(np.float32) / 255.0
|
|
|
|
|
|
|
|
|
image_input_tensor = np.expand_dims(np.expand_dims(img_normalized, axis=0), axis=0)
|
|
|
|
|
|
|
|
|
|
|
|
sizes_input_tensor = np.array([[MODEL_SIZE, MODEL_SIZE]], dtype=np.int64)
|
|
|
|
|
|
|
|
|
input_names = [inp.name for inp in session.get_inputs()]
|
|
|
|
|
|
|
|
|
inputs = {
|
|
|
input_names[0]: image_input_tensor,
|
|
|
input_names[1]: sizes_input_tensor
|
|
|
}
|
|
|
|
|
|
|
|
|
outputs = session.run(None, inputs)
|
|
|
boxes_from_model = outputs[0]
|
|
|
|
|
|
print(f"Found {len(boxes_from_model)} potential text boxes.")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
output_image = original_image.copy()
|
|
|
for box in boxes_from_model:
|
|
|
x_min, y_min, x_max, y_max = box
|
|
|
|
|
|
|
|
|
x_min_unpadded = x_min - pad_w
|
|
|
y_min_unpadded = y_min - pad_h
|
|
|
x_max_unpadded = x_max - pad_w
|
|
|
y_max_unpadded = y_max - pad_h
|
|
|
|
|
|
|
|
|
final_x_min = int(x_min_unpadded / ratio)
|
|
|
final_y_min = int(y_min_unpadded / ratio)
|
|
|
final_x_max = int(x_max_unpadded / ratio)
|
|
|
final_y_max = int(y_max_unpadded / ratio)
|
|
|
|
|
|
|
|
|
cv2.rectangle(output_image, (final_x_min, final_y_min), (final_x_max, final_y_max), (0, 0, 255), 2)
|
|
|
|
|
|
|
|
|
cv2.imwrite(OUTPUT_IMAGE_PATH, output_image)
|
|
|
print(f"Successfully saved result to: {OUTPUT_IMAGE_PATH}")
|
|
|
|
|
|
|
|
|
if __name__ == "__main__":
|
|
|
main()
|
|
|
|