Commit 25b8a6e
Parent(s): e040872

add inference scripts

Changed files:
- usage/inference_damo_yolo.py  +287 -0
- usage/inference_rtdetrv2.py   +179 -0
- usage/inference_yolov11.py    +144 -0
usage/inference_damo_yolo.py
ADDED
@@ -0,0 +1,287 @@
#!/usr/bin/env python3

# Example usage:
# python tools/inference_dir.py \
#     --model_path path/to/model.pth \
#     --config path/to/config.py \
#     --image_dir path/to/image_dir \
#     --output_json path/to/output.json \
#     --infer_size 640 640 \
#     --device cuda

import argparse
import json
import os
from pathlib import Path

if 'PYTORCH_CUDA_ALLOC_CONF' in os.environ:
    alloc_conf = os.environ['PYTORCH_CUDA_ALLOC_CONF']
    if 'expandable_segments' in alloc_conf:
        # Remove expandable_segments option
        new_conf = ','.join([opt for opt in alloc_conf.split(',') if 'expandable_segments' not in opt])
        if new_conf:
            os.environ['PYTORCH_CUDA_ALLOC_CONF'] = new_conf
        else:
            os.environ.pop('PYTORCH_CUDA_ALLOC_CONF', None)

import cv2
import numpy as np
import torch
from loguru import logger
from PIL import Image
from tqdm import tqdm

from damo.base_models.core.ops import RepConv
from damo.config.base import parse_config
from damo.detectors.detector import build_local_model
from damo.utils import postprocess
from damo.utils.demo_utils import transform_img
from damo.structures.image_list import ImageList
from damo.structures.bounding_box import BoxList


def pad_image(img, target_size):
    """Pad image to target size."""
    n, c, h, w = img.shape
    assert n == 1
    assert h <= target_size[0] and w <= target_size[1]
    target_size = [n, c, target_size[0], target_size[1]]
    pad_imgs = torch.zeros(*target_size)
    pad_imgs[:, :c, :h, :w].copy_(img)

    img_sizes = [img.shape[-2:]]
    pad_sizes = [pad_imgs.shape[-2:]]

    return ImageList(pad_imgs, img_sizes, pad_sizes)


def get_image_files(image_dir):
    """Get all image files from directory."""
    image_extensions = {'.jpg', '.jpeg', '.png', '.bmp', '.tiff', '.tif', '.webp'}
    image_dir = Path(image_dir)
    image_files = []
    for ext in image_extensions:
        image_files.extend(image_dir.glob(f'*{ext}'))
        image_files.extend(image_dir.glob(f'*{ext.upper()}'))
    return sorted(image_files)


class BatchInfer:
    def __init__(self, config, infer_size=[640, 640], device='cuda', ckpt=None):
        """Initialize inference engine."""
        self.ckpt_path = ckpt
        suffix = ckpt.split('.')[-1]
        if suffix == 'onnx':
            self.engine_type = 'onnx'
        elif suffix == 'trt':
            self.engine_type = 'tensorRT'
        elif suffix in ['pt', 'pth']:
            self.engine_type = 'torch'
        else:
            raise ValueError(f'Unknown checkpoint format: {suffix}')

        if torch.cuda.is_available() and device == 'cuda':
            self.device = 'cuda'
        else:
            self.device = 'cpu'
            logger.warning('CUDA not available, using CPU')

        if "class_names" in config.dataset:
            self.class_names = config.dataset.class_names
        else:
            self.class_names = []
            for i in range(config.model.head.num_classes):
                self.class_names.append(str(i))
            self.class_names = tuple(self.class_names)

        self.infer_size = infer_size
        config.dataset.size_divisibility = 0
        self.config = config
        self.model = self._build_engine(self.config, self.engine_type)

    def _build_engine(self, config, engine_type):
        """Build inference engine."""
        logger.info(f'Inference with {engine_type} engine!')
        if engine_type == 'torch':
            model = build_local_model(config, self.device)
            ckpt = torch.load(self.ckpt_path, map_location=self.device)
            model.load_state_dict(ckpt['model'], strict=True)
            for layer in model.modules():
                if isinstance(layer, RepConv):
                    layer.switch_to_deploy()
            model.eval()
            return model
        elif engine_type == 'tensorRT':
            raise NotImplementedError('TensorRT inference not implemented in this script. Use demo.py instead.')
        elif engine_type == 'onnx':
            raise NotImplementedError('ONNX inference not implemented in this script. Use demo.py instead.')
        else:
            raise NotImplementedError(f'{engine_type} is not supported yet! Please use one of [onnx, torch, tensorRT]')

    def preprocess(self, origin_img):
        """Preprocess image for inference."""
        img = transform_img(origin_img, 0,
                            **self.config.test.augment.transform,
                            infer_size=self.infer_size)
        oh, ow, _ = origin_img.shape
        img = pad_image(img.tensors, self.infer_size)
        img = img.to(self.device)
        return img, (ow, oh)

    def forward(self, origin_image):
        """Run inference on image."""
        image, origin_shape = self.preprocess(origin_image)
        with torch.no_grad():
            output = self.model(image)
        return output, image, origin_shape

    def postprocess_to_coco(self, preds, image, origin_shape):
        """Postprocess predictions to COCO format."""
        output = preds[0]
        output = output.resize(origin_shape)
        output = output.convert('xywh')  # Convert to xywh format for COCO

        # Handle empty predictions
        if len(output) == 0:
            return []

        bboxes = output.bbox.cpu().detach().numpy()
        scores = output.get_field('scores').cpu().detach().numpy()
        labels = output.get_field('labels').cpu().detach().numpy()

        # Model outputs 0-indexed labels (0 to num_classes-1)
        # COCO category_id is 1-indexed (1 to num_classes)
        category_ids = labels + 1

        coco_results = []
        for k in range(len(bboxes)):
            coco_results.append({
                'image_id': None,  # Will be set later
                'category_id': int(category_ids[k]),
                'bbox': bboxes[k].tolist(),  # [x, y, width, height]
                'score': float(scores[k]),
            })

        return coco_results


def main():
    parser = argparse.ArgumentParser('DAMO-YOLO Directory Inference')
    parser.add_argument(
        '--model_path',
        required=True,
        type=str,
        help='Path to model checkpoint (.pth, .pt)'
    )
    parser.add_argument(
        '--config',
        required=True,
        type=str,
        help='Path to config file'
    )
    parser.add_argument(
        '--image_dir',
        required=True,
        type=str,
        help='Path to directory containing images'
    )
    parser.add_argument(
        '--output_json',
        required=True,
        type=str,
        help='Path to output JSON file (COCO format)'
    )
    parser.add_argument(
        '--infer_size',
        nargs='+',
        type=int,
        default=[640, 640],
        help='Inference image size [height width]'
    )
    parser.add_argument(
        '--device',
        default='cuda',
        type=str,
        help='Device for inference (cuda or cpu)'
    )
    parser.add_argument(
        '--conf_threshold',
        default=None,
        type=float,
        help='Confidence threshold (uses config default if not specified)'
    )

    args = parser.parse_args()

    # Parse config
    config = parse_config(args.config)

    # Override confidence threshold if provided
    if args.conf_threshold is not None:
        config.model.head.nms_conf_thre = args.conf_threshold

    # Parse inference size
    if len(args.infer_size) == 1:
        infer_size = [args.infer_size[0], args.infer_size[0]]
    elif len(args.infer_size) == 2:
        infer_size = args.infer_size
    else:
        raise ValueError('infer_size should be 1 or 2 values')

    # Initialize inference engine
    logger.info(f'Loading model from {args.model_path}')
    infer_engine = BatchInfer(
        config,
        infer_size=infer_size,
        device=args.device,
        ckpt=args.model_path
    )

    # Get all image files
    image_files = get_image_files(args.image_dir)
    if len(image_files) == 0:
        logger.error(f'No image files found in {args.image_dir}')
        return

    logger.info(f'Found {len(image_files)} images')

    # Process images
    all_results = []

    for img_id, image_path in enumerate(tqdm(image_files, desc='Processing images')):
        # Load image
        origin_img = cv2.imread(str(image_path))
        if origin_img is None:
            logger.warning(f'Failed to load image: {image_path}')
            continue

        origin_img = cv2.cvtColor(origin_img, cv2.COLOR_BGR2RGB)

        # Run inference
        preds, image, origin_shape = infer_engine.forward(origin_img)

        # Postprocess to COCO format
        coco_results = infer_engine.postprocess_to_coco(preds, image, origin_shape)

        # Use image filename (without extension) as image_id
        image_id = image_path.stem

        for result in coco_results:
            result['image_id'] = image_id
            all_results.append(result)

    # Save results
    output_dir = Path(args.output_json).parent
    output_dir.mkdir(parents=True, exist_ok=True)

    with open(args.output_json, 'w') as f:
        json.dump(all_results, f, indent=2)

    logger.info(f'Saved {len(all_results)} detections to {args.output_json}')
    logger.info(f'Processed {len(image_files)} images')


if __name__ == '__main__':
    main()
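The script above writes a flat COCO-style results list with the image filename stem as image_id and 1-indexed category ids. A minimal sketch of scoring such a file with pycocotools, assuming a ground-truth annotation file (the path below is a placeholder) that uses the same image_id and category_id conventions:

# Minimal COCO evaluation sketch (assumes pycocotools is installed and that
# ground-truth image_ids / category_ids match the ones written by the script).
from pycocotools.coco import COCO
from pycocotools.cocoeval import COCOeval

coco_gt = COCO('path/to/ground_truth.json')       # hypothetical annotation file
coco_dt = coco_gt.loadRes('path/to/output.json')  # detections saved by the script

evaluator = COCOeval(coco_gt, coco_dt, iouType='bbox')
evaluator.evaluate()
evaluator.accumulate()
evaluator.summarize()  # prints the standard AP/AR metrics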
usage/inference_rtdetrv2.py
ADDED
@@ -0,0 +1,179 @@
"""Copyright(c) 2023 lyuwenyu. All Rights Reserved.
"""

# Example usage:
# python references/deploy/rtdetrv2_torch.py \
#     -c path/to/model_config.yml \
#     -r path/to/model.pth \
#     --im-dir path/to/images_dir \
#     -d cuda \
#     -o path/to/output.json

import torch
import torch.nn as nn
import torchvision.transforms as T

import numpy as np
import json
import os
from pathlib import Path
from PIL import Image, ImageDraw
import sys

# Ensure repository root is on sys.path so `src` package can be imported
REPO_ROOT = str(Path(__file__).resolve().parents[2])
if REPO_ROOT not in sys.path:
    sys.path.insert(0, REPO_ROOT)

from src.core import YAMLConfig


def save_coco_format(results, output_file='detections.json'):
    """Save detection results in COCO format

    Args:
        results: List of detection dictionaries
        output_file: Path to save JSON file
    """
    with open(output_file, 'w') as f:
        json.dump(results, f, indent=2)
    print(f'Saved COCO format results to {output_file}')


def get_image_files(path):
    """Get all image files from a path (file or directory)

    Args:
        path: Path to image file or directory

    Returns:
        List of image file paths
    """
    image_extensions = {'.jpg', '.jpeg', '.png', '.bmp', '.tiff', '.tif'}
    path = Path(path)

    if path.is_file():
        return [path]
    elif path.is_dir():
        image_files = []
        for ext in image_extensions:
            image_files.extend(path.glob(f'*{ext}'))
            image_files.extend(path.glob(f'*{ext.upper()}'))
        return sorted(image_files)
    else:
        raise ValueError(f"Path {path} is neither a file nor a directory")


def main(args):
    """main
    """
    cfg = YAMLConfig(args.config, resume=args.resume)

    if args.resume:
        checkpoint = torch.load(args.resume, map_location='cpu')
        if 'ema' in checkpoint:
            state = checkpoint['ema']['module']
        else:
            state = checkpoint['model']
    else:
        raise AttributeError('Only support resume to load model.state_dict by now.')

    # NOTE load train mode state -> convert to deploy mode
    cfg.model.load_state_dict(state)

    class Model(nn.Module):
        def __init__(self) -> None:
            super().__init__()
            self.model = cfg.model.deploy()
            self.postprocessor = cfg.postprocessor.deploy()

        def forward(self, images, orig_target_sizes):
            outputs = self.model(images)
            outputs = self.postprocessor(outputs, orig_target_sizes)
            return outputs

    model = Model().to(args.device)
    model.eval()  # Ensure model is in eval mode

    # Get image files from either single file or directory
    if args.im_dir:
        image_files = get_image_files(args.im_dir)
    elif args.im_file:
        image_files = get_image_files(args.im_file)
    else:
        raise ValueError("Either --im-file or --im-dir must be provided")

    print(f'Processing {len(image_files)} image(s)...')

    # Prepare transforms
    transforms = T.Compose([
        T.Resize((640, 640)),
        T.ToTensor(),
    ])

    # Store results for COCO format
    coco_results = []

    # Process each image with memory-efficient approach
    with torch.no_grad():  # Disable gradient computation to save memory
        for idx, image_path in enumerate(image_files):
            image_name = image_path.name
            print(f'Processing {image_name} ({idx+1}/{len(image_files)})...')

            # Load and prepare image
            im_pil = Image.open(image_path).convert('RGB')
            w, h = im_pil.size
            orig_size = torch.tensor([w, h], dtype=torch.int64)[None].to(args.device)

            # Transform and run inference
            im_data = transforms(im_pil)[None].to(args.device)
            output = model(im_data, orig_size)
            labels, boxes, scores = output

            # Move to CPU immediately to free GPU memory
            labels_cpu = labels[0].cpu()
            boxes_cpu = boxes[0].cpu()
            scores_cpu = scores[0].cpu()

            # Delete GPU tensors immediately
            del im_data, orig_size, output, labels, boxes, scores
            if args.device != 'cpu':
                torch.cuda.empty_cache()  # Clear CUDA cache after each image

            # Convert to COCO format
            for label, box, score in zip(labels_cpu, boxes_cpu, scores_cpu):
                # bbox format in COCO: [x, y, width, height]
                x1, y1, x2, y2 = box.tolist()
                bbox = [x1, y1, x2 - x1, y2 - y1]

                coco_result = {
                    "image_id": image_name,
                    "category_id": int(label.item()),
                    "bbox": bbox,
                    "score": float(score.item())
                }
                coco_results.append(coco_result)

            # Delete CPU tensors (they're already converted to Python objects)
            del labels_cpu, boxes_cpu, scores_cpu

            # Periodically clear cache for large batches
            if (idx + 1) % 50 == 0 and args.device != 'cpu':
                torch.cuda.empty_cache()
                print(f' Cleared GPU cache after {idx+1} images')

    # Save COCO format JSON
    save_coco_format(coco_results, args.output_json)


if __name__ == '__main__':
    import argparse
    parser = argparse.ArgumentParser(description='RT-DETR PyTorch Inference')
    parser.add_argument('-c', '--config', type=str, required=True, help='Path to config file')
    parser.add_argument('-r', '--resume', type=str, required=True, help='Path to checkpoint file')
    parser.add_argument('-f', '--im-file', type=str, default=None, help='Path to single image file')
    parser.add_argument('--im-dir', type=str, default=None, help='Path to directory containing images')
    parser.add_argument('-d', '--device', type=str, default='cpu', help='Device to run inference on (cpu/cuda)')
    parser.add_argument('-o', '--output-json', type=str, default='detections.json', help='Path to save COCO format JSON')
    parser.add_argument('--output-dir', type=str, default='results', help='Directory to save visualization images')
    args = parser.parse_args()
    main(args)
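Note that, unlike the DAMO-YOLO script above, this script stores the model's raw label as category_id without the +1 shift. If the ground truth you evaluate against is 1-indexed, one option is a post-hoc remap of the saved JSON; a small sketch (the offset and file names are placeholders, so verify the convention against your dataset first):

# Post-hoc category_id remap sketch: shift 0-indexed model labels to 1-indexed ids.
# Only apply this if your ground-truth annotations actually expect the shift.
import json

with open('detections.json') as f:          # file written by save_coco_format
    detections = json.load(f)

for det in detections:
    det['category_id'] = int(det['category_id']) + 1  # assumed +1 offset

with open('detections_remapped.json', 'w') as f:
    json.dump(detections, f, indent=2)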
usage/inference_yolov11.py
ADDED
@@ -0,0 +1,144 @@
#!/usr/bin/env python3

"""
Example usage:
python inference.py \
    --model_path path/to/model.pt \
    --image_dir path/to/image_dir \
    --output_json_path path/to/output.json
"""

import argparse
import json
import os
from pathlib import Path

import torch
from ultralytics import YOLO


def convert_bbox_to_coco_format(bbox):
    """
    Convert YOLO bbox format [x_min, y_min, x_max, y_max] to COCO format [x_min, y_min, width, height].

    Args:
        bbox: List or tensor [x_min, y_min, x_max, y_max]

    Returns:
        List [x_min, y_min, width, height]
    """
    x_min, y_min, x_max, y_max = bbox[:4]
    width = x_max - x_min
    height = y_max - y_min
    return [float(x_min), float(y_min), float(width), float(height)]


def run_inference(model_path, image_dir, output_json_path):
    """
    Run YOLOv11 inference on images in a directory and save results in COCO format.

    Args:
        model_path: Path to the YOLOv11 model file (.pt)
        image_dir: Directory containing images to process
        output_json_path: Path where output JSON will be saved
    """
    # Load the model
    print(f"Loading model from {model_path}...")
    model = YOLO(model_path)

    # Pick GPU if available, otherwise fall back to CPU
    device = 'cuda' if torch.cuda.is_available() else 'cpu'

    # Get all image files from the directory
    image_extensions = {'.jpg', '.jpeg', '.png', '.bmp', '.tiff', '.tif'}
    image_dir_path = Path(image_dir)
    image_files = [
        f for f in image_dir_path.iterdir()
        if f.suffix.lower() in image_extensions
    ]
    image_files.sort()  # Sort for consistent ordering

    if not image_files:
        raise ValueError(f"No image files found in {image_dir}")

    print(f"Found {len(image_files)} images to process...")

    # Run inference
    coco_results = []
    image_id_map = {}  # Map filename to image_id

    for idx, image_file in enumerate(image_files):
        image_id = image_file.stem  # Use filename without extension as image_id
        image_id_map[str(image_file)] = image_id

        # Run inference on the image (GPU if available, otherwise CPU)
        results = model(str(image_file), device=device, verbose=False)

        # Process results for this image
        result = results[0]  # Get first (and only) result

        if result.boxes is not None and len(result.boxes) > 0:
            boxes = result.boxes
            for i in range(len(boxes)):
                # Get box coordinates, class, and confidence
                box = boxes.xyxy[i].cpu().numpy()  # [x_min, y_min, x_max, y_max]
                cls = int(boxes.cls[i].cpu().numpy())  # class_id
                conf = float(boxes.conf[i].cpu().numpy())  # confidence score

                # Convert to COCO bbox format
                coco_bbox = convert_bbox_to_coco_format(box)

                # Add to results
                coco_results.append({
                    "image_id": image_id,
                    "category_id": cls,
                    "bbox": coco_bbox,
                    "score": conf
                })
        else:
            # No detections for this image
            print(f"No detections in {image_file.name}")

    # Save results to JSON file
    output_path = Path(output_json_path)
    output_path.parent.mkdir(parents=True, exist_ok=True)

    with open(output_path, 'w') as f:
        json.dump(coco_results, f, indent=2)

    print("\nInference complete!")
    print(f"Total images processed: {len(image_files)}")
    print(f"Total detections: {len(coco_results)}")
    print(f"Results saved to: {output_json_path}")


if __name__ == "__main__":
    parser = argparse.ArgumentParser(
        description="Run YOLOv11 inference on images and output results in COCO format"
    )
    parser.add_argument(
        "--model_path",
        type=str,
        required=True,
        help="Path to the YOLOv11 model file (.pt)"
    )
    parser.add_argument(
        "--image_dir",
        type=str,
        required=True,
        help="Directory containing images to process"
    )
    parser.add_argument(
        "--output_json_path",
        type=str,
        required=True,
        help="Path where output JSON file will be saved"
    )

    args = parser.parse_args()

    # Validate inputs
    if not os.path.exists(args.model_path):
        raise FileNotFoundError(f"Model file not found: {args.model_path}")

    if not os.path.isdir(args.image_dir):
        raise NotADirectoryError(f"Image directory not found: {args.image_dir}")

    run_inference(args.model_path, args.image_dir, args.output_json_path)
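The loop above calls the model once per image file. As an alternative sketch (not part of this commit), Ultralytics' predict call can stream a directory directly, which keeps memory flat and exposes a confidence threshold; paths and the conf value below are placeholders:

# Rough alternative sketch using Ultralytics streaming prediction over a directory.
# Same COCO-format export as the script above; adjust paths/threshold to your setup.
import json
from pathlib import Path
from ultralytics import YOLO

model = YOLO('path/to/model.pt')  # hypothetical model path
coco_results = []
for result in model.predict(source='path/to/image_dir', stream=True, conf=0.25, verbose=False):
    image_id = Path(result.path).stem  # filename stem as image_id, as in the script above
    for box, cls, score in zip(result.boxes.xyxy, result.boxes.cls, result.boxes.conf):
        x1, y1, x2, y2 = [float(v) for v in box.tolist()]
        coco_results.append({
            'image_id': image_id,
            'category_id': int(cls),
            'bbox': [x1, y1, x2 - x1, y2 - y1],  # COCO [x, y, width, height]
            'score': float(score),
        })

with open('detections.json', 'w') as f:
    json.dump(coco_results, f, indent=2)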