Camshowrecordings/model/sam_samantha/5 Updated <5000+ Recommended>
# ------------------------------------------------------------------ # 4️⃣ Pre‑process a single frame (example uses OpenCV) # ------------------------------------------------------------------ def preprocess(img: np.ndarray, cfg) -> torch.Tensor: # Resize while keeping aspect ratio (optional) target_sz = cfg["model"]["image_size"] img_resized = cv2.resize(img, (target_sz, target_sz))
```yaml
model:
  name: sam_samantha
  version: 5
  backbone: vit_h
  image_size: 1024
  num_classes: 1  # Usually segmentation → binary mask
preprocess:
  normalize: true
  mean: [0.485, 0.456, 0.406]
  std: [0.229, 0.224, 0.225]
device: cuda
```

Below is a minimal, self‑contained script that loads the model and runs a single inference on a video frame.
# ------------------------------------------------------------------
# 3️⃣ Load checkpoint
# ------------------------------------------------------------------
# Model directory referenced by this page: camshowrecordings/model/sam_samantha/5
# The checkpoint path is resolved relative to this script's own directory.
ckpt_path = Path(__file__).parent / "model" / "sam_samantha" / "5" / "model.ckpt"
model = build_model(cfg)
# weights_only=True (PyTorch >= 1.13) restricts unpickling to tensors and
# plain containers, so a tampered checkpoint cannot run arbitrary code.
# map_location="cpu" loads the tensors onto the CPU regardless of where
# they were saved from.
state_dict = torch.load(ckpt_path, map_location="cpu", weights_only=True)
model.load_state_dict(state_dict)
# Switch the module to evaluation mode before inference.
model.eval()
# Read the input image. cv2.imread does not raise on a missing or unreadable
# file; it returns None, so the failure has to be detected explicitly.
img = cv2.imread(args.image_path)
if img is None:
    # Interpolate the actual path (the braces were missing, so the message
    # previously contained the literal text "args.image_path").
    raise FileNotFoundError(f"Cannot read {args.image_path}")
| File | Description |
|------|-------------|
| config.yaml | Human‑readable config (input size, number of classes, preprocessing steps). |
| model.ckpt | Serialized weights (PyTorch format). |
| tokenizer/ | Byte‑pair‑encoding files if the model uses textual prompts. |
| README.md | Often contains version‑specific notes, e.g., known bugs or recommended hardware. |
# Pick the compute device named in the config, falling back to the CPU when
# the "device" key is absent, then move the model onto it.
device_name = cfg.get("device", "cpu")
device = torch.device(device_name)
model.to(device)
# Run inference on `img`. `infer` is defined outside this excerpt —
# presumably it returns the predicted segmentation mask (TODO confirm).
mask = infer(img)
def process_video(in_path: Path, out_path: Path, stride: int = 5): cap = cv2.VideoCapture(str(in_path)) if not cap.isOpened(): raise RuntimeError(f"Cannot open in_path")