courtvision.data

`AnnotationDataPath`

Bases: BaseModel

Tracks the location of the data for a single data item

Source code in courtvision/data.py

class AnnotationDataPath(BaseModel):
    """Tracks the location of the data for a single data item.

    All locations are optional. `image` can be populated from either an
    ``img`` or an ``image`` key in the input data.
    """

    video_url: Optional[Path] = None
    video_local_path: Optional[Path] = None
    image_local_path: Optional[Path] = None
    # Accepts "img" or "image" as the incoming key.
    image: Optional[Path] = Field(None, validation_alias=AliasChoices("img", "image"))
    # TODO: #1 use aliases to have a single source of truth file locations
    #       both locally and on remote - eg: s3

    class Config:
        # `validation_alias` / `AliasChoices` are pydantic v2 APIs; in v2 the
        # v1 key `allow_population_by_field_name` is only warned about and then
        # ignored, so the v2 name is used to keep population by field name on.
        populate_by_name = True

`CameraInfo` `dataclass`

Camera calibration information

Source code in courtvision/data.py

@dataclass
class CameraInfo:
    """Camera calibration information.

    Holds the camera matrix, distortion coefficients, the rotation and
    translation vectors, the image size, the reprojection errors and the set
    of clip ids this calibration is valid for.
    """

    valid_for_clip_ids: set[str]
    camera_matrix: np.ndarray
    distortion_coefficients: np.ndarray
    rotation_vector: np.ndarray
    translation_vector: np.ndarray
    image_width: int
    image_height: int
    error_in_reprojected_planar_points: float
    error_in_reprojected_points: float

    def world_space_to_camera_space(self) -> torch.Tensor:
        """Build the 4x4 homogeneous matrix ``[[R, t], [0, 0, 0, 1]]``.

        ``R`` is obtained from `rotation_vector` via ``cv2.Rodrigues`` and
        ``t`` is `translation_vector`.

        Returns:
            torch.Tensor: The stacked 4x4 matrix.
        """
        rotation_matrix, _ = cv2.Rodrigues(self.rotation_vector)
        # Force a (3, 1) column so a flat (3,) translation vector stacks too.
        translation = np.asarray(self.translation_vector).reshape(3, 1)
        return torch.tensor(
            np.vstack(
                [
                    np.hstack((rotation_matrix, translation)),
                    np.array([0, 0, 0, 1]),
                ]
            )
        )

    def save(self, file_name: Path):
        """Saves the camera calibration information to a file

        Args:
            file_name (Path): The file to save the camera calibration information to.
        """
        # NOTE: `valid_for_clip_ids` is a set, which numpy stores as a pickled
        # object array; this is why `load` has to pass allow_pickle=True.
        np.savez(
            file_name,
            camera_matrix=self.camera_matrix,
            distortion_coefficients=self.distortion_coefficients,
            rotation_vector=self.rotation_vector,
            translation_vector=self.translation_vector,
            image_width=self.image_width,
            image_height=self.image_height,
            error_in_reprojected_planar_points=self.error_in_reprojected_planar_points,
            error_in_reprojected_points=self.error_in_reprojected_points,
            valid_for_clip_ids=self.valid_for_clip_ids,
        )

    @staticmethod
    def load(file_name: str) -> "CameraInfo":
        """Loads the camera calibration information from a file.

        Args:
            file_name (str): Full path to .npz file

        Returns:
            CameraInfo: Camera calibration information
        """
        # SECURITY: allow_pickle=True executes pickled payloads; only load
        # calibration files from trusted sources.
        # The context manager closes the underlying zip file handle.
        with np.load(file_name, allow_pickle=True) as data:
            return CameraInfo(
                camera_matrix=data["camera_matrix"],
                distortion_coefficients=data["distortion_coefficients"],
                rotation_vector=data["rotation_vector"],
                translation_vector=data["translation_vector"],
                # Scalars come back as 0-d arrays; restore the declared types.
                image_width=int(data["image_width"]),
                image_height=int(data["image_height"]),
                error_in_reprojected_planar_points=float(
                    data["error_in_reprojected_planar_points"]
                ),
                error_in_reprojected_points=float(data["error_in_reprojected_points"]),
                valid_for_clip_ids=set(data["valid_for_clip_ids"].tolist()),
            )

`load(file_name)` `staticmethod`

Loads the camera calibration information from a file.

Parameters:

Name	Type	Description	Default
`file_name`	`str`	Full path to .npz file	required

Returns:

Name	Type	Description
`CameraInfo`	`Self`	Camera calibration information

Source code in courtvision/data.py

@staticmethod
def load(file_name: str) -> "CameraInfo":
    """Loads the camera calibration information from a file.

    Args:
        file_name (str): Full path to .npz file

    Returns:
        CameraInfo: Camera calibration information
    """
    # SECURITY: allow_pickle=True executes pickled payloads; only load
    # calibration files from trusted sources.
    # The context manager closes the underlying zip file handle.
    with np.load(file_name, allow_pickle=True) as data:
        return CameraInfo(
            camera_matrix=data["camera_matrix"],
            distortion_coefficients=data["distortion_coefficients"],
            rotation_vector=data["rotation_vector"],
            translation_vector=data["translation_vector"],
            # Scalars come back as 0-d arrays; restore the declared types.
            image_width=int(data["image_width"]),
            image_height=int(data["image_height"]),
            error_in_reprojected_planar_points=float(
                data["error_in_reprojected_planar_points"]
            ),
            error_in_reprojected_points=float(data["error_in_reprojected_points"]),
            valid_for_clip_ids=set(data["valid_for_clip_ids"].tolist()),
        )

`save(file_name)`

Saves the camera calibration information to a file

Parameters:

Name	Type	Description	Default
`file_name`	`Path`	The file to save the camera calibration information to.	required

Source code in courtvision/data.py

def save(self, file_name: Path):
    """Write every calibration attribute into a single ``.npz`` archive.

    Args:
        file_name (Path): Destination handed straight to ``np.savez``.
    """
    # Archive member names, in the order they are written.
    stored_attributes = (
        "camera_matrix",
        "distortion_coefficients",
        "rotation_vector",
        "translation_vector",
        "image_width",
        "image_height",
        "error_in_reprojected_planar_points",
        "error_in_reprojected_points",
        "valid_for_clip_ids",
    )
    payload = {attr: getattr(self, attr) for attr in stored_attributes}
    np.savez(file_name, **payload)

`CourtVisionArtifacts` `dataclass`

Tracks the artifacts used in the pipeline

Source code in courtvision/data.py

@dataclass
class CourtVisionArtifacts:
    """Tracks the artifacts used in the pipeline.

    `camera_info` is loaded lazily from `camera_info_path` the first time it
    is read (if that file exists) and can also be assigned explicitly.
    """

    local_cache_path: Path
    dataset: PadelDataset
    ball_detector: BallDetector
    ball_tracker: ParticleFilter
    player_detector: PlayerDetector

    court_layout: PadelCourt
    camera_info_path: Path
    # Cache behind the `camera_info` property; not an __init__ argument.
    _camera_info: CameraInfo | None = field(init=False, default=None)

    @property
    def camera_info(self):
        """Return the cached calibration, loading it on first access.

        Returns:
            The cached `CameraInfo`, or None when nothing has been set and
            `camera_info_path` does not exist.
        """
        if self._camera_info is None and self.camera_info_path.exists():
            # Load the very file whose existence was just checked. Previously
            # this read `local_cache_path / "camera_info.npz"`, which need not
            # be the same path.
            self._camera_info = CameraInfo.load(self.camera_info_path)

        return self._camera_info

    @camera_info.setter
    def camera_info(self, value):
        """Replace the cached calibration with `value`."""
        self._camera_info = value

    class Config:
        # NOTE(review): this reads like pydantic configuration; on a plain
        # dataclass it is just a nested class - confirm whether it is needed.
        arbitrary_types_allowed = True

`CourtVisionBallDataset`

Bases: VisionDataset

Source code in courtvision/data.py

class CourtVisionBallDataset(VisionDataset):
    """Image dataset over the samples of a `PadelDataset`.

    Items are ``(image, sample)`` pairs. When `download` is true every
    sample's image is fetched via `download_data_item` at construction time.
    """

    def __init__(
        self,
        dataset: PadelDataset,
        root: str,
        download: bool = True,
        show: Callable | None = None,
        load_from_disk: Callable[[Path], torch.Tensor] | None = None,
        transforms: Callable | None = None,
        transform: Callable | None = None,
        target_transform: Callable | None = None,
    ):
        """Store the dataset and optionally download every sample image.

        Args:
            dataset: Source of `samples` and of `local_data_dir`.
            root: Forwarded to `VisionDataset`.
            download: When true, fetch each sample's image and record the
                result in `sample.data.image_local_path`.
            show: Accepted but not used by this class.
            load_from_disk: Accepted but not used by this class.
            transforms: Forwarded to `VisionDataset`.
            transform: Forwarded to `VisionDataset`.
            target_transform: Forwarded to `VisionDataset`.
        """
        super().__init__(root, transforms, transform, target_transform)
        # self.root = root  # TODO: See what base class does and if we can use it
        self.dataset = dataset
        from rich.progress import track

        if download:
            for sample in track(dataset.samples, description="Downloading data"):
                # Local layout: <local_data_dir>/<parent dir name>/<file name>
                sample.data.image_local_path = download_data_item(
                    s3_uri=sample.data.image,
                    local_path=self.dataset.local_data_dir
                    / sample.data.image.parent.name
                    / sample.data.image.name,
                )

    def __len__(self):
        """Number of samples in the wrapped dataset."""
        return len(self.dataset.samples)

    def __getitem__(self, idx) -> tuple[torch.Tensor, CourtAnnotatedSample]:
        """Return ``(image, sample)`` for `idx`.

        The image is read from `sample.data.image_local_path`, which is only
        assigned by this class when it was built with ``download=True``.
        """
        from courtvision.vis import load_timg

        # TODO: Data module should have IO functions injected into it
        sample = self.dataset.samples[idx]
        image = load_timg(sample.data.image_local_path)
        return (
            image,
            sample,
        )

    @staticmethod
    def collate_fn(batch):
        """Collate function for the dataloader

        Args:
            batch: Iterable of ``(image, sample)`` pairs.

        Returns:
            ``(images, targets)``: images with dim 0 squeezed, and one dict
            per sample with "boxes", "labels" and "file_location".
        """
        images, samples = zip(*batch)

        targets = [
            {
                "boxes": annotations_to_bbox(sample.annotations),
                "labels": annotations_to_label(sample.annotations),
                "file_location": sample.data.image_local_path.as_posix(),
            }
            for sample in samples
        ]

        return [o.squeeze(0) for o in images], targets

    @staticmethod
    def find_image_path(root: Path | str, sample: CourtAnnotatedSample):
        """Finds the image path from a sample"""
        server_file_path = Path(*sample.data.image.parts[2:])  # remove /data/
        filename = Path(f"{root}/{server_file_path}")
        return filename

    @staticmethod
    def show_sample(annotation: list[Annotation], image: torch.Tensor):
        """Plots an image and its annotation

        Rect values are drawn as boxes and keypoint values as 1%-wide boxes.

        Args:
            annotation: Annotations whose `result` entries are drawn.
            image: Image handed to `draw_rect` and then to ``plt.imshow``.
        """
        # TODO: Data module should have vis functions injected into it
        from courtvision.vis import draw_rect

        def rects_in_pixels(results, value_type, extent_of):
            """Return matching results as a (1, N, 4) pixel-rect tensor, or None."""
            corners = []
            for result in results:
                value = result.value
                if not isinstance(value, value_type):
                    continue
                width_pct, height_pct = extent_of(value)
                # Values are percentages of the original image size.
                corners.append(
                    torch.tensor(
                        [
                            (value.x / 100.0) * result.original_width,
                            (value.y / 100.0) * result.original_height,
                            (value.x + width_pct) / 100.0 * result.original_width,
                            (value.y + height_pct) / 100.0 * result.original_height,
                        ]
                    ).unsqueeze(0)
                )
            if not corners:
                return None
            return torch.stack(corners).permute(1, 0, 2)

        point_width = 1.0
        drawable_kinds = (
            (RectValue, lambda value: (value.width, value.height)),
            (KeypointValue, lambda value: (point_width, point_width)),
        )
        for annot in annotation:
            for value_type, extent_of in drawable_kinds:
                rects = rects_in_pixels(annot.result, value_type, extent_of)
                if rects is not None:
                    # NOTE(review): the return value is ignored, so draw_rect
                    # presumably draws onto `image` in place - confirm.
                    draw_rect(image, bboxes=rects)
        plt.imshow(image.squeeze(0).permute(1, 2, 0))

`collate_fn(batch)` `staticmethod`

Collate function for the dataloader

Source code in courtvision/data.py

@staticmethod
def collate_fn(batch):
    """Turn a batch of ``(image, sample)`` pairs into ``(images, targets)``.

    Each target dict carries the sample's "boxes", "labels" and the POSIX
    "file_location" of its local image; images get dim 0 squeezed.
    """
    images, samples = zip(*batch)

    targets = []
    for item in samples:
        target = {
            "boxes": annotations_to_bbox(item.annotations),
            "labels": annotations_to_label(item.annotations),
            "file_location": item.data.image_local_path.as_posix(),
        }
        targets.append(target)

    squeezed_images = [picture.squeeze(0) for picture in images]
    return squeezed_images, targets

`find_image_path(root, sample)` `staticmethod`

Finds the image path from a sample

Source code in courtvision/data.py

@staticmethod
def find_image_path(root: Path | str, sample: CourtAnnotatedSample):
    """Finds the image path from a sample"""
    server_file_path = Path(*sample.data.image.parts[2:])  # remove /data/
    filename = Path(f"{root}/{server_file_path}")
    return filename

`show_sample(annotation, image)` `staticmethod`

Plots an image and its annotation

Source code in courtvision/data.py

@staticmethod
def show_sample(annotation: list[Annotation], image: torch.Tensor):
    """Plots an image and its annotation

    Rect values are drawn as boxes and keypoint values as 1%-wide boxes.

    Args:
        annotation: Annotations whose `result` entries are drawn.
        image: Image handed to `draw_rect` and then to ``plt.imshow``.
    """
    # TODO: Data module should have vis functions injected into it
    from courtvision.vis import draw_rect

    def rects_in_pixels(results, value_type, extent_of):
        """Return matching results as a (1, N, 4) pixel-rect tensor, or None."""
        corners = []
        for result in results:
            value = result.value
            if not isinstance(value, value_type):
                continue
            width_pct, height_pct = extent_of(value)
            # Values are percentages of the original image size.
            corners.append(
                torch.tensor(
                    [
                        (value.x / 100.0) * result.original_width,
                        (value.y / 100.0) * result.original_height,
                        (value.x + width_pct) / 100.0 * result.original_width,
                        (value.y + height_pct) / 100.0 * result.original_height,
                    ]
                ).unsqueeze(0)
            )
        if not corners:
            return None
        return torch.stack(corners).permute(1, 0, 2)

    point_width = 1.0
    drawable_kinds = (
        (RectValue, lambda value: (value.width, value.height)),
        (KeypointValue, lambda value: (point_width, point_width)),
    )
    for annot in annotation:
        for value_type, extent_of in drawable_kinds:
            rects = rects_in_pixels(annot.result, value_type, extent_of)
            if rects is not None:
                # NOTE(review): the return value is ignored, so draw_rect
                # presumably draws onto `image` in place - confirm.
                draw_rect(image, bboxes=rects)
    plt.imshow(image.squeeze(0).permute(1, 2, 0))

`CourtVisionDataset`

Bases: VisionDataset

Source code in courtvision/data.py

class CourtVisionDataset(VisionDataset):
    """Image dataset over the samples of a `PadelDataset`.

    Items are ``(sample, image)`` pairs; images are read from beneath `root`
    using the path computed by `find_image_path`.
    """

    def __init__(
        self,
        dataset: PadelDataset,
        root: str,
        transforms: Callable | None = None,
        transform: Callable | None = None,
        target_transform: Callable | None = None,
    ):
        """Store the dataset and forward the remaining arguments to `VisionDataset`."""
        self.root = root  # TODO: See what base class does and if we can use it
        self.dataset = dataset
        super().__init__(root, transforms, transform, target_transform)

    def __len__(self):
        """Number of samples in the wrapped dataset."""
        return len(self.dataset.samples)

    def __getitem__(self, idx) -> tuple[CourtAnnotatedSample, torch.Tensor]:
        """Return ``(sample, image)`` for `idx`."""
        from courtvision.vis import load_timg

        # TODO: Data module should have IO functions injected into it
        sample = self.dataset.samples[idx]
        image = load_timg(CourtVisionDataset.find_image_path(self.root, sample=sample))
        return (
            sample,
            image,
        )

    @staticmethod
    def collate_fn(batch):
        """Collate function for the dataloader

        Args:
            batch: Iterable of ``(sample, image)`` pairs.

        Returns:
            ``(targets, images)``: one dict per sample with "boxes" and
            "labels", and the images with dim 0 squeezed.
        """
        samples, images = zip(*batch)
        targets = [
            {
                "boxes": annotations_to_bbox(sample.annotations),
                # One label per box (previously always a single label, which
                # disagreed with "boxes" whenever a sample had several rects).
                "labels": annotations_to_label(sample.annotations),
            }
            for sample in samples
        ]

        return targets, [o.squeeze(0) for o in images]

    @staticmethod
    def find_image_path(root: Path | str, sample: CourtAnnotatedSample):
        """Finds the image path from a sample"""
        server_file_path = Path(*sample.data.image.parts[2:])  # remove /data/
        filename = Path(f"{root}/{server_file_path}")
        return filename

    @staticmethod
    def show_sample(annotation: list[Annotation], image: torch.Tensor):
        """Plots an image and its annotation

        Rect values are drawn as boxes and keypoint values as 1%-wide boxes.

        Args:
            annotation: Annotations whose `result` entries are drawn.
            image: Image handed to `draw_rect` and then to ``plt.imshow``.
        """
        # TODO: Data module should have vis functions injected into it
        from courtvision.vis import draw_rect

        def rects_in_pixels(results, value_type, extent_of):
            """Return matching results as a (1, N, 4) pixel-rect tensor, or None."""
            corners = []
            for result in results:
                value = result.value
                if not isinstance(value, value_type):
                    continue
                width_pct, height_pct = extent_of(value)
                # Values are percentages of the original image size.
                corners.append(
                    torch.tensor(
                        [
                            (value.x / 100.0) * result.original_width,
                            (value.y / 100.0) * result.original_height,
                            (value.x + width_pct) / 100.0 * result.original_width,
                            (value.y + height_pct) / 100.0 * result.original_height,
                        ]
                    ).unsqueeze(0)
                )
            if not corners:
                return None
            return torch.stack(corners).permute(1, 0, 2)

        point_width = 1.0
        drawable_kinds = (
            (RectValue, lambda value: (value.width, value.height)),
            (KeypointValue, lambda value: (point_width, point_width)),
        )
        for annot in annotation:
            for value_type, extent_of in drawable_kinds:
                rects = rects_in_pixels(annot.result, value_type, extent_of)
                if rects is not None:
                    # NOTE(review): the return value is ignored, so draw_rect
                    # presumably draws onto `image` in place - confirm.
                    draw_rect(image, bboxes=rects)
        plt.imshow(image.squeeze(0).permute(1, 2, 0))

`collate_fn(batch)` `staticmethod`

Collate function for the dataloader

Source code in courtvision/data.py

@staticmethod
def collate_fn(batch):
    """Collate function for the dataloader

    Args:
        batch: Iterable of ``(sample, image)`` pairs.

    Returns:
        ``(targets, images)``: one dict per sample with "boxes" and "labels",
        and the images with dim 0 squeezed.
    """
    samples, images = zip(*batch)
    targets = [
        {
            "boxes": annotations_to_bbox(sample.annotations),
            # One label per box (previously always a single label, which
            # disagreed with "boxes" whenever a sample had several rects).
            "labels": annotations_to_label(sample.annotations),
        }
        for sample in samples
    ]

    return targets, [o.squeeze(0) for o in images]

`show_sample(annotation, image)` `staticmethod`

Plots an image and its annotation

Source code in courtvision/data.py

@staticmethod
def show_sample(annotation: list[Annotation], image: torch.Tensor):
    """Plots an image and its annotation

    Rect values are drawn as boxes and keypoint values as 1%-wide boxes.

    Args:
        annotation: Annotations whose `result` entries are drawn.
        image: Image handed to `draw_rect` and then to ``plt.imshow``.
    """
    # TODO: Data module should have vis functions injected into it
    from courtvision.vis import draw_rect

    def rects_in_pixels(results, value_type, extent_of):
        """Return matching results as a (1, N, 4) pixel-rect tensor, or None."""
        corners = []
        for result in results:
            value = result.value
            if not isinstance(value, value_type):
                continue
            width_pct, height_pct = extent_of(value)
            # Values are percentages of the original image size.
            corners.append(
                torch.tensor(
                    [
                        (value.x / 100.0) * result.original_width,
                        (value.y / 100.0) * result.original_height,
                        (value.x + width_pct) / 100.0 * result.original_width,
                        (value.y + height_pct) / 100.0 * result.original_height,
                    ]
                ).unsqueeze(0)
            )
        if not corners:
            return None
        return torch.stack(corners).permute(1, 0, 2)

    point_width = 1.0
    drawable_kinds = (
        (RectValue, lambda value: (value.width, value.height)),
        (KeypointValue, lambda value: (point_width, point_width)),
    )
    for annot in annotation:
        for value_type, extent_of in drawable_kinds:
            rects = rects_in_pixels(annot.result, value_type, extent_of)
            if rects is not None:
                # NOTE(review): the return value is ignored, so draw_rect
                # presumably draws onto `image` in place - confirm.
                draw_rect(image, bboxes=rects)
    plt.imshow(image.squeeze(0).permute(1, 2, 0))

`KeypointValue`

Bases: BaseModel

Specifies a keypoint and its labels

Source code in courtvision/data.py

class KeypointValue(BaseModel):
    """Specifies a keypoint and its labels"""

    # x / y are treated as percentages of the image size elsewhere in this
    # module (divided by 100 and multiplied by the original width / height).
    x: float
    y: float
    # NOTE(review): presumably the marker width in the same percentage units - confirm.
    width: float
    keypointlabels: list[str]

`LabelValue`

Bases: BaseModel

Specifies a clip segment and its labels

Source code in courtvision/data.py

class LabelValue(BaseModel):
    """Specifies a clip segment and its labels"""

    # Segment boundaries. NOTE(review): units (seconds vs. frames) are not
    # visible here - confirm against the annotation export.
    start: float
    end: float
    labels: list[str]

`PadelCourt` `dataclass`

Padel court dimensions and locations of key points

Source code in courtvision/data.py

class _classproperty:
    """Read-only property evaluated against the class, not the instance.

    Replaces the chained ``@classmethod`` + ``@property`` idiom, which was
    deprecated in Python 3.11 and removed in 3.13.
    """

    def __init__(self, fget):
        self.fget = fget
        self.__doc__ = fget.__doc__

    def __get__(self, instance, owner=None):
        if owner is None:
            owner = type(instance)
        return self.fget(owner)


@dataclass
class PadelCourt:
    """Padel court dimensions and locations of key points

    All derived dimensions are computed once, at class-definition time, from
    the default `court_scale`. The key-point properties read the *class*
    attributes, so values overridden on an instance are not reflected in them.
    Properties suffixed ``_n`` are normalised by the court width / length.
    """

    # The scale of the court is in meters
    # Setting this to 100.0 means that the court is 1_000cm x 2_000cm
    court_scale: float = 10.0

    # REF: https://www.lta.org.uk/4ad2a4/siteassets/play/padel/file/lta-padel-court-guidance.pdf
    width: float = 10.0 * court_scale
    length: float = 20.0 * court_scale
    backwall_height: float = 3.0 * court_scale
    backwall_fence_height: float = 4.0 * court_scale
    serve_line_from_back_line: float = 3.0 * court_scale
    line_width: float = 0.05 * court_scale
    net_height: float = 0.78 * court_scale  # 0.78m

    # Line properties return integer arrays of shape (2, 1, 2): two (x, y)
    # end points, reshaped with ``reshape(-1, 1, 2)``.
    @_classproperty
    def center_line(cls) -> np.ndarray:
        """Centre line, running between the two serve lines."""
        return np.array(
            [
                (cls.width / 2, cls.length - cls.serve_line_from_back_line),
                (cls.width / 2, cls.serve_line_from_back_line),
            ],
            dtype=np.int32,
        ).reshape(-1, 1, 2)

    @_classproperty
    def net_line(cls) -> np.ndarray:
        """Net line, across the full width at half the court length."""
        return np.array(
            [(0, cls.length / 2), (cls.width, cls.length / 2)], dtype=np.int64
        ).reshape(-1, 1, 2)

    # NOTE(review): `near_serve_line` sits at y = length - serve_line_from_back_line
    # while the `*_near_serve_line` points sit at y = serve_line_from_back_line
    # (and vice versa for "far") - confirm which naming is intended.
    @_classproperty
    def near_serve_line(cls):
        return np.array(
            [
                (0, cls.length - cls.serve_line_from_back_line),
                (cls.width, cls.length - cls.serve_line_from_back_line),
            ],
            np.int32,
        ).reshape(-1, 1, 2)

    @_classproperty
    def far_serve_line(cls):
        return np.array(
            [
                (0, cls.serve_line_from_back_line),
                (cls.width, cls.serve_line_from_back_line),
            ],
            dtype=np.int32,
        ).reshape(-1, 1, 2)

    # Key points on the court plane, as (x, y) tuples.
    @_classproperty
    def front_left(cls):
        return (0.0, 0.0)

    @_classproperty
    def front_right(cls):
        return (cls.width, 0)

    @_classproperty
    def top_front_left_vertical_plane(cls):
        # x, z
        return (0.0, cls.backwall_height)

    @_classproperty
    def top_front_right_vertical_plane(cls):
        # x, z
        return (cls.width, cls.backwall_height)

    @_classproperty
    def back_left(cls):
        return (0.0, cls.length)

    @_classproperty
    def back_right(cls):
        return (cls.width, cls.length)

    @_classproperty
    def left_near_serve_line(cls):
        return (0.0, cls.serve_line_from_back_line)

    @_classproperty
    def right_near_serve_line(cls):
        return (cls.width, cls.serve_line_from_back_line)

    @_classproperty
    def left_far_serve_line(cls):
        return (0.0, cls.length - cls.serve_line_from_back_line)

    @_classproperty
    def right_far_serve_line(cls):
        return (cls.width, cls.length - cls.serve_line_from_back_line)

    # Placeholders: accessing any of these raises NotImplementedError.
    @_classproperty
    def m_top_front_left(cls):
        # TODO: add these
        raise NotImplementedError()

    @_classproperty
    def n_top_front_right(cls):
        raise NotImplementedError()

    @_classproperty
    def o_top_back_left(cls):
        raise NotImplementedError()

    @_classproperty
    def p_top_back_right(cls):
        raise NotImplementedError()

    @_classproperty
    def q_top_net_line_left(cls):
        raise NotImplementedError()

    @_classproperty
    def r_top_net_line_right(cls):
        raise NotImplementedError()

    # Normalised:
    @_classproperty
    def center_line_n(cls) -> np.ndarray:
        """Normalised centre line, from (0.5, 1.0) to (0.5, 0.0)."""
        # Float dtype: an integer dtype truncated the 0.5 coordinates to 0.
        return np.array(
            [
                ((cls.width / 2) / cls.width, cls.length / cls.length),
                ((cls.width / 2) / cls.width, 0),
            ],
            dtype=np.float32,
        ).reshape(-1, 1, 2)

    @_classproperty
    def net_line_n(cls) -> np.ndarray:
        """Normalised net line, from (0.0, 0.5) to (1.0, 0.5)."""
        # Float dtype: an integer dtype truncated the 0.5 coordinates to 0.
        return np.array(
            [
                (0, (cls.length / 2) / cls.length),
                (cls.width / cls.width, (cls.length / 2) / cls.length),
            ],
            dtype=np.float32,
        ).reshape(-1, 1, 2)

    @_classproperty
    def front_left_n(cls):
        return (cls.front_left[0] / cls.width, cls.front_left[1] / cls.length)

    @_classproperty
    def front_right_n(cls):
        return (cls.front_right[0] / cls.width, cls.front_right[1] / cls.length)

    @_classproperty
    def top_front_left_vertical_plane_n(cls):
        # x, z
        return (0.0, 0.0)

    @_classproperty
    def top_front_right_vertical_plane_n(cls):
        # x, z
        return (cls.width / cls.width, 0.0)

    @_classproperty
    def front_left_vertical_plane_n(cls):
        # x, z
        return (0.0, cls.backwall_height / cls.backwall_height)

    @_classproperty
    def front_right_vertical_plane_n(cls):
        # x, z
        return (cls.width / cls.width, cls.backwall_height / cls.backwall_height)

    @_classproperty
    def back_left_n(cls):
        return (cls.back_left[0] / cls.width, cls.back_left[1] / cls.length)

    @_classproperty
    def back_right_n(cls):
        return (cls.back_right[0] / cls.width, cls.back_right[1] / cls.length)

    @_classproperty
    def left_near_serve_line_n(cls):
        return (
            cls.left_near_serve_line[0] / cls.width,
            cls.left_near_serve_line[1] / cls.length,
        )

    @_classproperty
    def right_near_serve_line_n(cls):
        return (
            cls.right_near_serve_line[0] / cls.width,
            cls.right_near_serve_line[1] / cls.length,
        )

    @_classproperty
    def left_far_serve_line_n(cls):
        return (
            cls.left_far_serve_line[0] / cls.width,
            cls.left_far_serve_line[1] / cls.length,
        )

    @_classproperty
    def right_far_serve_line_n(cls):
        return (
            cls.right_far_serve_line[0] / cls.width,
            cls.right_far_serve_line[1] / cls.length,
        )

`RectValue`

Bases: BaseModel

Specifies a rectangle and its labels

Source code in courtvision/data.py

class RectValue(BaseModel):
    """Specifies a rectangle and its labels"""

    # x / y / width / height are treated as percentages of the image size
    # elsewhere in this module (divided by 100 and multiplied by the original
    # width / height to obtain pixel coordinates).
    x: float
    y: float
    width: float
    height: float
    rectanglelabels: list[str]

`VideoRectSequence`

Bases: BaseModel

Specifies a rectangle and its labels for a single frame in a sequence

Source code in courtvision/data.py

class VideoRectSequence(BaseModel):
    """Specifies a rectangle and its labels for a single frame in a sequence"""

    frame: int
    enabled: bool
    rotation: float
    # NOTE(review): presumably the same percentage units as RectValue - confirm.
    x: float
    y: float
    width: float
    height: float
    # NOTE(review): presumably the timestamp of `frame`; units not visible here.
    time: float

`VideoRectValue`

Bases: BaseModel

Specifies a sequence of rectangles and its labels

Source code in courtvision/data.py

class VideoRectValue(BaseModel):
    """Specifies a sequence of rectangles and its labels"""

    # TODO: rename to VideoRectSequenceValue https://github.com/BenjaminDev/courtvision/issues/11
    # camelCase kept as-is: the field name doubles as the input key.
    framesCount: int
    duration: float
    # One entry per annotated frame.
    sequence: list[VideoRectSequence]
    labels: list[str]

`annotations_to_bbox(annotations)`

Grab the bounding boxes from the annotations.

Note

Coordinates are in image coordinates and not normalised coordinates.

Parameters:

Name	Type	Description	Default
`annotations`	`list[Annotation]`	Annotations from the dataset.	required

Returns:

Type	Description
`torch.Tensor`	torch.Tensor: A tensor of bounding boxes in image coordinates.

Source code in courtvision/data.py

def annotations_to_bbox(annotations: list[Annotation]) -> torch.Tensor:
    """Grab the bounding boxes from the annotations.
    !!! note
        Coordinates are in image coordinates and *not* normalised coordinates.
    Args:
        annotations (list[Annotation]): Annotations from the dataset.

    Returns:
        torch.Tensor: A tensor of bounding boxes in image coordinates, one
            ``(x_min, y_min, x_max, y_max)`` row per rect. Shape ``(0, 4)``
            when the annotations contain no rects.
    """
    # Single pass: keep each rect result together with its original size.
    rect_results = [
        result
        for annotation in annotations
        for result in annotation.result
        if isinstance(result.value, RectValue)
    ]
    if not rect_results:
        # torch.stack raises on an empty list; return an empty box tensor.
        return torch.zeros((0, 4))
    return torch.stack(
        [
            torch.tensor(
                [
                    # Rect values are percentages of the original image size.
                    (result.value.x / 100.0) * result.original_width,
                    (result.value.y / 100.0) * result.original_height,
                    (result.value.x + result.value.width)
                    / 100.0
                    * result.original_width,
                    (result.value.y + result.value.height)
                    / 100.0
                    * result.original_height,
                ]
            )
            for result in rect_results
        ]
    )

`annotations_to_label(annotations)`

Grab the labels from the annotations

Note

Currently only supports a single label and rects only!

Parameters:

Name	Type	Description	Default
`annotations`	`list[Annotation]`	Annotations from the dataset.	required

Returns:

Type	Description
`torch.IntTensor`	torch.IntTensor: A tensor of labels.

Source code in courtvision/data.py

def annotations_to_label(annotations: list[Annotation]) -> torch.IntTensor:
    """Produce one label per rect found in the annotations.
    !!! note
        Currently only supports a single label and rects only!

    Args:
        annotations (list[Annotation]): Annotations from the dataset.

    Returns:
        torch.IntTensor: An int64 tensor of ones, one entry per rect.
    """
    # The first label of every rect is collected, but only the count is used:
    # every rect gets class id 1.
    first_rect_labels = [
        result.value.rectanglelabels[0]
        for annotation in annotations
        for result in annotation.result
        if isinstance(result.value, RectValue)
    ]
    label_count = len(first_rect_labels)
    return torch.ones(label_count, dtype=torch.int64)

`collate_fn(batch)`

Collate function for the dataloader

Source code in courtvision/data.py

def collate_fn(batch):
    """Collate function for the dataloader

    Args:
        batch: Iterable of `(sample, image)` pairs. Each `sample` must expose
            `.annotations`; each `image` must support `.squeeze(0)`.

    Returns:
        tuple: `(targets, images)` where `targets` is a list with one dict per
            sample holding `"boxes"` (from `annotations_to_bbox`) and `"labels"`
            (an `int64` tensor of ones, one per box), and `images` is the list
            of images with their leading dimension squeezed.
    """
    samples, images = zip(*batch)
    targets = []
    for sample in samples:
        boxes = annotations_to_bbox(sample.annotations)
        # One label per box: a fixed length of 1 would disagree with "boxes"
        # as soon as a sample carries more than one rect.
        labels = torch.ones(boxes.shape[0], dtype=torch.int64)
        targets.append({"boxes": boxes, "labels": labels})

    return targets, [image.squeeze(0) for image in images]

`dict_to_points(keypoints)`

Unpacks a dict of keypoints into a np.array of points and a list of labels

Parameters:

Name	Type	Description	Default
`keypoints`	`dict[str, tuple[float, float]]`	Dict of keypoints	required

Returns:

Type	Description
`tuple[np.array, list[str]]`	np.array, list[str]: Nx2 array of points and list of labels

Source code in courtvision/data.py

def dict_to_points(
    keypoints: dict[str, tuple[float, float]]
) -> tuple[np.array, list[str]]:
    """Split a keypoint dict into a point array and the matching label list

    Entries are ordered by label (ascending) so that row `i` of the returned
    array belongs to label `i` of the returned list.

    Args:
        keypoints (dict[str, tuple[float, float]]): Dict of keypoints

    Returns:
        np.array, list[str]: float32 array of points (Nx2 for N keypoints) and
            the list of labels in the same order
    """
    ordered = sorted(keypoints.items(), key=lambda item: item[0])
    labels = [label for label, _ in ordered]
    coordinates = [point for _, point in ordered]
    points = np.array(coordinates).astype(np.float32)
    return points, labels

`download_data_item(s3_uri, local_path, s3_client=None, use_cached=True)`

Note

courtvision-padel-dataset profile must be configured in ~/.aws/credentials

Parameters:

Name	Type	Description	Default
`s3_uri`	`str`	S3 uri to file	required
`local_path`	`Path`	Path to file on local filesystem	required
`s3_client`	`_type_`	A suitable s3_client (access to s3). Defaults to None.	`None`
`use_cached`	`bool`	If True and the file exists uses the one on disk. Defaults to True.	`True`

Returns:

Name	Type	Description
`Path`	`Path`	Path to the data item on local filesystem

Source code in courtvision/data.py

def download_data_item(
    s3_uri: str, local_path: Path, s3_client=None, use_cached=True
) -> Path:
    """Download a single data item from S3 to the local filesystem

    !!! note
        `courtvision-padel-dataset` profile must be configured in ~/.aws/credentials

    The bucket name is taken from the second path component of `s3_uri`
    (relative to its first component, e.g. `s3:`) and the object key from its
    last three components.

    Args:
        s3_uri (str): S3 uri to file. A `Path` is accepted as well.
        local_path (Path): Path to file on local filesystem
        s3_client (_type_, optional): A suitable s3_client (access to s3). Defaults to None.
        use_cached (bool, optional): If True and the file exists uses the one on disk. Defaults to True.

    Returns:
        Path: Path to the data item on local filesystem

    Raises:
        Exception: Whatever `s3_client.download_fileobj` raises is propagated;
            no partially written file is left at `local_path` in that case.
    """
    if use_cached and local_path.exists():
        return local_path

    if s3_client is None:
        import boto3

        session = boto3.Session(profile_name="courtvision-padel-dataset")
        s3_client = session.client("s3", region_name="us-east-1")

    # Accept both `str` (as annotated) and `Path` (as passed by callers in this
    # module); the parsing below relies on `Path` attributes.
    uri = Path(s3_uri)
    # tuple(...) keeps negative indexing working on Python versions where
    # `Path.parents` does not support it directly.
    bucket_name = tuple(uri.parents)[-3].name
    object_name = "/".join(uri.parts[-3:])
    local_path.parent.mkdir(parents=True, exist_ok=True)

    # Download next to the target and move it into place only on success, so a
    # failed download can never be mistaken for a cached file on a later call.
    partial_path = local_path.with_name(local_path.name + ".part")
    try:
        with open(partial_path, "wb") as fp:
            s3_client.download_fileobj(bucket_name, object_name, fp)
        partial_path.replace(local_path)
    except BaseException:
        partial_path.unlink(missing_ok=True)
        raise
    return local_path

`frames_from_clip_segments(dataset, local_path, stream_type=StreamType.VIDEO)`

Grabs frames for each clip segment in the dataset. A unique id is generated for each clip segment. Frames can be either audio or video frames.

Parameters:

Name	Type	Description	Default
`dataset`	`PadelDataset`	A dataset of annotated clips	required
`local_path`	`Path`	if the file is not already downloaded, it will be downloaded to this path	required
`stream_type`	`StreamType`	Either `StreamType.VIDEO` or `StreamType.AUDIO`. Defaults to StreamType.VIDEO.	`StreamType.VIDEO`

Yields:

Type	Description
`Tuple[dict[str, torch.Tensor], str]`	`{"data": torch.Tensor, "pts": torch.Tensor}, unique_id, match_id`
`Tuple[dict[str, torch.Tensor], str]`	where `unique_id` is the md5 of the annotation unique_id and the start and end times of the clip.
`Tuple[dict[str, torch.Tensor], str]`	And `pts` is a presentation timestamp of the frame expressed in seconds.
`Tuple[dict[str, torch.Tensor], str]`	`match_id` is the parent folder of the clip.

Source code in courtvision/data.py

def frames_from_clip_segments(
    dataset: PadelDataset,
    local_path: Path,
    stream_type: StreamType = StreamType.VIDEO,
) -> Tuple[dict[str, torch.Tensor], str]:
    """
    Grabs frames for each clip segment in the dataset. A unique id is generated for each clip segment.
    Frames can be either audio or video frames.

    All videos are downloaded first (setting `sample.data.video_local_path` on
    every sample); afterwards every result whose value is a `LabelValue` is
    treated as one clip segment and its frames are read. Results of any other
    type are skipped.

    Args:
        dataset (PadelDataset): A dataset of annotated clips
        local_path (Path): if the file is not already downloaded, it will be downloaded to this path
        stream_type (StreamType, optional): Either `StreamType.VIDEO` or `StreamType.AUDIO`. Defaults to StreamType.VIDEO.

    Yields:
        `{"data": torch.Tensor, "pts": torch.Tensor}, unique_id, match_id`
        where `unique_id` is the md5 of the annotation unique_id and the start and end times of the clip.
        And `pts` is a presentation timestamp of the frame expressed in seconds.
        `match_id` is the parent folder of the clip.
    """
    from rich.progress import track

    for sample in track(dataset.samples, description="Downloading data"):
        sample.data.video_local_path = download_data_item(
            s3_uri=sample.data.video_url,
            local_path=local_path
            / sample.data.video_url.parent.name
            / sample.data.video_url.name,
        )
    for sample in dataset.samples:
        for annotation in sample.annotations:
            for result in annotation.result:
                # Only label results describe a clip segment. Skipping the
                # others avoids reading with a stale (or not yet created)
                # reader and stale start/end times.
                if not isinstance(result.value, LabelValue):
                    continue
                start_time = result.value.start
                end_time = result.value.end
                reader = torchvision.io.VideoReader(
                    sample.data.video_local_path.as_posix(), stream_type.value
                )
                reader.seek(start_time)
                try:
                    # The ids are constant for the whole segment.
                    segment_id = md5(
                        f"{start_time}{end_time}{annotation.unique_id}".encode()
                    ).hexdigest()
                    match_id = f"{sample.data.video_local_path.parent.name}"
                    # Iterating (instead of calling next() in a loop) ends
                    # cleanly at the end of the stream, so exhaustion is not
                    # reported as invalid data.
                    for frame in reader:
                        if frame["pts"] < start_time:
                            # seek is not always accurate!
                            # burn frames until we get to the right time.
                            # Alternative - build torchvision from source with video_reader backend
                            continue
                        if frame["pts"] > end_time:
                            break
                        yield frame, segment_id, match_id
                except Exception as e:
                    # Best effort: report the problem and move on to the next segment.
                    print(f"{sample.data.video_local_path=} has invalid data {e=}")
                    continue

`get_keypoints_as_dict(results)`

Go through the results and return a dict of keypoints

Parameters:

Name	Type	Description	Default
`results`	`list[GeneralResult]`	List of results from the annotation	required

Returns:

Type	Description
`dict[str, tuple[float, float]]`	dict[str, tuple[float, float]]: keypoints in absolute coordinates eg: keypoints["{some keypoint}"] = (x, y)

Source code in courtvision/data.py

def get_keypoints_as_dict(
    results: list[GeneralResult],
) -> dict[str, tuple[float, float]]:
    """Collect the keypoint results into a dict keyed by keypoint label

    Coordinates are stored as percentages in the results and are scaled with
    the result's `original_width` / `original_height`. Only the first keypoint
    label of a result is used as key; if a label occurs more than once the
    last occurrence wins.

    Args:
        results (list[GeneralResult]): List of results from the annotation

    Returns:
        dict[str, tuple[float, float]]: keypoints in absolute coordinates eg: keypoints["{some keypoint}"] = (x, y)
    """
    keypoints = {}
    for result in results:
        value = result.value
        if not isinstance(value, KeypointValue):
            continue
        x_abs = value.x / 100.0 * result.original_width
        y_abs = value.y / 100.0 * result.original_height
        keypoints[value.keypointlabels[0]] = (x_abs, y_abs)
    return keypoints

`get_normalized_calibration_image_points_and_clip_ids(dataset)`

Note

This assumes that the calibration points are the only annotations with a VideoRectValue and the points of the same label are in the same place as the last one which will be used.

Note

Points are normalized to 0-1. Not -1 to 1 like in kornia.

Parameters:

Name	Type	Description	Default
`dataset`	`PadelDataset`	Dataset describing a video with calibration points.	required

Returns:

Name	Type	Description
`image_points`	`dict[str, tuple[float, float]]`	Returns a dict of image points in normalized coordinates. And
	`set[str]`	the clip_ids (set[str]) that are associated with the calibration points.

Source code in courtvision/data.py

def get_normalized_calibration_image_points_and_clip_ids(
    dataset: PadelDataset,
) -> tuple[dict[str, tuple[float, float]], set[str]]:
    """Extract the calibration image points and the clip sources of a dataset

    !!! note
        This assumes that the calibration points are the only annotations with a VideoRectValue
        and the points of the same label are in the same place as the last one which will be used.

    !!! note
        Points are normalized to 0-1. Not -1 to 1 like in kornia.

    Each point is the centre of its rect. The clip sources are collected over
    all samples from `video_url`, `video_local_path` and `image` (whichever are set).

    Args:
        dataset (PadelDataset): Dataset describing a video with calibration points.

    Returns:
        image_points (dict[str, tuple[float, float]]): Returns a dict of image points in normalized coordinates. And
        the clip_ids (set[str]) that are associated with the calibration points.

    Raises:
        ValueError: If no clip source has been collected by the time a sample
            has been inspected.
    """
    points_by_label = {}
    sources = set()
    for sample in dataset.samples:
        data = sample.data
        for candidate in (data.video_url, data.video_local_path, data.image):
            if candidate:
                sources.add(candidate)
        # `sources` accumulates across samples, so this only fires while
        # nothing has been collected so far.
        if len(sources) == 0:
            raise ValueError("No clip source found")
        for annotation in sample.annotations:
            for result in annotation.result:
                value = result.value
                if not isinstance(value, VideoRectValue):
                    continue
                for label, rect in zip(value.labels, value.sequence):
                    centre_x = (rect.x + rect.width / 2) / 100.0
                    centre_y = (rect.y + rect.height / 2) / 100.0
                    points_by_label[label] = (centre_x, centre_y)
    return points_by_label, sources

`validate_dataloader(dataloader)`

Runs over all items in a dataloader and validates the annotations.

Parameters:

Name	Type	Description	Default
`dataloader`	`DataLoader`	A dataloader with a collate_fn that returns a list of annotations and a list of images.	required

Source code in courtvision/data.py

def validate_dataloader(dataloader: DataLoader):
    """Runs over all items in a dataloader and validates the annotations.

    Every batch is unpacked as `(images, targets)`. For each image/target pair
    the first box of `target["boxes"]` is checked: its x values must lie
    strictly between 0 and the image width, its y values strictly between 0
    and the image height. Width and height are the last two entries of
    `image.shape[1:]`. Boxes after the first one are not checked.

    NOTE(review): the module-level `collate_fn` returns `(targets, images)` -
    confirm that the dataloader passed in yields batches in the
    `(images, targets)` order expected here.

    Args:
        dataloader (DataLoader): A dataloader whose batches unpack into a
                                list of images and a list of target dicts.

    Raises:
        AssertionError: If any of the checks fails.
    """
    for images, targets in dataloader:
        assert all(target["boxes"].shape for target in targets)
        assert all(image.shape for image in images)
        for image, target in zip(images, targets):
            height, width = image.shape[1:]
            first_box = target["boxes"][0]
            # Even positions hold x values, odd positions hold y values.
            xs = first_box[::2]
            ys = first_box[1::2]
            assert all(0 < x < width for x in xs)
            assert all(0 < y < height for y in ys)

courtvision.data

AnnotationDataPath

CameraInfo dataclass

load(file_name) staticmethod

save(file_name)

CourtVisionArtifacts dataclass

CourtVisionBallDataset

collate_fn(batch) staticmethod

find_image_path(root, sample) staticmethod

show_sample(annotation, image) staticmethod

CourtVisionDataset

collate_fn(batch) staticmethod

show_sample(annotation, image) staticmethod

KeypointValue

LabelValue

PadelCourt dataclass

RectValue

VideoRectSequence

VideoRectValue

annotations_to_bbox(annotations)

annotations_to_label(annotations)

collate_fn(batch)

dict_to_points(keypoints)

download_data_item(s3_uri, local_path, s3_client=None, use_cached=True)

frames_from_clip_segments(dataset, local_path, stream_type=StreamType.VIDEO)

get_keypoints_as_dict(results)

get_normalized_calibration_image_points_and_clip_ids(dataset)

validate_dataloader(dataloader)

`AnnotationDataPath`

`CameraInfo` `dataclass`

`load(file_name)` `staticmethod`

`save(file_name)`

`CourtVisionArtifacts` `dataclass`

`CourtVisionBallDataset`

`collate_fn(batch)` `staticmethod`

`find_image_path(root, sample)` `staticmethod`

`show_sample(annotation, image)` `staticmethod`

`CourtVisionDataset`

`collate_fn(batch)` `staticmethod`

`show_sample(annotation, image)` `staticmethod`

`KeypointValue`

`LabelValue`

`PadelCourt` `dataclass`

`RectValue`

`VideoRectSequence`

`VideoRectValue`

`annotations_to_bbox(annotations)`

`annotations_to_label(annotations)`

`collate_fn(batch)`

`dict_to_points(keypoints)`

`download_data_item(s3_uri, local_path, s3_client=None, use_cached=True)`

`frames_from_clip_segments(dataset, local_path, stream_type=StreamType.VIDEO)`

`get_keypoints_as_dict(results)`

`get_normalized_calibration_image_points_and_clip_ids(dataset)`

`validate_dataloader(dataloader)`