Skip to content

list

The dataset list subcommand

list_dataset(challenge, return_list=True, config_ctx=ConfigContext(), challenge_ctx=ChallengeContext())

Lists the datasets available for this challenge

Parameters:

Name Type Description Default
challenge str

one of

  • [int] challenge id
  • [str] challenge slug
  • [str] challenge url
required
return_list bool

if true, returns the datasets instead of pretty-printing them

True
config_ctx ConfigContext

CLI config

ConfigContext()
challenge_ctx ChallengeContext

Challenge config

ChallengeContext()

Returns:

Type Description
Union[NoneType, List[dict]]

list of datasets if return_list was True; None otherwise

Source code in aicrowd/dataset/list.py
def list_dataset(
    challenge: str,
    return_list: bool = True,
    config_ctx: ConfigContext = ConfigContext(),
    challenge_ctx: ChallengeContext = ChallengeContext(),
) -> Union[None, List[dict]]:
    """
    Fetches the datasets of a challenge and either returns or displays them

    The challenge id stored in the challenge config takes precedence; the
    `challenge` argument is only parsed when the config holds no id.

    Args:
        challenge: identifies the challenge, one of

            - [`int`] challenge id
            - [`str`] challenge slug
            - [`str`] challenge url
        return_list: when true, the datasets are handed back to the caller;
            when false, they are rendered as a table on the console
        config_ctx: CLI config
        challenge_ctx: Challenge config

    Returns:
        the datasets if `return_list` was True; None otherwise

    Raises:
        ChallengeNotFoundException: neither the challenge config nor
            `challenge` yielded a challenge id
    """
    logger = logging.getLogger()
    api_key = must_get_api_key(config_ctx)

    # the challenge config wins; the --challenge option is only the fallback
    challenge_id = challenge_ctx.challenge.get(ChallengeConstants.CONFIG_ID_KEY)
    if challenge_id is None:
        challenge_id, _ = parse_cli_challenge(challenge, api_key)

        # neither source produced an id, nothing more to try
        if challenge_id is None:
            logger.error("Failed to parse challenge")
            raise ChallengeNotFoundException(
                "Challenge with the given details could not be found",
                exit_code=INVALID_PARAMETER,
            )

    datasets = get_datasets(challenge_id, api_key)
    logger.info("Got %d datasets", len(datasets))

    if return_list:
        return datasets

    def dash_if_empty(text):
        # missing or empty values are shown as a dash placeholder
        return text if text and len(text) != 0 else "-"

    # (dataset key, formatter) pairs, in the order of the table columns
    # that follow the leading "#" column
    cell_formatters = (
        ("title", dash_if_empty),
        ("description", dash_if_empty),
        ("external_file_size", humanize_size),
    )

    table = Table(
        Column("#", max_width=2),
        "Title",
        "Description",
        Column(header="Size", justify="right"),
        title=f"Datasets for challenge #{challenge_id}",
        show_header=True,
        header_style="bold magenta",
        box=box.SQUARE,
    )

    for position, dataset in enumerate(datasets):
        cells = [fmt(dataset.get(field)) for field, fmt in cell_formatters]
        table.add_row(str(position), *cells)

    Console().print(table, justify="left")