Skip to content

split

TargetBlocks

Markdown cell headings in the notebook and their phase mappings

extract_partial_notebook(nb, start_block, end_block, remove=False)

Collects or removes the blocks of cells within the start and end headers

Parameters:

Name Type Description Default
nb Dict[str, Any]

Input notebook

required
start_block str

Regular expression for the starting block

required
end_block str

Regular expression for the ending block

required
remove bool

Determines if the cells within the start and end blocks are removed (True) or extracted (False)

False

Examples:

To extract the notebook cells within start_block and end_block

>>> extract_partial_notebook(nb, start_block="^# Heading 1", end_block="^# Heading 2")

To remove the notebook cells within similar start and end blocks, use remove=True

Returns:

Type Description
Dict[str, Any]

Notebook with or without the cells between the start and end blocks

Source code in aicrowd/notebook/split.py
def extract_partial_notebook(
    nb: Dict[str, Any], start_block: str, end_block: str, remove: bool = False
) -> Dict[str, Any]:
    """
    Collects or removes the blocks of cells within the start and end headers

    Only the first element of each cell's `source` is matched against the
    patterns. Empty cells are skipped and therefore never end up in the
    result. The notebook is modified in place: `nb["cells"]` is replaced by
    the selected cells and the same `nb` object is returned.

    Args:
        nb: Input notebook
        start_block: Regular expression for the starting block
        end_block: Regular expression for the ending block
        remove: Set to `True` to remove the cells within the start and end
            blocks instead of extracting them. When `False`, scanning stops
            at the first cell matching `end_block`; when `True`, scanning
            continues past it so later cells are still considered.

    Examples:
        To extract the notebook cells within `start_block` and `end_block`
        >>> extract_partial_notebook(nb, start_block="^# Heading 1", end_block="^# Heading 2")

        To remove the notebook cells within similar start and end blocks, use
        `remove=True`

    Returns:
        Notebook with or without the cells between the start and end blocks
    """
    selected = []
    inside_block = False
    for cell in nb["cells"]:
        if not is_not_empty_cell(cell):
            continue

        first_line = cell["source"][0]

        # Once a block has been entered the flag stays set until an end
        # header is seen, so the start pattern only matters while outside.
        if not inside_block and re.search(start_block, first_line):
            inside_block = True

        if re.search(end_block, first_line):
            if not remove:
                # Extraction mode: nothing after the end header is needed.
                break
            inside_block = False

        # `extraction_direction` decides from the block flag and `remove`
        # whether this cell is kept.
        if extraction_direction(inside_block, remove):
            selected.append(cell)

    nb["cells"] = selected
    return nb

is_not_empty_cell(cell)

Check for non empty cells

Parameters:

Name Type Description Default
cell Dict[str, Any]

Jupyter cell

required

Returns:

Type Description
bool

True if the cell is not empty

Source code in aicrowd/notebook/split.py
def is_not_empty_cell(cell: Dict[str, Any]) -> bool:
    """
    Tell whether a cell has any source content

    Args:
        cell: Jupyter cell

    Returns:
        True if the cell's `source` has a non-zero length
    """
    source = cell["source"]
    return len(source) != 0

split_notebook(file_path, output_dir)

Split the submission notebook into install.ipynb, train.ipynb and predict.ipynb

Note: API key scrubbing should be done before this

Parameters:

Name Type Description Default
file_path str

Path to the submission notebook

required
output_dir str

Directory to place the output files in

required
Source code in aicrowd/notebook/split.py
def split_notebook(file_path: str, output_dir: str):
    """
    Split the submission notebook into install.ipynb, train.ipynb and
    predict.ipynb

    **Note:** API key scrubbing should be done before this

    The output directory is created when it does not exist yet, including
    any missing parent directories.

    Args:
        file_path: Path to the submission notebook
        output_dir: Directory to place the output files in

    Raises:
        FileExistsError: If `output_dir` exists but is not a directory
    """
    print("Collecting notebook...")
    # A single makedirs call replaces the exists()/mkdir() pair: it has no
    # window between the check and the creation, and it also handles nested
    # output paths whose parents are missing.
    os.makedirs(output_dir, exist_ok=True)
    write_installation_notebook(file_path, output_dir)
    write_training_notebook(file_path, output_dir)
    write_prediction_notebook(file_path, output_dir)

write_installation_notebook(file_path, output_dir)

Extracts and writes install.ipynb to the submission directory

Parameters:

Name Type Description Default
file_path str

Path to the submission notebook

required
output_dir str

Path to the submission directory

required
Source code in aicrowd/notebook/split.py
def write_installation_notebook(file_path: str, output_dir: str):
    """
    Extracts the installation section and writes it as install.ipynb

    The section is delimited by `TargetBlocks.INSTALL_START` and
    `TargetBlocks.INSTALL_END`.

    Args:
        file_path: Path to the submission notebook
        output_dir: Path to the submission directory
    """
    install_nb = extract_partial_notebook(
        read_notebook(file_path),
        TargetBlocks.INSTALL_START,
        TargetBlocks.INSTALL_END,
    )
    target_path = os.path.join(output_dir, "install.ipynb")
    write_notebook(target_path, install_nb)

write_prediction_notebook(file_path, output_dir)

Extracts and writes predict.ipynb to the submission directory

Parameters:

Name Type Description Default
file_path str

Path to the submission notebook

required
output_dir str

Path to the submission directory

required
Source code in aicrowd/notebook/split.py
def write_prediction_notebook(file_path: str, output_dir: str):
    """
    Builds predict.ipynb from the submission notebook and writes it out

    The notebook is first cut at the `TargetBlocks.SUBMIT` heading, then the
    installation and training sections are removed from what is left.

    Args:
        file_path: Path to the submission notebook
        output_dir: Path to the submission directory
    """
    # ".*" matches any first line, so the selection starts at the very
    # first non-empty cell.
    predict_nb = extract_partial_notebook(
        read_notebook(file_path), ".*", TargetBlocks.SUBMIT
    )

    # Sections that must not be part of the prediction notebook.
    excluded_sections = (
        (TargetBlocks.INSTALL_START, TargetBlocks.INSTALL_END),
        (TargetBlocks.TRAIN_START, TargetBlocks.TRAIN_END),
    )
    for section_start, section_end in excluded_sections:
        predict_nb = extract_partial_notebook(
            predict_nb, section_start, section_end, remove=True
        )

    write_notebook(os.path.join(output_dir, "predict.ipynb"), predict_nb)

write_training_notebook(file_path, output_dir)

Extracts and writes train.ipynb to the submission directory

Parameters:

Name Type Description Default
file_path str

Path to the submission notebook

required
output_dir str

Path to the submission directory

required
Source code in aicrowd/notebook/split.py
def write_training_notebook(file_path: str, output_dir: str):
    """
    Builds train.ipynb from the submission notebook and writes it out

    The notebook is first cut at the `TargetBlocks.TRAIN_END` heading, then
    the installation section is removed from what is left.

    Args:
        file_path: Path to the submission notebook
        output_dir: Path to the submission directory
    """
    # ".*" matches any first line, so the selection starts at the very
    # first non-empty cell.
    up_to_training_end = extract_partial_notebook(
        read_notebook(file_path), ".*", TargetBlocks.TRAIN_END
    )
    train_nb = extract_partial_notebook(
        up_to_training_end,
        TargetBlocks.INSTALL_START,
        TargetBlocks.INSTALL_END,
        remove=True,
    )
    target_path = os.path.join(output_dir, "train.ipynb")
    write_notebook(target_path, train_nb)