helper

Helper functions for data loading with progress indication in Streamlit.

This module provides cached data loading functions for CSV and Parquet files, as well as utilities to initialize Streamlit session state from preprocessed datasets. All functions use Polars for efficient DataFrame operations and integrate with Streamlit's caching and UI feedback mechanisms.

custom_exception_handler

custom_exception_handler(exception)

Handle exceptions with logging and user-friendly Streamlit display.

Parameters:

Name       Type       Description                         Default
exception  Exception  The exception instance to handle.   required

Note

This function logs the full exception traceback and displays a user-friendly error message in the Streamlit UI instead of showing the raw Python traceback.

Source code in src/mangetamain/utils/helper.py
def custom_exception_handler(exception: Exception) -> None:
    """Handle exceptions with logging and user-friendly Streamlit display.

    Args:
        exception: The exception instance to handle.

    Note:
        This function logs the full exception traceback and displays a
        user-friendly error message in the Streamlit UI instead of showing
        the raw Python traceback.
    """
    import streamlit as st  # noqa: PLC0415

    from mangetamain.utils.logger import get_logger  # noqa: PLC0415

    logger = get_logger()
    logger.error(f"An error occurred: {exception}")
    st.error("An unexpected error occurred. Please contact support.")

load_csv_with_progress

load_csv_with_progress(file_path)

Read a CSV file into a Polars DataFrame while showing a Streamlit spinner.

Parameters:

Name       Type  Description                     Default
file_path  str   Path to the CSV file to read.   required

Returns:

Type       Description
DataFrame  df, the loaded Polars DataFrame.
float      load_time, the elapsed time in seconds.

Source code in src/mangetamain/utils/helper.py
@st.cache_data  # type: ignore[misc]
def load_csv_with_progress(file_path: str) -> tuple[pl.DataFrame, float]:
    """Read a CSV file into a Polars DataFrame while showing a Streamlit spinner.

    Args:
      file_path: Path to the CSV file to read.

    Returns:
      A tuple (df, load_time) where ``df`` is the loaded Polars DataFrame and
      ``load_time`` is the elapsed time in seconds.
    """
    start_time = time.time()
    with st.spinner(f"Loading data from {file_path}..."):
        df = pl.read_csv(file_path)
    load_time = time.time() - start_time
    logger.info(
        f"Data loaded successfully from {file_path} in {load_time:.2f} seconds.",
    )
    return df, load_time
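
A usage sketch (the CSV path is hypothetical):

import streamlit as st

from mangetamain.utils.helper import load_csv_with_progress

# Cached by st.cache_data: reruns with the same path return instantly.
df, load_time = load_csv_with_progress("data/interactions.csv")  # hypothetical path
st.write(f"{df.height} rows loaded in {load_time:.2f}s")

Note that st.cache_data caches the entire return tuple, so on a cache hit load_time reports the original load duration, not the near-zero lookup time.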

load_data_from_parquet_and_pickle

load_data_from_parquet_and_pickle()

Load ALL application data ONCE and cache it globally across all users.

This function is called once per application lifecycle. The first user will trigger the data loading (90s), but all subsequent users will get instant access (<0.01s) thanks to @st.cache_resource.

The function reads several precomputed parquet files and returns the resulting Polars DataFrames / Series as a tuple.

Source code in src/mangetamain/utils/helper.py
@st.cache_resource(show_spinner=False)  # type: ignore[misc]
def load_data_from_parquet_and_pickle() -> tuple[
    pl.DataFrame,
    pl.DataFrame,
    pl.DataFrame,
    pl.DataFrame,
    pl.DataFrame,
    pl.DataFrame,
    pl.DataFrame,
    pl.DataFrame,
    pl.Series,
    pl.Series,
    RecipeAnalyzer | None,
    bool,
]:
    """Load ALL application data ONCE and cache it globally across all users.

    This function is called once per application lifecycle. The first user will
    trigger the data loading (90s), but all subsequent users will get instant
    access (<0.01s) thanks to @st.cache_resource.

    The function reads several precomputed parquet files and returns the
    resulting Polars DataFrames / Series as a tuple.
    """
    logger.info("🔄 Starting data load (this happens ONCE globally)...")
    start_time = time.time()
    try:
        (
            df_interactions,
            df_interactions_nna,
            df_recipes,
            df_recipes_nna,
            df_total_nt,
            df_total,
            df_total_court,
            df_user,
        ) = _load_all_dataframes()
        proportion_m, proportion_s = _load_proportions()
        recipe_analyzer = _load_recipe_analyzer()
        data_loaded = True
        total_time = time.time() - start_time
        logger.info(
            f"✅ ALL DATA LOADED successfully in {total_time:.2f}s "
            f"(cached globally for all users)",
        )
    except Exception as e:
        logger.error(
            f"❌ Error loading data: {e}, please run backend/dataprocessor "
            f"first to initialize application data.",
        )
        st.error(
            f"Error loading data: {e}, please run backend/dataprocessor "
            f"first to initialize application data.",
        )
        # Return empty data on error
        data_loaded = False
        df_interactions = pl.DataFrame()
        df_interactions_nna = pl.DataFrame()
        df_recipes = pl.DataFrame()
        df_recipes_nna = pl.DataFrame()
        df_total_nt = pl.DataFrame()
        df_total = pl.DataFrame()
        df_total_court = pl.DataFrame()
        df_user = pl.DataFrame()
        proportion_m = pl.Series()
        proportion_s = pl.Series()
        recipe_analyzer = None
    return (
        df_interactions,
        df_interactions_nna,
        df_recipes,
        df_recipes_nna,
        df_total_nt,
        df_total,
        df_total_court,
        df_user,
        proportion_m,
        proportion_s,
        recipe_analyzer,
        data_loaded,
    )
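
A sketch of consuming the cached tuple from a page; the session-state key is illustrative:

import streamlit as st

from mangetamain.utils.helper import load_data_from_parquet_and_pickle

*frames, proportion_m, proportion_s, recipe_analyzer, data_loaded = (
    load_data_from_parquet_and_pickle()
)
if not data_loaded:
    st.stop()  # the helper has already shown an st.error() message

df_recipes = frames[2]  # third element of the tuple, per the order above
st.session_state.setdefault("df_recipes", df_recipes)

Because @st.cache_resource hands every session the same objects rather than copies, callers should treat the returned DataFrames as read-only.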

load_parquet_with_progress

load_parquet_with_progress(file_path)

Read a Parquet file into a Polars DataFrame (cached globally with zero-copy).

Parameters:

Name       Type  Description                         Default
file_path  str   Path to the Parquet file to read.   required

Returns:

Type       Description
DataFrame  A Polars DataFrame loaded from the specified parquet file.

Source code in src/mangetamain/utils/helper.py
@st.cache_resource(show_spinner=False)  # type: ignore[misc]
def load_parquet_with_progress(file_path: str) -> pl.DataFrame:
    """Read a Parquet file into a Polars DataFrame (cached globally with zero-copy).

    Args:
      file_path: Path to the Parquet file to read.

    Returns:
      A Polars DataFrame loaded from the specified parquet file.
    """
    t = time.time()
    df = pl.read_parquet(file_path)
    elapsed = time.time() - t
    logger.info(f"✅ {file_path} loaded in {elapsed:.2f}s - Shape: {df.shape}")
    return df
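
Usage mirrors the CSV helper (the path is hypothetical), but since the cache is @st.cache_resource the returned DataFrame is shared across sessions and should be treated as read-only:

from mangetamain.utils.helper import load_parquet_with_progress

df = load_parquet_with_progress("data/recipes.parquet")  # hypothetical path
print(df.shape)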