helper

Helper functions for data loading with progress indication in Streamlit.

This module provides cached data loading functions for CSV and Parquet files, as well as utilities to initialize Streamlit session state from preprocessed datasets. All functions use Polars for efficient DataFrame operations and integrate with Streamlit's caching and UI feedback mechanisms.

custom_exception_handler

custom_exception_handler(exception)

Handle exceptions with logging and user-friendly Streamlit display.

Parameters:

Name       Type       Description                         Default
exception  Exception  The exception instance to handle.   required

Note

This function logs the full exception traceback and displays a user-friendly error message in the Streamlit UI instead of showing the raw Python traceback.

Source code in src/mangetamain/utils/helper.py
def custom_exception_handler(exception: Exception) -> None:
    """Handle exceptions with logging and user-friendly Streamlit display.

    Args:
        exception: The exception instance to handle.

    Note:
        This function logs the full exception traceback and displays a
        user-friendly error message in the Streamlit UI instead of showing
        the raw Python traceback.
    """
    import streamlit as st  # noqa: PLC0415

    from mangetamain.utils.logger import get_logger  # noqa: PLC0415

    logger = get_logger()
    logger.error(f"An error occurred: {exception}")
    st.error("An unexpected error occurred. Please contact support.")

load_csv_with_progress

load_csv_with_progress(file_path)

Read a CSV file into a Polars DataFrame while showing a Streamlit spinner.

Parameters:

Name       Type  Description                     Default
file_path  str   Path to the CSV file to read.   required

Returns:

Type       Description
DataFrame  df, the loaded Polars DataFrame.
float      load_time, the elapsed time in seconds.

Source code in src/mangetamain/utils/helper.py
@st.cache_data  # type: ignore[misc]
def load_csv_with_progress(file_path: str) -> tuple[pl.DataFrame, float]:
    """Read a CSV file into a Polars DataFrame while showing a Streamlit spinner.

    Args:
      file_path: Path to the CSV file to read.

    Returns:
      A tuple (df, load_time) where ``df`` is the loaded Polars DataFrame and
      ``load_time`` is the elapsed time in seconds.
    """
    start_time = time.time()
    with st.spinner(f"Loading data from {file_path}..."):
        df = pl.read_csv(file_path)
    load_time = time.time() - start_time
    logger.info(
        f"Data loaded successfully from {file_path} in {load_time:.2f} seconds.",
    )
    return df, load_time
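
A usage sketch (the CSV path is hypothetical):

import streamlit as st

from mangetamain.utils.helper import load_csv_with_progress

# Cached by st.cache_data: reruns with the same path return instantly.
df, load_time = load_csv_with_progress("data/interactions.csv")  # hypothetical path
st.write(f"{df.height} rows loaded in {load_time:.2f}s")

Note that st.cache_data caches the entire return tuple, so on a cache hit load_time reports the original load duration, not the near-zero lookup time.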

load_data_from_parquet_and_pickle

load_data_from_parquet_and_pickle()

Load ALL application data ONCE and cache it globally across all users.

This function is called once per application lifecycle. The first user will trigger the data loading (90s), but all subsequent users will get instant access (<0.01s) thanks to @st.cache_resource.

The function reads several precomputed parquet files and returns the resulting Polars DataFrames / Series as a tuple.

Source code in src/mangetamain/utils/helper.py
@st.cache_resource(show_spinner=False)  # type: ignore[misc]
def load_data_from_parquet_and_pickle() -> tuple[
    pl.DataFrame,
    pl.DataFrame,
    pl.DataFrame,
    pl.DataFrame,
    pl.DataFrame,
    pl.DataFrame,
    pl.DataFrame,
    pl.DataFrame,
    pl.Series,
    pl.Series,
    RecipeAnalyzer | None,
    bool,
]:
    """Load ALL application data ONCE and cache it globally across all users.

    This function is called once per application lifecycle. The first user will
    trigger the data loading (90s), but all subsequent users will get instant
    access (<0.01s) thanks to @st.cache_resource.

    The function reads several precomputed parquet files and returns the
    resulting Polars DataFrames / Series as a tuple.
    """
    logger.info("🔄 Starting data load (this happens ONCE globally)...")
    start_time = time.time()
    try:
        (
            df_interactions,
            df_interactions_nna,
            df_recipes,
            df_recipes_nna,
            df_total_nt,
            df_total,
            df_total_court,
            df_user,
        ) = _load_all_dataframes()
        proportion_m, proportion_s = _load_proportions()
        recipe_analyzer = _load_recipe_analyzer()
        data_loaded = True
        total_time = time.time() - start_time
        logger.info(
            f"✅ ALL DATA LOADED successfully in {total_time:.2f}s "
            f"(cached globally for all users)",
        )
    except Exception as e:
        logger.error(
            f"❌ Error loading data: {e}, please run backend/dataprocessor "
            f"first to initialize application data.",
        )
        st.error(
            f"Error loading data: {e}, please run backend/dataprocessor "
            f"first to initialize application data.",
        )
        # Return empty data on error
        data_loaded = False
        df_interactions = pl.DataFrame()
        df_interactions_nna = pl.DataFrame()
        df_recipes = pl.DataFrame()
        df_recipes_nna = pl.DataFrame()
        df_total_nt = pl.DataFrame()
        df_total = pl.DataFrame()
        df_total_court = pl.DataFrame()
        df_user = pl.DataFrame()
        proportion_m = pl.Series()
        proportion_s = pl.Series()
        recipe_analyzer = None
    return (
        df_interactions,
        df_interactions_nna,
        df_recipes,
        df_recipes_nna,
        df_total_nt,
        df_total,
        df_total_court,
        df_user,
        proportion_m,
        proportion_s,
        recipe_analyzer,
        data_loaded,
    )
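
A sketch of consuming the cached tuple from a page; the session-state key is illustrative:

import streamlit as st

from mangetamain.utils.helper import load_data_from_parquet_and_pickle

*frames, proportion_m, proportion_s, recipe_analyzer, data_loaded = (
    load_data_from_parquet_and_pickle()
)
if not data_loaded:
    st.stop()  # the helper has already shown an st.error() message

df_recipes = frames[2]  # third element of the tuple, per the order above
st.session_state.setdefault("df_recipes", df_recipes)

Because @st.cache_resource hands every session the same objects rather than copies, callers should treat the returned DataFrames as read-only.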

load_parquet_with_progress

load_parquet_with_progress(file_path)

Read a Parquet file into a Polars DataFrame (cached globally with zero-copy).

Parameters:

Name       Type  Description                         Default
file_path  str   Path to the Parquet file to read.   required

Returns:

Type       Description
DataFrame  A Polars DataFrame loaded from the specified parquet file.

Source code in src/mangetamain/utils/helper.py
@st.cache_resource(show_spinner=False)  # type: ignore[misc]
def load_parquet_with_progress(file_path: str) -> pl.DataFrame:
    """Read a Parquet file into a Polars DataFrame (cached globally with zero-copy).

    Args:
      file_path: Path to the Parquet file to read.

    Returns:
      A Polars DataFrame loaded from the specified parquet file.
    """
    t = time.time()
    df = pl.read_parquet(file_path)
    elapsed = time.time() - t
    logger.info(f"✅ {file_path} loaded in {elapsed:.2f}s - Shape: {df.shape}")
    return df
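
Usage mirrors the CSV helper (the path is hypothetical), but since the cache is @st.cache_resource the returned DataFrame is shared across sessions and should be treated as read-only:

from mangetamain.utils.helper import load_parquet_with_progress

df = load_parquet_with_progress("data/recipes.parquet")  # hypothetical path
print(df.shape)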