# Source code for cellmil.statistics.stats_printer

import wandb
import pandas as pd
from tqdm import tqdm
import bambi as bmb  # type: ignore
import arviz as az
from markdown_it import MarkdownIt
from weasyprint import HTML  # type: ignore
from typing import Any, cast
from statsmodels.formula.api import mixedlm  # type: ignore
from concurrent.futures import ThreadPoolExecutor, as_completed
from cellmil.interfaces.StatsPrinterConfig import StatsPrinterConfig
from cellmil.utils.wandb import WandbClient
from cellmil.utils import logger


class StatsPrinter:
    """Collect W&B run metrics into a DataFrame and produce a statistical report.

    Fits both frequentist (statsmodels LME) and Bayesian (Bambi) models per
    task and renders the results as a markdown + PDF report.
    """

    # Constants for configuration keys
    FEATURES_KEY = "FEATURES"
    GNNS_KEY = "GNNs"
    MILS_KEY = "MILs"
    GNN_KEY = "GNN"
    MIL_KEY = "MIL"

    # Constants for DataFrame columns
    COLUMN_EXPERIMENT_ID = "EXPERIMENT_ID"
    COLUMN_TASK = "TASK"
    # Metric columns fetched from every run
    METRICS = ["f1", "recall", "precision", "auroc"]
    def __init__(self, config: StatsPrinterConfig):
        """Log into W&B, fetch all runs, and load them into ``self.df``.

        Args:
            config: Printer configuration (W&B team and projects).

        Raises:
            RuntimeError: If no runs could be loaded into the DataFrame.
        """
        self.config = config
        wandb.login()  # Ensure wandb is logged in
        self.tasks: list[str] = [
            "ADENOvsSQUA",
            "PDL1",
            "DCR",
            "OS6",
            "OS24",
        ]  # TODO: Make this configurable
        # Baseline configuration; one-hot encoding excludes these options.
        self.base_run_config: dict[str, Any] = {
            self.FEATURES_KEY: "RESNET",
            self.GNN_KEY: None,
            self.MIL_KEY: "ATTENTION",
        }
        # All configuration options that may appear in an experiment ID.
        self.run_configs: dict[str, Any] = {
            self.FEATURES_KEY: [
                "RESNET",
                "GIGAPATH",
                "UNI",
                "MORPHO",
                "PYRAD",
                "GRAPH",
                "ALL",
            ],
            self.GNNS_KEY: [None, "GAT", "SMALLWORLD"],
            self.MILS_KEY: ["ATTENTION", "CLAM", "HEAD4TYPE"],
        }  # TODO: ----
        self.df = pd.DataFrame()
        self.wandb_client = WandbClient(
            team=self.config.team, projects=self.config.projects, tasks=self.tasks
        )
        self.runs = self.wandb_client.get_runs(preprocess=True)
        logger.info(f"Total accessible runs after preprocessing: {len(self.runs)}")
        self._load_runs_into_df()
        if self.df.empty:
            raise RuntimeError(
                "DataFrame is empty after loading runs. Check logs for errors during run processing."
            )
        print(self.df.head())
    def _load_runs_into_df(self):
        """Fetch per-run metadata and metrics in parallel and build ``self.df``.

        Each row contains the experiment ID, its task, the one-hot encoded
        configuration, and one column per metric in ``METRICS``. Failed runs
        are logged and skipped.
        """

        def process_run(run: Any) -> dict[str, str | int | None | float]:
            """Process a single run and return its data."""
            experiment_id = self.wandb_client.get_experiment_id(run)
            run_data: dict[str, str | int | None | float] = {
                self.COLUMN_EXPERIMENT_ID: experiment_id,
                self.COLUMN_TASK: self.wandb_client.get_task(experiment_id),
                **self._get_run_config(experiment_id),
                **{
                    metric: self.wandb_client.get_metric(run, metric)
                    for metric in self.METRICS
                },
            }
            return run_data

        data: list[dict[str, Any]] = []
        # Use ThreadPoolExecutor for parallel processing (better for I/O-bound operations)
        with ThreadPoolExecutor(max_workers=8) as executor:
            # Submit all tasks
            future_to_run = {
                executor.submit(process_run, run): run for run in self.runs
            }
            # Process completed tasks with progress bar
            with tqdm(
                total=len(self.runs), desc="Loading runs into DataFrame"
            ) as pbar:
                for future in as_completed(future_to_run):
                    try:
                        run_data = future.result()
                        data.append(run_data)
                    except Exception as e:
                        run = future_to_run[future]
                        logger.error(f"Error processing run {run.name}: {e}")
                    finally:
                        pbar.update(1)
        self.df = pd.DataFrame(data)
        logger.info(f"Loaded {len(self.df)} runs into DataFrame.")
[docs] def _get_run_config(self, experiment_id: str) -> dict[str, int]: """ Get the run configuration associated with a given experiment ID as one-hot encoded features. Excludes base configuration options. Args: experiment_id: The ID of the experiment Returns: A dictionary with one-hot encoded columns for each configuration option (excluding base config) """ run_config: dict[str, int] = {} # Check for each feature type (excluding base) for feature in self.run_configs[self.FEATURES_KEY]: if feature != self.base_run_config[self.FEATURES_KEY]: run_config[feature] = 1 if feature in experiment_id else 0 # Check for each GNN type (excluding base, which is None) for gnn in self.run_configs[self.GNNS_KEY]: if gnn is not None and gnn != self.base_run_config[self.GNN_KEY]: run_config[gnn] = 1 if gnn in experiment_id else 0 # Check for each MIL type (excluding base) for mil in self.run_configs[self.MILS_KEY]: if mil != self.base_run_config[self.MIL_KEY]: run_config[mil] = 1 if mil in experiment_id else 0 return run_config
    def _fit_frequentist_models(
        self, metric: str, config_columns: list[str]
    ) -> dict[str, dict[str, Any]]:
        """
        Fit frequentist Linear Mixed Effects models for each task.

        Args:
            metric: Outcome column fitted as the dependent variable.
            config_columns: List of configuration column names

        Returns:
            Dictionary mapping task names to their model results; on failure
            the value is ``{"error": <message>}``.
        """
        logger.info("Fitting frequentist LME models...")
        results: dict[str, dict[str, Any]] = {}

        for task in self.tasks:
            task_df = self.df[self.df[self.COLUMN_TASK] == task].copy()

            if task_df.empty:
                logger.warning(f"No data found for task: {task}")
                results[task] = {"error": "No data available"}
                continue

            # Only include configs that have variance
            available_configs = [
                config
                for config in config_columns
                if task_df[config].nunique() >= 2
            ]

            if not available_configs:
                logger.warning(f"No configurations with variance for task: {task}")
                results[task] = {"error": "No configurations with variance"}
                continue

            logger.info(f"Available configurations for {task}: {available_configs}")

            # Create formula
            fixed_effects_formula = " + ".join(available_configs)
            formula = f"{metric} ~ {fixed_effects_formula}"

            try:
                logger.info(f"Fitting LME model for {task}: {formula}")

                # Fit the model; random intercept grouped by experiment ID
                model = mixedlm(
                    formula, data=task_df, groups=task_df[self.COLUMN_EXPERIMENT_ID]
                )
                result = model.fit(method="powell", reml=True)  # type: ignore

                # Store results
                results[task] = {
                    "model": result,
                    "formula": formula,
                    "available_configs": available_configs,
                    "task_df": task_df,
                    "n_runs": len(task_df),
                    "n_experiments": task_df[self.COLUMN_EXPERIMENT_ID].nunique(),
                    "mean_metric": task_df[metric].mean(),
                    "std_metric": task_df[metric].std(),
                }

                logger.info(f"Successfully fitted model for {task}")

            except Exception as e:
                logger.error(f"Error fitting LME for task {task}: {e}")
                results[task] = {"error": str(e)}
                import traceback

                traceback.print_exc()

        return results
    def _fit_bayesian_models(
        self, metric: str, config_columns: list[str]
    ) -> dict[str, dict[str, Any]]:
        """
        Fit Bayesian hierarchical models for each task using Bambi.

        Args:
            metric: Outcome column fitted as the dependent variable.
            config_columns: List of configuration column names

        Returns:
            Dictionary mapping task names to their model results; on failure
            the value is ``{"error": <message>}``.
        """
        logger.info("Fitting Bayesian hierarchical models...")
        results: dict[str, dict[str, Any]] = {}

        for task in self.tasks:
            task_df = self.df[self.df[self.COLUMN_TASK] == task].copy()

            if task_df.empty:
                logger.warning(f"No data found for task: {task}")
                results[task] = {"error": "No data available"}
                continue

            # Only include configs that have variance
            available_configs = [
                config
                for config in config_columns
                if task_df[config].nunique() >= 2
            ]

            if not available_configs:
                logger.warning(f"No configurations with variance for task: {task}")
                results[task] = {"error": "No configurations with variance"}
                continue

            logger.info(
                f"Available configurations for {task} (Bayesian): {available_configs}"
            )

            # Create formula with random intercept
            fixed_effects_formula = " + ".join(available_configs)
            formula = (
                f"{metric} ~ {fixed_effects_formula} + (1|{self.COLUMN_EXPERIMENT_ID})"
            )

            try:
                logger.info(f"Fitting Bayesian model for {task}: {formula}")

                # Build Bambi model
                model = bmb.Model(formula, data=task_df)

                # Fit the model with MCMC
                # Increase target_accept to reduce divergences
                idata = model.fit(  # type: ignore
                    draws=2000,
                    tune=1000,
                    chains=4,
                    random_seed=42,
                    target_accept=0.95,  # Increase from default 0.8 to reduce divergences
                )

                # Get posterior summary using arviz
                summary = az.summary(idata, hdi_prob=0.95)  # type: ignore

                # Store results
                results[task] = {
                    "model": model,
                    "idata": idata,
                    "summary": summary,
                    "formula": formula,
                    "available_configs": available_configs,
                    "task_df": task_df,
                    "n_runs": len(task_df),
                    "n_experiments": task_df[self.COLUMN_EXPERIMENT_ID].nunique(),
                    "mean_metric": task_df[metric].mean(),
                    "std_metric": task_df[metric].std(),
                }

                logger.info(f"Successfully fitted Bayesian model for {task}")

            except Exception as e:
                logger.error(f"Error fitting Bayesian model for task {task}: {e}")
                results[task] = {"error": str(e)}
                import traceback

                traceback.print_exc()

        return results
[docs] def create(self, metric: str): """ Perform both frequentist and Bayesian analyses, then generate a comprehensive report. """ logger.info("Starting statistical analysis (Frequentist + Bayesian)...") # Get all configuration columns (exclude metadata and ALL metrics) config_columns = [ col for col in self.df.columns if col not in [self.COLUMN_EXPERIMENT_ID, self.COLUMN_TASK] + self.METRICS ] logger.info(f"Configuration columns: {config_columns}") # Fit both types of models frequentist_results = self._fit_frequentist_models(metric, config_columns) bayesian_results = self._fit_bayesian_models(metric, config_columns) # bayesian_results = {} # Temporarily disable Bayesian fitting for faster testing # Generate the report markdown_content = self._generate_report( metric, config_columns, frequentist_results, bayesian_results ) # Write markdown to file output_md_file = "statistical_analysis_report.md" with open(output_md_file, "w", encoding="utf-8") as f: f.write(markdown_content) logger.info(f"Markdown report saved to: {output_md_file}") # Generate PDF from markdown output_pdf_file = "statistical_analysis_report.pdf" try: self._generate_pdf_from_markdown(markdown_content, output_pdf_file) logger.info(f"PDF report generated: {output_pdf_file}") except Exception as e: logger.error(f"Failed to generate PDF: {e}") markdown_lines = markdown_content.split("\n") section_count = len([line for line in markdown_lines if line.startswith("## ")]) print(f" Total sections: {section_count}") print(f" Total lines: {len(markdown_lines)}")
    def _generate_report(
        self,
        metric: str,
        config_columns: list[str],
        frequentist_results: dict[str, dict[str, Any]],
        bayesian_results: dict[str, dict[str, Any]],
    ) -> str:
        """
        Generate a comprehensive markdown report with both frequentist and Bayesian results.

        Args:
            metric: Metric column name used in tables and summaries.
            config_columns: List of configuration column names
            frequentist_results: Results from frequentist LME models
            bayesian_results: Results from Bayesian hierarchical models

        Returns:
            Complete markdown report as a string
        """
        markdown_lines: list[str] = []

        # Header
        markdown_lines.append("# Statistical Analysis Report")
        markdown_lines.append("")
        markdown_lines.append(
            f"**Generated on:** {pd.Timestamp.now().strftime('%Y-%m-%d %H:%M:%S')}"
        )
        markdown_lines.append("")

        # Overview
        markdown_lines.append("## Overview")
        markdown_lines.append("")
        markdown_lines.append(f"- **Total Runs Analyzed:** {len(self.df)}")
        markdown_lines.append(
            f"- **Total Experiments:** {self.df[self.COLUMN_EXPERIMENT_ID].nunique()}"
        )
        markdown_lines.append(f"- **Tasks:** {', '.join(self.tasks)}")
        markdown_lines.append(
            f"- **Configuration Options:** {', '.join(config_columns)}"
        )
        markdown_lines.append("")

        # Methodology
        markdown_lines.append("## Methodology")
        markdown_lines.append("")
        markdown_lines.append(
            "This report presents results from two complementary statistical approaches:"
        )
        markdown_lines.append("")
        markdown_lines.append("### 1. Frequentist Linear Mixed Effects (LME) Models")
        markdown_lines.append("")
        markdown_lines.append(
            "- **Fixed Effects:** Configuration options (binary indicators)"
        )
        markdown_lines.append(
            "- **Random Effects:** Random intercept for each experiment ID (5-fold CV)"
        )
        markdown_lines.append("- **Estimation:** REML (Restricted Maximum Likelihood)")
        markdown_lines.append("- **Inference:** p-values and confidence intervals")
        markdown_lines.append("")
        markdown_lines.append("### 2. Bayesian Hierarchical Models")
        markdown_lines.append("")
        markdown_lines.append(
            "- **Fixed Effects:** Configuration options (binary indicators)"
        )
        markdown_lines.append(
            "- **Random Effects:** Random intercept for each experiment ID (5-fold CV)"
        )
        markdown_lines.append(
            "- **Estimation:** MCMC sampling (2000 draws, 1000 tuning, 4 chains, target_accept=0.95)"
        )
        markdown_lines.append(
            "- **Inference:** Posterior distributions and 95% HDI (Highest Density Intervals)"
        )
        markdown_lines.append("")
        markdown_lines.append("**Outcome Variable:** Maximum validation F1 score")
        markdown_lines.append("")
        markdown_lines.append("---")
        markdown_lines.append("")

        # Results for each task
        for task in self.tasks:
            freq_result = frequentist_results.get(task, {})
            bayes_result = bayesian_results.get(task, {})

            markdown_lines.append(f"## Task: {task}")
            markdown_lines.append("")

            # Check if we have errors
            if "error" in freq_result and "error" in bayes_result:
                markdown_lines.append("**No analysis available for this task**")
                markdown_lines.append("")
                markdown_lines.append(f"- Frequentist: {freq_result['error']}")
                markdown_lines.append(f"- Bayesian: {bayes_result['error']}")
                markdown_lines.append("")
                markdown_lines.append("---")
                markdown_lines.append("")
                continue

            # Dataset summary (use whichever result is available)
            result_for_summary = (
                freq_result if "n_runs" in freq_result else bayes_result
            )
            if "n_runs" in result_for_summary:
                markdown_lines.append("### Dataset Summary")
                markdown_lines.append("")
                markdown_lines.append(
                    f"- **Total runs:** {result_for_summary['n_runs']}"
                )
                markdown_lines.append(
                    f"- **Total experiments:** {result_for_summary['n_experiments']}"
                )
                # markdown_lines.append(
                #     f"- **Mean F1 score:** {result_for_summary['mean_f1']:.4f} ± {result_for_summary['std_f1']:.4f}"
                # )
                markdown_lines.append("")

            # Frequentist results
            if "model" in freq_result:
                markdown_lines.extend(
                    self._format_frequentist_results(metric, freq_result)
                )
            elif "error" in freq_result:
                markdown_lines.append("### Frequentist Analysis Error")
                markdown_lines.append("")
                markdown_lines.append(f"```\n{freq_result['error']}\n```")
                markdown_lines.append("")

            # Bayesian results
            if "model" in bayes_result:
                markdown_lines.extend(self._format_bayesian_results(bayes_result))
            elif "error" in bayes_result:
                markdown_lines.append("### Bayesian Analysis Error")
                markdown_lines.append("")
                markdown_lines.append(f"```\n{bayes_result['error']}\n```")
                markdown_lines.append("")

            markdown_lines.append("---")
            markdown_lines.append("")

        # Overall summary
        markdown_lines.append("## Overall Summary Across All Tasks")
        markdown_lines.append("")
        markdown_lines.append(
            "This section provides descriptive statistics aggregated across all tasks."
        )
        markdown_lines.append("")

        for config in config_columns:
            # Skip configurations with no variance across the whole dataset.
            if self.df[config].nunique() < 2:
                continue

            markdown_lines.append(f"### {config}")
            markdown_lines.append("")
            markdown_lines.append("| Level | Mean F1 | Std Dev | N |")
            markdown_lines.append("|-------|---------|---------|---|")

            for level in sorted(self.df[config].unique()):  # type: ignore
                level_data = cast(pd.DataFrame, self.df[self.df[config] == level])
                level_label = "Yes" if level == 1 else "No"
                markdown_lines.append(
                    f"| {level_label} | {level_data[metric].mean():.4f} | "
                    f"{level_data[metric].std():.4f} | {len(level_data)} |"
                )

            markdown_lines.append("")

        return "\n".join(markdown_lines)
    def _format_frequentist_results(
        self, metric: str, result: dict[str, Any]
    ) -> list[str]:
        """Format frequentist LME results for the markdown report.

        Args:
            metric: Metric column name (used for descriptive statistics).
            result: One task's entry from ``_fit_frequentist_models``.

        Returns:
            Markdown lines for the frequentist section of the report.
        """
        lines: list[str] = []

        model = result["model"]
        available_configs = result["available_configs"]
        task_df = result["task_df"]

        lines.append("### Frequentist Analysis (Linear Mixed Effects)")
        lines.append("")
        lines.append(f"**Model Specification:** `{result['formula']}`")
        lines.append("")
        lines.append(
            f"**Random Effect:** Random intercept by `{self.COLUMN_EXPERIMENT_ID}`"
        )
        lines.append("")

        # Fixed effects table
        lines.append("#### Fixed Effects")
        lines.append("")

        # Intercept
        if "Intercept" in model.params.index:
            intercept = model.params["Intercept"]
            intercept_se = model.bse["Intercept"]
            lines.append(
                f"**Intercept (Baseline F1):** {intercept:.4f} ± {intercept_se:.4f}"
            )
            lines.append("")
            lines.append(
                "> Expected F1 for baseline configuration (RESNET + no GNN + ATTENTION)."
            )
            lines.append("")

        # Configuration effects table
        lines.append(
            "| Configuration | Coefficient | Std Error | 95% CI | p-value | Sig |"
        )
        lines.append(
            "|---------------|-------------|-----------|--------|---------|-----|"
        )

        for config in available_configs:
            if config in model.params.index:
                coef = model.params[config]
                pval = model.pvalues[config]
                stderr = model.bse[config]
                ci_lower = model.conf_int().loc[config, 0]
                ci_upper = model.conf_int().loc[config, 1]

                # Conventional significance stars from the p-value.
                significance = (
                    "***"
                    if pval < 0.001
                    else "**"
                    if pval < 0.01
                    else "*"
                    if pval < 0.05
                    else "ns"
                )

                lines.append(
                    f"| {config} | {coef:+.4f} | {stderr:.4f} | "
                    f"[{ci_lower:.4f}, {ci_upper:.4f}] | {pval:.4f} | {significance} |"
                )

        lines.append("")
        lines.append(
            "*Significance: *** p<0.001, ** p<0.01, * p<0.05, ns = not significant*"
        )
        lines.append("")

        # Significant effects interpretation
        lines.append("**Interpretation of Significant Effects:**")
        lines.append("")

        significant_found = False
        for config in available_configs:
            if config in model.params.index:
                coef = model.params[config]
                pval = model.pvalues[config]
                if pval < 0.05:
                    significant_found = True
                    direction = "increases" if coef > 0 else "decreases"
                    arrow = "⬆️" if coef > 0 else "⬇️"
                    lines.append(
                        f"- {arrow} **{config}:** {direction.capitalize()} F1 by "
                        f"{abs(coef):.4f} (p={pval:.4f})"
                    )

        if not significant_found:
            lines.append("*No statistically significant effects (p < 0.05)*")

        lines.append("")

        # Random effects
        lines.append("#### Random Effects")
        lines.append("")

        # NOTE(review): if `cov_re` is absent, this falls back to the residual
        # variance, which forces ICC = 0.5 — confirm this fallback is intended.
        random_effect_var = (
            model.cov_re.iloc[0, 0] if hasattr(model, "cov_re") else model.scale
        )
        residual_var = model.scale
        total_var = random_effect_var + residual_var
        icc = random_effect_var / total_var

        lines.append("| Component | Value |")
        lines.append("|-----------|-------|")
        lines.append(f"| Random intercept variance (τ²) | {random_effect_var:.6f} |")
        lines.append(f"| Residual variance (σ²) | {residual_var:.6f} |")
        lines.append(f"| **ICC** | **{icc:.4f}** |")
        lines.append("")
        lines.append(
            f"> **ICC:** {icc * 100:.2f}% of variance is between experiments. "
            f"Measures similarity within 5-fold CV."
        )
        lines.append("")

        # Descriptive stats
        lines.append("#### Descriptive Statistics")
        lines.append("")

        for config in available_configs:
            lines.append(f"**{config}:**")
            lines.append("")
            lines.append("| Level | Mean F1 | Std Dev | N |")
            lines.append("|-------|---------|---------|---|")

            for level in sorted(task_df[config].unique()):
                level_data = task_df[task_df[config] == level]
                level_label = "Yes" if level == 1 else "No"
                lines.append(
                    f"| {level_label} | {level_data[metric].mean():.4f} | "
                    f"{level_data[metric].std():.4f} | {len(level_data)} |"
                )

            lines.append("")

        return lines
    def _format_bayesian_results(self, result: dict[str, Any]) -> list[str]:
        """Format Bayesian hierarchical model results for the markdown report.

        Args:
            result: One task's entry from ``_fit_bayesian_models`` (must
                contain ``summary``, ``available_configs``, and ``idata``).

        Returns:
            Markdown lines for the Bayesian section of the report.
        """
        lines: list[str] = []

        summary = result["summary"]
        available_configs = result["available_configs"]
        idata = result["idata"]  # Get inference data for posterior samples

        lines.append("### Bayesian Analysis (Hierarchical Model)")
        lines.append("")
        lines.append(f"**Model Specification:** `{result['formula']}`")
        lines.append("")
        lines.append(
            "**MCMC Settings:** 2000 draws, 1000 tuning, 4 chains, target_accept=0.95"
        )
        lines.append("")

        # Fixed effects table
        lines.append("#### Posterior Distributions (Fixed Effects)")
        lines.append("")
        lines.append("| Parameter | Mean | SD | 95% HDI | R-hat |")
        lines.append("|-----------|------|----|---------|----|")

        for param in summary.index:
            if param == "Intercept" or param in available_configs:
                mean_val = summary.loc[param, "mean"]
                sd_val = summary.loc[param, "sd"]
                # arviz uses hdi_2.5% and hdi_97.5% by default
                hdi_lower = summary.loc[param, "hdi_2.5%"]
                hdi_upper = summary.loc[param, "hdi_97.5%"]
                r_hat = summary.loc[param, "r_hat"]

                lines.append(
                    f"| {param} | {mean_val:.4f} | {sd_val:.4f} | "
                    f"[{hdi_lower:.4f}, {hdi_upper:.4f}] | {r_hat:.3f} |"
                )

        lines.append("")
        lines.append("*HDI: Highest Density Interval (Bayesian credible interval)*")
        lines.append("*R-hat: Convergence diagnostic (should be < 1.01)*")
        lines.append("")

        # Interpretation with probabilities
        lines.append("**Interpretation:**")
        lines.append("")

        # Access posterior samples for probability calculations
        posterior = idata.posterior

        for config in available_configs:
            if config in summary.index:
                mean_val = summary.loc[config, "mean"]
                hdi_lower = summary.loc[config, "hdi_2.5%"]
                hdi_upper = summary.loc[config, "hdi_97.5%"]

                # Calculate probability of increase/decrease from posterior samples
                if config in posterior.data_vars:
                    # Get all posterior samples for this parameter
                    samples = posterior[config].values.flatten()
                    prob_positive = (samples > 0).mean() * 100  # Probability of increase
                    prob_negative = (samples < 0).mean() * 100  # Probability of decrease

                    # Check if credible interval excludes zero
                    if hdi_lower > 0 and hdi_upper > 0:
                        arrow = "⬆️"
                        lines.append(
                            f"- {arrow} **{config}:** Increases F1 by {abs(mean_val):.4f} "
                            f"(95% HDI excludes 0)"
                        )
                        lines.append(
                            f"  - Probability of increase: {prob_positive:.1f}%, "
                            f"decrease: {prob_negative:.1f}%"
                        )
                    elif hdi_lower < 0 and hdi_upper < 0:
                        arrow = "⬇️"
                        lines.append(
                            f"- {arrow} **{config}:** Decreases F1 by {abs(mean_val):.4f} "
                            f"(95% HDI excludes 0)"
                        )
                        lines.append(
                            f"  - Probability of increase: {prob_positive:.1f}%, "
                            f"decrease: {prob_negative:.1f}%"
                        )
                    else:
                        lines.append(
                            f"- **{config}:** Effect uncertain (95% HDI includes 0)"
                        )
                        lines.append(
                            f"  - Probability of increase: {prob_positive:.1f}%, "
                            f"decrease: {prob_negative:.1f}%"
                        )
                else:
                    # Fallback if samples not available
                    if (hdi_lower > 0 and hdi_upper > 0) or (
                        hdi_lower < 0 and hdi_upper < 0
                    ):
                        direction = "increases" if mean_val > 0 else "decreases"
                        arrow = "⬆️" if mean_val > 0 else "⬇️"
                        lines.append(
                            f"- {arrow} **{config}:** {direction.capitalize()} F1 by "
                            f"{abs(mean_val):.4f} (95% HDI excludes 0)"
                        )
                    else:
                        lines.append(
                            f"- **{config}:** Effect uncertain (95% HDI includes 0)"
                        )

        lines.append("")

        # Random effects variance
        lines.append("#### Random Effects (Group-Level)")
        lines.append("")

        # Find group-level standard deviation in summary
        group_params = [
            idx for idx in summary.index if self.COLUMN_EXPERIMENT_ID in idx
        ]

        if group_params:
            # Limit to top 10 by absolute mean to fit on page
            group_data: list[tuple[Any, ...]] = []
            for param in group_params:
                mean_val = summary.loc[param, "mean"]
                sd_val = summary.loc[param, "sd"]
                hdi_lower = summary.loc[param, "hdi_2.5%"]
                hdi_upper = summary.loc[param, "hdi_97.5%"]
                group_data.append((param, mean_val, sd_val, hdi_lower, hdi_upper))

            # Sort by absolute mean (largest effects first)
            group_data.sort(key=lambda x: abs(x[1]), reverse=True)

            # Show only top 10 to fit on page
            lines.append(
                f"**Top 10 Experiment Random Effects** (out of {len(group_params)} total):"
            )
            lines.append("")
            lines.append("| Experiment | Mean | SD | 95% HDI |")
            lines.append("|------------|------|----|---------|")

            for param, mean_val, sd_val, hdi_lower, hdi_upper in group_data[:10]:
                # Shorten parameter name - extract just the experiment ID
                # Format is typically like "1|EXPERIMENT_ID[experiment_name]"
                short_name = param
                if "[" in param and "]" in param:
                    # Extract text between brackets
                    short_name = param[param.find("[") + 1 : param.find("]")]
                elif "|" in param:
                    # Take last part after |
                    short_name = param.split("|")[-1]

                lines.append(
                    f"| {short_name} | {mean_val:.4f} | {sd_val:.4f} | "
                    f"[{hdi_lower:.4f}, {hdi_upper:.4f}] |"
                )

            lines.append("")
            lines.append(
                "*Showing top 10 experiments with largest random effects. "
                "Full results available in the inference data object.*"
            )
            lines.append("")

        return lines
    def _generate_pdf_from_markdown(self, markdown_content: str, output_file: str):
        """
        Convert markdown content to PDF using markdown_it and weasyprint.

        Args:
            markdown_content: The markdown text to convert
            output_file: Path to the output PDF file
        """
        # Initialize markdown-it with table support
        md = MarkdownIt("commonmark", {"breaks": True, "html": True}).enable("table")

        # Convert markdown to HTML
        html_body = md.render(markdown_content)

        # Create a complete HTML document with styling
        html_template = f"""
        <!DOCTYPE html>
        <html>
        <head>
            <meta charset="UTF-8">
            <title>Statistical Analysis Report</title>
            <style>
                @page {{ size: A4; margin: 2cm; }}
                body {{
                    font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif;
                    line-height: 1.6;
                    color: #333;
                    max-width: 100%;
                }}
                h1 {{
                    color: #2c3e50;
                    border-bottom: 3px solid #3498db;
                    padding-bottom: 10px;
                    margin-top: 0;
                }}
                h2 {{
                    color: #2980b9;
                    border-bottom: 2px solid #bdc3c7;
                    padding-bottom: 8px;
                    margin-top: 30px;
                    page-break-after: avoid;
                }}
                h3 {{
                    color: #34495e;
                    margin-top: 20px;
                    page-break-after: avoid;
                }}
                h4 {{
                    color: #7f8c8d;
                    margin-top: 15px;
                    page-break-after: avoid;
                }}
                table {{
                    border-collapse: collapse;
                    width: 100%;
                    margin: 15px 0;
                    font-size: 0.85em;
                    page-break-inside: avoid;
                    table-layout: auto;
                }}
                th {{
                    background-color: #3498db;
                    color: white;
                    padding: 8px;
                    text-align: left;
                    font-weight: bold;
                    word-wrap: break-word;
                }}
                td {{
                    padding: 6px;
                    border: 1px solid #ddd;
                    word-wrap: break-word;
                    overflow-wrap: break-word;
                    max-width: 200px;
                }}
                tr:nth-child(even) {{ background-color: #f8f9fa; }}
                tr:hover {{ background-color: #e8f4f8; }}
                blockquote {{
                    border-left: 4px solid #3498db;
                    padding-left: 15px;
                    margin: 15px 0;
                    background-color: #ecf0f1;
                    padding: 10px 15px;
                    font-style: italic;
                    page-break-inside: avoid;
                }}
                code {{
                    background-color: #f8f9fa;
                    padding: 2px 6px;
                    border-radius: 3px;
                    font-family: 'Courier New', monospace;
                    font-size: 0.9em;
                }}
                pre {{
                    background-color: #f8f9fa;
                    padding: 15px;
                    border-radius: 5px;
                    overflow-x: auto;
                    page-break-inside: avoid;
                }}
                hr {{
                    border: none;
                    border-top: 1px solid #bdc3c7;
                    margin: 30px 0;
                }}
                ul, ol {{
                    margin: 10px 0;
                    padding-left: 30px;
                }}
                li {{ margin: 5px 0; }}
                p {{
                    margin: 10px 0;
                    text-align: justify;
                }}
                .page-break {{ page-break-after: always; }}
                em {{
                    color: #7f8c8d;
                    font-size: 0.9em;
                }}
                strong {{ color: #2c3e50; }}
            </style>
        </head>
        <body>
        {html_body}
        </body>
        </html>
        """

        # Convert HTML to PDF
        HTML(string=html_template).write_pdf(output_file)  # type: ignore
        logger.info(f"PDF successfully generated: {output_file}")