Coverage for orchestr_ant_ion / dummy.py: 74%
43 statements
« prev ^ index » next coverage.py v7.13.5, created at 2026-03-19 08:36 +0000
« prev ^ index » next coverage.py v7.13.5, created at 2026-03-19 08:36 +0000
1"""Simple dummy ML preprocessing pipeline for tests and demos."""
3from __future__ import annotations
5import numpy as np
6from loguru import logger
class SimpleMLPreprocessor:
    """Generate synthetic features and labels with a simple pipeline.

    The pipeline draws normally distributed features, derives binary labels
    from the per-row feature sum, z-score normalizes the features, and maps
    the labels to joke strings. Intermediate results are kept on the instance
    (``features``, ``labels``, ``normalized``, ``stats``).
    """

    # Shape and distribution of the synthetic feature matrix.
    _N_FEATURES = 3
    _FEATURE_MEAN = 5.0
    _FEATURE_STD = 2.0
    # A row is labeled 1 when the sum of its features exceeds this value.
    _LABEL_THRESHOLD = 15

    def __init__(self, n_samples: int, *, seed: int | None = None) -> None:
        """Initialize the preprocessor.

        Args:
            n_samples: Number of synthetic rows to generate.
            seed: Optional seed for the random generator. ``None`` (the
                default) keeps the original unseeded behavior; pass an
                integer to make the generated data reproducible.
        """
        self.n_samples = n_samples
        self._rng = np.random.default_rng(seed)
        # All result holders start empty until the matching step has run.
        self.features = np.array([])
        self.labels = np.array([])
        self.normalized = np.array([])
        self.stats: dict[str, list[float]] = {}

    def generate_synthetic_data(self) -> tuple[np.ndarray, np.ndarray]:
        """Generate synthetic features and labels.

        Returns:
            The ``(features, labels)`` pair, also stored on the instance.
            ``features`` has one row per sample; ``labels`` is 1 where the
            row sum exceeds the label threshold and 0 otherwise.
        """
        logger.debug(
            "Generating {} samples of synthetic features and labels...",
            self.n_samples,
        )
        self.features = self._rng.normal(
            self._FEATURE_MEAN,
            self._FEATURE_STD,
            (self.n_samples, self._N_FEATURES),
        )
        row_sums = self.features.sum(axis=1)
        self.labels = (row_sums > self._LABEL_THRESHOLD).astype(int)
        logger.info("First 5 feature vectors: {}", self.features[:5])
        logger.info("First 5 labels: {}", self.labels[:5])
        return self.features, self.labels

    @staticmethod
    def _zscore(
        features: np.ndarray,
    ) -> tuple[np.ndarray, np.ndarray, np.ndarray]:
        """Return ``(normalized, mean, std)`` computed along axis 0."""
        mean = features.mean(axis=0)
        std = features.std(axis=0)
        centered = features - mean
        # Divide only where std is non-zero. Constant columns keep the 0.0
        # pre-filled in ``out``, so no divide-by-zero RuntimeWarning is
        # emitted (np.where would evaluate the quotient for every element).
        normalized = np.divide(
            centered,
            std,
            out=np.zeros_like(centered),
            where=std != 0,
        )
        return normalized, mean, std

    def normalize_features(self) -> np.ndarray:
        """Normalize features with z-score normalization.

        Columns with zero standard deviation are mapped to 0.0. On success
        the result is stored in ``self.normalized`` and the per-column mean
        and standard deviation are stored in ``self.stats``.

        Returns:
            The normalized array, or an empty array (leaving
            ``self.normalized`` and ``self.stats`` untouched) when there are
            no features to normalize.
        """
        if self.features.size == 0:
            logger.warning("No features to normalize.")
            return np.array([])

        self.normalized, mean, std = self._zscore(self.features)
        self.stats = {"mean": mean.tolist(), "std": std.tolist()}
        logger.debug("Feature normalization stats: {}", self.stats)
        return self.normalized

    def apply_joke_labeling(self) -> np.ndarray:
        """Convert numeric labels into joke strings.

        Returns:
            An array holding "Definitely ML" where the label equals 1 and
            "Possibly Not" elsewhere, or an empty array when there are no
            labels.
        """
        if self.labels.size == 0:
            logger.warning("No labels to convert into jokes.")
            return np.array([])

        jokes = np.where(self.labels == 1, "Definitely ML", "Possibly Not")
        logger.info("First 5 joke labels: {}", jokes[:5])
        return jokes

    def run_pipeline(self) -> dict[str, object]:
        """Run the full preprocessing pipeline and return results.

        Returns:
            A dict with ``features``, ``labels``, ``normalized`` and
            ``joke_labels``, plus whatever ``self.stats`` holds (``mean`` and
            ``std`` once normalization has computed them).
        """
        logger.info(
            "Running ML preprocessing pipeline for {} samples...",
            self.n_samples,
        )
        self.generate_synthetic_data()
        self.normalize_features()
        jokes = self.apply_joke_labeling()

        result = {
            "features": self.features,
            "labels": self.labels,
            "normalized": self.normalized,
            **self.stats,
            "joke_labels": jokes,
        }
        logger.success("ML pipeline complete!")
        return result