Coverage for orchestr_ant_ion/dummy.py: 74%

1"""Simple dummy ML preprocessing pipeline for tests and demos."""

3from __future__ import annotations

5import numpy as np

6from loguru import logger

9class SimpleMLPreprocessor:

10 """Generate synthetic features and labels with a simple pipeline."""

12 def __init__(self, n_samples: int) -> None:

13 """Initialize the preprocessor with the number of samples."""

14 self.n_samples = n_samples

15 self._rng = np.random.default_rng()

16 self.features = np.array([])

17 self.labels = np.array([])

18 self.normalized = np.array([])

19 self.stats = {}

21 def generate_synthetic_data(self) -> tuple[np.ndarray, np.ndarray]:

22 """Generate synthetic features and labels."""

23 logger.debug(

24 "Generating {} samples of synthetic features and labels...",

25 self.n_samples,

26 )

27 self.features = self._rng.normal(5.0, 2.0, (self.n_samples, 3))

28 self.labels = (self.features.sum(axis=1) > 15).astype(int)

29 logger.info("First 5 feature vectors: {}", self.features[:5])

30 logger.info("First 5 labels: {}", self.labels[:5])

31 return self.features, self.labels

33 def normalize_features(self) -> np.ndarray:

34 """Normalize features with z-score normalization."""

35 if self.features.size == 0:

36 logger.warning("No features to normalize.")

37 return np.array([])

39 mean = self.features.mean(axis=0)

40 std = self.features.std(axis=0)

41 self.normalized = np.where(std == 0, 0.0, (self.features - mean) / std)

43 self.stats = {"mean": mean.tolist(), "std": std.tolist()}

44 logger.debug("Feature normalization stats: {}", self.stats)

45 return self.normalized

47 def apply_joke_labeling(self) -> np.ndarray:

48 """Convert numeric labels into joke strings."""

49 if self.labels.size == 0:

50 logger.warning("No labels to convert into jokes.")

51 return np.array([])

53 jokes = np.where(self.labels == 1, "Definitely ML", "Possibly Not")

54 logger.info("First 5 joke labels: {}", jokes[:5])

55 return jokes

57 def run_pipeline(self) -> dict[str, object]:

58 """Run the full preprocessing pipeline and return results."""

59 logger.info(

60 "Running ML preprocessing pipeline for {} samples...",

61 self.n_samples,

62 )

63 self.generate_synthetic_data()

64 self.normalize_features()

65 jokes = self.apply_joke_labeling()

67 result = {

68 "features": self.features,

69 "labels": self.labels,

70 "normalized": self.normalized,

71 **self.stats,

72 "joke_labels": jokes,

73 }

74 logger.success("ML pipeline complete!")

75 return result

Coverage for orchestr_ant_ion / dummy.py: 74%

43 statements