Coverage for orchestr_ant_ion / dummy.py: 74%

43 statements  

« prev     ^ index     » next       coverage.py v7.13.5, created at 2026-03-19 08:36 +0000

1"""Simple dummy ML preprocessing pipeline for tests and demos.""" 

2 

3from __future__ import annotations 

4 

5import numpy as np 

6from loguru import logger 

7 

8 

class SimpleMLPreprocessor:
    """Generate synthetic features and labels with a simple pipeline.

    The pipeline steps store their results on the instance:

    * ``features``   - array produced by :meth:`generate_synthetic_data`.
    * ``labels``     - 0/1 integer labels derived from ``features``.
    * ``normalized`` - column-wise z-scored copy of ``features``.
    * ``stats``      - ``{"mean": ..., "std": ...}`` from the last normalization.
    """

    def __init__(self, n_samples: int, *, seed: int | None = None) -> None:
        """Initialize the preprocessor with the number of samples.

        Args:
            n_samples: Number of synthetic rows to generate.
            seed: Optional seed forwarded to ``np.random.default_rng``. The
                default ``None`` keeps the original behavior (fresh entropy
                on every instance); pass an int for reproducible data.
        """
        self.n_samples = n_samples
        self._rng = np.random.default_rng(seed)
        self.features = np.array([])
        self.labels = np.array([])
        self.normalized = np.array([])
        # Filled by normalize_features() with the "mean" and "std" keys.
        self.stats: dict[str, object] = {}

    def generate_synthetic_data(self) -> tuple[np.ndarray, np.ndarray]:
        """Generate synthetic features and labels.

        Features are drawn from a normal distribution (mean 5.0, standard
        deviation 2.0) with shape ``(n_samples, 3)``. A row is labeled 1 when
        its three features sum to more than 15, otherwise 0.

        Returns:
            The ``(features, labels)`` pair, which is also stored on the
            instance.
        """
        logger.debug(
            "Generating {} samples of synthetic features and labels...",
            self.n_samples,
        )
        self.features = self._rng.normal(5.0, 2.0, (self.n_samples, 3))
        self.labels = (self.features.sum(axis=1) > 15).astype(int)
        logger.info("First 5 feature vectors: {}", self.features[:5])
        logger.info("First 5 labels: {}", self.labels[:5])
        return self.features, self.labels

    @staticmethod
    def _zscore(
        features: np.ndarray,
    ) -> tuple[np.ndarray, np.ndarray, np.ndarray]:
        """Return ``(normalized, mean, std)`` for z-scoring along axis 0."""
        mean = features.mean(axis=0)
        std = features.std(axis=0)
        constant = std == 0
        # np.where evaluates both branches, so dividing by the raw std would
        # still compute 0/0 for constant columns and emit a RuntimeWarning.
        # Substitute a divisor of 1 there; the outer where then pins those
        # columns to exactly 0.0, matching the previous result.
        safe_std = np.where(constant, 1.0, std)
        normalized = np.where(constant, 0.0, (features - mean) / safe_std)
        return normalized, mean, std

    def normalize_features(self) -> np.ndarray:
        """Normalize features with z-score normalization.

        Columns whose standard deviation is 0 are mapped to 0.0. The
        per-column mean and standard deviation are stored in ``stats``.

        Returns:
            The normalized array (also stored in ``normalized``), or an empty
            array when there are no features.
        """
        if self.features.size == 0:
            logger.warning("No features to normalize.")
            # Drop results of any earlier run so callers never see values
            # that do not correspond to the current (empty) features.
            self.normalized = np.array([])
            self.stats = {}
            return self.normalized

        self.normalized, mean, std = self._zscore(self.features)

        self.stats = {"mean": mean.tolist(), "std": std.tolist()}
        logger.debug("Feature normalization stats: {}", self.stats)
        return self.normalized

    def apply_joke_labeling(self) -> np.ndarray:
        """Convert numeric labels into joke strings.

        Returns:
            An array holding "Definitely ML" where the label equals 1 and
            "Possibly Not" elsewhere, or an empty array when there are no
            labels.
        """
        if self.labels.size == 0:
            logger.warning("No labels to convert into jokes.")
            return np.array([])

        jokes = np.where(self.labels == 1, "Definitely ML", "Possibly Not")
        logger.info("First 5 joke labels: {}", jokes[:5])
        return jokes

    def run_pipeline(self) -> dict[str, object]:
        """Run the full preprocessing pipeline and return results.

        Runs data generation, normalization and joke labeling in order.

        Returns:
            A dict with the keys "features", "labels", "normalized" and
            "joke_labels", plus the entries of ``stats`` ("mean" and "std")
            whenever normalization produced them.
        """
        logger.info(
            "Running ML preprocessing pipeline for {} samples...",
            self.n_samples,
        )
        self.generate_synthetic_data()
        self.normalize_features()
        jokes = self.apply_joke_labeling()

        result = {
            "features": self.features,
            "labels": self.labels,
            "normalized": self.normalized,
            **self.stats,
            "joke_labels": jokes,
        }
        logger.success("ML pipeline complete!")
        return result