Birth Generator 1.1.1
Procedural birth date generation for C++23
Loading...
Searching...
No Matches
birthgen.hpp
Go to the documentation of this file.
1#ifndef DASMIG_BIRTHGEN_HPP
2#define DASMIG_BIRTHGEN_HPP
3
4#include "random.hpp"
5#include <algorithm>
6#include <array>
7#include <charconv>
8#include <chrono>
9#include <cstddef>
10#include <cstdint>
11#include <filesystem>
12#include <format>
13#include <fstream>
14#include <ostream>
15#include <random>
16#include <ranges>
17#include <stdexcept>
18#include <string>
19#include <string_view>
20#include <system_error>
21#include <unordered_map>
22#include <utility>
23#include <vector>
24
25/// @file birthgen.hpp
26/// @brief Birth generator library — procedural birthday generation for C++23.
27/// @author Diego Dasso Migotto (diegomigotto at hotmail dot com)
28/// @see See doc/usage.md for the narrative tutorial.
29
30namespace dasmig
31{
32
#ifndef DASMIG_DATASET_DEFINED
#define DASMIG_DATASET_DEFINED
/// @brief Selects which dataset tier is loaded from the resource folders.
///
/// The definition is wrapped in DASMIG_DATASET_DEFINED so it is skipped when
/// that macro was already defined before this point.
enum class dataset : std::uint8_t
{
    lite = 0, ///< ~195 sovereign states only.
    full = 1, ///< ~237 countries and territories (UN WPP coverage).
};
#endif // DASMIG_DATASET_DEFINED
42
#ifndef DASMIG_SEX_DEFINED
#define DASMIG_SEX_DEFINED
/// @brief Biological sex used when generating a birth.
///
/// The definition is wrapped in DASMIG_SEX_DEFINED so it is skipped when
/// that macro was already defined before this point.
enum class sex : std::uint8_t
{
    male = 0,   ///< Male.
    female = 1, ///< Female.
};
#endif // DASMIG_SEX_DEFINED
52
/// @brief Strongly typed birth year.
///
/// A dedicated wrapper keeps a year argument from being confused with a
/// seed during overload resolution. Construction is explicit:
/// `year_t{1990}`.
struct year_t
{
    std::uint16_t value; ///< The wrapped calendar year.

    /// @brief Wrap @p year_value. Never throws.
    explicit constexpr year_t(std::uint16_t year_value) noexcept
        : value(year_value)
    {
    }
};
62
/// @brief Specifies an inclusive age range [min, max].
///
/// Plain aggregate; initialise it as `age_range{18, 65}`. The generator
/// overloads taking an age_range reject ranges where min > max.
struct age_range
{
    std::uint8_t min; ///< Lower bound, inclusive.
    std::uint8_t max; ///< Upper bound, inclusive.
};
69
/// @brief Result record describing one generated birth.
///
/// Converts implicitly to std::string and can be streamed with
/// operator<<; both produce the ISO date returned by date_string().
class birth
{
  public:
    std::string country_code; ///< ISO 3166-1 alpha-2 code.
    std::uint16_t year{0};    ///< Birth year.
    std::uint8_t month{0};    ///< Birth month (1–12).
    std::uint8_t day{0};      ///< Birth day (1–31).
    std::uint8_t age{0};      ///< Age in completed years.
    sex bio_sex{sex::male};   ///< Biological sex.
    std::uint8_t weekday{0};  ///< Day of week (0=Sun, 1=Mon, …, 6=Sat).
    double le_remaining{0.0}; ///< Estimated years of life remaining.
    std::string cohort;       ///< Generational cohort label.

    /// @brief Seed that was used to generate this record.
    [[nodiscard]] std::uint64_t seed() const
    {
        return _seed;
    }

    /// @brief Format the date as ISO 8601 (YYYY-MM-DD).
    ///
    /// The narrow integer fields are widened to int first so they are
    /// formatted as numbers.
    [[nodiscard]] std::string date_string() const
    {
        const int y = year;
        const int m = month;
        const int d = day;
        return std::format("{:04d}-{:02d}-{:02d}", y, m, d);
    }

    /// @brief Implicit conversion to std::string.
    /// @return Same text as date_string().
    operator std::string() const // NOLINT(hicpp-explicit-conversions)
    {
        return date_string();
    }

    /// @brief Write the ISO date of @p b to @p os.
    /// @return @p os, to allow chaining.
    friend std::ostream& operator<<(std::ostream& os, const birth& b)
    {
        return os << b.date_string();
    }

  private:
    std::uint64_t _seed{0}; // Written by bthg when the record is generated.
    friend class bthg;
};
117
118/// @brief Birth generator that produces demographically plausible
119/// random birthdays using UN WPP 2024 population data.
120///
121/// The generation pipeline:
122/// 1. Select biological sex (population M:F ratio).
123/// 2. Select age from country-specific age pyramid.
124/// 3. Compute birth year = current year − age.
125/// 4. Select month from latitude-based seasonal weights.
126/// 5. Select day within month (weekday-aware deficit).
127///
128/// Can be used as a singleton via instance() or constructed independently.
129///
130/// @par Thread safety
131/// Each instance is independent. Concurrent calls to get_birth() on
132/// the **same** instance require external synchronisation.
133class bthg
134{
135 public:
136 /// @brief Default constructor — creates an empty generator with no data.
137 bthg() : _ref_year(current_year_()) {}
138
139 bthg(const bthg&) = delete;
140 bthg& operator=(const bthg&) = delete;
141 bthg(bthg&&) noexcept = default;
142 bthg& operator=(bthg&&) noexcept = default;
143 ~bthg() = default;
144
145 /// @brief Access the global singleton instance.
146 ///
147 /// Auto-probes common resource paths on first access.
148 static bthg& instance()
149 {
150 static bthg inst{auto_probe_tag{}};
151 return inst;
152 }
153
154 // -- Generation -------------------------------------------------------
155
156 /// @brief Generate a random birth for a specific country.
157 /// @param cca2 ISO 3166-1 alpha-2 country code (e.g. "US", "BR").
158 /// @throws std::runtime_error If no data has been loaded.
159 /// @throws std::invalid_argument If the country code is unknown.
160 [[nodiscard]] birth get_birth(std::string_view cca2)
161 {
162 return generate_(lookup_entry_(cca2), draw_seed_(), {});
163 }
164
165 /// @brief Generate a deterministic birth for a specific country.
166 [[nodiscard]] birth get_birth(std::string_view cca2,
167 std::uint64_t call_seed) const
168 {
169 return generate_(lookup_entry_(cca2), call_seed, {});
170 }
171
172 /// @brief Generate a random birth from a random country.
173 /// @throws std::runtime_error If no data has been loaded.
174 [[nodiscard]] birth get_birth()
175 {
176 return get_birth(draw_seed_());
177 }
178
179 /// @brief Generate a deterministic birth from a random country.
180 [[nodiscard]] birth get_birth(std::uint64_t call_seed) const
181 {
182 if (_entries.empty())
183 {
184 throw std::runtime_error(
185 "No birth data loaded. Call load() first.");
186 }
187 effolkronium::random_local rng;
188 rng.seed(static_cast<std::mt19937::result_type>(
189 call_seed ^ (call_seed >> seed_shift_)));
190
191 auto idx = _weighted
192 ? _country_dist(rng.engine())
193 : _country_uniform(rng.engine());
194 return get_birth(_cca2_order[idx], call_seed); // NOLINT
195 }
196
197 // -- Sex-specific generation ------------------------------------------
198
199 /// @brief Generate a random birth with a predetermined sex.
200 [[nodiscard]] birth get_birth(std::string_view cca2, sex bio_sex)
201 {
202 return generate_(lookup_entry_(cca2), draw_seed_(),
203 {.fix_sex = true, .forced_sex = bio_sex});
204 }
205
206 /// @brief Generate a deterministic birth with a predetermined sex.
207 [[nodiscard]] birth get_birth(std::string_view cca2, sex bio_sex,
208 std::uint64_t call_seed) const
209 {
210 return generate_(lookup_entry_(cca2), call_seed,
211 {.fix_sex = true, .forced_sex = bio_sex});
212 }
213
214 /// @brief Generate a random birth from a random country with a
215 /// predetermined sex.
216 [[nodiscard]] birth get_birth(sex bio_sex)
217 {
218 auto seed = draw_seed_();
219 if (_entries.empty())
220 {
221 throw std::runtime_error(
222 "No birth data loaded. Call load() first.");
223 }
224 effolkronium::random_local rng;
225 rng.seed(static_cast<std::mt19937::result_type>(
226 seed ^ (seed >> seed_shift_)));
227 auto idx = _weighted
228 ? _country_dist(rng.engine())
229 : _country_uniform(rng.engine());
230 return generate_(lookup_entry_(_cca2_order[idx]), seed, // NOLINT
231 {.fix_sex = true, .forced_sex = bio_sex});
232 }
233
234 /// @brief Generate a deterministic birth from a random country with a
235 /// predetermined sex.
236 [[nodiscard]] birth get_birth(sex bio_sex,
237 std::uint64_t call_seed) const
238 {
239 if (_entries.empty())
240 {
241 throw std::runtime_error(
242 "No birth data loaded. Call load() first.");
243 }
244 effolkronium::random_local rng;
245 rng.seed(static_cast<std::mt19937::result_type>(
246 call_seed ^ (call_seed >> seed_shift_)));
247
248 auto idx = _weighted
249 ? _country_dist(rng.engine())
250 : _country_uniform(rng.engine());
251 return get_birth(_cca2_order[idx], bio_sex, call_seed); // NOLINT
252 }
253
254 // -- Year-specific generation -----------------------------------------
255
256 /// @brief Generate a random birth for a specific year.
257 /// @param cca2 ISO 3166-1 alpha-2 country code.
258 /// @param year Birth year (e.g. `year_t{1990}`). Age is derived as
259 /// ref_year − year and clamped to [0, 100].
260 [[nodiscard]] birth get_birth(std::string_view cca2, year_t year)
261 {
262 return generate_(lookup_entry_(cca2), draw_seed_(),
263 {.fix_year = true, .forced_year = year.value});
264 }
265
266 /// @brief Generate a deterministic birth for a specific year.
267 [[nodiscard]] birth get_birth(std::string_view cca2, year_t year,
268 std::uint64_t call_seed) const
269 {
270 return generate_(lookup_entry_(cca2), call_seed,
271 {.fix_year = true, .forced_year = year.value});
272 }
273
274 // -- Sex + year generation --------------------------------------------
275
276 /// @brief Generate a random birth with predetermined sex and year.
277 [[nodiscard]] birth get_birth(std::string_view cca2, sex bio_sex,
278 year_t year)
279 {
280 return generate_(lookup_entry_(cca2), draw_seed_(),
281 {.fix_sex = true, .forced_sex = bio_sex,
282 .fix_year = true, .forced_year = year.value});
283 }
284
285 /// @brief Generate a deterministic birth with predetermined sex and
286 /// year.
287 [[nodiscard]] birth get_birth(std::string_view cca2, sex bio_sex,
288 year_t year,
289 std::uint64_t call_seed) const
290 {
291 return generate_(lookup_entry_(cca2), call_seed,
292 {.fix_sex = true, .forced_sex = bio_sex,
293 .fix_year = true, .forced_year = year.value});
294 }
295
296 // -- Age-range generation ---------------------------------------------
297
298 /// @brief Generate a random birth with age within [range.min,
299 /// range.max].
300 /// @throws std::invalid_argument If range.min > range.max.
301 [[nodiscard]] birth get_birth(std::string_view cca2, age_range range)
302 {
303 if (range.min > range.max)
304 {
305 throw std::invalid_argument(
306 "age_range: min must be <= max");
307 }
308 return generate_(lookup_entry_(cca2), draw_seed_(),
309 {.fix_age_range = true,
310 .age_min = range.min,
311 .age_max = std::min<std::uint8_t>(
312 range.max,
313 static_cast<std::uint8_t>(max_age_))});
314 }
315
316 /// @brief Generate a deterministic birth with age within
317 /// [range.min, range.max].
318 [[nodiscard]] birth get_birth(std::string_view cca2,
319 age_range range,
320 std::uint64_t call_seed) const
321 {
322 if (range.min > range.max)
323 {
324 throw std::invalid_argument(
325 "age_range: min must be <= max");
326 }
327 return generate_(lookup_entry_(cca2), call_seed,
328 {.fix_age_range = true,
329 .age_min = range.min,
330 .age_max = std::min<std::uint8_t>(
331 range.max,
332 static_cast<std::uint8_t>(max_age_))});
333 }
334
335 // -- Seeding ----------------------------------------------------------
336
337 /// @brief Seed the internal random engine for deterministic sequences.
338 bthg& seed(std::uint64_t seed_value)
339 {
340 _engine.seed(seed_value);
341 return *this;
342 }
343
344 /// @brief Reseed the engine with a non-deterministic source.
346 {
347 _engine.seed(std::random_device{}());
348 return *this;
349 }
350
351 /// @brief Set whether country selection is population-weighted.
352 bthg& weighted(bool enable)
353 {
354 _weighted = enable;
355 return *this;
356 }
357
358 /// @brief Query whether country selection is population-weighted.
359 [[nodiscard]] bool weighted() const { return _weighted; }
360
361 // -- Data management --------------------------------------------------
362
363 /// @brief Check whether any data has been loaded.
364 [[nodiscard]] bool has_data() const { return !_entries.empty(); }
365
366 /// @brief Return the number of loaded countries.
367 [[nodiscard]] std::size_t country_count() const
368 {
369 return _entries.size();
370 }
371
372 /// @brief Load birth data from a resource directory.
373 ///
374 /// Expects the directory to contain: countries.tsv,
375 /// age_pyramid.tsv, monthly_births.tsv.
376 void load(const std::filesystem::path& dir)
377 {
378 if (!std::filesystem::is_directory(dir))
379 {
380 return;
381 }
382
383 load_countries_(dir / "countries.tsv");
384 load_age_pyramid_(dir / "age_pyramid.tsv");
385 load_monthly_(dir / "monthly_births.tsv");
386 rebuild_indices_();
387 }
388
389 /// @brief Load a specific dataset tier from auto-probed paths.
390 [[nodiscard]] bool load(dataset tier)
391 {
392 std::string_view sub = (tier == dataset::full) ? "full" : "lite";
393 auto found = std::ranges::find_if(
394 probe_bases_, [&](std::string_view base) {
395 auto d = std::filesystem::path{base} / sub;
396 return std::filesystem::is_regular_file(
397 d / "countries.tsv");
398 });
399 if (found != probe_bases_.end())
400 {
401 load(std::filesystem::path{*found} / sub);
402 return true;
403 }
404 return false;
405 }
406
407 private:
408 // -- Internal data structures -----------------------------------------
409
// Per-country record assembled from the three resource files.
struct entry
{
    std::string cca2;
    std::string cca3;
    std::string name;
    double le_male = 0.0;
    double le_female = 0.0;
    double csection_rate = 0.0;
    double total_male = 0.0;
    double total_female = 0.0;

    // Age distributions, one weight per age 0..MAX_AGE. Declared mutable
    // because sampling calls the non-const operator() while the entry is
    // accessed through a const reference.
    mutable std::discrete_distribution<unsigned> male_age_dist;
    mutable std::discrete_distribution<unsigned> female_age_dist;

    // Month distribution, one weight per month index 0..11.
    mutable std::discrete_distribution<unsigned> month_dist;
};
428
429 std::unordered_map<std::string, entry> _entries;
430
431 // Ordered cca2 list for random-country selection.
432 std::vector<std::string> _cca2_order;
433 mutable std::discrete_distribution<std::size_t> _country_dist;
434 mutable std::uniform_int_distribution<std::size_t> _country_uniform;
435
436 bool _weighted{true};
437 int _ref_year{};
438
439 static constexpr unsigned seed_shift_{32U};
440 static constexpr std::size_t max_age_{100};
441
442 static constexpr std::array<std::string_view, 3> probe_bases_{
443 "resources", "../resources", "birth-generator/resources"};
444
445 std::mt19937_64 _engine{std::random_device{}()};
446
447 struct auto_probe_tag {};
448
449 explicit bthg(auto_probe_tag /*tag*/) : _ref_year(current_year_())
450 {
451 auto found = std::ranges::find_if(
452 probe_bases_, [](std::string_view p) {
453 return std::filesystem::exists(p) &&
454 std::filesystem::is_directory(p);
455 });
456 if (found != probe_bases_.end())
457 {
458 const std::filesystem::path base{*found};
459 auto lite = base / "lite";
460 auto full = base / "full";
461 if (std::filesystem::is_regular_file(
462 lite / "countries.tsv"))
463 {
464 load(lite);
465 }
466 else if (std::filesystem::is_regular_file(
467 full / "countries.tsv"))
468 {
469 load(full);
470 }
471 }
472 }
473
474 // -- Helpers ----------------------------------------------------------
475
476 static int current_year_()
477 {
478 auto now = std::chrono::system_clock::now();
479 auto dp = std::chrono::floor<std::chrono::days>(now);
480 auto ymd = std::chrono::year_month_day{dp};
481 return static_cast<int>(ymd.year());
482 }
483
484 std::uint64_t draw_seed_()
485 {
486 return static_cast<std::uint64_t>(_engine());
487 }
488
489 [[nodiscard]] const entry& lookup_entry_(
490 std::string_view cca2) const
491 {
492 if (_entries.empty())
493 {
494 throw std::runtime_error(
495 "No birth data loaded. Call load() first.");
496 }
497 auto it = _entries.find(std::string{cca2});
498 if (it == _entries.end())
499 {
500 throw std::invalid_argument(
501 std::string{"Unknown country code: "} += cca2);
502 }
503 return it->second;
504 }
505
506 // Optional constraints passed to generate_().
507 struct gen_opts_
508 {
509 bool fix_sex{false};
510 sex forced_sex{sex::male};
511
512 bool fix_year{false};
513 std::uint16_t forced_year{0};
514
515 bool fix_age_range{false};
516 std::uint8_t age_min{0};
517 std::uint8_t age_max{100};
518 };
519
520 // NOLINTNEXTLINE(readability-function-cognitive-complexity)
521 [[nodiscard]] birth generate_(const entry& e,
522 std::uint64_t call_seed,
523 const gen_opts_& opts) const
524 {
525 effolkronium::random_local rng;
526 rng.seed(static_cast<std::mt19937::result_type>(
527 call_seed ^ (call_seed >> seed_shift_)));
528
529 birth b;
530 b._seed = call_seed;
531 b.country_code = e.cca2;
532
533 // 1. Sex — fixed or weighted by total male:female population.
534 if (opts.fix_sex)
535 {
536 b.bio_sex = opts.forced_sex;
537 }
538 else
539 {
540 const double total = e.total_male + e.total_female;
541 const double male_prob =
542 (total > 0) ? (e.total_male / total) : 0.5;
543 std::bernoulli_distribution sex_dist(1.0 - male_prob);
544 b.bio_sex = sex_dist(rng.engine()) ? sex::female : sex::male;
545 }
546
547 // 2. Age / year.
548 if (opts.fix_year)
549 {
550 b.year = opts.forced_year;
551 const int age = _ref_year - static_cast<int>(b.year);
552 b.age = static_cast<std::uint8_t>(
553 std::clamp(age, 0, static_cast<int>(max_age_)));
554 }
555 else if (opts.fix_age_range)
556 {
557 // Rejection-sample within [age_min, age_max].
558 static constexpr unsigned max_rejection_tries_{200};
559 unsigned raw = (b.bio_sex == sex::male)
560 ? e.male_age_dist(rng.engine())
561 : e.female_age_dist(rng.engine());
562 for (unsigned r = 0; r < max_rejection_tries_; ++r)
563 {
564 if (raw >= opts.age_min && raw <= opts.age_max) break;
565 raw = (b.bio_sex == sex::male)
566 ? e.male_age_dist(rng.engine())
567 : e.female_age_dist(rng.engine());
568 }
569 // Clamp as fallback if rejection sampling exhausted.
570 b.age = static_cast<std::uint8_t>(
571 std::clamp(raw, static_cast<unsigned>(opts.age_min),
572 static_cast<unsigned>(opts.age_max)));
573 b.year = static_cast<std::uint16_t>(_ref_year - b.age);
574 }
575 else
576 {
577 b.age = (b.bio_sex == sex::male)
578 ? e.male_age_dist(rng.engine())
579 : e.female_age_dist(rng.engine());
580 b.year = static_cast<std::uint16_t>(_ref_year - b.age);
581 }
582
583 // 3. Month — from seasonal weights.
584 b.month = static_cast<std::uint8_t>(
585 e.month_dist(rng.engine()) + 1);
586
587 // 4. Day within month.
588 auto yr = std::chrono::year{static_cast<int>(b.year)};
589 auto mo = std::chrono::month{b.month};
590 auto last_day = static_cast<unsigned>(
591 std::chrono::year_month_day_last{yr / mo / std::chrono::last}
592 .day());
593 std::uniform_int_distribution<unsigned> day_dist(1, last_day);
594
595 // Apply weekday deficit: weekend births are less likely in
596 // countries with high C-section / scheduled delivery rates.
597 static constexpr unsigned max_weekday_retries_{3};
598 static constexpr double weekday_deficit_scale_{0.5};
599 b.day = static_cast<std::uint8_t>(day_dist(rng.engine()));
600
601 std::chrono::weekday final_wd{};
602 for (unsigned attempt = 0; attempt < max_weekday_retries_;
603 ++attempt)
604 {
605 auto ymd = yr / mo / std::chrono::day{b.day};
606 final_wd = std::chrono::weekday{
607 std::chrono::sys_days{ymd}};
608 const unsigned iso = final_wd.iso_encoding(); // 1=Mon..7=Sun
609 if (iso >= 6) // Saturday or Sunday
610 {
611 const double reject_p =
612 e.csection_rate * weekday_deficit_scale_;
613 std::bernoulli_distribution reject(reject_p);
614 if (reject(rng.engine()))
615 {
616 b.day = static_cast<std::uint8_t>(
617 day_dist(rng.engine()));
618 continue;
619 }
620 }
621 break;
622 }
623
624 // 5. Weekday of final date.
625 b.weekday = static_cast<std::uint8_t>(
626 final_wd.c_encoding()); // 0=Sun..6=Sat
627
628 // 6. Life expectancy remaining.
629 const double le = (b.bio_sex == sex::male) ? e.le_male : e.le_female;
630 b.le_remaining = std::max(0.0, le - static_cast<double>(b.age));
631
632 // 7. Generational cohort.
633 b.cohort = std::string{cohort_label_(b.year)};
634
635 return b;
636 }
637
// Map a birth year to its generational cohort label.
// Each table row holds the last year (inclusive) of a cohort; years after
// the final row fall into "Generation Alpha".
static std::string_view cohort_label_(int year)
{
    using cohort_bound = std::pair<int, std::string_view>;
    static constexpr std::array<cohort_bound, 6> bounds{{
        {1927, "Greatest Generation"},
        {1945, "Silent Generation"},
        {1964, "Baby Boomer"},
        {1980, "Generation X"},
        {1996, "Millennial"},
        {2012, "Generation Z"},
    }};

    for (const auto& [last_year, label] : bounds)
    {
        if (year <= last_year)
        {
            return label;
        }
    }
    return "Generation Alpha";
}
648
649 // -- Loading ----------------------------------------------------------
650
// Parse a double with std::from_chars, which does not depend on the
// locale. Returns `fallback` for empty input or when from_chars reports
// an error.
static double parse_double_(std::string_view str,
                            double fallback = 0.0)
{
    const char* const first = str.data();
    const char* const last = first + str.size(); // NOLINT(cppcoreguidelines-pro-bounds-pointer-arithmetic)
    if (first == last)
    {
        return fallback;
    }

    double parsed{};
    const auto result = std::from_chars(first, last, parsed);
    if (result.ec != std::errc{})
    {
        return fallback;
    }
    return parsed;
}
661
// Split a line at tab characters.
// Empty fields are kept, including one after a trailing tab; an empty
// line yields no fields at all.
static std::vector<std::string> split_tab_(const std::string& line)
{
    std::vector<std::string> fields;
    fields.reserve(16);
    if (line.empty())
    {
        return fields;
    }

    std::size_t start = 0;
    for (;;)
    {
        const std::size_t tab = line.find('\t', start);
        if (tab == std::string::npos)
        {
            fields.push_back(line.substr(start));
            break;
        }
        fields.push_back(line.substr(start, tab - start));
        start = tab + 1;
    }
    return fields;
}
673
674 void load_countries_(const std::filesystem::path& path)
675 {
676 if (!std::filesystem::is_regular_file(path)) return;
677
678 std::ifstream file{path};
679 if (!file.is_open()) return;
680
681 std::string line;
682 if (!std::getline(file, line)) return; // skip header
683
684 // Header: cca2 cca3 name region subregion latitude
685 // independent le_male le_female csection_rate
686 static constexpr std::size_t min_fields{10};
687
688 while (std::getline(file, line))
689 {
690 if (line.empty()) continue;
691 if (line.back() == '\r') line.pop_back();
692
693 auto f = split_tab_(line);
694 if (f.size() < min_fields) continue;
695
696 entry e;
697 e.cca2 = std::move(f[0]);
698 e.cca3 = std::move(f[1]);
699 e.name = std::move(f[2]);
700 // fields 3..5 are region, subregion, latitude (metadata)
701 // field 6: independent
702 e.le_male = parse_double_(f[7]);
703 e.le_female = parse_double_(f[8]);
704 e.csection_rate = parse_double_(f[9]);
705
706 std::string key{e.cca2};
707 _entries.insert_or_assign(std::move(key), std::move(e));
708 }
709 }
710
711 void load_age_pyramid_(const std::filesystem::path& path)
712 {
713 if (!std::filesystem::is_regular_file(path)) return;
714
715 std::ifstream file{path};
716 if (!file.is_open()) return;
717
718 std::string line;
719 if (!std::getline(file, line)) return; // skip header
720
721 // Header: cca2 m0..m100 f0..f100 (203 columns)
722 static constexpr std::size_t expected_cols{1 + 2 * (max_age_ + 1)};
723
724 while (std::getline(file, line))
725 {
726 if (line.empty()) continue;
727 if (line.back() == '\r') line.pop_back();
728
729 auto f = split_tab_(line);
730 if (f.size() < expected_cols) continue;
731
732 auto it = _entries.find(f[0]);
733 if (it == _entries.end()) continue;
734
735 auto& e = it->second;
736
737 std::vector<double> male_w(max_age_ + 1);
738 std::vector<double> female_w(max_age_ + 1);
739
740 e.total_male = 0;
741 e.total_female = 0;
742
743 for (std::size_t a = 0; a <= max_age_; ++a)
744 {
745 const double mv = parse_double_(f[1 + a]);
746 const double fv = parse_double_(f[1 + (max_age_ + 1) + a]);
747 male_w[a] = std::max(mv, 0.0);
748 female_w[a] = std::max(fv, 0.0);
749 e.total_male += male_w[a];
750 e.total_female += female_w[a];
751 }
752
753 // Ensure at least some weight to avoid degenerate distributions.
754 if (e.total_male <= 0)
755 {
756 std::ranges::fill(male_w, 1.0);
757 e.total_male = static_cast<double>(max_age_ + 1);
758 }
759 if (e.total_female <= 0)
760 {
761 std::ranges::fill(female_w, 1.0);
762 e.total_female = static_cast<double>(max_age_ + 1);
763 }
764
765 e.male_age_dist = std::discrete_distribution<unsigned>(
766 male_w.begin(), male_w.end());
767 e.female_age_dist = std::discrete_distribution<unsigned>(
768 female_w.begin(), female_w.end());
769 }
770 }
771
772 void load_monthly_(const std::filesystem::path& path)
773 {
774 if (!std::filesystem::is_regular_file(path)) return;
775
776 std::ifstream file{path};
777 if (!file.is_open()) return;
778
779 std::string line;
780 if (!std::getline(file, line)) return; // skip header
781
782 // Header: cca2 jan feb ... dec (13 columns)
783 static constexpr std::size_t expected_cols{13};
784
785 while (std::getline(file, line))
786 {
787 if (line.empty()) continue;
788 if (line.back() == '\r') line.pop_back();
789
790 auto f = split_tab_(line);
791 if (f.size() < expected_cols) continue;
792
793 auto it = _entries.find(f[0]);
794 if (it == _entries.end()) continue;
795
796 std::vector<double> w(12);
797 for (std::size_t m = 0; m < 12; ++m)
798 {
799 w[m] = std::max(parse_double_(f[1 + m], 1.0), 0.0);
800 }
801 it->second.month_dist =
802 std::discrete_distribution<unsigned>(w.begin(), w.end());
803 }
804 }
805
806 void rebuild_indices_()
807 {
808 _cca2_order.clear();
809 _cca2_order.reserve(_entries.size());
810
811 std::vector<double> weights;
812 weights.reserve(_entries.size());
813
814 for (auto& [cca2, e] : _entries)
815 {
816 // Only include countries that have age pyramid data.
817 if (e.total_male + e.total_female <= 0) continue;
818
819 _cca2_order.push_back(cca2);
820 weights.push_back(
821 std::max(e.total_male + e.total_female, 1.0));
822 }
823
824 if (!_cca2_order.empty())
825 {
826 _country_dist = std::discrete_distribution<std::size_t>(
827 weights.begin(), weights.end());
828 _country_uniform =
829 std::uniform_int_distribution<std::size_t>(
830 0, _cca2_order.size() - 1);
831 }
832 }
833};
834
835} // namespace dasmig
836
837#endif // DASMIG_BIRTHGEN_HPP
@ full
~237 countries and territories (UN WPP coverage).
@ lite
~195 sovereign states only.
Return type for birth generation, holding all data fields.
Definition birthgen.hpp:75
std::uint8_t day
Birth day (1–31).
Definition birthgen.hpp:80
double le_remaining
Estimated years of life remaining.
Definition birthgen.hpp:84
std::uint64_t seed() const
Retrieve the random seed used to generate this birth.
Definition birthgen.hpp:88
std::uint8_t month
Birth month (1–12).
Definition birthgen.hpp:79
sex bio_sex
Biological sex.
Definition birthgen.hpp:82
std::string cohort
Generational cohort label.
Definition birthgen.hpp:85
friend std::ostream & operator<<(std::ostream &os, const birth &b)
Stream the ISO date to an output stream.
Definition birthgen.hpp:107
std::uint8_t age
Age in completed years.
Definition birthgen.hpp:81
std::string date_string() const
ISO 8601 date string (YYYY-MM-DD).
Definition birthgen.hpp:91
std::uint16_t year
Birth year.
Definition birthgen.hpp:78
std::string country_code
ISO 3166-1 alpha-2 code.
Definition birthgen.hpp:77
std::uint8_t weekday
Day of week (0=Sun, 1=Mon, …, 6=Sat).
Definition birthgen.hpp:83
Birth generator that produces demographically plausible random birthdays using UN WPP 2024 population...
Definition birthgen.hpp:134
birth get_birth(std::string_view cca2, sex bio_sex, year_t year, std::uint64_t call_seed) const
Generate a deterministic birth with predetermined sex and year.
Definition birthgen.hpp:287
birth get_birth(std::string_view cca2)
Generate a random birth for a specific country.
Definition birthgen.hpp:160
birth get_birth(std::string_view cca2, sex bio_sex)
Generate a random birth with a predetermined sex.
Definition birthgen.hpp:200
bthg & weighted(bool enable)
Set whether country selection is population-weighted.
Definition birthgen.hpp:352
bool load(dataset tier)
Load a specific dataset tier from auto-probed paths.
Definition birthgen.hpp:390
birth get_birth()
Generate a random birth from a random country.
Definition birthgen.hpp:174
birth get_birth(std::string_view cca2, year_t year)
Generate a random birth for a specific year.
Definition birthgen.hpp:260
bool weighted() const
Query whether country selection is population-weighted.
Definition birthgen.hpp:359
std::size_t country_count() const
Return the number of loaded countries.
Definition birthgen.hpp:367
birth get_birth(sex bio_sex)
Generate a random birth from a random country with a predetermined sex.
Definition birthgen.hpp:216
bthg & unseed()
Reseed the engine with a non-deterministic source.
Definition birthgen.hpp:345
birth get_birth(std::string_view cca2, std::uint64_t call_seed) const
Generate a deterministic birth for a specific country.
Definition birthgen.hpp:166
bool has_data() const
Check whether any data has been loaded.
Definition birthgen.hpp:364
bthg()
Default constructor — creates an empty generator with no data.
Definition birthgen.hpp:137
bthg & seed(std::uint64_t seed_value)
Seed the internal random engine for deterministic sequences.
Definition birthgen.hpp:338
birth get_birth(std::string_view cca2, age_range range)
Generate a random birth with age within [range.min, range.max].
Definition birthgen.hpp:301
birth get_birth(std::string_view cca2, age_range range, std::uint64_t call_seed) const
Generate a deterministic birth with age within [range.min, range.max].
Definition birthgen.hpp:318
birth get_birth(std::string_view cca2, sex bio_sex, std::uint64_t call_seed) const
Generate a deterministic birth with a predetermined sex.
Definition birthgen.hpp:207
void load(const std::filesystem::path &dir)
Load birth data from a resource directory.
Definition birthgen.hpp:376
birth get_birth(std::uint64_t call_seed) const
Generate a deterministic birth from a random country.
Definition birthgen.hpp:180
birth get_birth(std::string_view cca2, year_t year, std::uint64_t call_seed) const
Generate a deterministic birth for a specific year.
Definition birthgen.hpp:267
birth get_birth(sex bio_sex, std::uint64_t call_seed) const
Generate a deterministic birth from a random country with a predetermined sex.
Definition birthgen.hpp:236
static bthg & instance()
Access the global singleton instance.
Definition birthgen.hpp:148
birth get_birth(std::string_view cca2, sex bio_sex, year_t year)
Generate a random birth with predetermined sex and year.
Definition birthgen.hpp:277
Specifies an inclusive age range [min, max].
Definition birthgen.hpp:65
Strong type for specifying a birth year.
Definition birthgen.hpp:58