// Default constructor: initialises _ref_year from current_year_(); no data is
// loaded here.
137 bthg() : _ref_year(current_year_()) {}
// Copy assignment is deleted.
140 bthg& operator=(
const bthg&) =
delete;
// Move assignment is compiler-generated and declared noexcept.
142 bthg& operator=(
bthg&&) noexcept = default;
// Static instance built through the auto-probing constructor (tag overload).
// NOTE(review): the enclosing accessor function is not visible in this excerpt.
150 static bthg inst{auto_probe_tag{}};
// Body fragment (signature not visible here): looks up the entry for `cca2`
// and generates a birth from a freshly drawn seed with default options.
162 return generate_(lookup_entry_(cca2), draw_seed_(), {});
// Seeded variant: same lookup, but the caller-supplied `call_seed` is passed
// to generate_ instead of drawing one, with default options.
167 std::uint64_t call_seed)
const
169 return generate_(lookup_entry_(cca2), call_seed, {});
// Random-country, seeded variant (signature not visible here).
// Throws std::runtime_error when no entries are loaded.
182 if (_entries.empty())
184 throw std::runtime_error(
185 "No birth data loaded. Call load() first.");
// Local engine used for the country pick. The 64-bit seed is folded as
// x ^ (x >> seed_shift_) and cast to the mt19937 seed type.
187 effolkronium::random_local rng;
188 rng.seed(
static_cast<std::mt19937::result_type
>(
189 call_seed ^ (call_seed >> seed_shift_)));
// The index comes from _country_dist or _country_uniform; the selecting
// condition is on a line not visible here (presumably _weighted -- confirm).
192 ? _country_dist(rng.engine())
193 : _country_uniform(rng.engine());
// The same call_seed is reused for the per-country generation.
194 return get_birth(_cca2_order[idx], call_seed);
// Body fragment (signature not visible here): generates a birth for `cca2`
// from a freshly drawn seed, with the sex pinned to `bio_sex`.
202 return generate_(lookup_entry_(cca2), draw_seed_(),
203 {.fix_sex =
true, .forced_sex = bio_sex});
// Seeded variant with the sex pinned to `bio_sex`; `call_seed` is passed to
// generate_ unchanged.
208 std::uint64_t call_seed)
const
210 return generate_(lookup_entry_(cca2), call_seed,
211 {.fix_sex =
true, .forced_sex = bio_sex});
// Random-country variant with a forced sex (signature not visible here).
// The seed is drawn before the emptiness check below.
218 auto seed = draw_seed_();
// Throws std::runtime_error when no entries are loaded.
219 if (_entries.empty())
221 throw std::runtime_error(
222 "No birth data loaded. Call load() first.");
// Local engine used for the country pick.
// NOTE(review): the seed expression passed to rng.seed() and the condition of
// the ternary below sit on lines that are not visible in this excerpt.
224 effolkronium::random_local rng;
225 rng.seed(
static_cast<std::mt19937::result_type
>(
228 ? _country_dist(rng.engine())
229 : _country_uniform(rng.engine());
// Generates for the chosen country with the drawn seed and the sex pinned.
230 return generate_(lookup_entry_(_cca2_order[idx]),
seed,
231 {.fix_sex =
true, .forced_sex = bio_sex});
// Random-country, forced-sex, seeded variant.
237 std::uint64_t call_seed)
const
// Throws std::runtime_error when no entries are loaded.
239 if (_entries.empty())
241 throw std::runtime_error(
242 "No birth data loaded. Call load() first.");
// Local engine for the country pick, seeded from the folded 64-bit seed
// (x ^ (x >> seed_shift_)) cast to the mt19937 seed type.
244 effolkronium::random_local rng;
245 rng.seed(
static_cast<std::mt19937::result_type
>(
246 call_seed ^ (call_seed >> seed_shift_)));
// The index comes from _country_dist or _country_uniform; the selecting
// condition is on a line not visible here (presumably _weighted -- confirm).
249 ? _country_dist(rng.engine())
250 : _country_uniform(rng.engine());
// The same call_seed is reused for the per-country generation.
251 return get_birth(_cca2_order[idx], bio_sex, call_seed);
// Body fragment (signature not visible here): generates a birth for `cca2`
// from a freshly drawn seed, with the birth year pinned to `year.value`.
262 return generate_(lookup_entry_(cca2), draw_seed_(),
263 {.fix_year =
true, .forced_year = year.value});
// Seeded variant with the birth year pinned to `year.value`.
268 std::uint64_t call_seed)
const
270 return generate_(lookup_entry_(cca2), call_seed,
271 {.fix_year =
true, .forced_year = year.value});
// Body fragment (signature not visible here): generates a birth for `cca2`
// from a freshly drawn seed, with both the sex and the birth year pinned.
280 return generate_(lookup_entry_(cca2), draw_seed_(),
281 {.fix_sex =
true, .forced_sex = bio_sex,
282 .fix_year =
true, .forced_year = year.value});
// Seeded variant with both the sex and the birth year pinned.
289 std::uint64_t call_seed)
const
291 return generate_(lookup_entry_(cca2), call_seed,
292 {.fix_sex =
true, .forced_sex = bio_sex,
293 .fix_year =
true, .forced_year = year.value});
// Age-range variant (signature not visible here).
// Throws std::invalid_argument for an inverted range.
303 if (range.min > range.max)
305 throw std::invalid_argument(
306 "age_range: min must be <= max");
// Uses a freshly drawn seed. age_max is the std::min of max_age_ and an
// operand on a line not visible here (presumably range.max -- confirm).
308 return generate_(lookup_entry_(cca2), draw_seed_(),
309 {.fix_age_range =
true,
310 .age_min = range.min,
311 .age_max = std::min<std::uint8_t>(
313 static_cast<std::uint8_t
>(max_age_))});
// Seeded age-range variant.
320 std::uint64_t call_seed)
const
// Throws std::invalid_argument for an inverted range.
322 if (range.min > range.max)
324 throw std::invalid_argument(
325 "age_range: min must be <= max");
// age_max is the std::min of max_age_ and an operand on a line not visible
// here (presumably range.max -- confirm).
327 return generate_(lookup_entry_(cca2), call_seed,
328 {.fix_age_range =
true,
329 .age_min = range.min,
330 .age_max = std::min<std::uint8_t>(
332 static_cast<std::uint8_t
>(max_age_))});
// Body fragment (signature not visible here): reseeds the seed-producing
// engine with the caller-supplied value.
340 _engine.seed(seed_value);
// Body fragment (signature not visible here): reseeds the seed-producing
// engine from a fresh std::random_device draw.
347 _engine.seed(std::random_device{}());
// Returns the current value of the _weighted flag.
359 [[nodiscard]]
bool weighted()
const {
return _weighted; }
// True when at least one country entry has been loaded.
364 [[nodiscard]]
bool has_data()
const {
return !_entries.empty(); }
// Body fragment (signature not visible here): number of loaded country entries.
369 return _entries.size();
// Loads the data set found in directory `dir`.
376 void load(
const std::filesystem::path& dir)
// NOTE(review): what happens for a non-directory `dir` is on lines that are
// not visible in this excerpt.
378 if (!std::filesystem::is_directory(dir))
// The three TSV files are read in this order. The age-pyramid and monthly
// loaders only touch entries that already exist in _entries, so
// countries.tsv has to be read first.
383 load_countries_(dir /
"countries.tsv");
384 load_age_pyramid_(dir /
"age_pyramid.tsv");
385 load_monthly_(dir /
"monthly_births.tsv");
// Tier-based load (signature not visible here): picks the "full" or "lite"
// sub-directory name from `tier`.
392 std::string_view sub = (tier == dataset::full) ?
"full" :
"lite";
// First entry of probe_bases_ for which <base>/<sub>/countries.tsv is a
// regular file.
393 auto found = std::ranges::find_if(
394 probe_bases_, [&](std::string_view base) {
395 auto d = std::filesystem::path{base} / sub;
396 return std::filesystem::is_regular_file(
397 d /
"countries.tsv");
// Delegates to load(path) when a base matched.
// NOTE(review): the no-match behaviour is not visible in this excerpt.
399 if (found != probe_bases_.end())
401 load(std::filesystem::path{*found} / sub);
// Per-country numeric fields (the enclosing struct header and earlier fields
// are not visible in this excerpt). The totals are accumulated by
// load_age_pyramid_ from the non-negative per-age weights.
417 double csection_rate{0};
418 double total_male{0};
419 double total_female{0};
// Age samplers per sex. They are mutable because generate_ invokes them
// through a const entry&.
422 mutable std::discrete_distribution<unsigned> male_age_dist;
423 mutable std::discrete_distribution<unsigned> female_age_dist;
// Month sampler; generate_ adds 1 to its result to form the month number.
426 mutable std::discrete_distribution<unsigned> month_dist;
// Loaded country entries, keyed by the cca2 string (filled by load_countries_).
429 std::unordered_map<std::string, entry> _entries;
// Country codes addressed by the index drawn from the two distributions below.
432 std::vector<std::string> _cca2_order;
433 mutable std::discrete_distribution<std::size_t> _country_dist;
434 mutable std::uniform_int_distribution<std::size_t> _country_uniform;
// Defaults to true; exposed through weighted().
436 bool _weighted{
true};
// Right-shift used when folding a 64-bit seed before seeding a local engine.
439 static constexpr unsigned seed_shift_{32U};
// Upper age bound: used for clamping ages and for sizing the age-weight vectors.
440 static constexpr std::size_t max_age_{100};
// Base directories probed for data files, in this order.
442 static constexpr std::array<std::string_view, 3> probe_bases_{
443 "resources",
"../resources",
"birth-generator/resources"};
// Engine that produces per-call seeds (see draw_seed_); seeded from
// std::random_device at construction.
445 std::mt19937_64 _engine{std::random_device{}()};
// Empty tag type that selects the auto-probing constructor overload.
447 struct auto_probe_tag {};
// Auto-probing constructor: sets _ref_year, then looks for a data directory
// among probe_bases_.
449 explicit bthg(auto_probe_tag ) : _ref_year(current_year_())
// First probe base that exists and is a directory.
451 auto found = std::ranges::find_if(
452 probe_bases_, [](std::string_view p) {
453 return std::filesystem::exists(p) &&
454 std::filesystem::is_directory(p);
456 if (found != probe_bases_.end())
458 const std::filesystem::path base{*found};
459 auto lite = base /
"lite";
460 auto full = base /
"full";
// "lite" is checked before "full".
// NOTE(review): the action taken in each branch is on lines that are not
// visible in this excerpt.
461 if (std::filesystem::is_regular_file(
462 lite /
"countries.tsv"))
466 else if (std::filesystem::is_regular_file(
467 full /
"countries.tsv"))
// Returns the calendar year of the current system_clock time: the time point
// is floored to whole days and converted to a year_month_day.
476 static int current_year_()
478 auto now = std::chrono::system_clock::now();
479 auto dp = std::chrono::floor<std::chrono::days>(now);
480 auto ymd = std::chrono::year_month_day{dp};
481 return static_cast<int>(ymd.year());
// Draws the next value from _engine and returns it as a 64-bit seed.
// Advances the engine state, hence non-const.
484 std::uint64_t draw_seed_()
486 return static_cast<std::uint64_t
>(_engine());
// Finds the entry for country code `cca2`.
// Throws std::runtime_error when nothing is loaded, and std::invalid_argument
// when the code is not present in _entries.
// NOTE(review): the success-path return statement is not visible in this
// excerpt.
489 [[nodiscard]]
const entry& lookup_entry_(
490 std::string_view cca2)
const
492 if (_entries.empty())
494 throw std::runtime_error(
495 "No birth data loaded. Call load() first.");
// A temporary std::string is built from the view for the map lookup.
497 auto it = _entries.find(std::string{cca2});
498 if (it == _entries.end())
500 throw std::invalid_argument(
501 std::string{
"Unknown country code: "} += cca2);
// Option fields consumed by generate_ (the struct header and any earlier
// fields are not visible in this excerpt).
// Sex assigned to the result when the sex is pinned.
510 sex forced_sex{sex::male};
// Year override: forced_year is what generate_ assigns to the birth year on
// its fixed-year path.
512 bool fix_year{
false};
513 std::uint16_t forced_year{0};
// Age-range constraint: inclusive bounds checked by generate_ when
// fix_age_range is set.
515 bool fix_age_range{
false};
516 std::uint8_t age_min{0};
517 std::uint8_t age_max{100};
// Core generator: builds one `birth` for entry `e`, subject to `opts`. Every
// random draw visible here goes through a local engine seeded from
// `call_seed`.
// NOTE(review): several lines of this function (declaration of `b`, some
// branch conditions, the final return) are not visible in this excerpt.
521 [[nodiscard]] birth generate_(
const entry& e,
522 std::uint64_t call_seed,
523 const gen_opts_& opts)
const
// The 64-bit seed is folded as x ^ (x >> seed_shift_) and cast to the mt19937
// seed type.
525 effolkronium::random_local rng;
526 rng.seed(
static_cast<std::mt19937::result_type
>(
527 call_seed ^ (call_seed >> seed_shift_)));
531 b.country_code = e.cca2;
// Sex: either taken from the options ...
536 b.bio_sex = opts.forced_sex;
// ... or drawn with P(female) = 1 - total_male / (total_male + total_female),
// using 0.5 for the male share when the combined total is not positive.
540 const double total = e.total_male + e.total_female;
541 const double male_prob =
542 (total > 0) ? (e.total_male / total) : 0.5;
543 std::bernoulli_distribution sex_dist(1.0 - male_prob);
544 b.bio_sex = sex_dist(rng.engine()) ? sex::female : sex::male;
// Fixed-year path: age is the reference year minus the forced year, clamped
// to [0, max_age_].
550 b.year = opts.forced_year;
551 const int age = _ref_year -
static_cast<int>(b.year);
552 b.age =
static_cast<std::uint8_t
>(
553 std::clamp(age, 0,
static_cast<int>(max_age_)));
// Age-range path: draws from the sex-specific age distribution and redraws,
// at most max_rejection_tries_ times, until the value lies in
// [age_min, age_max].
555 else if (opts.fix_age_range)
558 static constexpr unsigned max_rejection_tries_{200};
559 unsigned raw = (b.bio_sex == sex::male)
560 ? e.male_age_dist(rng.engine())
561 : e.female_age_dist(rng.engine());
562 for (
unsigned r = 0; r < max_rejection_tries_; ++r)
564 if (raw >= opts.age_min && raw <= opts.age_max)
break;
565 raw = (b.bio_sex == sex::male)
566 ? e.male_age_dist(rng.engine())
567 : e.female_age_dist(rng.engine());
// If no draw was accepted, the last one is clamped into the range.
570 b.age =
static_cast<std::uint8_t
>(
571 std::clamp(raw,
static_cast<unsigned>(opts.age_min),
572 static_cast<unsigned>(opts.age_max)));
573 b.year =
static_cast<std::uint16_t
>(_ref_year - b.age);
// Unconstrained path: age is drawn directly and the year derived from it.
577 b.age = (b.bio_sex == sex::male)
578 ? e.male_age_dist(rng.engine())
579 : e.female_age_dist(rng.engine());
580 b.year =
static_cast<std::uint16_t
>(_ref_year - b.age);
// Month: the distribution's result is shifted by one to form the month number.
584 b.month =
static_cast<std::uint8_t
>(
585 e.month_dist(rng.engine()) + 1);
// Day: uniform over 1..last_day, where last_day comes from the
// year_month_day_last of the chosen year and month (the tail of that
// expression is on a line not visible here).
588 auto yr = std::chrono::year{
static_cast<int>(b.year)};
589 auto mo = std::chrono::month{b.month};
590 auto last_day =
static_cast<unsigned>(
591 std::chrono::year_month_day_last{yr / mo / std::chrono::last}
593 std::uniform_int_distribution<unsigned> day_dist(1, last_day);
// Weekday adjustment: at most max_weekday_retries_ attempts; with probability
// csection_rate * weekday_deficit_scale_ the day is redrawn.
// NOTE(review): the condition on `iso` that gates the rejection is not
// visible in this excerpt (presumably weekend days -- confirm).
597 static constexpr unsigned max_weekday_retries_{3};
598 static constexpr double weekday_deficit_scale_{0.5};
599 b.day =
static_cast<std::uint8_t
>(day_dist(rng.engine()));
601 std::chrono::weekday final_wd{};
602 for (
unsigned attempt = 0; attempt < max_weekday_retries_;
605 auto ymd = yr / mo / std::chrono::day{b.day};
606 final_wd = std::chrono::weekday{
607 std::chrono::sys_days{ymd}};
608 const unsigned iso = final_wd.iso_encoding();
611 const double reject_p =
612 e.csection_rate * weekday_deficit_scale_;
613 std::bernoulli_distribution reject(reject_p);
614 if (reject(rng.engine()))
616 b.day =
static_cast<std::uint8_t
>(
617 day_dist(rng.engine()));
// The weekday is stored using c_encoding().
625 b.weekday =
static_cast<std::uint8_t
>(
626 final_wd.c_encoding());
// Remaining life expectancy: sex-specific expectancy minus age, floored at 0.
629 const double le = (b.bio_sex == sex::male) ? e.le_male : e.le_female;
630 b.le_remaining = std::max(0.0, le -
static_cast<double>(b.age));
// The cohort label is copied into an owning string.
633 b.cohort = std::string{cohort_label_(b.year)};
/// @brief Maps a birth year to its generational cohort label.
///
/// Each cohort is identified by its last (inclusive) birth year; the first
/// cut-off that is >= @p year decides the label. There is no lower bound, so
/// every year up to and including 1927 yields "Greatest Generation", and every
/// year after 2012 yields "Generation Alpha".
///
/// @param year Calendar birth year.
/// @return View of a string literal (static storage duration, never dangling).
static std::string_view cohort_label_(int year)
{
    if (year <= 1927) return "Greatest Generation";
    if (year <= 1945) return "Silent Generation";
    if (year <= 1964) return "Baby Boomer";
    if (year <= 1980) return "Generation X";
    if (year <= 1996) return "Millennial";
    if (year <= 2012) return "Generation Z";
    return "Generation Alpha";
}
/// @brief Parses a floating-point number from @p str without allocating.
///
/// Uses std::from_chars, so parsing is locale-independent, does not skip
/// leading whitespace, and does not accept a leading '+'. Parsing stops at the
/// first character that cannot belong to the number; a valid numeric prefix
/// followed by other characters is still accepted (e.g. "42xyz" -> 42).
///
/// @param str      Text to parse; may be empty.
/// @param fallback Value returned when @p str is empty or from_chars reports
///                 an error (no numeric prefix, or value out of range).
/// @return The parsed value, or @p fallback.
static double parse_double_(std::string_view str,
                            double fallback = 0.0)
{
    if (str.empty()) {
        return fallback;
    }

    double val{};
    const auto result =
        std::from_chars(str.data(), str.data() + str.size(), val);
    return result.ec == std::errc{} ? val : fallback;
}
// Splits `line` on tab characters; each piece is copied into its own
// std::string and appended to `fields`.
// NOTE(review): the return statement is not visible in this excerpt
// (presumably returns `fields`).
662 static std::vector<std::string> split_tab_(
const std::string& line)
664 std::vector<std::string> fields;
666 for (
auto part : line | std::views::split(
'\t'))
668 fields.emplace_back(std::ranges::begin(part),
669 std::ranges::end(part));
// Reads the countries TSV at `path` into _entries.
// Returns silently when the path is not a regular file, cannot be opened, or
// has no first line.
674 void load_countries_(
const std::filesystem::path& path)
676 if (!std::filesystem::is_regular_file(path))
return;
678 std::ifstream file{path};
679 if (!file.is_open())
return;
// The first line is read and not parsed as data (presumably a header row).
682 if (!std::getline(file, line))
return;
// Rows with fewer than this many tab-separated fields are skipped.
686 static constexpr std::size_t min_fields{10};
688 while (std::getline(file, line))
690 if (line.empty())
continue;
// Strip a trailing carriage return (CRLF line endings).
691 if (line.back() ==
'\r') line.pop_back();
693 auto f = split_tab_(line);
694 if (f.size() < min_fields)
continue;
// Columns 0-2: cca2, cca3, name. Columns 7-9: male and female life
// expectancy, then the c-section rate.
// NOTE(review): the handling of columns 3-6 is not visible in this excerpt.
697 e.cca2 = std::move(f[0]);
698 e.cca3 = std::move(f[1]);
699 e.name = std::move(f[2]);
702 e.le_male = parse_double_(f[7]);
703 e.le_female = parse_double_(f[8]);
704 e.csection_rate = parse_double_(f[9]);
// The key is copied before `e` is moved; a repeated cca2 overwrites the
// earlier entry.
706 std::string key{e.cca2};
707 _entries.insert_or_assign(std::move(key), std::move(e));
// Reads the age-pyramid TSV at `path` and builds the per-sex age distributions
// of entries that already exist in _entries.
// Returns silently when the path is not a regular file, cannot be opened, or
// has no first line.
711 void load_age_pyramid_(
const std::filesystem::path& path)
713 if (!std::filesystem::is_regular_file(path))
return;
715 std::ifstream file{path};
716 if (!file.is_open())
return;
// The first line is read and not parsed as data (presumably a header row).
719 if (!std::getline(file, line))
return;
// One key column, then (max_age_ + 1) male columns, then (max_age_ + 1)
// female columns.
722 static constexpr std::size_t expected_cols{1 + 2 * (max_age_ + 1)};
724 while (std::getline(file, line))
726 if (line.empty())
continue;
// Strip a trailing carriage return (CRLF line endings).
727 if (line.back() ==
'\r') line.pop_back();
729 auto f = split_tab_(line);
730 if (f.size() < expected_cols)
continue;
// Rows whose key is not a known entry are ignored.
732 auto it = _entries.find(f[0]);
733 if (it == _entries.end())
continue;
735 auto& e = it->second;
737 std::vector<double> male_w(max_age_ + 1);
738 std::vector<double> female_w(max_age_ + 1);
// Negative weights are clamped to 0; the totals accumulate the clamped
// weights.
// NOTE(review): whether the totals are reset before this loop is on lines
// not visible in this excerpt.
743 for (std::size_t a = 0; a <= max_age_; ++a)
745 const double mv = parse_double_(f[1 + a]);
746 const double fv = parse_double_(f[1 + (max_age_ + 1) + a]);
747 male_w[a] = std::max(mv, 0.0);
748 female_w[a] = std::max(fv, 0.0);
749 e.total_male += male_w[a];
750 e.total_female += female_w[a];
// A sex with no positive weight falls back to uniform weights, and its total
// is set to the number of age buckets.
754 if (e.total_male <= 0)
756 std::ranges::fill(male_w, 1.0);
757 e.total_male =
static_cast<double>(max_age_ + 1);
759 if (e.total_female <= 0)
761 std::ranges::fill(female_w, 1.0);
762 e.total_female =
static_cast<double>(max_age_ + 1);
765 e.male_age_dist = std::discrete_distribution<unsigned>(
766 male_w.begin(), male_w.end());
767 e.female_age_dist = std::discrete_distribution<unsigned>(
768 female_w.begin(), female_w.end());
// Reads the monthly-births TSV at `path` and builds month_dist for entries
// that already exist in _entries.
// Returns silently when the path is not a regular file, cannot be opened, or
// has no first line.
772 void load_monthly_(
const std::filesystem::path& path)
774 if (!std::filesystem::is_regular_file(path))
return;
776 std::ifstream file{path};
777 if (!file.is_open())
return;
// The first line is read and not parsed as data (presumably a header row).
780 if (!std::getline(file, line))
return;
// One key column followed by twelve monthly weights.
783 static constexpr std::size_t expected_cols{13};
785 while (std::getline(file, line))
787 if (line.empty())
continue;
// Strip a trailing carriage return (CRLF line endings).
788 if (line.back() ==
'\r') line.pop_back();
790 auto f = split_tab_(line);
791 if (f.size() < expected_cols)
continue;
// Rows whose key is not a known entry are ignored.
793 auto it = _entries.find(f[0]);
794 if (it == _entries.end())
continue;
// Unparsable cells fall back to weight 1.0; negative values are clamped to 0.
796 std::vector<double> w(12);
797 for (std::size_t m = 0; m < 12; ++m)
799 w[m] = std::max(parse_double_(f[1 + m], 1.0), 0.0);
801 it->second.month_dist =
802 std::discrete_distribution<unsigned>(w.begin(), w.end());
// Rebuilds _cca2_order and the country-selection distributions from _entries.
// NOTE(review): several statements (any clearing of _cca2_order, the weights
// push, the assignment target of the uniform distribution) are on lines that
// are not visible in this excerpt.
806 void rebuild_indices_()
809 _cca2_order.reserve(_entries.size());
811 std::vector<double> weights;
812 weights.reserve(_entries.size());
// Iteration follows the unordered_map's order, so index positions in
// _cca2_order follow that order too.
814 for (
auto& [cca2, e] : _entries)
// Entries with no positive combined total are left out of the index.
817 if (e.total_male + e.total_female <= 0)
continue;
819 _cca2_order.push_back(cca2);
// The weight expression is the combined total, floored at 1.0.
821 std::max(e.total_male + e.total_female, 1.0));
// The distributions are only rebuilt when at least one country was indexed.
824 if (!_cca2_order.empty())
826 _country_dist = std::discrete_distribution<std::size_t>(
827 weights.begin(), weights.end());
829 std::uniform_int_distribution<std::size_t>(
830 0, _cca2_order.size() - 1);