// Copy assignment is explicitly disabled.
266 bdg& operator=(
const bdg&) =
delete;
// Move construction uses the compiler-generated implementation and is
// declared non-throwing.
267 bdg(
bdg&&) noexcept = default;
// Move assignment likewise uses the compiler-generated, non-throwing form.
268 bdg& operator=(
bdg&&) noexcept = default;
// Static instance built through the constructor overload selected by
// auto_probe_tag (the one that searches probe_bases_ for data).
// NOTE(review): presumably the object handed out by a singleton-style
// accessor - confirm against the enclosing function.
276 static bdg inst{auto_probe_tag{}};
// Country given, no seed: resolve the entry for `cca2` and generate with a
// seed drawn from the member engine and default generation options.
288 return generate_(lookup_entry_(cca2), draw_seed_(), {});
// Country and seed given: `call_seed` is forwarded to generate_ unchanged,
// again with default generation options.
293 std::uint64_t call_seed)
const
295 return generate_(lookup_entry_(cca2), call_seed, {});
// Seed only: the same `call_seed` selects the country code and is then
// passed on to the country + seed overload.
308 return get_biodata(pick_random_cca2_(call_seed), call_seed);
// Country and sex given, no seed: a drawn seed is used and the sex is fixed
// to `bio_sex` through the options argument.
316 return generate_(lookup_entry_(cca2), draw_seed_(),
317 {.fix_sex =
true, .forced_sex = bio_sex});
// Country, sex and seed given: as above, but with the caller's seed.
322 std::uint64_t call_seed)
const
324 return generate_(lookup_entry_(cca2), call_seed,
325 {.fix_sex =
true, .forced_sex = bio_sex});
// Sex only: one seed is drawn and used for both the country selection and
// the generation itself.
332 auto seed = draw_seed_();
333 return generate_(lookup_entry_(pick_random_cca2_(
seed)),
seed,
334 {.fix_sex =
true, .forced_sex = bio_sex});
// Sex and seed given: the country code is chosen from `call_seed`, then the
// call is delegated to a get_biodata overload taking a country code and sex.
340 std::uint64_t call_seed)
const
342 return get_biodata(pick_random_cca2_(call_seed), bio_sex,
// Reseed the member engine with the caller-supplied value.
352 _engine.seed(seed_value);
// Reseed the member engine from a fresh std::random_device reading.
359 _engine.seed(std::random_device{}());
366 [[nodiscard]]
bool has_data()
const {
return !_entries.empty(); }
// Number of entries currently held in _entries.
371 return _entries.size();
// Loads biodata from the directory `dir`.
379 void load(
const std::filesystem::path& dir)
// Guard: `dir` must be an existing directory.
381 if (!std::filesystem::is_directory(dir))
// The data file is read from <dir>/biodata.tsv.
387 load_biodata_(dir /
"biodata.tsv");
// Map the requested tier to its subdirectory name: "full" for
// dataset::full, "lite" for anything else.
394 std::string_view sub = (tier == dataset::full) ?
"full" :
"lite";
// Search probe_bases_ in declaration order; the predicate builds
// <base>/<sub> and tests for a regular file beneath it.
395 auto found = std::ranges::find_if(
396 probe_bases_, [&](std::string_view base) {
397 auto d = std::filesystem::path{base} / sub;
398 return std::filesystem::is_regular_file(
// On a hit, delegate to the directory-based load() with <base>/<sub>.
401 if (found != probe_bases_.end())
403 load(std::filesystem::path{*found} / sub);
// Height parameters per sex: mean and standard deviation of the normal
// distribution that generate_ samples to fill height_cm.
418 double male_height_mean{170.0};
419 double male_height_sd{7.0};
420 double female_height_mean{157.0};
421 double female_height_sd{6.5};
// Mean BMI per sex.
424 double male_bmi_mean{24.5};
425 double female_bmi_mean{25.0};
// Relative weights for the categorical traits. generate_ passes each array
// to std::discrete_distribution, so only the ratios between values matter.
429 std::array<double, 3> eye_weights{0.0, 5.0, 95.0};
432 std::array<double, 4> hair_weights{90.0, 8.0, 1.0, 1.0};
435 std::array<double, 6> skin_weights{2.0, 8.0, 18.0, 30.0,
439 std::array<double, 8> blood_weights{37.0, 28.0, 20.0, 5.0,
// Share of left-handed people as a percentage; generate_ divides by 100 to
// obtain the Bernoulli probability.
443 double left_handed_pct{10.6};
// Loaded entries; load_biodata_ stores each one under its cca2 string.
446 std::unordered_map<std::string, entry> _entries;
// Keys of _entries in sorted order (filled by rebuild_indices_).
449 std::vector<std::string> _cca2_order;
// Index distribution over _cca2_order; mutable because the const method
// pick_random_cca2_ invokes it.
450 mutable std::uniform_int_distribution<std::size_t> _country_uniform;
// Shift used to XOR the upper half of a 64-bit seed into the lower half
// before the result is narrowed to the 32-bit engine seed type.
452 static constexpr unsigned seed_shift_{32U};
// Candidate base directories, tried in this order when probing for data.
454 static constexpr std::array<std::string_view, 3> probe_bases_{
455 "resources",
"../resources",
"biodata-generator/resources"};
// 64-bit Mersenne Twister used by draw_seed_; initially seeded from
// std::random_device.
457 std::mt19937_64 _engine{std::random_device{}()};
// Empty tag type selecting the auto-probing constructor overload.
459 struct auto_probe_tag {};
// Tag-dispatched constructor that probes the filesystem for a data set.
461 explicit bdg(auto_probe_tag )
// Take the first probe base that exists and is a directory.
463 auto found = std::ranges::find_if(
464 probe_bases_, [](std::string_view p) {
465 return std::filesystem::exists(p) &&
466 std::filesystem::is_directory(p);
468 if (found != probe_bases_.end())
470 const std::filesystem::path base{*found};
471 auto lite = base /
"lite";
472 auto full = base /
"full";
// <base>/lite/biodata.tsv is checked first; <base>/full/biodata.tsv is only
// considered when the lite file is not a regular file.
473 if (std::filesystem::is_regular_file(
474 lite /
"biodata.tsv"))
478 else if (std::filesystem::is_regular_file(
479 full /
"biodata.tsv"))
488 std::uint64_t draw_seed_()
490 return static_cast<std::uint64_t
>(_engine());
// Resolves a country code to its loaded entry.
// Throws std::runtime_error when nothing has been loaded and
// std::invalid_argument when `cca2` is not a key of _entries.
493 [[nodiscard]]
const entry& lookup_entry_(
494 std::string_view cca2)
const
496 if (_entries.empty())
498 throw std::runtime_error(
499 "No biodata loaded. Call load() first.");
// A temporary std::string is built from the view for the map lookup.
501 auto it = _entries.find(std::string{cca2});
502 if (it == _entries.end())
504 throw std::invalid_argument(
505 std::format(
"Unknown country code: {}", cca2));
511 [[nodiscard]]
const std::string& pick_random_cca2_(
512 std::uint64_t call_seed)
const
514 if (_entries.empty())
516 throw std::runtime_error(
517 "No biodata loaded. Call load() first.");
519 effolkronium::random_local rng;
520 rng.seed(
static_cast<std::mt19937::result_type
>(
521 call_seed ^ (call_seed >> seed_shift_)));
522 return _cca2_order[_country_uniform(rng.engine())];
// Sex that generate_ copies into the result when the sex is fixed by the
// caller; defaults to sex::male.
529 sex forced_sex{sex::male};
// Builds one biodata record for entry `e`. All random draws come from a
// call-local generator seeded from `call_seed`; `opts` can pin the sex.
533 [[nodiscard]] biodata generate_(
const entry& e,
534 std::uint64_t call_seed,
535 const gen_opts_& opts)
const
// Fold the 64-bit seed into the 32-bit engine seed type by XOR-ing its upper
// half into its lower half.
537 effolkronium::random_local rng;
538 rng.seed(
static_cast<std::mt19937::result_type
>(
539 call_seed ^ (call_seed >> seed_shift_)));
543 b.country_code = e.cca2;
// Sex: either copied from the options ...
551 b.bio_sex = opts.forced_sex;
// ... or decided by a fair coin flip.
555 std::bernoulli_distribution sex_dist(0.5);
556 b.bio_sex = sex_dist(rng.engine()) ? sex::female
// Height: choose the sex-specific mean and standard deviation, draw from a
// normal distribution and clamp (the upper bound is mean + 4 sd).
561 const double h_mean = (b.bio_sex == sex::male)
563 : e.female_height_mean;
564 const double h_sd = (b.bio_sex == sex::male)
566 : e.female_height_sd;
567 std::normal_distribution<double> height_dist(h_mean, h_sd);
568 b.height_cm = std::clamp(height_dist(rng.engine()),
570 h_mean + 4.0 * h_sd);
// BMI: log-normal draw. The arithmetic mean and variance are converted to
// log-space parameters: sigma^2 = ln(1 + var / mean^2) and
// mu = ln(mean) - sigma^2 / 2.
573 const double bmi_mean = (b.bio_sex == sex::male)
576 const double bmi_sd = e.bmi_sd;
578 const double bmi_var = bmi_sd * bmi_sd;
579 const double ln_sigma2 =
580 std::log1p(bmi_var / (bmi_mean * bmi_mean));
582 std::log(bmi_mean) - 0.5 * ln_sigma2;
583 const double ln_sigma = std::sqrt(ln_sigma2);
584 std::lognormal_distribution<double> bmi_dist(ln_mu, ln_sigma);
// The sampled BMI is limited to the interval [14, 55].
585 b.bmi = std::clamp(bmi_dist(rng.engine()), 14.0, 55.0);
// Weight follows from the BMI definition: BMI * (height in metres)^2.
588 const double height_m = b.height_cm / 100.0;
589 b.weight_kg = b.bmi * height_m * height_m;
// Categorical traits: each is a weighted draw whose resulting index is cast
// to the corresponding enumeration.
592 std::discrete_distribution<unsigned> eye_dist(
593 e.eye_weights.begin(), e.eye_weights.end());
594 b.eyes =
static_cast<eye_color>(eye_dist(rng.engine()));
597 std::discrete_distribution<unsigned> hair_dist(
598 e.hair_weights.begin(), e.hair_weights.end());
599 b.hair =
static_cast<hair_color>(hair_dist(rng.engine()));
// The skin index is shifted by one before use.
// NOTE(review): suggests the skin scale is 1-based - confirm against the
// enumeration's definition.
602 std::discrete_distribution<unsigned> skin_dist(
603 e.skin_weights.begin(), e.skin_weights.end());
605 skin_dist(rng.engine()) + 1);
608 std::discrete_distribution<unsigned> blood_dist(
609 e.blood_weights.begin(), e.blood_weights.end());
610 b.blood =
static_cast<blood_type>(blood_dist(rng.engine()));
// Handedness: left with probability left_handed_pct / 100.
613 std::bernoulli_distribution hand_dist(
614 e.left_handed_pct / 100.0);
615 b.hand = hand_dist(rng.engine()) ? handedness::left
// Parses `str` as a double with std::from_chars and returns `fallback`
// (0.0 by default) when the text is empty or the conversion reports an error.
624 static double parse_double_(std::string_view str,
625 double fallback = 0.0)
627 if (str.empty()) {
return fallback; }
630 std::from_chars(str.data(),
631 str.data() + str.size(), val);
// NOTE(review): only the error code is inspected, so input with a valid
// numeric prefix followed by other characters yields the prefix's value.
632 return ec == std::errc{} ? val : fallback;
// Splits `line` on tab characters and collects each piece as an owning
// std::string in `fields`.
635 static std::vector<std::string> split_tab_(std::string_view line)
637 std::vector<std::string> fields;
639 for (
auto part : line | std::views::split(
'\t'))
// Each sub-range is copied into a new string via its iterator pair.
641 fields.emplace_back(std::ranges::begin(part),
642 std::ranges::end(part));
// Reads the tab-separated file at `path` and stores one entry per usable row
// in _entries. Returns without doing anything when the path is not a regular
// file, cannot be opened, or has no first line.
647 void load_biodata_(
const std::filesystem::path& path)
649 if (!std::filesystem::is_regular_file(path))
return;
651 std::ifstream file{path};
652 if (!file.is_open())
return;
// The first line is consumed before the row loop starts.
// NOTE(review): presumably the header row - confirm against the file format.
655 if (!std::getline(file, line))
return;
// Rows with fewer fields than this are skipped (the highest index read
// below is 30).
667 static constexpr std::size_t min_fields{31};
669 while (std::getline(file, line))
671 if (line.empty())
continue;
// Drop a trailing carriage return left by CRLF line endings.
672 if (line.back() ==
'\r') line.pop_back();
674 auto f = split_tab_(line);
675 if (f.size() < min_fields)
continue;
678 e.cca2 = std::move(f[0]);
679 e.name = std::move(f[1]);
// Scalar columns: the fallback passed to parse_double_ is used when a cell
// is empty or fails to parse.
682 e.male_height_mean = parse_double_(f[2], 170.0);
683 e.male_height_sd = parse_double_(f[3], 7.0);
684 e.female_height_mean = parse_double_(f[4], 157.0);
685 e.female_height_sd = parse_double_(f[5], 6.5);
688 e.male_bmi_mean = parse_double_(f[6], 24.5);
689 e.female_bmi_mean = parse_double_(f[7], 25.0);
690 e.bmi_sd = parse_double_(f[8], 4.5);
// Remaining numeric columns are parsed with parse_double_'s default fallback
// of 0.0.
695 parse_double_(f[10]),
701 parse_double_(f[12]),
702 parse_double_(f[13]),
703 parse_double_(f[14]),
709 parse_double_(f[16]),
710 parse_double_(f[17]),
711 parse_double_(f[18]),
712 parse_double_(f[19]),
713 parse_double_(f[20]),
719 parse_double_(f[22]),
720 parse_double_(f[23]),
721 parse_double_(f[24]),
722 parse_double_(f[25]),
723 parse_double_(f[26]),
724 parse_double_(f[27]),
725 parse_double_(f[28]),
730 e.left_handed_pct = parse_double_(f[30], 10.6);
// The key is copied out first because `e` (including e.cca2) is moved into
// the map on the next line. insert_or_assign lets a later row with the same
// code replace an earlier one.
732 std::string key{e.cca2};
733 _entries.insert_or_assign(std::move(key), std::move(e));
// Rebuilds the lookup helpers derived from _entries.
737 void rebuild_indices_()
740 _cca2_order.reserve(_entries.size());
// Collect every key of _entries ...
742 for (
const auto& key : _entries | std::views::keys)
744 _cca2_order.push_back(key);
// ... and sort them, so the order does not depend on the unordered map's
// iteration order.
746 std::ranges::sort(_cca2_order);
// The index distribution covers [0, size - 1] and is only formed for a
// non-empty list, where size() - 1 cannot wrap around.
748 if (!_cca2_order.empty())
751 std::uniform_int_distribution<std::size_t>(
752 0, _cca2_order.size() - 1);