// Move assignment for `ng`: compiler-generated (defaulted) and declared noexcept.
269 ng& operator=(
ng&&) noexcept = default;
// Static `ng` object constructed through the auto_probe_tag constructor overload.
// NOTE(review): the enclosing function is not visible in this excerpt — presumably a
// singleton-style accessor; confirm.
279 static ng inst{auto_probe_tag{}};
// Static lookup table from two-letter, lowercase wide-string codes to `culture`
// enumerators. std::less<> is a transparent comparator, which enables heterogeneous
// lookup in find().
// NOTE(review): the keys look like ISO 3166-1 alpha-2 country codes — confirm. Whether the
// caller lowercases its input before the lookup is not visible in this excerpt.
288 static const std::map<std::wstring, culture, std::less<>>
290 {L
"ae", culture::emirati}, {L
"af", culture::afghan},
291 {L
"al", culture::albanian}, {L
"ao", culture::angolan},
292 {L
"ar", culture::argentinian}, {L
"at", culture::austrian},
293 {L
"az", culture::azerbaijani}, {L
"bd", culture::bangladeshi},
294 {L
"be", culture::belgian}, {L
"bf", culture::burkinabe},
295 {L
"bg", culture::bulgarian}, {L
"bh", culture::bahraini},
296 {L
"bi", culture::burundian}, {L
"bn", culture::bruneian},
297 {L
"bo", culture::bolivian}, {L
"br", culture::brazilian},
298 {L
"bw", culture::botswanan}, {L
"ca", culture::canadian},
299 {L
"ch", culture::swiss}, {L
"cl", culture::chilean},
300 {L
"cm", culture::cameroonian}, {L
"cn", culture::chinese},
301 {L
"co", culture::colombian}, {L
"cr", culture::costarican},
302 {L
"cy", culture::cypriot}, {L
"cz", culture::czech},
303 {L
"de", culture::german}, {L
"dj", culture::djiboutian},
304 {L
"dk", culture::danish}, {L
"dz", culture::algerian},
305 {L
"ec", culture::ecuadorian}, {L
"ee", culture::estonian},
306 {L
"eg", culture::egyptian}, {L
"es", culture::spanish},
307 {L
"et", culture::ethiopian}, {L
"fi", culture::finnish},
308 {L
"fj", culture::fijian}, {L
"fr", culture::french},
309 {L
"gb", culture::british}, {L
"ge", culture::georgian},
310 {L
"gh", culture::ghanaian}, {L
"gr", culture::greek},
311 {L
"gt", culture::guatemalan}, {L
"hk", culture::hongkonger},
312 {L
"hn", culture::honduran}, {L
"hr", culture::croatian},
313 {L
"ht", culture::haitian}, {L
"hu", culture::hungarian},
314 {L
"id", culture::indonesian}, {L
"ie", culture::irish},
315 {L
"il", culture::israeli}, {L
"in", culture::indian},
316 {L
"iq", culture::iraqi}, {L
"ir", culture::iranian},
317 {L
"is", culture::icelandic}, {L
"it", culture::italian},
318 {L
"jm", culture::jamaican}, {L
"jo", culture::jordanian},
319 {L
"jp", culture::japanese}, {L
"kh", culture::cambodian},
320 {L
"kr", culture::korean}, {L
"kw", culture::kuwaiti},
321 {L
"kz", culture::kazakh}, {L
"lb", culture::lebanese},
322 {L
"lt", culture::lithuanian}, {L
"lu", culture::luxembourgish},
323 {L
"ly", culture::libyan}, {L
"ma", culture::moroccan},
324 {L
"md", culture::moldovan}, {L
"mo", culture::macanese},
325 {L
"mt", culture::maltese}, {L
"mu", culture::mauritian},
326 {L
"mv", culture::maldivian}, {L
"mx", culture::mexican},
327 {L
"my", culture::malaysian}, {L
"na", culture::namibian},
328 {L
"ng", culture::nigerian}, {L
"nl", culture::dutch},
329 {L
"no", culture::norwegian}, {L
"om", culture::omani},
330 {L
"pa", culture::panamanian}, {L
"pe", culture::peruvian},
331 {L
"ph", culture::filipino}, {L
"pl", culture::polish},
332 {L
"pr", culture::puertorican}, {L
"ps", culture::palestinian},
333 {L
"pt", culture::portuguese}, {L
"qa", culture::qatari},
334 {L
"rs", culture::serbian}, {L
"ru", culture::russian},
335 {L
"sa", culture::saudi}, {L
"sd", culture::sudanese},
336 {L
"se", culture::swedish}, {L
"sg", culture::singaporean},
337 {L
"si", culture::slovenian}, {L
"sv", culture::salvadoran},
338 {L
"sy", culture::syrian}, {L
"tm", culture::turkmen},
339 {L
"tn", culture::tunisian}, {L
"tr", culture::turkish},
340 {L
"tw", culture::taiwanese}, {L
"us", culture::american},
341 {L
"uy", culture::uruguayan}, {L
"ye", culture::yemeni},
342 {L
"za", culture::southafrican}};
// Look up `country_code` in the table; `it` is scoped to the if-statement.
// NOTE(review): the hit branch and the miss path are elided from this excerpt.
344 if (
auto it = country_code_map.find(country_code);
345 it != country_code_map.end())
// Static lookup table from wide-string gender names to `gender` enumerators, again with
// the transparent std::less<> comparator.
// NOTE(review): original lines 358-359 are missing from this excerpt, so the table may
// hold more entries than the two shown.
357 static const std::map<std::wstring, gender, std::less<>> gender_map = {
360 {L
"male", gender::m},
361 {L
"female", gender::f}};
// Look up `gender_string`; the hit and miss handling are elided from this excerpt.
363 if (
auto it = gender_map.find(gender_string);
364 it != gender_map.end())
// Draw a fresh 64-bit seed from the member engine, run solver() in name mode (first
// argument true) for the requested gender `g` and culture `c`, and store the seed on the
// result.
// NOTE(review): the enclosing function signature is elided from this excerpt.
379 auto call_seed =
static_cast<std::uint64_t
>(_engine());
380 auto result = solver(
true, g, c, call_seed);
381 result._seed = call_seed;
// Variant taking an explicit `call_seed` parameter instead of drawing one from _engine:
// runs solver() in name mode (first argument true) and stores the supplied seed on the
// result.
// NOTE(review): only the tail of the parameter list is visible in this excerpt.
392 std::uint64_t call_seed)
394 auto result = solver(
true, g, c, call_seed);
395 result._seed = call_seed;
// Draw a fresh 64-bit seed from the member engine and run solver() with the first
// argument false and gender::any, i.e. the branch that reads _sur_pool in solver().
// The seed is stored on the result.
// NOTE(review): the enclosing function signature is elided from this excerpt.
405 auto call_seed =
static_cast<std::uint64_t
>(_engine());
406 auto result = solver(
false, gender::any, c, call_seed);
407 result._seed = call_seed;
// Variant taking an explicit `call_seed` parameter: runs solver() with the first argument
// false and gender::any, then stores the supplied seed on the result.
// NOTE(review): only the tail of the parameter list is visible in this excerpt.
417 std::uint64_t call_seed)
419 auto result = solver(
false, gender::any, c, call_seed);
420 result._seed = call_seed;
// Re-seed the member engine with the supplied `seed_value`.
436 _engine.seed(seed_value);
// Re-seed the member engine from a temporary std::random_device (one draw).
444 _engine.seed(std::random_device{}());
// True when _m_pool is non-empty, OR-ed with further operands.
// NOTE(review): the rest of the expression is elided — presumably the other pools; confirm.
454 return !_m_pool.empty() ||
// Load name resources from `resource_path`.
// Proceeds only when the path exists and is a directory, then walks it recursively and
// selects regular files whose extension is ".names".
// NOTE(review): the action taken for each matching file is elided from this excerpt —
// presumably it is handed to parse_file(); confirm.
465 void load(
const std::filesystem::path& resource_path)
467 if (std::filesystem::exists(resource_path) &&
468 std::filesystem::is_directory(resource_path))
470 for (
const auto& entry :
471 std::filesystem::recursive_directory_iterator(resource_path))
473 if (entry.is_regular_file() &&
474 (entry.path().extension() ==
".names"))
// Candidate base directories, tried in order. They are relative paths, so they resolve
// against the process's current working directory.
// NOTE(review): the enclosing function signature is elided; `tier` is compared against
// dataset::full below.
492 static constexpr std::array probe_paths = {
493 "resources",
"../resources",
"name-generator/resources"};
// Sub-directory name for the requested tier: "full" for dataset::full, otherwise "lite".
495 const char* subfolder =
496 (tier == dataset::full) ?
"full" :
"lite";
// Find the first base for which <base>/<subfolder> is an existing directory.
498 auto found = std::ranges::find_if(probe_paths, [&](
const char* base) {
499 const std::filesystem::path dir =
500 std::filesystem::path{base} / subfolder;
501 return std::filesystem::is_directory(dir);
// Load from the first match. What happens when nothing matches is not visible here.
503 if (found != probe_paths.end())
505 load(std::filesystem::path{*found} / subfolder);
// Pool fields. NOTE(review): the struct header is elided — presumably `name_pool`, the
// mapped type of the pool maps; confirm.
// `names` and `weights` are filled in lock-step by parse_file (one weight per name).
515 std::vector<std::wstring> names;
516 std::vector<double> weights;
// Weighted index sampler built from `weights` in parse_file. It is `mutable` because
// pick() invokes it through a const reference to the pool.
520 mutable std::discrete_distribution<std::size_t> dist;
// Per-culture name pools. solver() and append_surname_impl() read surnames from
// _sur_pool; parse_file() writes to all three.
// NOTE(review): the _m_/_f_ prefixes presumably mean male/female first names — confirm.
524 std::map<culture, name_pool> _m_pool;
525 std::map<culture, name_pool> _f_pool;
526 std::map<culture, name_pool> _sur_pool;
// 64-bit Mersenne Twister seeded once from std::random_device at construction. It is the
// source of the per-call seeds.
529 std::mt19937_64 _engine{std::random_device{}()};
// Empty tag type used to select the resource-probing constructor overload of `ng`.
532 struct auto_probe_tag {};
// Tag-dispatched constructor that probes for a resource directory.
// The tag parameter is unnamed; it only selects this overload.
535 explicit ng(auto_probe_tag )
// Candidate base directories, tried in order (relative to the current working directory).
537 static constexpr std::array probe_paths = {
538 "resources",
"../resources",
"name-generator/resources"};
// First candidate that exists and is a directory.
540 auto found = std::ranges::find_if(probe_paths, [](
const char* p) {
541 return std::filesystem::exists(p) &&
542 std::filesystem::is_directory(p);
544 if (found != probe_paths.end())
546 const std::filesystem::path base{*found};
547 auto lite_dir = base /
"lite";
548 auto full_dir = base /
"full";
// The "lite" sub-directory is checked first; "full" is considered only when "lite" is
// absent.
// NOTE(review): the branch bodies are elided — presumably each calls load(); confirm.
549 if (std::filesystem::is_directory(lite_dir))
553 else if (std::filesystem::is_directory(full_dir))
// Resolve a requested culture to a concrete one.
// A culture other than culture::any is returned unchanged. Otherwise an entry of `db` is
// chosen uniformly at random using `engine`.
// Throws std::invalid_argument ("No names loaded for any culture").
// NOTE(review): the condition guarding the throw is elided — presumably db.empty(), which
// would also protect the `db.size() - 1` below from unsigned wrap-around; confirm.
565 static culture resolve_culture(culture c,
566 const std::map<culture, name_pool>& db,
567 effolkronium::random_local& engine)
569 if (c != culture::any) {
return c; }
572 throw std::invalid_argument(
"No names loaded for any culture");
// Pick an index in the closed range [0, size-1] and step the map iterator that far.
574 auto idx = engine.get<std::size_t>(0, db.size() - 1);
575 auto it = db.begin();
576 std::advance(it,
static_cast<std::ptrdiff_t
>(idx));
// Resolve a requested gender: for gender::any, draw 0 or 1 from `engine` and cast it to
// `gender`.
// NOTE(review): the cast assumes the two concrete genders have underlying values 0 and 1
// — confirm against the enum declaration. The return for a non-`any` request is elided.
581 static gender resolve_gender(gender g,
582 effolkronium::random_local& engine)
584 if (g == gender::any)
586 return static_cast<gender>(engine.get<std::size_t>(0, 1));
// Return a narrow-string label for culture `c`, found by indexing `labels` with the
// enumerator's numeric value.
// NOTE(review): this requires the order of `labels` (alphabetical here) to match the
// declaration order of `culture` — confirm against the enum. The return for an
// out-of-range value is elided from this excerpt.
592 [[nodiscard]]
static const char* culture_label(culture c)
594 static constexpr std::array labels = {
595 "afghan",
"albanian",
"algerian",
596 "american",
"angolan",
"argentinian",
597 "austrian",
"azerbaijani",
"bahraini",
598 "bangladeshi",
"belgian",
"bolivian",
599 "botswanan",
"brazilian",
"british",
600 "bruneian",
"bulgarian",
"burkinabe",
601 "burundian",
"cambodian",
"cameroonian",
602 "canadian",
"chilean",
"chinese",
603 "colombian",
"costarican",
"croatian",
604 "cypriot",
"czech",
"danish",
605 "djiboutian",
"dutch",
"ecuadorian",
606 "egyptian",
"emirati",
"estonian",
607 "ethiopian",
"fijian",
"filipino",
608 "finnish",
"french",
"georgian",
609 "german",
"ghanaian",
"greek",
610 "guatemalan",
"haitian",
"honduran",
611 "hongkonger",
"hungarian",
"icelandic",
612 "indian",
"indonesian",
"iranian",
613 "iraqi",
"irish",
"israeli",
614 "italian",
"jamaican",
"japanese",
615 "jordanian",
"kazakh",
"korean",
616 "kuwaiti",
"lebanese",
"libyan",
617 "lithuanian",
"luxembourgish",
"macanese",
618 "malaysian",
"maldivian",
"maltese",
619 "mauritian",
"mexican",
"moldovan",
620 "moroccan",
"namibian",
"nigerian",
621 "norwegian",
"omani",
"palestinian",
622 "panamanian",
"peruvian",
"polish",
623 "portuguese",
"puertorican",
"qatari",
624 "russian",
"salvadoran",
"saudi",
625 "serbian",
"singaporean",
"slovenian",
626 "southafrican",
"spanish",
"sudanese",
627 "swedish",
"swiss",
"syrian",
628 "taiwanese",
"tunisian",
"turkish",
629 "turkmen",
"uruguayan",
"yemeni",
// Bounds-checked lookup: the label is returned only when the numeric value is inside the
// table.
631 auto idx =
static_cast<std::size_t
>(c);
632 if (idx < labels.size()) {
return labels.at(idx); }
// Return a narrow-string label for gender `g`: "male", "female" or "any".
// NOTE(review): the switch header and any default/fall-through return are elided from
// this excerpt.
637 [[nodiscard]]
static const char* gender_label(gender g)
641 case gender::m:
return "male";
642 case gender::f:
return "female";
643 case gender::any:
return "any";
// Pick one name for culture `c` from `db`.
// `g` is used only to build the error message. Throws std::invalid_argument when `db`
// has no entry for `c` or the entry's name list is empty.
649 [[nodiscard]]
static std::wstring pick(
650 const std::map<culture, name_pool>& db,
651 culture c, gender g, effolkronium::random_local& engine)
653 auto it = db.find(c);
654 if (it == db.end() || it->second.names.empty())
656 throw std::invalid_argument(
657 std::string(
"No ") + gender_label(g) +
658 " names loaded for culture '" + culture_label(c) +
"'");
660 const auto& pool = it->second;
// Weighted path: sample an index from the pool's discrete distribution using the caller's
// underlying engine. `dist` is mutable, so the call is legal through the const reference.
// NOTE(review): because `dist` is shared pool state mutated here, concurrent calls on the
// same pool may need external synchronization — confirm the threading model.
661 if (!pool.weights.empty())
665 return pool.names.at(pool.dist(engine.engine()));
// Unweighted path: let the random wrapper choose an element and dereference the result.
// NOTE(review): parse_file, as visible, pushes a weight for every name, so this path may
// be unreachable for pools built there — confirm.
667 return *engine.get(pool.names);
// Shift that brings the high 32 bits of a 64-bit seed down to the low half. It is used in
// `call_seed ^ (call_seed >> seed_fold_shift)` before the value is converted to
// std::mt19937::result_type.
671 static constexpr unsigned seed_fold_shift = 32U;
// Append one additional given-name part to `n` for gender `g` and culture `c`.
// A fresh seed is drawn from the member engine and folded (high half XOR low half) to
// seed a call-local engine. After resolving gender and culture, the picked part is pushed
// onto n._parts and appended to n._full_string after a single space.
674 void append_name_impl(name& n, gender g, culture c)
676 auto call_seed =
static_cast<std::uint64_t
>(_engine());
677 effolkronium::random_local call_engine;
678 call_engine.seed(
static_cast<std::mt19937::result_type
>(
679 (call_seed ^ (call_seed >> seed_fold_shift))));
681 const gender resolved_g = resolve_gender(g, call_engine);
// Select the pool by resolved gender. NOTE(review): the two operands of the conditional
// are elided — presumably _f_pool and _m_pool; confirm.
683 const auto& db = (resolved_g == gender::f)
686 const culture resolved_c = resolve_culture(c, db, call_engine);
688 const std::wstring part = pick(db, resolved_c, resolved_g,
690 n._parts.push_back(part);
691 n._full_string.append(L
" ").append(part);
// Append one additional surname part to `n` for culture `c`.
// It seeds a call-local engine the same way as append_name_impl and resolves the culture
// against _sur_pool. The picked part (with gender::any passed to pick) is pushed onto
// n._parts and appended to n._full_string after a single space.
695 void append_surname_impl(name& n, culture c)
697 auto call_seed =
static_cast<std::uint64_t
>(_engine());
698 effolkronium::random_local call_engine;
699 call_engine.seed(
static_cast<std::mt19937::result_type
>(
700 (call_seed ^ (call_seed >> seed_fold_shift))));
702 const culture resolved_c = resolve_culture(c,
703 _sur_pool, call_engine);
704 const std::wstring part = pick(_sur_pool,
705 resolved_c, gender::any, call_engine);
706 n._parts.push_back(part);
707 n._full_string.append(L
" ").append(part);
// Core generator: build a `name` from an explicit `call_seed`.
// The 64-bit seed is folded (high half XOR low half) to seed a call-local engine, so the
// output depends only on the arguments and the loaded pools, not on the member engine.
// Two result paths are visible: one picks from a gender-selected pool, the other from
// _sur_pool.
// NOTE(review): the branch conditions are elided — presumably `is_name` selects between
// them; confirm.
711 [[nodiscard]] name solver(
bool is_name, gender requested_gender,
712 culture requested_culture,
713 std::uint64_t call_seed)
715 effolkronium::random_local call_engine;
716 call_engine.seed(
static_cast<std::mt19937::result_type
>(
717 (call_seed ^ (call_seed >> seed_fold_shift))));
719 const gender resolved_gender = resolve_gender(requested_gender,
// Gender-selected pool path. The operands of the conditional are elided from this excerpt.
724 const auto& db = (resolved_gender == gender::f)
726 const culture resolved_culture = resolve_culture(
727 requested_culture, db, call_engine);
// The result is brace-initialized from the picked string, the resolved gender and
// culture, and `this`.
728 return {pick(db, resolved_culture, resolved_gender,
730 resolved_gender, resolved_culture,
this};
// Surname path: the culture is resolved against _sur_pool. pick() receives gender::any
// (used only for its error message), while the result still carries resolved_gender.
733 const culture resolved_culture = resolve_culture(
734 requested_culture, _sur_pool, call_engine);
735 return {pick(_sur_pool,
736 resolved_culture, gender::any, call_engine),
737 resolved_gender, resolved_culture,
this};
// Decode a UTF-8 byte string into a std::wstring.
// When wchar_t is at least 4 bytes, each code point is stored directly. Otherwise code
// points at or above 0x10000 are written as a UTF-16 surrogate pair.
// NOTE(review): the handling of malformed input (bad lead byte, truncated sequence, bad
// continuation byte, code point above 0x10FFFF) is elided from this excerpt. No rejection
// of overlong encodings or of code points in the surrogate range 0xD800-0xDFFF is visible
// here — confirm whether it is needed.
746 static std::wstring utf8_to_wstring(
const std::string& utf8)
// Lead-byte classification: (lead & *_mask) == *_lead identifies the sequence length, and
// *_val extracts the payload bits carried by the lead byte.
749 static constexpr unsigned char ascii_max = 0x80U;
750 static constexpr unsigned char two_byte_mask = 0xE0U;
751 static constexpr unsigned char two_byte_lead = 0xC0U;
752 static constexpr unsigned char two_byte_val = 0x1FU;
753 static constexpr unsigned char three_byte_mask = 0xF0U;
754 static constexpr unsigned char three_byte_lead = 0xE0U;
755 static constexpr unsigned char three_byte_val = 0x0FU;
756 static constexpr unsigned char four_byte_mask = 0xF8U;
757 static constexpr unsigned char four_byte_lead = 0xF0U;
758 static constexpr unsigned char four_byte_val = 0x07U;
// Continuation bytes must match 10xxxxxx; each contributes 6 payload bits.
759 static constexpr unsigned char cont_val = 0x3FU;
760 static constexpr unsigned char cont_check_mask = 0xC0U;
761 static constexpr unsigned char cont_check_lead = 0x80U;
762 static constexpr unsigned cont_shift = 6U;
763 static constexpr char32_t max_codepoint = 0x10FFFFU;
// UTF-16 surrogate-pair arithmetic, used only when wchar_t is narrower than 4 bytes.
765 static constexpr char32_t surrogate_offset = 0x10000U;
766 static constexpr char32_t high_surrogate_base = 0xD800U;
767 static constexpr char32_t low_surrogate_base = 0xDC00U;
768 static constexpr unsigned surrogate_shift = 10U;
769 static constexpr char32_t surrogate_mask = 0x3FFU;
// Reserves one output unit per input byte.
772 result.reserve(utf8.size());
775 while (i < utf8.size())
777 char32_t codepoint = 0;
778 auto lead =
static_cast<unsigned char>(utf8.at(i));
779 std::size_t extra = 0;
// Classify the lead byte. The statements that set `extra` (and the code point in the
// ASCII case) are elided from this excerpt.
781 if (lead < ascii_max)
785 else if ((lead & two_byte_mask) == two_byte_lead)
787 codepoint = lead & two_byte_val;
790 else if ((lead & three_byte_mask) == three_byte_lead)
792 codepoint = lead & three_byte_val;
795 else if ((lead & four_byte_mask) == four_byte_lead)
797 codepoint = lead & four_byte_val;
// Consume `extra` continuation bytes, shifting 6 payload bits into the code point each
// time. The loop advances `i` together with `j`.
808 for (std::size_t j = 0; j < extra; ++j, ++i)
810 if (i >= utf8.size())
815 auto byte =
static_cast<unsigned char>(utf8.at(i));
816 if ((
byte & cont_check_mask) != cont_check_lead)
821 codepoint = (codepoint << cont_shift) |
822 (
static_cast<char32_t>(
byte) &
823 static_cast<char32_t>(cont_val));
826 if (!valid || codepoint > max_codepoint)
// Emit the code point: stored directly for wide wchar_t, otherwise as a BMP unit or a
// high/low surrogate pair.
831 if constexpr (
sizeof(wchar_t) >= 4)
833 result.push_back(
static_cast<wchar_t>(codepoint));
837 if (codepoint < surrogate_offset)
839 result.push_back(
static_cast<wchar_t>(codepoint));
843 const char32_t shifted = codepoint - surrogate_offset;
844 result.push_back(
static_cast<wchar_t>(
845 high_surrogate_base + (shifted >> surrogate_shift)));
846 result.push_back(
static_cast<wchar_t>(
847 low_surrogate_base + (shifted & surrogate_mask)));
// Parse one resource file into a name pool and store it in one of the pool maps under the
// culture read from the file.
// Visible structure: two header lines are read first (each with a trailing '\r' check),
// then every remaining non-empty line becomes a name. A line "name<TAB>weight" supplies
// an explicit weight; a line without a tab gets weight 1.0.
// NOTE(review): the interpretation of the two header lines is elided — presumably the
// culture and the pool kind; confirm.
858 void parse_file(
const std::filesystem::path& file)
860 std::ifstream tentative_file{file};
862 if (tentative_file.is_open())
864 std::string raw_line;
// First header line. The '\r' check handles files with CRLF line endings.
867 if (!std::getline(tentative_file, raw_line))
871 if (!raw_line.empty() && raw_line.back() ==
'\r')
879 if (culture_read == culture::any)
// Second header line.
885 if (!std::getline(tentative_file, raw_line))
889 if (!raw_line.empty() && raw_line.back() ==
'\r')
// Body: one name per non-empty line.
898 while (std::getline(tentative_file, raw_line))
900 if (!raw_line.empty() && raw_line.back() ==
'\r')
904 if (!raw_line.empty())
906 auto tab_pos = raw_line.find(
'\t');
907 if (tab_pos != std::string::npos)
// Weighted entry: the text before the tab is the name, the text after it is the weight.
// NOTE(review): std::stod throws std::invalid_argument or std::out_of_range on a
// malformed weight, and no handler is visible in this excerpt. No check for negative
// weights is visible either, although std::discrete_distribution expects non-negative
// weights — confirm.
909 pool.names.push_back(
910 utf8_to_wstring(raw_line.substr(0, tab_pos)));
911 pool.weights.push_back(
912 std::stod(raw_line.substr(tab_pos + 1)));
// Unweighted entry: the whole line is the name, with a default weight of 1.0.
916 pool.names.push_back(utf8_to_wstring(raw_line));
917 pool.weights.push_back(1.0);
922 if (pool.names.empty())
// Build the sampler from the collected weights.
928 if (!pool.weights.empty())
930 pool.dist = std::discrete_distribution<std::size_t>(
931 pool.weights.begin(), pool.weights.end());
// Store the pool under the parsed culture, replacing any existing entry for that culture
// in the chosen map.
// NOTE(review): the conditions selecting among the three maps are elided — presumably
// mutually exclusive branches, since each one moves from `pool`.
938 _m_pool[culture_read] = std::move(pool);
941 _f_pool[culture_read] = std::move(pool);
944 _sur_pool[culture_read] = std::move(pool);
// Grants the global-namespace struct ng_test_access access to private members.
// NOTE(review): presumably a test-only hook, judging by the name — confirm.
951 friend struct ::ng_test_access;