// Move assignment: compiler-generated (defaulted) and declared noexcept.
173 ng& operator=(
ng&&) noexcept = default;
// Static ng object constructed through the auto_probe_tag constructor.
// NOTE(review): the enclosing function is not visible in this excerpt —
// presumably a singleton accessor that returns this object; confirm.
183 static ng inst{auto_probe_tag{}};
// Lookup table from two-letter, lowercase wide-string country codes to
// culture values. It is a static const std::map, so it is built once and
// then only read.
// The match performed below is an exact std::wstring comparison.
// NOTE(review): any case folding or trimming of country_code would have to
// happen before this point; none is visible here — confirm with the caller.
192 static const std::map<std::wstring, culture> country_code_map = {
193 {L
"ar", culture::argentinian}, {L
"us", culture::american},
194 {L
"au", culture::australian}, {L
"br", culture::brazilian},
195 {L
"gb", culture::british}, {L
"bg", culture::bulgarian},
196 {L
"ca", culture::canadian}, {L
"cn", culture::chinese},
197 {L
"dk", culture::danish}, {L
"fi", culture::finnish},
198 {L
"fr", culture::french}, {L
"de", culture::german},
199 {L
"kz", culture::kazakh}, {L
"mx", culture::mexican},
200 {L
"no", culture::norwegian}, {L
"pl", culture::polish},
201 {L
"pt", culture::portuguese}, {L
"ru", culture::russian},
202 {L
"es", culture::spanish}, {L
"se", culture::swedish},
203 {L
"tr", culture::turkish}, {L
"ua", culture::ukrainian}};
// Single find(); the condition is true only when country_code is a key.
// NOTE(review): the hit and miss handling are not visible in this excerpt.
205 if (
auto it = country_code_map.find(country_code);
206 it != country_code_map.end())
// Lookup table from wide-string gender names to gender values; the entries
// visible in this excerpt are "male" and "female". Built once (static const).
218 static const std::map<std::wstring, gender> gender_map = {
221 {L
"male", gender::m},
222 {L
"female", gender::f}};
// Exact-match lookup of gender_string; the condition is true only on a hit.
// NOTE(review): the hit and miss handling are not visible in this excerpt.
224 if (
auto it = gender_map.find(gender_string);
225 it != gender_map.end())
// Draws the next value from the member engine and uses it as this call's
// seed, so each call advances _engine exactly once.
240 auto call_seed =
static_cast<std::uint64_t
>(_engine());
// Runs solver() with is_name = true for the requested gender g and culture c.
241 auto result = solver(
true, g, c, call_seed);
// Records the seed on the result.
// NOTE(review): presumably so the same name can be regenerated via the
// seeded overload — confirm.
242 result._seed = call_seed;
// Seeded variant: the caller supplies call_seed instead of drawing it from
// _engine, so nothing visible here touches the member engine.
253 std::uint64_t call_seed)
// Same solver() call as the unseeded variant (is_name = true).
255 auto result = solver(
true, g, c, call_seed);
// Stores the caller-supplied seed on the result.
256 result._seed = call_seed;
// Draws the next value from the member engine and uses it as this call's seed.
266 auto call_seed =
static_cast<std::uint64_t
>(_engine());
// Runs solver() with is_name = false; gender::any is passed as the requested
// gender and c as the requested culture.
267 auto result = solver(
false, gender::any, c, call_seed);
// Records the seed on the result.
268 result._seed = call_seed;
// Seeded variant: the caller supplies call_seed instead of drawing it from
// _engine.
278 std::uint64_t call_seed)
// Same solver() call as the unseeded variant (is_name = false, gender::any).
280 auto result = solver(
false, gender::any, c, call_seed);
// Stores the caller-supplied seed on the result.
281 result._seed = call_seed;
// Re-seeds the member engine with the caller-supplied seed_value.
297 _engine.seed(seed_value);
// Re-seeds the member engine with a single draw from a temporary
// std::random_device.
305 _engine.seed(std::random_device{}());
// True when at least one of the three tables (male names, female names,
// surnames) contains a culture key.
315 return !_culture_indexed_m_names.empty() ||
316 !_culture_indexed_f_names.empty() ||
317 !_culture_indexed_surnames.empty();
// Scans resource_path for name resource files.
// @param resource_path Directory to scan.
326 void load(
const std::filesystem::path& resource_path)
// Only acts when the path exists and is a directory.
// NOTE(review): handling of any other path is not visible in this excerpt.
328 if (std::filesystem::exists(resource_path) &&
329 std::filesystem::is_directory(resource_path))
// Walks the directory tree recursively, subdirectories included.
331 for (
const auto& entry :
332 std::filesystem::recursive_directory_iterator(resource_path))
// Considers only regular files whose extension is exactly ".names".
// NOTE(review): the per-file action is not visible in this excerpt —
// presumably parse_file(entry.path()); confirm.
334 if (entry.is_regular_file() &&
335 (entry.path().extension() ==
".names"))
// List of names stored as wide strings.
345 using name_container = std::vector<std::wstring>;
// Numeric value of culture::any; resolve_culture uses it as the exclusive
// upper bound when drawing a random culture index.
// NOTE(review): presumably culture::any is the last enumerator, making this
// the number of concrete cultures — confirm against the enum definition.
348 static constexpr std::size_t culture_count =
349 static_cast<std::size_t
>(culture::any);
// Per-culture name tables: male given names, female given names, surnames.
352 std::map<culture, name_container> _culture_indexed_m_names;
353 std::map<culture, name_container> _culture_indexed_f_names;
354 std::map<culture, name_container> _culture_indexed_surnames;
// 64-bit Mersenne Twister used to draw per-call seeds; initially seeded with
// a single draw from std::random_device.
357 std::mt19937_64 _engine{std::random_device{}()};
// Empty tag type used to select the auto-probing constructor below.
360 struct auto_probe_tag {};
// Constructor that searches a fixed list of candidate resource directories.
363 explicit ng(auto_probe_tag )
// Candidates are relative paths, so std::filesystem resolves them against
// the process's current working directory.
365 static constexpr std::array probe_paths = {
366 "resources",
"../resources",
"name-generator/resources"};
// Finds the first candidate that exists and is a directory.
368 auto found = std::ranges::find_if(probe_paths, [](
const char* p) {
369 return std::filesystem::exists(p) &&
370 std::filesystem::is_directory(p);
// NOTE(review): the action taken on a hit is not visible in this excerpt —
// presumably load(*found); confirm. Nothing visible handles a miss.
372 if (found != probe_paths.end())
// Resolves a requested culture to a concrete one.
// @param c      Requested culture; culture::any requests a random pick.
// @param engine Per-call random engine used for the pick.
379 static culture resolve_culture(culture c,
380 effolkronium::random_local& engine)
382 if (c == culture::any)
// Draws an index bounded by 0 and culture_count - 1.
// NOTE(review): the surrounding cast and the non-any return path are not
// visible here — presumably the index is cast to culture and c is returned
// unchanged otherwise; confirm.
385 engine.get<std::size_t>(0, culture_count - 1));
// Resolves a requested gender to a concrete one.
// @param g      Requested gender; gender::any requests a random pick.
// @param engine Per-call random engine used for the pick.
391 static gender resolve_gender(gender g,
392 effolkronium::random_local& engine)
394 if (g == gender::any)
// Draws a value bounded by 0 and 1 and casts it to gender.
// NOTE(review): this relies on the two concrete genders having underlying
// values 0 and 1 — confirm against the enum definition.
396 return static_cast<gender>(engine.get<std::size_t>(0, 1));
// Returns a narrow-string label for culture c, for use in diagnostics
// (pick() embeds it in its exception message).
402 [[nodiscard]]
static const char* culture_label(culture c)
// The table is indexed by the underlying value of c, so its order has to
// match the order of the culture enumerators.
// NOTE(review): the end of this initializer is not visible in this excerpt.
404 static constexpr std::array labels = {
405 "american",
"argentinian",
"australian",
406 "brazilian",
"british",
"bulgarian",
407 "canadian",
"chinese",
"danish",
408 "finnish",
"french",
"german",
409 "kazakh",
"mexican",
"norwegian",
410 "polish",
"portuguese",
"russian",
411 "spanish",
"swedish",
"turkish",
413 auto idx =
static_cast<std::size_t
>(c);
// Bounds-checked lookup: only in-range indices are served from the table.
// NOTE(review): the fallback for an out-of-range value is not visible here.
414 if (idx < labels.size()) {
return labels.at(idx); }
// Returns a narrow-string label for gender g: "male", "female" or "any".
// NOTE(review): the switch header and any default path are not visible in
// this excerpt.
419 [[nodiscard]]
static const char* gender_label(gender g)
423 case gender::m:
return "male";
424 case gender::f:
return "female";
425 case gender::any:
return "any";
// Picks one random entry from the name list stored for culture c.
// @param db     Per-culture name table to draw from.
// @param c      Culture to look up; expected to be already resolved.
// @param g      Gender; in the lines visible here it is used only to word
//               the error message.
// @param engine Per-call random engine used for the selection.
// @return A copy of the selected name.
// @throws std::invalid_argument when db has no entry for c or the entry is
//         an empty list.
431 [[nodiscard]]
static std::wstring pick(
432 const std::map<culture, name_container>& db,
433 culture c, gender g, effolkronium::random_local& engine)
// Single lookup; an empty list counts as missing.
435 if (
auto it = db.find(c); it != db.end() && !it->second.empty())
// engine.get(container) yields something dereferenceable to an element.
437 return *engine.get(it->second);
// The message has the form: No <gender> names loaded for culture '<culture>'
439 throw std::invalid_argument(
440 std::string(
"No ") + gender_label(g) +
441 " names loaded for culture '" + culture_label(c) +
"'");
// Shift used to fold a 64-bit seed: callers compute seed ^ (seed >> 32)
// before narrowing it to std::mt19937::result_type.
445 static constexpr unsigned seed_fold_shift = 32U;
// Appends one randomly picked given name to an existing name object.
// @param n Name object to extend; its _parts and _full_string are modified.
// @param g Requested gender; resolve_gender handles gender::any.
// @param c Requested culture; resolve_culture handles culture::any.
// pick() throws std::invalid_argument when no names are loaded for the
// resolved culture.
448 void append_name_impl(name& n, gender g, culture c)
// Draw one value from the member engine as this call's seed.
450 auto call_seed =
static_cast<std::uint64_t
>(_engine());
// Seed a per-call local engine with the folded seed (see seed_fold_shift).
451 effolkronium::random_local call_engine;
452 call_engine.seed(
static_cast<std::mt19937::result_type
>(
453 (call_seed ^ (call_seed >> seed_fold_shift))));
// Culture is resolved before gender, so the order of draws from call_engine
// is fixed.
455 const culture resolved_c = resolve_culture(c, call_engine);
456 const gender resolved_g = resolve_gender(g, call_engine);
// Female table only for gender::f; every other value uses the male table.
458 const auto& db = (resolved_g == gender::f)
459 ? _culture_indexed_f_names
460 : _culture_indexed_m_names;
// NOTE(review): the end of this call is not visible in this excerpt —
// presumably call_engine is the final argument; confirm.
462 const std::wstring part = pick(db, resolved_c, resolved_g,
// Record the part, then extend the full string with a space and the part.
464 n._parts.push_back(part);
465 n._full_string.append(L
" ").append(part);
// Appends one randomly picked surname to an existing name object.
// @param n Name object to extend; its _parts and _full_string are modified.
// @param c Requested culture; resolve_culture handles culture::any.
// pick() throws std::invalid_argument when no surnames are loaded for the
// resolved culture.
469 void append_surname_impl(name& n, culture c)
// Draw one value from the member engine as this call's seed.
471 auto call_seed =
static_cast<std::uint64_t
>(_engine());
// Seed a per-call local engine with the folded seed (see seed_fold_shift).
472 effolkronium::random_local call_engine;
473 call_engine.seed(
static_cast<std::mt19937::result_type
>(
474 (call_seed ^ (call_seed >> seed_fold_shift))));
476 const culture resolved_c = resolve_culture(c, call_engine);
// gender::any is passed because pick() takes a gender argument; in the lines
// visible in pick() it only affects the wording of the error message.
477 const std::wstring part = pick(_culture_indexed_surnames, resolved_c,
478 gender::any, call_engine);
// Record the part, then extend the full string with a space and the part.
479 n._parts.push_back(part);
480 n._full_string.append(L
" ").append(part);
// Builds a name object from a single seeded random pick.
// @param is_name          Passed as true by the given-name wrappers and
//                         false by the surname wrappers.
// @param requested_gender Requested gender; may be gender::any.
// @param requested_culture Requested culture; may be culture::any.
// @param call_seed        64-bit seed that determines every random draw made
//                         by this call.
// @return A name brace-initialised from the picked string, the resolved
//         gender, the resolved culture and this.
// pick() throws std::invalid_argument when no matching names are loaded.
484 [[nodiscard]] name solver(
bool is_name, gender requested_gender,
485 culture requested_culture,
486 std::uint64_t call_seed)
// A fresh local engine is seeded from call_seed alone; no draw from the
// member engine is visible in this function.
488 effolkronium::random_local call_engine;
489 call_engine.seed(
static_cast<std::mt19937::result_type
>(
490 (call_seed ^ (call_seed >> seed_fold_shift))));
// NOTE(review): the ends of the next two calls are not visible in this
// excerpt — presumably call_engine is the final argument of each; confirm.
492 const culture resolved_culture = resolve_culture(requested_culture,
494 const gender resolved_gender = resolve_gender(requested_gender,
// NOTE(review): the branch selecting between the two returns below is not
// visible — presumably it tests is_name; confirm.
// Female table only for gender::f; every other value uses the male table.
499 const auto& db = (resolved_gender == gender::f)
500 ? _culture_indexed_f_names
501 : _culture_indexed_m_names;
502 return {pick(db, resolved_culture, resolved_gender, call_engine),
503 resolved_gender, resolved_culture,
this};
// Surname path: the pick comes from the surname table.
506 return {pick(_culture_indexed_surnames, resolved_culture,
507 gender::any, call_engine),
508 resolved_gender, resolved_culture,
this};
// Decodes a UTF-8 byte string into a std::wstring.
// @param utf8 Input bytes, interpreted as UTF-8.
// NOTE(review): what happens to an invalid sequence (skip, replace or abort)
// is not visible in this excerpt. The visible checks cover truncated
// sequences, bad continuation bytes and code points above 0x10FFFF; no check
// for overlong encodings or for encoded surrogates (U+D800..U+DFFF) is
// visible — confirm whether that is intended.
517 static std::wstring utf8_to_wstring(
const std::string& utf8)
// Lead-byte classification: a lead byte below ascii_max is a single-byte
// character; otherwise (lead & mask) == lead_pattern selects the sequence
// length and (lead & val) extracts the payload bits.
520 static constexpr unsigned char ascii_max = 0x80U;
521 static constexpr unsigned char two_byte_mask = 0xE0U;
522 static constexpr unsigned char two_byte_lead = 0xC0U;
523 static constexpr unsigned char two_byte_val = 0x1FU;
524 static constexpr unsigned char three_byte_mask = 0xF0U;
525 static constexpr unsigned char three_byte_lead = 0xE0U;
526 static constexpr unsigned char three_byte_val = 0x0FU;
527 static constexpr unsigned char four_byte_mask = 0xF8U;
528 static constexpr unsigned char four_byte_lead = 0xF0U;
529 static constexpr unsigned char four_byte_val = 0x07U;
// Continuation bytes: must match 10xxxxxx and contribute their low 6 bits.
530 static constexpr unsigned char cont_val = 0x3FU;
531 static constexpr unsigned char cont_check_mask = 0xC0U;
532 static constexpr unsigned char cont_check_lead = 0x80U;
533 static constexpr unsigned cont_shift = 6U;
// Largest code point accepted by the validity check below.
534 static constexpr char32_t max_codepoint = 0x10FFFFU;
// Constants for UTF-16 surrogate-pair encoding, used when wchar_t is
// narrower than 4 bytes.
536 static constexpr char32_t surrogate_offset = 0x10000U;
537 static constexpr char32_t high_surrogate_base = 0xD800U;
538 static constexpr char32_t low_surrogate_base = 0xDC00U;
539 static constexpr unsigned surrogate_shift = 10U;
540 static constexpr char32_t surrogate_mask = 0x3FFU;
// Reserves one output unit per input byte.
543 result.reserve(utf8.size());
546 while (i < utf8.size())
548 char32_t codepoint = 0;
549 auto lead =
static_cast<unsigned char>(utf8.at(i));
// Number of continuation bytes expected after the lead byte.
// NOTE(review): the assignments to extra are not visible in this excerpt.
550 std::size_t extra = 0;
552 if (lead < ascii_max)
556 else if ((lead & two_byte_mask) == two_byte_lead)
558 codepoint = lead & two_byte_val;
561 else if ((lead & three_byte_mask) == three_byte_lead)
563 codepoint = lead & three_byte_val;
566 else if ((lead & four_byte_mask) == four_byte_lead)
568 codepoint = lead & four_byte_val;
// Consumes the continuation bytes; i advances together with j.
579 for (std::size_t j = 0; j < extra; ++j, ++i)
// Input ended in the middle of a multi-byte sequence.
581 if (i >= utf8.size())
586 auto byte =
static_cast<unsigned char>(utf8.at(i));
// Byte does not have the 10xxxxxx continuation pattern.
587 if ((
byte & cont_check_mask) != cont_check_lead)
// Shift in the 6 payload bits of this continuation byte.
592 codepoint = (codepoint << cont_shift) |
593 (
static_cast<char32_t>(
byte) &
594 static_cast<char32_t>(cont_val));
597 if (!valid || codepoint > max_codepoint)
// Compile-time branch: a wchar_t of at least 4 bytes holds any code point.
602 if constexpr (
sizeof(wchar_t) >= 4)
604 result.push_back(
static_cast<wchar_t>(codepoint));
// NOTE(review): presumably the else branch of the if constexpr above.
// Code points below 0x10000 fit in one unit; the rest become a high/low
// surrogate pair.
608 if (codepoint < surrogate_offset)
610 result.push_back(
static_cast<wchar_t>(codepoint));
614 const char32_t shifted = codepoint - surrogate_offset;
615 result.push_back(
static_cast<wchar_t>(
616 high_surrogate_base + (shifted >> surrogate_shift)));
617 result.push_back(
static_cast<wchar_t>(
618 low_surrogate_base + (shifted & surrogate_mask)));
// Reads one name resource file and stores its names in one of the three
// per-culture tables.
// @param file Path of the file to read.
// NOTE(review): the visible lines show two header lines followed by one name
// per line. How the headers are interpreted is not visible in this excerpt —
// presumably line 1 is the culture and line 2 selects the male, female or
// surname table; confirm.
629 void parse_file(
const std::filesystem::path& file)
631 std::ifstream tentative_file{file};
// Nothing visible happens when the file cannot be opened.
633 if (tentative_file.is_open())
635 std::string raw_line;
// First header line; a failed read is handled by the guarded branch.
638 if (!std::getline(tentative_file, raw_line))
// A trailing '\r' is detected on every line read, which accommodates CRLF
// line endings.
642 if (!raw_line.empty() && raw_line.back() ==
'\r')
// culture::any is tested as the culture parsed from the header.
// NOTE(review): presumably it marks an unrecognised culture and the file is
// rejected; confirm.
650 if (culture_read == culture::any)
// Second header line.
656 if (!std::getline(tentative_file, raw_line))
660 if (!raw_line.empty() && raw_line.back() ==
'\r')
// Remaining lines: one name per line; empty lines are skipped and each kept
// line is converted from UTF-8 to a wide string.
667 name_container names_read;
668 while (std::getline(tentative_file, raw_line))
670 if (!raw_line.empty() && raw_line.back() ==
'\r')
674 if (!raw_line.empty())
676 names_read.push_back(utf8_to_wstring(raw_line));
// NOTE(review): the handling of a file with no names is not visible here.
680 if (names_read.empty())
// Each assignment replaces any list already stored for this culture in the
// chosen table; names_read is moved, not copied.
689 _culture_indexed_m_names[culture_read] =
690 std::move(names_read);
693 _culture_indexed_f_names[culture_read] =
694 std::move(names_read);
697 _culture_indexed_surnames[culture_read] =
698 std::move(names_read);
// Grants the global-namespace struct ng_test_access access to this class's
// private members.
705 friend struct ::ng_test_access;