215 static cntg inst{auto_probe_tag{}};
229 auto call_seed =
static_cast<std::uint64_t
>(_engine());
239 if (_countries.empty())
241 throw std::runtime_error(
242 "No country data loaded. Call load() first.");
245 effolkronium::random_local call_engine;
246 call_engine.seed(
static_cast<std::mt19937::result_type
>(
247 (call_seed ^ (call_seed >> seed_shift_))));
250 ? _distribution(call_engine.engine())
251 : _uniform(call_engine.engine());
252 country result = _countries[idx];
253 result._seed = call_seed;
264 auto call_seed =
static_cast<std::uint64_t
>(_engine());
275 std::uint64_t call_seed)
const
277 if (_countries.empty())
279 throw std::runtime_error(
280 "No country data loaded. Call load() first.");
283 auto it = _region_index.find(rgn);
284 if (it == _region_index.end())
286 throw std::invalid_argument(
287 "No countries found for region: " + rgn);
290 effolkronium::random_local call_engine;
291 call_engine.seed(
static_cast<std::mt19937::result_type
>(
292 (call_seed ^ (call_seed >> seed_shift_))));
294 const auto& idx = it->second;
295 auto selected = _weighted
296 ? idx.distribution(call_engine.engine())
297 : std::uniform_int_distribution<std::size_t>(
298 0, idx.country_indices.size() - 1)(
299 call_engine.engine());
301 country result = _countries[idx.country_indices[selected]];
302 result._seed = call_seed;
313 if (_countries.empty())
315 throw std::runtime_error(
316 "No country data loaded. Call load() first.");
319 auto it = _cca2_index.find(cca2);
320 if (it == _cca2_index.end())
322 throw std::invalid_argument(
323 "No country found for CCA2 code: " + cca2);
326 return _countries[it->second];
339 _engine.seed(seed_value);
347 _engine.seed(std::random_device{}());
374 return !_countries.empty();
380 return _countries.size();
/// Reads country records from the tab-separated file at `tsv_path` and
/// appends each parsed record to `_countries`.
///
/// @param tsv_path Path to the TSV file to read.
///
/// NOTE(review): this listing omits some lines of the body (braces, local
/// declarations, the statements guarded by the checks below); the comments
/// here describe only what the visible lines show.
390 void load(
const std::filesystem::path& tsv_path)
// Precondition check: the path must exist and be a regular file. The
// statement executed when the check fails is not visible here.
392 if (!std::filesystem::exists(tsv_path) ||
393 !std::filesystem::is_regular_file(tsv_path))
398 std::ifstream file{tsv_path};
// One line is consumed before the record loop starts — presumably the
// header row; TODO confirm against the data file.
407 if (!std::getline(file, line))
// Every remaining line is treated as one record.
412 while (std::getline(file, line))
// Detects a trailing carriage return (CRLF line endings).
// NOTE(review): line.back() requires a non-empty line; confirm an
// emptiness guard precedes this check in the omitted lines.
419 if (line.back() ==
'\r')
// Convert the raw line into a `country` and move it into storage.
424 auto c = parse_line(line);
427 _countries.push_back(std::move(c));
439 const char* subfolder =
440 (tier == dataset::full) ?
"full" :
"lite";
442 auto found = std::ranges::find_if(
443 probe_bases_, [&](
const char* base) {
445 std::filesystem::path{base} / subfolder /
"countries.tsv";
446 return std::filesystem::is_regular_file(tsv);
448 if (found != probe_bases_.end())
450 load(std::filesystem::path{*found} / subfolder /
"countries.tsv");
// All loaded country records; the index maps below store positions into
// this vector.
458 std::vector<country> _countries;
// Weighted picker over positions in `_countries`; rebuild_indices() derives
// its weights from each country's population. Declared mutable because a
// standard distribution's operator() is non-const.
461 mutable std::discrete_distribution<std::size_t> _distribution;
// Unweighted picker over [0, _countries.size() - 1]; assigned in
// rebuild_indices() only when `_countries` is non-empty.
464 mutable std::uniform_int_distribution<std::size_t> _uniform;
// Chooses between the weighted distribution (true) and a uniform one
// (false) when selecting a country. Defaults to weighted.
467 bool _weighted{
true};
// Per-region entry fields (the enclosing struct header is not visible in
// this listing; `_region_index` below names it `region_entry`).
// Positions into `_countries` of the countries belonging to the region.
472 std::vector<std::size_t> country_indices;
// Weighted picker over positions in `country_indices` (not directly into
// `_countries`); weights come from population in rebuild_indices().
473 mutable std::discrete_distribution<std::size_t> distribution;
// Region name -> per-region entry; keyed by `country::region`.
476 std::unordered_map<std::string, region_entry> _region_index;
// CCA2 code -> position of that country in `_countries`.
479 std::unordered_map<std::string, std::size_t> _cca2_index;
// Shift used to fold a 64-bit per-call seed before it is cast to
// std::mt19937::result_type: seed ^ (seed >> seed_shift_).
482 static constexpr unsigned seed_shift_{32U};
// Candidate base directories (relative paths) probed, in this order, when
// looking for the bundled resources.
485 static constexpr std::array probe_bases_{
486 "resources",
"../resources",
"country-generator/resources"};
// Master 64-bit engine, seeded from std::random_device at construction;
// its output is used as the per-call seed by the selection functions.
489 std::mt19937_64 _engine{std::random_device{}()};
// Empty tag type used to select the resource-probing constructor.
492 struct auto_probe_tag {};
/// Tag-selected constructor that searches `probe_bases_` for a resources
/// directory and then checks it for a bundled `countries.tsv`.
///
/// NOTE(review): the statements executed inside each branch are not visible
/// in this listing — presumably they load the file that was found; TODO
/// confirm.
495 explicit cntg(auto_probe_tag )
// Pick the first candidate base path that exists and is a directory.
497 auto found = std::ranges::find_if(
498 probe_bases_, [](
const char* p) {
499 return std::filesystem::exists(p) &&
500 std::filesystem::is_directory(p);
// Nothing further is probed when no candidate directory matched.
502 if (found != probe_bases_.end())
504 const std::filesystem::path base{*found};
// Build both candidate file paths: <base>/lite/countries.tsv and
// <base>/full/countries.tsv.
505 auto lite_tsv = base /
"lite" /
"countries.tsv";
506 auto full_tsv = base /
"full" /
"countries.tsv";
// The "lite" file is checked first; "full" is considered only when the
// lite file is not a regular file.
507 if (std::filesystem::is_regular_file(lite_tsv))
511 else if (std::filesystem::is_regular_file(full_tsv))
/// Recomputes every lookup structure derived from `_countries`: the global
/// weighted and uniform distributions, the per-region index with its own
/// weighted distribution, and the CCA2 lookup map.
519 void rebuild_indices()
// Global weights: one entry per country, in `_countries` order.
522 std::vector<double> weights;
523 weights.reserve(_countries.size());
525 std::ranges::transform(
526 _countries, std::back_inserter(weights), [](
const country& c) {
// Clamp to at least 1 so a country with population 0 keeps a non-zero
// weight.
527 return static_cast<double>(
528 std::max<std::uint64_t>(c.population, 1));
531 _distribution = std::discrete_distribution<std::size_t>(
532 weights.begin(), weights.end());
// Guarded because size() - 1 would wrap around on an empty vector.
535 if (!_countries.empty())
537 _uniform = std::uniform_int_distribution<std::size_t>(
538 0, _countries.size() - 1);
// NOTE(review): only `_region_index` is visibly cleared here; `_cca2_index`
// is overwritten per key below. Confirm whether it is also cleared in the
// lines omitted from this listing.
542 _region_index.clear();
// Single pass over the countries: record each position under its region
// and under its CCA2 code.
545 for (
auto&& [i, c] : _countries | std::views::enumerate)
// Convert the enumerate index to the unsigned type used for storage.
547 auto idx =
static_cast<std::size_t
>(i);
// operator[] creates the region entry on first use.
548 auto& entry = _region_index[c.region];
549 entry.country_indices.push_back(idx);
550 _cca2_index[c.cca2] = idx;
// Second pass: build each region's weighted distribution. Weights follow
// the order of `country_indices`, so a drawn value is a position in that
// vector.
554 for (
auto& [rgn, entry] : _region_index)
556 std::vector<double> rw;
557 rw.reserve(entry.country_indices.size());
559 for (
auto idx : entry.country_indices)
// Same population clamp as for the global weights.
561 rw.push_back(
static_cast<double>(
562 std::max<std::uint64_t>(
563 _countries[idx].population, 1)));
566 entry.distribution = std::discrete_distribution<std::size_t>(
567 rw.begin(), rw.end());
/// Converts one tab-separated record line into a `country`.
///
/// @param line A single record line with tab-delimited columns.
///
/// Columns are consumed by position (0..30) as assigned below.
/// NOTE(review): the handling of a too-short line, the local declarations
/// and the return statement are not visible in this listing.
574 static country parse_line(
const std::string& line)
// Number of columns that the assignments below read.
578 static constexpr std::size_t num_fields{31};
579 std::vector<std::string> fields;
580 fields.reserve(num_fields);
// Split on tab characters, copying each piece into its own string.
582 for (
auto part : line | std::views::split(
'\t'))
584 fields.emplace_back(std::ranges::begin(part),
585 std::ranges::end(part));
// Lines with fewer than num_fields columns take a separate path (not
// visible here); extra columns are not rejected by this check.
588 if (fields.size() < num_fields)
// Numeric helper: yields `fallback` when std::from_chars reports an error.
// The returned `ptr` is not inspected in the visible lines, so trailing
// characters after a valid number are not treated as an error.
595 auto parse_double = [](
const std::string& s,
double fallback = 0.0) {
601 auto [ptr, ec] = std::from_chars(s.data(), s.data() + s.size(), val);
602 return ec == std::errc{} ? val : fallback;
// Unsigned helper: yields 0 when std::from_chars reports an error.
605 auto parse_uint64 = [](
const std::string& s) -> std::uint64_t {
611 auto [ptr, ec] = std::from_chars(s.data(), s.data() + s.size(), val);
612 return ec == std::errc{} ? val : 0ULL;
// Text columns are moved out of `fields`, so each source string is left in
// a moved-from state afterwards.
616 c.cca2 = std::move(fields[0]);
617 c.cca3 = std::move(fields[1]);
618 c.ccn3 = std::move(fields[2]);
619 c.name_common = std::move(fields[3]);
620 c.name_official = std::move(fields[4]);
621 c.capital = std::move(fields[5]);
622 c.region = std::move(fields[6]);
623 c.subregion = std::move(fields[7]);
624 c.continent = std::move(fields[8]);
// Numeric columns: unparsable values become 0.0 / 0.
625 c.latitude = parse_double(fields[9]);
626 c.longitude = parse_double(fields[10]);
627 c.area = parse_uint64(fields[11]);
628 c.population = parse_uint64(fields[12]);
// Boolean columns: true only when the text is exactly "1".
629 c.landlocked = (fields[13] ==
"1");
630 c.independent = (fields[14] ==
"1");
631 c.un_member = (fields[15] ==
"1");
632 c.languages = std::move(fields[16]);
633 c.currency_code = std::move(fields[17]);
634 c.currency_name = std::move(fields[18]);
635 c.currency_symbol = std::move(fields[19]);
636 c.borders = std::move(fields[20]);
637 c.timezones = std::move(fields[21]);
638 c.driving_side = std::move(fields[22]);
639 c.tld = std::move(fields[23]);
640 c.idd_root = std::move(fields[24]);
641 c.idd_suffix = std::move(fields[25]);
642 c.demonym_m = std::move(fields[26]);
643 c.demonym_f = std::move(fields[27]);
644 c.flag_emoji = std::move(fields[28]);
645 c.income_level = std::move(fields[29]);
646 c.start_of_week = std::move(fields[30]);