// Static cg instance built through the explicit auto_probe_tag constructor
// overload. NOTE(review): the enclosing function is not visible in this
// excerpt; presumably a singleton accessor — confirm.
161 static cg inst{auto_probe_tag{}};
// Picks one city out of the whole _cities table.
// NOTE(review): the signature, the guard conditions and the return statement
// are elided from this excerpt; only the visible statements are described.

// Draw one value from the member engine and keep it as the 64-bit per-call seed.
173 auto call_seed =
static_cast<std::uint64_t
>(_engine());
// Error path for "nothing loaded". NOTE(review): the guarding condition is not
// visible here; presumably taken when _cities is empty — confirm.
189 throw std::runtime_error(
190 "No city data loaded. Call load() first.");
// A fresh local engine is used for this call only. Its seed is call_seed with
// the upper 32 bits XOR-folded into the lower 32 bits, then cast to the
// std::mt19937 seed type.
193 effolkronium::random_local call_engine;
194 call_engine.seed(
static_cast<std::mt19937::result_type
>(
195 (call_seed ^ (call_seed >> 32U))));
// Index comes from either _distribution or _uniform, both sampled with the
// per-call engine. NOTE(review): the selecting condition is elided; the
// per-country overload switches on _weighted, presumably the same here.
198 ? _distribution(call_engine.engine())
199 : _uniform(call_engine.engine());
// Copy the chosen entry and record the per-call seed on the copy.
200 city result = _cities[idx];
201 result._seed = call_seed;
// Country overload without an explicit seed: draws a 64-bit per-call seed from
// the member engine and forwards to get_city(country, call_seed).
// NOTE(review): the signature is elided from this excerpt.
212 auto call_seed =
static_cast<std::uint64_t
>(_engine());
213 return get_city(country, call_seed);
// Seeded, per-country city pick (const member function).
// Visible behaviour:
//   - throws std::runtime_error with the "No city data loaded" message
//     (guarding condition elided; presumably when _cities is empty — confirm);
//   - throws std::invalid_argument when `country` has no entry in
//     _country_index;
//   - otherwise samples an index with a local engine seeded from call_seed and
//     stamps call_seed on the copied city.
// NOTE(review): the leading part of the signature and the return statement are
// not visible in this excerpt.
223 std::uint64_t call_seed)
const
227 throw std::runtime_error(
228 "No city data loaded. Call load() first.");
// Look up the per-country entry; an unknown code is reported to the caller.
231 auto it = _country_index.find(country);
232 if (it == _country_index.end())
234 throw std::invalid_argument(
235 "No cities found for country code: " + country);
// Local engine for this call only, seeded with call_seed after XOR-folding its
// upper 32 bits into the lower 32 bits and casting to the std::mt19937 seed type.
238 effolkronium::random_local call_engine;
239 call_engine.seed(
static_cast<std::mt19937::result_type
>(
240 (call_seed ^ (call_seed >> 32U))));
242 const auto& idx = it->second;
// `selected` is a position inside idx.city_indices, not an index into _cities.
// With _weighted set, the entry's discrete distribution is sampled; otherwise a
// uniform distribution over [0, city_indices.size() - 1] is built on the spot.
243 auto selected = _weighted
244 ? idx.distribution(call_engine.engine())
245 : std::uniform_int_distribution<std::size_t>(
246 0, idx.city_indices.size() - 1)(
247 call_engine.engine());
// Translate the per-country position into the global _cities index, copy the
// entry and record the seed on the copy.
248 city result = _cities[idx.city_indices[selected]];
249 result._seed = call_seed;
// NOTE(review): these four statements belong to four separate member functions
// whose signatures are elided from this excerpt.

// Reseed the member engine with the supplied seed_value.
265 _engine.seed(seed_value);
// Reseed the member engine with a value obtained from std::random_device.
275 _engine.seed(std::random_device{}());
// True when at least one city is stored in _cities.
305 return !_cities.empty();
// Number of entries currently stored in _cities.
312 return _cities.size();
// Reads city records from the tab-separated file at tsv_path and appends the
// parsed entries to _cities.
// NOTE(review): braces, the failure handling after each check, and any
// clearing or index-rebuilding steps are elided from this excerpt.
323 void load(
const std::filesystem::path& tsv_path)
// The path must exist and be a regular file. NOTE(review): the action taken
// when this check fails is not visible here.
325 if (!std::filesystem::exists(tsv_path) ||
326 !std::filesystem::is_regular_file(tsv_path))
331 std::ifstream file{tsv_path};
// One line is consumed before the main loop. NOTE(review): presumably the
// header row; the handling of a failed read is elided — confirm.
340 if (!std::getline(file, line))
// Remaining lines are processed one at a time.
345 while (std::getline(file, line))
// Detects a trailing carriage return (CRLF input). NOTE(review): the body is
// elided; presumably the '\r' is removed — confirm.
353 if (!line.empty() && line.back() ==
'\r')
358 auto c = parse_line(line);
// Only records with a non-zero geonameid are kept. NOTE(review): presumably
// parse_line yields geonameid == 0 for lines it rejects — confirm.
359 if (c.geonameid != 0)
361 _cities.push_back(std::move(c));
// Searches a fixed list of base directories for <base>/<subfolder>/cities.tsv
// and loads the first match.
// NOTE(review): the enclosing function's signature is elided; `tier` is
// presumably its parameter of type `dataset` — confirm.

// Candidate base directories, tried in this order.
378 static constexpr std::array probe_paths = {
379 "resources",
"../resources",
"city-generator/resources"};
// dataset::full maps to the "full" subfolder; any other tier maps to "lite".
381 const char* subfolder =
382 (tier == dataset::full) ?
"full" :
"lite";
// First base directory whose cities.tsv is a regular file.
384 auto found = std::ranges::find_if(probe_paths, [&](
const char* base) {
386 std::filesystem::path{base} / subfolder /
"cities.tsv";
387 return std::filesystem::is_regular_file(tsv);
// Load only when a candidate was found. NOTE(review): behaviour when nothing
// is found is not visible in this excerpt.
389 if (found != probe_paths.end())
391 load(std::filesystem::path{*found} / subfolder /
"cities.tsv");
// All loaded city records; other members index into this vector.
399 std::vector<city> _cities;
// Distribution over indices of _cities, rebuilt by rebuild_indices() from
// population-based weights. Declared mutable, as is the per-country one below.
402 mutable std::discrete_distribution<std::size_t> _distribution;
// Uniform distribution over [0, _cities.size() - 1], set by rebuild_indices()
// when _cities is non-empty.
405 mutable std::uniform_int_distribution<std::size_t> _uniform;
// Selects weighted (true) versus uniform (false) sampling; defaults to true.
408 bool _weighted{
true};
// NOTE(review): the next two members presumably belong to the country_entry
// struct used by _country_index; its header is elided from this excerpt.
// Indices into _cities for the cities of one country.
413 std::vector<std::size_t> city_indices;
// Weighted distribution over positions in city_indices; mutable so the const
// per-country get_city overload can sample from it.
414 mutable std::discrete_distribution<std::size_t> distribution;
// Per-country lookup keyed by the city's country_code string.
417 std::unordered_map<std::string, country_entry> _country_index;
// Member engine used to draw per-call seeds; seeded from std::random_device
// at construction.
420 std::mt19937_64 _engine{std::random_device{}()};
// Empty tag type used to select the auto-probing constructor below.
423 struct auto_probe_tag {};
// Auto-probing constructor: looks for a resources directory in a fixed list of
// locations and checks it for a lite dataset first, then a full one.
// NOTE(review): braces and the bodies of the two dataset branches are elided
// from this excerpt; presumably each branch loads the corresponding file.
426 explicit cg(auto_probe_tag )
// Candidate base directories, tried in this order.
428 static constexpr std::array probe_paths = {
429 "resources",
"../resources",
"city-generator/resources"};
// First candidate that exists and is a directory.
431 auto found = std::ranges::find_if(probe_paths, [](
const char* p) {
432 return std::filesystem::exists(p) &&
433 std::filesystem::is_directory(p);
435 if (found != probe_paths.end())
437 const std::filesystem::path base{*found};
438 auto lite_tsv = base /
"lite" /
"cities.tsv";
439 auto full_tsv = base /
"full" /
"cities.tsv";
// The lite file is checked first; the full file is considered only when the
// lite one is not a regular file.
440 if (std::filesystem::is_regular_file(lite_tsv))
444 else if (std::filesystem::is_regular_file(full_tsv))
// Recomputes every sampling structure derived from _cities: the global
// weighted distribution, the global uniform distribution and the per-country
// index with its weighted distributions.
// NOTE(review): braces and the lambda header of the transform call are elided
// from this excerpt.
452 void rebuild_indices()
// One weight per city: its population as a double, clamped to at least 1 so a
// city with population 0 still gets a non-zero weight.
455 std::vector<double> weights;
456 weights.reserve(_cities.size());
458 std::ranges::transform(_cities, std::back_inserter(weights),
460 return static_cast<double>(
461 std::max<std::uint64_t>(c.population, 1));
464 _distribution = std::discrete_distribution<std::size_t>(
465 weights.begin(), weights.end());
// The uniform range is only set when there is at least one city, because
// _cities.size() - 1 would wrap around for an empty vector.
468 if (!_cities.empty())
470 _uniform = std::uniform_int_distribution<std::size_t>(
471 0, _cities.size() - 1);
// Rebuild the per-country index from scratch.
475 _country_index.clear();
// Group the position of each city in _cities under its country_code;
// operator[] creates the entry on first use.
477 for (
auto&& [i, c] : _cities | std::views::enumerate)
479 auto& entry = _country_index[c.country_code];
480 entry.city_indices.push_back(
static_cast<std::size_t
>(i));
// Build each country's weighted distribution using the same clamped-population
// weighting as the global one. Weights follow the order of city_indices, so a
// sampled value is a position in city_indices.
484 for (
auto& [code, entry] : _country_index)
486 std::vector<double> cw;
487 cw.reserve(entry.city_indices.size());
489 for (
auto idx : entry.city_indices)
491 cw.push_back(
static_cast<double>(
492 std::max<std::uint64_t>(_cities[idx].population, 1)));
495 entry.distribution = std::discrete_distribution<std::size_t>(
496 cw.begin(), cw.end());
// Converts one tab-separated line into a city record.
// NOTE(review): braces, the declaration of `c`, some assignment targets and the
// return statement are elided from this excerpt. The visible lines show no
// try/catch, and the std::sto* conversions throw std::invalid_argument or
// std::out_of_range on malformed numeric text — confirm how callers handle it.
504 static city parse_line(
const std::string& line)
509 std::vector<std::string> fields;
// Split on tab characters; each piece is copied into its own std::string.
512 for (
auto part : line | std::views::split(
'\t'))
514 fields.emplace_back(std::ranges::begin(part),
515 std::ranges::end(part));
// At least 16 columns are required (indices 0..15 are read below).
// NOTE(review): the result for a short line is not visible; load() keeps only
// records with geonameid != 0, so presumably a zero-id city is returned.
519 static constexpr std::size_t expected_fields{16};
521 if (fields.size() < expected_fields)
// Column 0: numeric id, parsed with std::stoul and narrowed to 32 bits.
528 c.geonameid =
static_cast<std::uint32_t
>(std::stoul(fields[0]));
// Columns 1 and 7 are not referenced in the visible lines.
530 c.asciiname = fields[2];
531 c.latitude = std::stod(fields[3]);
532 c.longitude = std::stod(fields[4]);
533 c.feature_code = fields[5];
534 c.country_code = fields[6];
536 c.admin1_code = fields[8];
537 c.admin2_code = fields[9];
538 c.admin3_code = fields[10];
539 c.admin4_code = fields[11];
// Column 12 parsed as an unsigned 64-bit value. NOTE(review): the assignment
// target and the alternative branch are elided; presumably c.population with a
// fallback for an empty field — confirm.
543 :
static_cast<std::uint64_t
>(std::stoull(fields[12]));
// Column 13 as a 16-bit integer, with -9999 in the other branch.
// NOTE(review): target and condition are elided; presumably -9999 marks an
// empty elevation field — confirm.
546 ?
static_cast<std::int16_t
>(-9999)
547 : static_cast<std::int16_t>(std::stoi(fields[13]));
// Column 14: dem, defaulting to 0 when the field is empty.
548 c.dem = fields[14].empty()
549 ?
static_cast<std::int16_t
>(0)
550 : static_cast<std::int16_t>(std::stoi(fields[14]));
551 c.timezone = fields[15];