mirror of
https://github.com/ggerganov/llama.cpp.git
synced 2024-12-25 05:48:47 +01:00
32c8486e1f
Also use C locale for ispunct/isspace, and split unicode-data.cpp from unicode.cpp.
17 lines
782 B
C++
17 lines
782 B
C++
#pragma once
|
|
|
|
#include <cstdint>
|
|
#include <map>
|
|
#include <utility>
|
|
#include <vector>
|
|
|
|
// Constant table of (uint32_t, uint32_t) pairs named for the "digit" category.
// Declaration only (extern): the definition lives outside this header.
// NOTE(review): each pair presumably holds the first and last codepoint of a range —
// confirm the bounds and their inclusivity against the definition.
extern const std::vector<std::pair<uint32_t, uint32_t>> unicode_ranges_digit;
|
|
// Constant table of (uint32_t, uint32_t) pairs named for the "letter" category.
// Declaration only (extern): the definition lives outside this header.
// NOTE(review): each pair presumably holds the first and last codepoint of a range —
// confirm the bounds and their inclusivity against the definition.
extern const std::vector<std::pair<uint32_t, uint32_t>> unicode_ranges_letter;
|
|
// Constant table of (uint32_t, uint32_t) pairs named for the "whitespace" category.
// Declaration only (extern): the definition lives outside this header.
// NOTE(review): each pair presumably holds the first and last codepoint of a range —
// confirm the bounds and their inclusivity against the definition.
extern const std::vector<std::pair<uint32_t, uint32_t>> unicode_ranges_whitespace;
|
|
// Constant table of (uint32_t, uint32_t) pairs named for the "accent mark" category.
// Declaration only (extern): the definition lives outside this header.
// NOTE(review): each pair presumably holds the first and last codepoint of a range —
// confirm the bounds and their inclusivity against the definition.
extern const std::vector<std::pair<uint32_t, uint32_t>> unicode_ranges_accent_mark;
|
|
// Constant table of (uint32_t, uint32_t) pairs named for the "punctuation" category.
// Declaration only (extern): the definition lives outside this header.
// NOTE(review): each pair presumably holds the first and last codepoint of a range —
// confirm the bounds and their inclusivity against the definition.
extern const std::vector<std::pair<uint32_t, uint32_t>> unicode_ranges_punctuation;
|
|
// Constant table of (uint32_t, uint32_t) pairs named for the "symbol" category.
// Declaration only (extern): the definition lives outside this header.
// NOTE(review): each pair presumably holds the first and last codepoint of a range —
// confirm the bounds and their inclusivity against the definition.
extern const std::vector<std::pair<uint32_t, uint32_t>> unicode_ranges_symbol;
|
|
// Constant table of (uint32_t, uint32_t) pairs named for the "control" category.
// Declaration only (extern): the definition lives outside this header.
// NOTE(review): each pair presumably holds the first and last codepoint of a range —
// confirm the bounds and their inclusivity against the definition.
extern const std::vector<std::pair<uint32_t, uint32_t>> unicode_ranges_control;
|
|
// Constant uint32_t -> uint32_t multimap; being a std::multimap, one key may carry
// several values. Declaration only (extern): the definition lives outside this header.
// NOTE(review): the name suggests a codepoint -> NFD (canonical decomposition) mapping —
// confirm the meaning of keys/values and the order of multiple values against the definition.
extern const std::multimap<uint32_t, uint32_t> unicode_map_nfd;
|
|
// Constant char32_t -> char32_t map (unique keys). Declaration only (extern): the
// definition lives outside this header.
// NOTE(review): the name suggests a codepoint -> lowercase codepoint mapping — confirm
// against the definition. This table is typed with char32_t, whereas the other tables
// declared in this header use uint32_t; callers holding uint32_t values convert implicitly.
extern const std::map<char32_t, char32_t> unicode_map_lowercase;
|