// Nuspell spellchecker: utils.hxx
1 /* Copyright 2016-2022 Dimitrij Mijoski
2  *
3  * This file is part of Nuspell.
4  *
5  * Nuspell is free software: you can redistribute it and/or modify
6  * it under the terms of the GNU Lesser General Public License as published by
7  * the Free Software Foundation, either version 3 of the License, or
8  * (at your option) any later version.
9  *
10  * Nuspell is distributed in the hope that it will be useful,
11  * but WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13  * GNU Lesser General Public License for more details.
14  *
15  * You should have received a copy of the GNU Lesser General Public License
16  * along with Nuspell. If not, see <http://www.gnu.org/licenses/>.
17  */
18 
19 #ifndef NUSPELL_UTILS_HXX
20 #define NUSPELL_UTILS_HXX
21 
22 #include "nuspell_export.h"
23 
24 #include <string>
25 #include <string_view>
26 #include <vector>
27 
28 #include <unicode/locid.h>
29 
// Branch-prediction hint macros.
//
// When __GNUC__ is defined, the condition is normalised to 0/1 with `!!` and
// passed to __builtin_expect together with the value it is expected to have.
// Otherwise both macros expand to the bare expression.
#if !defined(__GNUC__)
#define likely(expr) (expr)
#define unlikely(expr) (expr)
#else
#define likely(expr) __builtin_expect(!!(expr), 1)
#define unlikely(expr) __builtin_expect(!!(expr), 0)
#endif
37 
38 struct UConverter; // unicode/ucnv.h
39 
40 namespace nuspell {
41 inline namespace v5 {
42 
43 NUSPELL_DEPRECATED_EXPORT auto split_on_any_of(std::string_view s,
44  const char* sep,
45  std::vector<std::string>& out)
46  -> std::vector<std::string>&;
47 
48 NUSPELL_EXPORT auto utf32_to_utf8(std::u32string_view in, std::string& out)
49  -> void;
50 NUSPELL_EXPORT auto utf32_to_utf8(std::u32string_view in) -> std::string;
51 
52 auto valid_utf8_to_32(std::string_view in, std::u32string& out) -> void;
53 auto valid_utf8_to_32(std::string_view in) -> std::u32string;
54 
55 auto utf8_to_16(std::string_view in) -> std::u16string;
56 auto utf8_to_16(std::string_view in, std::u16string& out) -> bool;
57 
58 auto validate_utf8(std::string_view s) -> bool;
59 
60 NUSPELL_EXPORT auto is_all_ascii(std::string_view s) -> bool;
61 
62 NUSPELL_EXPORT auto latin1_to_ucs2(std::string_view s) -> std::u16string;
63 auto latin1_to_ucs2(std::string_view s, std::u16string& out) -> void;
64 
65 NUSPELL_EXPORT auto is_all_bmp(std::u16string_view s) -> bool;
66 
67 auto to_upper_ascii(std::string& s) -> void;
68 
69 [[nodiscard]] NUSPELL_EXPORT auto to_upper(std::string_view in,
70  const icu::Locale& loc)
71  -> std::string;
72 [[nodiscard]] NUSPELL_EXPORT auto to_title(std::string_view in,
73  const icu::Locale& loc)
74  -> std::string;
75 [[nodiscard]] NUSPELL_EXPORT auto to_lower(std::string_view in,
76  const icu::Locale& loc)
77  -> std::string;
78 
79 auto to_upper(std::string_view in, const icu::Locale& loc, std::string& out)
80  -> void;
81 auto to_title(std::string_view in, const icu::Locale& loc, std::string& out)
82  -> void;
83 auto to_lower(std::u32string_view in, const icu::Locale& loc,
84  std::u32string& out) -> void;
85 auto to_lower(std::string_view in, const icu::Locale& loc, std::string& out)
86  -> void;
87 auto to_lower_char_at(std::string& s, size_t i, const icu::Locale& loc) -> void;
88 auto to_title_char_at(std::string& s, size_t i, const icu::Locale& loc) -> void;
89 
/**
 * Casing category of a word; this is the type returned by classify_casing().
 *
 * Stored in a single `char`. The enumerators are numbered 0..4 in the order
 * listed. The per-enumerator notes below are read off the names only.
 * NOTE(review): confirm them against the implementation of classify_casing().
 */
enum class Casing : char {
	SMALL = 0,        // presumably: no upper-case letters
	INIT_CAPITAL = 1, // presumably: only the first letter is upper case
	ALL_CAPITAL = 2,  // presumably: every letter is upper case
	CAMEL = 3,        // presumably: mixed case, starting in lower case
	PASCAL = 4        // presumably: mixed case, starting in upper case
};
104 
105 NUSPELL_EXPORT auto classify_casing(std::string_view s) -> Casing;
106 
107 auto has_uppercase_at_compound_word_boundary(std::string_view word, size_t i)
108  -> bool;
109 
111  UConverter* cnv = nullptr;
112 
113  public:
114  Encoding_Converter() = default;
115  explicit Encoding_Converter(const char* enc);
116  explicit Encoding_Converter(const std::string& enc)
117  : Encoding_Converter(enc.c_str())
118  {
119  }
121  Encoding_Converter(const Encoding_Converter& other) = delete;
122  Encoding_Converter(Encoding_Converter&& other) noexcept
123  {
124  cnv = other.cnv;
125  cnv = nullptr;
126  }
127  auto operator=(const Encoding_Converter& other)
128  -> Encoding_Converter& = delete;
129  auto operator=(Encoding_Converter&& other) noexcept
131  {
132  std::swap(cnv, other.cnv);
133  return *this;
134  }
135  auto to_utf8(std::string_view in, std::string& out) -> bool;
136  auto valid() -> bool { return cnv != nullptr; }
137 };
138 
139 auto replace_ascii_char(std::string& s, char from, char to) -> void;
140 auto erase_chars(std::string& s, std::string_view erase_chars) -> void;
141 NUSPELL_EXPORT auto is_number(std::string_view s) -> bool;
142 auto count_appereances_of(std::string_view haystack, std::string_view needles)
143  -> size_t;
144 
/**
 * Tests whether `haystack` starts with `needle`.
 *
 * An empty `needle` is a prefix of every string, including the empty one.
 *
 * @param haystack text to inspect.
 * @param needle   prefix to look for.
 * @return true if the first `needle.size()` characters of `haystack` equal
 *         `needle`; false otherwise, including when `haystack` is shorter.
 */
inline auto begins_with(std::string_view haystack, std::string_view needle)
    -> bool
{
	if (haystack.size() < needle.size())
		return false;
	return haystack.substr(0, needle.size()) == needle;
}
150 
/**
 * Tests whether `haystack` ends with `needle`.
 *
 * An empty `needle` is a suffix of every string, including the empty one.
 *
 * @param haystack text to inspect.
 * @param needle   suffix to look for.
 * @return true if the last `needle.size()` characters of `haystack` equal
 *         `needle`; false otherwise, including when `haystack` is shorter.
 */
inline auto ends_with(std::string_view haystack, std::string_view needle)
    -> bool
{
	// Guard first: the subtraction below would wrap around otherwise.
	if (needle.size() > haystack.size())
		return false;
	const auto tail = haystack.substr(haystack.size() - needle.size());
	return tail == needle;
}
158 
/**
 * Returns whatever `x.data()` yields, i.e. the start of `x`'s storage for
 * types that expose a `data()` member (std::string, std::vector, ...).
 */
template <class Container>
auto begin_ptr(Container& x)
{
	auto first = x.data();
	return first;
}
/**
 * Returns `x.data() + x.size()`, i.e. the one-past-the-end position of `x`'s
 * storage for types that expose `data()` and `size()` members.
 */
template <class Container>
auto end_ptr(Container& x)
{
	auto first = x.data();
	return first + x.size();
}
169 } // namespace v5
170 } // namespace nuspell
171 #endif // NUSPELL_UTILS_HXX
// Library main namespace. Definition: aff_data.cxx:33
// Definition: utils.hxx:110