/* charset.h - Maintain a character set for a crossword puzzle
 *
 * Copyright 2021 Federico Mena Quintero <federico@gnome.org>
 * Copyright 2023 Jonathan Blandford <jrb@gnome.org>
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
 *
 * SPDX-License-Identifier: (LGPL-2.1-or-later OR MIT)
 */

#pragma once

#include <glib-object.h>

/* G_BEGIN_DECLS / G_END_DECLS bracket every declaration in this header. */
G_BEGIN_DECLS


/* GType of IPuzCharset.
 * NOTE(review): ipuz_charset_get_type() is not declared in the visible part
 * of this header -- confirm that a declaration is in scope wherever
 * IPUZ_TYPE_CHARSET is used. */
#define IPUZ_TYPE_CHARSET (ipuz_charset_get_type ())

/* Plain, unchecked C cast to (IPuzCharset *).  The argument is parenthesized
 * so that compound expressions are cast as a whole. */
#define IPUZ_CHARSET(charset) ((IPuzCharset *) (charset))

/* Forward declarations; the struct layouts are not defined in this header,
 * so these types are only used through pointers here. */
typedef struct _IPuzCharsetBuilder IPuzCharsetBuilder;
typedef struct _IPuzCharset IPuzCharset;
typedef struct _IPuzCharsetIter IPuzCharsetIter;


/* Keep in sync with rust/src/charset.rs */
38
typedef struct _IPuzCharsetIterValue
39
{
40
  gunichar c;
41
  guint count;
42
} IPuzCharsetIterValue;
43

            
44

            
45
/**
46
 * ipuz_charset_builder_new:
47
 *
48
 * Returns: an empty builder for a character set.  Use `ipuz_charset_builder_add_text()` to populate it.
49
 */
50
IPuzCharsetBuilder   *ipuz_charset_builder_new              (void);
51

            
52
/**
53
 * ipuz_charset_builder_new_from_text:
54
 * @text: the text to base a new IPuzCharsetBuilder on, or NULL
55
 *
56
 * Returns: a new builder for character sets populated by @text.
57
 */
58
IPuzCharsetBuilder   *ipuz_charset_builder_new_from_text    (const char         *text);
59

            
60
/**
61
 * ipuz_charset_builder_new_for_language:
62
 * @lang: A language code, such as "en" or "es"
63
 *
64
 * Creates a charset builder with a list of all characters in common use in
65
 * crosswords for @lang's alphabet. @lang should be a country code,
66
 * but can be a fully-qualified locale (such as from the $LANG
67
 * environment variable). In that instance the remainder of the string
68
 * is ignored, as we don't consider regional distinctions when
69
 * determining a default alphabet. Along those lines, a lang of "C"
70
 * will return the English alphabet.
71
 *
72
 * Note that this returns the common alphabet of letters for a
73
 * language and will not include digraphs as independent
74
 * characters. As examples, Dutch will not include a separate 'ij'
75
 * digraph, despite the prevelance of "IJ" in Dutch puzzles.
76
 *
77
 * Returns: a newly allocated @IPuzCharsetBuilder, or NULL
78
 **/
79
IPuzCharsetBuilder   *ipuz_charset_builder_new_for_language (const char         *lang);
80

            
81
/**
82
 * ipuz_charset_builder_add_text:
83
 * @builder: the character set builder to populate.
84
 * @text: string with characters to add to the builder.
85
 *
86
 * Adds each unicode code point from @text into the @builder.
87
 */
88
void                  ipuz_charset_builder_add_text         (IPuzCharsetBuilder *builder,
89
                                                             const char         *text);
90

            
91
/**
92
 * ipuz_charset_builder_add_character:
93
 * @builder: the character set builder to extend.
94
 * @c: a unicode character to add to the builder.
95
 *
96
 * Adds @c to the @builder.
97
 **/
98
void                  ipuz_charset_builder_add_character    (IPuzCharsetBuilder *builder,
99
                                                             gunichar            c);
100

            
101
/**
102
 * ipuz_charset_builder_set_char_count:
103
 * @builder: the character set builder to extend.
104
 * @c: a unicode character to add to the builder.
105
 * @count: the count
106
 *
107
 * Explicitly sets the count of @c to be @count in the @builder.
108
 **/
109
void                  ipuz_charset_builder_set_char_count   (IPuzCharsetBuilder *builder,
110
                                                             gunichar            c,
111
                                                             guint               count);
112

            
113

            
114
/**
115
 * ipuz_charset_builder_remove_text:
116
 * @builder: a character set builder with some characters in it.
117
 * @text: text whose characters should be tried to be removed.
118
 *
119
 * Tries to remove all the characters in @text from the @builder,
120
 * i.e. decrease its character counts by as many instances of each
121
 * character there are in @text.  If @text contains characters that
122
 * are not already in the @builder, or if @text contains more of a
123
 * certain character than @builder already has, this function returns #FALSE and leaves
124
 * the @builder unchanged.
125
 *
126
 * Returns: whether @builder had enough characters to remove all of @text's.  This
127
 * function only changes the contents @builder if it returns #TRUE; otherwise,
128
 * if it returns #FALSE, the @text could not be removed and @builder remains unchanged.
129
 */
130
gboolean              ipuz_charset_builder_remove_text      (IPuzCharsetBuilder *builder,
131
                                                             const char         *text);
132

            
133
/**
134
 * ipuz_charset_builder_build:
135
 * @builder: A character set builder that has already been populated.
136
 *
137
 * Consumes @builder and frees it, and returns an immutable
138
 * #IPuzCharset.  The resulting charset can be queried efficiently for
139
 * character counts and such.
140
 *
141
 * Returns: A charset compiled from the information in the @builder.  Use
142
 * ipuz_charset_unref() to free the return value.
143
 */
144
IPuzCharset          *ipuz_charset_builder_build            (IPuzCharsetBuilder *builder);
145

            
146
/**
147
 * ipuz_charset_ref:
148
 * @charset: the character set to ref
149
 *
150
 * Refs the character set.
151
 *
152
 * Returns: @charset. This can be used to chain calls or ref on return.
153
 */
154
IPuzCharset          *ipuz_charset_ref              (IPuzCharset       *charset);
155

            
156
/**
157
 * ipuz_charset_unref:
158
 * @charset: the character set to unref
159
 *
160
 * Unrefs a character set, which will be freed when the reference count becomes 0.
161
 */
162
void                  ipuz_charset_unref            (IPuzCharset       *charset);
163

            
164
/**
165
 * ipuz_charset_compare:
166
 * @charset_a: a charset
167
 * @charset_b: a charset
168
 *
169
 * Compares two charsets to see if they have exactly the same contents.
170
 *
171
 * Returns: #TRUE if the characters have the same characters and character counts.
172
 */
173
gboolean              ipuz_charset_compare          (IPuzCharset       *charset_a,
174
                                                     IPuzCharset       *charset_b);
175

            
176

            
177
/**
178
 * ipuz_charset_get_char_index:
179
 * @charset: the character set to search in.
180
 * @c: character to search for.
181
 *
182
 * Returns the index of the character @c in the @charset, or -1 if it does not exist.
183
 */
184
gint                  ipuz_charset_get_char_index   (const IPuzCharset *charset,
185
                                                     gunichar           c);
186

            
187
/**
188
 * ipuz_charset_get_char_count:
189
 * @charset: the character set to lookup in
190
 * @c: a character
191
 *
192
 *
193
 *
194
 * Returns: the number of instances of @c in the text
195
 **/
196
guint                 ipuz_charset_get_char_count   (const IPuzCharset *charset,
197
                                                     gunichar           c);
198

            
199
/**
200
 * ipuz_charset_get_n_chars:
201
 * @charset: the character set to query.
202
 *
203
 * Returns: the number of different types of characters stored in the
204
 * @charset. This is a constant-time operation.
205
 */
206
gsize                 ipuz_charset_get_n_chars      (const IPuzCharset *charset);
207

            
208
/**
209
 * ipuz_charset_get_size:
210
 * @charset: the character set to query
211
 *
212
 * Returns the total number of characters stored in @charset. Unlike
213
 * ipuz_charset_get_n_chars() which returns the number of types of
214
 * characters, this returns the count of characters.
215
 *
216
 * Returns: Total number of characters
217
 **/
218
gsize                 ipuz_charset_get_size         (const IPuzCharset *charset);
219

            
220

            
221
/**
222
 * ipuz_charset_check_text
223
 * @charset: the character set to query
224
 * *text: the text to test
225
 *
226
 * Checks to see if all the characters in @text are contained within
227
 * @charset. This can be used to quickly assertain if a valid
228
 * character is being used within a puzzle.
229
 *
230
 * Returns: #TRUE, if all the characters in text exist in @charset
231
 **/
232
gboolean              ipuz_charset_check_text       (const IPuzCharset *charset,
233
                                                     const char        *text);
234

            
235
/**
236
 * ipuz_charset_iter_first:
237
 * @charset: A @IPuzCharset
238
 *
239
 * Gets an iteratior for querying the charset.  The first value can be queried
240
 * with ipuz_charset_iter_get_value().  Note that the iterator must be allowed to
241
 * terminate by running it until ipuz_charset_iter_next() returns #NULL; that last
242
 * iteration will free the #IPuzCharsetIter.
243
 *
244
 * Returns: The first iterator
245
 **/
246
IPuzCharsetIter      *ipuz_charset_iter_first       (const IPuzCharset *ipuz_charset);
247

            
248
/**
249
 * ipuz_charset_iter_next:
250
 * @iter: A @IPuzCharsetIter
251
 *
252
 * Returns the next iter in the charset sequentially. If we've reached
253
 * the end of the set, then return %NULL; this will free the #IPuzCharsetIter.
254
 *
255
 * Returns: the next @IPuzCharsetIter, or %NULL
256
 **/
257
IPuzCharsetIter      *ipuz_charset_iter_next        (IPuzCharsetIter   *iter);
258

            
259
/**
260
 * ipuz_charset_iter_get_value:
261
 * @iter: A @IPuzCharsetIter
262
 *
263
 * Returns: the current value of a character set iterator.  The value
264
 * contains the current character and its count.
265
 */
266
IPuzCharsetIterValue  ipuz_charset_iter_get_value   (IPuzCharsetIter   *iter);
267

            
268
/**
269
 * ipuz_charset_serialize:
270
 * @charset: the character set to serialize.
271
 *
272
 * Concatenates all the unique characters stored in a @charset, in the order in which they
273
 * would be returned by ipuz_charset_get_char_index().
274
 *
275
 * Returns: a string with all the characters from the character set.
276
 */
277
gchar                *ipuz_charset_serialize        (const IPuzCharset *ipuz_charset);
278

            
279
/**
280
 * ipuz_charset_deserialize:
281
 * @str: String serialization of a character set, as returned by ipuz_charset_serialize().
282
 *
283
 * Creates a new character set by deserializing from a string.
284
 *
285
 * Returns: a new character set.
286
 */
287
IPuzCharset          *ipuz_charset_deserialize      (const char        *str);
288

            
289

            
290
9
G_DEFINE_AUTOPTR_CLEANUP_FUNC (IPuzCharset, ipuz_charset_unref);
291

            
292

            
293
G_END_DECLS