/* vi:set ts=8 sts=4 sw=4 noet:
 *
 * VIM - Vi IMproved by Bram Moolenaar
 *
 * Do ":help uganda" in Vim to read copying and usage conditions.
 * Do ":help credits" in Vim to see a list of people who contributed.
 * See README.txt for an overview of the Vim source code.
 */

/*
 * arabic.c: functions for Arabic language
 *
 * Author: Nadim Shaikli & Isam Bayazidi
 */

Bram Moolenaar75464dc2016-07-02 20:27:50 +020016#include "vim.h"
17
18#if defined(FEAT_ARABIC) || defined(PROTO)
19
Bram Moolenaarf28dbce2016-01-29 22:03:47 +010020static int A_firstc_laa(int c1, int c);
21static int A_is_harakat(int c);
22static int A_is_iso(int c);
23static int A_is_formb(int c);
24static int A_is_ok(int c);
25static int A_is_valid(int c);
26static int A_is_special(int c);
Bram Moolenaar071d4272004-06-13 20:20:40 +000027
28
29/*
30 * Returns True if c is an ISO-8859-6 shaped ARABIC letter (user entered)
31 */
32 static int
Bram Moolenaar7454a062016-01-30 15:14:10 +010033A_is_a(int cur_c)
Bram Moolenaar071d4272004-06-13 20:20:40 +000034{
35 switch (cur_c)
36 {
37 case a_HAMZA:
38 case a_ALEF_MADDA:
39 case a_ALEF_HAMZA_ABOVE:
40 case a_WAW_HAMZA:
41 case a_ALEF_HAMZA_BELOW:
42 case a_YEH_HAMZA:
43 case a_ALEF:
44 case a_BEH:
45 case a_TEH_MARBUTA:
46 case a_TEH:
47 case a_THEH:
48 case a_JEEM:
49 case a_HAH:
50 case a_KHAH:
51 case a_DAL:
52 case a_THAL:
53 case a_REH:
54 case a_ZAIN:
55 case a_SEEN:
56 case a_SHEEN:
57 case a_SAD:
58 case a_DAD:
59 case a_TAH:
60 case a_ZAH:
61 case a_AIN:
62 case a_GHAIN:
63 case a_TATWEEL:
64 case a_FEH:
65 case a_QAF:
66 case a_KAF:
67 case a_LAM:
68 case a_MEEM:
69 case a_NOON:
70 case a_HEH:
71 case a_WAW:
72 case a_ALEF_MAKSURA:
73 case a_YEH:
74 return TRUE;
75 }
76
77 return FALSE;
78}
79
80
81/*
82 * Returns True if c is an Isolated Form-B ARABIC letter
83 */
84 static int
Bram Moolenaar7454a062016-01-30 15:14:10 +010085A_is_s(int cur_c)
Bram Moolenaar071d4272004-06-13 20:20:40 +000086{
87 switch (cur_c)
88 {
89 case a_s_HAMZA:
90 case a_s_ALEF_MADDA:
91 case a_s_ALEF_HAMZA_ABOVE:
92 case a_s_WAW_HAMZA:
93 case a_s_ALEF_HAMZA_BELOW:
94 case a_s_YEH_HAMZA:
95 case a_s_ALEF:
96 case a_s_BEH:
97 case a_s_TEH_MARBUTA:
98 case a_s_TEH:
99 case a_s_THEH:
100 case a_s_JEEM:
101 case a_s_HAH:
102 case a_s_KHAH:
103 case a_s_DAL:
104 case a_s_THAL:
105 case a_s_REH:
106 case a_s_ZAIN:
107 case a_s_SEEN:
108 case a_s_SHEEN:
109 case a_s_SAD:
110 case a_s_DAD:
111 case a_s_TAH:
112 case a_s_ZAH:
113 case a_s_AIN:
114 case a_s_GHAIN:
115 case a_s_FEH:
116 case a_s_QAF:
117 case a_s_KAF:
118 case a_s_LAM:
119 case a_s_MEEM:
120 case a_s_NOON:
121 case a_s_HEH:
122 case a_s_WAW:
123 case a_s_ALEF_MAKSURA:
124 case a_s_YEH:
125 return TRUE;
126 }
127
128 return FALSE;
129}
130
131
132/*
133 * Returns True if c is a Final shape of an ARABIC letter
134 */
135 static int
Bram Moolenaar7454a062016-01-30 15:14:10 +0100136A_is_f(int cur_c)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000137{
138 switch (cur_c)
139 {
140 case a_f_ALEF_MADDA:
141 case a_f_ALEF_HAMZA_ABOVE:
142 case a_f_WAW_HAMZA:
143 case a_f_ALEF_HAMZA_BELOW:
144 case a_f_YEH_HAMZA:
145 case a_f_ALEF:
146 case a_f_BEH:
147 case a_f_TEH_MARBUTA:
148 case a_f_TEH:
149 case a_f_THEH:
150 case a_f_JEEM:
151 case a_f_HAH:
152 case a_f_KHAH:
153 case a_f_DAL:
154 case a_f_THAL:
155 case a_f_REH:
156 case a_f_ZAIN:
157 case a_f_SEEN:
158 case a_f_SHEEN:
159 case a_f_SAD:
160 case a_f_DAD:
161 case a_f_TAH:
162 case a_f_ZAH:
163 case a_f_AIN:
164 case a_f_GHAIN:
165 case a_f_FEH:
166 case a_f_QAF:
167 case a_f_KAF:
168 case a_f_LAM:
169 case a_f_MEEM:
170 case a_f_NOON:
171 case a_f_HEH:
172 case a_f_WAW:
173 case a_f_ALEF_MAKSURA:
174 case a_f_YEH:
175 case a_f_LAM_ALEF_MADDA_ABOVE:
176 case a_f_LAM_ALEF_HAMZA_ABOVE:
177 case a_f_LAM_ALEF_HAMZA_BELOW:
178 case a_f_LAM_ALEF:
179 return TRUE;
180 }
181 return FALSE;
182}
183
184
185/*
186 * Change shape - from ISO-8859-6/Isolated to Form-B Isolated
187 */
188 static int
Bram Moolenaar7454a062016-01-30 15:14:10 +0100189chg_c_a2s(int cur_c)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000190{
Bram Moolenaar071d4272004-06-13 20:20:40 +0000191 switch (cur_c)
192 {
Bram Moolenaar5f53dd32017-03-01 14:02:30 +0100193 case a_HAMZA: return a_s_HAMZA;
194 case a_ALEF_MADDA: return a_s_ALEF_MADDA;
195 case a_ALEF_HAMZA_ABOVE: return a_s_ALEF_HAMZA_ABOVE;
196 case a_WAW_HAMZA: return a_s_WAW_HAMZA;
197 case a_ALEF_HAMZA_BELOW: return a_s_ALEF_HAMZA_BELOW;
198 case a_YEH_HAMZA: return a_s_YEH_HAMZA;
199 case a_ALEF: return a_s_ALEF;
200 case a_TEH_MARBUTA: return a_s_TEH_MARBUTA;
201 case a_DAL: return a_s_DAL;
202 case a_THAL: return a_s_THAL;
203 case a_REH: return a_s_REH;
204 case a_ZAIN: return a_s_ZAIN;
205 case a_TATWEEL: return cur_c; /* exceptions */
206 case a_WAW: return a_s_WAW;
207 case a_ALEF_MAKSURA: return a_s_ALEF_MAKSURA;
208 case a_BEH: return a_s_BEH;
209 case a_TEH: return a_s_TEH;
210 case a_THEH: return a_s_THEH;
211 case a_JEEM: return a_s_JEEM;
212 case a_HAH: return a_s_HAH;
213 case a_KHAH: return a_s_KHAH;
214 case a_SEEN: return a_s_SEEN;
215 case a_SHEEN: return a_s_SHEEN;
216 case a_SAD: return a_s_SAD;
217 case a_DAD: return a_s_DAD;
218 case a_TAH: return a_s_TAH;
219 case a_ZAH: return a_s_ZAH;
220 case a_AIN: return a_s_AIN;
221 case a_GHAIN: return a_s_GHAIN;
222 case a_FEH: return a_s_FEH;
223 case a_QAF: return a_s_QAF;
224 case a_KAF: return a_s_KAF;
225 case a_LAM: return a_s_LAM;
226 case a_MEEM: return a_s_MEEM;
227 case a_NOON: return a_s_NOON;
228 case a_HEH: return a_s_HEH;
229 case a_YEH: return a_s_YEH;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000230 }
Bram Moolenaar5f53dd32017-03-01 14:02:30 +0100231 return 0;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000232}
233
234
235/*
236 * Change shape - from ISO-8859-6/Isolated to Initial
237 */
238 static int
Bram Moolenaar7454a062016-01-30 15:14:10 +0100239chg_c_a2i(int cur_c)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000240{
Bram Moolenaar071d4272004-06-13 20:20:40 +0000241 switch (cur_c)
242 {
Bram Moolenaar5f53dd32017-03-01 14:02:30 +0100243 case a_YEH_HAMZA: return a_i_YEH_HAMZA;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000244 case a_HAMZA: /* exceptions */
Bram Moolenaar5f53dd32017-03-01 14:02:30 +0100245 return a_s_HAMZA;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000246 case a_ALEF_MADDA: /* exceptions */
Bram Moolenaar5f53dd32017-03-01 14:02:30 +0100247 return a_s_ALEF_MADDA;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000248 case a_ALEF_HAMZA_ABOVE: /* exceptions */
Bram Moolenaar5f53dd32017-03-01 14:02:30 +0100249 return a_s_ALEF_HAMZA_ABOVE;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000250 case a_WAW_HAMZA: /* exceptions */
Bram Moolenaar5f53dd32017-03-01 14:02:30 +0100251 return a_s_WAW_HAMZA;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000252 case a_ALEF_HAMZA_BELOW: /* exceptions */
Bram Moolenaar5f53dd32017-03-01 14:02:30 +0100253 return a_s_ALEF_HAMZA_BELOW;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000254 case a_ALEF: /* exceptions */
Bram Moolenaar5f53dd32017-03-01 14:02:30 +0100255 return a_s_ALEF;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000256 case a_TEH_MARBUTA: /* exceptions */
Bram Moolenaar5f53dd32017-03-01 14:02:30 +0100257 return a_s_TEH_MARBUTA;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000258 case a_DAL: /* exceptions */
Bram Moolenaar5f53dd32017-03-01 14:02:30 +0100259 return a_s_DAL;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000260 case a_THAL: /* exceptions */
Bram Moolenaar5f53dd32017-03-01 14:02:30 +0100261 return a_s_THAL;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000262 case a_REH: /* exceptions */
Bram Moolenaar5f53dd32017-03-01 14:02:30 +0100263 return a_s_REH;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000264 case a_ZAIN: /* exceptions */
Bram Moolenaar5f53dd32017-03-01 14:02:30 +0100265 return a_s_ZAIN;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000266 case a_TATWEEL: /* exceptions */
Bram Moolenaar5f53dd32017-03-01 14:02:30 +0100267 return cur_c;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000268 case a_WAW: /* exceptions */
Bram Moolenaar5f53dd32017-03-01 14:02:30 +0100269 return a_s_WAW;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000270 case a_ALEF_MAKSURA: /* exceptions */
Bram Moolenaar5f53dd32017-03-01 14:02:30 +0100271 return a_s_ALEF_MAKSURA;
272 case a_BEH: return a_i_BEH;
273 case a_TEH: return a_i_TEH;
274 case a_THEH: return a_i_THEH;
275 case a_JEEM: return a_i_JEEM;
276 case a_HAH: return a_i_HAH;
277 case a_KHAH: return a_i_KHAH;
278 case a_SEEN: return a_i_SEEN;
279 case a_SHEEN: return a_i_SHEEN;
280 case a_SAD: return a_i_SAD;
281 case a_DAD: return a_i_DAD;
282 case a_TAH: return a_i_TAH;
283 case a_ZAH: return a_i_ZAH;
284 case a_AIN: return a_i_AIN;
285 case a_GHAIN: return a_i_GHAIN;
286 case a_FEH: return a_i_FEH;
287 case a_QAF: return a_i_QAF;
288 case a_KAF: return a_i_KAF;
289 case a_LAM: return a_i_LAM;
290 case a_MEEM: return a_i_MEEM;
291 case a_NOON: return a_i_NOON;
292 case a_HEH: return a_i_HEH;
293 case a_YEH: return a_i_YEH;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000294 }
Bram Moolenaar5f53dd32017-03-01 14:02:30 +0100295 return 0;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000296}
297
298
299/*
300 * Change shape - from ISO-8859-6/Isolated to Medial
301 */
302 static int
Bram Moolenaar7454a062016-01-30 15:14:10 +0100303chg_c_a2m(int cur_c)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000304{
Bram Moolenaar071d4272004-06-13 20:20:40 +0000305 switch (cur_c)
306 {
Bram Moolenaar5f53dd32017-03-01 14:02:30 +0100307 case a_HAMZA: return a_s_HAMZA; /* exception */
308 case a_ALEF_MADDA: return a_f_ALEF_MADDA; /* exception */
309 case a_ALEF_HAMZA_ABOVE: return a_f_ALEF_HAMZA_ABOVE; /* exception */
310 case a_WAW_HAMZA: return a_f_WAW_HAMZA; /* exception */
311 case a_ALEF_HAMZA_BELOW: return a_f_ALEF_HAMZA_BELOW; /* exception */
312 case a_YEH_HAMZA: return a_m_YEH_HAMZA;
313 case a_ALEF: return a_f_ALEF; /* exception */
314 case a_BEH: return a_m_BEH;
315 case a_TEH_MARBUTA: return a_f_TEH_MARBUTA; /* exception */
316 case a_TEH: return a_m_TEH;
317 case a_THEH: return a_m_THEH;
318 case a_JEEM: return a_m_JEEM;
319 case a_HAH: return a_m_HAH;
320 case a_KHAH: return a_m_KHAH;
321 case a_DAL: return a_f_DAL; /* exception */
322 case a_THAL: return a_f_THAL; /* exception */
323 case a_REH: return a_f_REH; /* exception */
324 case a_ZAIN: return a_f_ZAIN; /* exception */
325 case a_SEEN: return a_m_SEEN;
326 case a_SHEEN: return a_m_SHEEN;
327 case a_SAD: return a_m_SAD;
328 case a_DAD: return a_m_DAD;
329 case a_TAH: return a_m_TAH;
330 case a_ZAH: return a_m_ZAH;
331 case a_AIN: return a_m_AIN;
332 case a_GHAIN: return a_m_GHAIN;
333 case a_TATWEEL: return cur_c; /* exception */
334 case a_FEH: return a_m_FEH;
335 case a_QAF: return a_m_QAF;
336 case a_KAF: return a_m_KAF;
337 case a_LAM: return a_m_LAM;
338 case a_MEEM: return a_m_MEEM;
339 case a_NOON: return a_m_NOON;
340 case a_HEH: return a_m_HEH;
341 case a_WAW: return a_f_WAW; /* exception */
342 case a_ALEF_MAKSURA: return a_f_ALEF_MAKSURA; /* exception */
343 case a_YEH: return a_m_YEH;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000344 }
Bram Moolenaar5f53dd32017-03-01 14:02:30 +0100345 return 0;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000346}
347
348
349/*
350 * Change shape - from ISO-8859-6/Isolated to final
351 */
352 static int
Bram Moolenaar7454a062016-01-30 15:14:10 +0100353chg_c_a2f(int cur_c)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000354{
Bram Moolenaar071d4272004-06-13 20:20:40 +0000355 /* NOTE: these encodings need to be accounted for
Bram Moolenaar7f73b542017-03-04 14:50:19 +0100356 * a_f_ALEF_MADDA;
357 * a_f_ALEF_HAMZA_ABOVE;
358 * a_f_ALEF_HAMZA_BELOW;
359 * a_f_LAM_ALEF_MADDA_ABOVE;
360 * a_f_LAM_ALEF_HAMZA_ABOVE;
361 * a_f_LAM_ALEF_HAMZA_BELOW;
362 */
Bram Moolenaar071d4272004-06-13 20:20:40 +0000363 switch (cur_c)
364 {
Bram Moolenaar7f73b542017-03-04 14:50:19 +0100365 case a_HAMZA: return a_s_HAMZA; /* exception */
366 case a_ALEF_MADDA: return a_f_ALEF_MADDA;
367 case a_ALEF_HAMZA_ABOVE: return a_f_ALEF_HAMZA_ABOVE;
368 case a_WAW_HAMZA: return a_f_WAW_HAMZA;
369 case a_ALEF_HAMZA_BELOW: return a_f_ALEF_HAMZA_BELOW;
370 case a_YEH_HAMZA: return a_f_YEH_HAMZA;
371 case a_ALEF: return a_f_ALEF;
372 case a_BEH: return a_f_BEH;
373 case a_TEH_MARBUTA: return a_f_TEH_MARBUTA;
374 case a_TEH: return a_f_TEH;
375 case a_THEH: return a_f_THEH;
376 case a_JEEM: return a_f_JEEM;
377 case a_HAH: return a_f_HAH;
378 case a_KHAH: return a_f_KHAH;
379 case a_DAL: return a_f_DAL;
380 case a_THAL: return a_f_THAL;
381 case a_REH: return a_f_REH;
382 case a_ZAIN: return a_f_ZAIN;
383 case a_SEEN: return a_f_SEEN;
384 case a_SHEEN: return a_f_SHEEN;
385 case a_SAD: return a_f_SAD;
386 case a_DAD: return a_f_DAD;
387 case a_TAH: return a_f_TAH;
388 case a_ZAH: return a_f_ZAH;
389 case a_AIN: return a_f_AIN;
390 case a_GHAIN: return a_f_GHAIN;
391 case a_TATWEEL: return cur_c; /* exception */
392 case a_FEH: return a_f_FEH;
393 case a_QAF: return a_f_QAF;
394 case a_KAF: return a_f_KAF;
395 case a_LAM: return a_f_LAM;
396 case a_MEEM: return a_f_MEEM;
397 case a_NOON: return a_f_NOON;
398 case a_HEH: return a_f_HEH;
399 case a_WAW: return a_f_WAW;
400 case a_ALEF_MAKSURA: return a_f_ALEF_MAKSURA;
401 case a_YEH: return a_f_YEH;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000402 }
Bram Moolenaar7f73b542017-03-04 14:50:19 +0100403 return 0;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000404}
405
406
/*
 * Change shape - from Initial to Medial.
 * Returns 0 when "cur_c" is not one of the initial forms handled here.
 *
 * This code is unreachable, because for the relevant characters ARABIC_CHAR()
 * is FALSE; it is therefore compiled out with "#if 0".
 */
#if 0
    static int
chg_c_i2m(int cur_c)
{
    switch (cur_c)
    {
        case a_i_YEH_HAMZA: return a_m_YEH_HAMZA;
        case a_i_BEH:       return a_m_BEH;
        case a_i_TEH:       return a_m_TEH;
        case a_i_THEH:      return a_m_THEH;
        case a_i_JEEM:      return a_m_JEEM;
        case a_i_HAH:       return a_m_HAH;
        case a_i_KHAH:      return a_m_KHAH;
        case a_i_SEEN:      return a_m_SEEN;
        case a_i_SHEEN:     return a_m_SHEEN;
        case a_i_SAD:       return a_m_SAD;
        case a_i_DAD:       return a_m_DAD;
        case a_i_TAH:       return a_m_TAH;
        case a_i_ZAH:       return a_m_ZAH;
        case a_i_AIN:       return a_m_AIN;
        case a_i_GHAIN:     return a_m_GHAIN;
        case a_i_FEH:       return a_m_FEH;
        case a_i_QAF:       return a_m_QAF;
        case a_i_KAF:       return a_m_KAF;
        case a_i_LAM:       return a_m_LAM;
        case a_i_MEEM:      return a_m_MEEM;
        case a_i_NOON:      return a_m_NOON;
        case a_i_HEH:       return a_m_HEH;
        case a_i_YEH:       return a_m_YEH;
    }
    return 0;
}
#endif

447/*
448 * Change shape - from Final to Medial
449 */
450 static int
Bram Moolenaar7454a062016-01-30 15:14:10 +0100451chg_c_f2m(int cur_c)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000452{
Bram Moolenaar071d4272004-06-13 20:20:40 +0000453 switch (cur_c)
454 {
455 /* NOTE: these encodings are multi-positional, no ?
Bram Moolenaar7f73b542017-03-04 14:50:19 +0100456 * case a_f_ALEF_MADDA:
457 * case a_f_ALEF_HAMZA_ABOVE:
458 * case a_f_ALEF_HAMZA_BELOW:
459 */
460 case a_f_YEH_HAMZA: return a_m_YEH_HAMZA;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000461 case a_f_WAW_HAMZA: /* exceptions */
462 case a_f_ALEF:
463 case a_f_TEH_MARBUTA:
464 case a_f_DAL:
465 case a_f_THAL:
466 case a_f_REH:
467 case a_f_ZAIN:
468 case a_f_WAW:
469 case a_f_ALEF_MAKSURA:
Bram Moolenaar7f73b542017-03-04 14:50:19 +0100470 return cur_c;
471 case a_f_BEH: return a_m_BEH;
472 case a_f_TEH: return a_m_TEH;
473 case a_f_THEH: return a_m_THEH;
474 case a_f_JEEM: return a_m_JEEM;
475 case a_f_HAH: return a_m_HAH;
476 case a_f_KHAH: return a_m_KHAH;
477 case a_f_SEEN: return a_m_SEEN;
478 case a_f_SHEEN: return a_m_SHEEN;
479 case a_f_SAD: return a_m_SAD;
480 case a_f_DAD: return a_m_DAD;
481 case a_f_TAH: return a_m_TAH;
482 case a_f_ZAH: return a_m_ZAH;
483 case a_f_AIN: return a_m_AIN;
484 case a_f_GHAIN: return a_m_GHAIN;
485 case a_f_FEH: return a_m_FEH;
486 case a_f_QAF: return a_m_QAF;
487 case a_f_KAF: return a_m_KAF;
488 case a_f_LAM: return a_m_LAM;
489 case a_f_MEEM: return a_m_MEEM;
490 case a_f_NOON: return a_m_NOON;
491 case a_f_HEH: return a_m_HEH;
492 case a_f_YEH: return a_m_YEH;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000493
Bram Moolenaar7f73b542017-03-04 14:50:19 +0100494 /* NOTE: these encodings are multi-positional, no ?
495 * case a_f_LAM_ALEF_MADDA_ABOVE:
496 * case a_f_LAM_ALEF_HAMZA_ABOVE:
497 * case a_f_LAM_ALEF_HAMZA_BELOW:
498 * case a_f_LAM_ALEF:
499 */
500 }
501 return 0;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000502}
503
504
505/*
506 * Change shape - from Combination (2 char) to an Isolated
507 */
508 static int
Bram Moolenaar7454a062016-01-30 15:14:10 +0100509chg_c_laa2i(int hid_c)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000510{
Bram Moolenaar071d4272004-06-13 20:20:40 +0000511 switch (hid_c)
512 {
Bram Moolenaar7f73b542017-03-04 14:50:19 +0100513 case a_ALEF_MADDA: return a_s_LAM_ALEF_MADDA_ABOVE;
514 case a_ALEF_HAMZA_ABOVE: return a_s_LAM_ALEF_HAMZA_ABOVE;
515 case a_ALEF_HAMZA_BELOW: return a_s_LAM_ALEF_HAMZA_BELOW;
516 case a_ALEF: return a_s_LAM_ALEF;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000517 }
Bram Moolenaar7f73b542017-03-04 14:50:19 +0100518 return 0;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000519}
520
521
522/*
523 * Change shape - from Combination-Isolated to Final
524 */
525 static int
Bram Moolenaar7454a062016-01-30 15:14:10 +0100526chg_c_laa2f(int hid_c)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000527{
Bram Moolenaar071d4272004-06-13 20:20:40 +0000528 switch (hid_c)
529 {
Bram Moolenaar7f73b542017-03-04 14:50:19 +0100530 case a_ALEF_MADDA: return a_f_LAM_ALEF_MADDA_ABOVE;
531 case a_ALEF_HAMZA_ABOVE: return a_f_LAM_ALEF_HAMZA_ABOVE;
532 case a_ALEF_HAMZA_BELOW: return a_f_LAM_ALEF_HAMZA_BELOW;
533 case a_ALEF: return a_f_LAM_ALEF;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000534 }
Bram Moolenaar7f73b542017-03-04 14:50:19 +0100535 return 0;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000536}
537
/*
 * Do "half-shaping" on character "c".  Return zero if no shaping.
 *
 * A base letter (A_is_a()) is converted with chg_c_a2i(); a valid final form
 * is converted with chg_c_f2m().  Note that those converters can themselves
 * return zero for a letter they do not handle.
 */
    static int
half_shape(int c)
{
    if (A_is_a(c))
        return chg_c_a2i(c);
    if (A_is_valid(c) && A_is_f(c))
        return chg_c_f2m(c);
    return 0;
}

551/*
552 * Do Arabic shaping on character "c". Returns the shaped character.
553 * out: "ccp" points to the first byte of the character to be shaped.
554 * in/out: "c1p" points to the first composing char for "c".
555 * in: "prev_c" is the previous character (not shaped)
556 * in: "prev_c1" is the first composing char for the previous char
557 * (not shaped)
558 * in: "next_c" is the next character (not shaped).
559 */
560 int
Bram Moolenaar7454a062016-01-30 15:14:10 +0100561arabic_shape(
562 int c,
563 int *ccp,
564 int *c1p,
565 int prev_c,
566 int prev_c1,
567 int next_c)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000568{
569 int curr_c;
570 int shape_c;
571 int curr_laa;
572 int prev_laa;
573
574 /* Deal only with Arabic character, pass back all others */
575 if (!A_is_ok(c))
576 return c;
577
578 /* half-shape current and previous character */
579 shape_c = half_shape(prev_c);
580
Bram Moolenaar071d4272004-06-13 20:20:40 +0000581 curr_laa = A_firstc_laa(c, *c1p);
582 prev_laa = A_firstc_laa(prev_c, prev_c1);
583
584 if (curr_laa)
585 {
586 if (A_is_valid(prev_c) && !A_is_f(shape_c)
587 && !A_is_s(shape_c) && !prev_laa)
588 curr_c = chg_c_laa2f(curr_laa);
589 else
590 curr_c = chg_c_laa2i(curr_laa);
591
592 /* Remove the composing character */
593 *c1p = 0;
594 }
595 else if (!A_is_valid(prev_c) && A_is_valid(next_c))
596 curr_c = chg_c_a2i(c);
597 else if (!shape_c || A_is_f(shape_c) || A_is_s(shape_c) || prev_laa)
598 curr_c = A_is_valid(next_c) ? chg_c_a2i(c) : chg_c_a2s(c);
599 else if (A_is_valid(next_c))
Bram Moolenaar3ff2f092017-03-21 13:22:44 +0100600#if 0
Bram Moolenaar071d4272004-06-13 20:20:40 +0000601 curr_c = A_is_iso(c) ? chg_c_a2m(c) : chg_c_i2m(c);
Bram Moolenaar3ff2f092017-03-21 13:22:44 +0100602#else
603 curr_c = A_is_iso(c) ? chg_c_a2m(c) : 0;
604#endif
Bram Moolenaar071d4272004-06-13 20:20:40 +0000605 else if (A_is_valid(prev_c))
606 curr_c = chg_c_a2f(c);
607 else
608 curr_c = chg_c_a2s(c);
609
610 /* Sanity check -- curr_c should, in the future, never be 0.
611 * We should, in the future, insert a fatal error here. */
612 if (curr_c == NUL)
613 curr_c = c;
614
615 if (curr_c != c && ccp != NULL)
616 {
Bram Moolenaar9a920d82012-06-01 15:21:02 +0200617 char_u buf[MB_MAXBYTES + 1];
Bram Moolenaar071d4272004-06-13 20:20:40 +0000618
619 /* Update the first byte of the character. */
620 (*mb_char2bytes)(curr_c, buf);
621 *ccp = buf[0];
622 }
623
624 /* Return the shaped character */
625 return curr_c;
626}
627
628
629/*
630 * A_firstc_laa returns first character of LAA combination if it exists
631 */
632 static int
Bram Moolenaar7454a062016-01-30 15:14:10 +0100633A_firstc_laa(
634 int c, /* base character */
635 int c1) /* first composing character */
Bram Moolenaar071d4272004-06-13 20:20:40 +0000636{
637 if (c1 != NUL && c == a_LAM && !A_is_harakat(c1))
638 return c1;
639 return 0;
640}
641
642
643/*
644 * A_is_harakat returns TRUE if 'c' is an Arabic Harakat character
645 * (harakat/tanween)
646 */
647 static int
Bram Moolenaar7454a062016-01-30 15:14:10 +0100648A_is_harakat(int c)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000649{
650 return (c >= a_FATHATAN && c <= a_SUKUN);
651}
652
653
654/*
655 * A_is_iso returns TRUE if 'c' is an Arabic ISO-8859-6 character
656 * (alphabet/number/punctuation)
657 */
658 static int
Bram Moolenaar7454a062016-01-30 15:14:10 +0100659A_is_iso(int c)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000660{
661 return ((c >= a_HAMZA && c <= a_GHAIN)
662 || (c >= a_TATWEEL && c <= a_HAMZA_BELOW)
663 || c == a_MINI_ALEF);
664}
665
666
667/*
668 * A_is_formb returns TRUE if 'c' is an Arabic 10646-1 FormB character
669 * (alphabet/number/punctuation)
670 */
671 static int
Bram Moolenaar7454a062016-01-30 15:14:10 +0100672A_is_formb(int c)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000673{
674 return ((c >= a_s_FATHATAN && c <= a_s_DAMMATAN)
675 || c == a_s_KASRATAN
676 || (c >= a_s_FATHA && c <= a_f_LAM_ALEF)
677 || c == a_BYTE_ORDER_MARK);
678}
679
680
/*
 * A_is_ok returns TRUE if 'c' is an Arabic 10646 (8859-6 or Form-B)
 * character, i.e. A_is_iso() or A_is_formb() accepts it.
 */
    static int
A_is_ok(int c)
{
    return (A_is_iso(c) || A_is_formb(c));
}

/*
 * A_is_valid returns TRUE if 'c' is an Arabic 10646 (8859-6 or Form-B)
 * with some exceptions/exclusions: characters for which A_is_special() is
 * TRUE are rejected.
 */
    static int
A_is_valid(int c)
{
    return (A_is_ok(c) && !A_is_special(c));
}

702/*
703 * A_is_special returns TRUE if 'c' is not a special Arabic character.
704 * Specials don't adhere to most of the rules.
705 */
706 static int
Bram Moolenaar7454a062016-01-30 15:14:10 +0100707A_is_special(int c)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000708{
709 return (c == a_HAMZA || c == a_s_HAMZA);
710}
Bram Moolenaar75464dc2016-07-02 20:27:50 +0200711
712#endif /* FEAT_ARABIC */