blob: 4ad2c6e89f97874f60e2162916b67cd2b515bcf9 [file] [log] [blame]
/* vi:set ts=8 sts=4 sw=4 noet:
 *
 * VIM - Vi IMproved	by Bram Moolenaar
 *
 * Do ":help uganda"  in Vim to read copying and usage conditions.
 * Do ":help credits" in Vim to see a list of people who contributed.
 * See README.txt for an overview of the Vim source code.
 */
9
/*
 * arabic.c: functions for Arabic language
 *
 * Author: Nadim Shaikli & Isam Bayazidi
 */
15
Bram Moolenaar75464dc2016-07-02 20:27:50 +020016#include "vim.h"
17
18#if defined(FEAT_ARABIC) || defined(PROTO)
19
/* Classification of a single character. */
static int A_is_a(int cur_c);
static int A_is_s(int cur_c);
static int A_is_f(int cur_c);
static int A_is_harakat(int c);
static int A_is_iso(int c);
static int A_is_formb(int c);
static int A_is_ok(int c);
static int A_is_valid(int c);
static int A_is_special(int c);

/* Shape conversion of a single character. */
static int chg_c_a2s(int cur_c);
static int chg_c_a2i(int cur_c);
static int chg_c_a2m(int cur_c);
static int chg_c_a2f(int cur_c);
static int chg_c_i2m(int cur_c);
static int chg_c_f2m(int cur_c);

/* LAM + ALEF combination handling and half-shaping. */
static int chg_c_laa2i(int hid_c);
static int chg_c_laa2f(int hid_c);
static int A_firstc_laa(int c1, int c);
static int half_shape(int c);
Bram Moolenaar071d4272004-06-13 20:20:40 +000039
40
41/*
42 * Returns True if c is an ISO-8859-6 shaped ARABIC letter (user entered)
43 */
44 static int
Bram Moolenaar7454a062016-01-30 15:14:10 +010045A_is_a(int cur_c)
Bram Moolenaar071d4272004-06-13 20:20:40 +000046{
47 switch (cur_c)
48 {
49 case a_HAMZA:
50 case a_ALEF_MADDA:
51 case a_ALEF_HAMZA_ABOVE:
52 case a_WAW_HAMZA:
53 case a_ALEF_HAMZA_BELOW:
54 case a_YEH_HAMZA:
55 case a_ALEF:
56 case a_BEH:
57 case a_TEH_MARBUTA:
58 case a_TEH:
59 case a_THEH:
60 case a_JEEM:
61 case a_HAH:
62 case a_KHAH:
63 case a_DAL:
64 case a_THAL:
65 case a_REH:
66 case a_ZAIN:
67 case a_SEEN:
68 case a_SHEEN:
69 case a_SAD:
70 case a_DAD:
71 case a_TAH:
72 case a_ZAH:
73 case a_AIN:
74 case a_GHAIN:
75 case a_TATWEEL:
76 case a_FEH:
77 case a_QAF:
78 case a_KAF:
79 case a_LAM:
80 case a_MEEM:
81 case a_NOON:
82 case a_HEH:
83 case a_WAW:
84 case a_ALEF_MAKSURA:
85 case a_YEH:
86 return TRUE;
87 }
88
89 return FALSE;
90}
91
92
93/*
94 * Returns True if c is an Isolated Form-B ARABIC letter
95 */
96 static int
Bram Moolenaar7454a062016-01-30 15:14:10 +010097A_is_s(int cur_c)
Bram Moolenaar071d4272004-06-13 20:20:40 +000098{
99 switch (cur_c)
100 {
101 case a_s_HAMZA:
102 case a_s_ALEF_MADDA:
103 case a_s_ALEF_HAMZA_ABOVE:
104 case a_s_WAW_HAMZA:
105 case a_s_ALEF_HAMZA_BELOW:
106 case a_s_YEH_HAMZA:
107 case a_s_ALEF:
108 case a_s_BEH:
109 case a_s_TEH_MARBUTA:
110 case a_s_TEH:
111 case a_s_THEH:
112 case a_s_JEEM:
113 case a_s_HAH:
114 case a_s_KHAH:
115 case a_s_DAL:
116 case a_s_THAL:
117 case a_s_REH:
118 case a_s_ZAIN:
119 case a_s_SEEN:
120 case a_s_SHEEN:
121 case a_s_SAD:
122 case a_s_DAD:
123 case a_s_TAH:
124 case a_s_ZAH:
125 case a_s_AIN:
126 case a_s_GHAIN:
127 case a_s_FEH:
128 case a_s_QAF:
129 case a_s_KAF:
130 case a_s_LAM:
131 case a_s_MEEM:
132 case a_s_NOON:
133 case a_s_HEH:
134 case a_s_WAW:
135 case a_s_ALEF_MAKSURA:
136 case a_s_YEH:
137 return TRUE;
138 }
139
140 return FALSE;
141}
142
143
144/*
145 * Returns True if c is a Final shape of an ARABIC letter
146 */
147 static int
Bram Moolenaar7454a062016-01-30 15:14:10 +0100148A_is_f(int cur_c)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000149{
150 switch (cur_c)
151 {
152 case a_f_ALEF_MADDA:
153 case a_f_ALEF_HAMZA_ABOVE:
154 case a_f_WAW_HAMZA:
155 case a_f_ALEF_HAMZA_BELOW:
156 case a_f_YEH_HAMZA:
157 case a_f_ALEF:
158 case a_f_BEH:
159 case a_f_TEH_MARBUTA:
160 case a_f_TEH:
161 case a_f_THEH:
162 case a_f_JEEM:
163 case a_f_HAH:
164 case a_f_KHAH:
165 case a_f_DAL:
166 case a_f_THAL:
167 case a_f_REH:
168 case a_f_ZAIN:
169 case a_f_SEEN:
170 case a_f_SHEEN:
171 case a_f_SAD:
172 case a_f_DAD:
173 case a_f_TAH:
174 case a_f_ZAH:
175 case a_f_AIN:
176 case a_f_GHAIN:
177 case a_f_FEH:
178 case a_f_QAF:
179 case a_f_KAF:
180 case a_f_LAM:
181 case a_f_MEEM:
182 case a_f_NOON:
183 case a_f_HEH:
184 case a_f_WAW:
185 case a_f_ALEF_MAKSURA:
186 case a_f_YEH:
187 case a_f_LAM_ALEF_MADDA_ABOVE:
188 case a_f_LAM_ALEF_HAMZA_ABOVE:
189 case a_f_LAM_ALEF_HAMZA_BELOW:
190 case a_f_LAM_ALEF:
191 return TRUE;
192 }
193 return FALSE;
194}
195
196
197/*
198 * Change shape - from ISO-8859-6/Isolated to Form-B Isolated
199 */
200 static int
Bram Moolenaar7454a062016-01-30 15:14:10 +0100201chg_c_a2s(int cur_c)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000202{
Bram Moolenaar071d4272004-06-13 20:20:40 +0000203 switch (cur_c)
204 {
Bram Moolenaar5f53dd32017-03-01 14:02:30 +0100205 case a_HAMZA: return a_s_HAMZA;
206 case a_ALEF_MADDA: return a_s_ALEF_MADDA;
207 case a_ALEF_HAMZA_ABOVE: return a_s_ALEF_HAMZA_ABOVE;
208 case a_WAW_HAMZA: return a_s_WAW_HAMZA;
209 case a_ALEF_HAMZA_BELOW: return a_s_ALEF_HAMZA_BELOW;
210 case a_YEH_HAMZA: return a_s_YEH_HAMZA;
211 case a_ALEF: return a_s_ALEF;
212 case a_TEH_MARBUTA: return a_s_TEH_MARBUTA;
213 case a_DAL: return a_s_DAL;
214 case a_THAL: return a_s_THAL;
215 case a_REH: return a_s_REH;
216 case a_ZAIN: return a_s_ZAIN;
217 case a_TATWEEL: return cur_c; /* exceptions */
218 case a_WAW: return a_s_WAW;
219 case a_ALEF_MAKSURA: return a_s_ALEF_MAKSURA;
220 case a_BEH: return a_s_BEH;
221 case a_TEH: return a_s_TEH;
222 case a_THEH: return a_s_THEH;
223 case a_JEEM: return a_s_JEEM;
224 case a_HAH: return a_s_HAH;
225 case a_KHAH: return a_s_KHAH;
226 case a_SEEN: return a_s_SEEN;
227 case a_SHEEN: return a_s_SHEEN;
228 case a_SAD: return a_s_SAD;
229 case a_DAD: return a_s_DAD;
230 case a_TAH: return a_s_TAH;
231 case a_ZAH: return a_s_ZAH;
232 case a_AIN: return a_s_AIN;
233 case a_GHAIN: return a_s_GHAIN;
234 case a_FEH: return a_s_FEH;
235 case a_QAF: return a_s_QAF;
236 case a_KAF: return a_s_KAF;
237 case a_LAM: return a_s_LAM;
238 case a_MEEM: return a_s_MEEM;
239 case a_NOON: return a_s_NOON;
240 case a_HEH: return a_s_HEH;
241 case a_YEH: return a_s_YEH;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000242 }
243
Bram Moolenaar5f53dd32017-03-01 14:02:30 +0100244 return 0;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000245}
246
247
248/*
249 * Change shape - from ISO-8859-6/Isolated to Initial
250 */
251 static int
Bram Moolenaar7454a062016-01-30 15:14:10 +0100252chg_c_a2i(int cur_c)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000253{
Bram Moolenaar071d4272004-06-13 20:20:40 +0000254 switch (cur_c)
255 {
Bram Moolenaar5f53dd32017-03-01 14:02:30 +0100256 case a_YEH_HAMZA: return a_i_YEH_HAMZA;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000257 case a_HAMZA: /* exceptions */
Bram Moolenaar5f53dd32017-03-01 14:02:30 +0100258 return a_s_HAMZA;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000259 case a_ALEF_MADDA: /* exceptions */
Bram Moolenaar5f53dd32017-03-01 14:02:30 +0100260 return a_s_ALEF_MADDA;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000261 case a_ALEF_HAMZA_ABOVE: /* exceptions */
Bram Moolenaar5f53dd32017-03-01 14:02:30 +0100262 return a_s_ALEF_HAMZA_ABOVE;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000263 case a_WAW_HAMZA: /* exceptions */
Bram Moolenaar5f53dd32017-03-01 14:02:30 +0100264 return a_s_WAW_HAMZA;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000265 case a_ALEF_HAMZA_BELOW: /* exceptions */
Bram Moolenaar5f53dd32017-03-01 14:02:30 +0100266 return a_s_ALEF_HAMZA_BELOW;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000267 case a_ALEF: /* exceptions */
Bram Moolenaar5f53dd32017-03-01 14:02:30 +0100268 return a_s_ALEF;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000269 case a_TEH_MARBUTA: /* exceptions */
Bram Moolenaar5f53dd32017-03-01 14:02:30 +0100270 return a_s_TEH_MARBUTA;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000271 case a_DAL: /* exceptions */
Bram Moolenaar5f53dd32017-03-01 14:02:30 +0100272 return a_s_DAL;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000273 case a_THAL: /* exceptions */
Bram Moolenaar5f53dd32017-03-01 14:02:30 +0100274 return a_s_THAL;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000275 case a_REH: /* exceptions */
Bram Moolenaar5f53dd32017-03-01 14:02:30 +0100276 return a_s_REH;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000277 case a_ZAIN: /* exceptions */
Bram Moolenaar5f53dd32017-03-01 14:02:30 +0100278 return a_s_ZAIN;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000279 case a_TATWEEL: /* exceptions */
Bram Moolenaar5f53dd32017-03-01 14:02:30 +0100280 return cur_c;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000281 case a_WAW: /* exceptions */
Bram Moolenaar5f53dd32017-03-01 14:02:30 +0100282 return a_s_WAW;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000283 case a_ALEF_MAKSURA: /* exceptions */
Bram Moolenaar5f53dd32017-03-01 14:02:30 +0100284 return a_s_ALEF_MAKSURA;
285 case a_BEH: return a_i_BEH;
286 case a_TEH: return a_i_TEH;
287 case a_THEH: return a_i_THEH;
288 case a_JEEM: return a_i_JEEM;
289 case a_HAH: return a_i_HAH;
290 case a_KHAH: return a_i_KHAH;
291 case a_SEEN: return a_i_SEEN;
292 case a_SHEEN: return a_i_SHEEN;
293 case a_SAD: return a_i_SAD;
294 case a_DAD: return a_i_DAD;
295 case a_TAH: return a_i_TAH;
296 case a_ZAH: return a_i_ZAH;
297 case a_AIN: return a_i_AIN;
298 case a_GHAIN: return a_i_GHAIN;
299 case a_FEH: return a_i_FEH;
300 case a_QAF: return a_i_QAF;
301 case a_KAF: return a_i_KAF;
302 case a_LAM: return a_i_LAM;
303 case a_MEEM: return a_i_MEEM;
304 case a_NOON: return a_i_NOON;
305 case a_HEH: return a_i_HEH;
306 case a_YEH: return a_i_YEH;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000307 }
308
Bram Moolenaar5f53dd32017-03-01 14:02:30 +0100309 return 0;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000310}
311
312
313/*
314 * Change shape - from ISO-8859-6/Isolated to Medial
315 */
316 static int
Bram Moolenaar7454a062016-01-30 15:14:10 +0100317chg_c_a2m(int cur_c)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000318{
Bram Moolenaar071d4272004-06-13 20:20:40 +0000319 switch (cur_c)
320 {
Bram Moolenaar5f53dd32017-03-01 14:02:30 +0100321 case a_HAMZA: return a_s_HAMZA; /* exception */
322 case a_ALEF_MADDA: return a_f_ALEF_MADDA; /* exception */
323 case a_ALEF_HAMZA_ABOVE: return a_f_ALEF_HAMZA_ABOVE; /* exception */
324 case a_WAW_HAMZA: return a_f_WAW_HAMZA; /* exception */
325 case a_ALEF_HAMZA_BELOW: return a_f_ALEF_HAMZA_BELOW; /* exception */
326 case a_YEH_HAMZA: return a_m_YEH_HAMZA;
327 case a_ALEF: return a_f_ALEF; /* exception */
328 case a_BEH: return a_m_BEH;
329 case a_TEH_MARBUTA: return a_f_TEH_MARBUTA; /* exception */
330 case a_TEH: return a_m_TEH;
331 case a_THEH: return a_m_THEH;
332 case a_JEEM: return a_m_JEEM;
333 case a_HAH: return a_m_HAH;
334 case a_KHAH: return a_m_KHAH;
335 case a_DAL: return a_f_DAL; /* exception */
336 case a_THAL: return a_f_THAL; /* exception */
337 case a_REH: return a_f_REH; /* exception */
338 case a_ZAIN: return a_f_ZAIN; /* exception */
339 case a_SEEN: return a_m_SEEN;
340 case a_SHEEN: return a_m_SHEEN;
341 case a_SAD: return a_m_SAD;
342 case a_DAD: return a_m_DAD;
343 case a_TAH: return a_m_TAH;
344 case a_ZAH: return a_m_ZAH;
345 case a_AIN: return a_m_AIN;
346 case a_GHAIN: return a_m_GHAIN;
347 case a_TATWEEL: return cur_c; /* exception */
348 case a_FEH: return a_m_FEH;
349 case a_QAF: return a_m_QAF;
350 case a_KAF: return a_m_KAF;
351 case a_LAM: return a_m_LAM;
352 case a_MEEM: return a_m_MEEM;
353 case a_NOON: return a_m_NOON;
354 case a_HEH: return a_m_HEH;
355 case a_WAW: return a_f_WAW; /* exception */
356 case a_ALEF_MAKSURA: return a_f_ALEF_MAKSURA; /* exception */
357 case a_YEH: return a_m_YEH;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000358 }
359
Bram Moolenaar5f53dd32017-03-01 14:02:30 +0100360 return 0;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000361}
362
363
364/*
365 * Change shape - from ISO-8859-6/Isolated to final
366 */
367 static int
Bram Moolenaar7454a062016-01-30 15:14:10 +0100368chg_c_a2f(int cur_c)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000369{
370 int tempc;
371
372 /* NOTE: these encodings need to be accounted for
373
374 a_f_ALEF_MADDA;
375 a_f_ALEF_HAMZA_ABOVE;
376 a_f_ALEF_HAMZA_BELOW;
377 a_f_LAM_ALEF_MADDA_ABOVE;
378 a_f_LAM_ALEF_HAMZA_ABOVE;
379 a_f_LAM_ALEF_HAMZA_BELOW;
380 */
381
382 switch (cur_c)
383 {
384 case a_HAMZA: /* exception */
385 tempc = a_s_HAMZA;
386 break;
387 case a_ALEF_MADDA:
388 tempc = a_f_ALEF_MADDA;
389 break;
390 case a_ALEF_HAMZA_ABOVE:
391 tempc = a_f_ALEF_HAMZA_ABOVE;
392 break;
393 case a_WAW_HAMZA:
394 tempc = a_f_WAW_HAMZA;
395 break;
396 case a_ALEF_HAMZA_BELOW:
397 tempc = a_f_ALEF_HAMZA_BELOW;
398 break;
399 case a_YEH_HAMZA:
400 tempc = a_f_YEH_HAMZA;
401 break;
402 case a_ALEF:
403 tempc = a_f_ALEF;
404 break;
405 case a_BEH:
406 tempc = a_f_BEH;
407 break;
408 case a_TEH_MARBUTA:
409 tempc = a_f_TEH_MARBUTA;
410 break;
411 case a_TEH:
412 tempc = a_f_TEH;
413 break;
414 case a_THEH:
415 tempc = a_f_THEH;
416 break;
417 case a_JEEM:
418 tempc = a_f_JEEM;
419 break;
420 case a_HAH:
421 tempc = a_f_HAH;
422 break;
423 case a_KHAH:
424 tempc = a_f_KHAH;
425 break;
426 case a_DAL:
427 tempc = a_f_DAL;
428 break;
429 case a_THAL:
430 tempc = a_f_THAL;
431 break;
432 case a_REH:
433 tempc = a_f_REH;
434 break;
435 case a_ZAIN:
436 tempc = a_f_ZAIN;
437 break;
438 case a_SEEN:
439 tempc = a_f_SEEN;
440 break;
441 case a_SHEEN:
442 tempc = a_f_SHEEN;
443 break;
444 case a_SAD:
445 tempc = a_f_SAD;
446 break;
447 case a_DAD:
448 tempc = a_f_DAD;
449 break;
450 case a_TAH:
451 tempc = a_f_TAH;
452 break;
453 case a_ZAH:
454 tempc = a_f_ZAH;
455 break;
456 case a_AIN:
457 tempc = a_f_AIN;
458 break;
459 case a_GHAIN:
460 tempc = a_f_GHAIN;
461 break;
462 case a_TATWEEL: /* exception */
463 tempc = cur_c;
464 break;
465 case a_FEH:
466 tempc = a_f_FEH;
467 break;
468 case a_QAF:
469 tempc = a_f_QAF;
470 break;
471 case a_KAF:
472 tempc = a_f_KAF;
473 break;
474 case a_LAM:
475 tempc = a_f_LAM;
476 break;
477 case a_MEEM:
478 tempc = a_f_MEEM;
479 break;
480 case a_NOON:
481 tempc = a_f_NOON;
482 break;
483 case a_HEH:
484 tempc = a_f_HEH;
485 break;
486 case a_WAW:
487 tempc = a_f_WAW;
488 break;
489 case a_ALEF_MAKSURA:
490 tempc = a_f_ALEF_MAKSURA;
491 break;
492 case a_YEH:
493 tempc = a_f_YEH;
494 break;
495 default:
496 tempc = 0;
497 }
498
499 return tempc;
500}
501
502
503/*
504 * Change shape - from Initial to Medial
505 */
506 static int
Bram Moolenaar7454a062016-01-30 15:14:10 +0100507chg_c_i2m(int cur_c)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000508{
509 int tempc;
510
511 switch (cur_c)
512 {
513 case a_i_YEH_HAMZA:
514 tempc = a_m_YEH_HAMZA;
515 break;
516 case a_i_BEH:
517 tempc = a_m_BEH;
518 break;
519 case a_i_TEH:
520 tempc = a_m_TEH;
521 break;
522 case a_i_THEH:
523 tempc = a_m_THEH;
524 break;
525 case a_i_JEEM:
526 tempc = a_m_JEEM;
527 break;
528 case a_i_HAH:
529 tempc = a_m_HAH;
530 break;
531 case a_i_KHAH:
532 tempc = a_m_KHAH;
533 break;
534 case a_i_SEEN:
535 tempc = a_m_SEEN;
536 break;
537 case a_i_SHEEN:
538 tempc = a_m_SHEEN;
539 break;
540 case a_i_SAD:
541 tempc = a_m_SAD;
542 break;
543 case a_i_DAD:
544 tempc = a_m_DAD;
545 break;
546 case a_i_TAH:
547 tempc = a_m_TAH;
548 break;
549 case a_i_ZAH:
550 tempc = a_m_ZAH;
551 break;
552 case a_i_AIN:
553 tempc = a_m_AIN;
554 break;
555 case a_i_GHAIN:
556 tempc = a_m_GHAIN;
557 break;
558 case a_i_FEH:
559 tempc = a_m_FEH;
560 break;
561 case a_i_QAF:
562 tempc = a_m_QAF;
563 break;
564 case a_i_KAF:
565 tempc = a_m_KAF;
566 break;
567 case a_i_LAM:
568 tempc = a_m_LAM;
569 break;
570 case a_i_MEEM:
571 tempc = a_m_MEEM;
572 break;
573 case a_i_NOON:
574 tempc = a_m_NOON;
575 break;
576 case a_i_HEH:
577 tempc = a_m_HEH;
578 break;
579 case a_i_YEH:
580 tempc = a_m_YEH;
581 break;
582 default:
583 tempc = 0;
584 }
585
586 return tempc;
587}
588
589
590/*
591 * Change shape - from Final to Medial
592 */
593 static int
Bram Moolenaar7454a062016-01-30 15:14:10 +0100594chg_c_f2m(int cur_c)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000595{
596 int tempc;
597
598 switch (cur_c)
599 {
600 /* NOTE: these encodings are multi-positional, no ?
601 case a_f_ALEF_MADDA:
602 case a_f_ALEF_HAMZA_ABOVE:
603 case a_f_ALEF_HAMZA_BELOW:
604 */
605 case a_f_YEH_HAMZA:
606 tempc = a_m_YEH_HAMZA;
607 break;
608 case a_f_WAW_HAMZA: /* exceptions */
609 case a_f_ALEF:
610 case a_f_TEH_MARBUTA:
611 case a_f_DAL:
612 case a_f_THAL:
613 case a_f_REH:
614 case a_f_ZAIN:
615 case a_f_WAW:
616 case a_f_ALEF_MAKSURA:
617 tempc = cur_c;
618 break;
619 case a_f_BEH:
620 tempc = a_m_BEH;
621 break;
622 case a_f_TEH:
623 tempc = a_m_TEH;
624 break;
625 case a_f_THEH:
626 tempc = a_m_THEH;
627 break;
628 case a_f_JEEM:
629 tempc = a_m_JEEM;
630 break;
631 case a_f_HAH:
632 tempc = a_m_HAH;
633 break;
634 case a_f_KHAH:
635 tempc = a_m_KHAH;
636 break;
637 case a_f_SEEN:
638 tempc = a_m_SEEN;
639 break;
640 case a_f_SHEEN:
641 tempc = a_m_SHEEN;
642 break;
643 case a_f_SAD:
644 tempc = a_m_SAD;
645 break;
646 case a_f_DAD:
647 tempc = a_m_DAD;
648 break;
649 case a_f_TAH:
650 tempc = a_m_TAH;
651 break;
652 case a_f_ZAH:
653 tempc = a_m_ZAH;
654 break;
655 case a_f_AIN:
656 tempc = a_m_AIN;
657 break;
658 case a_f_GHAIN:
659 tempc = a_m_GHAIN;
660 break;
661 case a_f_FEH:
662 tempc = a_m_FEH;
663 break;
664 case a_f_QAF:
665 tempc = a_m_QAF;
666 break;
667 case a_f_KAF:
668 tempc = a_m_KAF;
669 break;
670 case a_f_LAM:
671 tempc = a_m_LAM;
672 break;
673 case a_f_MEEM:
674 tempc = a_m_MEEM;
675 break;
676 case a_f_NOON:
677 tempc = a_m_NOON;
678 break;
679 case a_f_HEH:
680 tempc = a_m_HEH;
681 break;
682 case a_f_YEH:
683 tempc = a_m_YEH;
684 break;
685 /* NOTE: these encodings are multi-positional, no ?
686 case a_f_LAM_ALEF_MADDA_ABOVE:
687 case a_f_LAM_ALEF_HAMZA_ABOVE:
688 case a_f_LAM_ALEF_HAMZA_BELOW:
689 case a_f_LAM_ALEF:
690 */
691 default:
692 tempc = 0;
693 }
694
695 return tempc;
696}
697
698
699/*
700 * Change shape - from Combination (2 char) to an Isolated
701 */
702 static int
Bram Moolenaar7454a062016-01-30 15:14:10 +0100703chg_c_laa2i(int hid_c)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000704{
705 int tempc;
706
707 switch (hid_c)
708 {
709 case a_ALEF_MADDA:
710 tempc = a_s_LAM_ALEF_MADDA_ABOVE;
711 break;
712 case a_ALEF_HAMZA_ABOVE:
713 tempc = a_s_LAM_ALEF_HAMZA_ABOVE;
714 break;
715 case a_ALEF_HAMZA_BELOW:
716 tempc = a_s_LAM_ALEF_HAMZA_BELOW;
717 break;
718 case a_ALEF:
719 tempc = a_s_LAM_ALEF;
720 break;
721 default:
722 tempc = 0;
723 }
724
725 return tempc;
726}
727
728
729/*
730 * Change shape - from Combination-Isolated to Final
731 */
732 static int
Bram Moolenaar7454a062016-01-30 15:14:10 +0100733chg_c_laa2f(int hid_c)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000734{
735 int tempc;
736
737 switch (hid_c)
738 {
739 case a_ALEF_MADDA:
740 tempc = a_f_LAM_ALEF_MADDA_ABOVE;
741 break;
742 case a_ALEF_HAMZA_ABOVE:
743 tempc = a_f_LAM_ALEF_HAMZA_ABOVE;
744 break;
745 case a_ALEF_HAMZA_BELOW:
746 tempc = a_f_LAM_ALEF_HAMZA_BELOW;
747 break;
748 case a_ALEF:
749 tempc = a_f_LAM_ALEF;
750 break;
751 default:
752 tempc = 0;
753 }
754
755 return tempc;
756}
757
/*
 * Do "half-shaping" on character "c":
 * - a base letter (A_is_a()) is converted with chg_c_a2i();
 * - otherwise a valid final form is converted with chg_c_f2m().
 * Returns zero if no shaping was done.
 */
    static int
half_shape(int c)
{
    int shaped = 0;

    if (A_is_a(c))
        shaped = chg_c_a2i(c);
    else if (A_is_valid(c) && A_is_f(c))
        shaped = chg_c_f2m(c);

    return shaped;
}
770
771/*
772 * Do Arabic shaping on character "c". Returns the shaped character.
773 * out: "ccp" points to the first byte of the character to be shaped.
774 * in/out: "c1p" points to the first composing char for "c".
775 * in: "prev_c" is the previous character (not shaped)
776 * in: "prev_c1" is the first composing char for the previous char
777 * (not shaped)
778 * in: "next_c" is the next character (not shaped).
779 */
780 int
Bram Moolenaar7454a062016-01-30 15:14:10 +0100781arabic_shape(
782 int c,
783 int *ccp,
784 int *c1p,
785 int prev_c,
786 int prev_c1,
787 int next_c)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000788{
789 int curr_c;
790 int shape_c;
791 int curr_laa;
792 int prev_laa;
793
794 /* Deal only with Arabic character, pass back all others */
795 if (!A_is_ok(c))
796 return c;
797
798 /* half-shape current and previous character */
799 shape_c = half_shape(prev_c);
800
801 /* Save away current character */
802 curr_c = c;
803
804 curr_laa = A_firstc_laa(c, *c1p);
805 prev_laa = A_firstc_laa(prev_c, prev_c1);
806
807 if (curr_laa)
808 {
809 if (A_is_valid(prev_c) && !A_is_f(shape_c)
810 && !A_is_s(shape_c) && !prev_laa)
811 curr_c = chg_c_laa2f(curr_laa);
812 else
813 curr_c = chg_c_laa2i(curr_laa);
814
815 /* Remove the composing character */
816 *c1p = 0;
817 }
818 else if (!A_is_valid(prev_c) && A_is_valid(next_c))
819 curr_c = chg_c_a2i(c);
820 else if (!shape_c || A_is_f(shape_c) || A_is_s(shape_c) || prev_laa)
821 curr_c = A_is_valid(next_c) ? chg_c_a2i(c) : chg_c_a2s(c);
822 else if (A_is_valid(next_c))
823 curr_c = A_is_iso(c) ? chg_c_a2m(c) : chg_c_i2m(c);
824 else if (A_is_valid(prev_c))
825 curr_c = chg_c_a2f(c);
826 else
827 curr_c = chg_c_a2s(c);
828
829 /* Sanity check -- curr_c should, in the future, never be 0.
830 * We should, in the future, insert a fatal error here. */
831 if (curr_c == NUL)
832 curr_c = c;
833
834 if (curr_c != c && ccp != NULL)
835 {
Bram Moolenaar9a920d82012-06-01 15:21:02 +0200836 char_u buf[MB_MAXBYTES + 1];
Bram Moolenaar071d4272004-06-13 20:20:40 +0000837
838 /* Update the first byte of the character. */
839 (*mb_char2bytes)(curr_c, buf);
840 *ccp = buf[0];
841 }
842
843 /* Return the shaped character */
844 return curr_c;
845}
846
847
848/*
849 * A_firstc_laa returns first character of LAA combination if it exists
850 */
851 static int
Bram Moolenaar7454a062016-01-30 15:14:10 +0100852A_firstc_laa(
853 int c, /* base character */
854 int c1) /* first composing character */
Bram Moolenaar071d4272004-06-13 20:20:40 +0000855{
856 if (c1 != NUL && c == a_LAM && !A_is_harakat(c1))
857 return c1;
858 return 0;
859}
860
861
862/*
863 * A_is_harakat returns TRUE if 'c' is an Arabic Harakat character
864 * (harakat/tanween)
865 */
866 static int
Bram Moolenaar7454a062016-01-30 15:14:10 +0100867A_is_harakat(int c)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000868{
869 return (c >= a_FATHATAN && c <= a_SUKUN);
870}
871
872
873/*
874 * A_is_iso returns TRUE if 'c' is an Arabic ISO-8859-6 character
875 * (alphabet/number/punctuation)
876 */
877 static int
Bram Moolenaar7454a062016-01-30 15:14:10 +0100878A_is_iso(int c)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000879{
880 return ((c >= a_HAMZA && c <= a_GHAIN)
881 || (c >= a_TATWEEL && c <= a_HAMZA_BELOW)
882 || c == a_MINI_ALEF);
883}
884
885
886/*
887 * A_is_formb returns TRUE if 'c' is an Arabic 10646-1 FormB character
888 * (alphabet/number/punctuation)
889 */
890 static int
Bram Moolenaar7454a062016-01-30 15:14:10 +0100891A_is_formb(int c)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000892{
893 return ((c >= a_s_FATHATAN && c <= a_s_DAMMATAN)
894 || c == a_s_KASRATAN
895 || (c >= a_s_FATHA && c <= a_f_LAM_ALEF)
896 || c == a_BYTE_ORDER_MARK);
897}
898
899
/*
 * A_is_ok returns non-zero if 'c' is an Arabic 10646 character: either an
 * 8859-6 one (A_is_iso()) or a Form-B one (A_is_formb()).
 */
    static int
A_is_ok(int c)
{
    if (A_is_iso(c))
        return 1;

    return A_is_formb(c) != 0;
}
908
909
/*
 * A_is_valid returns non-zero if 'c' is an Arabic 10646 (8859-6 or Form-B)
 * character, excluding the special characters reported by A_is_special().
 */
    static int
A_is_valid(int c)
{
    if (!A_is_ok(c))
        return 0;

    return !A_is_special(c);
}
919
920
921/*
922 * A_is_special returns TRUE if 'c' is not a special Arabic character.
923 * Specials don't adhere to most of the rules.
924 */
925 static int
Bram Moolenaar7454a062016-01-30 15:14:10 +0100926A_is_special(int c)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000927{
928 return (c == a_HAMZA || c == a_s_HAMZA);
929}
Bram Moolenaar75464dc2016-07-02 20:27:50 +0200930
931#endif /* FEAT_ARABIC */