blob: 1c3059839207142ea6e4e1ced6ac2c4f86d86d48 [file] [log] [blame]
/* vi:set ts=8 sts=4 sw=4:
 *
 * VIM - Vi IMproved	by Bram Moolenaar
 *
 * Do ":help uganda"  in Vim to read copying and usage conditions.
 * Do ":help credits" in Vim to see a list of people who contributed.
 * See README.txt for an overview of the Vim source code.
 */

/*
 * arabic.c: functions for Arabic language
 *
 * Included by main.c, when FEAT_ARABIC & FEAT_GUI are defined.
 *
 * --
 *
 * Author: Nadim Shaikli & Isam Bayazidi
 *
 */
20
/* Classification of a single character. */
static int A_is_a(int cur_c);
static int A_is_s(int cur_c);
static int A_is_f(int cur_c);
static int A_is_harakat(int c);
static int A_is_iso(int c);
static int A_is_formb(int c);
static int A_is_ok(int c);
static int A_is_valid(int c);
static int A_is_special(int c);

/* Shape conversion of a single character. */
static int chg_c_a2s(int cur_c);
static int chg_c_a2i(int cur_c);
static int chg_c_a2m(int cur_c);
static int chg_c_a2f(int cur_c);
static int chg_c_i2m(int cur_c);
static int chg_c_f2m(int cur_c);

/* LAM + ALEF combinations. */
static int chg_c_laa2i(int hid_c);
static int chg_c_laa2f(int hid_c);
static int A_firstc_laa(int c1, int c);

/* Helper used by arabic_shape(). */
static int half_shape(int c);
Bram Moolenaar071d4272004-06-13 20:20:40 +000040
41
42/*
43 * Returns True if c is an ISO-8859-6 shaped ARABIC letter (user entered)
44 */
45 static int
Bram Moolenaar7454a062016-01-30 15:14:10 +010046A_is_a(int cur_c)
Bram Moolenaar071d4272004-06-13 20:20:40 +000047{
48 switch (cur_c)
49 {
50 case a_HAMZA:
51 case a_ALEF_MADDA:
52 case a_ALEF_HAMZA_ABOVE:
53 case a_WAW_HAMZA:
54 case a_ALEF_HAMZA_BELOW:
55 case a_YEH_HAMZA:
56 case a_ALEF:
57 case a_BEH:
58 case a_TEH_MARBUTA:
59 case a_TEH:
60 case a_THEH:
61 case a_JEEM:
62 case a_HAH:
63 case a_KHAH:
64 case a_DAL:
65 case a_THAL:
66 case a_REH:
67 case a_ZAIN:
68 case a_SEEN:
69 case a_SHEEN:
70 case a_SAD:
71 case a_DAD:
72 case a_TAH:
73 case a_ZAH:
74 case a_AIN:
75 case a_GHAIN:
76 case a_TATWEEL:
77 case a_FEH:
78 case a_QAF:
79 case a_KAF:
80 case a_LAM:
81 case a_MEEM:
82 case a_NOON:
83 case a_HEH:
84 case a_WAW:
85 case a_ALEF_MAKSURA:
86 case a_YEH:
87 return TRUE;
88 }
89
90 return FALSE;
91}
92
93
94/*
95 * Returns True if c is an Isolated Form-B ARABIC letter
96 */
97 static int
Bram Moolenaar7454a062016-01-30 15:14:10 +010098A_is_s(int cur_c)
Bram Moolenaar071d4272004-06-13 20:20:40 +000099{
100 switch (cur_c)
101 {
102 case a_s_HAMZA:
103 case a_s_ALEF_MADDA:
104 case a_s_ALEF_HAMZA_ABOVE:
105 case a_s_WAW_HAMZA:
106 case a_s_ALEF_HAMZA_BELOW:
107 case a_s_YEH_HAMZA:
108 case a_s_ALEF:
109 case a_s_BEH:
110 case a_s_TEH_MARBUTA:
111 case a_s_TEH:
112 case a_s_THEH:
113 case a_s_JEEM:
114 case a_s_HAH:
115 case a_s_KHAH:
116 case a_s_DAL:
117 case a_s_THAL:
118 case a_s_REH:
119 case a_s_ZAIN:
120 case a_s_SEEN:
121 case a_s_SHEEN:
122 case a_s_SAD:
123 case a_s_DAD:
124 case a_s_TAH:
125 case a_s_ZAH:
126 case a_s_AIN:
127 case a_s_GHAIN:
128 case a_s_FEH:
129 case a_s_QAF:
130 case a_s_KAF:
131 case a_s_LAM:
132 case a_s_MEEM:
133 case a_s_NOON:
134 case a_s_HEH:
135 case a_s_WAW:
136 case a_s_ALEF_MAKSURA:
137 case a_s_YEH:
138 return TRUE;
139 }
140
141 return FALSE;
142}
143
144
145/*
146 * Returns True if c is a Final shape of an ARABIC letter
147 */
148 static int
Bram Moolenaar7454a062016-01-30 15:14:10 +0100149A_is_f(int cur_c)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000150{
151 switch (cur_c)
152 {
153 case a_f_ALEF_MADDA:
154 case a_f_ALEF_HAMZA_ABOVE:
155 case a_f_WAW_HAMZA:
156 case a_f_ALEF_HAMZA_BELOW:
157 case a_f_YEH_HAMZA:
158 case a_f_ALEF:
159 case a_f_BEH:
160 case a_f_TEH_MARBUTA:
161 case a_f_TEH:
162 case a_f_THEH:
163 case a_f_JEEM:
164 case a_f_HAH:
165 case a_f_KHAH:
166 case a_f_DAL:
167 case a_f_THAL:
168 case a_f_REH:
169 case a_f_ZAIN:
170 case a_f_SEEN:
171 case a_f_SHEEN:
172 case a_f_SAD:
173 case a_f_DAD:
174 case a_f_TAH:
175 case a_f_ZAH:
176 case a_f_AIN:
177 case a_f_GHAIN:
178 case a_f_FEH:
179 case a_f_QAF:
180 case a_f_KAF:
181 case a_f_LAM:
182 case a_f_MEEM:
183 case a_f_NOON:
184 case a_f_HEH:
185 case a_f_WAW:
186 case a_f_ALEF_MAKSURA:
187 case a_f_YEH:
188 case a_f_LAM_ALEF_MADDA_ABOVE:
189 case a_f_LAM_ALEF_HAMZA_ABOVE:
190 case a_f_LAM_ALEF_HAMZA_BELOW:
191 case a_f_LAM_ALEF:
192 return TRUE;
193 }
194 return FALSE;
195}
196
197
198/*
199 * Change shape - from ISO-8859-6/Isolated to Form-B Isolated
200 */
201 static int
Bram Moolenaar7454a062016-01-30 15:14:10 +0100202chg_c_a2s(int cur_c)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000203{
204 int tempc;
205
206 switch (cur_c)
207 {
208 case a_HAMZA:
209 tempc = a_s_HAMZA;
210 break;
211 case a_ALEF_MADDA:
212 tempc = a_s_ALEF_MADDA;
213 break;
214 case a_ALEF_HAMZA_ABOVE:
215 tempc = a_s_ALEF_HAMZA_ABOVE;
216 break;
217 case a_WAW_HAMZA:
218 tempc = a_s_WAW_HAMZA;
219 break;
220 case a_ALEF_HAMZA_BELOW:
221 tempc = a_s_ALEF_HAMZA_BELOW;
222 break;
223 case a_YEH_HAMZA:
224 tempc = a_s_YEH_HAMZA;
225 break;
226 case a_ALEF:
227 tempc = a_s_ALEF;
228 break;
229 case a_TEH_MARBUTA:
230 tempc = a_s_TEH_MARBUTA;
231 break;
232 case a_DAL:
233 tempc = a_s_DAL;
234 break;
235 case a_THAL:
236 tempc = a_s_THAL;
237 break;
238 case a_REH:
239 tempc = a_s_REH;
240 break;
241 case a_ZAIN:
242 tempc = a_s_ZAIN;
243 break;
244 case a_TATWEEL: /* exceptions */
245 tempc = cur_c;
246 break;
247 case a_WAW:
248 tempc = a_s_WAW;
249 break;
250 case a_ALEF_MAKSURA:
251 tempc = a_s_ALEF_MAKSURA;
252 break;
253 case a_BEH:
254 tempc = a_s_BEH;
255 break;
256 case a_TEH:
257 tempc = a_s_TEH;
258 break;
259 case a_THEH:
260 tempc = a_s_THEH;
261 break;
262 case a_JEEM:
263 tempc = a_s_JEEM;
264 break;
265 case a_HAH:
266 tempc = a_s_HAH;
267 break;
268 case a_KHAH:
269 tempc = a_s_KHAH;
270 break;
271 case a_SEEN:
272 tempc = a_s_SEEN;
273 break;
274 case a_SHEEN:
275 tempc = a_s_SHEEN;
276 break;
277 case a_SAD:
278 tempc = a_s_SAD;
279 break;
280 case a_DAD:
281 tempc = a_s_DAD;
282 break;
283 case a_TAH:
284 tempc = a_s_TAH;
285 break;
286 case a_ZAH:
287 tempc = a_s_ZAH;
288 break;
289 case a_AIN:
290 tempc = a_s_AIN;
291 break;
292 case a_GHAIN:
293 tempc = a_s_GHAIN;
294 break;
295 case a_FEH:
296 tempc = a_s_FEH;
297 break;
298 case a_QAF:
299 tempc = a_s_QAF;
300 break;
301 case a_KAF:
302 tempc = a_s_KAF;
303 break;
304 case a_LAM:
305 tempc = a_s_LAM;
306 break;
307 case a_MEEM:
308 tempc = a_s_MEEM;
309 break;
310 case a_NOON:
311 tempc = a_s_NOON;
312 break;
313 case a_HEH:
314 tempc = a_s_HEH;
315 break;
316 case a_YEH:
317 tempc = a_s_YEH;
318 break;
319 default:
320 tempc = 0;
321 }
322
323 return tempc;
324}
325
326
327/*
328 * Change shape - from ISO-8859-6/Isolated to Initial
329 */
330 static int
Bram Moolenaar7454a062016-01-30 15:14:10 +0100331chg_c_a2i(int cur_c)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000332{
333 int tempc;
334
335 switch (cur_c)
336 {
337 case a_YEH_HAMZA:
338 tempc = a_i_YEH_HAMZA;
339 break;
340 case a_HAMZA: /* exceptions */
341 tempc = a_s_HAMZA;
342 break;
343 case a_ALEF_MADDA: /* exceptions */
344 tempc = a_s_ALEF_MADDA;
345 break;
346 case a_ALEF_HAMZA_ABOVE: /* exceptions */
347 tempc = a_s_ALEF_HAMZA_ABOVE;
348 break;
349 case a_WAW_HAMZA: /* exceptions */
350 tempc = a_s_WAW_HAMZA;
351 break;
352 case a_ALEF_HAMZA_BELOW: /* exceptions */
353 tempc = a_s_ALEF_HAMZA_BELOW;
354 break;
355 case a_ALEF: /* exceptions */
356 tempc = a_s_ALEF;
357 break;
358 case a_TEH_MARBUTA: /* exceptions */
359 tempc = a_s_TEH_MARBUTA;
360 break;
361 case a_DAL: /* exceptions */
362 tempc = a_s_DAL;
363 break;
364 case a_THAL: /* exceptions */
365 tempc = a_s_THAL;
366 break;
367 case a_REH: /* exceptions */
368 tempc = a_s_REH;
369 break;
370 case a_ZAIN: /* exceptions */
371 tempc = a_s_ZAIN;
372 break;
373 case a_TATWEEL: /* exceptions */
374 tempc = cur_c;
375 break;
376 case a_WAW: /* exceptions */
377 tempc = a_s_WAW;
378 break;
379 case a_ALEF_MAKSURA: /* exceptions */
380 tempc = a_s_ALEF_MAKSURA;
381 break;
382 case a_BEH:
383 tempc = a_i_BEH;
384 break;
385 case a_TEH:
386 tempc = a_i_TEH;
387 break;
388 case a_THEH:
389 tempc = a_i_THEH;
390 break;
391 case a_JEEM:
392 tempc = a_i_JEEM;
393 break;
394 case a_HAH:
395 tempc = a_i_HAH;
396 break;
397 case a_KHAH:
398 tempc = a_i_KHAH;
399 break;
400 case a_SEEN:
401 tempc = a_i_SEEN;
402 break;
403 case a_SHEEN:
404 tempc = a_i_SHEEN;
405 break;
406 case a_SAD:
407 tempc = a_i_SAD;
408 break;
409 case a_DAD:
410 tempc = a_i_DAD;
411 break;
412 case a_TAH:
413 tempc = a_i_TAH;
414 break;
415 case a_ZAH:
416 tempc = a_i_ZAH;
417 break;
418 case a_AIN:
419 tempc = a_i_AIN;
420 break;
421 case a_GHAIN:
422 tempc = a_i_GHAIN;
423 break;
424 case a_FEH:
425 tempc = a_i_FEH;
426 break;
427 case a_QAF:
428 tempc = a_i_QAF;
429 break;
430 case a_KAF:
431 tempc = a_i_KAF;
432 break;
433 case a_LAM:
434 tempc = a_i_LAM;
435 break;
436 case a_MEEM:
437 tempc = a_i_MEEM;
438 break;
439 case a_NOON:
440 tempc = a_i_NOON;
441 break;
442 case a_HEH:
443 tempc = a_i_HEH;
444 break;
445 case a_YEH:
446 tempc = a_i_YEH;
447 break;
448 default:
449 tempc = 0;
450 }
451
452 return tempc;
453}
454
455
456/*
457 * Change shape - from ISO-8859-6/Isolated to Medial
458 */
459 static int
Bram Moolenaar7454a062016-01-30 15:14:10 +0100460chg_c_a2m(int cur_c)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000461{
462 int tempc;
463
464 switch (cur_c)
465 {
466 case a_HAMZA: /* exception */
467 tempc = a_s_HAMZA;
468 break;
469 case a_ALEF_MADDA: /* exception */
470 tempc = a_f_ALEF_MADDA;
471 break;
472 case a_ALEF_HAMZA_ABOVE: /* exception */
473 tempc = a_f_ALEF_HAMZA_ABOVE;
474 break;
475 case a_WAW_HAMZA: /* exception */
476 tempc = a_f_WAW_HAMZA;
477 break;
478 case a_ALEF_HAMZA_BELOW: /* exception */
479 tempc = a_f_ALEF_HAMZA_BELOW;
480 break;
481 case a_YEH_HAMZA:
482 tempc = a_m_YEH_HAMZA;
483 break;
484 case a_ALEF: /* exception */
485 tempc = a_f_ALEF;
486 break;
487 case a_BEH:
488 tempc = a_m_BEH;
489 break;
490 case a_TEH_MARBUTA: /* exception */
491 tempc = a_f_TEH_MARBUTA;
492 break;
493 case a_TEH:
494 tempc = a_m_TEH;
495 break;
496 case a_THEH:
497 tempc = a_m_THEH;
498 break;
499 case a_JEEM:
500 tempc = a_m_JEEM;
501 break;
502 case a_HAH:
503 tempc = a_m_HAH;
504 break;
505 case a_KHAH:
506 tempc = a_m_KHAH;
507 break;
508 case a_DAL: /* exception */
509 tempc = a_f_DAL;
510 break;
511 case a_THAL: /* exception */
512 tempc = a_f_THAL;
513 break;
514 case a_REH: /* exception */
515 tempc = a_f_REH;
516 break;
517 case a_ZAIN: /* exception */
518 tempc = a_f_ZAIN;
519 break;
520 case a_SEEN:
521 tempc = a_m_SEEN;
522 break;
523 case a_SHEEN:
524 tempc = a_m_SHEEN;
525 break;
526 case a_SAD:
527 tempc = a_m_SAD;
528 break;
529 case a_DAD:
530 tempc = a_m_DAD;
531 break;
532 case a_TAH:
533 tempc = a_m_TAH;
534 break;
535 case a_ZAH:
536 tempc = a_m_ZAH;
537 break;
538 case a_AIN:
539 tempc = a_m_AIN;
540 break;
541 case a_GHAIN:
542 tempc = a_m_GHAIN;
543 break;
544 case a_TATWEEL: /* exception */
545 tempc = cur_c;
546 break;
547 case a_FEH:
548 tempc = a_m_FEH;
549 break;
550 case a_QAF:
551 tempc = a_m_QAF;
552 break;
553 case a_KAF:
554 tempc = a_m_KAF;
555 break;
556 case a_LAM:
557 tempc = a_m_LAM;
558 break;
559 case a_MEEM:
560 tempc = a_m_MEEM;
561 break;
562 case a_NOON:
563 tempc = a_m_NOON;
564 break;
565 case a_HEH:
566 tempc = a_m_HEH;
567 break;
568 case a_WAW: /* exception */
569 tempc = a_f_WAW;
570 break;
571 case a_ALEF_MAKSURA: /* exception */
572 tempc = a_f_ALEF_MAKSURA;
573 break;
574 case a_YEH:
575 tempc = a_m_YEH;
576 break;
577 default:
578 tempc = 0;
579 }
580
581 return tempc;
582}
583
584
585/*
586 * Change shape - from ISO-8859-6/Isolated to final
587 */
588 static int
Bram Moolenaar7454a062016-01-30 15:14:10 +0100589chg_c_a2f(int cur_c)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000590{
591 int tempc;
592
593 /* NOTE: these encodings need to be accounted for
594
595 a_f_ALEF_MADDA;
596 a_f_ALEF_HAMZA_ABOVE;
597 a_f_ALEF_HAMZA_BELOW;
598 a_f_LAM_ALEF_MADDA_ABOVE;
599 a_f_LAM_ALEF_HAMZA_ABOVE;
600 a_f_LAM_ALEF_HAMZA_BELOW;
601 */
602
603 switch (cur_c)
604 {
605 case a_HAMZA: /* exception */
606 tempc = a_s_HAMZA;
607 break;
608 case a_ALEF_MADDA:
609 tempc = a_f_ALEF_MADDA;
610 break;
611 case a_ALEF_HAMZA_ABOVE:
612 tempc = a_f_ALEF_HAMZA_ABOVE;
613 break;
614 case a_WAW_HAMZA:
615 tempc = a_f_WAW_HAMZA;
616 break;
617 case a_ALEF_HAMZA_BELOW:
618 tempc = a_f_ALEF_HAMZA_BELOW;
619 break;
620 case a_YEH_HAMZA:
621 tempc = a_f_YEH_HAMZA;
622 break;
623 case a_ALEF:
624 tempc = a_f_ALEF;
625 break;
626 case a_BEH:
627 tempc = a_f_BEH;
628 break;
629 case a_TEH_MARBUTA:
630 tempc = a_f_TEH_MARBUTA;
631 break;
632 case a_TEH:
633 tempc = a_f_TEH;
634 break;
635 case a_THEH:
636 tempc = a_f_THEH;
637 break;
638 case a_JEEM:
639 tempc = a_f_JEEM;
640 break;
641 case a_HAH:
642 tempc = a_f_HAH;
643 break;
644 case a_KHAH:
645 tempc = a_f_KHAH;
646 break;
647 case a_DAL:
648 tempc = a_f_DAL;
649 break;
650 case a_THAL:
651 tempc = a_f_THAL;
652 break;
653 case a_REH:
654 tempc = a_f_REH;
655 break;
656 case a_ZAIN:
657 tempc = a_f_ZAIN;
658 break;
659 case a_SEEN:
660 tempc = a_f_SEEN;
661 break;
662 case a_SHEEN:
663 tempc = a_f_SHEEN;
664 break;
665 case a_SAD:
666 tempc = a_f_SAD;
667 break;
668 case a_DAD:
669 tempc = a_f_DAD;
670 break;
671 case a_TAH:
672 tempc = a_f_TAH;
673 break;
674 case a_ZAH:
675 tempc = a_f_ZAH;
676 break;
677 case a_AIN:
678 tempc = a_f_AIN;
679 break;
680 case a_GHAIN:
681 tempc = a_f_GHAIN;
682 break;
683 case a_TATWEEL: /* exception */
684 tempc = cur_c;
685 break;
686 case a_FEH:
687 tempc = a_f_FEH;
688 break;
689 case a_QAF:
690 tempc = a_f_QAF;
691 break;
692 case a_KAF:
693 tempc = a_f_KAF;
694 break;
695 case a_LAM:
696 tempc = a_f_LAM;
697 break;
698 case a_MEEM:
699 tempc = a_f_MEEM;
700 break;
701 case a_NOON:
702 tempc = a_f_NOON;
703 break;
704 case a_HEH:
705 tempc = a_f_HEH;
706 break;
707 case a_WAW:
708 tempc = a_f_WAW;
709 break;
710 case a_ALEF_MAKSURA:
711 tempc = a_f_ALEF_MAKSURA;
712 break;
713 case a_YEH:
714 tempc = a_f_YEH;
715 break;
716 default:
717 tempc = 0;
718 }
719
720 return tempc;
721}
722
723
724/*
725 * Change shape - from Initial to Medial
726 */
727 static int
Bram Moolenaar7454a062016-01-30 15:14:10 +0100728chg_c_i2m(int cur_c)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000729{
730 int tempc;
731
732 switch (cur_c)
733 {
734 case a_i_YEH_HAMZA:
735 tempc = a_m_YEH_HAMZA;
736 break;
737 case a_i_BEH:
738 tempc = a_m_BEH;
739 break;
740 case a_i_TEH:
741 tempc = a_m_TEH;
742 break;
743 case a_i_THEH:
744 tempc = a_m_THEH;
745 break;
746 case a_i_JEEM:
747 tempc = a_m_JEEM;
748 break;
749 case a_i_HAH:
750 tempc = a_m_HAH;
751 break;
752 case a_i_KHAH:
753 tempc = a_m_KHAH;
754 break;
755 case a_i_SEEN:
756 tempc = a_m_SEEN;
757 break;
758 case a_i_SHEEN:
759 tempc = a_m_SHEEN;
760 break;
761 case a_i_SAD:
762 tempc = a_m_SAD;
763 break;
764 case a_i_DAD:
765 tempc = a_m_DAD;
766 break;
767 case a_i_TAH:
768 tempc = a_m_TAH;
769 break;
770 case a_i_ZAH:
771 tempc = a_m_ZAH;
772 break;
773 case a_i_AIN:
774 tempc = a_m_AIN;
775 break;
776 case a_i_GHAIN:
777 tempc = a_m_GHAIN;
778 break;
779 case a_i_FEH:
780 tempc = a_m_FEH;
781 break;
782 case a_i_QAF:
783 tempc = a_m_QAF;
784 break;
785 case a_i_KAF:
786 tempc = a_m_KAF;
787 break;
788 case a_i_LAM:
789 tempc = a_m_LAM;
790 break;
791 case a_i_MEEM:
792 tempc = a_m_MEEM;
793 break;
794 case a_i_NOON:
795 tempc = a_m_NOON;
796 break;
797 case a_i_HEH:
798 tempc = a_m_HEH;
799 break;
800 case a_i_YEH:
801 tempc = a_m_YEH;
802 break;
803 default:
804 tempc = 0;
805 }
806
807 return tempc;
808}
809
810
811/*
812 * Change shape - from Final to Medial
813 */
814 static int
Bram Moolenaar7454a062016-01-30 15:14:10 +0100815chg_c_f2m(int cur_c)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000816{
817 int tempc;
818
819 switch (cur_c)
820 {
821 /* NOTE: these encodings are multi-positional, no ?
822 case a_f_ALEF_MADDA:
823 case a_f_ALEF_HAMZA_ABOVE:
824 case a_f_ALEF_HAMZA_BELOW:
825 */
826 case a_f_YEH_HAMZA:
827 tempc = a_m_YEH_HAMZA;
828 break;
829 case a_f_WAW_HAMZA: /* exceptions */
830 case a_f_ALEF:
831 case a_f_TEH_MARBUTA:
832 case a_f_DAL:
833 case a_f_THAL:
834 case a_f_REH:
835 case a_f_ZAIN:
836 case a_f_WAW:
837 case a_f_ALEF_MAKSURA:
838 tempc = cur_c;
839 break;
840 case a_f_BEH:
841 tempc = a_m_BEH;
842 break;
843 case a_f_TEH:
844 tempc = a_m_TEH;
845 break;
846 case a_f_THEH:
847 tempc = a_m_THEH;
848 break;
849 case a_f_JEEM:
850 tempc = a_m_JEEM;
851 break;
852 case a_f_HAH:
853 tempc = a_m_HAH;
854 break;
855 case a_f_KHAH:
856 tempc = a_m_KHAH;
857 break;
858 case a_f_SEEN:
859 tempc = a_m_SEEN;
860 break;
861 case a_f_SHEEN:
862 tempc = a_m_SHEEN;
863 break;
864 case a_f_SAD:
865 tempc = a_m_SAD;
866 break;
867 case a_f_DAD:
868 tempc = a_m_DAD;
869 break;
870 case a_f_TAH:
871 tempc = a_m_TAH;
872 break;
873 case a_f_ZAH:
874 tempc = a_m_ZAH;
875 break;
876 case a_f_AIN:
877 tempc = a_m_AIN;
878 break;
879 case a_f_GHAIN:
880 tempc = a_m_GHAIN;
881 break;
882 case a_f_FEH:
883 tempc = a_m_FEH;
884 break;
885 case a_f_QAF:
886 tempc = a_m_QAF;
887 break;
888 case a_f_KAF:
889 tempc = a_m_KAF;
890 break;
891 case a_f_LAM:
892 tempc = a_m_LAM;
893 break;
894 case a_f_MEEM:
895 tempc = a_m_MEEM;
896 break;
897 case a_f_NOON:
898 tempc = a_m_NOON;
899 break;
900 case a_f_HEH:
901 tempc = a_m_HEH;
902 break;
903 case a_f_YEH:
904 tempc = a_m_YEH;
905 break;
906 /* NOTE: these encodings are multi-positional, no ?
907 case a_f_LAM_ALEF_MADDA_ABOVE:
908 case a_f_LAM_ALEF_HAMZA_ABOVE:
909 case a_f_LAM_ALEF_HAMZA_BELOW:
910 case a_f_LAM_ALEF:
911 */
912 default:
913 tempc = 0;
914 }
915
916 return tempc;
917}
918
919
920/*
921 * Change shape - from Combination (2 char) to an Isolated
922 */
923 static int
Bram Moolenaar7454a062016-01-30 15:14:10 +0100924chg_c_laa2i(int hid_c)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000925{
926 int tempc;
927
928 switch (hid_c)
929 {
930 case a_ALEF_MADDA:
931 tempc = a_s_LAM_ALEF_MADDA_ABOVE;
932 break;
933 case a_ALEF_HAMZA_ABOVE:
934 tempc = a_s_LAM_ALEF_HAMZA_ABOVE;
935 break;
936 case a_ALEF_HAMZA_BELOW:
937 tempc = a_s_LAM_ALEF_HAMZA_BELOW;
938 break;
939 case a_ALEF:
940 tempc = a_s_LAM_ALEF;
941 break;
942 default:
943 tempc = 0;
944 }
945
946 return tempc;
947}
948
949
950/*
951 * Change shape - from Combination-Isolated to Final
952 */
953 static int
Bram Moolenaar7454a062016-01-30 15:14:10 +0100954chg_c_laa2f(int hid_c)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000955{
956 int tempc;
957
958 switch (hid_c)
959 {
960 case a_ALEF_MADDA:
961 tempc = a_f_LAM_ALEF_MADDA_ABOVE;
962 break;
963 case a_ALEF_HAMZA_ABOVE:
964 tempc = a_f_LAM_ALEF_HAMZA_ABOVE;
965 break;
966 case a_ALEF_HAMZA_BELOW:
967 tempc = a_f_LAM_ALEF_HAMZA_BELOW;
968 break;
969 case a_ALEF:
970 tempc = a_f_LAM_ALEF;
971 break;
972 default:
973 tempc = 0;
974 }
975
976 return tempc;
977}
978
/*
 * Do "half-shaping" on character "c":
 * - a base letter (A_is_a()) is converted with chg_c_a2i();
 * - otherwise a valid final shape is converted with chg_c_f2m().
 * Returns zero if no shaping was done.
 */
    static int
half_shape(int c)
{
    int half = 0;

    if (A_is_a(c))
	half = chg_c_a2i(c);
    else if (A_is_valid(c) && A_is_f(c))
	half = chg_c_f2m(c);

    return half;
}
991
992/*
993 * Do Arabic shaping on character "c". Returns the shaped character.
994 * out: "ccp" points to the first byte of the character to be shaped.
995 * in/out: "c1p" points to the first composing char for "c".
996 * in: "prev_c" is the previous character (not shaped)
997 * in: "prev_c1" is the first composing char for the previous char
998 * (not shaped)
999 * in: "next_c" is the next character (not shaped).
1000 */
1001 int
Bram Moolenaar7454a062016-01-30 15:14:10 +01001002arabic_shape(
1003 int c,
1004 int *ccp,
1005 int *c1p,
1006 int prev_c,
1007 int prev_c1,
1008 int next_c)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001009{
1010 int curr_c;
1011 int shape_c;
1012 int curr_laa;
1013 int prev_laa;
1014
1015 /* Deal only with Arabic character, pass back all others */
1016 if (!A_is_ok(c))
1017 return c;
1018
1019 /* half-shape current and previous character */
1020 shape_c = half_shape(prev_c);
1021
1022 /* Save away current character */
1023 curr_c = c;
1024
1025 curr_laa = A_firstc_laa(c, *c1p);
1026 prev_laa = A_firstc_laa(prev_c, prev_c1);
1027
1028 if (curr_laa)
1029 {
1030 if (A_is_valid(prev_c) && !A_is_f(shape_c)
1031 && !A_is_s(shape_c) && !prev_laa)
1032 curr_c = chg_c_laa2f(curr_laa);
1033 else
1034 curr_c = chg_c_laa2i(curr_laa);
1035
1036 /* Remove the composing character */
1037 *c1p = 0;
1038 }
1039 else if (!A_is_valid(prev_c) && A_is_valid(next_c))
1040 curr_c = chg_c_a2i(c);
1041 else if (!shape_c || A_is_f(shape_c) || A_is_s(shape_c) || prev_laa)
1042 curr_c = A_is_valid(next_c) ? chg_c_a2i(c) : chg_c_a2s(c);
1043 else if (A_is_valid(next_c))
1044 curr_c = A_is_iso(c) ? chg_c_a2m(c) : chg_c_i2m(c);
1045 else if (A_is_valid(prev_c))
1046 curr_c = chg_c_a2f(c);
1047 else
1048 curr_c = chg_c_a2s(c);
1049
1050 /* Sanity check -- curr_c should, in the future, never be 0.
1051 * We should, in the future, insert a fatal error here. */
1052 if (curr_c == NUL)
1053 curr_c = c;
1054
1055 if (curr_c != c && ccp != NULL)
1056 {
Bram Moolenaar9a920d82012-06-01 15:21:02 +02001057 char_u buf[MB_MAXBYTES + 1];
Bram Moolenaar071d4272004-06-13 20:20:40 +00001058
1059 /* Update the first byte of the character. */
1060 (*mb_char2bytes)(curr_c, buf);
1061 *ccp = buf[0];
1062 }
1063
1064 /* Return the shaped character */
1065 return curr_c;
1066}
1067
1068
1069/*
1070 * A_firstc_laa returns first character of LAA combination if it exists
1071 */
1072 static int
Bram Moolenaar7454a062016-01-30 15:14:10 +01001073A_firstc_laa(
1074 int c, /* base character */
1075 int c1) /* first composing character */
Bram Moolenaar071d4272004-06-13 20:20:40 +00001076{
1077 if (c1 != NUL && c == a_LAM && !A_is_harakat(c1))
1078 return c1;
1079 return 0;
1080}
1081
1082
1083/*
1084 * A_is_harakat returns TRUE if 'c' is an Arabic Harakat character
1085 * (harakat/tanween)
1086 */
1087 static int
Bram Moolenaar7454a062016-01-30 15:14:10 +01001088A_is_harakat(int c)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001089{
1090 return (c >= a_FATHATAN && c <= a_SUKUN);
1091}
1092
1093
1094/*
1095 * A_is_iso returns TRUE if 'c' is an Arabic ISO-8859-6 character
1096 * (alphabet/number/punctuation)
1097 */
1098 static int
Bram Moolenaar7454a062016-01-30 15:14:10 +01001099A_is_iso(int c)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001100{
1101 return ((c >= a_HAMZA && c <= a_GHAIN)
1102 || (c >= a_TATWEEL && c <= a_HAMZA_BELOW)
1103 || c == a_MINI_ALEF);
1104}
1105
1106
1107/*
1108 * A_is_formb returns TRUE if 'c' is an Arabic 10646-1 FormB character
1109 * (alphabet/number/punctuation)
1110 */
1111 static int
Bram Moolenaar7454a062016-01-30 15:14:10 +01001112A_is_formb(int c)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001113{
1114 return ((c >= a_s_FATHATAN && c <= a_s_DAMMATAN)
1115 || c == a_s_KASRATAN
1116 || (c >= a_s_FATHA && c <= a_f_LAM_ALEF)
1117 || c == a_BYTE_ORDER_MARK);
1118}
1119
1120
/*
 * A_is_ok returns non-zero if 'c' is an Arabic 10646 character, either
 * 8859-6 (A_is_iso()) or Form-B (A_is_formb()).
 */
    static int
A_is_ok(int c)
{
    /* "either one" == "not (neither)" */
    return !(!A_is_iso(c) && !A_is_formb(c));
}
1129
1130
/*
 * A_is_valid returns non-zero if 'c' is an Arabic 10646 (8859-6 or Form-B)
 * character, with the special characters (see A_is_special()) excluded.
 */
    static int
A_is_valid(int c)
{
    /* "ok and not special" == "not (not ok, or special)" */
    return !(!A_is_ok(c) || A_is_special(c));
}
1140
1141
1142/*
1143 * A_is_special returns TRUE if 'c' is not a special Arabic character.
1144 * Specials don't adhere to most of the rules.
1145 */
1146 static int
Bram Moolenaar7454a062016-01-30 15:14:10 +01001147A_is_special(int c)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001148{
1149 return (c == a_HAMZA || c == a_s_HAMZA);
1150}