blob: 818aae61da47e156a035fc1ea1401b3baed6d57e [file] [log] [blame]
Bram Moolenaar071d4272004-06-13 20:20:40 +00001/* vi:set ts=8 sts=4 sw=4:
2 *
3 * VIM - Vi IMproved by Bram Moolenaar
4 *
5 * Do ":help uganda" in Vim to read copying and usage conditions.
6 * Do ":help credits" in Vim to see a list of people who contributed.
7 * See README.txt for an overview of the Vim source code.
8 */
9
10/*
11 * arabic.c: functions for Arabic language
12 *
Bram Moolenaar071d4272004-06-13 20:20:40 +000013 * Author: Nadim Shaikli & Isam Bayazidi
Bram Moolenaar071d4272004-06-13 20:20:40 +000014 */
15
Bram Moolenaar75464dc2016-07-02 20:27:50 +020016#include "vim.h"
17
18#if defined(FEAT_ARABIC) || defined(PROTO)
19
/*
 * Forward declarations for the file-local helpers.
 * Parameter names match the definitions further down; in particular
 * A_firstc_laa() takes the base character first and the composing character
 * second.
 */

/* Character classification. */
static int A_is_a(int cur_c);
static int A_is_s(int cur_c);
static int A_is_f(int cur_c);
static int A_is_harakat(int c);
static int A_is_iso(int c);
static int A_is_formb(int c);
static int A_is_ok(int c);
static int A_is_valid(int c);
static int A_is_special(int c);

/* Shape conversions. */
static int chg_c_a2s(int cur_c);
static int chg_c_a2i(int cur_c);
static int chg_c_a2m(int cur_c);
static int chg_c_a2f(int cur_c);
static int chg_c_i2m(int cur_c);
static int chg_c_f2m(int cur_c);
static int chg_c_laa2i(int hid_c);
static int chg_c_laa2f(int hid_c);

/* Helpers used by arabic_shape(). */
static int half_shape(int c);
static int A_firstc_laa(int c, int c1);
Bram Moolenaar071d4272004-06-13 20:20:40 +000039
40
41/*
42 * Returns True if c is an ISO-8859-6 shaped ARABIC letter (user entered)
43 */
44 static int
Bram Moolenaar7454a062016-01-30 15:14:10 +010045A_is_a(int cur_c)
Bram Moolenaar071d4272004-06-13 20:20:40 +000046{
47 switch (cur_c)
48 {
49 case a_HAMZA:
50 case a_ALEF_MADDA:
51 case a_ALEF_HAMZA_ABOVE:
52 case a_WAW_HAMZA:
53 case a_ALEF_HAMZA_BELOW:
54 case a_YEH_HAMZA:
55 case a_ALEF:
56 case a_BEH:
57 case a_TEH_MARBUTA:
58 case a_TEH:
59 case a_THEH:
60 case a_JEEM:
61 case a_HAH:
62 case a_KHAH:
63 case a_DAL:
64 case a_THAL:
65 case a_REH:
66 case a_ZAIN:
67 case a_SEEN:
68 case a_SHEEN:
69 case a_SAD:
70 case a_DAD:
71 case a_TAH:
72 case a_ZAH:
73 case a_AIN:
74 case a_GHAIN:
75 case a_TATWEEL:
76 case a_FEH:
77 case a_QAF:
78 case a_KAF:
79 case a_LAM:
80 case a_MEEM:
81 case a_NOON:
82 case a_HEH:
83 case a_WAW:
84 case a_ALEF_MAKSURA:
85 case a_YEH:
86 return TRUE;
87 }
88
89 return FALSE;
90}
91
92
93/*
94 * Returns True if c is an Isolated Form-B ARABIC letter
95 */
96 static int
Bram Moolenaar7454a062016-01-30 15:14:10 +010097A_is_s(int cur_c)
Bram Moolenaar071d4272004-06-13 20:20:40 +000098{
99 switch (cur_c)
100 {
101 case a_s_HAMZA:
102 case a_s_ALEF_MADDA:
103 case a_s_ALEF_HAMZA_ABOVE:
104 case a_s_WAW_HAMZA:
105 case a_s_ALEF_HAMZA_BELOW:
106 case a_s_YEH_HAMZA:
107 case a_s_ALEF:
108 case a_s_BEH:
109 case a_s_TEH_MARBUTA:
110 case a_s_TEH:
111 case a_s_THEH:
112 case a_s_JEEM:
113 case a_s_HAH:
114 case a_s_KHAH:
115 case a_s_DAL:
116 case a_s_THAL:
117 case a_s_REH:
118 case a_s_ZAIN:
119 case a_s_SEEN:
120 case a_s_SHEEN:
121 case a_s_SAD:
122 case a_s_DAD:
123 case a_s_TAH:
124 case a_s_ZAH:
125 case a_s_AIN:
126 case a_s_GHAIN:
127 case a_s_FEH:
128 case a_s_QAF:
129 case a_s_KAF:
130 case a_s_LAM:
131 case a_s_MEEM:
132 case a_s_NOON:
133 case a_s_HEH:
134 case a_s_WAW:
135 case a_s_ALEF_MAKSURA:
136 case a_s_YEH:
137 return TRUE;
138 }
139
140 return FALSE;
141}
142
143
144/*
145 * Returns True if c is a Final shape of an ARABIC letter
146 */
147 static int
Bram Moolenaar7454a062016-01-30 15:14:10 +0100148A_is_f(int cur_c)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000149{
150 switch (cur_c)
151 {
152 case a_f_ALEF_MADDA:
153 case a_f_ALEF_HAMZA_ABOVE:
154 case a_f_WAW_HAMZA:
155 case a_f_ALEF_HAMZA_BELOW:
156 case a_f_YEH_HAMZA:
157 case a_f_ALEF:
158 case a_f_BEH:
159 case a_f_TEH_MARBUTA:
160 case a_f_TEH:
161 case a_f_THEH:
162 case a_f_JEEM:
163 case a_f_HAH:
164 case a_f_KHAH:
165 case a_f_DAL:
166 case a_f_THAL:
167 case a_f_REH:
168 case a_f_ZAIN:
169 case a_f_SEEN:
170 case a_f_SHEEN:
171 case a_f_SAD:
172 case a_f_DAD:
173 case a_f_TAH:
174 case a_f_ZAH:
175 case a_f_AIN:
176 case a_f_GHAIN:
177 case a_f_FEH:
178 case a_f_QAF:
179 case a_f_KAF:
180 case a_f_LAM:
181 case a_f_MEEM:
182 case a_f_NOON:
183 case a_f_HEH:
184 case a_f_WAW:
185 case a_f_ALEF_MAKSURA:
186 case a_f_YEH:
187 case a_f_LAM_ALEF_MADDA_ABOVE:
188 case a_f_LAM_ALEF_HAMZA_ABOVE:
189 case a_f_LAM_ALEF_HAMZA_BELOW:
190 case a_f_LAM_ALEF:
191 return TRUE;
192 }
193 return FALSE;
194}
195
196
197/*
198 * Change shape - from ISO-8859-6/Isolated to Form-B Isolated
199 */
200 static int
Bram Moolenaar7454a062016-01-30 15:14:10 +0100201chg_c_a2s(int cur_c)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000202{
203 int tempc;
204
205 switch (cur_c)
206 {
207 case a_HAMZA:
208 tempc = a_s_HAMZA;
209 break;
210 case a_ALEF_MADDA:
211 tempc = a_s_ALEF_MADDA;
212 break;
213 case a_ALEF_HAMZA_ABOVE:
214 tempc = a_s_ALEF_HAMZA_ABOVE;
215 break;
216 case a_WAW_HAMZA:
217 tempc = a_s_WAW_HAMZA;
218 break;
219 case a_ALEF_HAMZA_BELOW:
220 tempc = a_s_ALEF_HAMZA_BELOW;
221 break;
222 case a_YEH_HAMZA:
223 tempc = a_s_YEH_HAMZA;
224 break;
225 case a_ALEF:
226 tempc = a_s_ALEF;
227 break;
228 case a_TEH_MARBUTA:
229 tempc = a_s_TEH_MARBUTA;
230 break;
231 case a_DAL:
232 tempc = a_s_DAL;
233 break;
234 case a_THAL:
235 tempc = a_s_THAL;
236 break;
237 case a_REH:
238 tempc = a_s_REH;
239 break;
240 case a_ZAIN:
241 tempc = a_s_ZAIN;
242 break;
243 case a_TATWEEL: /* exceptions */
244 tempc = cur_c;
245 break;
246 case a_WAW:
247 tempc = a_s_WAW;
248 break;
249 case a_ALEF_MAKSURA:
250 tempc = a_s_ALEF_MAKSURA;
251 break;
252 case a_BEH:
253 tempc = a_s_BEH;
254 break;
255 case a_TEH:
256 tempc = a_s_TEH;
257 break;
258 case a_THEH:
259 tempc = a_s_THEH;
260 break;
261 case a_JEEM:
262 tempc = a_s_JEEM;
263 break;
264 case a_HAH:
265 tempc = a_s_HAH;
266 break;
267 case a_KHAH:
268 tempc = a_s_KHAH;
269 break;
270 case a_SEEN:
271 tempc = a_s_SEEN;
272 break;
273 case a_SHEEN:
274 tempc = a_s_SHEEN;
275 break;
276 case a_SAD:
277 tempc = a_s_SAD;
278 break;
279 case a_DAD:
280 tempc = a_s_DAD;
281 break;
282 case a_TAH:
283 tempc = a_s_TAH;
284 break;
285 case a_ZAH:
286 tempc = a_s_ZAH;
287 break;
288 case a_AIN:
289 tempc = a_s_AIN;
290 break;
291 case a_GHAIN:
292 tempc = a_s_GHAIN;
293 break;
294 case a_FEH:
295 tempc = a_s_FEH;
296 break;
297 case a_QAF:
298 tempc = a_s_QAF;
299 break;
300 case a_KAF:
301 tempc = a_s_KAF;
302 break;
303 case a_LAM:
304 tempc = a_s_LAM;
305 break;
306 case a_MEEM:
307 tempc = a_s_MEEM;
308 break;
309 case a_NOON:
310 tempc = a_s_NOON;
311 break;
312 case a_HEH:
313 tempc = a_s_HEH;
314 break;
315 case a_YEH:
316 tempc = a_s_YEH;
317 break;
318 default:
319 tempc = 0;
320 }
321
322 return tempc;
323}
324
325
326/*
327 * Change shape - from ISO-8859-6/Isolated to Initial
328 */
329 static int
Bram Moolenaar7454a062016-01-30 15:14:10 +0100330chg_c_a2i(int cur_c)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000331{
332 int tempc;
333
334 switch (cur_c)
335 {
336 case a_YEH_HAMZA:
337 tempc = a_i_YEH_HAMZA;
338 break;
339 case a_HAMZA: /* exceptions */
340 tempc = a_s_HAMZA;
341 break;
342 case a_ALEF_MADDA: /* exceptions */
343 tempc = a_s_ALEF_MADDA;
344 break;
345 case a_ALEF_HAMZA_ABOVE: /* exceptions */
346 tempc = a_s_ALEF_HAMZA_ABOVE;
347 break;
348 case a_WAW_HAMZA: /* exceptions */
349 tempc = a_s_WAW_HAMZA;
350 break;
351 case a_ALEF_HAMZA_BELOW: /* exceptions */
352 tempc = a_s_ALEF_HAMZA_BELOW;
353 break;
354 case a_ALEF: /* exceptions */
355 tempc = a_s_ALEF;
356 break;
357 case a_TEH_MARBUTA: /* exceptions */
358 tempc = a_s_TEH_MARBUTA;
359 break;
360 case a_DAL: /* exceptions */
361 tempc = a_s_DAL;
362 break;
363 case a_THAL: /* exceptions */
364 tempc = a_s_THAL;
365 break;
366 case a_REH: /* exceptions */
367 tempc = a_s_REH;
368 break;
369 case a_ZAIN: /* exceptions */
370 tempc = a_s_ZAIN;
371 break;
372 case a_TATWEEL: /* exceptions */
373 tempc = cur_c;
374 break;
375 case a_WAW: /* exceptions */
376 tempc = a_s_WAW;
377 break;
378 case a_ALEF_MAKSURA: /* exceptions */
379 tempc = a_s_ALEF_MAKSURA;
380 break;
381 case a_BEH:
382 tempc = a_i_BEH;
383 break;
384 case a_TEH:
385 tempc = a_i_TEH;
386 break;
387 case a_THEH:
388 tempc = a_i_THEH;
389 break;
390 case a_JEEM:
391 tempc = a_i_JEEM;
392 break;
393 case a_HAH:
394 tempc = a_i_HAH;
395 break;
396 case a_KHAH:
397 tempc = a_i_KHAH;
398 break;
399 case a_SEEN:
400 tempc = a_i_SEEN;
401 break;
402 case a_SHEEN:
403 tempc = a_i_SHEEN;
404 break;
405 case a_SAD:
406 tempc = a_i_SAD;
407 break;
408 case a_DAD:
409 tempc = a_i_DAD;
410 break;
411 case a_TAH:
412 tempc = a_i_TAH;
413 break;
414 case a_ZAH:
415 tempc = a_i_ZAH;
416 break;
417 case a_AIN:
418 tempc = a_i_AIN;
419 break;
420 case a_GHAIN:
421 tempc = a_i_GHAIN;
422 break;
423 case a_FEH:
424 tempc = a_i_FEH;
425 break;
426 case a_QAF:
427 tempc = a_i_QAF;
428 break;
429 case a_KAF:
430 tempc = a_i_KAF;
431 break;
432 case a_LAM:
433 tempc = a_i_LAM;
434 break;
435 case a_MEEM:
436 tempc = a_i_MEEM;
437 break;
438 case a_NOON:
439 tempc = a_i_NOON;
440 break;
441 case a_HEH:
442 tempc = a_i_HEH;
443 break;
444 case a_YEH:
445 tempc = a_i_YEH;
446 break;
447 default:
448 tempc = 0;
449 }
450
451 return tempc;
452}
453
454
455/*
456 * Change shape - from ISO-8859-6/Isolated to Medial
457 */
458 static int
Bram Moolenaar7454a062016-01-30 15:14:10 +0100459chg_c_a2m(int cur_c)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000460{
461 int tempc;
462
463 switch (cur_c)
464 {
465 case a_HAMZA: /* exception */
466 tempc = a_s_HAMZA;
467 break;
468 case a_ALEF_MADDA: /* exception */
469 tempc = a_f_ALEF_MADDA;
470 break;
471 case a_ALEF_HAMZA_ABOVE: /* exception */
472 tempc = a_f_ALEF_HAMZA_ABOVE;
473 break;
474 case a_WAW_HAMZA: /* exception */
475 tempc = a_f_WAW_HAMZA;
476 break;
477 case a_ALEF_HAMZA_BELOW: /* exception */
478 tempc = a_f_ALEF_HAMZA_BELOW;
479 break;
480 case a_YEH_HAMZA:
481 tempc = a_m_YEH_HAMZA;
482 break;
483 case a_ALEF: /* exception */
484 tempc = a_f_ALEF;
485 break;
486 case a_BEH:
487 tempc = a_m_BEH;
488 break;
489 case a_TEH_MARBUTA: /* exception */
490 tempc = a_f_TEH_MARBUTA;
491 break;
492 case a_TEH:
493 tempc = a_m_TEH;
494 break;
495 case a_THEH:
496 tempc = a_m_THEH;
497 break;
498 case a_JEEM:
499 tempc = a_m_JEEM;
500 break;
501 case a_HAH:
502 tempc = a_m_HAH;
503 break;
504 case a_KHAH:
505 tempc = a_m_KHAH;
506 break;
507 case a_DAL: /* exception */
508 tempc = a_f_DAL;
509 break;
510 case a_THAL: /* exception */
511 tempc = a_f_THAL;
512 break;
513 case a_REH: /* exception */
514 tempc = a_f_REH;
515 break;
516 case a_ZAIN: /* exception */
517 tempc = a_f_ZAIN;
518 break;
519 case a_SEEN:
520 tempc = a_m_SEEN;
521 break;
522 case a_SHEEN:
523 tempc = a_m_SHEEN;
524 break;
525 case a_SAD:
526 tempc = a_m_SAD;
527 break;
528 case a_DAD:
529 tempc = a_m_DAD;
530 break;
531 case a_TAH:
532 tempc = a_m_TAH;
533 break;
534 case a_ZAH:
535 tempc = a_m_ZAH;
536 break;
537 case a_AIN:
538 tempc = a_m_AIN;
539 break;
540 case a_GHAIN:
541 tempc = a_m_GHAIN;
542 break;
543 case a_TATWEEL: /* exception */
544 tempc = cur_c;
545 break;
546 case a_FEH:
547 tempc = a_m_FEH;
548 break;
549 case a_QAF:
550 tempc = a_m_QAF;
551 break;
552 case a_KAF:
553 tempc = a_m_KAF;
554 break;
555 case a_LAM:
556 tempc = a_m_LAM;
557 break;
558 case a_MEEM:
559 tempc = a_m_MEEM;
560 break;
561 case a_NOON:
562 tempc = a_m_NOON;
563 break;
564 case a_HEH:
565 tempc = a_m_HEH;
566 break;
567 case a_WAW: /* exception */
568 tempc = a_f_WAW;
569 break;
570 case a_ALEF_MAKSURA: /* exception */
571 tempc = a_f_ALEF_MAKSURA;
572 break;
573 case a_YEH:
574 tempc = a_m_YEH;
575 break;
576 default:
577 tempc = 0;
578 }
579
580 return tempc;
581}
582
583
584/*
585 * Change shape - from ISO-8859-6/Isolated to final
586 */
587 static int
Bram Moolenaar7454a062016-01-30 15:14:10 +0100588chg_c_a2f(int cur_c)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000589{
590 int tempc;
591
592 /* NOTE: these encodings need to be accounted for
593
594 a_f_ALEF_MADDA;
595 a_f_ALEF_HAMZA_ABOVE;
596 a_f_ALEF_HAMZA_BELOW;
597 a_f_LAM_ALEF_MADDA_ABOVE;
598 a_f_LAM_ALEF_HAMZA_ABOVE;
599 a_f_LAM_ALEF_HAMZA_BELOW;
600 */
601
602 switch (cur_c)
603 {
604 case a_HAMZA: /* exception */
605 tempc = a_s_HAMZA;
606 break;
607 case a_ALEF_MADDA:
608 tempc = a_f_ALEF_MADDA;
609 break;
610 case a_ALEF_HAMZA_ABOVE:
611 tempc = a_f_ALEF_HAMZA_ABOVE;
612 break;
613 case a_WAW_HAMZA:
614 tempc = a_f_WAW_HAMZA;
615 break;
616 case a_ALEF_HAMZA_BELOW:
617 tempc = a_f_ALEF_HAMZA_BELOW;
618 break;
619 case a_YEH_HAMZA:
620 tempc = a_f_YEH_HAMZA;
621 break;
622 case a_ALEF:
623 tempc = a_f_ALEF;
624 break;
625 case a_BEH:
626 tempc = a_f_BEH;
627 break;
628 case a_TEH_MARBUTA:
629 tempc = a_f_TEH_MARBUTA;
630 break;
631 case a_TEH:
632 tempc = a_f_TEH;
633 break;
634 case a_THEH:
635 tempc = a_f_THEH;
636 break;
637 case a_JEEM:
638 tempc = a_f_JEEM;
639 break;
640 case a_HAH:
641 tempc = a_f_HAH;
642 break;
643 case a_KHAH:
644 tempc = a_f_KHAH;
645 break;
646 case a_DAL:
647 tempc = a_f_DAL;
648 break;
649 case a_THAL:
650 tempc = a_f_THAL;
651 break;
652 case a_REH:
653 tempc = a_f_REH;
654 break;
655 case a_ZAIN:
656 tempc = a_f_ZAIN;
657 break;
658 case a_SEEN:
659 tempc = a_f_SEEN;
660 break;
661 case a_SHEEN:
662 tempc = a_f_SHEEN;
663 break;
664 case a_SAD:
665 tempc = a_f_SAD;
666 break;
667 case a_DAD:
668 tempc = a_f_DAD;
669 break;
670 case a_TAH:
671 tempc = a_f_TAH;
672 break;
673 case a_ZAH:
674 tempc = a_f_ZAH;
675 break;
676 case a_AIN:
677 tempc = a_f_AIN;
678 break;
679 case a_GHAIN:
680 tempc = a_f_GHAIN;
681 break;
682 case a_TATWEEL: /* exception */
683 tempc = cur_c;
684 break;
685 case a_FEH:
686 tempc = a_f_FEH;
687 break;
688 case a_QAF:
689 tempc = a_f_QAF;
690 break;
691 case a_KAF:
692 tempc = a_f_KAF;
693 break;
694 case a_LAM:
695 tempc = a_f_LAM;
696 break;
697 case a_MEEM:
698 tempc = a_f_MEEM;
699 break;
700 case a_NOON:
701 tempc = a_f_NOON;
702 break;
703 case a_HEH:
704 tempc = a_f_HEH;
705 break;
706 case a_WAW:
707 tempc = a_f_WAW;
708 break;
709 case a_ALEF_MAKSURA:
710 tempc = a_f_ALEF_MAKSURA;
711 break;
712 case a_YEH:
713 tempc = a_f_YEH;
714 break;
715 default:
716 tempc = 0;
717 }
718
719 return tempc;
720}
721
722
723/*
724 * Change shape - from Initial to Medial
725 */
726 static int
Bram Moolenaar7454a062016-01-30 15:14:10 +0100727chg_c_i2m(int cur_c)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000728{
729 int tempc;
730
731 switch (cur_c)
732 {
733 case a_i_YEH_HAMZA:
734 tempc = a_m_YEH_HAMZA;
735 break;
736 case a_i_BEH:
737 tempc = a_m_BEH;
738 break;
739 case a_i_TEH:
740 tempc = a_m_TEH;
741 break;
742 case a_i_THEH:
743 tempc = a_m_THEH;
744 break;
745 case a_i_JEEM:
746 tempc = a_m_JEEM;
747 break;
748 case a_i_HAH:
749 tempc = a_m_HAH;
750 break;
751 case a_i_KHAH:
752 tempc = a_m_KHAH;
753 break;
754 case a_i_SEEN:
755 tempc = a_m_SEEN;
756 break;
757 case a_i_SHEEN:
758 tempc = a_m_SHEEN;
759 break;
760 case a_i_SAD:
761 tempc = a_m_SAD;
762 break;
763 case a_i_DAD:
764 tempc = a_m_DAD;
765 break;
766 case a_i_TAH:
767 tempc = a_m_TAH;
768 break;
769 case a_i_ZAH:
770 tempc = a_m_ZAH;
771 break;
772 case a_i_AIN:
773 tempc = a_m_AIN;
774 break;
775 case a_i_GHAIN:
776 tempc = a_m_GHAIN;
777 break;
778 case a_i_FEH:
779 tempc = a_m_FEH;
780 break;
781 case a_i_QAF:
782 tempc = a_m_QAF;
783 break;
784 case a_i_KAF:
785 tempc = a_m_KAF;
786 break;
787 case a_i_LAM:
788 tempc = a_m_LAM;
789 break;
790 case a_i_MEEM:
791 tempc = a_m_MEEM;
792 break;
793 case a_i_NOON:
794 tempc = a_m_NOON;
795 break;
796 case a_i_HEH:
797 tempc = a_m_HEH;
798 break;
799 case a_i_YEH:
800 tempc = a_m_YEH;
801 break;
802 default:
803 tempc = 0;
804 }
805
806 return tempc;
807}
808
809
810/*
811 * Change shape - from Final to Medial
812 */
813 static int
Bram Moolenaar7454a062016-01-30 15:14:10 +0100814chg_c_f2m(int cur_c)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000815{
816 int tempc;
817
818 switch (cur_c)
819 {
820 /* NOTE: these encodings are multi-positional, no ?
821 case a_f_ALEF_MADDA:
822 case a_f_ALEF_HAMZA_ABOVE:
823 case a_f_ALEF_HAMZA_BELOW:
824 */
825 case a_f_YEH_HAMZA:
826 tempc = a_m_YEH_HAMZA;
827 break;
828 case a_f_WAW_HAMZA: /* exceptions */
829 case a_f_ALEF:
830 case a_f_TEH_MARBUTA:
831 case a_f_DAL:
832 case a_f_THAL:
833 case a_f_REH:
834 case a_f_ZAIN:
835 case a_f_WAW:
836 case a_f_ALEF_MAKSURA:
837 tempc = cur_c;
838 break;
839 case a_f_BEH:
840 tempc = a_m_BEH;
841 break;
842 case a_f_TEH:
843 tempc = a_m_TEH;
844 break;
845 case a_f_THEH:
846 tempc = a_m_THEH;
847 break;
848 case a_f_JEEM:
849 tempc = a_m_JEEM;
850 break;
851 case a_f_HAH:
852 tempc = a_m_HAH;
853 break;
854 case a_f_KHAH:
855 tempc = a_m_KHAH;
856 break;
857 case a_f_SEEN:
858 tempc = a_m_SEEN;
859 break;
860 case a_f_SHEEN:
861 tempc = a_m_SHEEN;
862 break;
863 case a_f_SAD:
864 tempc = a_m_SAD;
865 break;
866 case a_f_DAD:
867 tempc = a_m_DAD;
868 break;
869 case a_f_TAH:
870 tempc = a_m_TAH;
871 break;
872 case a_f_ZAH:
873 tempc = a_m_ZAH;
874 break;
875 case a_f_AIN:
876 tempc = a_m_AIN;
877 break;
878 case a_f_GHAIN:
879 tempc = a_m_GHAIN;
880 break;
881 case a_f_FEH:
882 tempc = a_m_FEH;
883 break;
884 case a_f_QAF:
885 tempc = a_m_QAF;
886 break;
887 case a_f_KAF:
888 tempc = a_m_KAF;
889 break;
890 case a_f_LAM:
891 tempc = a_m_LAM;
892 break;
893 case a_f_MEEM:
894 tempc = a_m_MEEM;
895 break;
896 case a_f_NOON:
897 tempc = a_m_NOON;
898 break;
899 case a_f_HEH:
900 tempc = a_m_HEH;
901 break;
902 case a_f_YEH:
903 tempc = a_m_YEH;
904 break;
905 /* NOTE: these encodings are multi-positional, no ?
906 case a_f_LAM_ALEF_MADDA_ABOVE:
907 case a_f_LAM_ALEF_HAMZA_ABOVE:
908 case a_f_LAM_ALEF_HAMZA_BELOW:
909 case a_f_LAM_ALEF:
910 */
911 default:
912 tempc = 0;
913 }
914
915 return tempc;
916}
917
918
919/*
920 * Change shape - from Combination (2 char) to an Isolated
921 */
922 static int
Bram Moolenaar7454a062016-01-30 15:14:10 +0100923chg_c_laa2i(int hid_c)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000924{
925 int tempc;
926
927 switch (hid_c)
928 {
929 case a_ALEF_MADDA:
930 tempc = a_s_LAM_ALEF_MADDA_ABOVE;
931 break;
932 case a_ALEF_HAMZA_ABOVE:
933 tempc = a_s_LAM_ALEF_HAMZA_ABOVE;
934 break;
935 case a_ALEF_HAMZA_BELOW:
936 tempc = a_s_LAM_ALEF_HAMZA_BELOW;
937 break;
938 case a_ALEF:
939 tempc = a_s_LAM_ALEF;
940 break;
941 default:
942 tempc = 0;
943 }
944
945 return tempc;
946}
947
948
949/*
950 * Change shape - from Combination-Isolated to Final
951 */
952 static int
Bram Moolenaar7454a062016-01-30 15:14:10 +0100953chg_c_laa2f(int hid_c)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000954{
955 int tempc;
956
957 switch (hid_c)
958 {
959 case a_ALEF_MADDA:
960 tempc = a_f_LAM_ALEF_MADDA_ABOVE;
961 break;
962 case a_ALEF_HAMZA_ABOVE:
963 tempc = a_f_LAM_ALEF_HAMZA_ABOVE;
964 break;
965 case a_ALEF_HAMZA_BELOW:
966 tempc = a_f_LAM_ALEF_HAMZA_BELOW;
967 break;
968 case a_ALEF:
969 tempc = a_f_LAM_ALEF;
970 break;
971 default:
972 tempc = 0;
973 }
974
975 return tempc;
976}
977
/*
 * Do "half-shaping" on character "c":
 * - a base letter (A_is_a()) is converted with chg_c_a2i();
 * - otherwise a valid final shape is converted with chg_c_f2m().
 * Returns zero if no shaping was done.
 */
    static int
half_shape(int c)
{
    int half = 0;

    if (A_is_a(c))
        half = chg_c_a2i(c);
    else if (A_is_valid(c))
    {
        if (A_is_f(c))
            half = chg_c_f2m(c);
    }

    return half;
}
990
991/*
992 * Do Arabic shaping on character "c". Returns the shaped character.
993 * out: "ccp" points to the first byte of the character to be shaped.
994 * in/out: "c1p" points to the first composing char for "c".
995 * in: "prev_c" is the previous character (not shaped)
996 * in: "prev_c1" is the first composing char for the previous char
997 * (not shaped)
998 * in: "next_c" is the next character (not shaped).
999 */
1000 int
Bram Moolenaar7454a062016-01-30 15:14:10 +01001001arabic_shape(
1002 int c,
1003 int *ccp,
1004 int *c1p,
1005 int prev_c,
1006 int prev_c1,
1007 int next_c)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001008{
1009 int curr_c;
1010 int shape_c;
1011 int curr_laa;
1012 int prev_laa;
1013
1014 /* Deal only with Arabic character, pass back all others */
1015 if (!A_is_ok(c))
1016 return c;
1017
1018 /* half-shape current and previous character */
1019 shape_c = half_shape(prev_c);
1020
1021 /* Save away current character */
1022 curr_c = c;
1023
1024 curr_laa = A_firstc_laa(c, *c1p);
1025 prev_laa = A_firstc_laa(prev_c, prev_c1);
1026
1027 if (curr_laa)
1028 {
1029 if (A_is_valid(prev_c) && !A_is_f(shape_c)
1030 && !A_is_s(shape_c) && !prev_laa)
1031 curr_c = chg_c_laa2f(curr_laa);
1032 else
1033 curr_c = chg_c_laa2i(curr_laa);
1034
1035 /* Remove the composing character */
1036 *c1p = 0;
1037 }
1038 else if (!A_is_valid(prev_c) && A_is_valid(next_c))
1039 curr_c = chg_c_a2i(c);
1040 else if (!shape_c || A_is_f(shape_c) || A_is_s(shape_c) || prev_laa)
1041 curr_c = A_is_valid(next_c) ? chg_c_a2i(c) : chg_c_a2s(c);
1042 else if (A_is_valid(next_c))
1043 curr_c = A_is_iso(c) ? chg_c_a2m(c) : chg_c_i2m(c);
1044 else if (A_is_valid(prev_c))
1045 curr_c = chg_c_a2f(c);
1046 else
1047 curr_c = chg_c_a2s(c);
1048
1049 /* Sanity check -- curr_c should, in the future, never be 0.
1050 * We should, in the future, insert a fatal error here. */
1051 if (curr_c == NUL)
1052 curr_c = c;
1053
1054 if (curr_c != c && ccp != NULL)
1055 {
Bram Moolenaar9a920d82012-06-01 15:21:02 +02001056 char_u buf[MB_MAXBYTES + 1];
Bram Moolenaar071d4272004-06-13 20:20:40 +00001057
1058 /* Update the first byte of the character. */
1059 (*mb_char2bytes)(curr_c, buf);
1060 *ccp = buf[0];
1061 }
1062
1063 /* Return the shaped character */
1064 return curr_c;
1065}
1066
1067
1068/*
1069 * A_firstc_laa returns first character of LAA combination if it exists
1070 */
1071 static int
Bram Moolenaar7454a062016-01-30 15:14:10 +01001072A_firstc_laa(
1073 int c, /* base character */
1074 int c1) /* first composing character */
Bram Moolenaar071d4272004-06-13 20:20:40 +00001075{
1076 if (c1 != NUL && c == a_LAM && !A_is_harakat(c1))
1077 return c1;
1078 return 0;
1079}
1080
1081
1082/*
1083 * A_is_harakat returns TRUE if 'c' is an Arabic Harakat character
1084 * (harakat/tanween)
1085 */
1086 static int
Bram Moolenaar7454a062016-01-30 15:14:10 +01001087A_is_harakat(int c)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001088{
1089 return (c >= a_FATHATAN && c <= a_SUKUN);
1090}
1091
1092
1093/*
1094 * A_is_iso returns TRUE if 'c' is an Arabic ISO-8859-6 character
1095 * (alphabet/number/punctuation)
1096 */
1097 static int
Bram Moolenaar7454a062016-01-30 15:14:10 +01001098A_is_iso(int c)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001099{
1100 return ((c >= a_HAMZA && c <= a_GHAIN)
1101 || (c >= a_TATWEEL && c <= a_HAMZA_BELOW)
1102 || c == a_MINI_ALEF);
1103}
1104
1105
1106/*
1107 * A_is_formb returns TRUE if 'c' is an Arabic 10646-1 FormB character
1108 * (alphabet/number/punctuation)
1109 */
1110 static int
Bram Moolenaar7454a062016-01-30 15:14:10 +01001111A_is_formb(int c)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001112{
1113 return ((c >= a_s_FATHATAN && c <= a_s_DAMMATAN)
1114 || c == a_s_KASRATAN
1115 || (c >= a_s_FATHA && c <= a_f_LAM_ALEF)
1116 || c == a_BYTE_ORDER_MARK);
1117}
1118
1119
/*
 * A_is_ok returns non-zero if 'c' is an Arabic 10646 character (8859-6 or
 * Form-B), i.e. when either A_is_iso() or A_is_formb() accepts it.
 */
    static int
A_is_ok(int c)
{
    /* Rejected only when both classifiers reject it. */
    return !(!A_is_iso(c) && !A_is_formb(c));
}
1129
1130/*
1131 * A_is_valid returns TRUE if 'c' is an Arabic 10646 (8859-6 or Form-B)
1132 * with some exceptions/exclusions
1133 */
1134 static int
Bram Moolenaar7454a062016-01-30 15:14:10 +01001135A_is_valid(int c)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001136{
1137 return (A_is_ok(c) && !A_is_special(c));
1138}
1139
1140
1141/*
1142 * A_is_special returns TRUE if 'c' is not a special Arabic character.
1143 * Specials don't adhere to most of the rules.
1144 */
1145 static int
Bram Moolenaar7454a062016-01-30 15:14:10 +01001146A_is_special(int c)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001147{
1148 return (c == a_HAMZA || c == a_s_HAMZA);
1149}
Bram Moolenaar75464dc2016-07-02 20:27:50 +02001150
1151#endif /* FEAT_ARABIC */