blob: 9bc16691967938bc73d85d317db00af7185fa6cd [file] [log] [blame]
/* vi:set ts=8 sts=4 sw=4 noet:
 *
 * VIM - Vi IMproved by Bram Moolenaar
 *
 * Do ":help uganda" in Vim to read copying and usage conditions.
 * Do ":help credits" in Vim to see a list of people who contributed.
 * See README.txt for an overview of the Vim source code.
 */
9
/*
 * arabic.c: functions for Arabic language
 *
 * Author: Nadim Shaikli & Isam Bayazidi
 */
15
Bram Moolenaar75464dc2016-07-02 20:27:50 +020016#include "vim.h"
17
18#if defined(FEAT_ARABIC) || defined(PROTO)
19
/*
 * Forward declarations for the file-local helpers.
 */

/* Predicates that classify a single character. */
static int A_is_a(int cur_c);
static int A_is_s(int cur_c);
static int A_is_f(int cur_c);
static int A_is_harakat(int c);
static int A_is_iso(int c);
static int A_is_formb(int c);
static int A_is_ok(int c);
static int A_is_valid(int c);
static int A_is_special(int c);

/* Shape conversions for a single character. */
static int chg_c_a2s(int cur_c);
static int chg_c_a2i(int cur_c);
static int chg_c_a2m(int cur_c);
static int chg_c_a2f(int cur_c);
#if 0
static int chg_c_i2m(int cur_c);
#endif
static int chg_c_f2m(int cur_c);

/* LAM + composing character combinations and half-shaping. */
static int chg_c_laa2i(int hid_c);
static int chg_c_laa2f(int hid_c);
static int half_shape(int c);
/* Parameter names follow the definition: base character first, then the
 * first composing character. */
static int A_firstc_laa(int c, int c1);
Bram Moolenaar071d4272004-06-13 20:20:40 +000041
42
43/*
44 * Returns True if c is an ISO-8859-6 shaped ARABIC letter (user entered)
45 */
46 static int
Bram Moolenaar7454a062016-01-30 15:14:10 +010047A_is_a(int cur_c)
Bram Moolenaar071d4272004-06-13 20:20:40 +000048{
49 switch (cur_c)
50 {
51 case a_HAMZA:
52 case a_ALEF_MADDA:
53 case a_ALEF_HAMZA_ABOVE:
54 case a_WAW_HAMZA:
55 case a_ALEF_HAMZA_BELOW:
56 case a_YEH_HAMZA:
57 case a_ALEF:
58 case a_BEH:
59 case a_TEH_MARBUTA:
60 case a_TEH:
61 case a_THEH:
62 case a_JEEM:
63 case a_HAH:
64 case a_KHAH:
65 case a_DAL:
66 case a_THAL:
67 case a_REH:
68 case a_ZAIN:
69 case a_SEEN:
70 case a_SHEEN:
71 case a_SAD:
72 case a_DAD:
73 case a_TAH:
74 case a_ZAH:
75 case a_AIN:
76 case a_GHAIN:
77 case a_TATWEEL:
78 case a_FEH:
79 case a_QAF:
80 case a_KAF:
81 case a_LAM:
82 case a_MEEM:
83 case a_NOON:
84 case a_HEH:
85 case a_WAW:
86 case a_ALEF_MAKSURA:
87 case a_YEH:
88 return TRUE;
89 }
90
91 return FALSE;
92}
93
94
95/*
96 * Returns True if c is an Isolated Form-B ARABIC letter
97 */
98 static int
Bram Moolenaar7454a062016-01-30 15:14:10 +010099A_is_s(int cur_c)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000100{
101 switch (cur_c)
102 {
103 case a_s_HAMZA:
104 case a_s_ALEF_MADDA:
105 case a_s_ALEF_HAMZA_ABOVE:
106 case a_s_WAW_HAMZA:
107 case a_s_ALEF_HAMZA_BELOW:
108 case a_s_YEH_HAMZA:
109 case a_s_ALEF:
110 case a_s_BEH:
111 case a_s_TEH_MARBUTA:
112 case a_s_TEH:
113 case a_s_THEH:
114 case a_s_JEEM:
115 case a_s_HAH:
116 case a_s_KHAH:
117 case a_s_DAL:
118 case a_s_THAL:
119 case a_s_REH:
120 case a_s_ZAIN:
121 case a_s_SEEN:
122 case a_s_SHEEN:
123 case a_s_SAD:
124 case a_s_DAD:
125 case a_s_TAH:
126 case a_s_ZAH:
127 case a_s_AIN:
128 case a_s_GHAIN:
129 case a_s_FEH:
130 case a_s_QAF:
131 case a_s_KAF:
132 case a_s_LAM:
133 case a_s_MEEM:
134 case a_s_NOON:
135 case a_s_HEH:
136 case a_s_WAW:
137 case a_s_ALEF_MAKSURA:
138 case a_s_YEH:
139 return TRUE;
140 }
141
142 return FALSE;
143}
144
145
146/*
147 * Returns True if c is a Final shape of an ARABIC letter
148 */
149 static int
Bram Moolenaar7454a062016-01-30 15:14:10 +0100150A_is_f(int cur_c)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000151{
152 switch (cur_c)
153 {
154 case a_f_ALEF_MADDA:
155 case a_f_ALEF_HAMZA_ABOVE:
156 case a_f_WAW_HAMZA:
157 case a_f_ALEF_HAMZA_BELOW:
158 case a_f_YEH_HAMZA:
159 case a_f_ALEF:
160 case a_f_BEH:
161 case a_f_TEH_MARBUTA:
162 case a_f_TEH:
163 case a_f_THEH:
164 case a_f_JEEM:
165 case a_f_HAH:
166 case a_f_KHAH:
167 case a_f_DAL:
168 case a_f_THAL:
169 case a_f_REH:
170 case a_f_ZAIN:
171 case a_f_SEEN:
172 case a_f_SHEEN:
173 case a_f_SAD:
174 case a_f_DAD:
175 case a_f_TAH:
176 case a_f_ZAH:
177 case a_f_AIN:
178 case a_f_GHAIN:
179 case a_f_FEH:
180 case a_f_QAF:
181 case a_f_KAF:
182 case a_f_LAM:
183 case a_f_MEEM:
184 case a_f_NOON:
185 case a_f_HEH:
186 case a_f_WAW:
187 case a_f_ALEF_MAKSURA:
188 case a_f_YEH:
189 case a_f_LAM_ALEF_MADDA_ABOVE:
190 case a_f_LAM_ALEF_HAMZA_ABOVE:
191 case a_f_LAM_ALEF_HAMZA_BELOW:
192 case a_f_LAM_ALEF:
193 return TRUE;
194 }
195 return FALSE;
196}
197
198
199/*
200 * Change shape - from ISO-8859-6/Isolated to Form-B Isolated
201 */
202 static int
Bram Moolenaar7454a062016-01-30 15:14:10 +0100203chg_c_a2s(int cur_c)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000204{
Bram Moolenaar071d4272004-06-13 20:20:40 +0000205 switch (cur_c)
206 {
Bram Moolenaar5f53dd32017-03-01 14:02:30 +0100207 case a_HAMZA: return a_s_HAMZA;
208 case a_ALEF_MADDA: return a_s_ALEF_MADDA;
209 case a_ALEF_HAMZA_ABOVE: return a_s_ALEF_HAMZA_ABOVE;
210 case a_WAW_HAMZA: return a_s_WAW_HAMZA;
211 case a_ALEF_HAMZA_BELOW: return a_s_ALEF_HAMZA_BELOW;
212 case a_YEH_HAMZA: return a_s_YEH_HAMZA;
213 case a_ALEF: return a_s_ALEF;
214 case a_TEH_MARBUTA: return a_s_TEH_MARBUTA;
215 case a_DAL: return a_s_DAL;
216 case a_THAL: return a_s_THAL;
217 case a_REH: return a_s_REH;
218 case a_ZAIN: return a_s_ZAIN;
219 case a_TATWEEL: return cur_c; /* exceptions */
220 case a_WAW: return a_s_WAW;
221 case a_ALEF_MAKSURA: return a_s_ALEF_MAKSURA;
222 case a_BEH: return a_s_BEH;
223 case a_TEH: return a_s_TEH;
224 case a_THEH: return a_s_THEH;
225 case a_JEEM: return a_s_JEEM;
226 case a_HAH: return a_s_HAH;
227 case a_KHAH: return a_s_KHAH;
228 case a_SEEN: return a_s_SEEN;
229 case a_SHEEN: return a_s_SHEEN;
230 case a_SAD: return a_s_SAD;
231 case a_DAD: return a_s_DAD;
232 case a_TAH: return a_s_TAH;
233 case a_ZAH: return a_s_ZAH;
234 case a_AIN: return a_s_AIN;
235 case a_GHAIN: return a_s_GHAIN;
236 case a_FEH: return a_s_FEH;
237 case a_QAF: return a_s_QAF;
238 case a_KAF: return a_s_KAF;
239 case a_LAM: return a_s_LAM;
240 case a_MEEM: return a_s_MEEM;
241 case a_NOON: return a_s_NOON;
242 case a_HEH: return a_s_HEH;
243 case a_YEH: return a_s_YEH;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000244 }
Bram Moolenaar5f53dd32017-03-01 14:02:30 +0100245 return 0;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000246}
247
248
249/*
250 * Change shape - from ISO-8859-6/Isolated to Initial
251 */
252 static int
Bram Moolenaar7454a062016-01-30 15:14:10 +0100253chg_c_a2i(int cur_c)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000254{
Bram Moolenaar071d4272004-06-13 20:20:40 +0000255 switch (cur_c)
256 {
Bram Moolenaar5f53dd32017-03-01 14:02:30 +0100257 case a_YEH_HAMZA: return a_i_YEH_HAMZA;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000258 case a_HAMZA: /* exceptions */
Bram Moolenaar5f53dd32017-03-01 14:02:30 +0100259 return a_s_HAMZA;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000260 case a_ALEF_MADDA: /* exceptions */
Bram Moolenaar5f53dd32017-03-01 14:02:30 +0100261 return a_s_ALEF_MADDA;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000262 case a_ALEF_HAMZA_ABOVE: /* exceptions */
Bram Moolenaar5f53dd32017-03-01 14:02:30 +0100263 return a_s_ALEF_HAMZA_ABOVE;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000264 case a_WAW_HAMZA: /* exceptions */
Bram Moolenaar5f53dd32017-03-01 14:02:30 +0100265 return a_s_WAW_HAMZA;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000266 case a_ALEF_HAMZA_BELOW: /* exceptions */
Bram Moolenaar5f53dd32017-03-01 14:02:30 +0100267 return a_s_ALEF_HAMZA_BELOW;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000268 case a_ALEF: /* exceptions */
Bram Moolenaar5f53dd32017-03-01 14:02:30 +0100269 return a_s_ALEF;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000270 case a_TEH_MARBUTA: /* exceptions */
Bram Moolenaar5f53dd32017-03-01 14:02:30 +0100271 return a_s_TEH_MARBUTA;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000272 case a_DAL: /* exceptions */
Bram Moolenaar5f53dd32017-03-01 14:02:30 +0100273 return a_s_DAL;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000274 case a_THAL: /* exceptions */
Bram Moolenaar5f53dd32017-03-01 14:02:30 +0100275 return a_s_THAL;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000276 case a_REH: /* exceptions */
Bram Moolenaar5f53dd32017-03-01 14:02:30 +0100277 return a_s_REH;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000278 case a_ZAIN: /* exceptions */
Bram Moolenaar5f53dd32017-03-01 14:02:30 +0100279 return a_s_ZAIN;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000280 case a_TATWEEL: /* exceptions */
Bram Moolenaar5f53dd32017-03-01 14:02:30 +0100281 return cur_c;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000282 case a_WAW: /* exceptions */
Bram Moolenaar5f53dd32017-03-01 14:02:30 +0100283 return a_s_WAW;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000284 case a_ALEF_MAKSURA: /* exceptions */
Bram Moolenaar5f53dd32017-03-01 14:02:30 +0100285 return a_s_ALEF_MAKSURA;
286 case a_BEH: return a_i_BEH;
287 case a_TEH: return a_i_TEH;
288 case a_THEH: return a_i_THEH;
289 case a_JEEM: return a_i_JEEM;
290 case a_HAH: return a_i_HAH;
291 case a_KHAH: return a_i_KHAH;
292 case a_SEEN: return a_i_SEEN;
293 case a_SHEEN: return a_i_SHEEN;
294 case a_SAD: return a_i_SAD;
295 case a_DAD: return a_i_DAD;
296 case a_TAH: return a_i_TAH;
297 case a_ZAH: return a_i_ZAH;
298 case a_AIN: return a_i_AIN;
299 case a_GHAIN: return a_i_GHAIN;
300 case a_FEH: return a_i_FEH;
301 case a_QAF: return a_i_QAF;
302 case a_KAF: return a_i_KAF;
303 case a_LAM: return a_i_LAM;
304 case a_MEEM: return a_i_MEEM;
305 case a_NOON: return a_i_NOON;
306 case a_HEH: return a_i_HEH;
307 case a_YEH: return a_i_YEH;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000308 }
Bram Moolenaar5f53dd32017-03-01 14:02:30 +0100309 return 0;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000310}
311
312
313/*
314 * Change shape - from ISO-8859-6/Isolated to Medial
315 */
316 static int
Bram Moolenaar7454a062016-01-30 15:14:10 +0100317chg_c_a2m(int cur_c)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000318{
Bram Moolenaar071d4272004-06-13 20:20:40 +0000319 switch (cur_c)
320 {
Bram Moolenaar5f53dd32017-03-01 14:02:30 +0100321 case a_HAMZA: return a_s_HAMZA; /* exception */
322 case a_ALEF_MADDA: return a_f_ALEF_MADDA; /* exception */
323 case a_ALEF_HAMZA_ABOVE: return a_f_ALEF_HAMZA_ABOVE; /* exception */
324 case a_WAW_HAMZA: return a_f_WAW_HAMZA; /* exception */
325 case a_ALEF_HAMZA_BELOW: return a_f_ALEF_HAMZA_BELOW; /* exception */
326 case a_YEH_HAMZA: return a_m_YEH_HAMZA;
327 case a_ALEF: return a_f_ALEF; /* exception */
328 case a_BEH: return a_m_BEH;
329 case a_TEH_MARBUTA: return a_f_TEH_MARBUTA; /* exception */
330 case a_TEH: return a_m_TEH;
331 case a_THEH: return a_m_THEH;
332 case a_JEEM: return a_m_JEEM;
333 case a_HAH: return a_m_HAH;
334 case a_KHAH: return a_m_KHAH;
335 case a_DAL: return a_f_DAL; /* exception */
336 case a_THAL: return a_f_THAL; /* exception */
337 case a_REH: return a_f_REH; /* exception */
338 case a_ZAIN: return a_f_ZAIN; /* exception */
339 case a_SEEN: return a_m_SEEN;
340 case a_SHEEN: return a_m_SHEEN;
341 case a_SAD: return a_m_SAD;
342 case a_DAD: return a_m_DAD;
343 case a_TAH: return a_m_TAH;
344 case a_ZAH: return a_m_ZAH;
345 case a_AIN: return a_m_AIN;
346 case a_GHAIN: return a_m_GHAIN;
347 case a_TATWEEL: return cur_c; /* exception */
348 case a_FEH: return a_m_FEH;
349 case a_QAF: return a_m_QAF;
350 case a_KAF: return a_m_KAF;
351 case a_LAM: return a_m_LAM;
352 case a_MEEM: return a_m_MEEM;
353 case a_NOON: return a_m_NOON;
354 case a_HEH: return a_m_HEH;
355 case a_WAW: return a_f_WAW; /* exception */
356 case a_ALEF_MAKSURA: return a_f_ALEF_MAKSURA; /* exception */
357 case a_YEH: return a_m_YEH;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000358 }
Bram Moolenaar5f53dd32017-03-01 14:02:30 +0100359 return 0;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000360}
361
362
363/*
364 * Change shape - from ISO-8859-6/Isolated to final
365 */
366 static int
Bram Moolenaar7454a062016-01-30 15:14:10 +0100367chg_c_a2f(int cur_c)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000368{
Bram Moolenaar071d4272004-06-13 20:20:40 +0000369 /* NOTE: these encodings need to be accounted for
Bram Moolenaar7f73b542017-03-04 14:50:19 +0100370 * a_f_ALEF_MADDA;
371 * a_f_ALEF_HAMZA_ABOVE;
372 * a_f_ALEF_HAMZA_BELOW;
373 * a_f_LAM_ALEF_MADDA_ABOVE;
374 * a_f_LAM_ALEF_HAMZA_ABOVE;
375 * a_f_LAM_ALEF_HAMZA_BELOW;
376 */
Bram Moolenaar071d4272004-06-13 20:20:40 +0000377 switch (cur_c)
378 {
Bram Moolenaar7f73b542017-03-04 14:50:19 +0100379 case a_HAMZA: return a_s_HAMZA; /* exception */
380 case a_ALEF_MADDA: return a_f_ALEF_MADDA;
381 case a_ALEF_HAMZA_ABOVE: return a_f_ALEF_HAMZA_ABOVE;
382 case a_WAW_HAMZA: return a_f_WAW_HAMZA;
383 case a_ALEF_HAMZA_BELOW: return a_f_ALEF_HAMZA_BELOW;
384 case a_YEH_HAMZA: return a_f_YEH_HAMZA;
385 case a_ALEF: return a_f_ALEF;
386 case a_BEH: return a_f_BEH;
387 case a_TEH_MARBUTA: return a_f_TEH_MARBUTA;
388 case a_TEH: return a_f_TEH;
389 case a_THEH: return a_f_THEH;
390 case a_JEEM: return a_f_JEEM;
391 case a_HAH: return a_f_HAH;
392 case a_KHAH: return a_f_KHAH;
393 case a_DAL: return a_f_DAL;
394 case a_THAL: return a_f_THAL;
395 case a_REH: return a_f_REH;
396 case a_ZAIN: return a_f_ZAIN;
397 case a_SEEN: return a_f_SEEN;
398 case a_SHEEN: return a_f_SHEEN;
399 case a_SAD: return a_f_SAD;
400 case a_DAD: return a_f_DAD;
401 case a_TAH: return a_f_TAH;
402 case a_ZAH: return a_f_ZAH;
403 case a_AIN: return a_f_AIN;
404 case a_GHAIN: return a_f_GHAIN;
405 case a_TATWEEL: return cur_c; /* exception */
406 case a_FEH: return a_f_FEH;
407 case a_QAF: return a_f_QAF;
408 case a_KAF: return a_f_KAF;
409 case a_LAM: return a_f_LAM;
410 case a_MEEM: return a_f_MEEM;
411 case a_NOON: return a_f_NOON;
412 case a_HEH: return a_f_HEH;
413 case a_WAW: return a_f_WAW;
414 case a_ALEF_MAKSURA: return a_f_ALEF_MAKSURA;
415 case a_YEH: return a_f_YEH;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000416 }
Bram Moolenaar7f73b542017-03-04 14:50:19 +0100417 return 0;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000418}
419
420
/*
 * Change shape - from Initial to Medial.
 * Returns 0 when "cur_c" has no entry in the table below.
 * This function is compiled out: per the original note the code is
 * unreachable, because for the relevant characters ARABIC_CHAR() is FALSE.
 */
#if 0
    static int
chg_c_i2m(int cur_c)
{
    int		medial = 0;	/* stays 0 when there is no mapping */

    switch (cur_c)
    {
	case a_i_YEH_HAMZA:	medial = a_m_YEH_HAMZA; break;
	case a_i_BEH:		medial = a_m_BEH; break;
	case a_i_TEH:		medial = a_m_TEH; break;
	case a_i_THEH:		medial = a_m_THEH; break;
	case a_i_JEEM:		medial = a_m_JEEM; break;
	case a_i_HAH:		medial = a_m_HAH; break;
	case a_i_KHAH:		medial = a_m_KHAH; break;
	case a_i_SEEN:		medial = a_m_SEEN; break;
	case a_i_SHEEN:		medial = a_m_SHEEN; break;
	case a_i_SAD:		medial = a_m_SAD; break;
	case a_i_DAD:		medial = a_m_DAD; break;
	case a_i_TAH:		medial = a_m_TAH; break;
	case a_i_ZAH:		medial = a_m_ZAH; break;
	case a_i_AIN:		medial = a_m_AIN; break;
	case a_i_GHAIN:		medial = a_m_GHAIN; break;
	case a_i_FEH:		medial = a_m_FEH; break;
	case a_i_QAF:		medial = a_m_QAF; break;
	case a_i_KAF:		medial = a_m_KAF; break;
	case a_i_LAM:		medial = a_m_LAM; break;
	case a_i_MEEM:		medial = a_m_MEEM; break;
	case a_i_NOON:		medial = a_m_NOON; break;
	case a_i_HEH:		medial = a_m_HEH; break;
	case a_i_YEH:		medial = a_m_YEH; break;

	default:
	    break;
    }

    return medial;
}
#endif
Bram Moolenaar071d4272004-06-13 20:20:40 +0000459
460
461/*
462 * Change shape - from Final to Medial
463 */
464 static int
Bram Moolenaar7454a062016-01-30 15:14:10 +0100465chg_c_f2m(int cur_c)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000466{
Bram Moolenaar071d4272004-06-13 20:20:40 +0000467 switch (cur_c)
468 {
469 /* NOTE: these encodings are multi-positional, no ?
Bram Moolenaar7f73b542017-03-04 14:50:19 +0100470 * case a_f_ALEF_MADDA:
471 * case a_f_ALEF_HAMZA_ABOVE:
472 * case a_f_ALEF_HAMZA_BELOW:
473 */
474 case a_f_YEH_HAMZA: return a_m_YEH_HAMZA;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000475 case a_f_WAW_HAMZA: /* exceptions */
476 case a_f_ALEF:
477 case a_f_TEH_MARBUTA:
478 case a_f_DAL:
479 case a_f_THAL:
480 case a_f_REH:
481 case a_f_ZAIN:
482 case a_f_WAW:
483 case a_f_ALEF_MAKSURA:
Bram Moolenaar7f73b542017-03-04 14:50:19 +0100484 return cur_c;
485 case a_f_BEH: return a_m_BEH;
486 case a_f_TEH: return a_m_TEH;
487 case a_f_THEH: return a_m_THEH;
488 case a_f_JEEM: return a_m_JEEM;
489 case a_f_HAH: return a_m_HAH;
490 case a_f_KHAH: return a_m_KHAH;
491 case a_f_SEEN: return a_m_SEEN;
492 case a_f_SHEEN: return a_m_SHEEN;
493 case a_f_SAD: return a_m_SAD;
494 case a_f_DAD: return a_m_DAD;
495 case a_f_TAH: return a_m_TAH;
496 case a_f_ZAH: return a_m_ZAH;
497 case a_f_AIN: return a_m_AIN;
498 case a_f_GHAIN: return a_m_GHAIN;
499 case a_f_FEH: return a_m_FEH;
500 case a_f_QAF: return a_m_QAF;
501 case a_f_KAF: return a_m_KAF;
502 case a_f_LAM: return a_m_LAM;
503 case a_f_MEEM: return a_m_MEEM;
504 case a_f_NOON: return a_m_NOON;
505 case a_f_HEH: return a_m_HEH;
506 case a_f_YEH: return a_m_YEH;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000507
Bram Moolenaar7f73b542017-03-04 14:50:19 +0100508 /* NOTE: these encodings are multi-positional, no ?
509 * case a_f_LAM_ALEF_MADDA_ABOVE:
510 * case a_f_LAM_ALEF_HAMZA_ABOVE:
511 * case a_f_LAM_ALEF_HAMZA_BELOW:
512 * case a_f_LAM_ALEF:
513 */
514 }
515 return 0;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000516}
517
518
519/*
520 * Change shape - from Combination (2 char) to an Isolated
521 */
522 static int
Bram Moolenaar7454a062016-01-30 15:14:10 +0100523chg_c_laa2i(int hid_c)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000524{
Bram Moolenaar071d4272004-06-13 20:20:40 +0000525 switch (hid_c)
526 {
Bram Moolenaar7f73b542017-03-04 14:50:19 +0100527 case a_ALEF_MADDA: return a_s_LAM_ALEF_MADDA_ABOVE;
528 case a_ALEF_HAMZA_ABOVE: return a_s_LAM_ALEF_HAMZA_ABOVE;
529 case a_ALEF_HAMZA_BELOW: return a_s_LAM_ALEF_HAMZA_BELOW;
530 case a_ALEF: return a_s_LAM_ALEF;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000531 }
Bram Moolenaar7f73b542017-03-04 14:50:19 +0100532 return 0;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000533}
534
535
536/*
537 * Change shape - from Combination-Isolated to Final
538 */
539 static int
Bram Moolenaar7454a062016-01-30 15:14:10 +0100540chg_c_laa2f(int hid_c)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000541{
Bram Moolenaar071d4272004-06-13 20:20:40 +0000542 switch (hid_c)
543 {
Bram Moolenaar7f73b542017-03-04 14:50:19 +0100544 case a_ALEF_MADDA: return a_f_LAM_ALEF_MADDA_ABOVE;
545 case a_ALEF_HAMZA_ABOVE: return a_f_LAM_ALEF_HAMZA_ABOVE;
546 case a_ALEF_HAMZA_BELOW: return a_f_LAM_ALEF_HAMZA_BELOW;
547 case a_ALEF: return a_f_LAM_ALEF;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000548 }
Bram Moolenaar7f73b542017-03-04 14:50:19 +0100549 return 0;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000550}
551
/*
 * Do "half-shaping" on character "c": an ISO-8859-6 letter is converted with
 * chg_c_a2i(), a valid final shape with chg_c_f2m().
 * Returns zero if no shaping applies.
 */
    static int
half_shape(int c)
{
    int		shaped = 0;

    if (A_is_a(c))
	shaped = chg_c_a2i(c);
    else if (A_is_valid(c) && A_is_f(c))
	shaped = chg_c_f2m(c);

    return shaped;
}
564
565/*
566 * Do Arabic shaping on character "c". Returns the shaped character.
567 * out: "ccp" points to the first byte of the character to be shaped.
568 * in/out: "c1p" points to the first composing char for "c".
569 * in: "prev_c" is the previous character (not shaped)
570 * in: "prev_c1" is the first composing char for the previous char
571 * (not shaped)
572 * in: "next_c" is the next character (not shaped).
573 */
574 int
Bram Moolenaar7454a062016-01-30 15:14:10 +0100575arabic_shape(
576 int c,
577 int *ccp,
578 int *c1p,
579 int prev_c,
580 int prev_c1,
581 int next_c)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000582{
583 int curr_c;
584 int shape_c;
585 int curr_laa;
586 int prev_laa;
587
588 /* Deal only with Arabic character, pass back all others */
589 if (!A_is_ok(c))
590 return c;
591
592 /* half-shape current and previous character */
593 shape_c = half_shape(prev_c);
594
595 /* Save away current character */
596 curr_c = c;
597
598 curr_laa = A_firstc_laa(c, *c1p);
599 prev_laa = A_firstc_laa(prev_c, prev_c1);
600
601 if (curr_laa)
602 {
603 if (A_is_valid(prev_c) && !A_is_f(shape_c)
604 && !A_is_s(shape_c) && !prev_laa)
605 curr_c = chg_c_laa2f(curr_laa);
606 else
607 curr_c = chg_c_laa2i(curr_laa);
608
609 /* Remove the composing character */
610 *c1p = 0;
611 }
612 else if (!A_is_valid(prev_c) && A_is_valid(next_c))
613 curr_c = chg_c_a2i(c);
614 else if (!shape_c || A_is_f(shape_c) || A_is_s(shape_c) || prev_laa)
615 curr_c = A_is_valid(next_c) ? chg_c_a2i(c) : chg_c_a2s(c);
616 else if (A_is_valid(next_c))
Bram Moolenaar3ff2f092017-03-21 13:22:44 +0100617#if 0
Bram Moolenaar071d4272004-06-13 20:20:40 +0000618 curr_c = A_is_iso(c) ? chg_c_a2m(c) : chg_c_i2m(c);
Bram Moolenaar3ff2f092017-03-21 13:22:44 +0100619#else
620 curr_c = A_is_iso(c) ? chg_c_a2m(c) : 0;
621#endif
Bram Moolenaar071d4272004-06-13 20:20:40 +0000622 else if (A_is_valid(prev_c))
623 curr_c = chg_c_a2f(c);
624 else
625 curr_c = chg_c_a2s(c);
626
627 /* Sanity check -- curr_c should, in the future, never be 0.
628 * We should, in the future, insert a fatal error here. */
629 if (curr_c == NUL)
630 curr_c = c;
631
632 if (curr_c != c && ccp != NULL)
633 {
Bram Moolenaar9a920d82012-06-01 15:21:02 +0200634 char_u buf[MB_MAXBYTES + 1];
Bram Moolenaar071d4272004-06-13 20:20:40 +0000635
636 /* Update the first byte of the character. */
637 (*mb_char2bytes)(curr_c, buf);
638 *ccp = buf[0];
639 }
640
641 /* Return the shaped character */
642 return curr_c;
643}
644
645
646/*
647 * A_firstc_laa returns first character of LAA combination if it exists
648 */
649 static int
Bram Moolenaar7454a062016-01-30 15:14:10 +0100650A_firstc_laa(
651 int c, /* base character */
652 int c1) /* first composing character */
Bram Moolenaar071d4272004-06-13 20:20:40 +0000653{
654 if (c1 != NUL && c == a_LAM && !A_is_harakat(c1))
655 return c1;
656 return 0;
657}
658
659
660/*
661 * A_is_harakat returns TRUE if 'c' is an Arabic Harakat character
662 * (harakat/tanween)
663 */
664 static int
Bram Moolenaar7454a062016-01-30 15:14:10 +0100665A_is_harakat(int c)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000666{
667 return (c >= a_FATHATAN && c <= a_SUKUN);
668}
669
670
671/*
672 * A_is_iso returns TRUE if 'c' is an Arabic ISO-8859-6 character
673 * (alphabet/number/punctuation)
674 */
675 static int
Bram Moolenaar7454a062016-01-30 15:14:10 +0100676A_is_iso(int c)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000677{
678 return ((c >= a_HAMZA && c <= a_GHAIN)
679 || (c >= a_TATWEEL && c <= a_HAMZA_BELOW)
680 || c == a_MINI_ALEF);
681}
682
683
684/*
685 * A_is_formb returns TRUE if 'c' is an Arabic 10646-1 FormB character
686 * (alphabet/number/punctuation)
687 */
688 static int
Bram Moolenaar7454a062016-01-30 15:14:10 +0100689A_is_formb(int c)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000690{
691 return ((c >= a_s_FATHATAN && c <= a_s_DAMMATAN)
692 || c == a_s_KASRATAN
693 || (c >= a_s_FATHA && c <= a_f_LAM_ALEF)
694 || c == a_BYTE_ORDER_MARK);
695}
696
697
/*
 * A_is_ok returns non-zero if 'c' is an Arabic 10646 character, meaning it
 * is accepted by A_is_iso() (8859-6) or by A_is_formb() (Form-B).
 */
    static int
A_is_ok(int c)
{
    /* De Morgan form of "iso || formb"; A_is_iso() is still tested first. */
    return !(!A_is_iso(c) && !A_is_formb(c));
}
706
707
/*
 * A_is_valid returns non-zero if 'c' is an Arabic 10646 (8859-6 or Form-B)
 * character, with some exceptions/exclusions: the characters accepted by
 * A_is_special() are rejected.
 */
    static int
A_is_valid(int c)
{
    if (!A_is_ok(c))
	return 0;

    return !A_is_special(c);
}
717
718
719/*
720 * A_is_special returns TRUE if 'c' is not a special Arabic character.
721 * Specials don't adhere to most of the rules.
722 */
723 static int
Bram Moolenaar7454a062016-01-30 15:14:10 +0100724A_is_special(int c)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000725{
726 return (c == a_HAMZA || c == a_s_HAMZA);
727}
Bram Moolenaar75464dc2016-07-02 20:27:50 +0200728
729#endif /* FEAT_ARABIC */