runtime(java): Improve the recognition of literals (#14120)
* Emend the Unicode and octal escape sequence patterns;
* Accept the (repeated) underscore separators in all
numerical literals;
* Recognise hexadecimal floating-point literals.
(The space escape sequence '\s' will be introduced along
with text blocks in another PR.)
References:
https://docs.oracle.com/javase/specs/jls/se17/html/jls-3.html#jls-3.3
https://docs.oracle.com/javase/specs/jls/se17/html/jls-3.html#jls-3.10.1
https://docs.oracle.com/javase/specs/jls/se17/html/jls-3.html#jls-3.10.2
https://docs.oracle.com/javase/specs/jls/se17/html/jls-3.html#jls-3.10.7
Signed-off-by: Aliaksei Budavei <0x000c70@gmail.com>
Signed-off-by: Christian Brabandt <cb@256bit.org>
diff --git a/runtime/syntax/testdir/input/java_escapes.java b/runtime/syntax/testdir/input/java_escapes.java
new file mode 100644
index 0000000..624d4fc
--- /dev/null
+++ b/runtime/syntax/testdir/input/java_escapes.java
@@ -0,0 +1,123 @@
+class EscapesTests // Syntax-test input covering Unicode escapes, single-character escapes, and octal escapes.
+{ // Inspect the compiled constants with: javap -constants EscapesTests.class
+ // The Unicode escapes on the next seven lines spell: static final String hello = "hello";
+ \u0073\u0074\u0061\u0074\u0069\u0063
+ \u0066\u0069\u006e\u0061\u006c
+ \u0053\u0074\u0072\u0069\u006e\u0067
+ \u0068\u0065\u006c\u006c\u006f
+ \u003d
+ \u0022\u0068\u0065\u006c\u006c\u006f\u0022
+ \u003b
+
+ static {
+ char ee[] = { // single-character escape sequences inside character literals
+ '\b', '\t',
+ '\n', '\f', '\r',
+ '\"', '\'', '\\',
+ };
+
+ System.out.println(new String[] { // the same single-character escapes inside string literals
+ "\b", "\t",
+ "\n", "\f", "\r",
+ "\"", "\'", "\\",
+ });
+
+ char oo[] = { // octal escapes in character literals: one, two, and three digits, 0 through 377
+ '\0', '\1', '\2', '\3', '\4', '\5', '\6', '\7',
+
+ '\00', '\01', '\02', '\03', '\04', '\05', '\06', '\07',
+
+ '\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007',
+
+ '\10', '\11', '\12', '\13', '\14', '\15', '\16', '\17',
+ '\20', '\21', '\22', '\23', '\24', '\25', '\26', '\27',
+ '\30', '\31', '\32', '\33', '\34', '\35', '\36', '\37',
+ '\40', '\41', '\42', '\43', '\44', '\45', '\46', '\47',
+ '\50', '\51', '\52', '\53', '\54', '\55', '\56', '\57',
+ '\60', '\61', '\62', '\63', '\64', '\65', '\66', '\67',
+ '\70', '\71', '\72', '\73', '\74', '\75', '\76', '\77',
+
+ '\010', '\011', '\012', '\013', '\014', '\015', '\016', '\017',
+ '\020', '\021', '\022', '\023', '\024', '\025', '\026', '\027',
+ '\030', '\031', '\032', '\033', '\034', '\035', '\036', '\037',
+ '\040', '\041', '\042', '\043', '\044', '\045', '\046', '\047',
+ '\050', '\051', '\052', '\053', '\054', '\055', '\056', '\057',
+ '\060', '\061', '\062', '\063', '\064', '\065', '\066', '\067',
+ '\070', '\071', '\072', '\073', '\074', '\075', '\076', '\077',
+
+ '\100', '\101', '\102', '\103', '\104', '\105', '\106', '\107',
+ '\110', '\111', '\112', '\113', '\114', '\115', '\116', '\117',
+ '\120', '\121', '\122', '\123', '\124', '\125', '\126', '\127',
+ '\130', '\131', '\132', '\133', '\134', '\135', '\136', '\137',
+ '\140', '\141', '\142', '\143', '\144', '\145', '\146', '\147',
+ '\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157',
+ '\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167',
+ '\170', '\171', '\172', '\173', '\174', '\175', '\176', '\177',
+ '\200', '\201', '\202', '\203', '\204', '\205', '\206', '\207',
+ '\210', '\211', '\212', '\213', '\214', '\215', '\216', '\217',
+ '\220', '\221', '\222', '\223', '\224', '\225', '\226', '\227',
+ '\230', '\231', '\232', '\233', '\234', '\235', '\236', '\237',
+ '\240', '\241', '\242', '\243', '\244', '\245', '\246', '\247',
+ '\250', '\251', '\252', '\253', '\254', '\255', '\256', '\257',
+ '\260', '\261', '\262', '\263', '\264', '\265', '\266', '\267',
+ '\270', '\271', '\272', '\273', '\274', '\275', '\276', '\277',
+ '\300', '\301', '\302', '\303', '\304', '\305', '\306', '\307',
+ '\310', '\311', '\312', '\313', '\314', '\315', '\316', '\317',
+ '\320', '\321', '\322', '\323', '\324', '\325', '\326', '\327',
+ '\330', '\331', '\332', '\333', '\334', '\335', '\336', '\337',
+ '\340', '\341', '\342', '\343', '\344', '\345', '\346', '\347',
+ '\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357',
+ '\360', '\361', '\362', '\363', '\364', '\365', '\366', '\367',
+ '\370', '\371', '\372', '\373', '\374', '\375', '\376', '\377',
+ };
+
+ System.out.println(new String[] { // the same octal escapes inside string literals
+ "\0", "\1", "\2", "\3", "\4", "\5", "\6", "\7",
+
+ "\00", "\01", "\02", "\03", "\04", "\05", "\06", "\07",
+
+ "\000", "\001", "\002", "\003", "\004", "\005", "\006", "\007",
+
+ "\10", "\11", "\12", "\13", "\14", "\15", "\16", "\17",
+ "\20", "\21", "\22", "\23", "\24", "\25", "\26", "\27",
+ "\30", "\31", "\32", "\33", "\34", "\35", "\36", "\37",
+ "\40", "\41", "\42", "\43", "\44", "\45", "\46", "\47",
+ "\50", "\51", "\52", "\53", "\54", "\55", "\56", "\57",
+ "\60", "\61", "\62", "\63", "\64", "\65", "\66", "\67",
+ "\70", "\71", "\72", "\73", "\74", "\75", "\76", "\77",
+
+ "\010", "\011", "\012", "\013", "\014", "\015", "\016", "\017",
+ "\020", "\021", "\022", "\023", "\024", "\025", "\026", "\027",
+ "\030", "\031", "\032", "\033", "\034", "\035", "\036", "\037",
+ "\040", "\041", "\042", "\043", "\044", "\045", "\046", "\047",
+ "\050", "\051", "\052", "\053", "\054", "\055", "\056", "\057",
+ "\060", "\061", "\062", "\063", "\064", "\065", "\066", "\067",
+ "\070", "\071", "\072", "\073", "\074", "\075", "\076", "\077",
+
+ "\100", "\101", "\102", "\103", "\104", "\105", "\106", "\107",
+ "\110", "\111", "\112", "\113", "\114", "\115", "\116", "\117",
+ "\120", "\121", "\122", "\123", "\124", "\125", "\126", "\127",
+ "\130", "\131", "\132", "\133", "\134", "\135", "\136", "\137",
+ "\140", "\141", "\142", "\143", "\144", "\145", "\146", "\147",
+ "\150", "\151", "\152", "\153", "\154", "\155", "\156", "\157",
+ "\160", "\161", "\162", "\163", "\164", "\165", "\166", "\167",
+ "\170", "\171", "\172", "\173", "\174", "\175", "\176", "\177",
+ "\200", "\201", "\202", "\203", "\204", "\205", "\206", "\207",
+ "\210", "\211", "\212", "\213", "\214", "\215", "\216", "\217",
+ "\220", "\221", "\222", "\223", "\224", "\225", "\226", "\227",
+ "\230", "\231", "\232", "\233", "\234", "\235", "\236", "\237",
+ "\240", "\241", "\242", "\243", "\244", "\245", "\246", "\247",
+ "\250", "\251", "\252", "\253", "\254", "\255", "\256", "\257",
+ "\260", "\261", "\262", "\263", "\264", "\265", "\266", "\267",
+ "\270", "\271", "\272", "\273", "\274", "\275", "\276", "\277",
+ "\300", "\301", "\302", "\303", "\304", "\305", "\306", "\307",
+ "\310", "\311", "\312", "\313", "\314", "\315", "\316", "\317",
+ "\320", "\321", "\322", "\323", "\324", "\325", "\326", "\327",
+ "\330", "\331", "\332", "\333", "\334", "\335", "\336", "\337",
+ "\340", "\341", "\342", "\343", "\344", "\345", "\346", "\347",
+ "\350", "\351", "\352", "\353", "\354", "\355", "\356", "\357",
+ "\360", "\361", "\362", "\363", "\364", "\365", "\366", "\367",
+ "\370", "\371", "\372", "\373", "\374", "\375", "\376", "\377",
+ });
+ }
+}
diff --git a/runtime/syntax/testdir/input/java_numbers.java b/runtime/syntax/testdir/input/java_numbers.java
new file mode 100644
index 0000000..e926534
--- /dev/null
+++ b/runtime/syntax/testdir/input/java_numbers.java
@@ -0,0 +1,88 @@
+class NumbersTests // Syntax-test input covering floating-point and integer literal forms.
+{
+ static {
+ double[] dd = { // hexadecimal floating-point forms first, then decimal ones, with optional d/D/f/F suffixes
+ 0x.0p0, 0x0.p0, 0x0.0p0, 0x0P0,
+ 0x.0p0d, 0x0.p0d, 0x0.0p0d, 0x0P0D,
+ 0x.0p0f, 0x0.p0f, 0x0.0p0f, 0x0P0F,
+ 0x.0p0, 0x0.p0, 0x0.0p0, 0x0P0,
+ 0x.0p0d, 0x0.p0d, 0x0.0p0d, 0x0P0D,
+ 0x.0p0f, 0x0.p0f, 0x0.0p0f, 0x0P0F,
+ 0x.0p-0, 0x0.p-0, 0x0.0p-0, 0x0P-0,
+ 0x.0p-0d, 0x0.p-0d, 0x0.0p-0d, 0x0P-0D,
+ 0x.0p-0f, 0x0.p-0f, 0x0.0p-0f, 0x0P-0F,
+ 0x.0p+0, 0x0.p+0, 0x0.0p+0, 0x0P+0,
+ 0x.0p+0d, 0x0.p+0d, 0x0.0p+0d, 0x0P+0D,
+ 0x.0p+0f, 0x0.p+0f, 0x0.0p+0f, 0x0P+0F,
+
+ 1., 1.2, 1.2e3, 1.2e3d, 1.2e3f,
+ 1.2e-3, 1.2e-3d, 1.2E-3F,
+ 1.2e+3, 1.2E+3D, 1.2e+3f,
+ .2, .2e3, .2e3d, .2e3f,
+ .2e-3, .2e-3d, .2E-3F,
+ .2e+3, .2E+3D, .2e+3f,
+ 1e3, 1e3d, 1e3f,
+ 1e-3, 1e-3d, 1E-3F,
+ 1e+3, 1E+3D, 1e+3f,
+ 1d, 1D, 1f, 1F,
+
+//// MALFORMED: :let g:java_comment_strings = 1 | doautocmd Syntax
+// 0_x.0p0, 0x._0p0, 0x.0_p0, 0x.0p_0, 0x.0p0_,
+// 0x0_.0p0, 0x_0.0p0, 0xp0,
+ };
+
+ int O = 0; // zero written as a decimal (0), octal (00), and hexadecimal (0x0) literal
+ int OO = 00;
+ int OxO = 0x0;
+
+ int x = 0x12_345_678; // underscore separators in hexadecimal, binary, and octal; repeated underscores are allowed
+ int y = 0b01_01_01_01_01;
+ int z = 0__1__2__3__4__5__6__7;
+
+ // String.format("%a", -1.0)
+ double minus_one_d = -0x1.0p0;
+
+ double z_d = -0x.0p0;
+ double y_d = 0xap1__0__0;
+ double x_d = .0__1__2__3__4__5__6__7__8__9;
+ double dot_O = .0;
+
+ // JLS, §3.10.2: largest and smallest positive float and double values, in decimal and hexadecimal form
+ float max_dec_f = 3.4028235e38f;
+ float max_hex_f = 0x1.fffffeP+127f;
+ float min_dec_f = 1.4e-45f;
+ float min_hex_f_a = 0x0.000002P-126f;
+ float min_hex_f_b = 0x1.0P-149f;
+
+ double max_dec_d = 1.7976931348623157e3__0__8;
+ double max_hex_d = 0x1.f_ffff_ffff_ffffP+1023;
+ double min_dec_d = 4.9e-3__2__4;
+ double min_hex_d_a = 0x0.0_0000_0000_0001P-1022;
+ double min_hex_d_b = 0x1.0P-1074;
+
+ // JLS, §3.10.1: boundary int and long values (maximum, minimum, minus one) in hexadecimal, octal, and binary
+ int max_hex = 0x7fff_ffff;
+ int max_oct = 0177_7777_7777;
+ int max_bin = 0b0111_1111_1111_1111_1111_1111_1111_1111;
+
+ int min_hex = 0x8000_0000;
+ int min_oct = 0200_0000_0000;
+ int min_bin = 0b1000_0000_0000_0000_0000_0000_0000_0000;
+
+ int minus_one_hex = 0xffff_ffff;
+ int minus_one_oct = 0377_7777_7777;
+ int minus_one_bin = 0b1111_1111_1111_1111_1111_1111_1111_1111;
+
+ long max_hex_l = 0x7fff_ffff_ffff_ffffL;
+ long max_oct_l = 07_7777_7777_7777_7777_7777L;
+ long max_bin_l = 0b0111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111L;
+
+ long min_hex_l = 0x8000_0000_0000_0000L;
+ long min_oct_l = 010_0000_0000_0000_0000_0000L;
+ long min_bin_l = 0b1000_0000_0000_0000_0000_0000_0000_0000_0000_0000_0000_0000_0000_0000_0000_0000L;
+
+ long minus_one_hex_l = 0xffff_ffff_ffff_ffffL;
+ long minus_one_oct_l = 017_7777_7777_7777_7777_7777L;
+ long minus_one_bin_l = 0b1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111L;
+ }
+}