I've now silenced the warnings I was seeing on FreeBSD.
I've also (hopefully) improved the way that the NV's internal structure is transferred to the string buffer as hex bytes.
For perl-5.20.x and earlier on MS Windows I discovered that the 'uniqnum preserves uniqness of full integer range (stringified)' test needs to be skipped because of the bizarre way that infs and nans are stringified on those particular perls.
AFAICT it's actually not possible to come up with a string that will numify to NaN, though strings that numify to Infs do exist (eg '1e1000').
Graham's script also needs to be skipped for those builds - for the same reason.
I suppose it would be possible to avoid those skips by rewriting the tests such that Inf strings were written differently and testing involving NaN strings was avoided, but I don't really see that as being a useful exercise.
I eventually found confirmation (at
https://en.wikipedia.org/wiki/Comparison_of_instruction_set_architectures#Endianness) that both little and big endian types start at the lowest indexed byte.
This called for a change in the Makefile.PL to the treatment of the 80-bit long double NVs.
Attached is my latest patch - formulated against Scalar-List-Utils-1.52.
I don't envisage making any further alterations, though that could change if other problems come to light.
Cheers,
Rob
--- Makefile.PL_orig 2019-08-24 11:46:15 +1000
+++ Makefile.PL 2019-08-27 23:21:26 +1000
@@ -7,11 +7,30 @@
use ExtUtils::MakeMaker;
my $PERL_CORE = grep { $_ eq 'PERL_CORE=1' } @ARGV;
+
+my $defines = $ENV{PERL_CORE} ? q[-DPERL_EXT] : q[-DPERL_EXT -DUSE_PPPORT_H];
+
+# Determine the correct NV formatting required by uniq() in ListUtil.xs
+
+if($Config{nvsize} == 8) { # double or 8-byte long double
+ $defines .= " -DUNIQ_LOOP_START=0 -DUNIQ_LOOP_END=8";
+ $defines .= " -DUNIQ_PREC_LOSS" if $Config{ivsize} >= 8; # precision loss is possible when
+ # IV/UV is converted to NV
+}
+
+elsif(length(sqrt 2) > 25) { # IEEE long double or __float128 or doubledouble
+ $defines .= " -DUNIQ_LOOP_START=0 -DUNIQ_LOOP_END=16"
+}
+
+else { # extended precision long double
+ $defines .= " -DUNIQ_LOOP_START=0 -DUNIQ_LOOP_END=10"; # ignore unused bytes
+}
+
WriteMakefile(
NAME => q[List::Util],
ABSTRACT => q[Common Scalar and List utility subroutines],
AUTHOR => q[Graham Barr <gbarr@cpan.org>],
- DEFINE => ($ENV{PERL_CORE} ? q[-DPERL_EXT] : q[-DPERL_EXT -DUSE_PPPORT_H]),
+ DEFINE => $defines,
DISTNAME => q[Scalar-List-Utils],
VERSION_FROM => 'lib/List/Util.pm',
--- ListUtil.xs_orig 2019-08-24 11:45:08 +1000
+++ ListUtil.xs 2019-08-27 23:23:46 +1000
@@ -72,6 +72,19 @@
#define sv_catpvn_flags(b,n,l,f) sv_catpvn(b,n,l)
#endif
+#ifdef UNIQ_PREC_LOSS
+#define UNIQ_BUFF_SIZE 37 /* nvsize == 8, ivsize == 8 */
+
+#elif UNIQ_LOOP_END == 8 /* nvsize == 8, ivsize < 8 **/
+#define UNIQ_BUFF_SIZE 17
+
+#elif UNIQ_LOOP_END - UNIQ_LOOP_START == 10 /* NV is 10 bytes ***********/
+#define UNIQ_BUFF_SIZE 21
+
+#else
+#define UNIQ_BUFF_SIZE 33 /* NV is 16 bytes ***********/
+#endif
+
/* Some platforms have strict exports. And before 5.7.3 cxinc (or Perl_cxinc)
was not exported. Therefore platforms like win32, VMS etc have problems
so we redefine it here -- GMB
@@ -1152,6 +1165,14 @@
int index;
SV **args = &PL_stack_base[ax];
HV *seen;
+ size_t i;
+ NV nv_arg;
+ void *p = &nv_arg;
+ char s[UNIQ_BUFF_SIZE];
+ char * buff = s;
+#ifdef UNIQ_PREC_LOSS
+ size_t uvstring = 0, ivstring = 0, offset = 0;
+#endif
if(items == 0 || (items == 1 && !SvGAMAGIC(args[0]) && SvOK(args[0]))) {
/* Optimise for the case of the empty list or a defined nonmagic
@@ -1185,12 +1206,105 @@
#endif
}
- if(!SvOK(arg) || SvUOK(arg))
- sv_setpvf(keysv, "%" UVuf, SvUV(arg));
- else if(SvIOK(arg))
- sv_setpvf(keysv, "%" IVdf, SvIV(arg));
- else
- sv_setpvf(keysv, "%.15" NVgf, SvNV(arg));
+ /* Assign appropriate value to nv_arg */
+
+ if(!SvOK(arg) || SvUOK(arg)) {
+ nv_arg = (NV)SvUV(arg);
+#ifdef UNIQ_PREC_LOSS
+ uvstring = 1;
+#endif
+ }
+
+ else if(SvIOK(arg)) {
+ nv_arg = (NV)SvIV(arg);
+#ifdef UNIQ_PREC_LOSS
+ ivstring = 1;
+#endif
+ }
+
+ else {
+ nv_arg = SvNV(arg);
+ }
+
+ /* Handle NaN, zeros and Infs */
+
+ if(nv_arg != nv_arg) {
+ sv_setpvf(keysv, "%s", "NaN");
+#ifdef UNIQ_PREC_LOSS
+ ivstring = 0;
+ uvstring = 0;
+#endif
+ }
+
+ else if(nv_arg == 0) {
+ sv_setpvf(keysv, "%s", "0");
+#ifdef UNIQ_PREC_LOSS
+ ivstring = 0;
+ uvstring = 0;
+#endif
+ }
+
+ else if(nv_arg/nv_arg != 1) {
+ if(nv_arg < 0) sv_setpvf(keysv, "%s", "-Inf");
+ else sv_setpvf(keysv, "%s", "Inf");
+#ifdef UNIQ_PREC_LOSS
+ ivstring = 0;
+ uvstring = 0;
+#endif
+ }
+
+ /* Handle all values other than NaN, zeros and Infs */
+
+ else {
+#ifdef UNIQ_PREC_LOSS
+ if(uvstring) {
+ sprintf(buff, "%" UVuf, SvUV(arg));
+ offset = strlen(buff);
+ }
+
+ else if(ivstring) {
+ sprintf(buff, "%" IVdf, SvIV(arg));
+ offset = strlen(buff);
+ }
+
+ /* Take appropriate action if nv_arg is within IV and UV bounds */
+
+ else if(ceil(nv_arg) == nv_arg
+ && nv_arg <= 1.8446744073709552e+19
+ && nv_arg >= -9.2233720368547758e+18 ) {
+
+ if(nv_arg < 0) {
+ sprintf(buff, "%" IVdf, SvIV(arg));
+ }
+ else {
+ sprintf(buff, "%" UVuf, SvUV(arg));
+ }
+
+ offset = strlen(buff);
+ }
+
+ ivstring = 0;
+ uvstring = 0;
+
+ buff += offset;
+
+ for(i = 0; i < 8; i++) {
+ sprintf(buff, "%02x", ((unsigned char*)p)[i]);
+ buff += 2;
+ }
+
+ buff -= 16 + offset; /* return pointer to original position */
+ offset = 0;
+#else
+ for(i = UNIQ_LOOP_START; i < UNIQ_LOOP_END; i++) {
+ sprintf(buff, "%02x", ((unsigned char*)p)[i]);
+ buff += 2;
+ }
+
+ buff -= 2 * UNIQ_LOOP_END; /* return pointer to original position */
+#endif
+ sv_setpvf(keysv, "%s", buff);
+ }
#ifdef HV_FETCH_EMPTY_HE
he = (HE*) hv_common(seen, NULL, SvPVX(keysv), SvCUR(keysv), 0, HV_FETCH_LVALUE | HV_FETCH_EMPTY_HE, NULL, 0);
if (HeVAL(he))
--- t/uniq.t_orig 2019-08-24 11:46:43 +1000
+++ t/uniq.t 2019-08-27 19:12:28 +1000
@@ -2,8 +2,8 @@
use strict;
use warnings;
-
-use Test::More tests => 33;
+use Config; # to determine nvsize
+use Test::More tests => 35;
use List::Util qw( uniqnum uniqstr uniq );
use Tie::Array;
@@ -87,6 +87,76 @@
'uniqnum distinguishes large floats (stringified)' );
}
+my ($uniq_count1, $uniq_count2, $equiv);
+
+if($Config{nvsize} == 8) {
+ # NV is either 'double' or 8-byte 'long double'
+
+ # The 2 values should be unequal - but just in case perl is buggy:
+ $equiv = 1 if 1.4142135623730951 == 1.4142135623730954;
+
+ $uniq_count1 = List::Util::uniqnum (1.4142135623730951,
+ 1.4142135623730954 );
+
+ $uniq_count2 = List::Util::uniqnum('1.4142135623730951',
+ '1.4142135623730954');
+}
+
+elsif(length(sqrt(2)) > 25) {
+ # NV is either IEEE 'long double' or '__float128' or doubledouble
+
+ if(1 + (2 ** -1074) != 1) {
+ # NV is doubledouble
+
+ # The 2 values should be unequal - but just in case perl is buggy:
+ $equiv = 1 if 1 + (2 ** -1074) == 1 + (2 ** - 1073);
+
+ $uniq_count1 = List::Util::uniqnum (1 + (2 ** -1074),
+ 1 + (2 ** -1073) );
+ # The 2 values should be unequal - but just in case perl is buggy:
+ $equiv = 1 if 4.0564819207303340847894502572035e31 == 4.0564819207303340847894502572034e31;
+
+ $uniq_count2 = List::Util::uniqnum('4.0564819207303340847894502572035e31',
+ '4.0564819207303340847894502572034e31');
+ }
+
+ else {
+ # NV is either IEEE 'long double' or '__float128'
+
+ # The 2 values should be unequal - but just in case perl is buggy:
+ $equiv = 1 if 1.7320508075688772935274463415058722 == 1.73205080756887729352744634150587224;
+
+ $uniq_count1 = List::Util::uniqnum (1.7320508075688772935274463415058722,
+ 1.73205080756887729352744634150587224 );
+
+ $uniq_count2 = List::Util::uniqnum('1.7320508075688772935274463415058722',
+ '1.73205080756887729352744634150587224');
+ }
+}
+
+else {
+ # NV is extended precision 'long double'
+
+ # The 2 values should be unequal - but just in case perl is buggy:
+ $equiv = 1 if 2.2360679774997896963 == 2.23606797749978969634;
+
+ $uniq_count1 = List::Util::uniqnum (2.2360679774997896963,
+ 2.23606797749978969634 );
+
+ $uniq_count2 = List::Util::uniqnum('2.2360679774997896963',
+ '2.23606797749978969634');
+}
+
+if($equiv) {
+ is($uniq_count1, 1, 'uniqnum preserves uniqness of high precision floats');
+ is($uniq_count2, 1, 'uniqnum preserves uniqness of high precision floats (stringified)');
+}
+
+else {
+ is($uniq_count1, 2, 'uniqnum preserves uniqness of high precision floats');
+ is($uniq_count2, 2, 'uniqnum preserves uniqness of high precision floats (stringified)');
+}
+
# Hard to know for sure what an Inf is going to be. Lets make one
my $Inf = 0 + 1E1000;
my $NaN;
@@ -109,8 +179,13 @@
my @strs = map "$_", @nums;
- skip( "Perl $] doesn't stringify UV_MAX right ($maxuint)", 1 )
- if $maxuint !~ /\A[0-9]+\z/;
+ if($maxuint !~ /\A[0-9]+\z/) {
+ skip( "Perl $] doesn't stringify UV_MAX right ($maxuint)", 1 );
+ }
+ elsif($] < 5.022 && $^O =~ /MSWin32/i) {
+ skip( "On MS Windows,perl $] stringifies infs and nans into something unusable", 1 );
+ }
+
is_deeply( [ uniqnum @strs, "1.0" ],
[ @strs ],