Closest-pair problem: Difference between revisions
cat link syntax |
→{{header|R}}: Changed hidden link to more visible review template with specified reason |
||
Line 1,047: | Line 1,047: | ||
=={{header|R}}== |
=={{header|R}}== |
||
{{needs-review|R|It is only a brute force algorithm and not optimized.}} |
|||
{{works with|R|2.81}} |
{{works with|R|2.81}} |
||
Line 1,072: | Line 1,073: | ||
} |
} |
||
</lang> |
</lang> |
||
[[Category:R examples needing attention]] |
|||
=={{header|Ruby}}== |
=={{header|Ruby}}== |
Revision as of 20:09, 27 July 2009
You are encouraged to solve this task according to the task description, using any language you may know.
This page uses content from Wikipedia. The original article was at Closest-pair problem. The list of authors can be seen in the page history. As with Rosetta Code, the text of Wikipedia is available under the GNU FDL. (See links for details on variance) |
The aim of this task is to provide a function to find the closest two points among a set of given points in two dimensions, i.e. to solve the Closest pair of points problem in the planar case.
The straightforward solution is a O(n2) algorithm (which we can call brute-force algorithm); the pseudocode (using indexes) could be simply:
bruteForceClosestPair of P(1), P(2), ... P(N) if N < 2 then return ∞ else minDistance ← |P(1) - P(2)| minPoints ← { P(1), P(2) } foreach i ∈ [1, N-1] foreach j ∈ [i+1, N] if |P(i) - P(j)| < minDistance then minDistance ← |P(i) - P(j)| minPoints ← { P(i), P(j) } endif endfor endfor return minDistance, minPoints endif
A better algorithm is based on the recursive divide&conquer approach, as explained also at Wikipedia, which is O(n log n); a pseudocode could be:
closestPair of (xP, yP) where xP is P(1) .. P(N) sorted by x coordinate, and yP is P(1) .. P(N) sorted by y coordinate (ascending order) if N ≤ 3 then return closest points of xP using brute-force algorithm else xL ← points of xP from 1 to ⌈N/2⌉ xR ← points of xP from ⌈N/2⌉+1 to N xm ← xP(⌈N/2⌉)x yL ← { p ∈ yP : px ≤ xm } yR ← { p ∈ yP : px > xm } (dL, pairL) ← closestPair of (xL, yL) (dR, pairR) ← closestPair of (xR, yR) (dmin, pairMin) ← (dR, pairR) if dL < dR then (dmin, pairMin) ← (dL, pairL) endif yS ← { p ∈ yP : |xm - px| < dmin } nS ← number of points in yS (closest, closestPair) ← (dmin, pairMin) for i from 1 to nS - 1 k ← i + 1 while k ≤ nS and yS(k)y - yS(i)y < dmin if |yS(k) - yS(i)| < closest then (closest, closestPair) ← (|yS(k) - yS(i)|, {yS(k), yS(i)}) endif k ← k + 1 endwhile endfor return closest, closestPair endif
References and further readings
- Closest pair of points problem
- Closest Pair (McGill)
- Closest Pair (UCBS)
- Closest pair (WUStL)
- Closest pair (IUPUI)
C
<lang c>#include <stdio.h>
- include <stdlib.h>
- include <string.h>
- include <math.h>
- include <assert.h>
typedef struct {
double x, y;
} point_t;
inline double distance(point_t *p1, point_t *p2) {
return sqrt((p1->x - p2->x)*(p1->x - p2->x) +
(p1->y - p2->y)*(p1->y - p2->y)); }</lang>
We need an ad hoc sort function; here is a modified version of what you can find at Quicksort.
<lang c>typedef int(*comparator)(const void *, const void *); void quick(point_t *P, int *left, int *right, comparator compar) {
if (right > left) { int pivot = left[(right-left)/2]; int *r = right, *l = left; do { while (compar(&P[*l], &P[pivot]) < 0) l++; while (compar(&P[*r], &P[pivot]) > 0) r--; if (l <= r) { int t = *l; *l++ = *r; *r-- = t; } } while (l <= r); quick(P, left, r, compar); quick(P, l, right, compar); }
} void m_qsort(point_t *P, int *array, int length, comparator compar) {
quick(P, array, array+length-1, compar);
}
int sort_index_by_x(const void *ra, const void *rb) {
const point_t *a = ra, *b = rb; double d = a->x - b->x; return (d<0) ? -1 : ( (d==0) ? 0 : 1);
}
int sort_index_by_y(const void *ra, const void *rb) {
const point_t *a = ra, *b = rb; double d = a->y - b->y; return (d<0) ? -1 : ( (d==0) ? 0 : 1 );
}</lang>
<lang c>double closest_pair_simple_(point_t *P, int *pts, int num, int *pia, int *pib) {
int i, j; if ( num < 2 ) return HUGE_VAL; double ird = distance(&P[pts[0]], &P[pts[1]]); *pia = pts[0]; *pib = pts[1]; for(i=0; i < (num-1); i++) { for(j=i+1; j < num; j++) { double t; t = distance(&P[pts[i]], &P[pts[j]]); if ( t < ird ) {
ird = t; *pia = pts[i]; *pib = pts[j];
} } } return ird;
}</lang>
This is the entry point for the simple algorithm.
<lang c>double closest_pair_simple(point_t *P, int num, int *pia, int *pib) {
int *pts, i; double d;
pts = malloc(sizeof(int)*num); assert(pts != NULL); for(i=0; i < num; i++) pts[i] = i; d = closest_pair_simple_(P, pts, num, pia, pib); free(pts); return d;
}</lang>
<lang c>double closest_pair_(point_t *P, int *xP, int *yP, int N, int *iA, int *iB) {
int i, k, j; int *xL, *xR, *yL, *yR, *yS; int lA, lB, rA, rB, midx; double dL, dR, dmin, xm, closest; int nS;
if ( N <= 3 ) return closest_pair_simple_(P, xP, N, iA, iB);
midx = ceil((double)N/2.0) - 1; xL = malloc(sizeof(int)*(midx+1)); assert(xL != NULL); xR = malloc(sizeof(int)*(N-(midx+1))); assert(xR != NULL); yL = malloc(sizeof(int)*(midx+1)); assert(yL != NULL); yR = malloc(sizeof(int)*(N-(midx+1))); assert(yR != NULL);
for(i=0;i <= midx; i++) xL[i] = xP[i]; for(i=midx+1; i < N; i++) xR[i-(midx+1)] = xP[i];
xm = P[xP[midx]].x;
for(i=0, k=0, j=0; i < N; i++) { if ( P[yP[i]].x <= xm ) { yL[k++] = yP[i]; } else { yR[j++] = yP[i]; } }
dL = closest_pair_(P, xL, yL, midx+1, &lA, &lB); dR = closest_pair_(P, xR, yR, (N-(midx+1)), &rA, &rB);
if ( dL < dR ) { dmin = dL; *iA = lA; *iB = lB; } else { dmin = dR; *iA = rA; *iB = rB; }
yS = malloc(sizeof(int)*N); assert(yS != NULL); nS = 0; for(i=0; i < N; i++) { if ( fabs(xm - P[yP[i]].x) < dmin ) { yS[nS++] = yP[i]; } }
closest = dmin; if (nS > 1) { for(i=0; i < (nS-1); i++) { k = i + 1; while( (k < nS) && ( (P[yS[k]].y - P[yS[i]].y) < dmin ) ) {
double d = distance(&P[yS[i]], &P[yS[k]]); if ( d < closest ) { closest = d; *iA = yS[i]; *iB = yS[k]; } k++;
} } }
free(xR); free(xL); free(yL); free(yR); free(yS); return closest;
}</lang>
This is the entry point for the divide&conquer algorithm.
<lang c>double closest_pair(point_t *P, int N, int *iA, int *iB) {
int *xP, *yP, i; double d;
xP = malloc(sizeof(int)*N); assert(xP != NULL); yP = malloc(sizeof(int)*N); assert(yP != NULL);
for(i=0; i < N; i++) { xP[i] = yP[i] = i; } m_qsort(P, xP, N, sort_index_by_x); m_qsort(P, yP, N, sort_index_by_y); d = closest_pair_(P, xP, yP, N, iA, iB); free(xP); free(yP); return d;
}</lang>
Testing
<lang c>#define NP 10000
int main() {
point_t *points; int i; int p[2]; double c;
srand(31415);
points = malloc(sizeof(point_t)*NP);
for(i=0; i < NP; i++) { points[i].x = 20.0*((double)rand()/(RAND_MAX+1.0)) - 10.0; points[i].y = 20.0*((double)rand()/(RAND_MAX+1.0)) - 10.0; }
c = closest_pair_simple(points, NP, p, p+1); printf("%lf %d %d (%lf)\n", c, p[0], p[1], distance(&points[p[0]], &points[p[1]])); c = closest_pair(points, NP, p, p+1); printf("%lf %d %d (%lf)\n", c, p[0], p[1], distance(&points[p[0]], &points[p[1]]));
free(points); return EXIT_SUCCESS;
}</lang>
The divide&conquer algorithm gave 0.01user 0.00system 0:00.11elapsed, while the brute-force one gave 1.83user 0.00system 0:01.88elapsed.
Fortran
This module defines the point type and implements only two operations on "vectors" ("difference" and "length")
<lang fortran>module Points_Module
implicit none
type point real :: x, y end type point
interface operator (-) module procedure pt_sub end interface
interface len module procedure pt_len end interface
public :: point private :: pt_sub, pt_len
contains
function pt_sub(a, b) result(c) type(point), intent(in) :: a, b type(point) :: c c = point(a%x - b%x, a%y - b%y) end function pt_sub
function pt_len(a) result(l) type(point), intent(in) :: a real :: l
l = sqrt((a%x)**2 + (a%y)**2) end function pt_len
end module Points_Module</lang>
Then we need a modified version of the fortran quicksort able to handle "points" and that can use a custom comparator.
<lang fortran>module qsort_module
use Points_Module implicit none
contains
recursive subroutine qsort(a, comparator) type(point), intent(inout) :: a(:) interface integer function comparator(aa, bb) use Points_Module type(point), intent(in) :: aa, bb end function comparator end interface
integer :: split
if(size(a) > 1) then call partition(a, split, comparator) call qsort(a(:split-1), comparator) call qsort(a(split:), comparator) end if
end subroutine qsort
subroutine partition(a, marker, comparator) type(point), intent(inout) :: a(:) integer, intent(out) :: marker
interface integer function comparator(aa, bb) use Points_Module type(point), intent(in) :: aa, bb end function comparator end interface
type(point) :: pivot, temp integer :: left, right
left = 0 right = size(a) + 1 pivot = a(size(a)/2)
do while (left < right) right = right - 1 do while (comparator(a(right), pivot) > 0) right = right - 1 end do left = left + 1 do while (comparator(a(left), pivot) < 0) left = left + 1 end do if ( left < right ) then temp = a(left) a(left) = a(right) a(right) = temp end if end do
if (left == right) then marker = left + 1 else marker = left end if end subroutine partition
end module qsort_module</lang>
The module containing the custom comparators.
<lang fortran>module Order_By_XY
use Points_Module implicit none
contains
integer function order_by_x(aa, bb) type(point), intent(in) :: aa, bb
if ( aa%x < bb%x ) then order_by_x = -1 elseif (aa%x > bb%x) then order_by_x = 1 else order_by_x = 0 end if end function order_by_x
integer function order_by_y(aa, bb) type(point), intent(in) :: aa, bb
if ( aa%y < bb%y ) then order_by_y = -1 elseif (aa%y > bb%y) then order_by_y = 1 else order_by_y = 0 end if end function order_by_y
end module Order_By_XY</lang>
The closest pair functions' module.
<lang fortran>module ClosestPair
use Points_Module use Order_By_XY use qsort_module implicit none
private :: closest_pair_real
contains
function closest_pair_simple(p, pair) result(distance) type(point), dimension(:), intent(in) :: p type(point), dimension(:), intent(out), optional :: pair real :: distance
type(point), dimension(2) :: cp integer :: i, j real :: d
if ( size(P) < 2 ) then distance = huge(0.0) else distance = len(p(1) - p(2)) cp = (/ p(1), p(2) /) do i = 1, size(p) - 1 do j = i+1, size(p) d = len(p(i) - p(j)) if ( d < distance ) then distance = d cp = (/ p(i), p(j) /) end if end do end do if ( present(pair) ) pair = cp end if end function closest_pair_simple
function closest_pair(p, pair) result(distance) type(point), dimension(:), intent(in) :: p type(point), dimension(:), intent(out), optional :: pair real :: distance
type(point), dimension(2) :: dp type(point), dimension(size(p)) :: xp, yp integer :: i
xp = p yp = p
call qsort(xp, order_by_x) call qsort(yp, order_by_y)
distance = closest_pair_real(xp, yp, dp) if ( present(pair) ) pair = dp end function closest_pair
recursive function closest_pair_real(xp, yp, pair) result(distance) type(point), dimension(:), intent(in) :: xp, yp type(point), dimension(:), intent(out) :: pair real :: distance
type(point), dimension(2) :: pairl, pairr type(point), dimension(:), allocatable :: ys type(point), dimension(:), allocatable :: pl, yl type(point), dimension(:), allocatable :: pr, yr real :: dl, dr, dmin, xm, d integer :: ns, i, k, j, midx
if ( size(xp) <= 3 ) then distance = closest_pair_simple(xp, pair) else midx = ceiling(size(xp)/2.0)
allocate(ys(size(xp))) allocate(pl(midx), yl(midx)) allocate(pr(size(xp)-midx), yr(size(xp)-midx))
pl = xp(1:midx) pr = xp((midx+1):size(xp))
xm = xp(midx)%x
k = 1; j = 1 do i = 1, size(yp) if ( yp(i)%x > xm ) then ! guard ring; this is an "idiosyncrasy" that should be fixed in a ! smarter way if ( k <= size(yr) ) yr(k) = yp(i) k = k + 1 else ! guard ring (see above) if ( j <= size(yl) ) yl(j) = yp(i) j = j + 1 end if end do
dl = closest_pair_real(pl, yl, pairl) dr = closest_pair_real(pr, yr, pairr)
pair = pairr dmin = dr if ( dl < dr ) then pair = pairl dmin = dl end if
ns = 0 do i = 1, size(yp) if ( abs(yp(i)%x - xm) < dmin ) then ns = ns + 1 ys(ns) = yp(i) end if end do
distance = dmin do i = 1, ns-1 k = i + 1 do while ( k <= ns .and. abs(ys(k)%y - ys(i)%y) < dmin ) d = len(ys(k) - ys(i)) if ( d < distance ) then distance = d pair = (/ ys(k), ys(i) /) end if k = k + 1 end do end do deallocate(ys) deallocate(pl, yl) deallocate(pr, yr) end if end function closest_pair_real
end module ClosestPair</lang>
Testing:
<lang fortran>program TestClosestPair
use ClosestPair implicit none
integer, parameter :: n = 10000
integer :: i real :: x, y type(point), dimension(n) :: points
type(point), dimension(2) :: p real :: dp, dr
! init the random generator here if needed
do i = 1, n call random_number(x) call random_number(y) points(i) = point(x*20.0-10.0, y*20.0-10.0) end do
dp = closest_pair_simple(points, p) print *, "sim ", dp dr = closest_pair(points, p) print *, "rec ", dr
end program TestClosestPair</lang>
Time gave 2.92user 0.00system 0:02.94elapsed for brute force, and 0.02user 0.00system 0:00.03elapsed for the other one.
Objective-C
<lang objc>#import <Foundation/Foundation.h>
- import <math.h>
@interface Point : NSObject {
double xCoord, yCoord;
} +(id)x: (double)x y: (double)y; -(id)initWithX: (double)x andY: (double)y; -(double)x; -(double)y; -(double)dist: (Point *)pt; -(NSInteger)compareX: (Point *)pt; -(NSInteger)compareY: (Point *)pt; @end
@implementation Point +(id)x: (double)x y: (double)y {
id me = [super new]; [me initWithX: x andY: y]; return me;
} -(id)initWithX: (double)x andY: (double)y {
xCoord = x; yCoord = y; return self;
} -(double)x {
return xCoord;
} -(double)y {
return yCoord;
} -(double)dist: (Point *)pt {
return sqrt( ([self x] - [pt x])*([self x] - [pt x]) +
([self y] - [pt y])*([self y] - [pt y]) ); } -(NSInteger)compareX: (Point *)pt {
if ( [self x] < [pt x] ) { return NSOrderedAscending; } else if ( [self x] > [pt x] ) { return NSOrderedDescending; } else { return NSOrderedSame; }
} -(NSInteger)compareY: (Point *)pt {
if ( [self y] < [pt y] ) { return NSOrderedAscending; } else if ( [self y] > [pt y] ) { return NSOrderedDescending; } else { return NSOrderedSame; }
} @end</lang>
<lang objc>@interface ClosestPair : NSObject +(NSArray *)closestPairSimple: (NSArray *)pts; +(NSArray *)closestPair: (NSArray *)pts; +(NSArray *)closestPairPriv: (NSArray *)xP and: (NSArray *)yP; +(id)minBetween: (id)minA and: (id)minB; @end
@implementation ClosestPair +(NSArray *)closestPairSimple: (NSArray *)pts {
int i, j; if ( [pts count] < 2 ) return [NSArray arrayWithObject: [NSNumber numberWithDouble: HUGE_VAL]]; NSArray *r; double c = [[pts objectAtIndex: 0] dist: [pts objectAtIndex: 1]]; r = [NSArray
arrayWithObjects: [NSNumber numberWithDouble: c], [pts objectAtIndex: 0], [pts objectAtIndex: 1], nil];
for(i=0; i < ([pts count] - 1); i++) { for(j=i+1; j < [pts count]; j++) { double t; t = [[pts objectAtIndex: i] dist: [pts objectAtIndex: j]]; if ( t < c ) {
c = t; r = [NSArray arrayWithObjects: [NSNumber numberWithDouble: t], [pts objectAtIndex: i], [pts objectAtIndex: j], nil];
} } } return r;
} +(NSArray *)closestPair: (NSArray *)pts {
return [self closestPairPriv:
[pts sortedArrayUsingSelector: @selector(compareX:)] and: [pts sortedArrayUsingSelector: @selector(compareY:)]
];
} +(NSArray *)closestPairPriv: (NSArray *)xP and: (NSArray *)yP {
NSArray *pR, *pL, *minR, *minL; NSMutableArray *yR, *yL, *joiningStrip, *tDist, *minDist; double middleVLine; int i, nP, k;
if ( [xP count] <= 3 ) { return [self closestPairSimple: xP]; } else { int midx = ceil([xP count]/2.0); pL = [xP subarrayWithRange: NSMakeRange(0, midx)]; pR = [xP subarrayWithRange: NSMakeRange(midx, [xP count] - midx)]; yL = [[NSMutableArray alloc] init]; yR = [[NSMutableArray alloc] init]; middleVLine = [[pL objectAtIndex: (midx-1)] x]; for(i=0; i < [yP count]; i++) { if ( [[yP objectAtIndex: i] x] <= middleVLine ) {
[yL addObject: [yP objectAtIndex: i]];
} else {
[yR addObject: [yP objectAtIndex: i]];
} } minR = [ClosestPair closestPairPriv: pR and: yR]; minL = [ClosestPair closestPairPriv: pL and: yL]; minDist = [ClosestPair minBetween: minR and: minL]; joiningStrip = [NSMutableArray arrayWithCapacity: [xP count]]; for(i=0; i < [yP count]; i++) { if ( fabs([[yP objectAtIndex: i] x] - middleVLine) < [[minDist objectAtIndex: 0] doubleValue] ) {
[joiningStrip addObject: [yP objectAtIndex: i]];
} } tDist = minDist; nP = [joiningStrip count]; for(i=0; i < (nP - 1); i++) { k = i + 1; while( (k < nP) &&
( ([[joiningStrip objectAtIndex: k] y] - [[joiningStrip objectAtIndex: i] y]) < [[minDist objectAtIndex: 0] doubleValue] ) ) { double d = [[joiningStrip objectAtIndex: i] dist: [joiningStrip objectAtIndex: k]]; if ( d < [[tDist objectAtIndex: 0] doubleValue] ) { tDist = [NSArray arrayWithObjects: [NSNumber numberWithDouble: d], [joiningStrip objectAtIndex: i], [joiningStrip objectAtIndex: k], nil]; } k++;
} } [yL release]; [yR release]; return tDist; }
} +(id)minBetween: (id)minA and: (id)minB {
if ( [[minA objectAtIndex: 0] doubleValue] < [[minB objectAtIndex: 0] doubleValue] ) { return minA; } else { return minB; }
} @end</lang>
Testing
<lang objc>#define NP 10000
int main() {
NSAutoreleasePool *pool = [[NSAutoreleasePool alloc] init]; NSMutableArray *p = [[NSMutableArray alloc] init]; int i;
srand(0); for(i=0; i < NP; i++) { [p addObject:
[Point x: 20.0*((double)rand()/(RAND_MAX+1.0)) - 10.0 y: 20.0*((double)rand()/(RAND_MAX+1.0)) - 10.0]
]; }
//NSArray *r1 = [ClosestPair closestPairSimple: p]; NSArray *r2 = [ClosestPair closestPair: p];
//NSLog(@"%lf", [[r1 objectAtIndex: 0] doubleValue]); NSLog(@"%lf", [[r2 objectAtIndex: 0] doubleValue]);
[p release]; [pool drain]; return EXIT_SUCCESS;
}</lang>
Timing (with the time command):
d&c: 0.22user 0.00system 0:00.41elapsed brute force: 13.53user 0.06system 0:13.87elapsed
Perl
<lang perl>#! /usr/bin/perl use strict; use POSIX qw(ceil);
sub dist {
my ( $a, $b) = @_; return sqrt( ($a->[0] - $b->[0])**2 + ($a->[1] - $b->[1])**2 );
}
sub closest_pair_simple {
my $ra = shift; my @arr = @$ra; my $inf = 1e600; return $inf if (scalar(@arr) < 2); my ( $a, $b, $d ) = ($arr[0], $arr[1], dist($arr[0], $arr[1])); while( scalar(@arr) > 0 ) {
my $p = pop @arr; foreach my $l (@arr) { my $t = dist($p, $l); ($a, $b, $d) = ($p, $l, $t) if $t < $d; }
} return ($a, $b, $d);
}
sub closest_pair {
my $r = shift; my @ax = sort { ${$a}[0] <=> ${$b}[0] } @$r; my @ay = sort { ${$a}[1] <=> ${$b}[1] } @$r; return closest_pair_real(\@ax, \@ay);
}
sub closest_pair_real {
my ($rx, $ry) = @_; my @xP = @$rx; my @yP = @$ry; my $N = @xP; return closest_pair_simple($rx) if ( scalar(@xP) <= 3 );
my $inf = 1e600; my $midx = ceil($N/2)-1;
my @PL = @xP[0 .. $midx]; my @PR = @xP[$midx+1 .. $N-1];
my $xm = ${$xP[$midx]}[0];
my @yR = (); my @yL = (); foreach my $p (@yP) {
if ( ${$p}[0] <= $xm ) { push @yR, $p; } else { push @yL, $p; }
}
my ($al, $bl, $dL) = closest_pair_real(\@PL, \@yR); my ($ar, $br, $dR) = closest_pair_real(\@PR, \@yL);
my ($m1, $m2, $dmin) = ($al, $bl, $dL); ($m1, $m2, $dmin) = ($ar, $br, $dR) if ( $dR < $dL );
my @yS = (); foreach my $p (@yP) {
push @yS, $p if ( abs($xm - ${$p}[0]) < $dmin );
}
if ( scalar(@yS) > 0 ) {
my ( $w1, $w2, $closest ) = ($m1, $m2, $dmin); foreach my $i (0 .. ($#yS - 1)) {
my $k = $i + 1; while ( ($k <= $#yS) && ( (${$yS[$k]}[1] - ${$yS[$i]}[1]) < $dmin) ) { my $d = dist($yS[$k], $yS[$i]); ($w1, $w2, $closest) = ($yS[$k], $yS[$i], $d) if ($d < $closest); $k++; }
} return ($w1, $w2, $closest);
} else {
return ($m1, $m2, $dmin);
}
}
my @points = (); my $N = 5000;
foreach my $i (1..$N) {
push @points, [rand(20)-10.0, rand(20)-10.0];
}
my ($a, $b, $d) = closest_pair_simple(\@points);
print "$d\n";
my ($a1, $b1, $d1) = closest_pair(\@points);
- print "$d1\n";</lang>
Time for the brute-force algorithm gave 40.63user 0.12system 0:41.06elapsed, while the divide&conqueer algorithm gave 0.37user 0.00system 0:00.38elapsed with 5000 points.
Python
<lang python>"""
Compute nearest pair of points using two algorithms First algorithm is 'brute force' comparison of every possible pair. Second, 'divide and conquer', is based on: www.cs.iupui.edu/~xkzou/teaching/CS580/Divide-and-conquer-closestPair.ppt
"""
from random import randint
infinity = float('inf')
- Note the use of complex numbers to represent 2D points making distance == abs(P1-P2)
def bruteForceClosestPair(point):
numPoints = len(point) if numPoints < 2: return infinity, (None, None) return min( ((abs(point[i] - point[j]), (point[i], point[j])) for i in range(numPoints-1) for j in range(i+1,numPoints)), key=lambda x: x[0])
def closestPair(point):
xP = sorted(point, key= lambda p: p.real) yP = sorted(point, key= lambda p: p.imag) return _closestPair(xP, yP)
def _closestPair(xP, yP):
numPoints = len(xP) if numPoints <= 3: return bruteForceClosestPair(xP) Pl = xP[:numPoints/2] Pr = xP[numPoints/2:] Yl, Yr = [], [] xDivider = Pl[-1].real for p in yP: if p.real <= xDivider: Yl.append(p) else: Yr.append(p) dl, pairl = _closestPair(Pl, Yl) dr, pairr = _closestPair(Pr, Yr) dm, pairm = (dl, pairl) if dl < dr else (dr, pairr) # Points within dm of xDivider sorted by Y coord closeY = [p for p in yP if abs(p.real - xDivider) < dm] numCloseY = len(closeY) if numCloseY > 1: # There is a proof that you only need compare a max of 7 next points closestY = min( ((abs(closeY[i] - closeY[j]), (closeY[i], closeY[j])) for i in range(numCloseY-1) for j in range(i+1,min(i+8, numCloseY))), key=lambda x: x[0]) return (dm, pairm) if dm <= closestY[0] else closestY else: return dm, pairm
def times():
Time the different functions import timeit
functions = [bruteForceClosestPair, closestPair] for f in functions: print 'Time for', f.__name__, timeit.Timer( '%s(pointList)' % f.__name__, 'from closestpair import %s, pointList' % f.__name__).timeit(number=1)
pointList = [randint(0,1000)+1j*randint(0,1000) for i in range(2000)]
if __name__ == '__main__':
pointList = [(5+9j), (9+3j), (2+0j), (8+4j), (7+4j), (9+10j), (1+9j), (8+2j), 10j, (9+6j)] print pointList print ' bruteForceClosestPair:', bruteForceClosestPair(pointList) print ' closestPair:', closestPair(pointList) for i in range(10): pointList = [randint(0,10)+1j*randint(0,10) for i in range(10)] print '\n', pointList print ' bruteForceClosestPair:', bruteForceClosestPair(pointList) print ' closestPair:', closestPair(pointList) print '\n' times() times() times()
</lang>
Sample output followed by timing comparisons
(Note how the two algorithms agree on the minimum distance, but may return a different pair of points if more than one pair of points share that minimum separation):
[(5+9j), (9+3j), (2+0j), (8+4j), (7+4j), (9+10j), (1+9j), (8+2j), 10j, (9+6j)] bruteForceClosestPair: (1.0, ((8+4j), (7+4j))) closestPair: (1.0, ((8+4j), (7+4j))) [(10+6j), (7+0j), (9+4j), (4+8j), (7+5j), (6+4j), (1+9j), (6+4j), (1+3j), (5+0j)] bruteForceClosestPair: (0.0, ((6+4j), (6+4j))) closestPair: (0.0, ((6+4j), (6+4j))) [(4+10j), (8+5j), (10+3j), (9+7j), (2+5j), (6+7j), (6+2j), (9+6j), (3+8j), (5+1j)] bruteForceClosestPair: (1.0, ((9+7j), (9+6j))) closestPair: (1.0, ((9+7j), (9+6j))) [(10+0j), (3+10j), (10+7j), (1+8j), (5+10j), (8+8j), (4+7j), (6+2j), (6+10j), (9+3j)] bruteForceClosestPair: (1.0, ((5+10j), (6+10j))) closestPair: (1.0, ((5+10j), (6+10j))) [(3+7j), (5+3j), 0j, (2+9j), (2+5j), (9+6j), (5+9j), (4+3j), (3+8j), (8+7j)] bruteForceClosestPair: (1.0, ((3+7j), (3+8j))) closestPair: (1.0, ((4+3j), (5+3j))) [(4+3j), (10+9j), (2+7j), (7+8j), 0j, (3+10j), (10+2j), (7+10j), (7+3j), (1+4j)] bruteForceClosestPair: (2.0, ((7+8j), (7+10j))) closestPair: (2.0, ((7+8j), (7+10j))) [(9+2j), (9+8j), (6+4j), (7+0j), (10+2j), (10+0j), (2+7j), (10+7j), (9+2j), (1+5j)] bruteForceClosestPair: (0.0, ((9+2j), (9+2j))) closestPair: (0.0, ((9+2j), (9+2j))) [(3+3j), (8+2j), (4+0j), (1+1j), (9+10j), (5+0j), (2+3j), 5j, (5+0j), (7+0j)] bruteForceClosestPair: (0.0, ((5+0j), (5+0j))) closestPair: (0.0, ((5+0j), (5+0j))) [(1+5j), (8+3j), (8+10j), (6+8j), (10+9j), (2+0j), (2+7j), (8+7j), (8+4j), (1+2j)] bruteForceClosestPair: (1.0, ((8+3j), (8+4j))) closestPair: (1.0, ((8+3j), (8+4j))) [(8+4j), (8+6j), (8+0j), 0j, (10+7j), (10+6j), 6j, (1+3j), (1+8j), (6+9j)] bruteForceClosestPair: (1.0, ((10+7j), (10+6j))) closestPair: (1.0, ((10+7j), (10+6j))) [(6+8j), (10+1j), 3j, (7+9j), (4+10j), (4+7j), (5+7j), (6+10j), (4+7j), (2+4j)] bruteForceClosestPair: (0.0, ((4+7j), (4+7j))) closestPair: (0.0, ((4+7j), (4+7j))) Time for bruteForceClosestPair 4.57953371169 Time for closestPair 0.122539596513 Time for bruteForceClosestPair 5.13221177552 Time for closestPair 0.124602707886 Time for bruteForceClosestPair 4.83609397284 Time for closestPair 0.119326618327 >>>
R
This is just a brute force solution for R that makes use of the apply function native to R for dealing with matrices. It expects x and y to take the form of separate vectors. <lang R> closestPair<-function(x,y)
{ distancev <- function(pointsv) { x1 <- pointsv[1] y1 <- pointsv[2] x2 <- pointsv[3] y2 <- pointsv[4] return(sqrt((x1 - x2)^2 + (y1 - y2)^2)) } pairstocompare <- t(combn(length(x),2)) pointsv <- cbind(x[pairstocompare[,1]],y[pairstocompare[,1]],x[pairstocompare[,2]],y[pairstocompare[,2]]) pairstocompare <- cbind(pairstocompare,apply(pointsv,1,distancev)) minrow <- pairstocompare[pairstocompare[,3] == min(pairstocompare[,3])] if (!is.null(nrow(minrow))) {print("More than one point at this distance!"); minrow <- minrow[1,]} cat("The closest pair is:\n\tPoint 1: ",x[minrow[1]],", ",y[minrow[1]], "\n\tPoint 2: ",x[minrow[2]],", ",y[minrow[2]], "\n\tDistance: ",minrow[3],"\n",sep="") return(as.list(c(closest=minrow[3],x1=x[minrow[1]],y1=y[minrow[1]],x2=x[minrow[2]],y2=y[minrow[2]]))) }
</lang>
Ruby
<lang ruby>Point = Struct.new(:x, :y)
def distance(p1, p2)
Math.hypot(p1.x - p2.x, p1.y - p2.y)
end
def closest_bruteforce(points)
mindist, minpts = Float::MAX, [] points.length.times do |i| (i+1).upto(points.length - 1) do |j| dist = distance(points[i], points[j]) if dist < mindist mindist = dist minpts = [points[i], points[j]] end end end [mindist, minpts]
end
def closest_recursive(points)
if points.length <= 3 return closest_bruteforce(points) end xP = points.sort_by {|p| p.x} mid = (points.length / 2.0).ceil pL = xP[0,mid] pR = xP[mid..-1] dL, pairL = closest_recursive(pL) dR, pairR = closest_recursive(pR) if dL < dR dmin, dpair = dL, pairL else dmin, dpair = dR, pairR end yP = xP.find_all {|p| (pL[-1].x - p.x).abs < dmin}.sort_by {|p| p.y} closest = Float::MAX closestPair = [] 0.upto(yP.length - 2) do |i| (i+1).upto(yP.length - 1) do |k| break if (yP[k].y - yP[i].y) >= dmin dist = distance(yP[i], yP[k]) if dist < closest closest = dist closestPair = [yP[i], yP[k]] end end end if closest < dmin [closest, closestPair] else [dmin, dpair] end
end
points = Array.new(100) {Point.new(rand, rand)}
p ans1 = closest_bruteforce(points)
p ans2 = closest_recursive(points)
fail "bogus!" if ans1[0] != ans2[0]
require 'benchmark'
points = Array.new(10000) {Point.new(rand, rand)} Benchmark.bm(12) do |x|
x.report("bruteforce") {ans1 = closest_bruteforce(points)} x.report("recursive") {ans2 = closest_recursive(points)}
end</lang>
[0.00522229060545241, [#<struct Point x=0.43887011964135, y=0.00656904813877568>, #<struct Point x=0.433711197400243, y=0.00575797448120408>]] [0.00522229060545241, [#<struct Point x=0.433711197400243, y=0.00575797448120408>, #<struct Point x=0.43887011964135, y=0.00656904813877568>]] user system total real bruteforce 133.437000 0.000000 133.437000 (134.633000) recursive 0.516000 0.000000 0.516000 ( 0.559000)
Smalltalk
These class methods return a three elements array, where the first two items are the points, while the third is the distance between them (which having the two points, can be computed; but it was easier to keep it already computed in the third position of the array).
<lang smalltalk>Object subclass: ClosestPair [
ClosestPair class >> raiseInvalid: arg [ SystemExceptions.InvalidArgument signalOn: arg reason: 'specify at least two points' ]
ClosestPair class >> bruteForce: pointsList [ |dist from to points| (pointsList size < 2) ifTrue: [ ^ FloatD infinity ]. points := pointsList asOrderedCollection. from := points at: 1. to := points at: 2. dist := from dist: to. [ points isEmpty ] whileFalse: [ |p0| p0 := points removeFirst. points do: [ :p | ((p0 dist: p) <= dist) ifTrue: [ from := p0. to := p. dist := p0 dist: p. ] ] ]. ^ { from. to. from dist: to } ]
ClosestPair class >> orderByX: points [ ^ points asSortedCollection: [:a :b| (a x) < (b x) ] ] ClosestPair class >> orderByY: points [ ^ points asSortedCollection: [:a :b| (a y) < (b y) ] ]
ClosestPair class >> splitLeft: pointsList [ ^ pointsList copyFrom: 1 to: ((pointsList size / 2) ceiling) ] ClosestPair class >> splitRight: pointsList [ |s| ^ pointsList copyFrom: (((pointsList size / 2) ceiling) + 1) to: (pointsList size). ]
ClosestPair class >> minBetween: a and: b [ (a at: 3) < (b at: 3) ifTrue: [ ^a ] ifFalse: [ ^b ] ]
ClosestPair class >> recursiveDAndC: orderedByX and: orderedByY [ |pR pL minL minR minDist middleVLine joiningStrip tDist nP yL yR| (orderedByX size <= 3) ifTrue: [ ^ self bruteForce: orderedByX ].
pR := self splitRight: orderedByX. pL := self splitLeft: orderedByX.
middleVLine := (pL last) x.
yR := OrderedCollection new. yL := OrderedCollection new.
orderedByY do: [ :e | (e x) <= middleVLine ifTrue: [ yL add: e ] ifFalse: [ yR add: e ] ].
minR := self recursiveDAndC: pR and: yR. minL := self recursiveDAndC: pL and: yL.
minDist := self minBetween: minR and: minL.
joiningStrip := orderedByY select: [ :p | ((middleVLine - (p x)) abs) < (minDist at: 3) ]. tDist := minDist. nP := joiningStrip size.
1 to: (nP - 1) do: [ :i | |k| k := i + 1. [ (k <= nP) & ( (((joiningStrip at: (k min: nP)) y) - ((joiningStrip at: i) y)) < (minDist at: 3) ) ] whileTrue: [ |d| d := (joiningStrip at: i) dist: (joiningStrip at: k). d < (tDist at: 3) ifTrue: [ tDist := { joiningStrip at: i. joiningStrip at: k. d } ]. k := k + 1. ] ].
^ tDist ]
ClosestPair class >> divideAndConquer: pointsList [ ^ self recursiveDAndC: (self orderByX: pointsList) and: (self orderByY: pointsList) ]
].</lang>
Testing
<lang smalltalk>|coll cp ran|
ran := Random seed: 1.
coll := (1 to: 10000 collect: [ :a |
Point x: ((ran next)*20.0 - 10.0) y: ((ran next)*20.0 - 10.0) ]).
cp := ClosestPair bruteForce: coll. ((cp at: 3) asScaledDecimal: 7) displayNl.
"or"
cp := ClosestPair divideAndConquer: coll. ((cp at: 3) asScaledDecimal: 7) displayNl.</lang>
The brute-force approach with 10000 points, run with the time tool, gave
224.21user 1.31system 3:46.84elapsed 99%CPU
while the recursive divide&conquer algorithm gave
2.37user 0.01system 0:02.56elapsed 93%CPU
(Of course these results must be considered relative and taken cum grano salis; time counts also the time taken to initialize the Smalltalk environment, and I've taken no special measures to avoid the system load falsifying the results)
Tcl
Each point is represented as a list of two floating-point numbers, the first being the x coordinate, and the second being the y. <lang Tcl>package require Tcl 8.5
- retrieve the x-coordinate
proc x p {lindex $p 0}
- retrieve the y-coordinate
proc y p {lindex $p 1}
proc distance {p1 p2} {
expr {hypot(([x $p1]-[x $p2]), ([y $p1]-[y $p2]))}
}
proc closest_bruteforce {points} {
set n [llength $points] set mindist Inf set minpts {} for {set i 0} {$i < $n - 1} {incr i} { for {set j [expr {$i + 1}]} {$j < $n} {incr j} { set p1 [lindex $points $i] set p2 [lindex $points $j] set dist [distance $p1 $p2] if {$dist < $mindist} { set mindist $dist set minpts [list $p1 $p2] } } } return [list $mindist $minpts]
}
proc closest_recursive {points} {
set n [llength $points] if {$n <= 3} { return [closest_bruteforce $points] } set xP [lsort -real -increasing -index 0 $points] set mid [expr {int(ceil($n/2.0))}] set PL [lrange $xP 0 [expr {$mid-1}]] set PR [lrange $xP $mid end] set procname [lindex [info level 0] 0] lassign [$procname $PL] dL pairL lassign [$procname $PR] dR pairR if {$dL < $dR} { set dmin $dL set dpair $pairL } else { set dmin $dR set dpair $pairR } set xM [x [lindex $PL end]] foreach p $xP { if {abs($xM - [x $p]) < $dmin} { lappend S $p } } set yP [lsort -real -increasing -index 1 $S] set closest Inf set nP [llength $yP] for {set i 0} {$i <= $nP-2} {incr i} { set yPi [lindex $yP $i] for {set k [expr {$i+1}]; set yPk [lindex $yP $k]} { $k < $nP-1 && ([y $yPk]-[y $yPi]) < $dmin } {incr k; set yPk [lindex $yP $k]} { set dist [distance $yPk $yPi] if {$dist < $closest} { set closest $dist set closestPair [list $yPi $yPk] } } } expr {$closest < $dmin ? [list $closest $closestPair] : [list $dmin $dpair]}
}
- testing
set N 10000 for {set i 1} {$i <= $N} {incr i} {
lappend points [list [expr {rand()*100}] [expr {rand()*100}]]
}
- instrument the number of calls to [distance] to examine the
- efficiency of the recursive solution
trace add execution distance enter comparisons proc comparisons args {incr ::comparisons}
puts [format "%-10s %9s %9s %s" method compares time closest] foreach method {bruteforce recursive} {
set ::comparisons 0 set time [time {set ::dist($method) [closest_$method $points]} 1] puts [format "%-10s %9d %9d %s" $method $::comparisons [lindex $time 0] [lindex $::dist($method) 0]]
}</lang> Example output
method compares time closest bruteforce 49995000 512967207 0.0015652738546658382 recursive 14613 488094 0.0015652738546658382
Note that the lindex
and llength
commands are both O(1).