One of n lines in a file: Difference between revisions

From Rosetta Code
Content added Content deleted
(No longer draft as people seem able to follow the requirements OK.)
No edit summary
Line 113: Line 113:
let rec aux i r =
let rec aux i r =
if i >= n then r else
if i >= n then r else
if (Random.float 1.0) < (1.0 /. float (i + 1))
if Random.int (i + 1) = 0
then aux (succ i) i
then aux (succ i) i
else aux (succ i) r
else aux (succ i) r
Line 121: Line 121:
let test ~n ~trials =
let test ~n ~trials =
let ar = Array.make n 0 in
let ar = Array.make n 0 in
for i = 0 to pred trials do
for i = 1 to trials do
let d = one_of_n n in
let d = one_of_n n in
ar.(d) <- succ ar.(d)
ar.(d) <- succ ar.(d)
Line 159: Line 159:


=={{header|Python}}==
=={{header|Python}}==
<lang python>from random import random as rnd
<lang python>from random import randrange


def one_of_n(n):
def one_of_n(n):
Line 165: Line 165:
choice = 0
choice = 0
for i in range(1, n):
for i in range(1, n):
if rnd() < 1. / (i + 1.):
if randrange(i+1) == 0:
choice = i
choice = i
return choice
return choice

Revision as of 08:35, 11 September 2011

Task
One of n lines in a file
You are encouraged to solve this task according to the task description, using any language you may know.

A method of choosing a line randomly from a file:

  • Without reading the file more than once
  • When substantial parts of the file cannot be held in memory
  • Without knowing how many lines are in the file

Is to:

  • keep the first line of the file as a possible choice, then
  • Read the second line of the file if possible and make it the possible choice if a uniform random value between zero and one is less than 1/2.
  • Read the third line of the file if possible and make it the possible choice if a uniform random value between zero and one is less than 1/3.
...
  • Read the Nth line of the file if possible and make it the possible choice if a uniform random value between zero and one is less than 1/N
  • Return the computed possible choice when no further lines exist in the file.
Task
  1. Create a function/method/routine called one_of_n that given n, the number of actual lines in a file, follows the algotrithm above to return an integer - the line number of the line chosen from the file.
    The number returned can vary, randomly, in each run.
  2. Use one_of_n in a simulation to find what woud be the chosen line of a 10 line file simulated 1,000,000 times.
  3. Print and show how many times each of the 10 lines is chosen as a rough measure of how well the algorithm works.

Note: You may choose a smaller number of repetitions if necessary, but mention this up-front.

Ada

<lang Ada>with Ada.Text_IO, Ada.Numerics.Float_Random;

procedure One_Of_N is

  Num_Of_Lines: constant Positive := 10;
  package Rnd renames Ada.Numerics.Float_Random;
  Gen: Rnd.Generator; -- used globally
  function Choose_One_Of_N(Last_Line_Number: Positive) return Natural is
     Current_Choice: Natural := 0;
  begin
     for Line_Number in 1 .. Last_Line_Number loop
       if (Rnd.Random(Gen) * Float(Line_Number) <= 1.0) then
          Current_Choice := Line_Number;
       end if;
     end loop;
     return Current_Choice;
  end Choose_One_Of_N;
  Results: array(1 .. Num_Of_Lines) of Natural := (others => 0);
  Index: Integer range 1 .. Num_Of_Lines;

begin

  Rnd.Reset(Gen);
  for I in 1 .. 1_000_000 loop    -- compute results
     Index := Choose_One_Of_N(Num_Of_Lines);
     Results(Index) := Results(Index) + 1;
  end loop;
  for R in Results'Range loop    -- output results
     Ada.Text_IO.Put(Integer'Image(Results(R)));
  end loop;

end One_Of_N;</lang>

Example output:

 100104 100075 99761 99851 100457 100315 100101 99557 99678 100101

C

<lang c>#include <stdio.h>

  1. include <stdlib.h>

inline int irand(int n) { int r, randmax = RAND_MAX/n * n; while ((r = rand()) >= randmax); return r / (randmax / n); }

inline int one_of_n(int n) { int i, r = 0; for (i = 1; i < n; i++) if (!irand(i + 1)) r = i; return r; }

int main(void) { int i, r[10] = {0};

for (i = 0; i < 1000000; i++, r[one_of_n(10)]++); for (i = 0; i < 10; i++) printf("%d%c", r[i], i == 9 ? '\n':' ');

return 0; }</lang>output<lang>100561 99814 99816 99721 99244 99772 100790 100072 99997 100213</lang>

Icon and Unicon

Translation of: Python

<lang Icon>procedure main() # one of n

  one_of_n_test(10,1000000)

end

procedure one_of_n(n)

  every i := 1 to n do 
     choice := (?0  < 1. / i, i)
  return \choice | fail

end

procedure one_of_n_test(n,trials)

  bins := table(0)
  every i := 1 to trials do
        bins[one_of_n(n)] +:= 1
  every writes(bins[i := 1 to n]," ")
  return bins

end</lang>

Sample output:

99470 99806 99757 99921 100213 100001 99778 100385 100081 100588

OCaml

<lang ocaml>let one_of_n n =

 let rec aux i r =
   if i >= n then r else
     if Random.int (i + 1) = 0
     then aux (succ i) i
     else aux (succ i) r
 in
 aux 1 0

let test ~n ~trials =

 let ar = Array.make n 0 in
 for i = 1 to trials do
   let d = one_of_n n in
   ar.(d) <- succ ar.(d)
 done;
 Array.iter (Printf.printf " %d") ar;
 print_newline ()

let () =

 Random.self_init ();
 test ~n:10 ~trials:1_000_000</lang>

Executing:

$ ocamlopt -o one.opt one.ml
$ ./one.opt 
 100620 99719 99928 99864 99760 100151 99553 100529 99800 100076

Perl 6

Translation of: Python

<lang perl6>sub one_of_n($n) {

   my $choice;
   $choice = $_ if rand * $_ < 1 for 1 .. $n;
   $choice - 1;

}

sub one_of_n_test($n = 10, $trials = 1000000) {

   my @bins;
   @bins[one_of_n($n)]++ for ^$trials;
   @bins;

}

say one_of_n_test();</lang> Output:

100288 100047 99660 99773 100256 99633 100161 100483 99789 99910

Python

<lang python>from random import randrange

def one_of_n(n):

   # Zero based line numbers
   choice = 0
   for i in range(1, n):
       if randrange(i+1) == 0:
           choice = i
   return choice
           

def one_of_n_test(n=10, trials=1000000):

   bins = [0] * n
   if n:
       for i in range(trials):
           bins[one_of_n(n)] += 1
   return bins

print(one_of_n_test())</lang>

Sample output
[99833, 100303, 99902, 100132, 99608, 100117, 99531, 100017, 99795, 100762]

Tcl

<lang tcl>package require Tcl 8.5 proc 1ofN {n} {

   for {set line 1} {$line <= $n} {incr line} {

if {rand() < 1.0/[incr fraction]} { set result $line }

   }
   return $result

}

for {set i 0} {$i < 1000000} {incr i} {

   incr count([1ofN 10])

} parray count; # Alphabetic order, but convenient</lang> Sample output:

count(1)  = 99862
count(10) = 100517
count(2)  = 100545
count(3)  = 100339
count(4)  = 99636
count(5)  = 99920
count(6)  = 99263
count(7)  = 100283
count(8)  = 99871
count(9)  = 99764