Textonyms: Difference between revisions

From Rosetta Code
Content added Content deleted
(→‎{{header|REXX}}: added the REXX language. -- ~~~~)
Line 453: Line 453:
{{out}}
{{out}}
<pre>
<pre>
puts "There are #{Textonyms.inject(0){|n,g| n+g[1].length}} words in #{"http://rosettacode.org/wiki/Textonyms/wordlist"} which can be represented by the Textonyms mapping."
puts "There are #{Textonyms.inject(0){|n,g| n+g[1].length}} words in #{"Wordlist"} which can be represnted by the Textonyms mapping."
puts "They require #{Textonyms.length} digit combinations to represent them."
puts "They require #{Textonyms.length} digit combinations to represent them."
puts "#{Textonyms.inject(0){|n,g| g[1].length > 1 ? n+1 : n}} digit combinations correspond to a Textonym"


There are 132916 words in http://rosettacode.org/wiki/Textonyms/wordlist which can be represented by the Textonyms mapping.
There are 132916 words in Wordlist which can be represnted by the Textonyms mapping.
They require 117868 digit combinations to represent them.
They require 117868 digit combinations to represent them.
9579 digit combinations correspond to a Textonym
</pre>
</pre>
<pre>
<pre>

Revision as of 16:05, 10 February 2015

Task
Textonyms
You are encouraged to solve this task according to the task description, using any language you may know.

When entering text on a phone's digital pad it is possible that a particular combination of digits corresponds to more than one word. Such are called textonyms.

Assuming the digit keys are mapped to letters as follows:

    2 -> ABC
    3 -> DEF
    4 -> GHI
    5 -> JKL
    6 -> MNO
    7 -> PQRS
    8 -> TUV
    9 -> WXYZ  

The task is to write a program that finds textonyms in a list of words such as Textonyms/wordlist or [1].

The task should produce a report:

There are #{0} words in #{1} which can be represented by the digit key mapping.
They require #{2} digit combinations to represent them.
#{3} digit combinations represent Textonyms.

Where:

#{0} is the number of words in the list which can be represented by the digit key mapping.
#{1} is the URL of the wordlist being used.
#{2} is the number of digit combinations required to represent the words in #{0}.
#{3} is the number of #{2} which represent more than one word.

At your discretion show a couple of examples of your solution displaying Textonyms. e.g.

 2748424767 -> "Briticisms", "criticisms"

Extra credit:

Use a word list and keypad mapping other than English.

Go

Uses a local file and shows its name rather than re-fetching a URL each run and printing that URL.

Like the Python example, the examples shown are the numbers that map to the most words. <lang go>package main

import (
	"bufio"
	"flag"
	"fmt"
	"io"
	"log"
	"os"
	"sort"
	"strings"
	"unicode"
)

func main() { log.SetFlags(0) log.SetPrefix("textonyms: ")

wordlist := flag.String("wordlist", "wordlist", "file containing the list of words to check") flag.Parse() if flag.NArg() != 0 { flag.Usage() os.Exit(2) }

t := NewTextonym(phoneMap) _, err := ReadFromFile(t, *wordlist) if err != nil { log.Fatal(err) } t.Report(os.Stdout, *wordlist) }

// phoneMap is the digit to letter mapping of a typical phone. Keys are the
// ASCII digit characters '2' through '9'; values are the upper-case letters
// printed on that key.
var phoneMap = map[byte][]rune{
	'2': []rune("ABC"),
	'3': []rune("DEF"),
	'4': []rune("GHI"),
	'5': []rune("JKL"),
	'6': []rune("MNO"),
	'7': []rune("PQRS"),
	'8': []rune("TUV"),
	'9': []rune("WXYZ"),
}

// ReadFromFile is a generic convenience function that allows the use of a
// filename with an io.ReaderFrom and handles errors related to opening and
// closing the file.
//
// It returns the byte count reported by r.ReadFrom. If the file cannot be
// opened, r is never called and the count is 0. An error from r.ReadFrom
// takes precedence; an error from closing the file is returned only when
// the read itself succeeded.
func ReadFromFile(r io.ReaderFrom, filename string) (int64, error) {
	f, err := os.Open(filename)
	if err != nil {
		return 0, err
	}
	n, err := r.ReadFrom(f)
	// Close explicitly rather than with defer so its error can be reported.
	if cerr := f.Close(); err == nil && cerr != nil {
		err = cerr
	}
	return n, err
}

// Textonym collects the words of a word list, grouped by the digit sequence
// that spells each word on a keypad.
type Textonym struct {
	numberMap map[string][]string // map numeric string into words
	letterMap map[rune]byte       // map letter to digit
	count     int                 // total number of words in numberMap
	textonyms int                 // number of numeric strings with >1 words
}

// NewTextonym returns a Textonym for the keypad described by dm, which maps
// each digit to the letters it carries. ReadFrom upper-cases every letter
// before looking it up, so dm should list upper-case letters.
func NewTextonym(dm map[byte][]rune) *Textonym {
	lm := make(map[rune]byte, 26)
	for d, ll := range dm {
		for _, l := range ll {
			lm[l] = d
		}
	}
	return &Textonym{letterMap: lm}
}

// ReadFrom reads whitespace-separated words from r until EOF and records
// each word under its digit sequence. Words containing any character that
// has no digit on the keypad are skipped. Each call discards the results of
// any previous call.
//
// The returned n only approximates the number of bytes consumed (each word's
// length plus one separator); it exists to satisfy io.ReaderFrom. err is the
// scanner's error, if any.
func (t *Textonym) ReadFrom(r io.Reader) (n int64, err error) {
	// Reset the counters together with the map so they always describe
	// numberMap, even when the same Textonym reads a second list.
	t.numberMap = make(map[string][]string)
	t.count = 0
	t.textonyms = 0

	buf := make([]byte, 0, 32) // scratch space reused for every word
	sc := bufio.NewScanner(r)
	sc.Split(bufio.ScanWords)
	for sc.Scan() {
		word := sc.Text()
		n += int64(len(word)) + 1

		var ok bool
		if buf, ok = t.encode(buf[:0], word); !ok {
			continue
		}
		num := string(buf)
		words := append(t.numberMap[num], word)
		t.numberMap[num] = words
		t.count++
		// A digit sequence becomes a textonym exactly once: when its
		// second word arrives.
		if len(words) == 2 {
			t.textonyms++
		}
	}
	return n, sc.Err()
}

// encode appends the keypad digit of every letter in word to dst and
// returns the extended slice. It reports false as soon as it meets a
// character that has no digit.
func (t *Textonym) encode(dst []byte, word string) ([]byte, bool) {
	for _, c := range word {
		d, ok := t.letterMap[unicode.ToUpper(c)]
		if !ok {
			return dst, false
		}
		dst = append(dst, d)
	}
	return dst, true
}

// Most returns the largest number of words sharing a single digit sequence,
// together with every digit sequence that reaches that number and its
// words. When nothing has been read it returns 0 and a nil map.
func (t *Textonym) Most() (most int, subset map[string][]string) {
	for num, words := range t.numberMap {
		if len(words) > most {
			// A new maximum invalidates everything collected so far.
			most = len(words)
			subset = make(map[string][]string)
		}
		if len(words) == most {
			subset[num] = words
		}
	}
	return most, subset
}

// Report writes the task's summary to w, followed by the digit sequences
// that map to the most words. name identifies the word list in the summary
// and is printed quoted. The digit sequences are listed in sorted order so
// that the output is the same on every run.
func (t *Textonym) Report(w io.Writer, name string) {
	// Could be fancy and use text/template package but fmt is sufficient
	fmt.Fprintf(w, `
There are %v words in %q which can be represented by the digit key mapping.
They require %v digit combinations to represent them.
%v digit combinations represent Textonyms.
`, t.count, name, len(t.numberMap), t.textonyms)

	n, sub := t.Most()
	fmt.Fprintln(w, "\nThe numbers mapping to the most words map to", n, "words each:")

	// Map iteration order is random; sort the keys for stable output.
	nums := make([]string, 0, len(sub))
	for num := range sub {
		nums = append(nums, num)
	}
	sort.Strings(nums)
	for _, num := range nums {
		fmt.Fprintln(w, "\t", num, "maps to:", strings.Join(sub[num], ", "))
	}
} // </lang>

Output:
There are 13085 words in "wordlist" which can be represented by the digit key mapping.
They require 11932 digit combinations to represent them.
661 digit combinations represent Textonyms.

The numbers mapping to the most words map to 15 words each:
	 27 maps to: AP, AQ, AR, AS, Ar, As, BP, BR, BS, Br, CP, CQ, CR, Cr, Cs
Output with "-wordlist unixdict.txt":
There are 24978 words in "unixdict.txt" which can be represented by the digit key mapping.
They require 22903 digit combinations to represent them.
1473 digit combinations represent Textonyms.

The numbers mapping to the most words map to 9 words each:
	 269 maps to: amy, any, bmw, bow, box, boy, cow, cox, coy
	 729 maps to: paw, pax, pay, paz, raw, ray, saw, sax, say

J

<lang J>require'regex strings web/gethttp'

strip=:dyad define

 (('(?s)',x);) rxrplc y

)

fetch=:monad define

txt=. '.*

' strip '

.*' strip gethttp y

 cutopen tolower txt-.' '

)

keys=:noun define

2 abc
3 def
4 ghi
5 jkl
6 mno
7 pqrs
8 tuv
9 wxyz

)

reporttext=:noun define There are #{0} words in #{1} which can be represented by the digit key mapping. They require #{2} digit combinations to represent them.

#{3} digit combinations represent Textonyms.

)

report=:dyad define

 x rplc (":&.>y),.~('#{',":,'}'"_)&.>i.#y

)

textonymrpt=:dyad define

 'digits letters'=. |:>;,&.>,&.>/&.>/"1 <;._1;._2 x
 valid=. (#~ */@e.&letters&>) fetch y NB. ignore illegals
 reps=. {&digits@(letters&i.)&.> valid NB. reps is digit seq
 reporttext report (#valid);y;(#~.reps);+/(1<#)/.~reps

)</lang>

Required example:

<lang J> keys textonymrpt 'http://rosettacode.org/wiki/Textonyms/wordlist' There are 13085 words in http://rosettacode.org/wiki/Textonyms/wordlist which can be represented by the digit key mapping. They require 11932 digit combinations to represent them. 661 digit combinations represent Textonyms.</lang>

In this example, the intermediate results in textonymrpt would look like this (just looking at the first 5 elements of the really big values):

<lang J> digits 22233344455566677778889999

  letters

abcdefghijklmnopqrstuvwxyz

  5{.valid

┌─┬──┬───┬───┬──┐ │a│aa│aaa│aam│ab│ └─┴──┴───┴───┴──┘

  5{.reps

┌─┬──┬───┬───┬──┐ │2│22│222│226│22│ └─┴──┴───┴───┴──┘</lang>

Here's another example:

<lang J> keys textonymrpt 'http://www.puzzlers.org/pub/wordlists/unixdict.txt' There are 24978 words in http://www.puzzlers.org/pub/wordlists/unixdict.txt which can be represented by the digit key mapping. They require 22903 digit combinations to represent them. 1473 digit combinations represent Textonyms.</lang>

Perl

This uses a file named "words.txt" as the dictionary. <lang perl>sub find { my @m = qw/$ $ abc def ghi jkl mno pqrs tvu wxyz/; (my $r = shift) =~ s{(\d)}{[$m[$1]]}g; grep /^$r$/i, split ' ', `cat words.txt`; # cats don't run on windows }

print join("\n", $_, find($_)), "\n\n" for @ARGV</lang> Usage:

./textnym.pl 7353284667 736672
7353284667
rejections
selections

736672
senora

Incidentally, it sort of supports wildcards:

./textnym.pl '5432.*'
5432.*
liechtenstein

Python

This example is incorrect. Please fix the code and remove this message.

Details: It reports 22895 Textonyms instead of 1473.

<lang python>from collections import defaultdict import urllib.request

# CH2NUM maps each lower-case letter to its keypad digit, as a one-character
# string: the letter groups are numbered from 2 in keypad order.
CH2NUM = {ch: str(num)
          for num, chars in enumerate('abc def ghi jkl mno pqrs tuv wxyz'.split(), 2)
          for ch in chars}
# URL is the word list analysed when the script is run.
URL = 'http://www.puzzlers.org/pub/wordlists/unixdict.txt'


def getwords(url):
    """Fetch url and return its words.

    The response body is decoded as UTF-8, lower-cased and split on
    whitespace; the resulting list of words is returned.
    """
    return urllib.request.urlopen(url).read().decode("utf-8").lower().split()

def mapnum2words(words):
    """Group words by the digit string that spells them.

    Returns a tuple (number2words, reject): number2words is a dict mapping
    each digit string to the list of words that translate to it, and reject
    is the number of words skipped because they contain a character that is
    not a key of CH2NUM.
    """
    number2words = defaultdict(list)
    reject = 0
    for word in words:
        try:
            number2words[''.join(CH2NUM[ch] for ch in word)].append(word)
        except KeyError:
            # Reject words with non a-z e.g. '10th'
            reject += 1
    return dict(number2words), reject

def interactiveconversions():
    """Prompt repeatedly for a number or a word and print its textonyms.

    A digit string (digits 2-9 only) is looked up in the module-level
    num2words; a word made of letters in CH2NUM is translated to its digit
    string first. An empty line ends the loop.
    """
    global inp, ch, num
    while True:
        inp = input("\nType a number or a word to get the translation and textonyms: ").strip().lower()
        if inp:
            if all(ch in '23456789' for ch in inp):
                if inp in num2words:
                    print("  Number {0} has the following textonyms in the dictionary: {1}".format(inp, ', '.join(
                        num2words[inp])))
                else:
                    print("  Number {0} has no textonyms in the dictionary.".format(inp))
            elif all(ch in CH2NUM for ch in inp):
                num = ''.join(CH2NUM[ch] for ch in inp)
                # num2words is a plain dict: a word whose digit string has no
                # dictionary entry must not raise KeyError.
                print("  Word {0} is{1} in the dictionary and is number {2} with textonyms: {3}".format(
                    inp, ('' if inp in wordset else "n't"), num, ', '.join(num2words.get(num, []))))
            else:
                print("  I don't understand %r" % inp)
        else:
            print("Thank you")
            break


if __name__ == '__main__':
    # Fetch the word list, print the task report, then enter the
    # interactive lookup loop.
    words = getwords(URL)
    print("Read %i words from %r" % (len(words), URL))
    wordset = set(words)
    num2words, reject = mapnum2words(words)
    # A digit string is a textonym when MORE THAN ONE WORD maps to it, so
    # the test is on the length of each word list, not on the length of the
    # digit string itself.
    morethan1word = sum(1 for wordlist in num2words.values() if len(wordlist) > 1)
    maxwordpernum = max(len(values) for values in num2words.values())
    print("""
There are {0} words in {1} which can be represented by the Textonyms mapping.
They require {2} digit combinations to represent them.
{3} digit combinations represent Textonyms.\
""".format(len(words) - reject, URL, len(num2words), morethan1word))

    print("\nThe numbers mapping to the most words map to %i words each:" % maxwordpernum)
    maxwpn = sorted((key, val) for key, val in num2words.items() if len(val) == maxwordpernum)
    for num, wrds in maxwpn:
        print("  %s maps to: %s" % (num, ', '.join(wrds)))
    interactiveconversions()  # </lang>
Output:
Read 25104 words from 'http://www.puzzlers.org/pub/wordlists/unixdict.txt'

There are 24978 words in http://www.puzzlers.org/pub/wordlists/unixdict.txt which can be represented by the Textonyms mapping.
They require 22903 digit combinations to represent them.
22895 digit combinations represent Textonyms.

The numbers mapping to the most words map to 9 words each:
  269 maps to: amy, any, bmw, bow, box, boy, cow, cox, coy
  729 maps to: paw, pax, pay, paz, raw, ray, saw, sax, say

Type a number or a word to get the translation and textonyms: rosetta
  Word rosetta is in the dictionary and is number 7673882 with textonyms: rosetta

Type a number or a word to get the translation and textonyms: code
  Word code is in the dictionary and is number 2633 with textonyms: bode, code, coed

Type a number or a word to get the translation and textonyms: 2468
  Number 2468 has the following textonyms in the dictionary: ainu, chou

Type a number or a word to get the translation and textonyms: 3579
  Number 3579 has no textonyms in the dictionary.

Type a number or a word to get the translation and textonyms: 
Thank you

REXX

<lang rexx>/*REXX program counts the number of textonyms are in a file (dictionary)*/ parse arg iFID . /*get optional fileID of the file*/ if iFID== then iFID='UNIXDICT.TXT' /*filename of the word dictionary*/ @.=0 /*digit combinations placeholder.*/ !.= /*sparse array of textonyms. */ alphabet='ABCDEFGHIJKLMNOPQRSTUVWXYZ' /*supported alphabet to be used. */ digitKey= 22233344455566677778889999 /*translated alphabet to dig key.*/ digKey=0 /*number of digit combinations. */ wordCount=0 /*count of words in dictionary. */ first=0; last=0; long=0; longest=0 /*remembering first/last/longest.*/ call linein iFID, 1, 0 /*point to the first word in dict*/

#=0 /*number of textonyms in the file*/
                                      /* [↑]   ───in case file is open.*/
 do j=1  while lines(iFID)\==0        /*keep reading until exhausted.  */
 x=linein(iFID);   y=x;    upper x    /*get a word  and  uppercase it. */
 if \datatype(x,'U')  then iterate    /*Not all letters?  Then skip it.*/
 wordCount=wordCount+1                /*bump the word count (for file).*/
 z=translate(x, digitKey, alphabet)   /*build translated digit key word*/
 @.z=@.z+1                            /*flag the digit key word exists.*/
 !.z=!.z y                            /*build a list of same digit key.*/
 if @.z==2  then do;   #=#+1          /*bump the count of the textonyms*/
                 if first==0   then first=z  /*the first textonym found*/
                 last=z                      /* "   last     "      "  */
                 _=length(!.z)               /*length of the digit key.*/
                 if _>longest  then long=z   /*is this the longest ?   */
                 longest=max(_, longest)     /*now, shoot for this len.*/
                 end                         /* [↑] discretionary stuff*/
 if @.z\==1  then iterate             /*Does it already exist?  Skip it*/
 digKey=digKey+1                      /*bump count of digit key words. */
 end     /*j*/
                 @@=' which can be represented by the digit key mapping.'

say wordCount 'is the number of words in file "'iFID'"' @@ say 'They require' digKey "combination"s(digKey) 'to represent them.' say # 'digit combination's(#) "represent Textonyms." say if first\==0 then say ' first digit key='  !.first if last\==0 then say ' last digit key='  !.last if long\==0 then say ' longest digit key='  !.long exit /*stick a fork in it, we're done.*/ /*──────────────────────────────────S subroutine────────────────────────*/ s: if arg(1)==1 then return ; return 's' /*a simple pluralizer.*/</lang> output   when using the default input file:

24978 is the number of words in file  "UNIXDICT.TXT"  which can be represented by the digit key mapping.
They require 22903 combinations to represent them.
1473 digit combinations represent Textonyms.

    first digit key=  aaa aba abc cab
     last digit key=  woe zoe
  longest digit key=  claustrophobia claustrophobic

Ruby

<lang ruby> Textonyms = Hash.new {|n, g| n[g] = []} File.open("Textonyms.txt") do |file|

 file.each_line {|line|
   Textonyms[(n=line.chomp).gsub(/a|b|c|A|B|C/, '2').gsub(/d|e|f|D|E|F/, '3').gsub(/g|h|i|G|H|I/, '4').gsub(/p|q|r|s|P|Q|R|S/, '7')
                    .gsub(/j|k|l|J|K|L/, '5').gsub(/m|n|o|M|N|O/, '6').gsub(/t|u|v|T|U|V/, '8').gsub(/w|x|y|z|W|X|Y|Z/, '9')] += [n]
 }

end </lang>

Output:
puts "There are #{Textonyms.inject(0){|n,g| n+g[1].length}} words in #{"Wordlist"} which can be represnted by the Textonyms mapping."
puts "They require #{Textonyms.length} digit combinations to represent them."
puts "#{Textonyms.inject(0){|n,g| g[1].length > 1 ? n+1 : n}} digit combinations correspond to a Textonym"

There are 132916 words in Wordlist which can be represnted by the Textonyms mapping.
They require 117868 digit combinations to represent them.
9579 digit combinations correspond to a Textonym
puts Textonymes["7353284667"]

rejections
selections
puts Textonymes["736672"]

remora
senora

zkl

Translation of: Python

Like the Python example, this solution uses the Unix Dictionary, rather than the textonyms word list as I don't want to parse the HTML. <lang zkl>URL:="http://www.puzzlers.org/pub/wordlists/unixdict.txt"; var ZC=Import("zklCurl"); var keypad=D("a",2,"b",2,"c",2, "d",3,"e",3,"f",3, "g",4,"h",4,"i",4,

            "j",5,"k",5,"l",5,  "m",6,"n",6,"o",6,  "p",7,"q",7,"r",7,"s",7,
            "t",8,"u",8,"v",8,  "w",9,"x",9,"y",9,"z",9);

//fcn numerate(word){ word.toLower().apply(keypad.find.fp1("")) } fcn numerate(word){ word.toLower().apply(keypad.get) } //-->textonym or error println("criticisms --> ",numerate("criticisms"));

words:=ZC().get(URL); //--> T(Data,bytes of header, bytes of trailer) words=words[0].del(0,words[1]); // remove HTTP header println("Read %d words from %s".fmt(words.len(1),URL));

wcnt:=D(); foreach word in (words.walker(11)){ // iterate over stripped lines

  w2n:=try{ numerate(word) }catch(NotFoundError){ continue }; 
  wcnt.appendV(w2n,word);  // -->[textonym:list of words]

}

moreThan1Word:=wcnt.reduce(fcn(s,[(k,v)]){ s+=(v.len()>1) },0); maxWordPerNum:=(0).max(wcnt.values.apply("len"));

("There are %d words which can be represented by the Textonyms mapping.\n" "There are %d overlaps.").fmt(wcnt.len(),moreThan1Word).println();

println("Max collisions: %d words:".fmt(maxWordPerNum)); foreach k,v in (wcnt.filter('wrap([(k,v)]){ v.len()==maxWordPerNum })){

  println("  %s is the textonym of: %s".fmt(k,v.concat(", ")));

}</lang>

Output:
criticisms --> 2748424767
Read 25104 words from http://www.puzzlers.org/pub/wordlists/unixdict.txt
There are 22903 words which can be represented by the Textonyms mapping.
There are 1473 overlaps.
Max collisions: 9 words:
  729 is the textonym of: paw, pax, pay, paz, raw, ray, saw, sax, say
  269 is the textonym of: amy, any, bmw, bow, box, boy, cow, cox, coy