Textonyms: Difference between revisions
m (→{{header|REXX}}: added the most numerous digit key to the program and output.) |
(→{{header|Perl 6}}: add entry) |
||
Line 297: | Line 297: | ||
</pre> |
</pre> |
||
=={{header|Perl 6}}== |
|||
<lang perl6>my $src = 'unixdict.txt'; |
|||
my @words = slurp($src).lc.lines.grep(/^<[a..z]>+$/).unique; |
|||
my @dials = @words.classify: { |
|||
.trans('abcdefghijklmnopqrstuvwxyz' |
|||
=> '22233344455566677778889999'); |
|||
} |
|||
my @textonyms = @dials.grep(*.value > 1); |
|||
say qq:to 'END'; |
|||
There are {+@words} words in $src which can be represented by the digit key mapping. |
|||
They require {+@dials} digit combinations to represent them. |
|||
{+@textonyms} digit combinations represent Textonyms. |
|||
END |
|||
say "Top 5 in ambiguity:"; |
|||
say " ",$_ for @textonyms.sort(-*.value)[^5]; |
|||
say "\nTop 5 in length:"; |
|||
say " ",$_ for @textonyms.sort(-*.key.chars)[^5];</lang> |
|||
{{out}} |
|||
<pre>There are 24978 words in unixdict.txt which can be represented by the digit key mapping. |
|||
They require 22903 digit combinations to represent them. |
|||
1473 digit combinations represent Textonyms. |
|||
Top 5 in ambiguity: |
|||
269 => amy any bmw bow box boy cow cox coy |
|||
729 => paw pax pay paz raw ray saw sax say |
|||
2273 => acre bard bare base cape card care case |
|||
726 => pam pan ram ran sam san sao scm |
|||
426 => gam gao ham han ian ibm ibn |
|||
Top 5 in length: |
|||
25287876746242 => claustrophobia claustrophobic |
|||
7244967473642 => schizophrenia schizophrenic |
|||
666628676342 => onomatopoeia onomatopoeic |
|||
49376746242 => hydrophobia hydrophobic |
|||
2668368466 => contention convention</pre> |
|||
=={{header|Python}}== |
=={{header|Python}}== |
Revision as of 02:33, 11 February 2015
![Task](http://static.miraheze.org/rosettacodewiki/thumb/b/ba/Rcode-button-task-crushed.png/64px-Rcode-button-task-crushed.png)
You are encouraged to solve this task according to the task description, using any language you may know.
When entering text on a phone's digital pad it is possible that a particular combination of digits corresponds to more than one word. Such are called textonyms.
Assuming the digit keys are mapped to letters as follows:
2 -> ABC 3 -> DEF 4 -> GHI 5 -> JKL 6 -> MNO 7 -> PQRS 8 -> TUV 9 -> WXYZ
The task is to write a program that finds textonyms in a list of words such as Textonyms/wordlist or [1].
The task should produce a report:
There are #{0} words in #{1} which can be represented by the digit key mapping. They require #{2} digit combinations to represent them. #{3} digit combinations represent Textonyms.
Where:
#{0} is the number of words in the list which can be represented by the digit key mapping. #{1} is the URL of the wordlist being used. #{2} is the number of digit combinations required to represent the words in #{0}. #{3} is the number of #{2} which represent more than one word.
At your discretion show a couple of examples of your solution displaying Textonyms. e.g.
2748424767 -> "Briticisms", "criticisms"
Extra credit:
Use a word list and keypad mapping other than English.
Go
Uses a local file and shows its name rather than re-fetching a URL each run and printing that URL.
Like the Python example, the examples shown are the numbers that map to the most words. <lang go>package main
import ( "bufio" "flag" "fmt" "io" "log" "os" "strings" "unicode" )
func main() { log.SetFlags(0) log.SetPrefix("textonyms: ")
wordlist := flag.String("wordlist", "wordlist", "file containing the list of words to check") flag.Parse() if flag.NArg() != 0 { flag.Usage() os.Exit(2) }
t := NewTextonym(phoneMap) _, err := ReadFromFile(t, *wordlist) if err != nil { log.Fatal(err) } t.Report(os.Stdout, *wordlist) }
// phoneMap is the digit to letter mapping of a typical phone.
// Keys are the ASCII digit bytes '2'..'9'; values are the upper-case
// letters printed on that key.
var phoneMap = map[byte][]rune{
	'2': []rune("ABC"),
	'3': []rune("DEF"),
	'4': []rune("GHI"),
	'5': []rune("JKL"),
	'6': []rune("MNO"),
	'7': []rune("PQRS"),
	'8': []rune("TUV"),
	'9': []rune("WXYZ"),
}
// ReadFromFile is a generic convience function that allows the use of a // filename with an io.ReaderFrom and handles errors related to open and // closing the file. func ReadFromFile(r io.ReaderFrom, filename string) (int64, error) { f, err := os.Open(filename) if err != nil { return 0, err } n, err := r.ReadFrom(f) if cerr := f.Close(); err == nil && cerr != nil { err = cerr } return n, err }
type Textonym struct { numberMap map[string][]string // map numeric string into words letterMap map[rune]byte // map letter to digit count int // total number of words in numberMap textonyms int // number of numeric strings with >1 words }
func NewTextonym(dm map[byte][]rune) *Textonym { lm := make(map[rune]byte, 26) for d, ll := range dm { for _, l := range ll { lm[l] = d } } return &Textonym{letterMap: lm} }
func (t *Textonym) ReadFrom(r io.Reader) (n int64, err error) { t.numberMap = make(map[string][]string) buf := make([]byte, 0, 32) sc := bufio.NewScanner(r) sc.Split(bufio.ScanWords) scan: for sc.Scan() { buf = buf[:0] word := sc.Text()
// XXX we only bother approximating the number of bytes // consumed. This isn't used in the calling code and was // only included to match the io.ReaderFrom interface. n += int64(len(word)) + 1
for _, r := range word { d, ok := t.letterMap[unicode.ToUpper(r)] if !ok { //log.Printf("ignoring %q\n", word) continue scan } buf = append(buf, d) } //log.Printf("scanned %q\n", word) num := string(buf) t.numberMap[num] = append(t.numberMap[num], word) t.count++ if len(t.numberMap[num]) == 2 { t.textonyms++ } //log.Printf("%q → %v\t→ %v\n", word, num, t.numberMap[num]) } return n, sc.Err() }
func (t *Textonym) Most() (most int, subset map[string][]string) { for k, v := range t.numberMap { switch { case len(v) > most: subset = make(map[string][]string) most = len(v) fallthrough case len(v) == most: subset[k] = v } } return most, subset }
func (t *Textonym) Report(w io.Writer, name string) { // Could be fancy and use text/template package but fmt is sufficient fmt.Fprintf(w, ` There are %v words in %q which can be represented by the digit key mapping. They require %v digit combinations to represent them. %v digit combinations represent Textonyms. `, t.count, name, len(t.numberMap), t.textonyms)
n, sub := t.Most() fmt.Fprintln(w, "\nThe numbers mapping to the most words map to", n, "words each:") for k, v := range sub { fmt.Fprintln(w, "\t", k, "maps to:", strings.Join(v, ", ")) } }</lang>
- Output:
There are 13085 words in "wordlist" which can be represented by the digit key mapping. They require 11932 digit combinations to represent them. 661 digit combinations represent Textonyms. The numbers mapping to the most words map to 15 words each: 27 maps to: AP, AQ, AR, AS, Ar, As, BP, BR, BS, Br, CP, CQ, CR, Cr, Cs
- Output with "-wordlist unixdict.txt":
There are 24978 words in "unixdict.txt" which can be represented by the digit key mapping. They require 22903 digit combinations to represent them. 1473 digit combinations represent Textonyms. The numbers mapping to the most words map to 9 words each: 269 maps to: amy, any, bmw, bow, box, boy, cow, cox, coy 729 maps to: paw, pax, pay, paz, raw, ray, saw, sax, say
J
<lang J>require'regex strings web/gethttp'
NB. strip: delete from y every match of the regular expression x.
NB. The pattern is prefixed with (?s) so that . also matches line breaks.
strip=:dyad define
  (('(?s)',x);'') rxrplc y
)
NB. fetch: download the page at URL y and return its words, lower-cased and boxed.
NB. Everything up to the <pre> tag and from the </pre> tag onward is removed
NB. first (presumably the word list is the page's <pre> section - confirm for
NB. other sources); blanks are dropped before the text is cut into lines.
fetch=:monad define
  txt=. '.*<pre>' strip '</pre>.*' strip gethttp y
  cutopen tolower txt-.' '
)
NB. keys: keypad description, one "digit letters" pair per line.
NB. Each line starts with a blank because textonymrpt cuts every line on its
NB. first character (<;._1) after cutting the text on its last character (;._2).
keys=:noun define
 2 abc
 3 def
 4 ghi
 5 jkl
 6 mno
 7 pqrs
 8 tuv
 9 wxyz
)
NB. reporttext: report template; the #{n} placeholders are filled in by report.
reporttext=:noun define
There are #{0} words in #{1} which can be represented by the digit key mapping.
They require #{2} digit combinations to represent them.
#{3} digit combinations represent Textonyms.
)
NB. report: x is a template containing #{0}, #{1}, ... placeholders and y is a
NB. list of boxed values. A two-column table of (placeholder ; formatted value)
NB. is built, one row per item of y, and handed to rplc to substitute into x.
report=:dyad define
x rplc (":&.>y),.~('#{',":,'}'"_)&.>i.#y
)
NB. textonymrpt: x is a keypad description (see keys), y is the word list URL.
NB. Produces the filled-in report text: word count, source, number of distinct
NB. digit sequences, and number of digit sequences shared by more than one word.
textonymrpt=:dyad define
  'digits letters'=. |:>;,&.>,&.>/&.>/"1 <;._1;._2 x
  valid=. (#~ */@e.&letters&>) fetch y NB. ignore illegals
  reps=. {&digits@(letters&i.)&.> valid NB. reps is digit seq
  reporttext report (#valid);y;(#~.reps);+/(1<#)/.~reps
)</lang>
Required example:
<lang J> keys textonymrpt 'http://rosettacode.org/wiki/Textonyms/wordlist' There are 13085 words in http://rosettacode.org/wiki/Textonyms/wordlist which can be represented by the digit key mapping. They require 11932 digit combinations to represent them. 661 digit combinations represent Textonyms.</lang>
In this example, the intermediate results in textonymrpt would look like this (just looking at the first 5 elements of the really big values):
<lang J> digits 22233344455566677778889999
letters
abcdefghijklmnopqrstuvwxyz
5{.valid
┌─┬──┬───┬───┬──┐ │a│aa│aaa│aam│ab│ └─┴──┴───┴───┴──┘
5{.reps
┌─┬──┬───┬───┬──┐ │2│22│222│226│22│ └─┴──┴───┴───┴──┘</lang>
Here's another example:
<lang J> keys textonymrpt 'http://www.puzzlers.org/pub/wordlists/unixdict.txt' There are 24978 words in http://www.puzzlers.org/pub/wordlists/unixdict.txt which can be represented by the digit key mapping. They require 22903 digit combinations to represent them. 1473 digit combinations represent Textonyms.</lang>
Perl
This uses a file named "words.txt" as the dictionary.
<lang perl># find(DIGITS): return the words in words.txt that DIGITS can spell.
# Each digit in the argument is replaced by a character class holding that
# key's letters; everything else is left alone, so regex syntax in the
# argument passes straight through. Matching is case-insensitive and anchored
# to the whole word. Dies if words.txt cannot be opened.
sub find {
    # Index = digit. Keys 0 and 1 carry no letters, so they get a '$'
    # placeholder.
    my @m = qw/$ $ abc def ghi jkl mno pqrs tvu wxyz/;
    (my $r = shift) =~ s{(\d)}{[$m[$1]]}g;

    # Read the dictionary directly instead of shelling out to `cat`,
    # so the script also works where no cat command exists.
    open my $fh, '<', 'words.txt' or die "Cannot open words.txt: $!\n";
    my @words = split ' ', do { local $/; <$fh> // '' };
    close $fh;

    return grep { /^$r$/i } @words;
}

print join("\n", $_, find($_)), "\n\n" for @ARGV</lang>
Usage:
./textnym.pl 7353284667 736672 7353284667 rejections selections 736672 senora
Incidentally, it sort of supports wildcards:
./textnym.pl '5432.*' 5432.* liechtenstein
Perl 6
<lang perl6>my $src = 'unixdict.txt';

# Keep only purely alphabetic words, lower-cased and de-duplicated.
my @words = slurp($src).lc.lines.grep(/^<[a..z]>+$/).unique;

# Group the words by the digit string that spells them on the keypad.
my @dials = @words.classify: {
    .trans('abcdefghijklmnopqrstuvwxyz'
        => '22233344455566677778889999');
}

# A digit string is a textonym when more than one word maps to it.
my @textonyms = @dials.grep(*.value > 1);

say qq:to 'END';
    There are {+@words} words in $src which can be represented by the digit key mapping.
    They require {+@dials} digit combinations to represent them.
    {+@textonyms} digit combinations represent Textonyms.
    END

say "Top 5 in ambiguity:";
say " ",$_ for @textonyms.sort(-*.value)[^5];

say "\nTop 5 in length:";
say " ",$_ for @textonyms.sort(-*.key.chars)[^5];</lang>
- Output:
There are 24978 words in unixdict.txt which can be represented by the digit key mapping. They require 22903 digit combinations to represent them. 1473 digit combinations represent Textonyms. Top 5 in ambiguity: 269 => amy any bmw bow box boy cow cox coy 729 => paw pax pay paz raw ray saw sax say 2273 => acre bard bare base cape card care case 726 => pam pan ram ran sam san sao scm 426 => gam gao ham han ian ibm ibn Top 5 in length: 25287876746242 => claustrophobia claustrophobic 7244967473642 => schizophrenia schizophrenic 666628676342 => onomatopoeia onomatopoeic 49376746242 => hydrophobia hydrophobic 2668368466 => contention convention
Python
<lang python>from collections import defaultdict import urllib.request
# Keypad lookup: lower-case letter -> digit character ('a' -> '2' ... 'z' -> '9').
CH2NUM = {ch: str(num) for num, chars in enumerate('abc def ghi jkl mno pqrs tuv wxyz'.split(), 2) for ch in chars}
# Word list downloaded when the module is run as a script.
URL = 'http://www.puzzlers.org/pub/wordlists/unixdict.txt'
def getwords(url):
    """Download ``url`` and return its whitespace-separated words, lower-cased.

    The body is decoded as UTF-8. The response is closed as soon as it has
    been read instead of being left for the garbage collector.
    """
    with urllib.request.urlopen(url) as response:
        return response.read().decode("utf-8").lower().split()
def mapnum2words(words):
    """Group ``words`` by the keypad digit string that spells them.

    Returns ``(number2words, reject)``: a plain dict mapping each digit
    string to the list of words producing it (in input order), and the
    number of words skipped because they contain a character that is not a
    key of CH2NUM.
    """
    number2words = defaultdict(list)
    reject = 0
    for word in words:
        try:
            number2words[''.join(CH2NUM[ch] for ch in word)].append(word)
        except KeyError:
            # Reject words with non a-z e.g. '10th'
            reject += 1
    return dict(number2words), reject
def interactiveconversions():
    """Prompt repeatedly for a number or a word and print its textonyms.

    Input made only of the digits 2-9 is looked up directly in the
    module-level ``num2words``; input made only of CH2NUM letters is first
    converted to its digit string. An empty line ends the loop.
    """
    global inp, ch, num
    while True:
        inp = input("\nType a number or a word to get the translation and textonyms: ").strip().lower()
        if inp:
            if all(ch in '23456789' for ch in inp):
                if inp in num2words:
                    print(" Number {0} has the following textonyms in the dictionary: {1}".format(inp, ', '.join(
                        num2words[inp])))
                else:
                    print(" Number {0} has no textonyms in the dictionary.".format(inp))
            elif all(ch in CH2NUM for ch in inp):
                num = ''.join(CH2NUM[ch] for ch in inp)
                # The typed word's digit string may be absent from the
                # dictionary altogether; fall back to an empty list rather
                # than raising KeyError.
                print(" Word {0} is{1} in the dictionary and is number {2} with textonyms: {3}".format(
                    inp, ('' if inp in wordset else "n't"), num, ', '.join(num2words.get(num, []))))
            else:
                print(" I don't understand %r" % inp)
        else:
            print("Thank you")
            break
if __name__ == '__main__':
words = getwords(URL) print("Read %i words from %r" % (len(words), URL)) wordset = set(words) num2words, reject = mapnum2words(words) morethan1word = sum(1 for w in num2words if len(num2words[w]) > 1) maxwordpernum = max(len(values) for values in num2words.values()) print("""
There are {0} words in {1} which can be represented by the Textonyms mapping. They require {2} digit combinations to represent them. {3} digit combinations represent Textonyms.\ """.format(len(words) - reject, URL, len(num2words), morethan1word))
print("\nThe numbers mapping to the most words map to %i words each:" % maxwordpernum) maxwpn = sorted((key, val) for key, val in num2words.items() if len(val) == maxwordpernum) for num, wrds in maxwpn: print(" %s maps to: %s" % (num, ', '.join(wrds)))
interactiveconversions()</lang>
- Output:
Read 25104 words from 'http://www.puzzlers.org/pub/wordlists/unixdict.txt' There are 24978 words in http://www.puzzlers.org/pub/wordlists/unixdict.txt which can be represented by the Textonyms mapping. They require 22903 digit combinations to represent them. 1473 digit combinations represent Textonyms. The numbers mapping to the most words map to 9 words each: 269 maps to: amy, any, bmw, bow, box, boy, cow, cox, coy 729 maps to: paw, pax, pay, paz, raw, ray, saw, sax, say Type a number or a word to get the translation and textonyms: rosetta Word rosetta is in the dictionary and is number 7673882 with textonyms: rosetta Type a number or a word to get the translation and textonyms: code Word code is in the dictionary and is number 2633 with textonyms: bode, code, coed Type a number or a word to get the translation and textonyms: 2468 Number 2468 has the following textonyms in the dictionary: ainu, chou Type a number or a word to get the translation and textonyms: 3579 Number 3579 has no textonyms in the dictionary. Type a number or a word to get the translation and textonyms: Thank you
REXX
This REXX version checks for and displays the count of the number of (illegal) words not representable by the key digits.
It also detects and displays the count of duplicate words.
<lang rexx>/*REXX program counts the number of textonyms are in a file (dictionary)*/
parse arg iFID .                       /*get optional fileID of the file*/
if iFID=='' then iFID='UNIXDICT.TXT'   /*filename of the word dictionary*/
@.=0                                   /*digit combinations placeholder.*/
!.=; $.=                               /*sparse array of textonyms;words*/
alphabet='ABCDEFGHIJKLMNOPQRSTUVWXYZ'  /*supported alphabet to be used. */
digitKey= 22233344455566677778889999   /*translated alphabet to dig key.*/
digKey=0; wordCount=0                  /*# digit combinations; wordCount*/
ills=0; dups=0; longest=0; mostus=0    /*illegals; duplicate words; lit.*/
first=0; last=0; long=0; most=0        /*for: first, last, longest, ··· */
call linein iFID, 1, 0                 /*point to the first word in dict*/
#=0                                    /*number of textonyms in the file*/
                                       /* [↑] ───in case file is open.*/
  do j=1 while lines(iFID)\==0         /*keep reading until exhausted. */
  x=linein(iFID); y=x; upper x         /*get a word and uppercase it. */
  if \datatype(x,'U') then do; ills=ills+1; iterate; end /*illegal? */
  if $.x\=='' then do; dups=dups+1; iterate; end /*duplicate?*/
              else $.x=.               /*indicate it's a righteous word.*/
  wordCount=wordCount+1                /*bump the word count (for file).*/
  z=translate(x, digitKey, alphabet)   /*build translated digit key word*/
  @.z=@.z+1                            /*flag the digit key word exists.*/
  !.z=!.z y; _=words(!.z)              /*build a list of same digit key.*/
  if _>most then do; mostus=z; most=_; end /*remember mostus digKeys.*/
  if @.z==2 then do; #=#+1             /*bump the count of the textonyms*/
                 if first==0 then first=z /*the first textonym found*/
                 last=z                /* " last " " */
                 _=length(!.z)         /*length of this key's word list.*/
                 if _>longest then long=z /*is this the longest ? */
                 longest=max(_, longest) /*now, shoot for this len.*/
                 end                   /* [↑] discretionary stuff*/
  if @.z\==1 then iterate              /*Does it already exist? Skip it*/
  digKey=digKey+1                      /*bump count of digit key words. */
  end /*j*/
@@=' which can be represented by the digit key mapping.'
say wordCount 'is the number of words in file "'iFID'"' @@
if ills\==0 then say ills 'word's(ills) "contained illegal characters."
if dups\==0 then say dups "duplicate word"s(dups) 'detected.'
say 'They require' digKey "combination"s(digKey) 'to represent them.'
say # 'digit combination's(#) "represent Textonyms."
say
if first\==0 then say ' first digit key=' !.first
if last\==0 then say ' last digit key=' !.last
if long\==0 then say ' longest digit key=' !.long
if most\==0 then say ' numerous digit key=' !.mostus ' ('most "words)"
exit                                   /*stick a fork in it, we're done.*/
/*──────────────────────────────────S subroutine────────────────────────*/
s: if arg(1)==1 then return ''; return 's' /*a simple pluralizer.*/</lang>
output when using the default input file:
24978 is the number of words in file "UNIXDICT.TXT" which can be represented by the digit key mapping. 126 words contained illegal characters. They require 22903 combinations to represent them. 1473 digit combinations represent Textonyms. first digit key= aaa aba abc cab last digit key= woe zoe longest digit key= claustrophobia claustrophobic numerous digit key= amy any bmw bow box boy cow cox coy (9 words)
output when using the input file: textonyms.txt
12990 is the number of words in file "textonyms.txt" which can be represented by the digit key mapping. 95 duplicate words were detected. They require 11932 combinations to represent them. 650 digit combinations represent Textonyms. first digit key= AA AB AC BA BB BC CA CB last digit key= Phillip Phillis longest digit key= Anglophobia Anglophobic numerous digit key= AP AQ AR AS BP BR BS CP CQ CR Cs (11 words)
Ruby
<lang ruby> Textonyms = Hash.new {|n, g| n[g] = []} File.open("Textonyms.txt") do |file|
file.each_line {|line| Textonyms[(n=line.chomp).gsub(/a|b|c|A|B|C/, '2').gsub(/d|e|f|D|E|F/, '3').gsub(/g|h|i|G|H|I/, '4').gsub(/p|q|r|s|P|Q|R|S/, '7') .gsub(/j|k|l|J|K|L/, '5').gsub(/m|n|o|M|N|O/, '6').gsub(/t|u|v|T|U|V/, '8').gsub(/w|x|y|z|W|X|Y|Z/, '9')] += [n] }
end </lang>
- Output:
puts "There are #{Textonyms.inject(0){|n,g| n+g[1].length}} words in #{"Wordlist"} which can be represented by the Textonyms mapping." puts "They require #{Textonyms.length} digit combinations to represent them." puts "#{Textonyms.inject(0){|n,g| g[1].length > 1 ? n+1 : n}} digit combinations correspond to a Textonym" There are 132916 words in Wordlist which can be represented by the Textonyms mapping. They require 117868 digit combinations to represent them. 9579 digit combinations correspond to a Textonym
puts Textonyms["7353284667"] rejections selections
puts Textonyms["736672"] remora senora
zkl
Like the Python example, this solution uses the Unix Dictionary, rather than the textonyms word list as I don't want to parse the HTML.
<lang zkl>URL:="http://www.puzzlers.org/pub/wordlists/unixdict.txt";
var ZC=Import("zklCurl");
var keypad=D("a",2,"b",2,"c",2, "d",3,"e",3,"f",3, "g",4,"h",4,"i",4,
   "j",5,"k",5,"l",5, "m",6,"n",6,"o",6, "p",7,"q",7,"r",7,"s",7, "t",8,"u",8,"v",8, "w",9,"x",9,"y",9,"z",9);
//fcn numerate(word){ word.toLower().apply(keypad.find.fp1("")) }
fcn numerate(word){ word.toLower().apply(keypad.get) } //-->textonym or error
println("criticisms --> ",numerate("criticisms"));

words:=ZC().get(URL); //--> T(Data,bytes of header, bytes of trailer)
words=words[0].del(0,words[1]); // remove HTTP header
println("Read %d words from %s".fmt(words.len(1),URL));

wcnt:=D();
foreach word in (words.walker(11)){ // iterate over stripped lines
   // words containing a character that is not in keypad are skipped
   w2n:=try{ numerate(word) }catch(NotFoundError){ continue };
   wcnt.appendV(w2n,word); // -->[textonym:list of words]
}

moreThan1Word:=wcnt.reduce(fcn(s,[(k,v)]){ s+=(v.len()>1) },0);
maxWordPerNum:=(0).max(wcnt.values.apply("len"));

// NOTE(review): wcnt is keyed by digit string, so wcnt.len() is the number of
// digit combinations rather than the number of words - confirm the wording.
("There are %d words which can be represented by the Textonyms mapping.\n"
 "There are %d overlaps.").fmt(wcnt.len(),moreThan1Word).println();

println("Max collisions: %d words:".fmt(maxWordPerNum));
foreach k,v in (wcnt.filter('wrap([(k,v)]){ v.len()==maxWordPerNum })){
   println(" %s is the textonym of: %s".fmt(k,v.concat(", ")));
}</lang>
- Output:
criticisms --> 2748424767 Read 25104 words from http://www.puzzlers.org/pub/wordlists/unixdict.txt There are 22903 words which can be represented by the Textonyms mapping. There are 1473 overlaps. Max collisions: 9 words: 729 is the textonym of: paw, pax, pay, paz, raw, ray, saw, sax, say 269 is the textonym of: amy, any, bmw, bow, box, boy, cow, cox, coy