Count occurrences of a substring
![Task](http://static.miraheze.org/rosettacodewiki/thumb/b/ba/Rcode-button-task-crushed.png/64px-Rcode-button-task-crushed.png)
You are encouraged to solve this task according to the task description, using any language you may know.
The task is to either create a function, or show a built-in function, to count the number of non-overlapping occurrences of a substring inside a string. The function should take two arguments: the first argument being the string to search and the second a substring to be search for. It should return an integer count.
<lang pseudocode>print countSubstring("the three truths","th") 3
// do not count substrings that overlap with previously-counted substrings: print countSubstring("ababababab","abab") 2</lang>
The matching should yield the highest number of non-overlapping matches. In general, this essentially means matching from left-to-right or right-to-left (see proof on talk page).
Ada
<lang Ada>with Ada.Strings.Fixed, Ada.Text_IO;
procedure Count_Substrings is
function Substrings(Main: String; Sub: String) return Natural is Idx: Natural := Ada.Strings.Fixed.Index(Source => Main, Pattern => Sub); begin if Idx = 0 then return 0; else return 1 + Substrings(Main(Idx+Sub'Length .. Main'Last), Sub); end if; end Substrings;
begin
Ada.Text_IO.Put(Integer'Image(Substrings("the three truths", "th"))); Ada.Text_IO.Put(Integer'Image(Substrings("ababababab", "abab")));
end Count_Substrings; </lang>
Output:
3 2
ALGOL 68
Algol68 has no build in function to do this task, hence the next to create a count string in string routine. <lang algol68>#!/usr/local/bin/a68g --script #
PROC count string in string = (STRING needle, haystack)INT: (
INT start:=LWB haystack, next, out:=0; FOR count WHILE string in string(needle, next, haystack[start:]) DO start+:=next+UPB needle-LWB needle; out:=count OD; out
);
printf(($d" "$,
count string in string("th", "the three truths"), # expect 3 # count string in string("abab", "ababababab"), # expect 2 # count string in string("a*b", "abaabba*bbaba*bbab"), # expect 2 # $l$
))</lang> Output:
3 2 2
AutoHotkey
While it is simple enough to parse the string, AutoHotkey has a rather unconventional method which outperforms this. StringReplace sets the number of replaced strings to ErrorLevel. <lang AutoHotkey>MsgBox % countSubstring("the three truths","th") ; 3 MsgBox % countSubstring("ababababab","abab") ; 2
CountSubstring(fullstring, substring){
StringReplace, junk, fullstring, %substring%, , UseErrorLevel return errorlevel
}</lang>
AWK
<lang AWK>#!/usr/local/bin/awk -f
function countsubstring (str,pat) { n=0; while (match(str,pat)) { n++;
str = substr(str,RSTART+RLENGTH);
} return n; }
BEGIN { print countsubstring("the three truths","th"); print countsubstring("ababababab","abab"); print countsubstring(ARGV[1],ARGV[2]); }</lang>
Output:
$ ./countsubstring.awk aaaabacad aa 3 2 2
BASIC
In FreeBASIC, this needs to be compiled with -lang qb
or -lang fblite
.
<lang qbasic>DECLARE FUNCTION countSubstring& (where AS STRING, what AS STRING)
PRINT "the three truths, th:", countSubstring&("the three truths", "th") PRINT "ababababab, abab:", countSubstring&("ababababab", "abab")
FUNCTION countSubstring& (where AS STRING, what AS STRING)
DIM c AS LONG, s AS LONG s = 1 - LEN(what) DO s = INSTR(s + LEN(what), where, what) IF 0 = s THEN EXIT DO c = c + 1 LOOP countSubstring = c
END FUNCTION</lang>
Output:
the three truths, th: 3 ababababab, abab: 2
See also: Liberty BASIC, PowerBASIC, PureBasic.
BBC BASIC
<lang bbcbasic> tst$ = "the three truths"
sub$ = "th" PRINT ; FNcountSubstring(tst$, sub$) " """ sub$ """ in """ tst$ """" tst$ = "ababababab" sub$ = "abab" PRINT ; FNcountSubstring(tst$, sub$) " """ sub$ """ in """ tst$ """" END DEF FNcountSubstring(A$, B$) LOCAL I%, N% I% = 1 : N% = 0 REPEAT I% = INSTR(A$, B$, I%) IF I% THEN N% += 1 : I% += LEN(B$) UNTIL I% = 0 = N%
</lang> Output:
3 "th" in "the three truths" 2 "abab" in "ababababab"
Bracmat
<lang bracmat> ( count-substring
= n S s p . 0:?n:?p & !arg:(?S.?s) & @( !S : ? ( [!p ? !s [?p ? & !n+1:?n & ~ ) ) | !n )
& out$(count-substring$("the three truths".th)) & out$(count-substring$(ababababab.abab)) & ;</lang> Output:
3 2
C
<lang C>#include <stdio.h>
- include <string.h>
int match(const char *s, const char *p, int overlap) {
int c = 0, l = strlen(p);
while (*s != '\0') { if (strncmp(s++, p, l)) continue; if (!overlap) s += l - 1; c++; } return c;
}
int main() {
printf("%d\n", match("the three truths", "th", 0)); printf("overlap:%d\n", match("abababababa", "aba", 1)); printf("not: %d\n", match("abababababa", "aba", 0)); return 0;
}</lang>
Alternate version: <lang c>#include <stdio.h>
- include <string.h>
// returns count of non-overlapping occurrences of 'sub' in 'str' int countSubstring(const char *str, const char *sub) {
int length = strlen(sub); if (length == 0) return 0; int count = 0; for (str = strstr(str, sub); str; str = strstr(str + length, sub)) ++count; return count;
}
int main() {
printf("%d\n", countSubstring("the three truths", "th")); printf("%d\n", countSubstring("ababababab", "abab")); printf("%d\n", countSubstring("abaabba*bbaba*bbab", "a*b"));
return 0;
}</lang> Output:
3 2 2
C++
<lang cpp>#include <iostream>
- include <string>
// returns count of non-overlapping occurrences of 'sub' in 'str' int countSubstring(const std::string& str, const std::string& sub) {
if (sub.length() == 0) return 0; int count = 0; for (size_t offset = str.find(sub); offset != std::string::npos;
offset = str.find(sub, offset + sub.length()))
{ ++count; } return count;
}
int main() {
std::cout << countSubstring("the three truths", "th") << '\n'; std::cout << countSubstring("ababababab", "abab") << '\n'; std::cout << countSubstring("abaabba*bbaba*bbab", "a*b") << '\n';
return 0;
}</lang>
- Output:
3 2 2
C#
<lang c sharp> using System;
class SubStringTestClass {
public static int CountSubStrings(string testString, string testSubstring) ( int location = 0; int count = 0; while (location < testString.Length) { int found = testString.IndexOf(testSubstring, location); if (found == -1) { break; } else { count++; location = location + testSubstring.Length; } } return count; }
}</lang>
CoffeeScript
<lang coffeescript> countSubstring = (str, substr) ->
n = 0 i = 0 while (pos = str.indexOf(substr, i)) != -1 n += 1 i = pos + substr.length n
console.log countSubstring "the three truths", "th" console.log countSubstring "ababababab", "abab" </lang>
Common Lisp
<lang lisp>(defun count-sub (str pat)
(loop with z = 0 with s = 0 while s do
(when (setf s (search pat str :start2 s)) (incf z) (incf s (length pat))) finally (return z)))
(count-sub "ababa" "ab") ; 2 (count-sub "ababa" "aba") ; 1</lang>
D
<lang d>import std.stdio, std.algorithm;
void main() {
writeln("the three truths".count("th")); writeln("ababababab".count("abab"));
}</lang> Output:
3 2
Delphi
<lang Delphi>program OccurrencesOfASubstring;
{$APPTYPE CONSOLE}
uses StrUtils;
function CountSubstring(const aString, aSubstring: string): Integer; var
lPosition: Integer;
begin
Result := 0; lPosition := PosEx(aSubstring, aString); while lPosition <> 0 do begin Inc(Result); lPosition := PosEx(aSubstring, aString, lPosition + Length(aSubstring)); end;
end;
begin
Writeln(CountSubstring('the three truths', 'th')); Writeln(CountSubstring('ababababab', 'abab'));
end.</lang>
Eiffel
<lang eiffel> class APPLICATION inherit ARGUMENTS create make feature {NONE} -- Initialization make -- Run application. do occurance := 0 from index := 1 until index > text.count loop temp := text.fuzzy_index(search_for, index, 0) if temp /= 0 then index := temp + search_for.count occurance := occurance + 1 else index := text.count + 1 end end print(occurance) end
index:INTEGER temp:INTEGER occurance:INTEGER text:STRING = "ababababab" search_for:STRING = "abab" end </lang>
Erlang
<lang Erlang> %% Count non-overlapping substrings in Erlang for the rosetta code wiki. %% Implemented by J.W. Luiten
-module(substrings). -export([main/2]).
%% String exhausted, present result match([], _Sub, _OrigSub, Acc) ->
Acc;
%% Sub exhausted, count a match match(String, [], Sub, Acc) ->
match(String, Sub, Sub, Acc+1);
%% First character matches, advance match([X|MainTail], [X|SubTail], Sub, Acc) ->
match(MainTail, SubTail, Sub, Acc);
%% First characters do not match. Keep scanning for sub in remainder of string match([_X|MainTail], [_Y|_SubTail], Sub, Acc)->
match(MainTail, Sub, Sub, Acc).
main(String, Sub) ->
match(String, Sub, Sub, 0).</lang>
Command: <lang Erlang>substrings:main("ababababab","abab").</lang> Output:
2
Euphoria
<lang euphoria>function countSubstring(sequence s, sequence sub)
integer from,count count = 0 from = 1 while 1 do from = match_from(sub,s,from) if not from then exit end if from += length(sub) count += 1 end while return count
end function
? countSubstring("the three truths","th") ? countSubstring("ababababab","abab")</lang>
Output:
3 2
EGL
The "remove and count the difference" and "manual loop" methods. Implementation includes protection from empty source and search strings. <lang EGL>program CountStrings
function main() SysLib.writeStdout("Remove and Count:"); SysLib.writeStdout(countSubstring("th", "the three truths")); SysLib.writeStdout(countSubstring("abab", "ababababab")); SysLib.writeStdout(countSubstring("a*b", "abaabba*bbaba*bbab")); SysLib.writeStdout(countSubstring("a", "abaabba*bbaba*bbab")); SysLib.writeStdout(countSubstring(" ", "abaabba*bbaba*bbab")); SysLib.writeStdout(countSubstring("", "abaabba*bbaba*bbab")); SysLib.writeStdout(countSubstring("a", "")); SysLib.writeStdout(countSubstring("", ""));
SysLib.writeStdout("Manual Loop:"); SysLib.writeStdout(countSubstringWithLoop("th", "the three truths")); SysLib.writeStdout(countSubstringWithLoop("abab", "ababababab")); SysLib.writeStdout(countSubstringWithLoop("a*b", "abaabba*bbaba*bbab")); SysLib.writeStdout(countSubstringWithLoop("a", "abaabba*bbaba*bbab")); SysLib.writeStdout(countSubstringWithLoop(" ", "abaabba*bbaba*bbab")); SysLib.writeStdout(countSubstringWithLoop("", "abaabba*bbaba*bbab")); SysLib.writeStdout(countSubstringWithLoop("a", "")); SysLib.writeStdout(countSubstringWithLoop("", "")); end
function countSubstring(substr string in, str string in) returns(int) if(str.length() > 0 and substr.length() > 0)
return (str.length() - str.replaceStr(subStr, "").length()) / subStr.length(); else return 0; end
end
function countSubstringWithLoop(substr string in, str string in) returns(int) count int = 0; loc, index int = 1; strlen int = str.length(); substrlen int = substr.length();
if(strlen > 0 and substrlen > 0) while(loc != 0 and index <= strlen) loc = str.indexOf(substr, index); if(loc > 0) count += 1; index = loc + substrlen; end end end return count; end
end </lang> Output:
Remove and count: 3 2 2 7 0 0 0 0 Manual Loop: 3 2 2 7 0 0 0 0
Forth
<lang forth>: str-count ( s1 len s2 len -- n )
2swap 0 >r begin 2over search while 2over nip /string r> 1+ >r repeat 2drop 2drop r> ;
s" the three truths" s" th" str-count . \ 3 s" ababababab" s" abab" str-count . \ 2</lang>
Fortran
<lang fortran>program Example
implicit none integer :: n n = countsubstring("the three truths", "th") write(*,*) n n = countsubstring("ababababab", "abab") write(*,*) n n = countsubstring("abaabba*bbaba*bbab", "a*b") write(*,*) n
contains
function countsubstring(s1, s2) result(c)
character(*), intent(in) :: s1, s2 integer :: c, p, posn c = 0 if(len(s2) == 0) return p = 1 do posn = index(s1(p:), s2) if(posn == 0) return c = c + 1 p = p + posn + len(s2) end do
end function end program</lang> Output
3 2 2
Go
Using strings.Count() method: <lang go>package main import (
"fmt" "strings"
)
func main() {
fmt.Println(strings.Count("the three truths", "th")) // says: 3 fmt.Println(strings.Count("ababababab", "abab")) // says: 2
}</lang>
Groovy
Solution, uses the Groovy "find" operator (=~), and the Groovy-extended Matcher property "count": <lang groovy>println (('the three truths' =~ /th/).count) println (('ababababab' =~ /abab/).count) println (('abaabba*bbaba*bbab' =~ /a*b/).count) println (('abaabba*bbaba*bbab' =~ /a\*b/).count)</lang>
Output:
3 2 9 2
Haskell
<lang haskell> import Data.Text hiding (length)
-- Return the number of non-overlapping occurrences of sub in str. countSubStrs str sub = length $ breakOnAll (pack sub) (pack str)
main = do
print $ countSubStrs "the three truths" "th" print $ countSubStrs "ababababab" "abab"
</lang> Output:
3 2
Icon and Unicon
<lang Icon>procedure main() every A := ![ ["the three truths","th"], ["ababababab","abab"] ] do
write("The string ",image(A[2])," occurs as a non-overlapping substring ", countSubstring!A , " times in ",image(A[1]))
end
procedure countSubstring(s1,s2) #: return count of non-overlapping substrings c := 0 s1 ? while tab(find(s2)) do {
move(*s2) c +:= 1 }
return c end</lang>
Output:
The string "th" occurs as a non-overlapping substring 3 times in "the three truths" The string "abab" occurs as a non-overlapping substring 2 times in "ababababab"
J
<lang j>require'strings' countss=: #@] %~ #@[ - [ #@rplc ;~]</lang>
In other words: find length of original string, replace the string to be counted with the empty string, find the difference in lengths and divide by the length of the string to be counted.
Example use:
<lang j> 'the three truths' countss 'th' 3
'ababababab' countss 'abab'
2</lang>
Java
The "remove and count the difference" method: <lang java>public class CountSubstring { public static int countSubstring(String subStr, String str){ return (str.length() - str.replace(subStr, "").length()) / subStr.length(); }
public static void main(String[] args){ System.out.println(countSubstring("th", "the three truths")); System.out.println(countSubstring("abab", "ababababab")); System.out.println(countSubstring("a*b", "abaabba*bbaba*bbab")); } }</lang> Output:
3 2 2
The "split and count" method: <lang java>import java.util.regex.Pattern;
public class CountSubstring { public static int countSubstring(String subStr, String str){ // the result of split() will contain one more element than the delimiter // the "-1" second argument makes it not discard trailing empty strings return str.split(Pattern.quote(subStr), -1).length - 1; }
public static void main(String[] args){ System.out.println(countSubstring("th", "the three truths")); System.out.println(countSubstring("abab", "ababababab")); System.out.println(countSubstring("a*b", "abaabba*bbaba*bbab")); } }</lang> Output:
3 2 2
Manual looping
<lang java>public class CountSubstring {
public static int countSubstring(String subStr, String str){
int count = 0;
for (int loc = str.indexOf(subStr); loc != -1;
loc = str.indexOf(subStr, loc + subStr.length()))
count++;
return count;
}
public static void main(String[] args){ System.out.println(countSubstring("th", "the three truths")); System.out.println(countSubstring("abab", "ababababab")); System.out.println(countSubstring("a*b", "abaabba*bbaba*bbab")); } }</lang> Output:
3 2 2
JavaScript
Using regexes: <lang javascript>function countSubstring(str, subStr){ return str.match(new RegExp(subStr, "g")).length }</lang>
K
The dyadic verb _ss gives the positions of substring y in string x. <lang K> "the three truths" _ss "th" 0 4 13
#"the three truths" _ss "th"
3
"ababababab" _ss "abab"
0 4
#"ababababab" _ss "abab"
2 </lang>
Liberty BASIC
<lang lb> print countSubstring( "the three truths", "th") print countSubstring( "ababababab", "abab") end
function countSubstring( a$, s$)
c =0 la =len( a$) ls =len( s$) for i =1 to la -ls if mid$( a$, i, ls) =s$ then c =c +1: i =i +ls -1 next i countSubstring =c
end function </lang>
Lua
<lang lua>function Count_Substring( s1, s2 )
local magic = "[%^%$%(%)%%%.%[%]%*%+%-%?]" local percent = function(s)return "%"..s end return select( 2, s1:gsub( s2:gsub(magic,percent), "" ) )
end
print( Count_Substring( "the three truths", "th" ) ) print( Count_Substring( "ababababab","abab" ) )</lang>
3 2
Mathematica
<lang Mathematica>StringPosition["the three truths","th",Overlaps->False]//Length 3 StringPosition["ababababab","abab",Overlaps->False]//Length 2</lang>
MATLAB / Octave
<lang Matlab> % Count occurrences of a substring without overlap
length(findstr("ababababab","abab",0)) length(findstr("the three truths","th",0))
% Count occurrences of a substring with overlap length(findstr("ababababab","abab",1)) </lang>
Output:
>> % Count occurrences of a substring without overlap >> length(findstr("ababababab","abab",0)) ans = 2 >> length(findstr("the three truths","th",0)) ans = 3 >> % Count occurrences of a substring with overlap >> length(findstr("ababababab","abab",1)) ans = 4 >>
Maxima
<lang maxima>scount(e, s) := block(
[n: 0, k: 1], while integerp(k: ssearch(e, s, k)) do (n: n + 1, k: k + 1), n
)$
scount("na", "banana"); 2</lang>
Mirah
<lang mirah>import java.util.regex.Pattern import java.util.regex.Matcher
- The "remove and count the difference" method
def count_substring(pattern:string, source:string)
(source.length() - source.replace(pattern, "").length()) / pattern.length()
end
puts count_substring("th", "the three truths") # ==> 3 puts count_substring("abab", "ababababab") # ==> 2 puts count_substring("a*b", "abaabba*bbaba*bbab") # ==> 2
- The "split and count" method
def count_substring2(pattern:string, source:string)
# the result of split() will contain one more element than the delimiter
# the "-1" second argument makes it not discard trailing empty strings
source.split(Pattern.quote(pattern), -1).length - 1
end
puts count_substring2("th", "the three truths") # ==> 3 puts count_substring2("abab", "ababababab") # ==> 2 puts count_substring2("a*b", "abaabba*bbaba*bbab") # ==> 2
- This method does a match and counts how many times it matches
def count_substring3(pattern:string, source:string)
result = 0 Matcher m = Pattern.compile(Pattern.quote(pattern)).matcher(source); while (m.find()) result = result + 1 end result
end
puts count_substring3("th", "the three truths") # ==> 3 puts count_substring3("abab", "ababababab") # ==> 2 puts count_substring3("a*b", "abaabba*bbaba*bbab") # ==> 2 </lang>
NewLISP
<lang NewLISP>; file: stringcount.lsp
- url
- http://rosettacode.org/wiki/Count_occurrences_of_a_substring
- author
- oofoe 2012-01-29
- Obvious (and non-destructive...)
- Note that NewLISP performs an /implicit/ slice on a string or list
- with this form "(start# end# stringorlist)". If the end# is omitted,
- the slice will go to the end of the string. This is handy here to
- keep removing the front part of the string as it gets matched.
(define (scount needle haystack)
(let ((h (copy haystack)) ; Copy of haystack string. (i 0) ; Cursor. (c 0)) ; Count of occurences.
(while (setq i (find needle h)) (inc c) (setq h ((+ i (length needle)) h)))
c)) ; Return count.
- Tricky -- Uses functionality from replace function to find all
- non-overlapping occurrences, replace them, and return the count of
- items replaced in system variable $0.
(define (rcount needle haystack)
(replace needle haystack "X") $0)
- Test
(define (test f needle haystack)
(println "Found " (f needle haystack) " occurences of '" needle "' in '" haystack "'."))
(dolist (f (list scount rcount))
(test f "glart" "hinkerpop") (test f "abab" "ababababab") (test f "th" "the three truths") (println) )
(exit)</lang>
Sample output:
Found 0 occurences of 'glart' in 'hinkerpop'. Found 2 occurences of 'abab' in 'ababababab'. Found 3 occurences of 'th' in 'the three truths'. Found 0 occurences of 'glart' in 'hinkerpop'. Found 2 occurences of 'abab' in 'ababababab'. Found 3 occurences of 'th' in 'the three truths'.
Objective-C
The "split and count" method: <lang objc>@interface NSString (CountSubstrings) - (NSUInteger)occurrencesOfSubstring:(NSString *)subStr; @end
@implementation NSString (CountSubstrings) - (NSUInteger)occurrencesOfSubstring:(NSString *)subStr {
return [[self componentsSeparatedByString:subStr] count] - 1;
} @end
int main(int argc, const char *argv[]) {
NSAutoreleasePool *pool = [[NSAutoreleasePool alloc] init]; NSLog(@"%lu", [@"the three truths" occurrencesOfSubstring:@"th"]); NSLog(@"%lu", [@"ababababab" occurrencesOfSubstring:@"abab"]); NSLog(@"%lu", [@"abaabba*bbaba*bbab" occurrencesOfSubstring:@"a*b"]); [pool release]; return 0;
}</lang>
- Output:
3 2 2
The "remove and count the difference" method:
<lang objc>@interface NSString (CountSubstrings)
- (NSUInteger)occurrencesOfSubstring:(NSString *)subStr;
@end
@implementation NSString (CountSubstrings) - (NSUInteger)occurrencesOfSubstring:(NSString *)subStr {
return ([self length] - [[self stringByReplacingOccurrencesOfString:subStr withString:@""] length]) / [subStr length];
} @end
int main(int argc, const char *argv[]) {
NSAutoreleasePool *pool = [[NSAutoreleasePool alloc] init]; NSLog(@"%lu", [@"the three truths" occurrencesOfSubstring:@"th"]); NSLog(@"%lu", [@"ababababab" occurrencesOfSubstring:@"abab"]); NSLog(@"%lu", [@"abaabba*bbaba*bbab" occurrencesOfSubstring:@"a*b"]); [pool release]; return 0;
}</lang>
- Output:
3 2 2
Manual looping:
<lang objc>@interface NSString (CountSubstrings)
- (NSUInteger)occurrencesOfSubstring:(NSString *)subStr;
@end
@implementation NSString (CountSubstrings) - (NSUInteger)occurrencesOfSubstring:(NSString *)subStr {
NSUInteger count = 0; for (NSRange range = [self rangeOfString:subStr]; range.location != NSNotFound; range.location += range.length, range = [self rangeOfString:subStr options:0 range:NSMakeRange(range.location, [self length] - range.location)]) count++; return count;
} @end
int main(int argc, const char *argv[]) {
NSAutoreleasePool *pool = [[NSAutoreleasePool alloc] init]; NSLog(@"%lu", [@"the three truths" occurrencesOfSubstring:@"th"]); NSLog(@"%lu", [@"ababababab" occurrencesOfSubstring:@"abab"]); NSLog(@"%lu", [@"abaabba*bbaba*bbab" occurrencesOfSubstring:@"a*b"]); [pool release]; return 0;
}</lang>
- Output:
3 2 2
OCaml
<lang ocaml>let count_substring str sub =
let sub_len = String.length sub in let len_diff = (String.length str) - sub_len and reg = Str.regexp_string sub in let rec aux i n = if i > len_diff then n else try let pos = Str.search_forward reg str i in aux (pos + sub_len) (succ n) with Not_found -> n in aux 0 0
let () =
Printf.printf "count 1: %d\n" (count_substring "the three truth" "th"); Printf.printf "count 2: %d\n" (count_substring "ababababab" "abab");
- </lang>
ooRexx
<lang ooRexx>
bag="the three truths" x="th" say left(bag,30) left(x,15) 'found' bag~countstr(x)
bag="ababababab" x="abab" say left(bag,30) left(x,15) 'found' bag~countstr(x)
-- can be done caselessly too bag="abABAbaBab" x="abab" say left(bag,30) left(x,15) 'found' bag~caselesscountstr(x)
</lang> Output:
the three truths th found 3 ababababab abab found 2 abABAbaBab abab found 2
Pascal
See Delphi
Perl
<lang perl>sub countSubstring {
my $str = shift; my $sub = quotemeta(shift); my $count = () = $str =~ /$sub/g; return $count;
- or return scalar( () = $str =~ /$sub/g );
}
print countSubstring("the three truths","th"), "\n"; # prints "3" print countSubstring("ababababab","abab"), "\n"; # prints "2"</lang>
Perl 6
<lang perl6>sub count-substring($big,$little) { +$big.comb: /$little/ }
say count-substring("the three truths","th"); say count-substring("ababababab","abab");</lang> Note that in Perl 6 the /$little/ matches the variable literally, so there's no need to quote regex metacharacters. Also, prefix + forces numeric context in Perl 6 (it's a no-op in Perl 5). One other style point: we now tend to prefer hyphenated names over camelCase.
PHP
<lang php><?php echo substr_count("the three truths", "th"), "\n"; // prints "3" echo substr_count("ababababab", "abab"), "\n"; // prints "2" ?></lang>
PicoLisp
<lang PicoLisp>(de countSubstring (Str Sub)
(let (Cnt 0 H (chop Sub)) (for (S (chop Str) S (cdr S)) (when (head H S) (inc 'Cnt) (setq S (map prog2 H S)) ) ) Cnt ) )</lang>
Test:
: (countSubstring "the three truths" "th") -> 3 : (countSubstring "ababababab" "abab") -> 2
PL/I
<lang PL/I> cnt: procedure options (main);
declare (i, tally) fixed binary; declare (text, key) character (100) varying;
get edit (text) (L); put skip data (text); get edit (key) (L); put skip data (key);
tally = 0; i = 1; do until (i = 0); i = index(text, key, i); if i > 0 then do; tally = tally + 1; i = i + length(key); end; end; put skip list (tally);
end cnt; </lang> Output for the two specified strings is as expected. Output for the following data:-
TEXT='AAAAAAAAAAAAAAA'; KEY='AA'; 7
PowerBASIC
Windows versions of PowerBASIC (at least since PB/Win 7, and possibly earlier) provide the TALLY
function, which does exactly what this task requires (count non-overlapping substrings).
PB/DOS can use the example under BASIC, above.
Note that while this example is marked as working with PB/Win, the PRINT
statement would need to be replaced with MSGBOX
, or output to a file. (PB/Win does not support console output.)
<lang powerbasic>FUNCTION PBMAIN () AS LONG
PRINT "the three truths, th:", TALLY("the three truths", "th") PRINT "ababababab, abab:", TALLY("ababababab", "abab")
END FUNCTION</lang>
Output:
the three truths, th: 3 ababababab, abab: 2
PureBasic
<lang PureBasic>a = CountString("the three truths","th") b = CountString("ababababab","abab")
- a = 3
- b = 2</lang>
Python
<lang python>>>> "the three truths".count("th") 3 >>> "ababababab".count("abab") 2</lang>
REXX
Some older REXXes don't have the built-in function countstr, so one is included here.
The countstr subroutine (below) mimics the bif in newer REXXes.
<lang rexx>/*REXX program: use a subroutine that counts occurrences of a substring.*/
bag = "the three truths"
x = "th" say left(bag,30) left(x,15) 'found' countstr(bag,x)
bag = "ababababab"
x = "abab" say left(bag,30) left(x,15) 'found' countstr(bag,x)
exit /*stick a fork in it, we're done.*/ /*─────────────────────────────────────COUNTSTR subroutine──────────────*/ countstr: procedure; parse arg haystack, needle, startAt if startAt== then startAt=1; width=length(needle)
do k=0 until _==0; _=pos(needle,haystack,startAt) startAt = _ + width end /*k*/
return k</lang> output
the three truths th found 3 ababababab abab found 2
Ruby
<lang ruby>irb(main):001:0> "the three truths".scan("th").length => 3 irb(main):002:0> "ababababab".scan("abab").length => 2</lang>
String#scan returns an array of substrings, and Array#length (or Array#size) counts them.
Run BASIC
<lang runbasic>print countSubstring("the three truths","th") print countSubstring("ababababab","abab")
FUNCTION countSubstring(s$,find$) WHILE instr(s$,find$,i) <> 0
countSubstring = countSubstring + 1 i = instr(s$,find$,i) + len(find$)
WEND
END FUNCTION</lang>Output:
3 2
Scala
<lang scala>import scala.annotation.tailrec def countSubstring(str1:String, str2:String):Int={
@tailrec def count(pos:Int, c:Int):Int={ val idx=str1 indexOf(str2, pos) if(idx == -1) c else count(idx+str2.size, c+1) } count(0,0)
}
println(countSubstring("ababababab", "abab")) println(countSubstring("the three truths", "th"))</lang> Output:
2 3
Seed7
<lang seed7>$ include "seed7_05.s7i";
const func integer: countSubstring (in string: stri, in string: searched) is func
result var integer: count is 0; local var integer: offset is 0; begin offset := pos(stri, searched); while offset <> 0 do incr(count); offset := pos(stri, searched, offset + length(searched)); end while; end func;
const proc: main is func
begin writeln(countSubstring("the three truths", "th")); writeln(countSubstring("ababababab", "abab")); end func;</lang>
Output:
3 2
SNOBOL4
<lang SNOBOL4>
DEFINE("countSubstring(t,s)")
OUTPUT = countSubstring("the three truths","th") OUTPUT = countSubstring("ababababab","abab")
:(END)
countSubstring t ARB s = :F(RETURN)
countSubstring = countSubstring + 1 :(countSubstring)
END 3 2 </lang>
Standard ML
<lang sml>fun count_substrings (str, sub) =
let fun aux (str', count) = let val suff = #2 (Substring.position sub str') in if Substring.isEmpty suff then count else aux (Substring.triml (size sub) suff, count + 1) end in aux (Substring.full str, 0) end;
print (Int.toString (count_substrings ("the three truths", "th")) ^ "\n"); print (Int.toString (count_substrings ("ababababab", "abab")) ^ "\n"); print (Int.toString (count_substrings ("abaabba*bbaba*bbab", "a*b")) ^ "\n");</lang>
TUSCRIPT
<lang tuscript> $$ MODE TUSCRIPT, {} occurences=COUNT ("the three truths", ":th:") occurences=COUNT ("ababababab", ":abab:") occurences=COUNT ("abaabba*bbaba*bbab",":a\*b:") </lang> Output:
3 2 2
Tcl
The regular expression engine is ideal for this task, especially as the ***= prefix makes it interpret the rest of the argument as a literal string to match: <lang tcl>proc countSubstrings {haystack needle} {
regexp -all ***=$needle $haystack
} puts [countSubstrings "the three truths" "th"] puts [countSubstrings "ababababab" "abab"] puts [countSubstrings "abaabba*bbaba*bbab" "a*b"]</lang>
Output:
3 2 2
TXR
<lang txr>@(next :args) @(do (defun count-occurrences (haystack needle)
(for* ((occurrences 0) (old-pos 0) (new-pos (search-str haystack needle old-pos nil))) (new-pos occurrences) ((inc occurrences) (set old-pos (+ new-pos (length needle))) (set new-pos (search-str haystack needle old-pos nil))))))
@ndl @hay @(output) @(count-occurrences hay ndl) occurrences(s) of @ndl inside @hay @(end)</lang>
$ ./txr count-occurrences.txr "baba" "babababa" 2 occurence(s) of baba inside babababa $ ./txr count-occurrences.txr "cat" "catapultcatalog" 2 occurence(s) of cat inside catapultcatalog
XPL0
<lang XPL0>include c:\cxpl\codes; \intrinsic 'code' declarations string 0; \use zero-terminated strings, instead of MSb terminated
func StrNCmp(A, B, N); \Compare string A to string B up to N bytes long
\This returns:
\ >0 if A > B
\ =0 if A = B
\ <0 if A < B
char A, B; \strings to be compared
int N; \number of bytes to compare
int I;
[for I:= 0 to N-1 do
if A(I) # B(I) then return A(I) - B(I);
return 0; \they're equal ]; \StrNCmp
func StrLen(Str); \Return the number of characters in an ASCIIZ string
char Str;
int I;
for I:= 0 to -1>>1-1 do
if Str(I) = 0 then return I;
func SubStr(A, B); \Count number of times string B occurs in A
char A, B;
int LA, LB, C, I;
[LA:= StrLen(A); LB:= StrLen(B);
C:= 0; I:= 0;
while I < LA do
if StrNCmp(B, A+I, LB) = 0 then [C:= C+1; I:= I+LB] else I:= I+1;
return C; ];
[IntOut(0, SubStr("the three truths", "th")); CrLf(0);
IntOut(0, SubStr("ababababab", "abab")); CrLf(0);
]</lang>
Output:
3 2
pre> >> % Count occurrences of a substring without overlap >> length(findstr("ababababab","abab",0)) ans = 2 >> length(findstr("the three truths","th",0)) ans = 3 >> % Count occurrences of a substring with overlap >> length(findstr("ababababab","abab",1)) ans = 4 >>
- Programming Tasks
- String manipulation
- Ada
- ALGOL 68
- AutoHotkey
- AWK
- BASIC
- BBC BASIC
- Bracmat
- C
- C++
- C sharp
- CoffeeScript
- Common Lisp
- D
- Delphi
- Eiffel
- Erlang
- Euphoria
- EGL
- Forth
- Fortran
- Go
- Groovy
- Haskell
- Icon
- Unicon
- J
- Java
- JavaScript
- K
- Liberty BASIC
- Lua
- Mathematica
- MATLAB
- Octave
- Maxima
- Mirah
- NewLISP
- Objective-C
- OCaml
- OoRexx
- Pascal
- Perl
- Perl 6
- PHP
- PicoLisp
- PL/I
- PowerBASIC
- PureBasic
- Python
- REXX
- Ruby
- Run BASIC
- Scala
- Seed7
- SNOBOL4
- Standard ML
- TUSCRIPT
- Tcl
- TXR
- XPL0