Text between

From Rosetta Code
Revision as of 17:00, 5 January 2018 by rosettacode>Zbentley (Add a shell solution.)
Text between is a draft programming task. It is not yet considered ready to be promoted as a complete task, for reasons that should be found in its talk page.
Task

Get the text between a start delimiter and an end delimiter. The special delimiter values "start" and "end" stand for the start and the end of the string, respectively.

Example 1. Both delimiters set Text: "Hello Rosetta Code world" Start delimiter: "Hello " End delimiter: " world" Output: "Rosetta Code"

Example 2. Start delimiter is the start of the string Text: "Hello Rosetta Code world" Start delimiter: "start" End delimiter: " world" Output: "Hello Rosetta Code"

Example 3. End delimiter is the end of the string Text: "Hello Rosetta Code world" Start delimiter: "Hello" End delimiter: "end" Output: " Rosetta Code world"




AppleScript

<lang applescript> my text_between("Hello Rosetta Code world", "Hello ", " world")

-- text_between: return the part of this_text that lies between start_text
-- and end_text.
--   start_text: delimiter to skip past; the literal "start" means the
--               beginning of this_text.
--   end_text:   delimiter to stop at; the literal "end" means the end of
--               this_text.
-- Returns "" when the start delimiter does not occur (the error raised by
-- asking for text item 2 is swallowed by the try block).
on text_between(this_text, start_text, end_text)
	-- Remember the caller's delimiters so they can be put back afterwards
	-- instead of being forced to "".
	set saved_delimiters to AppleScript's text item delimiters
	set return_text to ""
	try
		if (start_text is not "start") then
			-- Splitting on the start delimiter and re-joining items 2..end
			-- with the same delimiter keeps later occurrences intact.
			set AppleScript's text item delimiters to start_text
			set return_text to text items 2 thru end of this_text as string
		else
			set return_text to this_text
		end if
		if (end_text is not "end") then
			-- Item 1 is everything before the first end delimiter.
			set AppleScript's text item delimiters to end_text
			set return_text to text item 1 of return_text as string
		end if
	end try
	set AppleScript's text item delimiters to saved_delimiters

	return return_text
end text_between </lang>

C

<lang c> /*
 * textBetween: Gets the text between two delimiters.
 *
 * thisText   - NUL-terminated text to search.
 * startText  - start delimiter; exactly "start" means the beginning of
 *              thisText.
 * endText    - end delimiter; exactly "end" means the end of thisText.
 * returnText - caller-supplied buffer that receives the NUL-terminated
 *              result. It must hold at least strlen(thisText) + 1 bytes.
 *
 * Returns a pointer into thisText at the first extracted character, or NULL
 * when an argument is NULL, a delimiter is not found, or the extracted text
 * is empty. In every NULL case with a valid buffer, returnText is set to "".
 */
char* textBetween(char* thisText, char* startText, char* endText, char* returnText)
{
    char* startPointer = NULL;
    char* endPointer = NULL;
    size_t stringLength = 0;

    if (thisText == NULL || startText == NULL || endText == NULL || returnText == NULL)
    {
        return NULL;
    }

    /* Write through the pointer so the caller's buffer is really cleared. */
    returnText[0] = '\0';

    /* Exact comparison: a delimiter that merely contains "start" is a
       normal delimiter, not the sentinel. */
    if (strcmp(startText, "start") == 0)
    {
        startPointer = thisText;
    }
    else
    {
        startPointer = strstr(thisText, startText);
        if (startPointer == NULL)
        {
            return NULL; /* start delimiter not found */
        }
        startPointer += strlen(startText);
    }

    if (strcmp(endText, "end") == 0)
    {
        endPointer = startPointer + strlen(startPointer);
    }
    else
    {
        /* Search only after the start delimiter. */
        endPointer = strstr(startPointer, endText);
        if (endPointer == NULL)
        {
            return NULL; /* end delimiter not found */
        }
    }

    stringLength = (size_t)(endPointer - startPointer);
    if (stringLength == 0)
    {
        return NULL; /* nothing between the delimiters */
    }

    /* Copy characters between the start and end delimiters. */
    memcpy(returnText, startPointer, stringLength);
    returnText[stringLength] = '\0';

    return startPointer;
} // end textBetween method</lang>

Haskell

<lang Haskell>import Data.Text (Text, breakOn, pack, stripPrefix) import Control.Arrow ((***))

-- | Text lying between a start and an end delimiter.
--
-- Each delimiter is 'Right' a literal delimiter, or 'Left' to mean the
-- corresponding edge of the text. The start delimiter is located first and
-- the end delimiter is then searched only in what follows it, so an end
-- delimiter that also occurs inside or before the start delimiter cannot
-- truncate the search.
--
-- A missing start delimiter gives the empty text; a missing end delimiter
-- keeps everything after the start.
textBetween :: (Either Text Text, Either Text Text) -> Text -> Text
textBetween (s, e) txt =
  let -- Text following the first start delimiter; Nothing when it is absent.
      afterStart =
        case s of
          Left _ -> Just txt
          Right d -> stripPrefix d (snd (breakOn d txt))
      -- Everything before the first end delimiter, or all of it.
      beforeEnd t = either (const t) (fst . flip breakOn t) e
  in case afterStart of
       Just t -> beforeEnd t
       _ -> pack []

-- TEST ------------------------------------------------------------------

-- | Text every test pair is applied to.
sample :: Text
sample = pack "Hello Rosetta Code world"

-- | Delimiter pairs for the three task examples. The words "start" and
-- "end" become 'Left' (edge of the text); anything else is a 'Right'
-- literal delimiter.
testPairs :: [(Either Text Text, Either Text Text)]
testPairs =
  map
    (wrap *** wrap)
    [("Hello ", " world"), ("start", " world"), ("Hello", "end")]
  where
    wrap x
      | x `elem` ["start", "end"] = Left (pack [])
      | otherwise = Right (pack x)

-- | Print the extracted text for each delimiter pair.
main :: IO ()
main = mapM_ (print . (`textBetween` sample)) testPairs</lang>

Output:
"Rosetta Code"
"Hello Rosetta Code"
" Rosetta Code world"

Java

javac textBetween.java
java -cp . textBetween "hello Rosetta Code world" "hello " " world"

<lang java> public class textBetween {

   /**
    * textBetween: Get the text between two delimiters.
    *
    * @param thisText    text to search
    * @param startString start delimiter; the literal "start" means the
    *                    beginning of thisText
    * @param endString   end delimiter; the literal "end" means the end of
    *                    thisText
    * @return the text after the first start delimiter and before the next
    *         end delimiter, or "" when either delimiter is not found
    */
   static String textBetween(String thisText, String startString, String endString)
   {
       int startIndex = 0;

       if (!startString.equals("start"))
       {
           int found = thisText.indexOf(startString);
           if (found < 0)
           {
               return "";
           }
           // Skip over the start delimiter itself.
           startIndex = found + startString.length();
       }

       int endIndex = thisText.length();

       if (!endString.equals("end"))
       {
           // Search from startIndex so an earlier occurrence of the end
           // delimiter cannot produce endIndex < startIndex, which would
           // make substring throw.
           endIndex = thisText.indexOf(endString, startIndex);
           if (endIndex < 0)
           {
               return "";
           }
       }

       return thisText.substring(startIndex, endIndex);
   } // end method textBetween
   /**
    * Main method: prints the text between the delimiters given on the
    * command line.
    *
    * @param args args[0] is the text, args[1] the start delimiter and
    *             args[2] the end delimiter
    */
   public static void main(String[] args)
   {
       // Guard against missing arguments instead of failing with
       // ArrayIndexOutOfBoundsException.
       if (args.length < 3)
       {
           System.err.println("Usage: java textBetween <text> <start delimiter> <end delimiter>");
           return;
       }

       String thisText = args[0];
       String startDelimiter = args[1];
       String endDelimiter = args[2];

       System.out.println(textBetween(thisText, startDelimiter, endDelimiter));
   } // end method main
   

} // end class TextBetween </lang>

JavaScript

<lang javascript> function textBetween(thisText, startString, endString) { if (thisText == undefined) { return ""; }

var start_pos = 0; if (startString != 'start') { start_pos = thisText.indexOf(startString);

// If the text does not contain the start string, return a blank string if (start_pos < 0) { return ; }

// Skip the first startString characters start_pos = start_pos + startString.length; }

var end_pos = thisText.length; if (endString != 'end') { end_pos = thisText.indexOf(endString,start_pos); }

// If the text does not have the end string after the start string, return the whole string after the start if (end_pos < start_pos) { end_pos = thisText.length; }

var newText = thisText.substring(start_pos,end_pos);

return newText; } // end textBetween </lang>


PHP

http://localhost/textBetween.php?thisText=hello%20Rosetta%20Code%20world&start=hello%20&end=%20world

<lang php> <?php
/**
 * Get the text between two delimiters.
 *
 * @param string $string Text to search.
 * @param string $start  Start delimiter; "start" means the beginning of
 *                       $string.
 * @param string $end    End delimiter; "end" means the end of $string.
 * @return string The text between the delimiters, or the message
 *                "Start text not found" / "End text not found" when the
 *                corresponding delimiter is missing or empty.
 */
function text_between($string, $start, $end) {
    $string = (string) $string;

    if ($start == "start")
    {
        $startIndex = 0;
    } else {
        // strpos() returns false when nothing is found but 0 for a match at
        // the very beginning, so the comparison must be strict.
        $startPos = ($start === "") ? false : strpos($string, $start);
        if ($startPos === false)
        {
            return "Start text not found";
        }
        // Skip the delimiter before measuring, so the length below counts
        // only the text between the delimiters.
        $startIndex = $startPos + strlen($start);
    }

    if ($end == "end")
    {
        $endIndex = strlen($string);
    } else {
        $endIndex = ($end === "") ? false : strpos($string, $end, $startIndex);
        if ($endIndex === false)
        {
            return "End text not found";
        }
    }

    return (string) substr($string, $startIndex, $endIndex - $startIndex);
}

// Read the request parameters, treating a missing one as an empty string
// rather than reading an undefined index.
$thisText = isset($_GET["thisText"]) ? $_GET["thisText"] : "";
$startDelimiter = isset($_GET["start"]) ? $_GET["start"] : "";
$endDelimiter = isset($_GET["end"]) ? $_GET["end"] : "";

$returnText = text_between($thisText, $startDelimiter, $endDelimiter);

// The result is derived from user-supplied query parameters, so escape it
// before writing it into the HTML response.
print_r(htmlspecialchars($returnText, ENT_QUOTES, 'UTF-8')); ?> </lang>

Python

<lang python>

#!/usr/bin/env python

from sys import argv

# textBetween in python
# Get the text between two delimiters
# Usage:
# python textBetween.py "hello Rosetta Code world" "hello " " world"

def textBetween( thisText, startString, endString ):
    """Return the text of thisText lying between two delimiters.

    thisText    -- the text to search.
    startString -- start delimiter; the literal 'start' means the beginning
                   of thisText.
    endString   -- end delimiter; the literal 'end' means the end of
                   thisText.

    Returns the text after the first start delimiter and before the next end
    delimiter. If a delimiter does not occur, the message
    'Start delimiter not found' or 'End delimiter not found' is returned
    instead.
    """
    # Compare by value: 'is' tests object identity and only appears to work
    # for strings when the interpreter happens to intern them.
    if startString == 'start':
        startIndex = 0
    else:
        # find() reports a miss as -1 (index() would raise), which lets the
        # not-found message below actually be reached.
        startIndex = thisText.find( startString )
        if startIndex < 0:
            return 'Start delimiter not found'
        # Skip the delimiter itself; the 'start' sentinel has no text to skip.
        startIndex = startIndex + len( startString )

    returnText = thisText[startIndex:]

    if endString == 'end':
        return returnText

    endIndex = returnText.find( endString )
    if endIndex < 0:
        return 'End delimiter not found'

    return returnText[:endIndex]

script, first, second, third = argv

thisText = first startString = second endString = third

print textBetween( thisText, startString, endString ) </lang>

Ruby

Test <lang ruby> class String

 # Returns the part of the string between two delimiters.
 #
 # startDelimiter - delimiter to skip past; "start" means the beginning of
 #                  the string.
 # endDelimiter   - delimiter to stop at; "end" means the end of the string.
 #
 # Returns the extracted text, or the message "Start delimiter not found" /
 # "End delimiter not found" when the corresponding delimiter is missing.
 def textBetween startDelimiter, endDelimiter

   if startDelimiter == "start"
     startIndex = 0
   else
     # Check for nil before adding the delimiter length; adding to nil would
     # raise NoMethodError and the not-found message would never be returned.
     foundAt = self.index(startDelimiter)
     return "Start delimiter not found" if foundAt.nil?
     startIndex = foundAt + startDelimiter.length
   end

   # Everything after the start delimiter.
   returnText = self[startIndex, self.length]

   if endDelimiter == "end"
     endIndex = returnText.length
   else
     endIndex = returnText.index(endDelimiter)
   end

   return "End delimiter not found" if endIndex.nil?

   returnText[0, endIndex]

 end

end

thisText = ARGV[0] startDelimiter = ARGV[1] endDelimiter = ARGV[2]

  1. puts thisText
  2. puts startDelimiter
  3. puts endDelimiter

returnText = thisText.textBetween(startDelimiter, endDelimiter)

puts returnText </lang>

UNIX Shell

Works with: Bash
Works with: Dash
Works with: Zsh

This implementation creates no processes/subshells in modern shells (e.g. shells in which 'echo' and '[' are builtins). It modifies/leaks no global state other than the "text_between" function's name. Its behavior is not changed by the presence or absence of common shell options (e.g. "-e", "-u", "pipefail", or POSIX compatibility mode) or settings (e.g. "IFS").

This can be made to work with ksh (93) by removing all uses of the "local" keyword, though this will cause it to modify global variables.

<lang bash>text_between() { local search="${1:-}" local start_str="${2:-}" local end_str="${3:-}" local index_temp

if [ -z "$start_str" ]; then echo "Start text not provided" >&2 return 1 elif [ "$start_str" != "start" ]; then index_temp="${search##*$start_str}" if [ "$index_temp" = "$search" ]; then echo "Start text not found" >&2 return 1 fi search="$index_temp" fi

if [ -z "$end_str" ]; then echo "End text not provided" >&2 return 1 elif [ "$end_str" = "end" ]; then echo "$search" else index_temp="${search%%$end_str*}" # This error condition assumes that if end occurs before start, invocation should fail. if [ "$index_temp" = "$search" ]; then echo "End text not found" >&2 return 1 fi echo "$index_temp" fi }

# One invocation per line for each of the three task examples.
text_between "Hello Rosetta Code world" "Hello " " world"
text_between "Hello Rosetta Code world" "start" " world"
text_between "Hello Rosetta Code world" "Hello " "end" </lang>

zkl

<lang zkl>fcn getText(text,start,end){
   // Return the part of text between the start and end delimiters.
   // NOTE(review): "start" and "end" are not special-cased here; they are
   // looked up like any other delimiter and presumably just fail to be
   // found in the sample text, which triggers the fallbacks below.

   // s: when find() gives Void for the start delimiter, fall back to 0;
   // otherwise move past the delimiter by adding its length.
  s = (if((s:=text.find(start))==Void) 0 else s + start.len());
   // e: the end delimiter is looked up with s passed as second argument
   // (presumably the offset to search from - confirm); when find() gives
   // Void, fall back to text.len().
  e = (if((e:=text.find(end,s))==Void) text.len() else e);
   // Result is text[s, e - s] (presumably offset and length - confirm).
  text[s,e - s]

} getText("Hello Rosetta Code world","Hello "," world").println(); getText("Hello Rosetta Code world","start", " world").println(); getText("Hello Rosetta Code world","Hello", "end" ).println();</lang>

Output:
Rosetta Code
Hello Rosetta Code
 Rosetta Code world