end 024
This commit is contained in:
parent
b1f1525f81
commit
3a032a08b1
668
024_String_data_type_methods/024_csharp.md
Normal file
668
024_String_data_type_methods/024_csharp.md
Normal file
@ -0,0 +1,668 @@
|
|||||||
|
# Modify the content of strings using built-in string data type methods in C#
|
||||||
|
|
||||||
|
## Introduction
|
||||||
|
|
||||||
|
Suppose you're a developer for an application letting a business update its
|
||||||
|
"last chance deals" website by sending an email. The update email uses special
|
||||||
|
required text in the title and body of the email to instruct the automation how
|
||||||
|
to update the website. The email includes the next deal title, discount %,
|
||||||
|
expiration, and when to publish the offer live.
|
||||||
|
|
||||||
|
Frequently, application data you need to work with is from *other* software
|
||||||
|
systems, and has data you don't want or need. Sometimes the data is in an
|
||||||
|
unusable format, containing *extra* information that makes the important
|
||||||
|
information difficult to extract. To adjust data for your application, you need
|
||||||
|
tools and techniques to parse through string data, isolate the information you
|
||||||
|
need, and remove the information you don't need.
|
||||||
|
|
||||||
|
In this module, you use string helper methods to identify and isolate the
|
||||||
|
information you're interested in. You learn how to copy a smaller portion of a
|
||||||
|
larger string. You replace characters, or remove characters from a string.
|
||||||
|
|
||||||
|
By the end of this module, you're able to modify a string's contents, isolating
|
||||||
|
specific portions to extract, replace, or remove.
|
||||||
|
|
||||||
|
#### Learning objectives
|
||||||
|
|
||||||
|
In this module, you will:
|
||||||
|
|
||||||
|
- Identify the position of a character or string inside of another string
|
||||||
|
- Extract portions of strings
|
||||||
|
- Remove portions of strings
|
||||||
|
- Replace values in strings with different values
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Exercise
|
||||||
|
|
||||||
|
### Use the string's IndexOf() and Substring() helper methods
|
||||||
|
|
||||||
|
In this exercise, you use the `IndexOf()` method to locate the position of one
|
||||||
|
or more characters or strings inside a larger string. You use the `Substring()`
|
||||||
|
method to return the part of the larger string that follows the character
|
||||||
|
positions you specify.
|
||||||
|
|
||||||
|
You'll also use an overloaded version of the `Substring()` method to set the
|
||||||
|
length of characters to return after a specified position in a string.
|
||||||
|
|
||||||
|
### Write code to find parenthesis pairs embedded in a string
|
||||||
|
|
||||||
|
Type the following code into the code editor:
|
||||||
|
|
||||||
|
```cs
|
||||||
|
string message = "Find what is (inside the parentheses)";
|
||||||
|
|
||||||
|
int openingPosition = message.IndexOf('(');
|
||||||
|
int closingPosition = message.IndexOf(')');
|
||||||
|
|
||||||
|
Console.WriteLine(openingPosition);
|
||||||
|
Console.WriteLine(closingPosition);
|
||||||
|
```
|
||||||
|
|
||||||
|
At the Terminal command prompt, to run your code, type `dotnet run` and then
|
||||||
|
press Enter.
|
||||||
|
|
||||||
|
You should see the following output:
|
||||||
|
|
||||||
|
```txt
|
||||||
|
13
|
||||||
|
36
|
||||||
|
```
|
||||||
|
|
||||||
|
In this case, the index of the `(` character is 13. Remember, these values are
|
||||||
|
zero-based, so it's the 14th character in the string. The index of the `)`
|
||||||
|
character is 36.
|
||||||
|
|
||||||
|
Now that you have the two indexes, you can use them as the boundaries to
|
||||||
|
retrieve the value between them.
|
||||||
|
|
||||||
|
#### Add code to retrieve the value between parentheses
|
||||||
|
|
||||||
|
Update your code as follows:
|
||||||
|
|
||||||
|
```cs
|
||||||
|
string message = "Find what is (inside the parentheses)";
|
||||||
|
|
||||||
|
int openingPosition = message.IndexOf('(');
|
||||||
|
int closingPosition = message.IndexOf(')');
|
||||||
|
|
||||||
|
// Console.WriteLine(openingPosition);
|
||||||
|
// Console.WriteLine(closingPosition);
|
||||||
|
|
||||||
|
int length = closingPosition - openingPosition;
|
||||||
|
Console.WriteLine(message.Substring(openingPosition, length));
|
||||||
|
```
|
||||||
|
|
||||||
|
Save your code file, and then run your code. You should see the following
|
||||||
|
output:
|
||||||
|
|
||||||
|
```txt
|
||||||
|
(inside the parentheses
|
||||||
|
```
|
||||||
|
|
||||||
|
The `Substring()` method needs the starting position and the number of
|
||||||
|
characters, or length, to retrieve. So, you calculate the length in a temporary
|
||||||
|
variable called `length`, and pass it with the `openingPosition` value to
|
||||||
|
retrieve the string inside of the parentheses.
|
||||||
|
|
||||||
|
The result is close, however the output includes the opening parenthesis. In
|
||||||
|
this exercise, the inclusion of the parenthesis isn't desired. To remove the
|
||||||
|
parenthesis from output, you have to update the code to skip the index of the
|
||||||
|
parenthesis itself.
|
||||||
|
|
||||||
|
#### Modify the starting position of the sub string
|
||||||
|
|
||||||
|
Update your code as follows:
|
||||||
|
|
||||||
|
```cs
|
||||||
|
string message = "Find what is (inside the parentheses)";
|
||||||
|
|
||||||
|
int openingPosition = message.IndexOf('(');
|
||||||
|
int closingPosition = message.IndexOf(')');
|
||||||
|
|
||||||
|
openingPosition += 1;
|
||||||
|
|
||||||
|
int length = closingPosition - openingPosition;
|
||||||
|
Console.WriteLine(message.Substring(openingPosition, length));
|
||||||
|
```
|
||||||
|
|
||||||
|
Save your code file, and then run your code. You should see the following
|
||||||
|
output:
|
||||||
|
|
||||||
|
```txt
|
||||||
|
inside the parentheses
|
||||||
|
```
|
||||||
|
|
||||||
|
Take a moment to review the previous code and the line `openingPosition += 1;`.
|
||||||
|
|
||||||
|
By increasing the `openingPosition` by `1`, you skip over the opening
|
||||||
|
parenthesis character.
|
||||||
|
|
||||||
|
The reason you're using the value `1` is because that is the length of the
|
||||||
|
character. If you attempt to locate a value starting after a longer string, for
|
||||||
|
example, `<div>` or `---`, you would use the length of that string instead.
|
||||||
|
|
||||||
|
Update your code as follows:
|
||||||
|
|
||||||
|
```cs
|
||||||
|
string message = "What is the value <span>between the tags</span>?";
|
||||||
|
|
||||||
|
int openingPosition = message.IndexOf("<span>");
|
||||||
|
int closingPosition = message.IndexOf("</span>");
|
||||||
|
|
||||||
|
openingPosition += 6;
|
||||||
|
int length = closingPosition - openingPosition;
|
||||||
|
Console.WriteLine(message.Substring(openingPosition, length));
|
||||||
|
```
|
||||||
|
|
||||||
|
Take a moment to review the previous code and the line `openingPosition += 6;`.
|
||||||
|
|
||||||
|
The preceding snippet of code shows how to find the value inside an opening and
|
||||||
|
closing `<span>` tag.
|
||||||
|
|
||||||
|
In this case, you're adding `6` to the `openingPosition` as the offset to
|
||||||
|
calculate the length of the sub string.
|
||||||
|
|
||||||
|
#### Avoid magic values
|
||||||
|
|
||||||
|
Hardcoded strings like `"<span>"` in the previous code listing are known as
|
||||||
|
"magic strings" and hardcoded numeric values like `6` are known as "magic
|
||||||
|
numbers". These "magic" values are undesirable for many reasons and you should
|
||||||
|
try to avoid them if possible.
|
||||||
|
|
||||||
|
Review the previous code to consider how the code might break if you hardcoded
|
||||||
|
the string `"<span>"` multiple times in your code, but misspelled one instance
|
||||||
|
of it as `"<sapn>"`.
|
||||||
|
|
||||||
|
The compiler doesn't catch `"<sapn>"` at compile time because the value is in a
|
||||||
|
string. The misspelling leads to problems at run time, and depending on the
|
||||||
|
complexity of your code, it might be difficult to track down.
|
||||||
|
|
||||||
|
Furthermore, if you change the string `"<span>"` to the shorter `"<div>"`, but
|
||||||
|
forget to change the number 6 to 5, then your code produces undesirable results.
|
||||||
|
|
||||||
|
Update your code as follows:
|
||||||
|
|
||||||
|
```cs
|
||||||
|
string message = "What is the value <span>between the tags</span>?";
|
||||||
|
|
||||||
|
const string openSpan = "<span>";
|
||||||
|
const string closeSpan = "</span>";
|
||||||
|
|
||||||
|
int openingPosition = message.IndexOf(openSpan);
|
||||||
|
int closingPosition = message.IndexOf(closeSpan);
|
||||||
|
|
||||||
|
openingPosition += openSpan.Length;
|
||||||
|
int length = closingPosition - openingPosition;
|
||||||
|
Console.WriteLine(message.Substring(openingPosition, length));
|
||||||
|
```
|
||||||
|
|
||||||
|
Take a minute to examine the updated code and the use of the keyword `const` as
|
||||||
|
used in `const string openSpan = "<span>";`.
|
||||||
|
|
||||||
|
The code uses a constant with the `const` keyword. A constant allows you to
|
||||||
|
define and initialize a variable whose value can never be changed. You would
|
||||||
|
then use that constant in the rest of the code whenever you needed that value.
|
||||||
|
This ensures that the value is only defined once and misspelling the `const`
|
||||||
|
variable is caught by the compiler.
|
||||||
|
|
||||||
|
The previous code listing is a safer way to write the same code you examined in
|
||||||
|
the previous section. Now, if the value of `openSpan` changes to `<div>`, the
|
||||||
|
line of code that uses the `Length` property continues to be valid.
|
||||||
|
|
||||||
|
### Recap
|
||||||
|
|
||||||
|
This unit covered a lot of material. Here are the most important things to remember:
|
||||||
|
|
||||||
|
- `IndexOf()` gives you the first position of a character or string inside of
|
||||||
|
another string.
|
||||||
|
- `IndexOf()` returns `-1` if it can't find a match.
|
||||||
|
- `Substring()` returns just the specified portion of a string, using a
|
||||||
|
starting position and optional length.
|
||||||
|
- There's often more than one way to solve a problem. You used two separate
|
||||||
|
techniques to find all instances of a given character or string.
|
||||||
|
- Avoid hardcoded magic values. Instead, define a `const` variable. A constant
|
||||||
|
variable's value can't be changed after initialization.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Exercise
|
||||||
|
|
||||||
|
### Use the string's IndexOfAny() and LastIndexOf() helper methods
|
||||||
|
|
||||||
|
In this exercise, you use the `IndexOfAny()` method to find the first location
|
||||||
|
of any of the characters from a selected `char` array. You also use `LastIndexOf()` to
|
||||||
|
find the final location of a string within another string.
|
||||||
|
|
||||||
|
#### Retrieve the last occurrence of a sub string
|
||||||
|
|
||||||
|
You increase the complexity of the `message` variable by adding many sets of
|
||||||
|
parentheses, then write code to retrieve the content inside the last set of
|
||||||
|
parentheses.
|
||||||
|
|
||||||
|
Update your code as follows:
|
||||||
|
|
||||||
|
```cs
|
||||||
|
string message = "(What if) I am (only interested) in the last (set of parentheses)?";
|
||||||
|
int openingPosition = message.LastIndexOf('(');
|
||||||
|
|
||||||
|
openingPosition += 1;
|
||||||
|
int closingPosition = message.LastIndexOf(')');
|
||||||
|
int length = closingPosition - openingPosition;
|
||||||
|
Console.WriteLine(message.Substring(openingPosition, length));
|
||||||
|
```
|
||||||
|
|
||||||
|
Save your code file, and then run your code. You should see the following
|
||||||
|
output:
|
||||||
|
|
||||||
|
```txt
|
||||||
|
set of parentheses
|
||||||
|
```
|
||||||
|
|
||||||
|
The key to this example is the use of `LastIndexOf()`, which you use to get the
|
||||||
|
positions of the last opening and closing parentheses.
|
||||||
|
|
||||||
|
#### Retrieve all instances of substrings inside parentheses
|
||||||
|
|
||||||
|
This time, update the `message` to have three sets of parentheses, and write
|
||||||
|
code to extract any text inside of the parentheses. You're able to reuse
|
||||||
|
portions of the previous work, but you need to add a `while` statement to
|
||||||
|
iterate through the string until all sets of parentheses are discovered,
|
||||||
|
extracted, and displayed.
|
||||||
|
|
||||||
|
Update your code as follows:
|
||||||
|
|
||||||
|
```cs
|
||||||
|
string message = "(What if) there are (more than) one (set of parentheses)?";
|
||||||
|
while (true) {
|
||||||
|
int openingPosition = message.IndexOf('(');
|
||||||
|
if (openingPosition == -1) break;
|
||||||
|
|
||||||
|
openingPosition += 1;
|
||||||
|
int closingPosition = message.IndexOf(')');
|
||||||
|
int length = closingPosition - openingPosition;
|
||||||
|
Console.WriteLine(message.Substring(openingPosition, length));
|
||||||
|
|
||||||
|
// Note the overload of the Substring to return only the remaining
|
||||||
|
// unprocessed message:
|
||||||
|
message = message.Substring(closingPosition + 1);
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
Run your code. You should see the following output:
|
||||||
|
|
||||||
|
```txt
|
||||||
|
What if
|
||||||
|
more than
|
||||||
|
set of parentheses
|
||||||
|
```
|
||||||
|
|
||||||
|
Take a minute to observe the last line of code inside the `while` loop, pulled out
|
||||||
|
in the following code:
|
||||||
|
|
||||||
|
```cs
|
||||||
|
message = message.Substring(closingPosition + 1);
|
||||||
|
```
|
||||||
|
|
||||||
|
When you use `Substring()` without specifying a length input parameter, it will
|
||||||
|
return every character from the starting position you specify to the end of the string. With the string
|
||||||
|
being processed,
|
||||||
|
`message = "(What if) there are (more than) one (set of parentheses)?"`, there's
|
||||||
|
an advantage to removing the first set of parentheses `(What if)` from the
|
||||||
|
value of `message`. What remains is then processed in the next iteration of the
|
||||||
|
`while` loop.
|
||||||
|
|
||||||
|
Take a minute to consider what happens during the final iteration of the `while`
|
||||||
|
loop, when only the final `?` character remains.
|
||||||
|
|
||||||
|
The following code addresses handling the end of the string:
|
||||||
|
|
||||||
|
```cs
|
||||||
|
int openingPosition = message.IndexOf('(');
|
||||||
|
if (openingPosition == -1) break;
|
||||||
|
```
|
||||||
|
|
||||||
|
The `IndexOf()` method returns `-1` if it can't find the input parameter in the
|
||||||
|
string. You merely check for the value `-1` and `break` out of the loop.
|
||||||
|
|
||||||
|
#### Work with different types of symbol sets
|
||||||
|
|
||||||
|
This time, search for several different symbols, not just a set of parentheses.
|
||||||
|
|
||||||
|
Update the `message` string, adding different types of symbols like square `[]`
|
||||||
|
brackets and curly braces `{}`. To search for multiple symbols simultaneously,
|
||||||
|
use `.IndexOfAny()`. You search with `.IndexOfAny()` to return the index of the
|
||||||
|
first symbol from the array `openSymbols` found in the message string.
|
||||||
|
|
||||||
|
Update your code as follows:
|
||||||
|
|
||||||
|
```cs
|
||||||
|
string message = "Help (find) the {opening symbols}";
|
||||||
|
Console.WriteLine($"Searching THIS Message: {message}");
|
||||||
|
char[] openSymbols = { '[', '{', '(' };
|
||||||
|
int startPosition = 5;
|
||||||
|
int openingPosition = message.IndexOfAny(openSymbols);
|
||||||
|
Console.WriteLine($"Found WITHOUT using startPosition: {message.Substring(openingPosition)}");
|
||||||
|
|
||||||
|
openingPosition = message.IndexOfAny(openSymbols, startPosition);
|
||||||
|
Console.WriteLine($"Found WITH using startPosition {startPosition}: {message.Substring(openingPosition)}");
|
||||||
|
```
|
||||||
|
|
||||||
|
Save your code file, and then run your code.
|
||||||
|
|
||||||
|
You should see the following output:
|
||||||
|
|
||||||
|
```txt
|
||||||
|
Searching THIS Message: Help (find) the {opening symbols}
|
||||||
|
Found WITHOUT using startPosition: (find) the {opening symbols}
|
||||||
|
Found WITH using startPosition 5: (find) the {opening symbols}
|
||||||
|
```
|
||||||
|
|
||||||
|
Take a minute to review the code previously entered.
|
||||||
|
|
||||||
|
You used `.IndexOfAny()` without, and then with, the starting position overload.
|
||||||
|
|
||||||
|
Now that you found an opening symbol, you need to find its matching closing
|
||||||
|
symbol.
|
||||||
|
|
||||||
|
Update your code as follows:
|
||||||
|
|
||||||
|
```cs
|
||||||
|
string message = "(What if) I have [different symbols] but every {open symbol} needs a [matching closing symbol]?";
|
||||||
|
|
||||||
|
// The IndexOfAny() helper method requires a char array of characters.
|
||||||
|
// You want to look for:
|
||||||
|
char[] openSymbols = { '[', '{', '(' };
|
||||||
|
|
||||||
|
// You'll use a slightly different technique for iterating through
|
||||||
|
// the characters in the string. This time, use the closing
|
||||||
|
// position of the previous iteration as the starting index for the
|
||||||
|
//next open symbol. So, you need to initialize the closingPosition
|
||||||
|
// variable to zero:
|
||||||
|
int closingPosition = 0;
|
||||||
|
|
||||||
|
while (true) {
|
||||||
|
int openingPosition = message.IndexOfAny(openSymbols, closingPosition);
|
||||||
|
if (openingPosition == -1) break;
|
||||||
|
string currentSymbol = message.Substring(openingPosition, 1);
|
||||||
|
// Now find the matching closing symbol
|
||||||
|
char matchingSymbol = ' ';
|
||||||
|
switch (currentSymbol) {
|
||||||
|
case "[":
|
||||||
|
matchingSymbol = ']';
|
||||||
|
break;
|
||||||
|
case "{":
|
||||||
|
matchingSymbol = '}';
|
||||||
|
break;
|
||||||
|
case "(":
|
||||||
|
matchingSymbol = ')';
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
// To find the closingPosition, use an overload of the IndexOf method to specify
|
||||||
|
// that the search for the matchingSymbol should start at the openingPosition in the string.
|
||||||
|
openingPosition += 1;
|
||||||
|
closingPosition = message.IndexOf(matchingSymbol, openingPosition);
|
||||||
|
|
||||||
|
// Finally, use the techniques you've already learned to display the sub-string:
|
||||||
|
int length = closingPosition - openingPosition;
|
||||||
|
Console.WriteLine(message.Substring(openingPosition, length));
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
Take a few minutes to examine the previous code and to read the comments that
|
||||||
|
help explain the code.
|
||||||
|
|
||||||
|
Continue examining the code and locate the following line of code using
|
||||||
|
`IndexOf()` to define `closingPosition`:
|
||||||
|
|
||||||
|
```cs
|
||||||
|
closingPosition = message.IndexOf(matchingSymbol, openingPosition);
|
||||||
|
```
|
||||||
|
|
||||||
|
The variable `closingPosition` is used to find the length passed into the
|
||||||
|
`Substring()` method, and to find the next `openingPosition` value:
|
||||||
|
|
||||||
|
```cs
|
||||||
|
int openingPosition = message.IndexOfAny(openSymbols, closingPosition);
|
||||||
|
```
|
||||||
|
|
||||||
|
For this reason, the `closingPosition` variable is defined outside of the
|
||||||
|
`while` loop scope and initialized to `0` for the first iteration.
|
||||||
|
|
||||||
|
Save your code file, and then run your code. You should see the following
|
||||||
|
output:
|
||||||
|
|
||||||
|
```txt
|
||||||
|
What if
|
||||||
|
different symbols
|
||||||
|
open symbol
|
||||||
|
matching closing symbol
|
||||||
|
```
|
||||||
|
|
||||||
|
### Recap
|
||||||
|
|
||||||
|
Here are two important things to remember:
|
||||||
|
|
||||||
|
- `LastIndexOf()` returns the last position of a character or string inside of
|
||||||
|
another string.
|
||||||
|
- `IndexOfAny()` returns the first position of any character from a `char` array that occurs
|
||||||
|
inside of another string.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Exercise
|
||||||
|
|
||||||
|
### Use the Remove() and Replace() methods
|
||||||
|
|
||||||
|
In this exercise, you remove characters from a string using the `Remove()`
|
||||||
|
method and replace characters using the `Replace()` method.
|
||||||
|
|
||||||
|
Sometimes, you need to modify the contents of a string, removing or replacing
|
||||||
|
characters. While you could replace characters with the tools you already know,
|
||||||
|
it requires a bit of temporarily storing and stitching strings back together.
|
||||||
|
Fortunately, the `string` data type has other built-in methods, `Remove()` and
|
||||||
|
`Replace()`, for these specialized scenarios.
|
||||||
|
|
||||||
|
### Use the Remove() method
|
||||||
|
|
||||||
|
You would typically use `Remove()` when there's a standard and consistent
|
||||||
|
position of the characters you want to remove from the string.
|
||||||
|
|
||||||
|
This exercise has data stored in older files having a fixed length, and with
|
||||||
|
character positions allocated for certain fields of information. The first five positions represent a customer identification number. The next 20 positions contain a customer's name. The next six positions represent the customer's latest invoice amount, and the last three positions represent the number of items ordered on that invoice.
|
||||||
|
|
||||||
|
In the following steps, you need to remove the customer's name to format the
|
||||||
|
data so that it can be sent to a separate process. Since you know the exact
|
||||||
|
position and length of the user's name, you can easily remove it using the
|
||||||
|
`Remove()` method.
|
||||||
|
|
||||||
|
#### Remove characters in specific locations from a string
|
||||||
|
|
||||||
|
Update your code as follows:
|
||||||
|
|
||||||
|
```cs
|
||||||
|
string data = "12345John Smith          5000  3  ";
|
||||||
|
string updatedData = data.Remove(5, 20);
|
||||||
|
Console.WriteLine(updatedData);
|
||||||
|
```
|
||||||
|
|
||||||
|
The Program.cs file must be saved before building or running the code.
|
||||||
|
|
||||||
|
At the Terminal command prompt, to run your code, type `dotnet run` and then
|
||||||
|
press Enter.
|
||||||
|
|
||||||
|
You should see the following output:
|
||||||
|
|
||||||
|
```txt
|
||||||
|
123455000  3  
|
||||||
|
```
|
||||||
|
|
||||||
|
The `Remove()` method works similarly to the `Substring()` method. You supply a
|
||||||
|
starting position and the length to remove those characters from the string.
|
||||||
|
|
||||||
|
### Use the `Replace()` method
|
||||||
|
|
||||||
|
The `Replace()` method is used when you need to replace one or more characters
|
||||||
|
with a different character (or no character). The `Replace()` method is
|
||||||
|
different from the other methods used so far: it replaces every instance of the
|
||||||
|
given characters, not just the first or last instance.
|
||||||
|
|
||||||
|
#### Remove characters no matter where they appear in a string
|
||||||
|
|
||||||
|
Update your code as follows:
|
||||||
|
|
||||||
|
```cs
|
||||||
|
string message = "This--is--ex-amp-le--da-ta";
|
||||||
|
message = message.Replace("--", " ");
|
||||||
|
message = message.Replace("-", "");
|
||||||
|
Console.WriteLine(message);
|
||||||
|
```
|
||||||
|
|
||||||
|
Save your code file, and then run your code.
|
||||||
|
|
||||||
|
You should see the following output:
|
||||||
|
|
||||||
|
```txt
|
||||||
|
This is example data
|
||||||
|
```
|
||||||
|
|
||||||
|
Here you used the `Replace()` method twice. The first time you replaced the
|
||||||
|
string `--` with a space. The second time you replaced the string `-` with an
|
||||||
|
empty string, which completely removes the character from the string.
|
||||||
|
|
||||||
|
### Recap
|
||||||
|
|
||||||
|
Here are two important things to remember:
|
||||||
|
|
||||||
|
- The `Remove()` method works like the `Substring()` method, except that it
|
||||||
|
deletes the specified characters in the string.
|
||||||
|
- The `Replace()` method swaps all instances of a string with a new string.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Exercise
|
||||||
|
|
||||||
|
### Complete a challenge to extract, replace, and remove data from an input string
|
||||||
|
|
||||||
|
Code challenges reinforce learning and help you gain some confidence before
|
||||||
|
continuing on.
|
||||||
|
|
||||||
|
In this challenge, you work with a string that contains a fragment of HTML. You
|
||||||
|
extract data from the HTML fragment, replace some of its content, and remove
|
||||||
|
other parts of its content to achieve the desired output.
|
||||||
|
|
||||||
|
If you're unfamiliar with HTML code, review the
|
||||||
|
["Quick HTML primer"](https://learn.microsoft.com/en-us/training/modules/csharp-modify-content/5-exercise-challenge-extract-replace-remove-data#quick-html-primer)
|
||||||
|
section at the end of this unit.
|
||||||
|
|
||||||
|
#### Extract, replace, and remove data from an input string
|
||||||
|
|
||||||
|
In the code editor, add the following "starter" code to get the data for the
|
||||||
|
challenge:
|
||||||
|
|
||||||
|
```cs
|
||||||
|
const string input = "<div><h2>Widgets &trade;</h2><span>5000</span></div>";
|
||||||
|
|
||||||
|
string quantity = "";
|
||||||
|
string output = "";
|
||||||
|
|
||||||
|
// Your work here
|
||||||
|
|
||||||
|
Console.WriteLine(quantity);
|
||||||
|
Console.WriteLine(output);
|
||||||
|
```
|
||||||
|
|
||||||
|
If you run the code, the output displays blank lines because the starting values for
|
||||||
|
quantity and output are empty string values.
|
||||||
|
|
||||||
|
Take a minute to review the initial line of the code containing a string of HTML.
|
||||||
|
|
||||||
|
```cs
|
||||||
|
const string input = "<div><h2>Widgets &trade;</h2><span>5000</span></div>";
|
||||||
|
```
|
||||||
|
|
||||||
|
Notice the tags `<div>`, `<h2>`, and `<span>`, and the symbol code `&trade;`, contained in
|
||||||
|
the `input` variable.
|
||||||
|
|
||||||
|
Examine the desired output for the final program output:
|
||||||
|
|
||||||
|
```txt
|
||||||
|
Quantity: 5000
|
||||||
|
Output: <h2>Widgets &reg;</h2><span>5000</span>
|
||||||
|
```
|
||||||
|
|
||||||
|
Begin adding your solution code to the starter code under the comment
|
||||||
|
`// Your work here`.
|
||||||
|
|
||||||
|
Set the `quantity` variable to the value obtained by extracting the text
|
||||||
|
between the `<span>` and `</span>` tags.
|
||||||
|
|
||||||
|
Set the output variable to the value of input, then remove the `<div>` and
|
||||||
|
`</div>` tags.
|
||||||
|
|
||||||
|
Replace the HTML character `™` (`&trade;`) with `®` (`&reg;`) in the `output`
|
||||||
|
variable.
|
||||||
|
|
||||||
|
Run your solution and verify the output matches the expected output.
|
||||||
|
|
||||||
|
```txt
|
||||||
|
Quantity: 5000
|
||||||
|
Output: <h2>Widgets &reg;</h2><span>5000</span>
|
||||||
|
```
|
||||||
|
|
||||||
|
Whether you get stuck and need to peek at the solution or you finish
|
||||||
|
successfully, continue on to view a solution to this challenge.
|
||||||
|
|
||||||
|
### Quick HTML primer
|
||||||
|
|
||||||
|
In case you're unfamiliar with HTML, it's the markup language that is used to
|
||||||
|
create all web pages. Skip this section if you have a good understanding of
|
||||||
|
HTML. The information is designed to provide enough information to complete
|
||||||
|
this challenge, and not to be a comprehensive HTML tutorial.
|
||||||
|
|
||||||
|
In HTML, you define the structure of a document using tags. A tag is composed
|
||||||
|
of:
|
||||||
|
|
||||||
|
- an opening angle bracket `<`
|
||||||
|
- a closing angle bracket `>`
|
||||||
|
- a word describing the type of tag, so for example: `<div>`, `<span>`, `<h2>`,
|
||||||
|
etc.
|
||||||
|
|
||||||
|
Each tag has a corresponding closing tag that introduces a forward slash
|
||||||
|
character `/`. So, if you see `<div>` there should be a corresponding `</div>`
|
||||||
|
tag.
|
||||||
|
|
||||||
|
The content between the opening and closing tag is the content of that tag. The
|
||||||
|
content can include text and other tags.
|
||||||
|
|
||||||
|
A set of tags can be embedded inside another set of tags, giving an HTML
|
||||||
|
document its hierarchical structure.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
### Summary
|
||||||
|
|
||||||
|
Your goal was to extract, remove, and replace values in strings. Often, the
|
||||||
|
data you receive has extraneous data or characters that you need to avoid or
|
||||||
|
eliminate before you can use the target data.
|
||||||
|
|
||||||
|
Utilizing the `IndexOf()` method enabled you to identify the position of a
|
||||||
|
character or string within another string. The position returned from the
|
||||||
|
`IndexOf()` method was the first building block to using the `Substring()`
|
||||||
|
method to extract a portion of a string given the starting position and the
|
||||||
|
number of characters to extract (the length). It also enabled you to use the
|
||||||
|
`Remove()` method to eliminate characters from a string given the starting
|
||||||
|
position and the length. You learned of variations like `LastIndexOf()` method
|
||||||
|
to find the last position of a character or string within another string, and
|
||||||
|
the `IndexOfAny()` to find the position of any value of a given `char` array.
|
||||||
|
You used the `while` statement to iterate through a longer string to find and
|
||||||
|
extract all instances of a character or string within a larger source string.
|
||||||
|
Finally, you used the `Replace()` method to swap all instances of a character
|
||||||
|
or string inside of a larger string.
|
||||||
|
|
||||||
|
While it might be possible to perform these kinds of operations using a `char`
|
||||||
|
array, iterating through each `char` to find matches, keeping track of the
|
||||||
|
starting and ending points you wanted to locate, and so on, it would take many
|
||||||
|
more steps to accomplish what these string helper methods can accomplish in a
|
||||||
|
single call.
|
24
024_String_data_type_methods/challenge/Program.cs
Normal file
24
024_String_data_type_methods/challenge/Program.cs
Normal file
@ -0,0 +1,24 @@
|
|||||||
|
// Challenge solution: from a fragment of HTML, extract the quantity held in
// the <span>, drop the outer <div> wrapper, and swap the trademark entity for
// the registered-trademark entity.
const string input = "<div><h2>Widgets &trade;</h2><span>5000</span></div>";

// Every tag and entity is spelled exactly once, and every offset below is
// derived from .Length, so changing a tag can never leave a stale
// hand-counted number behind.
const string openDiv = "<div>";
const string closeDiv = "</div>";
const string openSpan = "<span>";
const string closeSpan = "</span>";
const string tradeSymbol = "&trade;";
const string regSymbol = "&reg;";

// Extract the text between <div> and </div>.
// Ordinal comparison: these are markup tokens, not linguistic text.
int contentStart = input.IndexOf(openDiv, StringComparison.Ordinal) + openDiv.Length;
int contentEnd = input.IndexOf(closeDiv, StringComparison.Ordinal);
string output = input.Substring(contentStart, contentEnd - contentStart);

// Replace the trademark entity. The search text includes the trailing ';'
// so the whole entity is swapped rather than just its prefix.
output = output.Replace(tradeSymbol, regSymbol);

// Extract the quantity between <span> and </span>.
int quantityStart = input.IndexOf(openSpan, StringComparison.Ordinal) + openSpan.Length;
int quantityEnd = input.IndexOf(closeSpan, StringComparison.Ordinal);
string quantity = input.Substring(quantityStart, quantityEnd - quantityStart);

Console.WriteLine($"Quantity: {quantity}");
Console.WriteLine($"Output: {output}");
|
10
024_String_data_type_methods/challenge/challenge.csproj
Normal file
10
024_String_data_type_methods/challenge/challenge.csproj
Normal file
@ -0,0 +1,10 @@
|
|||||||
|
<Project Sdk="Microsoft.NET.Sdk">
|
||||||
|
|
||||||
|
<PropertyGroup>
|
||||||
|
<OutputType>Exe</OutputType>
|
||||||
|
<TargetFramework>net8.0</TargetFramework>
|
||||||
|
<ImplicitUsings>enable</ImplicitUsings>
|
||||||
|
<Nullable>enable</Nullable>
|
||||||
|
</PropertyGroup>
|
||||||
|
|
||||||
|
</Project>
|
127
024_String_data_type_methods/string_methods/Program.cs
Normal file
127
024_String_data_type_methods/string_methods/Program.cs
Normal file
@ -0,0 +1,127 @@
|
|||||||
|
// Locate the first '(' and ')' in the message and print their indexes.
string message = "Find what is (inside the parentheses)";

int openingPosition = message.IndexOf('(');
int closingPosition = message.IndexOf(')');

Console.WriteLine(openingPosition);
Console.WriteLine(closingPosition);

// First slice: it starts ON the '(' itself, so the bracket is printed too.
int length = closingPosition - openingPosition;
string withBracket = message.Substring(openingPosition, length);
Console.WriteLine(withBracket);

// Second slice: step past the '(' so only the enclosed text is printed.
openingPosition++;
length = closingPosition - openingPosition;
string insideOnly = message.Substring(openingPosition, length);
Console.WriteLine(insideOnly);
|
||||||
|
|
||||||
|
Console.WriteLine("\n--------------------------------\n");

// Print the text that sits between an opening and a closing <span> tag.
message = "What is the value <span>between the tags</span>?";

const string openSpan = "<span>";
const string closeSpan = "</span>";

// Ordinal comparison: the tags are markup tokens, not linguistic text.
openingPosition = message.IndexOf(openSpan, StringComparison.Ordinal);
closingPosition = message.IndexOf(closeSpan, StringComparison.Ordinal);

// Step past the opening tag using its own length rather than a hard-coded 6,
// so the offset stays correct if the tag text is ever changed.
openingPosition += openSpan.Length;
length = closingPosition - openingPosition;
Console.WriteLine(message.Substring(openingPosition, length));
|
||||||
|
|
||||||
|
Console.WriteLine("\n--------------------------------\n");

// Only the final parenthesised group is wanted, so both searches run from
// the end of the string.
message = "(What if) I am (only interested) in the last (set of parentheses)?";

closingPosition = message.LastIndexOf(')');
// The + 1 moves the start from the '(' itself to the first enclosed character.
openingPosition = message.LastIndexOf('(') + 1;

length = closingPosition - openingPosition;
Console.WriteLine(message.Substring(openingPosition, length));
|
||||||
|
|
||||||
|
Console.WriteLine("\n--------------------------------\n");

// Print every parenthesised group by repeatedly consuming the front of the
// message until no '(' is left.
message = "(What if) there are (more than) one (set of parentheses)?";
while ((openingPosition = message.IndexOf('(')) != -1) {
    openingPosition++;
    closingPosition = message.IndexOf(')');
    length = closingPosition - openingPosition;
    Console.WriteLine(message.Substring(openingPosition, length));

    // The single-argument Substring overload keeps everything after the
    // ')' just handled, i.e. the part of the message not yet processed.
    message = message.Substring(closingPosition + 1);
}
|
||||||
|
|
||||||
|
Console.WriteLine("\n--------------------------------\n");

// Compare IndexOfAny with and without an explicit start index.
message = "Help (find) the {opening symbols}";
Console.WriteLine($"Searching THIS Message: {message}");

char[] openSymbols = { '[', '{', '(' };
int startPosition = 5;

// Search from the beginning of the message.
openingPosition = message.IndexOfAny(openSymbols);
string fromFirstSymbol = message.Substring(openingPosition);
Console.WriteLine("Found WITHOUT using startPosition: " + fromFirstSymbol);

// Same search, but beginning at index startPosition.
openingPosition = message.IndexOfAny(openSymbols, startPosition);
string fromStartPosition = message.Substring(openingPosition);
Console.WriteLine($"Found WITH using startPosition {startPosition}: " + fromStartPosition);
|
||||||
|
|
||||||
|
Console.WriteLine("\n--------------------------------\n");

// Print the text enclosed by each bracket pair, whatever kind of bracket
// opens it.
message = "(What if) I have [different symbols] but every {open symbol} " +
    "needs a [matching closing symbol]?";

// IndexOfAny takes the candidate characters as a char array.
char[] openSymbols2 = { '[', '{', '(' };

// Each pass resumes the search from where the previous pass stopped, so the
// closing position starts out at zero.
closingPosition = 0;

while ((openingPosition = message.IndexOfAny(openSymbols2, closingPosition)) != -1) {
    // Pick the closing character that pairs with the opening one just found;
    // a space is the fallback when none of the cases applies.
    char matchingSymbol = message[openingPosition] switch {
        '[' => ']',
        '{' => '}',
        '(' => ')',
        _ => ' ',
    };

    // Move past the opening symbol, then look for its partner from there on
    // using the IndexOf overload that accepts a start index.
    openingPosition++;
    closingPosition = message.IndexOf(matchingSymbol, openingPosition);

    // Display the enclosed text.
    length = closingPosition - openingPosition;
    Console.WriteLine(message.Substring(openingPosition, length));
}
|
||||||
|
|
||||||
|
Console.WriteLine("\n--------------------------------\n");

// Remove(5, 20) deletes 20 characters starting at index 5, so the string must
// be at least 25 characters long or the call throws
// ArgumentOutOfRangeException. The record is therefore built with explicit
// padding instead of relying on runs of spaces inside one literal, which are
// easy to lose when the text is copied or reformatted.
// NOTE(review): field widths 20/6/3 are assumed from the Remove(5, 20) call
// and the trailing-space layout; confirm against the intended record format.
string data = "12345" + "John Smith".PadRight(20) + "5000".PadRight(6) + "3".PadRight(3);
string updatedData = data.Remove(5, 20);
Console.WriteLine(updatedData);
|
||||||
|
|
||||||
|
Console.WriteLine("\n--------------------------------\n");

// Clean up the sample text: double dashes become spaces first, then any
// remaining single dashes are dropped. The order matters, because removing
// single dashes first would also destroy the double-dash separators.
message = "This--is--ex-amp-le--da-ta";
message = message
    .Replace("--", " ")
    .Replace("-", "");
Console.WriteLine(message);
|
@ -0,0 +1,10 @@
|
|||||||
|
<!-- MSBuild project file using the Microsoft.NET.Sdk SDK. -->
<Project Sdk="Microsoft.NET.Sdk">
|
||||||
|
|
||||||
|
<PropertyGroup>
|
||||||
|
<!-- Output type is Exe (an executable rather than a library). -->
<OutputType>Exe</OutputType>
|
||||||
|
<!-- Target framework moniker: .NET 8. -->
<TargetFramework>net8.0</TargetFramework>
|
||||||
|
<!-- Implicit usings are on, so common namespaces need no using directive. -->
<ImplicitUsings>enable</ImplicitUsings>
|
||||||
|
<!-- Nullable reference types are enabled for the project. -->
<Nullable>enable</Nullable>
|
||||||
|
</PropertyGroup>
|
||||||
|
|
||||||
|
</Project>
|
@ -28,3 +28,4 @@ Following
|
|||||||
21. [Convert data types](./021_Casting_and_conversion_techniques/021_csharp.md)
|
21. [Convert data types](./021_Casting_and_conversion_techniques/021_csharp.md)
|
||||||
22. [Array Operations](./022_array_operations/022_csharp.md)
|
22. [Array Operations](./022_array_operations/022_csharp.md)
|
||||||
23. [Format alphanumeric data](./023_alphanumeric_data_format/023_csharp.md)
|
23. [Format alphanumeric data](./023_alphanumeric_data_format/023_csharp.md)
|
||||||
|
24. [String data type methods](./024_String_data_type_methods/024_csharp.md)
|
||||||
|
Loading…
Reference in New Issue
Block a user