VB.NET 正则表达式完全指南
VB.NET 通过 System.Text.RegularExpressions 命名空间提供正则表达式支持。本指南将详细介绍 VB.NET 中正则表达式的使用方法、性能优化和最佳实践。
1. 基础知识
1.1 导入命名空间
Imports System.Text.RegularExpressions
1.2 基本使用
''' <summary>
''' Walks through the three basic Regex operations: testing for a match,
''' fetching the first match, and enumerating every match.
''' </summary>
Public Class RegexBasics
    ''' <summary>
    ''' Runs the pattern \d+ against a fixed sample sentence and writes each
    ''' digit run it finds to the console, prefixed with "Found: ".
    ''' </summary>
    Public Sub BasicExamples()
        Dim sample As String = "Hello, my phone is 123-456-7890"
        Dim digitRuns As New Regex("\d+")

        ' Existence check only; the result is not used any further.
        Dim hasDigits As Boolean = digitRuns.IsMatch(sample)

        ' First occurrence.
        Dim firstHit As Match = digitRuns.Match(sample)
        If firstHit.Success Then Console.WriteLine($"Found: {firstHit.Value}")

        ' Every occurrence, in order of appearance.
        Dim allHits As MatchCollection = digitRuns.Matches(sample)
        For index As Integer = 0 To allHits.Count - 1
            Console.WriteLine($"Found: {allHits(index).Value}")
        Next
    End Sub
End Class
1.3 正则表达式选项
''' <summary>
''' Shows the commonly used regex construction options.
''' </summary>
''' <remarks>
''' This class has the same simple name as the
''' System.Text.RegularExpressions.RegexOptions enumeration. Inside the class
''' the unqualified name binds to the class itself, so every enum reference
''' below is fully qualified; otherwise the members (IgnoreCase, Multiline, ...)
''' cannot be resolved.
''' </remarks>
Public Class RegexOptions
    ''' <summary>
    ''' Constructs one Regex per option to illustrate the syntax. The instances
    ''' are not used after construction.
    ''' </summary>
    Public Sub OptionsExample()
        ' Case-insensitive matching.
        Dim caseInsensitive As New Regex("hello",
            Global.System.Text.RegularExpressions.RegexOptions.IgnoreCase)

        ' Multiline mode: ^ and $ match at line boundaries.
        Dim multiline As New Regex("^start",
            Global.System.Text.RegularExpressions.RegexOptions.Multiline)

        ' Ignore unescaped whitespace and allow # comments inside the pattern.
        ' The literal's lines stay unindented so the pattern text is unchanged.
        Dim ignored As New Regex("
\d+ # 匹配数字
\s* # 可选的空白字符
\w+ # 匹配单词
", Global.System.Text.RegularExpressions.RegexOptions.IgnorePatternWhitespace)

        ' Compile the regex for faster repeated matching.
        Dim compiled As New Regex("\d+",
            Global.System.Text.RegularExpressions.RegexOptions.Compiled)
    End Sub
End Class
2. 正则表达式语法
2.1 字符匹配
''' <summary>
''' Demonstrates the basic character classes: digits, word characters,
''' whitespace, and a custom bracket class.
''' </summary>
Public Class CharacterMatching
    ''' <summary>
    ''' Applies several character-class patterns to a fixed sample string.
    ''' Only the digit runs are written to the console ("Number: ...");
    ''' the remaining results are collected but not used.
    ''' </summary>
    Public Sub MatchingExamples()
        Dim sample As String = "VB.NET 2022 is awesome! Price: $99.99"

        ' Digit runs.
        Dim digitHits As MatchCollection = New Regex("\d+").Matches(sample)
        For index As Integer = 0 To digitHits.Count - 1
            Console.WriteLine($"Number: {digitHits(index).Value}")
        Next

        ' Word-character runs, collected into a list.
        Dim wordPattern As New Regex("\w+")
        Dim wordMatches = (From hit In wordPattern.Matches(sample).Cast(Of Match)()
                           Select hit.Value).ToList()

        ' Split on runs of whitespace.
        Dim parts() As String = Regex.Split(sample, "\s+")

        ' Custom character class: vowels, case-insensitive.
        Dim vowelPattern As New Regex("[aeiou]", RegexOptions.IgnoreCase)
        Dim vowelMatches = (From hit In vowelPattern.Matches(sample).Cast(Of Match)()
                            Select hit.Value).ToList()
    End Sub
End Class
2.2 分组和捕获
''' <summary>
''' Demonstrates numbered and named capture groups.
''' </summary>
Public Class GroupingExample
    ''' <summary>
    ''' Extracts "first last" name pairs from a fixed comma-separated list and
    ''' writes the captured parts to the console, first via numbered groups and
    ''' then via named groups.
    ''' </summary>
    Public Sub GroupExamples()
        Dim names As String = "John Smith, Jane Doe, Bob Johnson"

        ' Numbered groups: 0 is the whole match, 1 and 2 are the parenthesised parts.
        Dim numbered As MatchCollection = New Regex("(\w+)\s(\w+)").Matches(names)
        For index As Integer = 0 To numbered.Count - 1
            Dim parts As GroupCollection = numbered(index).Groups
            Console.WriteLine($"Full name: {parts(0).Value}")
            Console.WriteLine($"First name: {parts(1).Value}")
            Console.WriteLine($"Last name: {parts(2).Value}")
        Next

        ' Named groups: the same captures, addressed by name.
        Dim named As MatchCollection = New Regex("(?<first>\w+)\s(?<last>\w+)").Matches(names)
        For index As Integer = 0 To named.Count - 1
            Dim parts As GroupCollection = named(index).Groups
            Dim firstName As String = parts("first").Value
            Dim lastName As String = parts("last").Value
            Console.WriteLine($"First: {firstName}")
            Console.WriteLine($"Last: {lastName}")
        Next
    End Sub
End Class
3. 高级特性
3.1 替换操作
''' <summary>
''' Demonstrates the three flavours of Regex.Replace: a literal replacement,
''' a match-evaluator delegate, and a substitution pattern using named groups.
''' </summary>
Public Class ReplacementOperations
    ''' <summary>
    ''' Runs all three replacement styles on <paramref name="text"/>.
    ''' </summary>
    ''' <param name="text">The input to transform.</param>
    ''' <returns>
    ''' The input with every "first last" word pair rewritten as "last, first".
    ''' The results of the first two replacements are computed but discarded.
    ''' </returns>
    Public Function ReplaceExample(text As String) As String
        ' Literal replacement: every digit run becomes "X".
        Dim masked As String = Regex.Replace(text, "\d+", "X")

        ' Delegate replacement: every digit run is replaced by twice its value.
        Dim doubler As New MatchEvaluator(AddressOf DoubleNumber)
        Dim doubled As String = Regex.Replace(text, "\d+", doubler)

        ' Substitution pattern referring to the named groups.
        Dim namePair As New Regex("(?<first>\w+)\s(?<last>\w+)")
        Return namePair.Replace(text, "${last}, ${first}")
    End Function

    ' Parses the matched digits as an Integer and returns twice that value as text.
    Private Shared Function DoubleNumber(hit As Match) As String
        Dim parsed As Integer = Integer.Parse(hit.Value)
        Return (parsed * 2).ToString()
    End Function
End Class
3.2 前瞻和后顾
''' <summary>
''' Shows the syntax of the four lookaround assertions.
''' </summary>
Public Class LookAroundExample
    ''' <summary>
    ''' Constructs one Regex per lookaround kind. The instances (and the sample
    ''' text) are only declared for illustration; nothing is matched or printed.
    ''' </summary>
    Public Sub LookAroundDemo()
        Dim sample As String = "Price: $100, Cost: $50"

        ' Lookahead: digits that are / are not followed by optional whitespace and "dollars".
        Dim followedByDollars As New Regex("\d+(?=\s*dollars)")
        Dim notFollowedByDollars As New Regex("\d+(?!\s*dollars)")

        ' Lookbehind: digits that are / are not immediately preceded by a "$" sign.
        Dim afterDollarSign As New Regex("(?<=\$)\d+")
        Dim notAfterDollarSign As New Regex("(?<!\$)\d+")
    End Sub
End Class
4. 实用工具类
4.1 验证器
''' <summary>
''' Format validators for e-mail addresses, 11-digit mobile phone numbers
''' and passwords, backed by shared compiled regular expressions.
''' </summary>
''' <remarks>
''' Every pattern ends with \z rather than $. In .NET, $ also matches just
''' before a trailing newline, so "value" followed by a line feed would
''' otherwise pass validation. Digits are written as [0-9] because \d matches
''' any Unicode decimal digit, not only ASCII 0-9.
''' </remarks>
Public Class Validator
    ' local-part@domain.tld, with a TLD of at least two ASCII letters.
    Private Shared ReadOnly EmailRegex As New Regex(
        "^[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}\z",
        RegexOptions.Compiled)

    ' Exactly 11 ASCII digits: a leading 1, a second digit of 3-9, then nine more.
    Private Shared ReadOnly PhoneRegex As New Regex(
        "^1[3-9][0-9]{9}\z",
        RegexOptions.Compiled)

    ' At least 8 characters, ASCII letters and digits only,
    ' containing at least one letter and at least one digit.
    Private Shared ReadOnly PasswordRegex As New Regex(
        "^(?=.*[A-Za-z])(?=.*[0-9])[A-Za-z0-9]{8,}\z",
        RegexOptions.Compiled)

    ''' <summary>Checks whether <paramref name="email"/> looks like an e-mail address.</summary>
    ''' <param name="email">Candidate address; Nothing or empty is rejected.</param>
    ''' <returns>True when the whole string matches the e-mail pattern.</returns>
    Public Shared Function IsValidEmail(email As String) As Boolean
        If String.IsNullOrEmpty(email) Then Return False
        Return EmailRegex.IsMatch(email)
    End Function

    ''' <summary>Checks whether <paramref name="phone"/> is an 11-digit number starting with 13-19.</summary>
    ''' <param name="phone">Candidate number; Nothing or empty is rejected.</param>
    ''' <returns>True when the whole string matches the phone pattern.</returns>
    Public Shared Function IsValidPhone(phone As String) As Boolean
        If String.IsNullOrEmpty(phone) Then Return False
        Return PhoneRegex.IsMatch(phone)
    End Function

    ''' <summary>Checks whether <paramref name="password"/> satisfies the password rules.</summary>
    ''' <param name="password">Candidate password; Nothing or empty is rejected.</param>
    ''' <returns>True when the whole string matches the password pattern.</returns>
    Public Shared Function IsValidPassword(password As String) As Boolean
        If String.IsNullOrEmpty(password) Then Return False
        Return PasswordRegex.IsMatch(password)
    End Function
End Class
4.2 文本处理器
''' <summary>
''' Text utilities built on regular expressions: URL extraction, HTML tag
''' removal, and whitespace normalisation.
''' </summary>
Public Class TextProcessor
    ' http:// or https:// followed by host characters (or %XX escapes) and any
    ' non-whitespace remainder.
    Private Shared ReadOnly UrlRegex As New Regex("https?://(?:[-\w.]|(?:%[\da-fA-F]{2}))+[^\s]*", RegexOptions.Compiled)

    ' Anything between "<" and the next ">".
    Private Shared ReadOnly HtmlTagRegex As New Regex("<[^>]+>", RegexOptions.Compiled)

    ''' <summary>Finds every URL-like substring in <paramref name="text"/>.</summary>
    ''' <param name="text">Text to scan; may be Nothing or empty.</param>
    ''' <returns>The matched URLs in order of appearance; an empty sequence for Nothing or empty input.</returns>
    Public Shared Function ExtractUrls(text As String) As IEnumerable(Of String)
        If String.IsNullOrEmpty(text) Then
            Return Enumerable.Empty(Of String)()
        End If

        Return From hit In UrlRegex.Matches(text).Cast(Of Match)()
               Select hit.Value
    End Function

    ''' <summary>Removes everything that looks like an HTML tag.</summary>
    ''' <param name="html">Markup to strip; may be Nothing or empty.</param>
    ''' <returns>The text with tags removed; an empty string for Nothing or empty input.</returns>
    Public Shared Function StripHtmlTags(html As String) As String
        Return If(String.IsNullOrEmpty(html),
                  String.Empty,
                  HtmlTagRegex.Replace(html, String.Empty))
    End Function

    ''' <summary>Trims the text and collapses each run of whitespace to a single space.</summary>
    ''' <param name="text">Text to normalise; may be Nothing or empty.</param>
    ''' <returns>The normalised text; an empty string for Nothing or empty input.</returns>
    Public Shared Function CleanWhitespace(text As String) As String
        If String.IsNullOrEmpty(text) Then
            Return String.Empty
        End If

        Dim trimmed As String = text.Trim()
        Return Regex.Replace(trimmed, "\s+", " ")
    End Function
End Class
5. 性能优化
5.1 静态编译正则表达式
''' <summary>
''' Shows two ways of holding a compiled Regex in a shared field so that it is
''' built once and reused: eager initialisation and Lazy(Of Regex).
''' </summary>
Public Class RegexOptimization
    ' Built by the shared field initialiser.
    Private Shared ReadOnly CompiledRegex As New Regex("\d+", RegexOptions.Compiled)

    ' Built on first access to .Value.
    Private Shared ReadOnly LazyRegex As New Lazy(Of Regex)(AddressOf BuildDigitRegex)

    ' Factory used by LazyRegex.
    Private Shared Function BuildDigitRegex() As Regex
        Return New Regex("\d+", RegexOptions.Compiled)
    End Function

    ''' <summary>
    ''' Matches the literal "123" with both shared instances. The results are
    ''' not used; the method only illustrates the access pattern.
    ''' </summary>
    Public Sub OptimizedExample()
        Dim eagerResult As Boolean = CompiledRegex.IsMatch("123")

        Dim deferred As Regex = LazyRegex.Value
        Dim lazyResult As Boolean = deferred.IsMatch("123")
    End Sub
End Class
5.2 性能考虑
''' <summary>
''' Illustrative shared regex declarations for three performance tips.
''' The fields are declared only as examples and are not used by any member.
''' </summary>
Public Class PerformanceConsiderations
    ' 1. Choose appropriate options.
    Private Shared ReadOnly FastRegex As New Regex("\d+", RegexOptions.ExplicitCapture Or RegexOptions.Compiled)

    ' 2. Avoid overusing wildcards: the original author presents this as more
    '    efficient than .*foo.* - note that, unlike that pattern, the negated
    '    class used here never matches across a "/".
    Private Shared ReadOnly BetterRegex As New Regex("[^/]*foo[^/]*", RegexOptions.Compiled)

    ' 3. Use non-capturing groups, written (?: ... ).
    Private Shared ReadOnly NonCapturingRegex As New Regex("(?:\d+)(?:[a-z]+)", RegexOptions.Compiled)
End Class
6. 异常处理
''' <summary>
''' Helpers that wrap Regex construction and matching with explicit
''' handling of invalid patterns and match timeouts.
''' </summary>
Public Class RegexExceptionHandling
    ''' <summary>
    ''' Creates a compiled Regex, rethrowing construction failures with a
    ''' descriptive message.
    ''' </summary>
    ''' <param name="pattern">The regular expression pattern.</param>
    ''' <returns>A Regex built with RegexOptions.Compiled.</returns>
    ''' <exception cref="ArgumentException">
    ''' The Regex constructor rejected <paramref name="pattern"/>; the original
    ''' exception is attached as InnerException.
    ''' </exception>
    Public Shared Function CreateSafeRegex(pattern As String) As Regex
        Try
            Return New Regex(pattern, RegexOptions.Compiled)
        Catch ex As ArgumentException
            Throw New ArgumentException($"Invalid regex pattern: {ex.Message}", ex)
        End Try
    End Function

    ''' <summary>
    ''' Tests <paramref name="input"/> against <paramref name="pattern"/> without
    ''' letting pattern errors or runaway matches escape to the caller.
    ''' </summary>
    ''' <param name="input">Text to test. Nothing yields False.</param>
    ''' <param name="pattern">The regular expression pattern. Nothing yields False.</param>
    ''' <param name="timeoutMilliseconds">
    ''' Upper bound for the match in milliseconds (default 1000). A value of
    ''' zero or less disables the limit.
    ''' </param>
    ''' <returns>
    ''' True when the pattern matches; False when it does not match, when an
    ''' argument is Nothing, when the pattern is invalid, or when the match
    ''' exceeds the timeout. The last two cases also write a message to the console.
    ''' </returns>
    Public Shared Function SafeIsMatch(input As String,
                                       pattern As String,
                                       Optional timeoutMilliseconds As Integer = 1000) As Boolean
        ' A missing argument is not an "invalid pattern"; answer False directly
        ' instead of reporting a misleading message from the catch block below.
        If input Is Nothing OrElse pattern Is Nothing Then Return False

        ' Without an explicit timeout the RegexMatchTimeoutException handler
        ' below would not be reached and a pathological pattern could run unbounded.
        Dim limit As TimeSpan = If(timeoutMilliseconds > 0,
                                   TimeSpan.FromMilliseconds(timeoutMilliseconds),
                                   Regex.InfiniteMatchTimeout)

        Try
            Return Regex.IsMatch(input, pattern, RegexOptions.None, limit)
        Catch ex As RegexMatchTimeoutException
            Console.WriteLine($"Regex matching timed out: {ex.Message}")
            Return False
        Catch ex As ArgumentException
            Console.WriteLine($"Invalid regex pattern: {ex.Message}")
            Return False
        End Try
    End Function
End Class
7. 单元测试
''' <summary>
''' Unit tests for the Validator and TextProcessor helpers.
''' </summary>
<TestClass>
Public Class ValidatorTests
    ''' <summary>Well-formed addresses are accepted; addresses missing a part are rejected.</summary>
    <TestMethod>
    Public Sub TestEmailValidation()
        For Each accepted As String In {"test@example.com", "user@domain.co.uk"}
            Assert.IsTrue(Validator.IsValidEmail(accepted))
        Next

        For Each rejected As String In {"invalid.email", "@domain.com"}
            Assert.IsFalse(Validator.IsValidEmail(rejected))
        Next
    End Sub

    ''' <summary>An 11-digit number starting with 13 is accepted; the other samples are rejected.</summary>
    <TestMethod>
    Public Sub TestPhoneValidation()
        Assert.IsTrue(Validator.IsValidPhone("13812345678"))

        For Each rejected As String In {"12345678", "2381234567"}
            Assert.IsFalse(Validator.IsValidPhone(rejected))
        Next
    End Sub

    ''' <summary>Tags are stripped from markup, and whitespace is trimmed and collapsed.</summary>
    <TestMethod>
    Public Sub TestTextProcessing()
        Dim markup As String = "<p>Hello</p><div>World</div>"
        Dim stripped As String = TextProcessor.StripHtmlTags(markup)
        Assert.AreEqual("HelloWorld", stripped)

        Dim padded As String = " multiple spaces here "
        Dim cleaned As String = TextProcessor.CleanWhitespace(padded)
        Assert.AreEqual("multiple spaces here", cleaned)
    End Sub
End Class
总结
VB.NET的正则表达式实现具有以下特点:
- 完整的.NET正则表达式引擎支持
- 编译选项提供高性能
- LINQ集成
- 完整的Unicode支持
最佳实践:
- 使用共享(Shared)编译的Regex对象提高性能
- 合理使用RegexOptions
- 处理超时和异常情况
- 编写完整的单元测试
- 使用命名捕获组提高可读性
注意事项:
- Regex对象创建开销大,应该重用
- 对需要反复执行的正则表达式考虑使用Compiled选项:它以更长的首次构造时间换取更快的匹配速度
- 处理RegexMatchTimeoutException(只有在为Regex指定了匹配超时后才会抛出)
- 注意内存使用
记住:在VB.NET中使用正则表达式时,要充分利用.NET框架提供的功能。VB.NET的语法可能与C#略有不同,但底层的正则表达式引擎是相同的。合理使用静态编译和缓存可以显著提高性能。