解析剪贴板中的HTML内容
https://blog.clso.fun/posts/Parser-Clipboard-HTML.html
Imports System.Text
Imports System.Text.RegularExpressions
Imports System.IO
''' <summary>
''' Reads the HTML payload currently on the clipboard and returns only the
''' fragment delimited by the StartFragment / EndFragment header offsets.
''' </summary>
''' <returns>
''' The fragment decoded as UTF-8, or Nothing when the clipboard holds no HTML,
''' the payload is not available as a stream, or the fragment offsets are
''' missing, malformed, or outside the payload.
''' </returns>
Shared Function ParserClipboardHTML() As String
    If Not Clipboard.ContainsText(TextDataFormat.Html) Then
        Return Nothing
    End If

    ' Fetch the raw clipboard payload. The format name literal is kept exactly as
    ' written; this code expects the call to yield a MemoryStream.
    ' TryCast guards against Nothing (clipboard changed since the check above)
    ' or any other object type instead of throwing InvalidCastException.
    Dim ms As MemoryStream = TryCast(Clipboard.GetData("Html Format"), MemoryStream)
    If ms Is Nothing Then Return Nothing

    ' Snapshot the bytes and release the stream on every path.
    Dim bs As Byte()
    Using ms
        bs = ms.ToArray()
    End Using

    Return ExtractClipboardHtmlFragment(bs)
End Function

''' <summary>
''' Parses the "Name:Value" header lines at the start of a clipboard HTML payload
''' and returns the bytes between the StartFragment and EndFragment offsets.
''' </summary>
''' <param name="data">Raw payload bytes; offsets in the header index into this array.</param>
''' <returns>The fragment decoded as UTF-8, or Nothing when it cannot be located.</returns>
Private Shared Function ExtractClipboardHtmlFragment(data As Byte()) As String
    If data Is Nothing OrElse data.Length = 0 Then Return Nothing

    ' Locate the fragment boundaries; -1 means "not found / not usable".
    Dim startF As Integer = -1, endF As Integer = -1
    Using reader As New StreamReader(New MemoryStream(data, False))
        Do Until reader.EndOfStream
            Dim m As Match = Regex.Match(reader.ReadLine(), "^(\w+)\s*\:\s*(.+)$", RegexOptions.IgnoreCase Or RegexOptions.Multiline)
            ' The header block ends at the first line that is not "Name:Value".
            If Not m.Success Then Exit Do

            ' Offsets are machine-written decimal numbers, so parse them
            ' culture-invariantly and treat garbage as "not found" rather than throwing.
            Dim offset As Integer
            If Not Integer.TryParse(m.Groups(2).Value,
                                    System.Globalization.NumberStyles.Integer,
                                    System.Globalization.CultureInfo.InvariantCulture,
                                    offset) Then
                offset = -1
            End If

            Select Case m.Groups(1).Value
                Case "StartFragment"
                    startF = offset
                Case "EndFragment"
                    endF = offset
            End Select
        Loop
    End Using

    ' Reject missing, inverted, or out-of-range offsets instead of letting the
    ' slice below throw.
    If startF < 0 OrElse endF < startF OrElse endF > data.Length Then
        Return Nothing
    End If

    ' Decode the fragment directly from the payload; offsets are byte positions.
    Return Encoding.UTF8.GetString(data, startF, endF - startF)
End Function
最后更新于
这有帮助吗?