解析剪贴板中的HTML内容

https://blog.clso.fun/posts/Parser-Clipboard-HTML.html

Imports System.Text
Imports System.Text.RegularExpressions
Imports System.IO
 
 
''' <summary>
''' Reads the HTML data currently on the clipboard and returns only the part
''' delimited by the StartFragment / EndFragment entries of its header.
''' </summary>
''' <returns>
''' The fragment decoded as UTF-8, or Nothing when the clipboard holds no HTML,
''' the raw data cannot be obtained as a stream, or the header does not contain
''' a usable StartFragment / EndFragment pair.
''' </returns>
Shared Function ParserClipboardHTML() As String

    If Not Clipboard.ContainsText(TextDataFormat.Html) Then
        Return Nothing
    End If

    ' Fetch the raw clipboard bytes. The clipboard can change between the check
    ' above and this call, so the result is verified instead of cast blindly.
    ' NOTE(review): the format name's casing is kept exactly as in the original;
    ' presumably it is what makes GetData hand back the raw stream rather than
    ' an already decoded string - confirm before changing it.
    Dim bs As Byte()
    Using ms As MemoryStream = TryCast(Clipboard.GetData("Html Format"), MemoryStream)
        If ms Is Nothing OrElse ms.Length = 0 Then Return Nothing
        bs = ms.ToArray()
    End Using

    ' Locate the fragment boundaries: walk the leading "Key:Value" lines and
    ' stop at the first line that is not a header entry.
    Dim startF As Integer = -1, endF As Integer = -1
    Using reader As New StreamReader(New MemoryStream(bs, False))
        Do Until reader.EndOfStream
            Dim m As Match = Regex.Match(reader.ReadLine(), "^(\w+)\s*\:\s*(.+)$")
            If Not m.Success Then Exit Do

            Dim key As String = m.Groups(1).Value
            Dim isStart As Boolean = String.Equals(key, "StartFragment", StringComparison.OrdinalIgnoreCase)
            Dim isEnd As Boolean = String.Equals(key, "EndFragment", StringComparison.OrdinalIgnoreCase)
            If Not (isStart OrElse isEnd) Then Continue Do

            ' A malformed number is recorded as "not found" (-1) instead of
            ' raising a conversion exception. TryParse zeroes its output on
            ' failure, hence the explicit reset.
            Dim offset As Integer
            If Not Integer.TryParse(m.Groups(2).Value.Trim(),
                                    System.Globalization.NumberStyles.None,
                                    System.Globalization.CultureInfo.InvariantCulture,
                                    offset) Then
                offset = -1
            End If

            If isStart Then
                startF = offset
            Else
                endF = offset
            End If
        Loop
    End Using

    ' Reject missing, reversed or out-of-range boundaries so the slice below
    ' can never run past the buffer.
    If startF < 0 OrElse endF < startF OrElse endF > bs.Length Then
        Return Nothing
    End If

    ' The offsets index into the raw bytes, so decode straight from that range.
    Return Encoding.UTF8.GetString(bs, startF, endF - startF)
End Function

最后更新于

这有帮助吗?