Complete Listing of ParseWordML - VB

[Table of Contents] [Next Topic]

The following code is attached to this page.

This blog is inactive.
New blog: EricWhite.com/blog

Blog TOC

Imports System.IO
Imports System.Xml
Imports System.Text
Imports DocumentFormat.OpenXml.Packaging

Public Class GroupOfAdjacent(Of TElement, TKey)
Implements IEnumerable(Of TElement)

Private _key As TKey
Private _groupList As List(Of TElement)

Public Property GroupList() As List(Of TElement)
Get
Return _groupList
End Get
Set(ByVal value As List(Of TElement))
_groupList = value
End Set
End Property

Public ReadOnly Property Key() As TKey
Get
Return _key
End Get
End Property

Public Function GetEnumerator() As System.Collections.Generic.IEnumerator(Of TElement) _
Implements System.Collections.Generic.IEnumerable(Of TElement).GetEnumerator
Return _groupList.GetEnumerator
End Function

Public Function GetEnumerator1() As System.Collections.IEnumerator _
Implements System.Collections.IEnumerable.GetEnumerator
Return _groupList.GetEnumerator
End Function

Public Sub New(ByVal key As TKey)
_key = key
_groupList = New List(Of TElement)
End Sub
End Class

Module Module1
<System.Runtime.CompilerServices.Extension()> _
Public Function GroupAdjacent(Of TElement, TKey)(ByVal source As IEnumerable(Of TElement), _
ByVal keySelector As Func(Of TElement, TKey)) As List(Of GroupOfAdjacent(Of TElement, TKey))
Dim lastKey As TKey = Nothing
Dim currentGroup As GroupOfAdjacent(Of TElement, TKey) = Nothing
Dim allGroups As List(Of GroupOfAdjacent(Of TElement, TKey)) = New List(Of GroupOfAdjacent(Of TElement, TKey))()
For Each item In source
Dim thisKey As TKey = keySelector(item)
If lastKey IsNot Nothing And Not thisKey.Equals(lastKey) Then
allGroups.Add(currentGroup)
End If
If Not thisKey.Equals(lastKey) Then
currentGroup = New GroupOfAdjacent(Of TElement, TKey)(keySelector(item))
End If
currentGroup.GroupList.Add(item)
lastKey = thisKey
Next
If lastKey IsNot Nothing Then
allGroups.Add(currentGroup)
End If
Return allGroups
End Function

<System.Runtime.CompilerServices.Extension()> _
Public Function GetPath(ByVal el As XElement) As String
Return el _
.AncestorsAndSelf _
.InDocumentOrder _
.Aggregate("", Function(seed, i) seed & "/" & i.Name.LocalName)
End Function

<System.Runtime.CompilerServices.Extension()> _
Function StringConcatenate(ByVal source As IEnumerable(Of String)) _
As String
Return source.Aggregate(New StringBuilder, _
Function(sb, i) sb.Append(i), _
Function(sb) sb.ToString)
End Function

<System.Runtime.CompilerServices.Extension()> _
Function StringConcatenate(Of T) _
(ByVal source As IEnumerable(Of T), ByVal projectionFunc As Func(Of T, String)) _
As String
Return source.Aggregate(New StringBuilder, _
Function(sb, i) sb.Append(projectionFunc(i)), _
Function(sb) sb.ToString)
End Function

Public Function LoadXDocument(ByVal part As OpenXmlPart) _
As XDocument
Using streamReader As StreamReader = New StreamReader(part.GetStream())
Using xmlReader As XmlReader = xmlReader.Create(streamReader)
Return XDocument.Load(xmlReader)
End Using
End Using
End Function

Public Function GetParagraphStyle(ByVal para As XElement, _
ByVal defaultStyle As String) As String
Dim w As XNamespace = _
"https://schemas.openxmlformats.org/wordprocessingml/2006/main"
Dim paraStyle = CStr(para.Elements(w + "pPr") _
.Elements(w + "pStyle") _
.Attributes(w + "val") _
.FirstOrDefault())
If (paraStyle Is Nothing) Then
Return defaultStyle
Else
Return paraStyle
End If
End Function

Public Function GetComment(ByVal commentsDoc As XDocument, ByVal p As XElement) As String
Dim w As XNamespace = _
"https://schemas.openxmlformats.org/wordprocessingml/2006/main"

Dim id = _
CStr(p.Elements(w + "commentRangeStart") _
.First() _
.Attribute(w + "id"))

Dim commentNode = commentsDoc.Root() _
.Elements(w + "comment") _
.Where(Function(c) CStr(c.Attribute(w + "id")) = id) _
.First()

Dim comment = commentNode _
.Elements(w + "p") _
.StringConcatenate(Function(node) node _
.Descendants(w + "t") _
.Select(Function(t) CStr(t)) _
.StringConcatenate() & "\n")

Return comment
End Function

Sub Main()
Dim w As XNamespace = _
"https://schemas.openxmlformats.org/wordprocessingml/2006/main"
Dim filename As String = "SampleDoc.docx"
Using wordDoc As WordprocessingDocument = _
WordprocessingDocument.Open(filename, True)
Dim mainPart As MainDocumentPart = _
wordDoc.MainDocumentPart
Dim styleDefinitionPart As StyleDefinitionsPart = _
mainPart.StyleDefinitionsPart
Dim commentsPart As WordprocessingCommentsPart = _
mainPart.WordprocessingCommentsPart
Dim mainPartDoc As XDocument = LoadXDocument(mainPart)
Dim styleDoc As XDocument = LoadXDocument(styleDefinitionPart)
Dim commentsDoc As XDocument = LoadXDocument(commentsPart)

Dim defaultStyle As String = _
CStr( _
( _
From style In styleDoc.Root _
.Elements(w + "style") _
Where ( _
CStr(style.Attribute(w + "type")) = "paragraph" And _
CStr(style.Attribute(w + "default")) = "1") _
) _
.First() _
.Attribute(w + "styleId") _
)

Dim paragraphs = _
mainPartDoc.Root _
.Element(w + "body") _
.Descendants(w + "p") _
.Select(Function(p) _
New With { _
.ParagraphNode = p, _
.Style = GetParagraphStyle(p, defaultStyle) _
} _
)

Dim r As XName = w + "r"
Dim ins As XName = w + "ins"

Dim paragraphsWithText = _
paragraphs.Select(Function(p) _
New With { _
.ParagraphNode = p.ParagraphNode, _
.Style = p.Style, _
.Text = p.ParagraphNode _
.Elements() _
.Where(Function(z) z.Name = r Or z.Name = ins) _
.Descendants(w + "t") _
.StringConcatenate(Function(s) CStr(s)) _
} _
)

Dim groupedCodeParagraphs = paragraphsWithText _
.GroupAdjacent(Function(p) p.Style) _
.Where(Function(g) g.Key = "Code")

Dim groupedCodeWithComments = _
groupedCodeParagraphs.Select(Function(g) New With { _
.ParagraphGroup = g, _
.Comment = GetComment(commentsDoc, g.First().ParagraphNode) _
} _
)

For Each group In groupedCodeWithComments
Console.WriteLine("Code Block")
Console.WriteLine("==========")
For Each paragraph In group.ParagraphGroup
Console.WriteLine(paragraph.Text)
Next
Console.WriteLine()
Console.WriteLine("Meta Data")
Console.WriteLine("=========")
Console.WriteLine(group.Comment)
Console.WriteLine()
Next
End Using
End Sub
End Module

[Table of Contents] [Next Topic] [Blog Map]

ParseWordML.cs