Accepting Revisions, Removing Comments, and Removing Personal Info from a SharePoint Document Library using Web Services

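Now that we’ve covered the basics of using SharePoint web services together with the Open XML SDK and LINQ to XML (in the two previous posts), it is pretty trivial to extend the example presented in the last post so that it accepts revisions, removes comments, and removes personal information from every word-processing document in a document library.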

This blog is inactive.
New blog: EricWhite.com/blog

Blog TOC

Note: at this point, this code only 'cleans' word-processing documents.  It is pretty simple to extend this example to work on spreadsheets and presentations.  I'll present this in an upcoming post.

To get started with using SharePoint web services, see:

Getting Started with SharePoint (WSS) Web Services using LINQ to XML

For details about modifying a document in a document library using web services, see:

Modifying Open XML Documents that are in SharePoint Document Libraries using Web Services

For this sample, I used the OpenXmlInfo class that I presented in an earlier post.  The sample relies on the following methods of that class (along with GetAuthor):

public static bool InspectForComments(WordprocessingDocument document)
public static bool InspectForRevisions(WordprocessingDocument doc)
public static bool InspectForPersonalInfo(WordprocessingDocument document)

public static void RemoveComments(WordprocessingDocument document)
public static void AcceptRevisions(WordprocessingDocument doc)
public static void RemovePersonalInfo(WordprocessingDocument document)

These methods are based on code that is presented in the following blog posts:

Remove Comments from an Open XML Document

Remove Personal Information from an Open XML Document

Accept Revisions in an Open XML Document

Here is the listing of the sample.  For convenience, the code is also attached to this post.

using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Xml;
using System.Xml.Linq;
using System.IO;
using DocumentFormat.OpenXml.Packaging;
using OpenXmlTechnologyDemo;

namespace CleanDocLib
{
/// <summary>
/// Extension methods that bridge Open XML package parts, System.Xml nodes,
/// and LINQ to XML trees.
/// </summary>
public static class MyExtensions
{
    /// <summary>
    /// Returns the part's content as an <see cref="XDocument"/>.
    /// The document is loaded from the part's stream on first use and then
    /// cached on the part as an annotation, so later calls return the same
    /// instance.
    /// </summary>
    /// <param name="part">The package part to read.</param>
    /// <returns>The cached or freshly loaded document.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="part"/> is null.</exception>
    public static XDocument GetXDocument(this OpenXmlPart part)
    {
        if (part == null)
            throw new ArgumentNullException("part");

        XDocument cached = part.Annotation<XDocument>();
        if (cached != null)
            return cached;

        XDocument loaded;
        using (StreamReader textReader = new StreamReader(part.GetStream()))
        using (XmlReader xmlReader = XmlReader.Create(textReader))
        {
            loaded = XDocument.Load(xmlReader);
        }

        part.AddAnnotation(loaded);
        return loaded;
    }

    /// <summary>
    /// Serializes the part's <see cref="XDocument"/> (as returned by
    /// <see cref="GetXDocument"/>) back into the part, replacing its content.
    /// </summary>
    /// <param name="part">The package part to overwrite.</param>
    /// <exception cref="ArgumentNullException"><paramref name="part"/> is null.</exception>
    public static void PutXDocument(this OpenXmlPart part)
    {
        if (part == null)
            throw new ArgumentNullException("part");

        XDocument xdoc = part.GetXDocument();
        if (xdoc == null)
            return;

        // An XmlWriter created with default settings does not close the
        // stream it writes to, so the part stream needs its own using block;
        // otherwise it stays open after the writer is disposed.
        using (Stream partStream = part.GetStream(FileMode.Create, FileAccess.Write))
        using (XmlWriter xw = XmlWriter.Create(partStream))
        {
            xdoc.Save(xw);
        }
    }

    /// <summary>
    /// Concatenates every string in the sequence, in order, with no separator.
    /// </summary>
    /// <param name="source">The strings to join.</param>
    /// <returns>The concatenation; an empty string for an empty sequence.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="source"/> is null.</exception>
    public static string StringConcatenate(
        this IEnumerable<string> source)
    {
        if (source == null)
            throw new ArgumentNullException("source");

        StringBuilder buffer = new StringBuilder();
        foreach (string piece in source)
            buffer.Append(piece);
        return buffer.ToString();
    }

    /// <summary>
    /// Converts a System.Xml node into a LINQ to XML element by writing the
    /// node into a new <see cref="XDocument"/> and returning its root.
    /// </summary>
    /// <param name="node">The node to convert.</param>
    /// <returns>The root element of the copied tree.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="node"/> is null.</exception>
    public static XElement GetXElement(this XmlNode node)
    {
        if (node == null)
            throw new ArgumentNullException("node");

        XDocument xDoc = new XDocument();
        using (XmlWriter xmlWriter = xDoc.CreateWriter())
        {
            node.WriteTo(xmlWriter);
        }
        return xDoc.Root;
    }

    /// <summary>
    /// Converts a LINQ to XML element into a System.Xml node. The value
    /// returned is a new <see cref="XmlDocument"/> whose document element is
    /// a copy of <paramref name="element"/>.
    /// </summary>
    /// <param name="element">The element to convert.</param>
    /// <returns>An <see cref="XmlDocument"/> containing the copied element.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="element"/> is null.</exception>
    public static XmlNode GetXmlNode(this XElement element)
    {
        if (element == null)
            throw new ArgumentNullException("element");

        using (XmlReader xmlReader = element.CreateReader())
        {
            XmlDocument xmlDoc = new XmlDocument();
            xmlDoc.Load(xmlReader);
            return xmlDoc;
        }
    }

    /// <summary>
    /// Renders the element as indented XML with each attribute on its own
    /// line and without an XML declaration.
    /// </summary>
    /// <param name="element">The element to render.</param>
    /// <returns>The formatted XML text.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="element"/> is null.</exception>
    public static string ToStringAlignAttributes(this XElement element)
    {
        if (element == null)
            throw new ArgumentNullException("element");

        XmlWriterSettings settings = new XmlWriterSettings();
        settings.Indent = true;
        settings.OmitXmlDeclaration = true;
        settings.NewLineOnAttributes = true;

        StringBuilder stringBuilder = new StringBuilder();
        using (XmlWriter xmlWriter = XmlWriter.Create(stringBuilder, settings))
        {
            element.WriteTo(xmlWriter);
        }
        return stringBuilder.ToString();
    }
}

/// <summary>
/// Console sample that walks a SharePoint document library through the Lists
/// and Copy web services, reports whether each .docx file contains comments,
/// tracked revisions, or personal information, then removes/accepts them and
/// saves the cleaned document back to the library.
/// </summary>
class Program
{
    /// <summary>
    /// Entry point. Prints one table row per word-processing document found in
    /// the configured library and writes the cleaned document back in place.
    /// </summary>
    /// <param name="args">Command-line arguments; not used.</param>
    static void Main(string[] args)
    {
        string documentLibraryName = "Open XML Documents";

        // XML namespace names are compared as literal strings, so they must be
        // spelled exactly as the service emits them. The SharePoint SOAP
        // namespace uses the "http" scheme; an "https" spelling matches no
        // element and makes every library look as if it did not exist.
        XNamespace s = "http://schemas.microsoft.com/sharepoint/soap/";
        XNamespace z = "#RowsetSchema";

        // Make sure that you use the correct namespace, as well as the correct reference
        // name. The namespace (by default) is the same as the name of the application
        // when you created it. You specify the reference name in the Add Web Reference
        // dialog box.
        //
        // Namespace   Reference Name
        //     |             |
        //     V             V
        CleanDocLib.ListsWebService.Lists lists =
            new CleanDocLib.ListsWebService.Lists();

        // Fix Namespace and Reference Name for the Copy web service too
        // NOTE(review): copy.Url is never assigned here; presumably the
        // generated proxy's default URL is used - confirm that it points at
        // the same site as lists.Url.
        CleanDocLib.CopyWebService.Copy copy =
            new CleanDocLib.CopyWebService.Copy();

        // Make sure that you update the following URL to point to the
        // Lists web service for your SharePoint site.
        lists.Url = "https://localhost/_vti_bin/Lists.asmx";

        lists.Credentials = System.Net.CredentialCache.DefaultCredentials;
        copy.Credentials = System.Net.CredentialCache.DefaultCredentials;

        XElement listCollection = lists.GetListCollection().GetXElement();

        // get the node for the library that we want
        XElement library = listCollection
            .Elements(s + "List")
            .FirstOrDefault(l => (string)l.Attribute("Title") == documentLibraryName);

        if (library == null)
        {
            Console.WriteLine("Library {0} doesn't exist.", documentLibraryName);
            return;
        }

        // get the ID of the library
        string libId = (string)library.Attribute("ID");

        // Query every row of the library, then keep only the rows that
        // describe word-processing documents.
        XElement queryOptions = new XElement("QueryOptions",
            new XElement("Folder"),
            new XElement("IncludeMandatoryColumns", false)
        );
        XElement viewFields = new XElement("ViewFields");
        var listItems = lists.GetListItems(libId, "", null,
                viewFields.GetXmlNode(), "", queryOptions.GetXmlNode(), "")
            .GetXElement()
            .Descendants(z + "row")
            .Select(x =>
                new
                {
                    Node = x,
                    ContentType = (string)x.Attribute("ows_ContentType"),
                    LinkFilename = (string)x.Attribute("ows_LinkFilename")
                }
            )
            // Ordinal, case-insensitive suffix test: the extension is an
            // identifier, not linguistic text, so culture rules must not apply.
            .Where(x => x.ContentType == "Document" &&
                x.LinkFilename != null &&
                x.LinkFilename.EndsWith(".docx", StringComparison.OrdinalIgnoreCase));

        // Column widths of the report table.
        int[] tabs = { 30, 10, 10, 20, 20 };
        Console.WriteLine("{0}{1}{2}{3}{4}",
            "Document Name".PadRight(tabs[0]),
            "Comments".PadRight(tabs[1]),
            "Revisions".PadRight(tabs[2]),
            "Personal Info".PadRight(tabs[3]),
            "Author");

        foreach (var item in listItems)
        {
            // A row without an absolute URL cannot be downloaded; report it
            // and move on instead of failing with a NullReferenceException.
            string url = (string)item.Node.Attribute("ows_EncodedAbsUrl");
            if (url == null)
            {
                Console.WriteLine("{0}{1}",
                    item.LinkFilename.PadRight(tabs[0]),
                    "*** No URL returned for document ***");
                continue;
            }

            // get the document from the doc library as a byte array
            CleanDocLib.CopyWebService.FieldInformation[] fields;
            byte[] byteArray;
            copy.GetItem(url, out fields, out byteArray);

            // A MemoryStream constructed directly over the array would be
            // fixed-size; writing into an empty stream keeps it expandable so
            // the package can grow while it is being edited.
            using (MemoryStream mem = new MemoryStream())
            {
                mem.Write(byteArray, 0, byteArray.Length);
                try
                {
                    // create a WordprocessingDocument from the memory stream
                    using (WordprocessingDocument wordDoc =
                        WordprocessingDocument.Open(mem, true))
                    {
                        // Report the state of the document before cleaning it.
                        bool comments = OpenXmlInfo.InspectForComments(wordDoc);
                        bool revisions = OpenXmlInfo.InspectForRevisions(wordDoc);
                        bool personalInfo = OpenXmlInfo.InspectForPersonalInfo(wordDoc);
                        string author = OpenXmlInfo.GetAuthor(wordDoc);
                        Console.WriteLine("{0}{1}{2}{3}{4}",
                            item.LinkFilename.PadRight(tabs[0]),
                            comments.ToString().PadRight(tabs[1]),
                            revisions.ToString().PadRight(tabs[2]),
                            personalInfo.ToString().PadRight(tabs[3]),
                            author);

                        OpenXmlInfo.RemoveComments(wordDoc);
                        OpenXmlInfo.AcceptRevisions(wordDoc);
                        OpenXmlInfo.RemovePersonalInfo(wordDoc);
                    }

                    // The package has been closed above, so the stream now
                    // holds the complete cleaned document. Use the Copy web
                    // service to save it back to the document library.
                    string[] urls = { url };
                    CleanDocLib.CopyWebService.CopyResult[] copyResults;
                    copy.CopyIntoItems(url, urls, fields, mem.ToArray(),
                        out copyResults);
                }
                catch (System.IO.FileFormatException)
                {
                    // document is invalid
                    Console.WriteLine("{0}{1}",
                        item.LinkFilename.PadRight(tabs[0]),
                        "*** Document is invalid ***");
                }
            }
        }
    }
}
}

CleanDocLib.zip