Introduction
This article describes a methodology for moving data from legacy systems to SharePoint 2013 quickly, for free, and in the safest way possible.
You can also use this technique to migrate FrontPage sites, static HTML, PHP pages, etc. to SharePoint 2013.
Prerequisites
Working knowledge of Internet Explorer, C#, and SharePoint 2013.
Using the Code
Let's start with the classic Program.cs:
using System;
using System.Windows.Forms;

namespace SharePoint.Import.SpiderAgent
{
    static class Program
    {
        /// <summary>
        /// Entry point: starts the SpiderAgent Windows Form that hosts the crawler.
        /// </summary>
        [STAThread]
        static void Main()
        {
            Application.EnableVisualStyles();
            Application.SetCompatibleTextRenderingDefault(false);
            Application.Run(new SpiderAgent());
        }
    }
}
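Program.cs assumes a SpiderAgent Windows Form that hosts the crawler; the form itself isn't shown in this article, so here is a minimal, hypothetical sketch of what it could look like. Only the class name comes from the code above; the text box, button, and click handler are my own placeholders for illustration:

using System;
using System.Windows.Forms;

namespace SharePoint.Import.SpiderAgent
{
    // Hypothetical sketch of the SpiderAgent form: the real form in the project drives
    // the crawler; the controls and handler below are illustrative placeholders.
    public partial class SpiderAgent : Form
    {
        private readonly TextBox txtStartUrl = new TextBox { Width = 400, Left = 10, Top = 10 };
        private readonly Button btnCrawl = new Button { Text = "Crawl", Left = 420, Top = 8 };

        public SpiderAgent()
        {
            Text = "SharePoint Import - Spider Agent";
            Controls.Add(txtStartUrl);
            Controls.Add(btnCrawl);
            btnCrawl.Click += (sender, e) =>
            {
                // In the real project this would hand the URL to the HtmlParser shown below;
                // here we only validate it so the sketch stays self-contained.
                Uri start;
                if (Uri.TryCreate(txtStartUrl.Text, UriKind.Absolute, out start))
                    MessageBox.Show("Would start crawling " + start);
                else
                    MessageBox.Show("Please enter a valid absolute URL.");
            };
        }
    }
}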
Now we need something to browse the old application server. We wrap the WebClient class and configure a WebProxy with explicit credentials, so the crawler gets through the corporate proxy and its security prompts "legally", with a real account, instead of being blocked by them:
using System;
using System.Net;

namespace SharePoint.Import.SpiderAgent
{
    public class PersonalWebClient : WebClient
    {
        public PersonalWebClient(string proxyUser, string proxyPassword)
        {
            try
            {
                this.UseDefaultCredentials = true;
                // Only route requests through the corporate proxy when explicit credentials are supplied.
                if (!string.IsNullOrEmpty(proxyUser))
                    this.Proxy = setProxy(proxyUser, proxyPassword);
            }
            catch { /* fall back to the default proxy settings if anything goes wrong */ }
        }

        static public WebProxy setProxy(string proxyUser, string proxyPassword)
        {
            // Replace with the address of your corporate proxy.
            string proxyDomain = "intranetproxy:8080";
            WebProxy p = new WebProxy(proxyDomain, true);
            p.Credentials = new NetworkCredential(proxyUser, proxyPassword);
            return p;
        }
    }
}
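With the proxy-aware client in place, fetching a page from the legacy server is straightforward. A minimal usage sketch, assuming placeholder credentials and a placeholder intranet URL (TestUrl is the helper shown later in HtmlParser):

using System;

namespace SharePoint.Import.SpiderAgent
{
    static class PersonalWebClientDemo
    {
        public static void Run()
        {
            // Placeholder credentials and URL: replace with your own.
            string proxyUser = @"DOMAIN\user";
            string proxyPassword = "password";
            string url = "http://legacy-intranet/index.html";

            // Pre-flight check using the TestUrl helper shown further down in HtmlParser.
            if (!HtmlParser.TestUrl(proxyUser, proxyPassword, url))
            {
                Console.WriteLine("URL not reachable through the proxy: " + url);
                return;
            }

            using (PersonalWebClient client = new PersonalWebClient(proxyUser, proxyPassword))
            {
                string html = client.DownloadString(url);
                Console.WriteLine("Downloaded {0} characters from {1}", html.Length, url);
            }
        }
    }
}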
Now we need an HTML parser, and a good one: the spider is built on the Html Agility Pack. The following code is an extract of the spider, just to give an idea of the algorithm:
using System;
using System.Collections.Generic;
using System.ComponentModel;
using System.Data;
using System.Drawing;
using System.Linq;
using System.Text;
using System.IO;
using System.Security.Cryptography;
using System.Text.RegularExpressions;
using System.Threading.Tasks;
using mshtml;
using System.Net;
using HtmlAgilityPack;
using System.Web;
using System.Xml.Linq;
using System.Xml;
using System.Net.Mime;
using System.Windows.Forms;
using System.Threading;
using System.Diagnostics;
namespace SharePoint.Import.SpiderAgent
{
public class HtmlParser
{
static SortedList<string, Uri> md5VisitedPages;
// NOTE: this is an extract; fields such as stack, documents and prefixFilePath, and helpers
// such as notify(), retrieveHTML(), downloadFile() and downloadFileAvoidDuplicates() are omitted.
// Cleans the text of a node: strips line breaks, tabs and "&nbsp;" entities, turns semicolons
// into spaces, then collapses runs of spaces into one.
private string RemoveSpecialCharacters(string input)
{
input = input.Replace(Environment.NewLine, " ").Replace('\t', ' ')
.Replace("&nbsp;", "").Replace(';', ' ').Trim();
RegexOptions options = RegexOptions.None;
Regex regex = new Regex(@"[ ]{2,}", options);
input = regex.Replace(input, @" ");
return input;
}
// Walks every link on the page: links to other HTML pages are crawled recursively as
// folders, anything else is downloaded and added to the current folder as a file.
private void processHTMLBody(HtmlAgilityPack.HtmlDocument htmlDoc, Uri url, LockerType lt, string looker)
{
int pushed = 0;
try
{
foreach (HtmlNode link in htmlDoc.DocumentNode.SelectNodes("//a[@href]"))
{
HtmlAttribute att = link.Attributes["href"];
string sUrl = att.Value;
string baseUrl = url.ToString();
// 'type' holds the extension of the linked resource (resolved elsewhere in the full source).
if (type == "html" || type == "htm")
{
string str = baseUrl.Substring(0, baseUrl.ToLower().LastIndexOf('/') + 1) + sUrl;
FileOrFolder fof = new FileOrFolder(FileOrFolderType.Folder);
fof.title = RemoveSpecialCharacters(att.OwnerNode.InnerText).Trim();
if (String.IsNullOrEmpty(fof.title))
continue;
stack.Peek().Children.Add(fof);
stack.Push(fof);
pushed++; // track pushes so the matching pops at the end of the method restore the stack
fof.sourceUrl = url.ToString();
retrieveHTML(str, LockerType.Leaves, "*");
continue;
}
FileOrFolder fs = new FileOrFolder(FileOrFolderType.File);
fs.sourceUrl = baseUrl.Substring(0, baseUrl.LastIndexOf('/') + 1) + sUrl;
string fileTarget = prefixFilePath + "\\";
string strs = sUrl.Replace('/', '\\');
string fileName = strs.Substring(strs.LastIndexOf('\\') + 1);
fileTarget += fileName;
fs.fileName = fileName;
notify("Downloading " + fs.sourceUrl);
try
{
fs.title = RemoveSpecialCharacters(att.OwnerNode.InnerText).Trim();
string str = downloadFileAvoidDuplicates(fs, fileTarget);
if (string.IsNullOrEmpty(str))
continue;
fs.fileName = str;
stack.Peek().Children.Add(fs);
}
catch (Exception exe)
{
notify(exe, url.ToString());
}
}
for (int i = pushed; i > 0; i--)
stack.Pop();
}
catch (Exception exe)
{
try
{
notify(exe, url.ToString());
}
catch { }
}
}
// Parses a search-engine results page (Google-style "/url?q=" links) and crawls each
// result as a new folder in the tree.
private void processWeb(HtmlAgilityPack.HtmlDocument htmlDoc, Uri url)
{
const string sResult = "/url?q=";
try
{
foreach (HtmlNode link in htmlDoc.DocumentNode.SelectNodes("//a[@href]"))
{
HtmlAttribute att = link.Attributes["href"];
string sUrl = att.Value;
if (!sUrl.StartsWith(sResult))
continue;
sUrl = sUrl.Substring(sResult.Length);
sUrl = sUrl.Substring(0, sUrl.IndexOf('&'));
if (bGoogle && (sUrl.ToLower().Contains("webcache")
|| sUrl.ToLower().Contains(@"q=related")))
continue;
FileOrFolder fof = new FileOrFolder(FileOrFolderType.Folder);
fof.title = RemoveSpecialCharacters(att.OwnerNode.InnerText).Trim();
if (String.IsNullOrEmpty(fof.title))
continue;
stack.Peek().Children.Add(fof);
stack.Push(fof);
fof.sourceUrl = url.ToString();
notify("Navigating " + sUrl);
retrieveHTML(sUrl, LockerType.Words, null);
stack.Pop();
}
}
catch (Exception exe)
{
try
{
notify(exe, url.ToString());
}
catch { }
}
}
// Scans the nodes selected by the 'looker' XPath expression and records each document's
// title, type (guessed from its icon: pdf, link or txt) and link in the 'documents' list.
private void processHTMLClass(HtmlAgilityPack.HtmlDocument htmlDoc, Uri url, LockerType lt, string looker)
{
try
{
var nomedocumento = from foo in htmlDoc.DocumentNode.SelectNodes(looker) select foo;
foreach (var nodes in nomedocumento)
{
foreach (var childNode in nodes.ChildNodes)
{
string sValue = childNode.InnerText;
try
{
string link = childNode.InnerHtml;
// Strip the leading '<a href="' (9 characters) to isolate the link target.
link = link.Substring(9);
string tagType = string.Empty;
if (link.Contains("pdf.png") || link.Contains("pdf.gif"))
tagType = "pdf";
else if (link.Contains("link.png") ||
link.Contains("link.gif") || link.Contains("folder.gif"))
tagType = "link";
else if (link.Contains("txt.png") || link.Contains("txt.gif"))
tagType = "txt";
int pos = link.IndexOf(">");
link = link.Substring(0, pos - 1);
if (link.EndsWith("\" target=\"_blank"))
{
pos = link.LastIndexOf("\" target=\"_blank");
link = link.Substring(0, pos);
// Constructing the Uri only validates the link; an invalid one throws and falls
// into the catch below, which records just the plain text value.
Uri a = new Uri(link, UriKind.Absolute);
}
else
{
Uri a = new Uri(link, UriKind.Relative);
}
notify("Parsing: " + sValue + " " + tagType + " " + link);
documents.Add(sValue);
documents.Add(tagType);
documents.Add(link);
}
catch
{
documents.Add(sValue);
}
}
notify("Adding: " + nodes.ParentNode.ChildNodes[1].InnerText);
documents.Add(nodes.ParentNode.ChildNodes[1].InnerText);
}
}
catch (Exception exe)
{
try
{
notify(exe, url.ToString());
}
catch { }
}
}
// Processes a "leaf" page: creates a folder named after the parent segment of the URL
// and downloads every file linked from the page into it.
private void processLeaves(HtmlAgilityPack.HtmlDocument htmlDoc, Uri url)
{
FileOrFolder f = new FileOrFolder(FileOrFolderType.Folder);
string[] prs = url.ToString().Split('/');
f.sourceUrl = url.ToString();
f.fileName = prs[prs.Length - 2];
stack.Peek().Children.Add(f);
stack.Push(f);
try
{
FileOrFolder folder = stack.Peek();
foreach (HtmlNode link in htmlDoc.DocumentNode.SelectNodes("//a[@href]"))
{
HtmlAttribute att = link.Attributes["href"];
string sUrl = url.ToString();
if (att.Value.ToLower().EndsWith("leaft.html"))
continue;
FileOrFolder fs = new FileOrFolder(FileOrFolderType.File);
fs.bLeaf = true;
fs.sourceUrl = sUrl.Substring(0, sUrl.ToLower().LastIndexOf("leaft.html")) + att.Value;
notify("Downloading " + fs.sourceUrl);
try
{
string str = downloadFile(fs.sourceUrl, prefixFilePath + "\\" + att.Value);
if (string.IsNullOrEmpty(str))
continue;
fs.fileName = str;
folder.Children.Add(fs);
}
catch (Exception exe)
{
notify(exe, fs.sourceUrl);
}
}
}
catch (Exception exe)
{
try
{
notify(exe, url.ToString());
}
catch { }
}
stack.Pop();
}
// Walks the cells of the HTML table whose id matches 'looker' (the row handling is
// trimmed down in this extract).
private void processHTMLTable(HtmlAgilityPack.HtmlDocument htmlDoc, Uri url, LockerType lt, string looker)
{
try
{
string selector = "//table[@id='" + looker + "']";
var tableA = from table in htmlDoc.DocumentNode.SelectNodes(selector).Cast<HtmlNode>()
from row in table.SelectNodes("tr").Cast<HtmlNode>()
from cell in row.SelectNodes("th|td").Cast<HtmlNode>()
select cell;
foreach (var childNode in tableA)
{
string sValue = childNode.OuterHtml;
if (sValue.Contains("../immagini/pdf.gif"))
{
// PDF cells are handled here in the full source; omitted from this extract.
}
}
}
catch (Exception exe)
{
try
{
notify(exe, url.ToString());
}
catch { }
}
}
// Returns the href of an element, discarding stylesheets, Google API references and
// bare "#" anchors.
string extractAttribute(HtmlAgilityPack.HtmlNode element)
{
string link = string.Empty;
try
{
link = element.GetAttributeValue("href", null);
link = link.ToLower().Trim();
if (link.EndsWith(".css") ||
link.Contains("googleapis.com") ||
link == @"#"
)
{
link = string.Empty;
}
}
catch (Exception exe)
{
try
{
notify(exe, element.OuterHtml);
}
catch { }
}
return link;
}
// Normalizes a raw URL: prepends "http://www." or "http://" when the scheme or host
// is incomplete, based on the number of dots in the string.
private string prepareUrl(string iUrl)
{
if (string.IsNullOrEmpty(iUrl))
return null;
if (string.IsNullOrEmpty(iUrl.Trim()))
return null;
string[] seq = iUrl.Split('.');
string oUrl = null;
switch (seq.Length - 1)
{
case 0:
return null;
case 1:
oUrl = @"http://www." + iUrl;
break;
case 2:
if (iUrl.Contains("http"))
oUrl = iUrl;
else
oUrl = @"http://" + iUrl;
break;
default:
oUrl = iUrl;
break;
}
return oUrl;
}
// Rewrites a crawled URL into the corresponding SharePoint document-library URL
// (the library's AllItems view).
private string TrasformSharePointUrl(string url)
{
url = url.Replace("Shared%20Documents/", "/");
url = url.Replace(".aspx", "/Forms/AllItems.aspx");
return url;
}
// Checks whether a URL is reachable through the corporate proxy by downloading its content.
public static bool TestUrl(string proxyUser, string proxyPassword, string url)
{
try
{
using (PersonalWebClient client = new PersonalWebClient(proxyUser, proxyPassword))
using (Stream data = client.OpenRead(new Uri(url)))
using (StreamReader reader = new StreamReader(data))
{
string htmlContent = reader.ReadToEnd();
data.Close();
reader.Close();
}
}
catch
{
return false;
}
return true;
}
}
}
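The extract above builds a tree of FileOrFolder objects while crawling; the FileOrFolder class itself and the final push into SharePoint 2013 are not shown. The sketch below is my own minimal version of both: FileOrFolder simply mirrors the members used in the extract, and the uploader replays the tree into a document library with the standard SharePoint 2013 client object model (Microsoft.SharePoint.Client). The site URL, library title and local download folder are placeholders.

using System;
using System.Collections.Generic;
using System.IO;
using Microsoft.SharePoint.Client;

namespace SharePoint.Import.SpiderAgent
{
    // Minimal FileOrFolder sketch, mirroring the members used in the extract above.
    public enum FileOrFolderType { File, Folder }

    public class FileOrFolder
    {
        public FileOrFolder(FileOrFolderType type) { this.type = type; }
        public FileOrFolderType type;
        public string title;
        public string fileName;
        public string sourceUrl;
        public bool bLeaf;
        public List<FileOrFolder> Children = new List<FileOrFolder>();
    }

    // Sketch of the final step: replaying the crawled tree into a SharePoint 2013
    // document library via CSOM. URLs, titles and paths are placeholders.
    static class SharePointUploader
    {
        public static void Upload(FileOrFolder root, string siteUrl, string libraryTitle, string localRoot)
        {
            using (ClientContext ctx = new ClientContext(siteUrl))
            {
                List library = ctx.Web.Lists.GetByTitle(libraryTitle);
                Folder rootFolder = library.RootFolder;
                ctx.Load(rootFolder);
                ctx.ExecuteQuery();
                UploadChildren(ctx, rootFolder, root, localRoot);
            }
        }

        static void UploadChildren(ClientContext ctx, Folder target, FileOrFolder node, string localRoot)
        {
            foreach (FileOrFolder child in node.Children)
            {
                if (child.type == FileOrFolderType.Folder)
                {
                    // Create a subfolder named after the crawled page title and recurse into it.
                    Folder sub = target.Folders.Add(child.title);
                    ctx.ExecuteQuery();
                    UploadChildren(ctx, sub, child, localRoot);
                }
                else
                {
                    // Push the locally downloaded file into the library.
                    FileCreationInformation fci = new FileCreationInformation
                    {
                        Url = child.fileName,
                        Overwrite = true,
                        Content = System.IO.File.ReadAllBytes(Path.Combine(localRoot, child.fileName))
                    };
                    target.Files.Add(fci);
                    ctx.ExecuteQuery();
                }
            }
        }
    }
}

After the crawl completes, you would call something like SharePointUploader.Upload(root, "http://sharepoint/sites/archive", "Documents", prefixFilePath) to mirror the crawled structure into the target library.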
Complete Code and Consultant
On request, I'll send you the complete code and support you for a couple of hours. In return, please send me something from my Amazon wish list: http://www.amazon.it/registry/wishlist/3DUGGYP0KMLF8