95 lines
3.7 KiB
C#
95 lines
3.7 KiB
C#
using Furion.RemoteRequest.Extensions;
|
||
using System;
|
||
using System.Collections.Generic;
|
||
using System.IO;
|
||
using System.Linq;
|
||
using System.Net;
|
||
using System.Text;
|
||
using System.Text.RegularExpressions;
|
||
using System.Threading.Tasks;
|
||
|
||
namespace Vote.Services.Tools
|
||
{
|
||
/// <summary>
/// Helpers for downloading web resources and for stripping markup from HTML text.
/// </summary>
public class HtmlHelper
{
    /// <summary>
    /// Matches whole <c>&lt;script&gt;…&lt;/script&gt;</c> and <c>&lt;style&gt;…&lt;/style&gt;</c>
    /// blocks, including their content (Singleline lets <c>.</c> span line breaks).
    /// </summary>
    private static readonly Regex ScriptOrStyleBlock = new Regex(
        "(\\<script(.+?)\\</script\\>)|(\\<style(.+?)\\</style\\>)",
        RegexOptions.IgnoreCase | RegexOptions.Singleline);

    /// <summary>
    /// Matches any opening or closing tag: a <c>&lt;</c> up to the next <c>&gt;</c>.
    /// </summary>
    private static readonly Regex AnyTag = new Regex(@"</?[^>]*>");

    /// <summary>
    /// Downloads the source text of a web page with <see cref="HttpWebRequest"/>.
    /// </summary>
    /// <remarks>
    /// The body is decoded by a <see cref="StreamReader"/> created with
    /// <see cref="Encoding.Default"/>: a byte-order mark at the start of the body selects the
    /// encoding, otherwise <see cref="Encoding.Default"/> is used. The charset announced in the
    /// response headers is not consulted.
    /// </remarks>
    /// <param name="url">Address of the page; it is also sent as the Referer header.</param>
    /// <returns>The response body as text.</returns>
    public static string GetHtmlSource2(string url)
    {
        HttpWebRequest request = (HttpWebRequest)WebRequest.Create(url);
        request.Accept = "*/*"; // accept any content type
        request.UserAgent = "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.2; .NET CLR 1.1.4322)";
        request.AllowAutoRedirect = true; // follow redirects (e.g. 302)
        request.Referer = url; // the page refers to itself

        // Dispose response, stream and reader even when reading throws, so the
        // underlying connection is always released.
        using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
        using (Stream stream = response.GetResponseStream())
        using (StreamReader reader = new StreamReader(stream, Encoding.Default))
        {
            return reader.ReadToEnd();
        }
    }

    /// <summary>
    /// Removes all markup from an HTML fragment and returns the remaining text, trimmed.
    /// </summary>
    /// <remarks>
    /// Script and style blocks are removed together with their content first; after that every
    /// remaining tag is removed. HTML entities (such as <c>&amp;nbsp;</c>) are left untouched.
    /// </remarks>
    /// <param name="strHtml">The HTML text to clean.</param>
    /// <returns>
    /// The text without tags, trimmed; a null or empty input is returned unchanged.
    /// </returns>
    public static string CleanHtml(string strHtml)
    {
        if (string.IsNullOrEmpty(strHtml))
        {
            return strHtml;
        }

        string withoutBlocks = ScriptOrStyleBlock.Replace(strHtml, "");

        // Remove every tag in a single pass. Replacing match-by-match with string.Replace
        // could destroy the text of a later match (e.g. "<b>x<a<b>y" left "<a" behind)
        // and rescanned the whole string once per tag.
        return AnyTag.Replace(withoutBlocks, "").Trim();
    }

    /// <summary>
    /// Downloads the resource at <paramref name="Url"/> with an HTTP GET and returns its bytes
    /// encoded as a Base64 string.
    /// </summary>
    /// <param name="Url">Address of the resource to download.</param>
    /// <param name="TimeOut">Request timeout in milliseconds.</param>
    /// <returns>
    /// The Base64 encoding of the response body. If anything fails, the exception message is
    /// returned instead, so callers cannot tell success from failure by type alone.
    /// </returns>
    public static string HttpRequestGetImageBase64(string Url, int TimeOut = 100000)
    {
        try
        {
            HttpWebRequest request = (HttpWebRequest)WebRequest.Create(Url);
            request.ContentType = "application/x-www-form-urlencoded";
            request.Method = WebRequestMethods.Http.Get;
            request.Timeout = TimeOut;

            // Dispose the response and both streams on every path, including failures.
            using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
            using (Stream responseStream = response.GetResponseStream())
            using (MemoryStream buffer = new MemoryStream())
            {
                responseStream.CopyTo(buffer);
                return Convert.ToBase64String(buffer.ToArray());
            }
        }
        catch (Exception ex)
        {
            // Kept for backward compatibility: existing callers receive the error text
            // as the return value rather than an exception.
            return ex.Message;
        }
    }
}
|
||
}
|