Files
number_zj/20220330_Vote/Vote.Services/Tools/HtmlHelper.cs
2023-01-11 15:25:18 +08:00

95 lines
3.7 KiB
C#
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
using Furion.RemoteRequest.Extensions;
using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using System.Net;
using System.Text;
using System.Text.RegularExpressions;
using System.Threading.Tasks;
namespace Vote.Services.Tools
{
/// <summary>
/// Helpers for downloading web content and reducing HTML markup to plain text.
/// </summary>
public class HtmlHelper
{
    /// <summary>
    /// Matches complete script and style elements, including their content.
    /// Singleline lets the content span several lines.
    /// </summary>
    private static readonly Regex ScriptAndStyleRegex = new Regex(
        "(\\<script(.+?)\\</script\\>)|(\\<style(.+?)\\</style\\>)",
        RegexOptions.IgnoreCase | RegexOptions.Singleline);

    /// <summary>
    /// Matches any opening or closing tag.
    /// </summary>
    private static readonly Regex TagRegex = new Regex(@"</?[^>]*>", RegexOptions.IgnoreCase);

    /// <summary>
    /// Downloads the source of a web page with <see cref="HttpWebRequest"/>.
    /// The text is decoded with <see cref="Encoding.Default"/> unless the response
    /// starts with a byte order mark, in which case the encoding indicated by the
    /// mark is used.
    /// </summary>
    /// <param name="url">Address of the page to download.</param>
    /// <returns>The response body as text.</returns>
    /// <remarks>
    /// Nothing is caught here: any exception raised while creating or executing the
    /// request propagates to the caller.
    /// </remarks>
    public static string GetHtmlSource2(string url)
    {
        HttpWebRequest request = (HttpWebRequest)WebRequest.Create(url);
        request.Accept = "*/*"; // accept any content type
        request.UserAgent = "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.2; .NET CLR 1.1.4322)";
        request.AllowAutoRedirect = true; // follow redirects
        request.Referer = url; // present the page itself as the referrer

        // Dispose response, stream and reader even when reading fails, so the
        // underlying connection is always released.
        using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
        using (Stream stream = response.GetResponseStream())
        using (StreamReader reader = new StreamReader(stream, Encoding.Default, true))
        {
            return reader.ReadToEnd();
        }
    }

    /// <summary>
    /// Removes all markup from an HTML fragment and returns the remaining text.
    /// Script and style elements are removed together with their content; for every
    /// other tag only the tag itself is removed.
    /// </summary>
    /// <param name="strHtml">HTML to clean. May be null or empty.</param>
    /// <returns>
    /// The input unchanged when it is null or empty; otherwise the text without tags,
    /// trimmed of leading and trailing white space.
    /// </returns>
    public static string CleanHtml(string strHtml)
    {
        if (string.IsNullOrEmpty(strHtml))
        {
            return strHtml;
        }

        // Drop scripts and style sheets first so their content does not end up in the text.
        string withoutScripts = ScriptAndStyleRegex.Replace(strHtml, "");

        // Remove each tag at the position where it was matched. Removing tags by
        // their text (string.Replace) can cut a piece out of a longer tag matched
        // later and leave fragments of markup behind.
        return TagRegex.Replace(withoutScripts, "").Trim();
    }

    /// <summary>
    /// Downloads the resource at <paramref name="Url"/> and returns its bytes encoded
    /// as a Base64 string.
    /// </summary>
    /// <param name="Url">Address of the resource to download.</param>
    /// <param name="TimeOut">Request timeout in milliseconds.</param>
    /// <returns>
    /// The Base64 encoded response body. When any exception occurs, the exception
    /// message is returned instead; callers have to account for a return value that
    /// is an error text rather than Base64 data.
    /// </returns>
    public static string HttpRequestGetImageBase64(string Url, int TimeOut = 100000)
    {
        try
        {
            HttpWebRequest request = (HttpWebRequest)WebRequest.Create(Url);
            request.ContentType = "application/x-www-form-urlencoded";
            // HTTP method tokens are case-sensitive; use the canonical upper-case form.
            request.Method = "GET";
            request.Timeout = TimeOut;

            // Dispose response and streams in every case so the connection is released.
            using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
            using (Stream responseStream = response.GetResponseStream())
            using (MemoryStream memoryStream = new MemoryStream())
            {
                responseStream.CopyTo(memoryStream);
                return Convert.ToBase64String(memoryStream.ToArray());
            }
        }
        catch (Exception ex)
        {
            // Kept for backward compatibility: failures are reported through the return value.
            return ex.Message;
        }
    }
}
}