增加下载微信公众号文章功能

2023-01-11 15:25:18 +08:00
parent ac3999951d
commit 57129b3861
732 changed files with 23827 additions and 1 deletions
--- a/20220330_Vote/Vote.Services/Tools/HtmlHelper.cs
+++ b/20220330_Vote/Vote.Services/Tools/HtmlHelper.cs
@@ -0,0 +1,94 @@
+using Furion.RemoteRequest.Extensions;
+using System;
+using System.Collections.Generic;
+using System.IO;
+using System.Linq;
+using System.Net;
+using System.Text;
+using System.Text.RegularExpressions;
+using System.Threading.Tasks;
+
+namespace Vote.Services.Tools
+{
+    /// <summary>
+    /// Helpers for downloading web pages and images over HTTP and for
+    /// reducing HTML markup to plain text.
+    /// </summary>
+    public class HtmlHelper
+    {
+        // Matches complete <script>...</script> and <style>...</style> elements,
+        // content included. Singleline lets '.' cross line breaks.
+        private static readonly Regex ScriptAndStyleRegex = new Regex(
+            "(\\<script(.+?)\\</script\\>)|(\\<style(.+?)\\</style\\>)",
+            RegexOptions.IgnoreCase | RegexOptions.Singleline | RegexOptions.Compiled);
+
+        // Matches one opening or closing tag: '<', optional '/', then everything
+        // up to the first '>'.
+        private static readonly Regex TagRegex = new Regex(@"</?[^>]*>", RegexOptions.Compiled);
+
+        /// <summary>
+        /// Downloads the source of a web page with <see cref="HttpWebRequest"/>.
+        /// The body is decoded with <see cref="Encoding.Default"/>; the reader is
+        /// created through the StreamReader(Stream, Encoding) constructor, which
+        /// the original author relied on to handle pages that start with a BOM.
+        /// </summary>
+        /// <param name="url">Address of the page; also sent as the Referer header.</param>
+        /// <returns>The response body as text.</returns>
+        public static string GetHtmlSource2(string url)
+        {
+            HttpWebRequest request = (HttpWebRequest)WebRequest.Create(url);
+            request.Accept = "*/*"; // accept any content type
+            request.UserAgent = "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.2; .NET CLR 1.1.4322)";
+            request.AllowAutoRedirect = true; // follow 3xx redirects
+            request.Referer = url; // present the page as its own referrer
+
+            // Dispose response, stream and reader even when reading throws, so the
+            // underlying connection is always released.
+            using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
+            using (Stream stream = response.GetResponseStream())
+            using (StreamReader reader = new StreamReader(stream, Encoding.Default))
+            {
+                return reader.ReadToEnd();
+            }
+        }
+
+        /// <summary>
+        /// Removes all HTML tags from the input and keeps only the text.
+        /// Script and style elements are removed together with their content.
+        /// </summary>
+        /// <param name="strHtml">HTML fragment or document; may be null or empty.</param>
+        /// <returns>
+        /// The trimmed plain text, or <paramref name="strHtml"/> itself when it is
+        /// null or empty.
+        /// </returns>
+        public static string CleanHtml(string strHtml)
+        {
+            if (string.IsNullOrEmpty(strHtml))
+            {
+                return strHtml;
+            }
+
+            // Drop script/style blocks first so their content does not survive as text.
+            string withoutBlocks = ScriptAndStyleRegex.Replace(strHtml, string.Empty);
+
+            // Strip every tag in a single pass. A per-match string.Replace would
+            // rescan the whole text for each tag and could leave fragments behind
+            // when one tag's text also occurs inside a later, malformed tag.
+            return TagRegex.Replace(withoutBlocks, string.Empty).Trim();
+        }
+
+        /// <summary>
+        /// Downloads the resource at <paramref name="Url"/> with an HTTP GET and
+        /// returns its bytes encoded as Base64.
+        /// </summary>
+        /// <param name="Url">Address of the image (or any other resource).</param>
+        /// <param name="TimeOut">Request timeout in milliseconds.</param>
+        /// <returns>
+        /// The Base64 text of the response body. If any exception occurs, the
+        /// exception message is returned instead; callers cannot tell the two
+        /// apart by type and must validate the result themselves.
+        /// </returns>
+        public static string HttpRequestGetImageBase64(string Url, int TimeOut = 100000)
+        {
+            try
+            {
+                HttpWebRequest request = (HttpWebRequest)WebRequest.Create(Url);
+                request.ContentType = "application/x-www-form-urlencoded";
+                request.Method = "Get";
+                request.Timeout = TimeOut;
+
+                // Dispose everything on both the success and the failure path.
+                using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
+                using (Stream responseStream = response.GetResponseStream())
+                using (MemoryStream memoryStream = new MemoryStream())
+                {
+                    responseStream.CopyTo(memoryStream);
+                    return Convert.ToBase64String(memoryStream.ToArray());
+                }
+            }
+            catch (Exception ex)
+            {
+                // Kept for backward compatibility: existing callers expect a string
+                // result rather than an exception.
+                return ex.Message;
+            }
+        }
+    }
+}