增加下载微信公众号文章功能

This commit is contained in:
范露尧
2023-01-11 15:25:18 +08:00
parent ac3999951d
commit 57129b3861
732 changed files with 23827 additions and 1 deletions

View File

@@ -0,0 +1,87 @@
using Aspose.Words;
using Furion.DynamicApiController;
using HtmlAgilityPack;
using Microsoft.AspNetCore.Mvc;
using Microsoft.AspNetCore.Mvc.ViewFeatures;
using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
using Vote.Services.Entities;
using Vote.Services.Tools;
namespace Vote.Services.ApiController
{
/// <summary>
/// WeChat endpoints: downloads a WeChat article page and extracts its title and body,
/// and converts HTML into a Word (.docx) document.
/// </summary>
[ApiDescriptionSettings("Vote", Order = 0)]
[Route("/wx")]
public class WxService : IDynamicApiController
{
    /// <summary>
    /// File name (without extension) used by <see cref="SaveDoc"/> when the supplied
    /// title is blank or consists only of characters that cannot appear in a file name.
    /// </summary>
    private const string DefaultDocumentName = "untitled";

    /// <summary>
    /// Downloads the page at <paramref name="url"/> and extracts the article from it.
    /// </summary>
    /// <remarks>
    /// The work is done synchronously (the HTML helpers are blocking); the method only
    /// returns a <see cref="Task{TResult}"/> to keep the original signature.
    /// NOTE(review): the URL is fetched server-side without any host restriction, so this
    /// endpoint can be pointed at arbitrary addresses — consider an allow-list.
    /// </remarks>
    /// <param name="url">Address of the article page.</param>
    /// <returns>
    /// <c>null</c> when <paramref name="url"/> is null or blank. Otherwise an
    /// <see cref="Article"/> whose <c>Detail</c> is the inner HTML of the
    /// <c>js_content</c> element (with lazy-loaded images inlined), or the whole page
    /// HTML when the expected WeChat article structure is not found.
    /// </returns>
    public Task<Article> GetArticle(string url)
    {
        if (string.IsNullOrWhiteSpace(url))
        {
            return Task.FromResult<Article>(null);
        }

        var html = Tools.HtmlHelper.GetHtmlSource2(url);
        var htmlDoc = new HtmlDocument();
        htmlDoc.LoadHtml(html);

        var article = new Article();
        var root = htmlDoc.DocumentNode;

        if (root.SelectSingleNode("//div[@class='rich_media_wrp']") == null)
        {
            // Not the expected article layout: hand back the raw page.
            article.Detail = root.InnerHtml;
            return Task.FromResult(article);
        }

        // A missing title element leaves Title unset instead of throwing.
        var titleNode = root.SelectSingleNode("//h1[@id='activity-name']");
        if (titleNode != null)
        {
            article.Title = titleNode.InnerText.Replace(" ", "").Replace("\n", "");
        }

        var contents = root.SelectSingleNode("//div[@id='js_content']");
        if (contents == null)
        {
            // Wrapper present but no body element: fall back to the raw page as well.
            article.Detail = root.InnerHtml;
            return Task.FromResult(article);
        }

        RenameSectionsToParagraphs(contents);
        InlineLazyImages(contents);

        article.Detail = contents.InnerHtml;
        return Task.FromResult(article);
    }

    /// <summary>
    /// Creates a Word document from <paramref name="html"/> and saves it as
    /// "<paramref name="title"/>.docx" in the process's current directory.
    /// </summary>
    /// <remarks>
    /// Characters that are not valid in a file name (including path separators) are
    /// replaced with '_' so the title can neither escape the target directory nor make
    /// the save fail. A title that is already a valid file name yields the same path as before.
    /// </remarks>
    /// <param name="title">Document title, used as the file name.</param>
    /// <param name="html">HTML to insert into the document; <c>null</c> is treated as empty.</param>
    /// <returns>Full path of the saved .docx file on the server.</returns>
    public Task<string> SaveDoc(string title, string html)
    {
        var doc = new Aspose.Words.Document();
        DocumentBuilder build = new(doc);
        build.Font.Name = "宋体";
        build.InsertHtml(html ?? string.Empty);

        var path = Path.Combine(Environment.CurrentDirectory, BuildSafeFileName(title) + ".docx");
        doc.Save(path, SaveFormat.Docx);
        return Task.FromResult(path);
    }

    /// <summary>
    /// Renames every <c>section</c> element below <paramref name="contents"/> to <c>p</c>.
    /// </summary>
    private static void RenameSectionsToParagraphs(HtmlNode contents)
    {
        // ".//" keeps the query relative to the article body; a leading "//" would be
        // evaluated against the whole document.
        var sections = contents.SelectNodes(".//section");
        if (sections == null)
        {
            return;
        }

        foreach (HtmlNode section in sections)
        {
            section.Name = "p";
        }
    }

    /// <summary>
    /// For each image below <paramref name="contents"/> that has no usable <c>src</c> but a
    /// non-empty <c>data-src</c>, downloads the image and embeds it as a base64 data URI.
    /// </summary>
    private static void InlineLazyImages(HtmlNode contents)
    {
        // SelectNodes returns null (not an empty collection) when nothing matches.
        var imgs = contents.SelectNodes(".//img");
        if (imgs == null)
        {
            return;
        }

        foreach (HtmlNode img in imgs)
        {
            var src = img.Attributes["src"];
            if (src != null && !string.IsNullOrWhiteSpace(src.Value))
            {
                continue;
            }

            var datasrc = img.Attributes["data-src"];
            if (datasrc == null || string.IsNullOrWhiteSpace(datasrc.Value))
            {
                continue;
            }

            // NOTE(review): the data URI is always labelled image/jpeg, whatever the real format is.
            img.SetAttributeValue("src", "data:image/jpeg;base64," + Tools.HtmlHelper.HttpRequestGetImageBase64(datasrc.Value));
            datasrc.Remove();
        }
    }

    /// <summary>
    /// Turns an arbitrary title into a string that is safe to use as a single file name.
    /// </summary>
    private static string BuildSafeFileName(string title)
    {
        if (string.IsNullOrWhiteSpace(title))
        {
            return DefaultDocumentName;
        }

        // The platform list differs between operating systems, so the usual Windows-reserved
        // characters and both path separators are always included.
        var invalid = new HashSet<char>(Path.GetInvalidFileNameChars())
        {
            '/', '\\', ':', '*', '?', '"', '<', '>', '|'
        };

        var builder = new StringBuilder(title.Length);
        foreach (var ch in title)
        {
            builder.Append(invalid.Contains(ch) ? '_' : ch);
        }

        // Trailing dots and spaces are not allowed in Windows file names.
        var name = builder.ToString().Trim().TrimEnd('.', ' ');
        return name.Length == 0 ? DefaultDocumentName : name;
    }
}
}

View File

@@ -0,0 +1,55 @@
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
namespace Vote.Services.Entities
{
/// <summary>
/// Data object describing a single article: its identity, title, links,
/// counters, body detail and author information.
/// </summary>
public class Article
{
/// <summary>
/// Identifier of the article.
/// NOTE(review): the original comment was empty; meaning inferred from the name — confirm.
/// </summary>
public string Id { get; set; }
/// <summary>
/// Title of the article.
/// </summary>
public string Title { get; set; }
/// <summary>
/// Summary (abstract) of the article.
/// </summary>
public string Summary { get; set; }
/// <summary>
/// Link to the article.
/// </summary>
public string Url { get; set; }
/// <summary>
/// Number of recommendations.
/// </summary>
public long Diggit { get; set; }
/// <summary>
/// Number of comments.
/// </summary>
public long Comment { get; set; }
/// <summary>
/// Number of reads (views).
/// </summary>
public long View { get; set; }
/// <summary>
/// Detail of the article.
/// </summary>
public string Detail { get; set; }
/// <summary>
/// Author of the article.
/// </summary>
public string Author { get; set; }
/// <summary>
/// Link to the author.
/// </summary>
public string AuthorUrl { get; set; }
}
}

View File

@@ -0,0 +1,94 @@
using Furion.RemoteRequest.Extensions;
using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using System.Net;
using System.Text;
using System.Text.RegularExpressions;
using System.Threading.Tasks;
namespace Vote.Services.Tools
{
/// <summary>
/// Helpers for downloading web resources over HTTP and for reducing HTML to plain text.
/// </summary>
public class HtmlHelper
{
    /// <summary>Matches whole <c>script</c> and <c>style</c> elements, including their content.</summary>
    private static readonly Regex ScriptAndStyleRegex = new Regex(
        "(\\<script(.+?)\\</script\\>)|(\\<style(.+?)\\</style\\>)",
        RegexOptions.IgnoreCase | RegexOptions.Singleline);

    /// <summary>Matches a single opening or closing tag.</summary>
    private static readonly Regex TagRegex = new Regex(@"</?[^>]*>", RegexOptions.IgnoreCase);

    /// <summary>
    /// Downloads the page at <paramref name="url"/> with <see cref="HttpWebRequest"/> and
    /// returns its source text.
    /// </summary>
    /// <remarks>
    /// The body is decoded by a <see cref="StreamReader"/> created with
    /// <see cref="Encoding.Default"/>; the reader honours a byte-order mark when the
    /// response carries one. Nothing is caught here: failures while creating the request
    /// or performing the HTTP call propagate to the caller.
    /// </remarks>
    /// <param name="url">Address of the page.</param>
    /// <returns>The page source as text.</returns>
    public static string GetHtmlSource2(string url)
    {
        HttpWebRequest request = (HttpWebRequest)WebRequest.Create(url);
        request.Accept = "*/*"; // accept any content type
        request.UserAgent = "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.2; .NET CLR 1.1.4322)";
        request.AllowAutoRedirect = true; // follow redirects
        request.Referer = url; // the page refers to itself

        // Dispose response, stream and reader even when reading throws.
        using var response = (HttpWebResponse)request.GetResponse();
        using var stream = response.GetResponseStream();
        using var reader = new StreamReader(stream, Encoding.Default);
        return reader.ReadToEnd();
    }

    /// <summary>
    /// Removes all markup from an HTML string and keeps only the text.
    /// </summary>
    /// <remarks>
    /// <c>script</c> and <c>style</c> elements are dropped together with their content,
    /// then every remaining tag is removed and the result is trimmed.
    /// </remarks>
    /// <param name="strHtml">HTML to clean.</param>
    /// <returns>
    /// The text without tags; <paramref name="strHtml"/> itself when it is null or empty.
    /// </returns>
    public static string CleanHtml(string strHtml)
    {
        if (string.IsNullOrEmpty(strHtml))
        {
            return strHtml;
        }

        var withoutScripts = ScriptAndStyleRegex.Replace(strHtml, "");

        // One regex pass removes each matched tag at its own position. Removing the matched
        // text with string.Replace could alter a later tag before it was processed and leave
        // fragments behind.
        return TagRegex.Replace(withoutScripts, "").Trim();
    }

    /// <summary>
    /// Downloads the resource at <paramref name="Url"/> and returns its bytes as a base64 string.
    /// </summary>
    /// <param name="Url">Address of the image.</param>
    /// <param name="TimeOut">Request timeout in milliseconds.</param>
    /// <returns>
    /// The base64-encoded content, or an empty string when the download fails for any
    /// reason (this method does not throw).
    /// </returns>
    public static string HttpRequestGetImageBase64(string Url, int TimeOut = 100000)
    {
        try
        {
            HttpWebRequest request = (HttpWebRequest)WebRequest.Create(Url);
            request.Method = "GET";
            request.Timeout = TimeOut;

            using var response = (HttpWebResponse)request.GetResponse();
            using var responseStream = response.GetResponseStream();
            using var buffer = new MemoryStream();
            responseStream.CopyTo(buffer);
            return Convert.ToBase64String(buffer.ToArray());
        }
        catch (Exception ex)
        {
            // Best effort: report the failure to trace listeners and return no data.
            // The exception text must not be returned, because callers treat the result
            // as base64 image content.
            System.Diagnostics.Trace.TraceWarning("HttpRequestGetImageBase64 failed for '" + Url + "': " + ex.Message);
            return string.Empty;
        }
    }
}
}

View File

@@ -9,7 +9,9 @@
</PropertyGroup>
<ItemGroup>
<PackageReference Include="Aspose.Words" Version="23.1.0" />
<PackageReference Include="Furion" Version="4.5.0" />
<PackageReference Include="HtmlAgilityPack" Version="1.11.46" />
</ItemGroup>
<ItemGroup>

View File

@@ -45,6 +45,17 @@
</summary>
<returns></returns>
</member>
<member name="T:Vote.Services.ApiController.WxService">
<summary>
微信
</summary>
</member>
<member name="M:Vote.Services.ApiController.WxService.GetArticle(System.String)">
<summary>
</summary>
<returns></returns>
</member>
<member name="P:Vote.Services.Dto.ProjectsInput.type">
<summary>
项目类型
@@ -120,6 +131,61 @@
项目类型
</summary>
</member>
<member name="T:Vote.Services.Entities.Article">
<summary>
</summary>
</member>
<member name="P:Vote.Services.Entities.Article.Id">
<summary>
</summary>
</member>
<member name="P:Vote.Services.Entities.Article.Title">
<summary>
标题
</summary>
</member>
<member name="P:Vote.Services.Entities.Article.Summary">
<summary>
概要
</summary>
</member>
<member name="P:Vote.Services.Entities.Article.Url">
<summary>
文章链接
</summary>
</member>
<member name="P:Vote.Services.Entities.Article.Diggit">
<summary>
推荐数
</summary>
</member>
<member name="P:Vote.Services.Entities.Article.Comment">
<summary>
评论数
</summary>
</member>
<member name="P:Vote.Services.Entities.Article.View">
<summary>
阅读数
</summary>
</member>
<member name="P:Vote.Services.Entities.Article.Detail">
<summary>
明细
</summary>
</member>
<member name="P:Vote.Services.Entities.Article.Author">
<summary>
作者
</summary>
</member>
<member name="P:Vote.Services.Entities.Article.AuthorUrl">
<summary>
作者链接
</summary>
</member>
<member name="T:Vote.Services.Entities.Experts">
<summary>
专家表
@@ -262,5 +328,20 @@
</summary>
<returns></returns>
</member>
<member name="M:Vote.Services.Tools.HtmlHelper.GetHtmlSource2(System.String)">
<summary>方法一:比较推荐
用HttpWebRequest取得网页源码
对于带BOM的网页很有效不管是什么编码都能正确识别
</summary>
<param name="url">网页地址</param>
<returns>返回网页源文件</returns>
</member>
<member name="M:Vote.Services.Tools.HtmlHelper.CleanHtml(System.String)">
<summary>
去掉HTML中的所有标签,只留下纯文本
</summary>
<param name="strHtml"></param>
<returns></returns>
</member>
</members>
</doc>