增加下载微信公众号文章功能
This commit is contained in:
87
20220330_Vote/Vote.Services/ApiController/WxService.cs
Normal file
87
20220330_Vote/Vote.Services/ApiController/WxService.cs
Normal file
@@ -0,0 +1,87 @@
|
||||
using Aspose.Words;
|
||||
using Furion.DynamicApiController;
|
||||
using HtmlAgilityPack;
|
||||
using Microsoft.AspNetCore.Mvc;
|
||||
using Microsoft.AspNetCore.Mvc.ViewFeatures;
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.IO;
|
||||
using System.Linq;
|
||||
using System.Text;
|
||||
using System.Threading.Tasks;
|
||||
using Vote.Services.Entities;
|
||||
using Vote.Services.Tools;
|
||||
|
||||
namespace Vote.Services.ApiController
|
||||
{
|
||||
/// <summary>
/// WeChat API: downloads an article page and extracts its title and body,
/// and exports HTML to a Word document.
/// </summary>
[ApiDescriptionSettings("Vote", Order = 0)]
[Route("/wx")]
public class WxService : IDynamicApiController
{
    /// <summary>File name used by SaveDoc when the title contains no usable characters.</summary>
    private const string FallbackFileName = "article";

    /// <summary>
    /// Characters replaced before a title is used as a file name: the platform's invalid
    /// file-name characters plus the Windows-reserved set, so the result does not depend on
    /// the operating system the service runs on.
    /// </summary>
    private static readonly char[] UnsafeFileNameChars = Path.GetInvalidFileNameChars()
        .Concat(new[] { '\\', '/', ':', '*', '?', '"', '<', '>', '|' })
        .Distinct()
        .ToArray();

    /// <summary>
    /// Downloads the page at <paramref name="url"/> and extracts the article title and body HTML.
    /// Images in the body that only carry a <c>data-src</c> attribute are downloaded and inlined
    /// as Base64 data URIs, and <c>section</c> elements in the body are renamed to <c>p</c>.
    /// </summary>
    /// <param name="url">Address of the article page.</param>
    /// <returns>
    /// The extracted article; <c>null</c> when <paramref name="url"/> is null or blank.
    /// When the page has no <c>rich_media_wrp</c> container, <c>Detail</c> holds the whole page HTML.
    /// </returns>
    /// <remarks>
    /// The download and parsing run synchronously; the method stays <c>async</c> so that failures
    /// are reported through the returned task exactly as before.
    /// </remarks>
    public async Task<Article> GetArticle(string url)
    {
        if (string.IsNullOrWhiteSpace(url))
        {
            return null;
        }

        var html = Tools.HtmlHelper.GetHtmlSource2(url);
        var htmlDoc = new HtmlDocument();
        htmlDoc.LoadHtml(html);

        var article = new Article();
        var allContent = htmlDoc.DocumentNode.SelectSingleNode("//div[@class='rich_media_wrp']");
        if (allContent == null)
        {
            // Not the expected page layout: hand back the raw page.
            article.Detail = htmlDoc.DocumentNode.InnerHtml;
            return article;
        }

        // The two id lookups are deliberately document-wide ("//"), as in the original code.
        var titleNode = htmlDoc.DocumentNode.SelectSingleNode("//h1[@id='activity-name']");
        if (titleNode != null)
        {
            article.Title = titleNode.InnerText.Replace(" ", "").Replace("\n", "");
        }

        var contents = htmlDoc.DocumentNode.SelectSingleNode("//div[@id='js_content']");
        if (contents == null)
        {
            // Body container missing: fall back to the wrapper instead of dereferencing null.
            article.Detail = allContent.InnerHtml;
            return article;
        }

        NormalizeSections(contents);
        InlineLazyImages(contents);

        article.Detail = contents.InnerHtml;
        return article;
    }

    /// <summary>
    /// Builds a Word document from <paramref name="html"/> and saves it as
    /// "&lt;title&gt;.docx" in the process's current directory.
    /// </summary>
    /// <param name="title">
    /// Document title used as the file name. Characters that are unsafe in file names are
    /// replaced with '_' so the value cannot escape the target directory.
    /// </param>
    /// <param name="html">HTML to insert into the document; <c>null</c> is treated as empty.</param>
    /// <returns>Full path of the saved file.</returns>
    public async Task<string> SaveDoc(string title, string html)
    {
        Aspose.Words.Document doc = new Aspose.Words.Document();
        DocumentBuilder build = new(doc);
        build.Font.Name = "宋体";
        build.InsertHtml(html ?? string.Empty);

        var path = Path.Combine(Environment.CurrentDirectory, ToSafeFileName(title) + ".docx");
        doc.Save(path, SaveFormat.Docx);
        return path;
    }

    /// <summary>Renames every <c>section</c> element below <paramref name="contents"/> to <c>p</c>.</summary>
    private static void NormalizeSections(HtmlNode contents)
    {
        // ".//" keeps the query inside the body; a leading "//" would scan the whole document.
        var sections = contents.SelectNodes(".//section");
        if (sections == null)
        {
            return;
        }

        foreach (HtmlNode section in sections)
        {
            section.Name = "p";
        }
    }

    /// <summary>
    /// For each image below <paramref name="contents"/> that has no usable <c>src</c> but has a
    /// <c>data-src</c>, downloads the image and stores it in <c>src</c> as a Base64 data URI.
    /// </summary>
    private static void InlineLazyImages(HtmlNode contents)
    {
        // SelectNodes returns null (not an empty collection) when nothing matches.
        var imgs = contents.SelectNodes(".//img");
        if (imgs == null)
        {
            return;
        }

        foreach (HtmlNode img in imgs)
        {
            var src = img.Attributes["src"];
            if (src != null && !string.IsNullOrWhiteSpace(src.Value))
            {
                continue;
            }

            var datasrc = img.Attributes["data-src"];
            if (datasrc == null || string.IsNullOrWhiteSpace(datasrc.Value))
            {
                continue;
            }

            var base64 = Tools.HtmlHelper.HttpRequestGetImageBase64(datasrc.Value);

            // An empty result means nothing usable was downloaded; keep the remote address
            // rather than emitting an empty data URI.
            img.SetAttributeValue(
                "src",
                string.IsNullOrEmpty(base64) ? datasrc.Value : "data:image/jpeg;base64," + base64);
            datasrc.Remove();
        }
    }

    /// <summary>Turns a free-text title into a string that is safe to use as a file name.</summary>
    private static string ToSafeFileName(string title)
    {
        var trimmed = (title ?? string.Empty).Trim();
        var builder = new StringBuilder(trimmed.Length);
        foreach (var ch in trimmed)
        {
            var isUnsafe = char.IsControl(ch) || Array.IndexOf(UnsafeFileNameChars, ch) >= 0;
            builder.Append(isUnsafe ? '_' : ch);
        }

        // Strip leading/trailing dots and spaces so names such as ".." cannot be produced.
        var safe = builder.ToString().Trim(' ', '.');
        return safe.Length == 0 ? FallbackFileName : safe;
    }
}
|
||||
}
|
||||
55
20220330_Vote/Vote.Services/Entities/Article.cs
Normal file
55
20220330_Vote/Vote.Services/Entities/Article.cs
Normal file
@@ -0,0 +1,55 @@
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.Linq;
|
||||
using System.Text;
|
||||
using System.Threading.Tasks;
|
||||
|
||||
namespace Vote.Services.Entities
|
||||
{
|
||||
/// <summary>
/// Plain data holder describing an article: identity, link, counters, body and author.
/// </summary>
public class Article
{
    /// <summary>Identifier of the article.</summary>
    public string Id { get; set; }

    /// <summary>Title.</summary>
    public string Title { get; set; }

    /// <summary>Summary.</summary>
    public string Summary { get; set; }

    /// <summary>Link to the article.</summary>
    public string Url { get; set; }

    /// <summary>Number of recommendations.</summary>
    public long Diggit { get; set; }

    /// <summary>Number of comments.</summary>
    public long Comment { get; set; }

    /// <summary>Number of reads.</summary>
    public long View { get; set; }

    /// <summary>Detail (the article body).</summary>
    public string Detail { get; set; }

    /// <summary>Author.</summary>
    public string Author { get; set; }

    /// <summary>Link to the author.</summary>
    public string AuthorUrl { get; set; }
}
|
||||
}
|
||||
94
20220330_Vote/Vote.Services/Tools/HtmlHelper.cs
Normal file
94
20220330_Vote/Vote.Services/Tools/HtmlHelper.cs
Normal file
@@ -0,0 +1,94 @@
|
||||
using Furion.RemoteRequest.Extensions;
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.IO;
|
||||
using System.Linq;
|
||||
using System.Net;
|
||||
using System.Text;
|
||||
using System.Text.RegularExpressions;
|
||||
using System.Threading.Tasks;
|
||||
|
||||
namespace Vote.Services.Tools
|
||||
{
|
||||
/// <summary>
/// Static helpers for downloading pages and images over HTTP and for stripping markup from HTML.
/// </summary>
public class HtmlHelper
{
    /// <summary>Matches a whole script or style element, including its content.</summary>
    private static readonly Regex ScriptOrStyleBlock = new Regex(
        "(\\<script(.+?)\\</script\\>)|(\\<style(.+?)\\</style\\>)",
        RegexOptions.IgnoreCase | RegexOptions.Singleline);

    /// <summary>Matches any opening or closing tag.</summary>
    private static readonly Regex AnyTag = new Regex(@"</?[^>]*>", RegexOptions.IgnoreCase);

    /// <summary>
    /// Downloads the source of a web page with <see cref="HttpWebRequest"/>.
    /// The body is decoded as UTF-8 unless it starts with a byte-order mark, in which case
    /// the encoding indicated by the mark is used.
    /// </summary>
    /// <param name="url">Address of the page.</param>
    /// <returns>The page source.</returns>
    public static string GetHtmlSource2(string url)
    {
        HttpWebRequest request = (HttpWebRequest)WebRequest.Create(url);
        request.Accept = "*/*"; // accept any content type
        request.UserAgent = "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.2; .NET CLR 1.1.4322)";
        request.AllowAutoRedirect = true; // follow redirects
        request.Referer = url; // present the page itself as the referrer

        // Dispose response, stream and reader even when reading fails.
        using (var response = (HttpWebResponse)request.GetResponse())
        using (var stream = response.GetResponseStream())
        using (var reader = new StreamReader(stream, Encoding.UTF8, detectEncodingFromByteOrderMarks: true))
        {
            return reader.ReadToEnd();
        }
    }

    /// <summary>
    /// Removes script and style elements and then every remaining tag, leaving only text.
    /// </summary>
    /// <param name="strHtml">HTML to clean.</param>
    /// <returns>
    /// The trimmed text; <paramref name="strHtml"/> itself when it is null or empty.
    /// </returns>
    public static string CleanHtml(string strHtml)
    {
        if (string.IsNullOrEmpty(strHtml))
        {
            return strHtml;
        }

        var withoutBlocks = ScriptOrStyleBlock.Replace(strHtml, "");

        // A single regex pass removes exactly the matched spans. Removing each match with
        // string.Replace could delete part of a later match and leave a fragment behind.
        return AnyTag.Replace(withoutBlocks, "").Trim();
    }

    /// <summary>
    /// Downloads the resource at <paramref name="Url"/> and returns its bytes as Base64.
    /// </summary>
    /// <param name="Url">Address of the image.</param>
    /// <param name="TimeOut">Request timeout in milliseconds.</param>
    /// <returns>
    /// The Base64 text, or an empty string when the download fails. Failures are reported
    /// through <see cref="System.Diagnostics.Trace"/> and never returned as if they were data.
    /// </returns>
    public static string HttpRequestGetImageBase64(string Url, int TimeOut = 100000)
    {
        try
        {
            HttpWebRequest request = (HttpWebRequest)WebRequest.Create(Url);
            request.Method = WebRequestMethods.Http.Get;
            request.Timeout = TimeOut;

            using (var response = (HttpWebResponse)request.GetResponse())
            using (var responseStream = response.GetResponseStream())
            using (var memoryStream = new MemoryStream())
            {
                responseStream.CopyTo(memoryStream);
                return Convert.ToBase64String(memoryStream.ToArray());
            }
        }
        catch (Exception ex)
        {
            // Best effort: callers embed the result directly, so signal failure with "no data".
            System.Diagnostics.Trace.TraceWarning(
                "HttpRequestGetImageBase64 failed for '{0}': {1}", Url, ex.Message);
            return string.Empty;
        }
    }
}
|
||||
}
|
||||
@@ -9,7 +9,9 @@
|
||||
</PropertyGroup>
|
||||
|
||||
<ItemGroup>
|
||||
<PackageReference Include="Aspose.Words" Version="23.1.0" />
|
||||
<PackageReference Include="Furion" Version="4.5.0" />
|
||||
<PackageReference Include="HtmlAgilityPack" Version="1.11.46" />
|
||||
</ItemGroup>
|
||||
|
||||
<ItemGroup>
|
||||
|
||||
@@ -45,6 +45,17 @@
|
||||
</summary>
|
||||
<returns></returns>
|
||||
</member>
|
||||
<member name="T:Vote.Services.ApiController.WxService">
|
||||
<summary>
|
||||
微信
|
||||
</summary>
|
||||
</member>
|
||||
<member name="M:Vote.Services.ApiController.WxService.GetArticle(System.String)">
|
||||
<summary>
|
||||
|
||||
</summary>
|
||||
<returns></returns>
|
||||
</member>
|
||||
<member name="P:Vote.Services.Dto.ProjectsInput.type">
|
||||
<summary>
|
||||
项目类型
|
||||
@@ -120,6 +131,61 @@
|
||||
项目类型
|
||||
</summary>
|
||||
</member>
|
||||
<member name="T:Vote.Services.Entities.Article">
|
||||
<summary>
|
||||
|
||||
</summary>
|
||||
</member>
|
||||
<member name="P:Vote.Services.Entities.Article.Id">
|
||||
<summary>
|
||||
|
||||
</summary>
|
||||
</member>
|
||||
<member name="P:Vote.Services.Entities.Article.Title">
|
||||
<summary>
|
||||
标题
|
||||
</summary>
|
||||
</member>
|
||||
<member name="P:Vote.Services.Entities.Article.Summary">
|
||||
<summary>
|
||||
概要
|
||||
</summary>
|
||||
</member>
|
||||
<member name="P:Vote.Services.Entities.Article.Url">
|
||||
<summary>
|
||||
文章链接
|
||||
</summary>
|
||||
</member>
|
||||
<member name="P:Vote.Services.Entities.Article.Diggit">
|
||||
<summary>
|
||||
推荐数
|
||||
</summary>
|
||||
</member>
|
||||
<member name="P:Vote.Services.Entities.Article.Comment">
|
||||
<summary>
|
||||
评论数
|
||||
</summary>
|
||||
</member>
|
||||
<member name="P:Vote.Services.Entities.Article.View">
|
||||
<summary>
|
||||
阅读数
|
||||
</summary>
|
||||
</member>
|
||||
<member name="P:Vote.Services.Entities.Article.Detail">
|
||||
<summary>
|
||||
明细
|
||||
</summary>
|
||||
</member>
|
||||
<member name="P:Vote.Services.Entities.Article.Author">
|
||||
<summary>
|
||||
作者
|
||||
</summary>
|
||||
</member>
|
||||
<member name="P:Vote.Services.Entities.Article.AuthorUrl">
|
||||
<summary>
|
||||
作者链接
|
||||
</summary>
|
||||
</member>
|
||||
<member name="T:Vote.Services.Entities.Experts">
|
||||
<summary>
|
||||
专家表
|
||||
@@ -262,5 +328,20 @@
|
||||
</summary>
|
||||
<returns></returns>
|
||||
</member>
|
||||
<member name="M:Vote.Services.Tools.HtmlHelper.GetHtmlSource2(System.String)">
|
||||
<summary>方法一:比较推荐
|
||||
用HttpWebRequest取得网页源码
|
||||
对于带BOM的网页很有效,不管是什么编码都能正确识别
|
||||
</summary>
|
||||
<param name="url">网页地址" </param>
|
||||
<returns>返回网页源文件</returns>
|
||||
</member>
|
||||
<member name="M:Vote.Services.Tools.HtmlHelper.CleanHtml(System.String)">
|
||||
<summary>
|
||||
去掉HTML中的所有标签,只留下纯文本
|
||||
</summary>
|
||||
<param name="strHtml"></param>
|
||||
<returns></returns>
|
||||
</member>
|
||||
</members>
|
||||
</doc>
|
||||
|
||||
Reference in New Issue
Block a user