CrawlerHandler.cs 7.3 KB


  1. using System;
  2. using System.IO;
  3. using System.Linq;
  4. using System.Net;
  5. using System.Web;
  6. namespace SiteCore.ueditor
  7. {
  /// <summary>
  /// Request handler that reads the "source[]" form values, runs a
  /// <see cref="Crawler"/> fetch for each of them, and writes the per-source
  /// results back as JSON.
  /// </summary>
  public class CrawlerHandler : Handler
  {
      private string[] Sources;
      private Crawler[] Crawlers;

      public CrawlerHandler(HttpContext context) : base(context) { }

      /// <summary>
      /// Fetches every posted source URL in order and reports, for each one,
      /// its state, its original URL and the URL it was stored under.
      /// Writes an error state instead when no source was posted.
      /// </summary>
      public override void Process()
      {
          Sources = Request.Form.GetValues("source[]");

          bool hasSources = Sources != null && Sources.Length != 0;
          if (!hasSources)
          {
              WriteJson(new
              {
                  state = "参数错误:没有指定抓取源"
              });
              return;
          }

          // Fetch sequentially, preserving the order in which sources were posted.
          var fetched = new Crawler[Sources.Length];
          for (int i = 0; i < fetched.Length; i++)
          {
              fetched[i] = new Crawler(Sources[i], Server).Fetch();
          }
          Crawlers = fetched;

          // Property names below are the JSON keys of the response.
          var results = from crawler in Crawlers
                        select new
                        {
                            state = crawler.State,
                            source = crawler.SourceUrl,
                            url = crawler.ServerUrl
                        };

          WriteJson(new
          {
              state = "SUCCESS",
              list = results
          });
      }
  }
  /// <summary>
  /// Downloads a single remote image and stores it on disk.
  /// After <see cref="Fetch"/> returns, <see cref="State"/> is "SUCCESS" or a
  /// description of why the fetch was rejected or failed, and
  /// <see cref="ServerUrl"/> holds the formatted storage path once a save
  /// path has been accepted.
  /// </summary>
  public class Crawler
  {
      // Extensions a fetched file may be stored with. The extension is derived
      // from the caller-supplied URL, so without this list a request could make
      // the server store an executable file (for example ".aspx").
      private static readonly string[] AllowedExtensions =
      {
          ".png", ".jpg", ".jpeg", ".gif", ".bmp", ".webp"
      };

      public string SourceUrl { get; set; }
      public string ServerUrl { get; set; }
      public string State { get; set; }
      private HttpServerUtility Server { get; set; }

      public Crawler(string sourceUrl, HttpServerUtility server)
      {
          this.SourceUrl = sourceUrl;
          this.Server = server;
      }

      /// <summary>
      /// Validates <see cref="SourceUrl"/>, downloads it, and writes the bytes
      /// to the path produced from the "catcherPathFormat" setting.
      /// Never throws for a bad URL or a failed download; the outcome is
      /// reported through <see cref="State"/>.
      /// </summary>
      /// <returns>This instance, so calls can be chained.</returns>
      public Crawler Fetch()
      {
          if (!IsExternalIPAddress(this.SourceUrl))
          {
              State = "INVALID_URL";
              return this;
          }

          try
          {
              // The "&tp=webp" hint is removed from the requested URL only;
              // SourceUrl itself is reported back unchanged.
              string requestUrl = this.SourceUrl.Replace("&tp=webp", "");
              var request = WebRequest.Create(requestUrl) as HttpWebRequest;
              if (request == null)
              {
                  State = "INVALID_URL";
                  return this;
              }

              // NOTE(review): automatic redirects are followed without re-running
              // the private-address check on the redirect target - confirm whether
              // AllowAutoRedirect should be disabled here.
              using (var response = (HttpWebResponse)request.GetResponse())
              {
                  if (response.StatusCode != HttpStatusCode.OK)
                  {
                      State = "Url returns " + response.StatusCode + ", " + response.StatusDescription;
                      return this;
                  }

                  string contentType = response.ContentType ?? string.Empty;
                  if (contentType.IndexOf("image", StringComparison.OrdinalIgnoreCase) == -1)
                  {
                      State = "Url is not an image";
                      return this;
                  }

                  // File name comes from the URL with its query string removed.
                  string url = this.SourceUrl;
                  int queryStart = url.IndexOf('?');
                  string urlWithoutQuery = queryStart == -1 ? url : url.Substring(0, queryStart);

                  string serUrl = PathFormatter.Format(Path.GetFileName(urlWithoutQuery), Config.GetString("catcherPathFormat"));
                  string savePath = serUrl.Replace("d/um", webConfig.umPicPath);

                  // When the file name has no extension, fall back to the
                  // "wx_fmt" query parameter. Only the final path segment is
                  // inspected, so a dot in a directory name does not count.
                  if (!Path.HasExtension(savePath))
                  {
                      string format = ExtractWxFormat(url);
                      if (!string.IsNullOrEmpty(format))
                      {
                          savePath += "." + format;
                          serUrl += "." + format;
                      }
                  }

                  string extension = Path.GetExtension(savePath);
                  if (string.IsNullOrEmpty(extension))
                  {
                      State = "Url has no file extension";
                      return this;
                  }
                  if (!AllowedExtensions.Contains(extension, StringComparer.OrdinalIgnoreCase))
                  {
                      State = "File extension is not allowed";
                      return this;
                  }

                  ServerUrl = serUrl;

                  string directory = Path.GetDirectoryName(savePath);
                  if (!string.IsNullOrEmpty(directory))
                  {
                      // No-op when the directory already exists.
                      Directory.CreateDirectory(directory);
                  }

                  // Buffer the whole body first so a failed download never
                  // leaves a partial file on disk.
                  byte[] bytes;
                  using (var stream = response.GetResponseStream())
                  using (var ms = new MemoryStream())
                  {
                      byte[] buffer = new byte[4096];
                      int count;
                      while ((count = stream.Read(buffer, 0, buffer.Length)) > 0)
                      {
                          ms.Write(buffer, 0, count);
                      }
                      bytes = ms.ToArray();
                  }

                  File.WriteAllBytes(savePath, bytes);
                  State = "SUCCESS";
              }
          }
          catch (Exception e)
          {
              // Covers WebException from GetResponse (non-2xx status, network
              // failure) as well as I/O errors while saving.
              State = "抓取错误:" + e.Message;
          }
          return this;
      }

      /// <summary>
      /// Returns the value of the first "wx_fmt=" occurrence in
      /// <paramref name="url"/>, or null when it is absent.
      /// </summary>
      private static string ExtractWxFormat(string url)
      {
          const string key = "wx_fmt=";
          int start = url.IndexOf(key, StringComparison.Ordinal);
          if (start == -1)
          {
              return null;
          }

          start += key.Length;
          // The value ends at the next parameter or fragment, or at end of string.
          int end = url.IndexOfAny(new[] { '&', '#' }, start);
          return end == -1 ? url.Substring(start) : url.Substring(start, end - start);
      }

      /// <summary>
      /// Decides whether <paramref name="url"/> is an absolute http/https URL
      /// whose host is not a loopback or private address.
      /// For DNS names every resolved address must pass the check and at least
      /// one of them must be IPv4.
      /// </summary>
      private bool IsExternalIPAddress(string url)
      {
          Uri uri;
          if (!Uri.TryCreate(url, UriKind.Absolute, out uri))
          {
              return false;
          }
          if (uri.Scheme != Uri.UriSchemeHttp && uri.Scheme != Uri.UriSchemeHttps)
          {
              return false;
          }

          switch (uri.HostNameType)
          {
              case UriHostNameType.Dns:
              {
                  IPAddress[] addresses;
                  try
                  {
                      addresses = Dns.GetHostEntry(uri.DnsSafeHost).AddressList;
                  }
                  catch (System.Net.Sockets.SocketException)
                  {
                      return false;
                  }
                  catch (ArgumentException)
                  {
                      return false;
                  }

                  bool hasPublicIPv4 = false;
                  foreach (IPAddress address in addresses)
                  {
                      // One internal record is enough to reject the host.
                      if (IsPrivateIP(address))
                      {
                          return false;
                      }
                      if (address.AddressFamily == System.Net.Sockets.AddressFamily.InterNetwork)
                      {
                          hasPublicIPv4 = true;
                      }
                  }
                  return hasPublicIPv4;
              }
              case UriHostNameType.IPv4:
                  return !IsPrivateIP(IPAddress.Parse(uri.DnsSafeHost));
          }
          return false;
      }

      /// <summary>
      /// Returns true for loopback addresses and for IPv4 addresses in the
      /// private (RFC 1918) or link-local ranges.
      /// </summary>
      private bool IsPrivateIP(IPAddress myIPAddress)
      {
          if (IPAddress.IsLoopback(myIPAddress))
          {
              return true;
          }
          if (myIPAddress.AddressFamily != System.Net.Sockets.AddressFamily.InterNetwork)
          {
              return false;
          }

          byte[] ipBytes = myIPAddress.GetAddressBytes();

          // 10.0.0.0/8
          if (ipBytes[0] == 10)
          {
              return true;
          }
          // 172.16.0.0/12 (172.16.x.x through 172.31.x.x)
          if (ipBytes[0] == 172 && ipBytes[1] >= 16 && ipBytes[1] <= 31)
          {
              return true;
          }
          // 192.168.0.0/16
          if (ipBytes[0] == 192 && ipBytes[1] == 168)
          {
              return true;
          }
          // 169.254.0.0/16 (link-local)
          if (ipBytes[0] == 169 && ipBytes[1] == 254)
          {
              return true;
          }
          return false;
      }
  }
  201. }