人工智能:文本相似度分析
我们经常会遇到这样一个问题:用户在评论、发文的时候,会时不时地发布一些高度相似的内容,显然这是没有任何意义的。这时候我们就可以用一些算法来确定文本的相似度究竟是多少,并据此制定相应的处理策略。实现这个功能可以用多种语言来完成,你可以点我去获得其他语言的做法,这里先列出如何用 Go 语言来实现这个功能:
package main
import (
"io/ioutil"
"net/http"
"net/url"
"fmt"
"strings"
)
// Credentials for the API: set these to the appKey and openId you applied for.
// main sends them as the "appKey" and "openId" request parameters.
const APP_KEY ="yours";
const OPEN_ID ="yours";
// requestContent dispatches the request by HTTP method name.
// A method equal to "GET" after upper-casing is sent with get; every other
// value (including the empty string) is sent with post.
func requestContent(requestUrl string, params url.Values, method string) (rs []byte, err error) {
	switch strings.ToUpper(method) {
	case "GET":
		return get(requestUrl, params)
	default:
		return post(requestUrl, params)
	}
}
// get performs an HTTP GET against requestUrl with params placed in the query
// string, and returns the raw response body.
//
// Query parameters already present in requestUrl are preserved; when a key
// appears both in requestUrl and in params, the values from params win.
// Values.Encode URL-escapes keys and values, so non-ASCII text (for example
// Chinese) can be passed as-is.
//
// On a URL parse error or a request error the error is printed to stdout and
// returned together with a nil body. The HTTP status code is not inspected.
func get(requestUrl string, params url.Values) (rs []byte, err error) {
	parsed, err := url.Parse(requestUrl)
	if err != nil {
		fmt.Printf("解析url错误:\r\n%v", err)
		return nil, err
	}

	// Merge into the existing query instead of overwriting RawQuery, so a
	// requestUrl such as "http://host/path?a=1" does not silently lose "a".
	query := parsed.Query()
	for key, values := range params {
		query[key] = values
	}
	parsed.RawQuery = query.Encode()

	resp, err := http.Get(parsed.String())
	if err != nil {
		fmt.Println("err:", err)
		return nil, err
	}
	defer resp.Body.Close()
	return ioutil.ReadAll(resp.Body)
}
// post submits params as a form-encoded POST body to requestUrl (via
// http.PostForm) and returns the raw response body.
// A request error is returned unchanged together with a nil body.
func post(requestUrl string, params url.Values) (rs []byte, err error) {
	response, postErr := http.PostForm(requestUrl, params)
	if postErr != nil {
		return nil, postErr
	}
	defer response.Body.Close()

	rs, err = ioutil.ReadAll(response.Body)
	return rs, err
}
// main sends one sample text-similarity request and prints the raw response
// body, or the error if the request failed.
func main() {
	domain := "http://api.xiaocongjisuan.com/"
	servlet := "data/contentsimilarity/analysis"
	method := "get"
	requestUrl := domain + servlet

	// Credentials come from the APP_KEY / OPEN_ID constants.
	params := url.Values{}
	params.Set("appKey", APP_KEY)
	params.Set("openId", OPEN_ID)
	// The two texts to compare; change these per request.
	params.Set("content1", "我是最可爱的小伙子")
	params.Set("content2", "我是最漂亮的小姑娘")

	data, err := requestContent(requestUrl, params, method)
	if err != nil {
		// Check the error before using data: on failure data is nil, and the
		// failure is not necessarily a URL parse error, so report it generically.
		fmt.Printf("请求失败:\r\n%v", err)
		return
	}
	fmt.Println(string(data))
}
C# 语言的实现方式也非常简单,代码如下:
using System;
using System.Collections.Generic;
using System.Text;
using System.Net;
using System.IO;
namespace ConsoleApplication1
{
    /// <summary>
    /// Console sample that sends two texts to the content-similarity HTTP endpoint
    /// and prints the raw response body.
    /// </summary>
    class Program
    {
        // Replace with the appKey / openId you applied for; Main sends them as
        // the "appKey" and "openId" request parameters.
        private static string appKey = "yours";
        private static string openId = "yours";

        /// <summary>
        /// Reads the entire body of <paramref name="rsp"/> as text and closes the response.
        /// </summary>
        /// <param name="rsp">Response to read; it is closed before this method returns.</param>
        /// <param name="encoding">Encoding used to decode the body bytes.</param>
        /// <returns>The decoded response body.</returns>
        static string getResponseAsString(HttpWebResponse rsp, Encoding encoding)
        {
            try
            {
                using (Stream stream = rsp.GetResponseStream())
                using (StreamReader reader = new StreamReader(stream, encoding))
                {
                    return reader.ReadToEnd();
                }
            }
            finally
            {
                // Close the response even if reading failed part-way.
                if (rsp != null)
                {
                    rsp.Close();
                }
            }
        }

        /// <summary>
        /// Maps a charset name reported by the server to an <see cref="Encoding"/>,
        /// falling back to UTF-8 when the name is missing or not recognised.
        /// </summary>
        private static Encoding resolveEncoding(string charset)
        {
            if (string.IsNullOrEmpty(charset))
            {
                return Encoding.UTF8;
            }
            try
            {
                return Encoding.GetEncoding(charset);
            }
            catch (ArgumentException)
            {
                // Unknown or malformed charset name: decode as UTF-8 rather than fail the request.
                return Encoding.UTF8;
            }
        }

        /// <summary>
        /// Builds a <c>name=value&amp;name=value</c> query string from <paramref name="parameters"/>.
        /// </summary>
        /// <param name="parameters">
        /// Parameters to serialise. Entries with a null or empty name are skipped;
        /// a null value is written as an empty string. A null dictionary yields an empty result.
        /// </param>
        /// <param name="encode">
        /// <c>"gb2312"</c> or <c>"utf8"</c> to URL-encode names and values with that encoding;
        /// any other value writes them unencoded.
        /// </param>
        /// <returns>The query string without a leading <c>?</c>.</returns>
        static string buildQuery(IDictionary<string, object> parameters, string encode)
        {
            if (parameters == null)
            {
                return string.Empty;
            }

            // Resolve the encoding once instead of on every parameter.
            Encoding urlEncoding = null;
            if (encode == "gb2312")
            {
                urlEncoding = Encoding.GetEncoding("gb2312");
            }
            else if (encode == "utf8")
            {
                urlEncoding = Encoding.UTF8;
            }

            StringBuilder query = new StringBuilder();
            foreach (KeyValuePair<string, object> pair in parameters)
            {
                string name = pair.Key;
                if (string.IsNullOrEmpty(name))
                {
                    continue;
                }

                // A null value is sent as "name=" instead of throwing NullReferenceException.
                string value = pair.Value == null ? string.Empty : pair.Value.ToString();

                if (query.Length > 0)
                {
                    query.Append("&");
                }

                if (urlEncoding != null)
                {
                    // Encode the name as well, so '&' or '=' inside it cannot break the query.
                    query.Append(System.Web.HttpUtility.UrlEncode(name, urlEncoding));
                    query.Append("=");
                    query.Append(System.Web.HttpUtility.UrlEncode(value, urlEncoding));
                }
                else
                {
                    query.Append(name);
                    query.Append("=");
                    query.Append(value);
                }
            }
            return query.ToString();
        }

        /// <summary>
        /// Sends <paramref name="parameters"/> to <paramref name="url"/> and returns the response body.
        /// </summary>
        /// <param name="url">Request address.</param>
        /// <param name="parameters">Request parameters; always URL-encoded as UTF-8.</param>
        /// <param name="method">
        /// <c>"post"</c> (case-insensitive) sends a form-encoded POST; any other value sends a GET
        /// with the parameters in the query string.
        /// </param>
        /// <returns>
        /// The response body. For POST, a failure is not thrown: the exception message is
        /// returned instead (existing behaviour, kept for callers). For GET, exceptions propagate.
        /// </returns>
        static string requestContent(string url, IDictionary<string, object> parameters, string method)
        {
            if (string.Equals(method, "post", StringComparison.OrdinalIgnoreCase))
            {
                try
                {
                    HttpWebRequest req = (HttpWebRequest)WebRequest.Create(url);
                    // Send the canonical verb regardless of how the caller cased it.
                    req.Method = "POST";
                    req.KeepAlive = false;
                    req.ProtocolVersion = HttpVersion.Version10;
                    req.Timeout = 5000;
                    req.ContentType = "application/x-www-form-urlencoded;charset=utf-8";

                    byte[] postData = Encoding.UTF8.GetBytes(buildQuery(parameters, "utf8"));
                    req.ContentLength = postData.Length;
                    // Close the request stream before asking for the response.
                    using (Stream reqStream = req.GetRequestStream())
                    {
                        reqStream.Write(postData, 0, postData.Length);
                    }

                    using (HttpWebResponse rsp = (HttpWebResponse)req.GetResponse())
                    {
                        // The server may omit the charset or report one this runtime does
                        // not know; resolveEncoding falls back to UTF-8 in that case.
                        return getResponseAsString(rsp, resolveEncoding(rsp.CharacterSet));
                    }
                }
                catch (Exception ex)
                {
                    return ex.Message;
                }
            }

            // GET: append the parameters, respecting a query string already present in url.
            string separator = (url != null && url.IndexOf('?') >= 0) ? "&" : "?";
            HttpWebRequest request = (HttpWebRequest)WebRequest.Create(url + separator + buildQuery(parameters, "utf8"));
            request.Method = "GET";
            request.ReadWriteTimeout = 5000;
            request.ContentType = "text/html;charset=UTF-8";

            // Dispose response, stream and reader so the connection is released.
            using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
            using (Stream body = response.GetResponseStream())
            using (StreamReader reader = new StreamReader(body, Encoding.GetEncoding("utf-8")))
            {
                return reader.ReadToEnd();
            }
        }

        /// <summary>
        /// Sends one sample similarity request, prints the result and waits for a key press.
        /// </summary>
        static void Main(string[] args)
        {
            // The public endpoint; a leftover override pointing at a local
            // development server (127.0.0.1:8080) has been removed.
            String domain = "http://api.xiaocongjisuan.com/";
            String servlet = "data/contentsimilarity/analysis";
            String method = "get";
            String url = domain + servlet;

            var parameters = new Dictionary<string, object>();
            parameters.Add("appKey", appKey);
            parameters.Add("openId", openId);
            // The two texts to compare; change these per request.
            parameters.Add("content1", "我是最可爱的小伙子");
            parameters.Add("content2", "我是最漂亮的小姑娘");

            string result = requestContent(url, parameters, method);
            Console.WriteLine(result);
            Console.Read();
        }
    }
}
版权声明:本文为huangxie原创文章,遵循 CC 4.0 BY-SA 版权协议,转载请附上原文出处链接和本声明。