SpringBoot2.0+ElasticSearch网盘搜索实现
1、ES是如何实现分布式高并发全文检索
2、简单介绍ES分片Shards分片技术
3、为什么ES主分片对应的副本分片不能与主分片存放在同一台节点上
4、索引的主分片定义好后为什么不能做修改
5、ES如何实现高可用容错方案
6、在Linux上搭建三台节点的ES高可用集群环境
7、基于ES网盘搜索引擎实现
网盘搜索引擎:写一个Job(定时任务)去网上爬取网盘分享数据,再把抓取到的数据以JSON格式存放到ES服务器中。
网盘搜索引擎的原理:
由抓取服务器负责爬取数据,并采用ES存储抓取到的数据。
测试数据:
## Create the "clouddisk" index first
PUT /clouddisk
## Define the mapping of type "disk" for the test data; the text fields use the ik_smart analyzer
POST /clouddisk/_mapping/disk
{
"disk":{
"properties":{
"name":{
"type":"text",
"analyzer":"ik_smart",
"search_analyzer":"ik_smart"
},
"source":{
"type":"keyword"
},
"describe":{
"type":"text",
"analyzer":"ik_smart",
"search_analyzer":"ik_smart"
},
"shartime":{
"type":"date"
},
"browsetimes":{
"type":"long"
},
"filesize":{
"type":"float"
},
"sharpeople":{
"type":"keyword"
},
"collectiontime":{
"type":"date"
},
"baiduaddres":{
"type":"keyword"
}
}
}
}
## Index five sample documents into clouddisk/disk (no id is given in the request path)
POST /clouddisk/disk
{
"name": "2018史上最全美剧",
"source": "百度云盘",
"describe": "该课程由小明提供",
"shartime": "2018-10-10",
"browsetimes": 100000,
"filesize": 4.35,
"sharpeople": "美国东部",
"collectiontime": "2018-11-24",
"baiduaddres": "https://pan.baidu.com/s/1VQxFq6JnKh0KP-5aMq-WpA#list/path=%2F"
}
POST /clouddisk/disk
{
"name": "2018史上最全韩剧",
"source": "百度云盘",
"describe": "该课程小丽提供",
"shartime": "2018-10-12",
"browsetimes": 100000,
"filesize": 6.35,
"sharpeople": "韩国釜山",
"collectiontime": "2018-11-24",
"baiduaddres": "https://pan.baidu.com/s/1VQxFq6JnKh0KP-5aMq-WpA#list/path=%2F"
}
POST /clouddisk/disk
{
"name": "老友记",
"source": "百度云盘",
"describe": "该课程由张三提供",
"shartime": "2018-10-10",
"browsetimes": 100000,
"filesize": 1.35,
"sharpeople": "美国",
"collectiontime": "2018-11-24",
"baiduaddres": "https://pan.baidu.com/s/1VQxFq6JnKh0KP-5aMq-WpA#list/path=%2F"
}
POST /clouddisk/disk
{
"name": "英语雅思",
"source": "百度云盘",
"describe": "该课程由大海提供",
"shartime": "2018-10-10",
"browsetimes": 100000,
"filesize": 1.35,
"sharpeople": "大海",
"collectiontime": "2018-11-24",
"baiduaddres": "https://pan.baidu.com/s/1VQxFq6JnKh0KP-5aMq-WpA#list/path=%2F"
}
POST /clouddisk/disk
{
"name": "小猪佩奇",
"source": "百度云盘",
"describe": "该课程由朱佩琦出品",
"shartime": "2018-10-10",
"browsetimes": 100000,
"filesize": 1.35,
"sharpeople": "佩奇",
"collectiontime": "2018-11-24",
"baiduaddres": "https://pan.baidu.com/s/1VQxFq6JnKh0KP-5aMq-WpA#list/path=%2F"
}
maven:
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>

    <!-- Project coordinates -->
    <groupId>com.toov5</groupId>
    <artifactId>springboot-esPan</artifactId>
    <version>0.0.1-SNAPSHOT</version>

    <!-- Spring Boot parent; the empty relativePath makes Maven look the parent up from the repository -->
    <parent>
        <groupId>org.springframework.boot</groupId>
        <artifactId>spring-boot-starter-parent</artifactId>
        <version>2.0.0.RELEASE</version>
        <relativePath />
    </parent>

    <dependencies>
        <!-- Spring MVC web starter -->
        <dependency>
            <groupId>org.springframework.boot</groupId>
            <artifactId>spring-boot-starter-web</artifactId>
        </dependency>
        <!-- Spring Data Elasticsearch starter -->
        <dependency>
            <groupId>org.springframework.boot</groupId>
            <artifactId>spring-boot-starter-data-elasticsearch</artifactId>
        </dependency>
        <!-- Lombok, used for @Data on the entity -->
        <dependency>
            <groupId>org.projectlombok</groupId>
            <artifactId>lombok</artifactId>
        </dependency>
        <!-- Google Collections, used for Lists.newArrayList in the controller -->
        <dependency>
            <groupId>com.google.collections</groupId>
            <artifactId>google-collections</artifactId>
            <version>1.0-rc2</version>
        </dependency>
        <!-- Spring Boot FreeMarker integration -->
        <dependency>
            <groupId>org.springframework.boot</groupId>
            <artifactId>spring-boot-starter-freemarker</artifactId>
        </dependency>
    </dependencies>
</project>
Dao:
package com.toov5.dao;

import org.springframework.data.elasticsearch.repository.ElasticsearchRepository;

import com.toov5.entity.CloudDiskEntity;

/**
 * Spring Data Elasticsearch repository for {@link CloudDiskEntity} documents with a {@code String} id.
 *
 * <p>Declares no methods of its own; everything callers use is inherited from
 * {@link ElasticsearchRepository}.
 */
public interface CloudDiskDao extends ElasticsearchRepository<CloudDiskEntity, String> {
}
Entity:
package com.toov5.entity; import org.springframework.data.annotation.Id; import org.springframework.data.elasticsearch.annotations.Document; import lombok.Data; @Data @Document(indexName = "clouddisk", type = "disk") public class CloudDiskEntity { @Id private String id; // 名称 private String name; // 来源 private String source; // 描述 private String describe; // 分享时间 private String shartime; // 浏览次数 private Long browsetimes; // 文件大小 private Double filesize; // 分享人 private String sharpeople; // 收录时间 private String collectiontime; // 地址 private String baiduaddres; }
Controller:
package com.toov5.controller; import java.util.List; import java.util.Optional; import org.apache.commons.lang.StringUtils; import org.elasticsearch.index.query.BoolQueryBuilder; import org.elasticsearch.index.query.MatchQueryBuilder; import org.elasticsearch.index.query.QueryBuilders; import org.springframework.beans.factory.annotation.Autowired; import org.springframework.web.bind.annotation.PathVariable; import org.springframework.web.bind.annotation.RequestMapping; import org.springframework.web.bind.annotation.RestController; import com.google.common.collect.Lists; import com.toov5.dao.CloudDiskDao; import com.toov5.entity.CloudDiskEntity; //SpringBoot 整合 ES @RestController public class CloudDiskController { @Autowired private CloudDiskDao cloudDiskDao; @RequestMapping("/findById/{id}") public Optional<CloudDiskEntity> findById(@PathVariable String id) { Optional<CloudDiskEntity> findById = cloudDiskDao.findById(id); return findById; } @RequestMapping("/search") public List<CloudDiskEntity> search(String keyWord,String describe) { //创建查询 查询所有的 BoolQueryBuilder boolQuery = QueryBuilders.boolQuery(); //创建查询 if (!StringUtils.isEmpty(keyWord)) { //模糊查询 一定要用ik的中文分词插件! MatchQueryBuilder matchQuery = QueryBuilders.matchQuery("name", keyWord); boolQuery.must(matchQuery); } if (!StringUtils.isEmpty(describe)) { //模糊查询 一定要用ik的中文分词插件! MatchQueryBuilder matchQuery = QueryBuilders.matchQuery("describe", describe); boolQuery.must(matchQuery); } Iterable<CloudDiskEntity> search = cloudDiskDao.search(boolQuery); //查询所有的数据 return Lists.newArrayList(search); //通过这个api可以进行转换 //这样查询除了所有的 然后添加match } }
yml:
spring:
  data:
    elasticsearch:
      # Cluster name
      cluster-name: myes
      # Cluster node address
      cluster-nodes: 192.168.91.7:9300
  freemarker:
    # Template file suffix
    suffix: .ftl
    # Content type of rendered pages
    content-type: text/html
    # Page encoding
    charset: UTF-8
    # Template caching switch
    cache: false
    # Template file locations
    template-loader-path:
      - classpath:/templates
  # Path pattern for static files (js, css, ...)
  mvc:
    static-path-pattern: /static/**
启动类:
package com.toov5; import org.springframework.boot.SpringApplication; import org.springframework.boot.autoconfigure.SpringBootApplication; import org.springframework.data.elasticsearch.repository.config.EnableElasticsearchRepositories; @SpringBootApplication @EnableElasticsearchRepositories(basePackages="com.toov5.dao") //dao 代码没有@Component的情况下可以这么玩儿 而且这样比较节省代码哈哈 public class AppEs { public static void main(String[] args) { SpringApplication.run(AppEs.class, args); } }
按关键字查询时,ES会先对关键字分词再做全文匹配,比SQL的like模糊查询强悍!
分页查询:很简单,方法上传入 Pageable 参数就OK了,框架已经封装好了。对应的ES查询如下:
GET /clouddisk/disk/_search
{
  "query": {
    "match": {
      "name": "2018"
    }
  },
  "from": 0,
  "size": 2
}
page=0:请求的页码,从0开始;size=2:每一页返回多少条数据。
前端传来参数是第几页
package com.toov5.controller; import java.util.List; import java.util.Optional; import org.apache.commons.lang.StringUtils; import org.elasticsearch.index.query.BoolQueryBuilder; import org.elasticsearch.index.query.MatchQueryBuilder; import org.elasticsearch.index.query.QueryBuilders; import org.springframework.beans.factory.annotation.Autowired; import org.springframework.data.domain.Page; import org.springframework.data.domain.Pageable; import org.springframework.data.web.PageableDefault; import org.springframework.web.bind.annotation.PathVariable; import org.springframework.web.bind.annotation.RequestMapping; import org.springframework.web.bind.annotation.RestController; import com.toov5.dao.CloudDiskDao; import com.toov5.entity.CloudDiskEntity; //SpringBoot 整合 ES @RestController public class CloudDiskController { @Autowired private CloudDiskDao cloudDiskDao; @RequestMapping("/findById/{id}") public Optional<CloudDiskEntity> findById(@PathVariable String id) { Optional<CloudDiskEntity> findById = cloudDiskDao.findById(id); return findById; } @RequestMapping("/search") //写死了 默认值 public Page<CloudDiskEntity> search(String keyWord,@PageableDefault(page=0,value=1) Pageable pageable) { //创建查询 查询所有的 BoolQueryBuilder boolQuery = QueryBuilders.boolQuery(); //创建查询 if (!StringUtils.isEmpty(keyWord)) { //模糊查询 一定要用ik的中文分词插件! MatchQueryBuilder matchQuery = QueryBuilders.matchQuery("name", keyWord); boolQuery.must(matchQuery); } Page<CloudDiskEntity> search = cloudDiskDao.search(boolQuery,pageable); return search; } }
访问:
也可以传入 页数 size数
整合到前端展示:
Controller:
package com.toov5.controller; import javax.servlet.http.HttpServletRequest; import org.apache.commons.lang.StringUtils; import org.elasticsearch.index.query.BoolQueryBuilder; import org.elasticsearch.index.query.MatchQueryBuilder; import org.elasticsearch.index.query.QueryBuilders; import org.springframework.beans.factory.annotation.Autowired; import org.springframework.data.domain.Page; import org.springframework.data.domain.Pageable; import org.springframework.data.web.PageableDefault; import org.springframework.stereotype.Controller; import org.springframework.web.bind.annotation.RequestMapping; import com.toov5.dao.CloudDiskDao; import com.toov5.entity.CloudDiskEntity; @Controller public class PageController { @Autowired private CloudDiskDao cloudDiskDao; @RequestMapping("/search") //写死了 默认值 public String search(String keyWord,@PageableDefault(page=0,value=2) Pageable pageable, HttpServletRequest req) { Long starTime = System.currentTimeMillis(); //创建查询 查询所有的 BoolQueryBuilder boolQuery = QueryBuilders.boolQuery(); //创建查询 if (!StringUtils.isEmpty(keyWord)) { //模糊查询 一定要用ik的中文分词插件! MatchQueryBuilder matchQuery = QueryBuilders.matchQuery("name", keyWord); boolQuery.must(matchQuery); } Page<CloudDiskEntity> page = cloudDiskDao.search(boolQuery,pageable); req.setAttribute("page", page); req.setAttribute("total", page.getTotalElements()); req.setAttribute("keyword", keyWord); Long endTime = System.currentTimeMillis(); req.setAttribute("time", endTime-starTime ); return "search"; } }
页面展示:
页面:
<!DOCTYPE html>
<#--
  Search result page (FreeMarker).
  Model attributes read here: page, total, time, keyword (optional), totalPage (optional).

  Changes from the previous version:
  - The Bootstrap theme stylesheet is loaded with a link element; it was in a script element.
  - The highlight expression uses plain FTL string literals; the stray backslash-escaped
    quotes were not valid FTL.
  - Highlighting and the keyword links are skipped when keyword is missing OR empty;
    replacing an empty string would insert the span between every character.
  - The pager is rendered only when totalPage is present. It used to sit inside an HTML
    comment, which FreeMarker still evaluates, so a missing totalPage broke rendering.
  - Pager links send the parameter name "keyWord", which is what the controller binds,
    URL-encode the keyword and print the page index with ?c so it is never
    locale-formatted.
  - Values echoed into the markup are HTML-escaped with ?html. This assumes auto-escaping
    is off for this template (the suffix is .ftl); if it is switched on, remove the
    ?html calls.
  - The unused Thymeleaf namespace declaration was removed.
-->
<html xmlns="http://www.w3.org/1999/xhtml">
<head>
    <meta charset="UTF-8">
    <title>Toov5|ES网盘搜索引擎</title>
    <!-- Bootstrap core CSS -->
    <link href="https://cdn.bootcss.com/bootstrap/3.3.7/css/bootstrap.min.css" rel="stylesheet">
    <!-- Optional Bootstrap theme (usually not needed) -->
    <link href="https://cdn.bootcss.com/bootstrap/3.3.7/css/bootstrap-theme.min.css" rel="stylesheet">
    <!-- jQuery; must be loaded before bootstrap.min.js -->
    <script src="https://cdn.bootcss.com/jquery/2.1.1/jquery.min.js"></script>
    <!-- Bootstrap core JavaScript -->
    <script src="https://cdn.bootcss.com/bootstrap/3.3.7/js/bootstrap.min.js"></script>
</head>
<body style="display: block; margin: 0 auto; width: 50%; ">
<div style="width:100%;height:60px;" align="center">
    <h2 style="color:#985f0d;">Toov5|ES网盘搜索引擎</h2>
</div>
<br/>
<div align="center">
    <span style="font-size: 18px;">检索出${total}条数据,耗时:${time}毫秒</span>
</div>
<br/>
<br/>
<div class="bs-example" data-example-id="striped-table">
    <table class="table table-bordered table-hover">
        <thead>
        <tr>
            <th style="text-align:center;" scope="row">链接名称</th>
            <th style="text-align:center;">文件大小GB</th>
            <th style="text-align:center;">分享人</th>
            <th style="text-align:center;">云盘地址</th>
        </tr>
        </thead>
        <tbody>
        <#list page.content as p>
        <tr>
            <th style="text-align: left;">
                <#if keyword?has_content>
                    <#-- Escape first, then wrap each occurrence of the keyword in a red span. -->
                    ${p.name?html?replace(keyword?html, '<span style="color: red">' + keyword?html + '</span>')}
                <#else>
                    ${p.name?html}
                </#if>
            </th>
            <th style="text-align: center;">${p.filesize}</th>
            <th style="text-align: center;">${p.sharpeople?html}</th>
            <th style="text-align: center;"><a href="${p.baiduaddres?html}">云盘地址</a></th>
        </tr>
        </#list>
        </tbody>
    </table>
    <#-- Pager: shown only when the controller supplies totalPage. -->
    <#if totalPage??>
    <div style="font-size: 21px;">
        <#list 1..totalPage as i>
            <#if keyword?has_content>
                <a href="/search?keyWord=${keyword?url('UTF-8')}&amp;page=${(i-1)?c}">${i}</a>
            <#else>
                <a href="/search?page=${(i-1)?c}">${i}</a>
            </#if>
        </#list>
        页
    </div>
    </#if>
</div>
</body>
</html>
分页展示数据:
package com.toov5.controller; import javax.servlet.http.HttpServletRequest; import org.apache.commons.lang.StringUtils; import org.elasticsearch.index.query.BoolQueryBuilder; import org.elasticsearch.index.query.MatchQueryBuilder; import org.elasticsearch.index.query.QueryBuilders; import org.springframework.beans.factory.annotation.Autowired; import org.springframework.data.domain.Page; import org.springframework.data.domain.Pageable; import org.springframework.data.web.PageableDefault; import org.springframework.stereotype.Controller; import org.springframework.web.bind.annotation.RequestMapping; import com.toov5.dao.CloudDiskDao; import com.toov5.entity.CloudDiskEntity; @Controller public class PageController { @Autowired private CloudDiskDao cloudDiskDao; @RequestMapping("/search") //写死了 默认值 public String search(String keyWord,@PageableDefault(page=0,value=2) Pageable pageable, HttpServletRequest req) { Long starTime = System.currentTimeMillis(); //创建查询 查询所有的 BoolQueryBuilder boolQuery = QueryBuilders.boolQuery(); //创建查询 if (!StringUtils.isEmpty(keyWord)) { //模糊查询 一定要用ik的中文分词插件! MatchQueryBuilder matchQuery = QueryBuilders.matchQuery("name", keyWord); boolQuery.must(matchQuery); } Page<CloudDiskEntity> page = cloudDiskDao.search(boolQuery,pageable); // 计算查询总数 long total = page.getTotalElements(); req.setAttribute("total", page.getTotalElements()); // 计算分页数 int totalPage = (int) ((total - 1) / pageable.getPageSize() + 1); req.setAttribute("totalPage", totalPage); req.setAttribute("page", page); req.setAttribute("total", page.getTotalElements()); req.setAttribute("keyword", keyWord); Long endTime = System.currentTimeMillis(); req.setAttribute("time", endTime-starTime ); return "search"; } }
效果: