帮助中心

搭建Sphinx搜索引擎架构详解

发布日期:2013-12-13 发布人:

Sphinx+MySQL5.1x+mmseg中文分词

搜索引擎架构搭建

 
本文以CentOS5.5+mysql-5.1.55+sphinx-0.9.9(coreseek-3.2.14.tar.gz,目前最新稳定版)为例介绍
安装前准备文件
 
Sphinx+MySQL5.1x+mmseg中文分词搜索引擎架构搭建过程。
 
通过yum命令更新依赖包(与php环境搭建依赖包一起更新了)
yum -y install gcc g++ gcc-c++ libjpeg libjpeg-devel libpng libpng-devel freetype freetype-devel libxml2 libxml2-devel zlib zlib-devel glibc glibc-devel glib2 glib2-devel bzip2 bzip2-devel ncurses ncurses-devel curl curl-devel e2fsprogs e2fsprogs-devel krb5 krb5-devel libidn libidn-devel openssl openssl-devel openldap openldap-devel nss_ldap openldap-clients openldap-servers patch libtool automake imake mysql-devel expat-devel
安装MySQL+SphinxSE,进入软件包目录
tar zxvf mysql-5.1.55.tar.gz
tar zxvf sphinx-0.9.9.tar.gz
cp -r sphinx-0.9.9/mysqlse/ mysql-5.1.55/storage/sphinx
cd mysql-5.1.55
./BUILD/autorun.sh
./configure --prefix=/usr/local/webserver/mysql/ --enable-assembler --with-extra-charsets=complex --enable-thread-safe-client --with-big-tables --with-readline --with-ssl --with-embedded-server --enable-local-infile --with-plugins=partition,innobase,myisammrg,sphinx
make
make install
/*************************
如果在编译中出现类似下面错误:
collect2: ld returned 1 exit status
make[2]: *** [indexer] Error 1
make[2]: Leaving directory `/home/jling/sphinx-0.9.9/src'
make[1]: *** [all] Error 2
make[1]: Leaving directory `/home/jling/sphinx-0.9.9/src'
make: *** [all-recursive] Error 1
 
打开 coreseek-3.2.14/csft-3.2.14/config/config.h
将#define USE_LIBICONV 1
修改为#define USE_LIBICONV 0
然后重新执行make
*************************/


在此按照以下过程进行安装Mysql:

/usr/sbin/groupadd mysql
/usr/sbin/useradd -g mysql mysql
 
chmod +w /usr/local/webserver/mysql
chown -R mysql:mysql /usr/local/webserver/mysql
cd ../
 
创建目录
mkdir -p /data0/mysql/3306/data/
mkdir -p /data0/mysql/3306/binlog/
mkdir -p /data0/mysql/3306/relaylog/
chown -R mysql:mysql /data0/mysql/
 
以mysql用户帐号的身份建立数据表:
 
/usr/local/webserver/mysql/bin/mysql_install_db --basedir=/usr/local/webserver/mysql --datadir=/data0/mysql/3306/data --user=mysql
vi /data0/mysql/3306/my.cnf
 
输入 
[client]
default-character-set = utf8
port    = 3306
socket  = /tmp/mysql.sock
 
[mysqld]
character-set-server = utf8
replicate-ignore-db = mysql
replicate-ignore-db = test
replicate-ignore-db = information_schema
user    = mysql
port    = 3306
socket  = /tmp/mysql.sock
basedir = /usr/local/webserver/mysql
datadir = /data0/mysql/3306/data
log-error = /data0/mysql/3306/mysql_error.log
pid-file = /data0/mysql/3306/mysql.pid
open_files_limit    = 10240
back_log = 600
max_connections = 5000
max_connect_errors = 6000
table_cache = 614
external-locking = FALSE
max_allowed_packet = 4M
sort_buffer_size = 1M
join_buffer_size = 1M
thread_cache_size = 300
#thread_concurrency = 8
query_cache_size = 4M
query_cache_limit = 1M
query_cache_min_res_unit = 2k
default-storage-engine = MyISAM
thread_stack = 192K
transaction_isolation = READ-COMMITTED
tmp_table_size = 4M
max_heap_table_size = 16M
long_query_time = 3
log-slave-updates
log-bin = /data0/mysql/3306/binlog/binlog
binlog_cache_size = 1M
binlog_format = MIXED
max_binlog_cache_size = 2M
max_binlog_size = 1G
relay-log-index = /data0/mysql/3306/relaylog/relaylog
relay-log-info-file = /data0/mysql/3306/relaylog/relaylog
relay-log = /data0/mysql/3306/relaylog/relaylog
expire_logs_days = 30
key_buffer_size = 8M
read_buffer_size = 1M
read_rnd_buffer_size = 2M
bulk_insert_buffer_size = 4M
myisam_sort_buffer_size = 16M
myisam_max_sort_file_size = 1G
myisam_repair_threads = 1
myisam_recover
 
interactive_timeout = 120
wait_timeout = 120
 
skip-name-resolve
#master-connect-retry = 10
slave-skip-errors = 1032,1062,126,1114,1146,1048,1396
 
#master-host     =   192.168.1.2
#master-user     =   username
#master-password =   password
#master-port     =  3306
 
server-id = 1
 
innodb_additional_mem_pool_size = 8M
innodb_buffer_pool_size = 64M
innodb_data_file_path = ibdata1:256M:autoextend
innodb_file_io_threads = 4
innodb_thread_concurrency = 8
innodb_flush_log_at_trx_commit = 2
innodb_log_buffer_size = 4M
innodb_log_file_size = 32M
innodb_log_files_in_group = 3
innodb_max_dirty_pages_pct = 90
innodb_lock_wait_timeout = 120
innodb_file_per_table = 0
 
#log-slow-queries = /data0/mysql/3306/slow.log
#long_query_time = 10
 
[mysqldump]
quick
max_allowed_packet = 8M
 
 
===========================================================================
创建shell管理脚本
vi /data0/mysql/3306/mysql
输入
#!/bin/sh
# Control script for one MySQL instance: start, stop, restart or kill it.

# Instance selector: used to build the /data0/mysql/<port>/ paths and to
# filter the process list when killing.
mysql_port='3306'
# Credentials handed to mysqladmin when shutting the server down.
mysql_username='root'
mysql_password='12345678'
 
# Launch mysqld_safe in the background using the option file
# /data0/mysql/${mysql_port}/my.cnf.
# Globals: mysql_port (read)
# Outputs: a single status line on stdout; the server's own stdout and
#          stderr are discarded.
function_start_mysql()
{
    printf "Starting MySQL...\n"
    # Redirections are applied left to right: stdout must be pointed at
    # /dev/null first so that the following 2>&1 sends stderr there too.
    /bin/sh /usr/local/webserver/mysql/bin/mysqld_safe --defaults-file="/data0/mysql/${mysql_port}/my.cnf" > /dev/null 2>&1 &
}
 
# Ask the server to shut down through mysqladmin over the local socket
# /tmp/mysql.sock.
# Globals: mysql_username, mysql_password (read)
# Returns: the exit status of mysqladmin.
function_stop_mysql()
{
    printf "Stoping MySQL...\n"
    # Expansions are quoted so credentials containing spaces or glob
    # characters reach mysqladmin as single, unmodified arguments.
    # NOTE(review): the password is still visible in the process list
    # because it is passed on the command line.
    /usr/local/webserver/mysql/bin/mysqladmin -u "${mysql_username}" -p"${mysql_password}" -S /tmp/mysql.sock shutdown
}
 
# Restart the server: shut it down, pause, then launch it again.
function_restart_mysql()
{
    printf '%s\n' 'Restarting MySQL...'
    # Fixed 5-second pause between the shutdown request and the relaunch.
    function_stop_mysql; sleep 5
    function_start_mysql
}
 
# Force-kill (SIGKILL) first the mysqld_safe wrapper and then the mysqld
# server whose `ps -ef` lines mention ${mysql_port}.
# Globals: mysql_port (read); _kill_pattern, _kill_pids (scratch, written)
function_kill_mysql()
{
    for _kill_pattern in 'bin/mysqld_safe' 'libexec/mysqld'; do
        # `print` emits one PID per line. The previous `printf $2` wrote no
        # separator, so several matching PIDs were fused into one bogus
        # number, and the PID text was used as a format string.
        _kill_pids=$(ps -ef | grep "${_kill_pattern}" | grep "${mysql_port}" | awk '{print $2}')
        # Nothing matched: skip, otherwise kill is run with no PID at all.
        if [ -n "${_kill_pids}" ]; then
            # Deliberately unquoted so every PID becomes its own argument.
            # shellcheck disable=SC2086
            kill -9 ${_kill_pids}
        fi
    done
}
 
# Dispatch on the first command-line argument; any other value (or none)
# prints the usage line.
case "$1" in
    start)
        function_start_mysql
        ;;
    stop)
        function_stop_mysql
        ;;
    restart)
        function_restart_mysql
        ;;
    kill)
        function_kill_mysql
        ;;
    *)
        printf "Usage: /data0/mysql/${mysql_port}/mysql {start|stop|restart|kill}\n"
        ;;
esac
 
============================================================================================================
chmod +x /data0/mysql/3306/mysql
启动mysql
/data0/mysql/3306/mysql start
登录mysql
/usr/local/webserver/mysql/bin/mysql -u root -p -S /tmp/mysql.sock
创建用户并赋予权限
GRANT ALL PRIVILEGES ON *.* TO 'root'@'localhost' IDENTIFIED BY '12345678';
GRANT ALL PRIVILEGES ON *.* TO 'root'@'127.0.0.1' IDENTIFIED BY '12345678';
GRANT ALL PRIVILEGES ON *.* TO 'root'@'%' IDENTIFIED BY '';
 
 
======================================================================================
安装完成启动MySQL后查看sphinx存储引擎是否安装成功
在mysql命令行下执行
show engines;
如果出现如下图红色方框内的信息说明SphinxSE已经安装成功!

 
 
安装Sphinx全文检索服务器
Sphinx默认不支持中文索引及检索,以前用Coreseek的补丁来解决;目前Coreseek不再单独提供补丁文件,而是基于Sphinx开发了Coreseek全文检索服务器。Coreseek应该是现在用得最多的Sphinx中文全文检索方案,它提供了为Sphinx设计的中文分词包LibMMSeg(包含mmseg中文分词)。其实coreseek-3.2.14.tar.gz中已经包含了sphinx,前面安装SphinxSE时也可以使用这个压缩包里的mysqlse。
我们来看一下它的安装过程:
安装autoconf
tar zxvf autoconf-2.64.tar.gz
cd autoconf-2.64
./configure --prefix=/usr
make
make install
cd ..
安装Coreseek
tar zxvf coreseek-3.2.14.tar.gz
cd coreseek-3.2.14
cd mmseg-3.2.14/
./bootstrap
./configure --prefix=/usr/local/mmseg3
make
make install
cd ../csft-3.2.14/
sh buildconf.sh
./configure --prefix=/usr/local/coreseek --without-python --without-unixodbc --with-mmseg --with-mmseg-includes=/usr/local/mmseg3/include/mmseg/ --with-mmseg-libs=/usr/local/mmseg3/lib/ --with-mysql --host=arm
make
make install
cd /usr/local/coreseek/etc
进入配置目录通过命令ls可以看到3个文件
example.sql  sphinx.conf.dist  sphinx-min.conf.dist
其中example.sql是示例sql脚本我们将其导入到数据库中的test数据库中作为测试数据(会创建两张表 documents和tags)
注:以下内容中,数据源source里的sql_query所用的表名前缀应与安装时的表前缀一致,
sql_db应与安装时的数据库名一致
vi sphinx.conf
输入以下内容
source src1
{
type = mysql
sql_host                        = localhost
sql_user                        = root
sql_pass                       = 12345678
sql_db                            = izhancms
sql_port                          = 3306 # optional, default is 3306
sql_sock                        = /tmp/mysql.sock
sql_query_pre              = SET NAMES utf8
sql_query                       = SELECT * FROM mo_maintable  LEFT JOIN  mo_article  \
                                  on mo_maintable.id = mo_article.maintable_id
sql_attr_uint                  = group_id
sql_attr_timestamp     = date_added
sql_query_info              = SELECT * FROM documents WHERE id=$id
}
index maintable
{
source                             = src1
path                                  = /usr/local/coreseek/var/data/maintable
docinfo                            = extern
charset_type                  = zh_cn.utf-8
mlock                               = 0
morphology                    = none
min_word_len               = 1
html_strip                       = 0
charset_dictpath           = /usr/local/mmseg3/etc/
ngram_len                     = 0
}
indexer
{
mem_limit                     = 32M
}
searchd
{
port                                  = 9312
log                                   = /usr/local/coreseek/var/log/searchd.log
query_log                      = /usr/local/coreseek/var/log/query.log
read_timeout                = 5
max_children                = 30
pid_file                            = /usr/local/coreseek/var/log/searchd.pid
max_matches               = 1000
seamless_rotate          = 1
preopen_indexes         = 0
unlink_old                      = 1
}
 
说明:
 
代码段source src1{***} 代表数据源,里面主要包含了数据库的配置信息;src1表示数据源名字,可以随便写。
代码段index maintable{***} 代表为哪个数据源创建索引,与source *** 是成对出现的,其中source参数的值必须是某一个数据源的名字。
 
启动sphinx命令:
/usr/local/coreseek/bin/searchd -c /usr/local/coreseek/etc/sphinx.conf

生成索引命令:
/usr/local/coreseek/bin/indexer -c /usr/local/coreseek/etc/sphinx.conf --all --rotate
其中参数--all表示生成所有索引

 
 
声明:
 
如果使用Sphinx,请在后台"系统设置>>全站搜索>>Sphinx配置"里重新设置Sphinx的服务器IP地址,端口号使用默认的 9312。