实验三:Cassandra 安装与 CQL 操作实践
一、实验环境(Docker 部署 Cassandra)
1. 前提条件
- 已安装 Docker Desktop
- 操作系统:Windows 10/11
- 网络可访问国内 Docker 镜像仓库
2. 拉取 Cassandra 镜像(国内华为云镜像)
docker pull swr.cn-north-4.myhuaweicloud.com/ddn-k8s/docker.io/library/cassandra:3.11.6

3. 为镜像打标签(简化使用)
docker tag swr.cn-north-4.myhuaweicloud.com/ddn-k8s/docker.io/library/cassandra:3.11.6 docker.io/library/cassandra:3.11.6
4. 启动 Cassandra 容器(单节点)
docker run -d --name my-cassandra -p 9042:9042 cassandra:3.11.6

5. 查看容器启动状态
docker ps
6. 查看 Cassandra 启动日志
docker logs -f my-cassandra
出现以下日志表示服务启动成功:
Starting listening for CQL clients on /0.0.0.0:9042
Created default superuser role 'cassandra'

7. 进入 Cassandra 容器并连接 CQLSH
docker exec -it my-cassandra cqlsh
成功进入后显示:
cqlsh>

二、键空间与列族操作
以下命令均在 cqlsh 中运行
1. 创建键空间
CREATE KEYSPACE teaching WITH replication =
{'class':'SimpleStrategy','replication_factor':1};
进入键空间
USE teaching;
查看键空间的信息
DESCRIBE KEYSPACE teaching;

2. 创建用户定义类型(UDT)
CREATE TYPE teaching.contact_info (
phone text,
city text
);
3. 创建复杂类型表
CREATE TABLE teaching.student_profile (
student_id text,
tags set<text>,
notes list<text>,
score_parts map<text,int>,
emergency frozen<contact_info>,
PRIMARY KEY ((student_id))
);
4. 批量插入复杂类型数据
INSERT INTO teaching.student_profile (student_id, tags, notes, score_parts, emergency)
VALUES ('2023001', {'必修','核心'}, ['已预习','课堂签到'], {'平时':88,'期中':84,'期末':92},
{phone:'13800000001', city:'南京'});INSERT INTO teaching.student_profile (student_id, tags, notes, score_parts, emergency)
VALUES ('2023002', {'选修'}, ['预习'], {'平时':80,'期中':78,'期末':85}, {phone:'13800000002',
city:'上海'});INSERT INTO teaching.student_profile (student_id, tags, notes, score_parts, emergency)
VALUES ('2023003', {'必修'}, ['课堂签到'], {'平时':90,'期中':92,'期末':88},
{phone:'13800000003', city:'北京'});INSERT INTO teaching.student_profile (student_id, tags, notes, score_parts, emergency)
VALUES ('2023004', {'核心'}, ['作业完成'], {'平时':75,'期中':80,'期末':70},
{phone:'13800000004', city:'广州'});INSERT INTO teaching.student_profile (student_id, tags, notes, score_parts, emergency)
VALUES ('2023005', {'选修','核心'}, ['已预习','作业完成'], {'平时':88,'期中':90,'期末':85},
{phone:'13800000005', city:'深圳'});
5. 查询复杂类型数据
SELECT * FROM teaching.student_profile;
SELECT student_id, tags, notes FROM teaching.student_profile;

三、核心表建模与操作
1. 按学生查询选课表
CREATE TABLE teaching.enrollments_by_student (
student_id text,
semester text,
course_id text,
course_name text,
teacher text,
credits int,
score int,
status text,
selected_at timestamp,
student_name text static,
major text static,
PRIMARY KEY ((student_id), semester, course_id)
) WITH CLUSTERING ORDER BY (semester DESC, course_id ASC);
2. 按课程查询学生表
CREATE TABLE teaching.students_by_course (
course_id text,
semester text,
student_id text,
student_name text,
major text,
score int,
status text,
PRIMARY KEY ((course_id, semester), student_id)
);
3. 批量插入样例数据
USE teaching;BEGIN BATCH
INSERT INTO enrollments_by_student (student_id, semester, course_id, course_name, teacher,
credits, score, status, selected_at, student_name, major)
VALUES ('2023001','2025-Fall','DB101','数据库概论','李老师',3,92,'已结课','2025-09-01 10:00:00','张三','数据科学');INSERT INTO enrollments_by_student (student_id, semester, course_id, course_name, teacher,
credits, score, status, selected_at, student_name, major)
VALUES ('2023002','2025-Fall','DB101','数据库概论','李老师',3,85,'已结课','2025-09-01 10:05:00','李四','软件工程');INSERT INTO enrollments_by_student (student_id, semester, course_id, course_name, teacher,
credits, score, status, selected_at, student_name, major)
VALUES ('2023003','2025-Fall','DB101','数据库概论','李老师',3,89,'已结课','2025-09-01 10:10:00','王五','人工智能');INSERT INTO enrollments_by_student (student_id, semester, course_id, course_name, teacher,
credits, score, status, selected_at, student_name, major)
VALUES ('2023004','2025-Fall','DS102','数据结构','王老师',4,75,'已结课','2025-09-01 11:00:00','赵六','数据科学');INSERT INTO enrollments_by_student (student_id, semester, course_id, course_name, teacher,
credits, score, status, selected_at, student_name, major)
VALUES ('2023005','2025-Fall','DS102','数据结构','王老师',4,88,'已结课','2025-09-01 11:05:00','钱七','软件工程');INSERT INTO students_by_course (course_id, semester, student_id, student_name, major, score,
status)
VALUES ('DB101','2025-Fall','2023001','张三','数据科学',92,'已结课');INSERT INTO students_by_course (course_id, semester, student_id, student_name, major, score,
status)
VALUES ('DB101','2025-Fall','2023002','李四','软件工程',85,'已结课');INSERT INTO students_by_course (course_id, semester, student_id, student_name, major, score,
status)
VALUES ('DB101','2025-Fall','2023003','王五','人工智能',89,'已结课');INSERT INTO students_by_course (course_id, semester, student_id, student_name, major, score,
status)
VALUES ('DS102','2025-Fall','2023004','赵六','数据科学',75,'已结课');INSERT INTO students_by_course (course_id, semester, student_id, student_name, major, score,
status)
VALUES ('DS102','2025-Fall','2023005','钱七','软件工程',88,'已结课');
APPLY BATCH;
4. 查询操作
SELECT * FROM enrollments_by_student WHERE student_id='2023001';SELECT * FROM students_by_course WHERE course_id='DB101' AND semester='2025-Fall';SELECT course_id, student_id, score FROM students_by_course WHERE course_id='DS102' AND
semester='2025-Fall' ORDER BY student_id ASC;


5. 更新、删除操作
-- 更新前查看值
SELECT * FROM enrollments_by_student WHERE student_id='2023001' AND semester='2025-Fall' AND
course_id='DB101';-- 更新语句
UPDATE enrollments_by_student SET score=95 WHERE student_id='2023001' AND semester='2025-Fall' AND course_id='DB101';-- 更新之后再查看一次
SELECT * FROM enrollments_by_student WHERE student_id='2023001' AND semester='2025-Fall' AND
course_id='DB101';-- 删除前查看
SELECT * FROM enrollments_by_student WHERE student_id='2023005' AND semester='2025-Fall'
AND course_id='DS102';-- 删除语句
DELETE FROM enrollments_by_student WHERE student_id='2023005' AND semester='2025-Fall' AND
course_id='DS102';-- 删除后再查看一次
SELECT * FROM enrollments_by_student WHERE student_id='2023005' AND semester='2025-Fall'
AND course_id='DS102';

6. TTL 与 WRITETIME
-- 设置time to live
UPDATE enrollments_by_student USING TTL 86400 SET status='待归档' WHERE student_id='2023002'
AND semester='2025-Fall' AND course_id='DB101';-- 查看剩余时间
SELECT status, TTL(status) FROM enrollments_by_student WHERE student_id='2023002' AND
semester='2025-Fall' AND course_id='DB101';-- 查询时输出WRITETIME
SELECT score, WRITETIME(score) FROM enrollments_by_student WHERE student_id='2023001' AND
semester='2025-Fall' AND course_id='DB101';

四、命令编写题
1. 创建 teachers 表
USE teaching;
CREATE TABLE teachers (
teacher_id int,
name text,
courses list<text>,
PRIMARY KEY (teacher_id)
);
2. 插入两位老师信息
INSERT INTO teachers (teacher_id, name, courses) VALUES (1001, 'Zhang Wei', ['Java', 'Python']);
INSERT INTO teachers (teacher_id, name, courses) VALUES (1002, 'Li Ming', ['Cassandra', 'Redis']);
3. 查询 teachers 表中所有老师教授的课程列表
SELECT name, courses FROM teachers;

4. 查询 students_by_course 表中所有成绩大于85分的学生姓名、专业和成绩
SELECT student_name, major, score FROM students_by_course WHERE score > 85 ALLOW FILTERING;

5. 更新 students_by_course 表中“软件工程”专业学生的成绩 +5 分
UPDATE students_by_course SET score = score + 5 WHERE major = '软件工程' ALLOW FILTERING;

五、巩固练习题(课后作业)
5.1 概念理解题
-
列族数据库与关系数据库的差异是什么?
关系型数据库采用二维表结构,有严格表结构、支持外键、事务、复杂关联查询,数据模型固定;列族数据库(Cassandra)为分布式列式存储,无严格固定表结构,横向扩展能力强,侧重高可用、高写入性能,不支持传统事务与外键,查询依赖主键设计,适合海量数据、高并发场景。 -
Cassandra 的主键是什么?有什么作用?
Cassandra 主键分为分区键和聚簇列;分区键用于决定数据存储在集群哪个节点,实现数据分片;聚簇列用于分区内数据排序与范围查询。主键是数据唯一标识,增删改查必须依托主键定位数据。 -
解释 FROZEN 关键字的作用
frozen用于冻结集合、UDT 等复杂数据类型,将其视为整体不可拆分。被冻结的字段无法单独更新内部元素,只能整体修改,可用于建立索引,提升查询效率。 -
描述 R、W、N 与一致性、可用性之间关系
N 为副本数量,R 为读一致性级别,W 为写一致性级别。
- R+W > N:保证强一致性,牺牲部分可用性;
- R+W ≤ N:保证高可用性,无法做到强一致性;
调整三者数值可在一致性与可用性之间做权衡。
- Gossip 协议的作用是什么?
Gossip 是节点间的通信协议,Cassandra 集群中节点互相周期性同步状态、元数据、拓扑信息,让所有节点感知集群整体状态,实现集群自发现、故障感知与数据同步。
5.2 CQL 命令理解题
-
SELECT * FROM enrollments_by_student WHERE student_id='2023001';
作用:根据分区键student_id查询该学生的所有选课记录,返回该行所有字段数据。
结果:输出学号为 2023001 学生的全部选课信息。 -
UPDATE enrollments_by_student SET score=88 WHERE student_id='2023002' AND semester='2026-Spring';
作用:修改指定学生、指定学期课程的成绩为88分。
结果:表中无2023002+2026-Spring这条数据,执行后无数据被更新。 -
DELETE FROM enrollments_by_student WHERE student_id='2023003';
作用:根据分区键删除该学生下所有对应数据。
结果:学号 2023003 的全部选课记录被删除。 -
CREATE INDEX idx_course ON enrollments_by_student (course_id);
作用:针对course_id字段创建二级索引。
结果:成功创建名为idx_course的索引,可基于课程编号做条件查询。