首页 / 数据库 / MySQL / 使用Django来清理数据库中的数据
数据库中的数据清理问题描述:在我所使用的系统中,由于历史和各种原因,导致数据库中表里面记录的数据,是有问题的,有重复的和不完整的数据解决方案:首先,由于这些数据的量还是挺大的,手工的清理肯定不行,然后,我就想写SQL脚本来按照约定的规则进行更新,可以利用游标来完成表中的记录的遍历,但是SQL是面向结构化的查询语言,不是面向过程的,所以虽然可以但是没有C和python这样的面向过程的使用方便,后来,我想直接在我的项目中新建一个方法,然后通过浏览器的地址栏来调用,就可以了。PS:虽然说Django的orm很方便,但是自己使用起来还是非常的尴尬,一些筛选条件和语法规则,我还是得在网上查找例子才知道怎么用幸好提供了直接执行SQL语句方法,我在清理的过程中,用的就是执行原生的SQL语句。代码如下:
def datetimestr():
return datetime.now().strftime("%Y%m%d-%H%M%S")+">>>" def update_dcData(req):
log_path="apps/dc/l10n_reports"
update_dcData_log=open(log_path+"updateDcdataLog.log","w+") sql_getProjectIDs="select a.project_id from"
" (select count(*) num,project_id from dc_data where lableName=%s group by project_id) a,"
" (select count(*) num ,project_id from management_project_target_lang group by project_id) b"
" where a.project_id=b.project_id and a.num!=b.num order by project_id"
sql_getAllProjectIDs="select project_id from management_project"
sql_getLanguageSections="select b.name from management_project_target_lang a,management_l10nlanguage b"
" where a.project_id=%s and a.l10nlanguage_id=b.id and b.id!="1""
sql_getRecords="select id, languageSection,value from dc_data where lableName =%s and project_id=%s and important="1""
sql_addRecordByID="insert into dc_data(lableName,languageSection,type,value,project_id,task_id,"
"important,unit,settlement,workload) "
"select lableName,languageSection,type,value,project_id,task_id,important,unit,settlement,workload "
"from dc_data where id=%s"
sql_updateLgs="update dc_data set languageSection=%s where id=%s"
sql_getLableNames="select lableName from dc_data where lableName like "%%_all" group by lableName"
update_dcData_log.write(datetimestr()+"sql_getLableNames"+">>>"+sql_getLableNames+"
")
update_dcData_log.write(datetimestr()+"sql_getRecords"+">>>"+sql_getRecords+"
")
update_dcData_log.write(datetimestr()+"sql_getProjectIDs"+">>>"+sql_getProjectIDs+"
")
update_dcData_log.write(datetimestr()+"sql_getLanguageSections"+">>>"+sql_getLanguageSections+"
")
update_dcData_log.write(datetimestr()+"sql_addRecordByID"+">>>"+sql_addRecordByID+"
")
update_dcData_log.write(datetimestr()+"sql_updateLgs"+">>>"+sql_updateLgs+"
")
context=Context({"msg":"Success"})
resp=render_to_response("report/clean_data.html", context,
context_instance=RequestContext(req))
cursor=connection.cursor()
try:
cursor.execute(sql_getLableNames)
lableNames=cursor.fetchall()
except Exception,e:
update_dcData_log.write(datetimestr()+"execute sql_getLableNames error "+str(e)+"
")
context=Context({"msg":"Error"})
return render_to_response("report/clean_data.html", context,
context_instance=RequestContext(req))
for lableName in lableNames:
try:
cursor.execute(sql_getProjectIDs,[lableName[0]])
projectIDs=cursor.fetchall()
except Exception,e:
update_dcData_log.write(datetimestr()+"execute sql_getProjectIDs error "+str(e)+"
")
context=Context({"msg":"Error"})
return render_to_response("report/clean_data.html", context,
context_instance=RequestContext(req))
for pid in projectIDs:
try:
cursor.execute(sql_getRecords,[lableName[0],str(pid[0])])
records=cursor.fetchall()
cursor.execute(sql_getLanguageSections,[str(pid[0])])
languageSections=cursor.fetchall()
except Exception,e:
update_dcData_log.write(datetimestr()+"execute sql_getRecords or sql_getLanguageSections error "+str(e)+"
")
context=Context({"msg":"Error"})
return render_to_response("report/clean_data.html", context,
context_instance=RequestContext(req))
values,lgs=[],[]
baseValue=str(records[0][2])
baseID=str(records[0][0])
for item in records:
lgs.append(str(item[1]))
values.append(str(item[2]))
if baseValue!=str(item[2]):
baseValue="false"
targetLgs=[str(item[0]) for item in languageSections]
if len(lgs)<1 or len(targetLgs)<1:
baseValue=="false"
if "all" not in lgs:
try:
cursor.execute(sql_addRecordByID,[baseID])
cursor.execute(sql_updateLgs,["all",baseID])
transaction.commit_unless_managed()
except Exception,e:
update_dcData_log.write(datetimestr()+"execute sql_addRecordByID or sql_updateLgs error (all)"+str(e)+"
")
context=Context({"msg":"Error"})
return render_to_response("report/clean_data.html", context,
context_instance=RequestContext(req))
update_dcData_log.write(datetimestr()+"all record is add into dc_data,the lableName and projectID were "+str(lableName[0])+"-"+str(pid[0])+"
")
if baseValue=="false":
update_dcData_log.write(datetimestr()+"please update this record mutually,the lableName and projectID were "+str(lableName[0])+"-"+str(pid[0])+"
")
else:
if len(lgs)>len(targetLgs):
update_dcData_log.write(datetimestr()+"the lableName languageSection length is longer than target numbers lableName and projectID were "+str(lableName[0])+"-"+str(pid[0])+"
")
else:
for lg in targetLgs:
if lg not in lgs:
try:
cursor.execute(sql_addRecordByID,[baseID])
cursor.execute(sql_updateLgs,[lg,baseID])
transaction.commit_unless_managed()
except Exception,e:
update_dcData_log.write(datetimestr()+"execute sql_addRecordByID or sql_updateLgs error (lg)"+str(e)+"
")
context=Context({"msg":"Error"})
return render_to_response("report/clean_data.html", context,
context_instance=RequestContext(req))
update_dcData_log.write(datetimestr()+lg+" record is add into dc_data,the lableName and projectID were "+str(lableName[0])+"-"+str(pid[0])+"
")
update_dcData_log.close()
return resp --------------------------------------分割线 --------------------------------------Ubuntu Server 12.04 安装Nginx+uWSGI+Django环境 http://www.linuxidc.com/Linux/2012-05/60639.htmDjango+Nginx+uWSGI 部署 http://www.linuxidc.com/Linux/2013-02/79862.htmDjango实战教程 http://www.linuxidc.com/Linux/2013-09/90277.htmDjango Python MySQL Linux 开发环境搭建 http://www.linuxidc.com/Linux/2013-09/90638.htmDjango 的详细介绍:请点这里
Django 的下载地址:请点这里本文永久更新链接地址