Welcome 微信登录

首页 / 数据库 / MySQL / 使用Django来清理数据库中的数据

数据库中的数据清理问题描述:在我所使用的系统中,由于历史和各种原因,导致数据库中表里面记录的数据,是有问题的,有重复的和不完整的数据解决方案:首先,由于这些数据的量还是挺大的,手工的清理肯定不行,然后,我就想写SQL脚本来按照约定的规则进行更新,可以利用游标来完成表中的记录的遍历,但是SQL是面向结构化的查询语言,不是面向过程的,所以虽然可以但是没有C和python这样的面向过程的使用方便,后来,我想直接在我的项目中新建一个方法,然后通过浏览器的地址栏来调用,就可以了。PS:虽然说Django的orm很方便,但是自己使用起来还是非常的尴尬,一些筛选条件和语法规则,我还是得在网上查找例子才知道怎么用幸好提供了直接执行SQL语句方法,我在清理的过程中,用的就是执行原生的SQL语句。代码如下: 
def datetimestr():
    return datetime.now().strftime("%Y%m%d-%H%M%S")+">>>"  def update_dcData(req):
   
    log_path="apps/dc/l10n_reports"
    update_dcData_log=open(log_path+"updateDcdataLog.log","w+")    sql_getProjectIDs="select a.project_id from"
                     " (select count(*) num,project_id from dc_data where lableName=%s group by project_id) a,"
                     " (select count(*) num ,project_id from management_project_target_lang group by project_id) b"
                     " where a.project_id=b.project_id and a.num!=b.num order by project_id"
   
    sql_getAllProjectIDs="select project_id from management_project"
   
    sql_getLanguageSections="select b.name from management_project_target_lang a,management_l10nlanguage b"
                            " where a.project_id=%s and a.l10nlanguage_id=b.id and b.id!="1""
                           
    sql_getRecords="select id, languageSection,value from dc_data where lableName =%s and project_id=%s and important="1""
   
    sql_addRecordByID="insert into dc_data(lableName,languageSection,type,value,project_id,task_id,"
                "important,unit,settlement,workload) "
                "select lableName,languageSection,type,value,project_id,task_id,important,unit,settlement,workload "
                "from dc_data where id=%s"
    sql_updateLgs="update dc_data set languageSection=%s where id=%s"
   
    sql_getLableNames="select lableName from dc_data where lableName like "%%_all" group by lableName"
   
    update_dcData_log.write(datetimestr()+"sql_getLableNames"+">>>"+sql_getLableNames+" ")
    update_dcData_log.write(datetimestr()+"sql_getRecords"+">>>"+sql_getRecords+" ")
    update_dcData_log.write(datetimestr()+"sql_getProjectIDs"+">>>"+sql_getProjectIDs+" ")
    update_dcData_log.write(datetimestr()+"sql_getLanguageSections"+">>>"+sql_getLanguageSections+" ")   
    update_dcData_log.write(datetimestr()+"sql_addRecordByID"+">>>"+sql_addRecordByID+" ")   
    update_dcData_log.write(datetimestr()+"sql_updateLgs"+">>>"+sql_updateLgs+" ")   
    context=Context({"msg":"Success"})
    resp=render_to_response("report/clean_data.html", context,
                              context_instance=RequestContext(req))   
    cursor=connection.cursor()
    try:
        cursor.execute(sql_getLableNames)
        lableNames=cursor.fetchall()
    except Exception,e:
        update_dcData_log.write(datetimestr()+"execute sql_getLableNames error "+str(e)+" ")
        context=Context({"msg":"Error"})
        return render_to_response("report/clean_data.html", context,
                              context_instance=RequestContext(req))
    for lableName in lableNames:
        try:
            cursor.execute(sql_getProjectIDs,[lableName[0]])
            projectIDs=cursor.fetchall()
        except Exception,e:
            update_dcData_log.write(datetimestr()+"execute sql_getProjectIDs error "+str(e)+" ")
            context=Context({"msg":"Error"})
            return render_to_response("report/clean_data.html", context,
                              context_instance=RequestContext(req))
        for pid in projectIDs:
            try:
                cursor.execute(sql_getRecords,[lableName[0],str(pid[0])])
                records=cursor.fetchall()
                cursor.execute(sql_getLanguageSections,[str(pid[0])])
                languageSections=cursor.fetchall()
            except Exception,e:
                update_dcData_log.write(datetimestr()+"execute sql_getRecords or sql_getLanguageSections error "+str(e)+" ")
                context=Context({"msg":"Error"})
                return render_to_response("report/clean_data.html", context,
                              context_instance=RequestContext(req))
            values,lgs=[],[]
            baseValue=str(records[0][2])
            baseID=str(records[0][0])
            for item in records:
                lgs.append(str(item[1]))
                values.append(str(item[2]))
                if baseValue!=str(item[2]):
                    baseValue="false"
            targetLgs=[str(item[0]) for item in languageSections]
            if len(lgs)<1 or len(targetLgs)<1:
                baseValue=="false"
            if "all" not in lgs:
                try:
                    cursor.execute(sql_addRecordByID,[baseID])
                    cursor.execute(sql_updateLgs,["all",baseID])
                    transaction.commit_unless_managed()
                except Exception,e:
                    update_dcData_log.write(datetimestr()+"execute sql_addRecordByID or sql_updateLgs error (all)"+str(e)+" ")
                    context=Context({"msg":"Error"})
                    return render_to_response("report/clean_data.html", context,
                                  context_instance=RequestContext(req))
                       
                    update_dcData_log.write(datetimestr()+"all record is add into dc_data,the lableName and projectID were "+str(lableName[0])+"-"+str(pid[0])+" ")
           
            if baseValue=="false":
                update_dcData_log.write(datetimestr()+"please update this record mutually,the lableName and projectID were "+str(lableName[0])+"-"+str(pid[0])+" ")
            else:
                if len(lgs)>len(targetLgs):
                    update_dcData_log.write(datetimestr()+"the lableName languageSection length is longer than target numbers lableName and projectID were "+str(lableName[0])+"-"+str(pid[0])+" ")
                else:
                    for lg in targetLgs:
                        if lg not in lgs:
                            try:
                                cursor.execute(sql_addRecordByID,[baseID])
                                cursor.execute(sql_updateLgs,[lg,baseID])
                                transaction.commit_unless_managed()
                            except Exception,e:
                                update_dcData_log.write(datetimestr()+"execute sql_addRecordByID or sql_updateLgs error (lg)"+str(e)+" ")
                                context=Context({"msg":"Error"})
                                return render_to_response("report/clean_data.html", context,
                                  context_instance=RequestContext(req))
                               
                            update_dcData_log.write(datetimestr()+lg+" record is add into dc_data,the lableName and projectID were "+str(lableName[0])+"-"+str(pid[0])+" ")
                       
                   
    update_dcData_log.close()
           
           
    return  resp  --------------------------------------分割线 --------------------------------------Ubuntu Server 12.04 安装Nginx+uWSGI+Django环境 http://www.linuxidc.com/Linux/2012-05/60639.htmDjango+Nginx+uWSGI 部署 http://www.linuxidc.com/Linux/2013-02/79862.htmDjango实战教程 http://www.linuxidc.com/Linux/2013-09/90277.htmDjango Python MySQL Linux 开发环境搭建 http://www.linuxidc.com/Linux/2013-09/90638.htmDjango 的详细介绍:请点这里
Django 的下载地址:请点这里本文永久更新链接地址