当前位置:   article > 正文

mysql标记上具有语法错误_ProgrammingError:(1064,'您的SQL语法有错误;请查看与MySQL服务器版本相对应的手册以获得正确的语法...

sql检查与mysql服务器版本sno char(5)对应手册

我正在用python编写一个简单的爬行程序。所以,我用了MySQL和Python。但当我执行这个简单的程序时,会出现一个错误。然后,从web上爬网的内容不会在MySQL表上更新。此错误消息显示编程错误和语法错误。但我不认为我输入了错误的代码。因为问题点有HTML标记。错误消息中出现HTML选项卡的原因。我认为MySQL和Python之间有问题。这是错误信息。Traceback (most recent call last):

File "crawl.py", line 237, in

parseArticle( u )

File "crawl.py", line 166, in parseArticle

db.updateURL( url , contents )

File "crawl.py", line 206, in updateURL

self.cursor.execute("UPDATE urls SET state=%d,content='%s' WHERE url='%s'"%(state,content,url))

File "/usr/lib/python2.7/dist-packages/MySQLdb/cursors.py", line 174, in execute

self.errorhandler(self, exc, value)

File "/usr/lib/python2.7/dist-packages/MySQLdb/connections.py", line 36, in defaulterrorhandler

raise errorclass, errorvalue

ProgrammingError: (1064, 'You have an error in your SQL syntax; check the manual that corresponds to your MySQL server version for the right syntax to use near \'\xeb\x8f\x8b\xec\x9b\x80\', dotum, sans-serif; }\r\n\t//-->\r\n\t\n

def __init__(self):

self.conn = MySQLdb.connect(db='crawlDB', user='root', passwd='qltkd')

self.conn.query("set character_set_connection=utf8;")

self.conn.query("set character_set_server=utf8;")

self.conn.query("set character_set_client=utf8;")

self.conn.query("set character_set_results=utf8;")

self.conn.query("set character_set_database=utf8;")

self.cursor = self.conn.cursor()

self.cursor.execute('CREATE TABLE IF NOT EXISTS urls(url CHAR(150), state INT, content TEXT)')

def commit(self):

self.conn.commit()

def __del__(self):

self.conn.commit()

self.cursor.close()

def insertURL(self, url, state=0, content=None):

#'/' delete

if url[-1]=='/': url=url[:-1]

try:

self.cursor.execute("INSERT INTO urls VALUES ('%s',%d,'%s')"%(url,state,content))

except:

return 0

else:

return 1

def selectUncrawledURL(self):

self.cursor.execute("SELECT * FROM urls where state=0")

return [ row[0] for row in self.cursor.fetchall() ]

def updateURL(self, url, content, state=1):

if url[-1]=='/': url=url[:-1]

self.cursor.execute("UPDATE urls SET state=%d,content='%s' WHERE url='%s'"%(state,content,url))

def isCrawledURL(self, url):

if url[-1]=='/': url=url[:-1]

self.cursor.execute("SELECT COUNT(*) FROM urls WHERE url='%s' AND state=1"%url)

ret = self.cursor.fetchone()

return ret[0]

db = DB()

if __name__=='__main__':

print 'starting crawl.py...'

contents = getContent( mainpage )

URLs = getArticleInfo( BeautifulSoup( contents ) )

nSuccess = 0

for u in URLs:

nSuccess += db.insertURL( u )

print 'inserted %d new pages.'%nSuccess

while 1:

uncrawled_urls = db.selectUncrawledURL()

if not uncrawled_urls: break

for u in uncrawled_urls:

print 'downloading %s'%u

try:

parseArticle( u )

except:

traceback.print_exc()

db.updateURL( u, -1 )

db.commit()

#bs.UpdateIndex()

声明:本文内容由网友自发贡献,不代表【wpsshop博客】立场,版权归原作者所有,本站不承担相应法律责任。如您发现有侵权的内容,请联系我们。转载请注明出处:https://www.wpsshop.cn/w/Gausst松鼠会/article/detail/508982
推荐阅读
相关标签
  

闽ICP备14008679号