#!/usr/bin/env python
# -*- coding: cp936 -*-
#
# Copyright 2007 Google Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
import webapp2
#from lxml import etree
from google.appengine.api import urlfetch
import re


# Matches opennewpage('<digits>') and captures the digits.
# Compiled once at import time instead of on every request.
_OPENNEWPAGE_RE = re.compile(r"opennewpage\(\'(\d+)\'\)")


def _extract_page_ids(html):
    """Return the distinct opennewpage('...') ids found in *html*.

    Ids are returned as strings, de-duplicated, in order of first
    appearance so the rendered output is the same on every request
    (iterating a bare set gives an arbitrary order).
    """
    seen = set()
    ordered = []
    for page_id in _OPENNEWPAGE_RE.findall(html):
        if page_id not in seen:
            seen.add(page_id)
            ordered.append(page_id)
    return ordered


class MainHandler(webapp2.RequestHandler):
    """Demo handler: POST a form with urlfetch, then list ids scraped by regex."""

    def get(self):
        """Handle GET /.

        Sends a form-encoded POST to https://www.cnblogs.com (redirects are
        not followed), then writes 'Result:<br />' followed by one
        '<id><br />' entry per distinct opennewpage id found in the
        hard-coded sample markup.
        """
        # For a plain GET, urlfetch.fetch(url).content is enough.
        form_data = 'ssss=aaa&bb=ccc'
        # Kept under its own name: the original reused `result` for the output
        # string below, which discarded this response object.
        # NOTE(review): the response is not consumed yet; the sample `html`
        # below presumably stands in for response.content -- confirm.
        response = urlfetch.fetch(
            url='https://www.cnblogs.com',
            payload=form_data,
            method=urlfetch.POST,
            follow_redirects=False,
            headers={'Content-Type': 'application/x-www-form-urlencoded'})

        # Sample markup; the second anchor uses a misspelled attribute
        # ("onclickk") but still matches because the regex only looks for the
        # opennewpage('...') call itself.
        html = ("<a href=http://www.likecs.com/'javascript:void(0)' "
                "onclick=http://www.likecs.com/'opennewpage('54999')'>ddd</a>"
                "<a onclickk=http://www.likecs.com/'opennewpage('123456')'>aaa</a>")

        page_ids = _extract_page_ids(html)
        result = 'Result:<br />' + ''.join(
            page_id + '<br />' for page_id in page_ids)
        self.response.write(result)


# NOTE(review): debug=True is kept from the original; presumably it should be
# turned off outside development -- confirm.
app = webapp2.WSGIApplication([
    ('/', MainHandler)
], debug=True)
在GAE中用Python编写webapp进行Post数据采集
内容版权声明:除非注明,否则皆为本站原创文章。