词云: Python爬取国际时事

前置工具

python
wordcloud
jieba
BeautifulSoup
matplotlib
scipy

第一步: 爬取国际时事列表

待爬地址: http://m.sohu.com/cr/57/?page=1&v=2

首先我们可以观察到,每点击列表中的下一页时, page 会加一

然后我们就可以确认,想获取多少条信息,直接替换page属性的值即可

然后我们观察想要爬取的内容:

审查元素:

我们发现文本都是在 div(class="bd3 pb1") -> div -> p -> a 标签下的:

编写代码

爬取数据并保存在data.txt中:

# coding: utf-8

from wordcloud import WordCloud
import requests
from bs4 import BeautifulSoup
import re

def getHTMLText(url):
    """Download ``url`` and return the response body as text.

    The text is decoded with the encoding that ``requests`` detects from the
    content (``apparent_encoding``) rather than the one declared in the HTTP
    headers.

    Args:
        url: Address of the page to download.

    Returns:
        The decoded page text, or ``None`` when the request fails (connection
        error, timeout, or a non-success HTTP status).
    """
    try:
        # A timeout keeps one unresponsive page from hanging the whole crawl.
        r = requests.get(url, timeout=10)
        r.raise_for_status()
    except requests.RequestException:
        # Only request-level failures are treated as "no page"; anything else
        # (e.g. KeyboardInterrupt, programming errors) still propagates.
        return None
    r.encoding = r.apparent_encoding
    return r.text

def has_p_a(tag):
    """Placeholder tag filter: ignores ``tag`` and always returns ``None``."""
    return None

def getWannaData(stockURL,res):
    """Append the text of every link in the news list of one page to ``res``.

    The page is fetched with ``getHTMLText`` and the links are looked up inside
    the first ``<div class="bd3 pb1">`` element.

    Args:
        stockURL: Address of the list page to scrape.
        res: List that the link texts are appended to (modified in place).

    Returns:
        None. When the page could not be downloaded or does not contain the
        expected container, ``res`` is left unchanged.
    """
    html = getHTMLText(stockURL)
    if not html:
        # Download failed: skip this page instead of crashing the whole crawl.
        return
    soup = BeautifulSoup(html,'html.parser')
    container = soup.find('div',class_="bd3 pb1")
    if container is None:
        # Page layout differs from what we expect (e.g. past the last page).
        return
    res.extend(link.text for link in container.find_all('a'))

# Collected headline texts from all crawled pages.
res = []
# Number of list pages to crawl; pages are numbered starting at 1.
maxn = 100
# range() excludes its upper bound, so use maxn + 1 to really fetch maxn pages.
for i in range(1,maxn + 1):
    getWannaData('http://m.sohu.com/cr/57/?page='+str(i)+'&v=2',res)

# Append the headlines to data.txt, one per line.  The explicit UTF-8 encoding
# keeps Chinese text readable regardless of the platform default, and the
# context manager guarantees the file is flushed and closed.
with open('data.txt','a+',encoding='utf-8') as file:
    for q in res:
        file.write(q+'\n')

其中maxn是控制爬取多少页的

data.txt 部分内容:

第二步: 生成词云

前置

因为要进行中文分词,所以要用jieba
注意在打开data.txt时的编码问题
还有ttf不能保存在有中文的路径下

背景图片

我们选择 水伊布.png

生成词云

容我说一句,在中国相对封闭的网络环境中,已经可以看到世界如此的乱了,全部的词条大部分是消极的…看起来大规模战争结束的时间太久了…(还是说,世界就没有安宁过)

这张图可以找到安倍

机器学习实战(一) K-近邻

K近邻算法概述

简单地说,k近邻算法就是通过测量不同特征值之间的距离来进行分类.
即根据新数据与已知类别数据集中各点之间的距离,判定新数据的类别.

这里我们选择使用欧氏距离来当做两点间的距离.

实现KNN算法

伪码

对未知类别属性的数据集中的每个点依次执行以下操作

计算已知类别数据集中的点与当前点之间的距离

按照距离递增次序排序

选取与当前点距离最小的k个点

确定前k个点所在的类别的出现频率

返回前k个点出现频率最高的类别作为当前点的预测分类

实现算法前

我们来学习一下需要用到的一些库函数.

numpy

1.list转array

from numpy import *
array([1,1])

2.zeros()初始化向量

import numpy
from numpy import *
a=(3,4)
zeros(a)
# 初始化一个3行四列的0矩阵

3.矩阵操作

import numpy
from numpy import *

Mat = array([[1,2],[3,4]])

# 每列最小(axis=0: 沿行方向聚合,得到每列的结果)
Mat.min(0)
# 每行最小(axis=1: 沿列方向聚合,得到每行的结果)
Mat.min(1)
# 每列和
Mat.sum(0)
# 上面传递的参数都是axis=0 or 1: axis=0得到每列的结果,axis=1得到每行的结果

# shape返回一个tuple,代表矩阵的行数和列数
Mat.shape

3.1矩阵排序argsort()

import numpy
from numpy import *

k = array([1,2,8.5,-1,0])
t = k.argsort()
# 输出升序排序后每位数字的下标数组

输出升序排序后每位数字的下标数组,比如上面那个输出是:

array([3,4,0,1,2],dtype=int64)
# 第一个是k[3],第二个是k[4]

4.tile

import numpy
from numpy import *

# 有两个参数,第一个参数是初始矩阵,第二个参数是一个tuple,代表
# 向行拓展次数,以及向列拓展次数,具体调用一下就知道了
tile([1,2],(1))# 原矩阵
tile([1,2],(2,2))# 行两倍,列两倍

5.运算
直接使用运算符号(如 + - * /),是对两个数组中对应位置的元素逐个进行运算(element-wise),而不是矩阵乘法.
真正的矩阵运算需要通过库来实现.

数据读取

与本例相关的数据集地址:
datingTestSet2.txt

# 打开数据文件
fr = open('datingTestSet2.txt')
# 按行读取
arrayOfLines = fr.readlines()
arrayOfLines

from numpy import *
numberOfLines = len(arrayOfLines)
# 生成与数据集相同列数的矩阵
returnMat = zeros((numberOfLines,3))
returnMat
# 格式化读入,存储到矩阵中
for line in arrayOfLines:
    line = line.strip()
    print(line.split('\t'))
    print(int(line.split('\t')[-1]))

matplotlib散点图

import matplotlib
import matplotlib.pyplot as plt
import numpy
from numpy import *
# 生成plt
fig = plt.figure()
# 111表示把画布分成1行1列,并取其中第1个子图
ax = fig.add_subplot(111)
# 创建一个矩阵,第三个代表类别
Mat = array([[1,123,2],[10,256,1],[7,321,3]])
# 获取类别矩阵
Label = Mat[:,2]
# 第一个参数横坐标,第二个参数纵坐标,第三个参数,大小矩阵(s),第四个参数,颜色矩阵(c)
ax.scatter(Mat[:,0],Mat[:,1],15.0*Label,15.0*Label)
# 绘制
plt.show()

代码实现

对于代码的解释我都注释在代码中了

# K-近邻
'''
算法思想:

计算已知类别数据集中的点与当前点之间的距离

按照距离递增次序排序

选取与当前点距离最小的k个点

确定前k个点所在的类别的出现频率

返回前k个点出现频率最高的类别作为当前点的预测分类
'''

def classify0(inX,dataSet,labels,k):
    """Classify ``inX`` by majority vote among its k nearest neighbours.

    Distances are Euclidean, computed between ``inX`` and every row of
    ``dataSet``.

    Args:
        inX: Feature vector to classify (one value per feature).
        dataSet: Training samples, one row per sample and one column per
            feature.
        labels: Label of each training sample; ``labels[i]`` belongs to row
            ``i`` of ``dataSet``.
        k: Number of nearest neighbours that take part in the vote. If ``k``
            exceeds the number of samples, all samples vote.

    Returns:
        The label that occurs most often among the k nearest samples. On a
        tie, the label that was encountered first (i.e. the one with the
        closest sample) wins.
    """
    # Local import so the function works on its own, without relying on a
    # star-import made elsewhere.
    import numpy as np

    dataSet = np.asarray(dataSet, dtype=float)
    # Broadcasting subtracts every training row from inX in one step.
    diffMat = np.asarray(inX, dtype=float) - dataSet
    # Sum the squared differences across the features (axis=1) so that we get
    # exactly one distance per training sample.  Summing over axis 0 would
    # instead collapse all samples into one value per feature.
    distance = np.sqrt((diffMat ** 2).sum(axis=1))
    # Indices of the training samples, nearest first.
    sortedDistIndicies = distance.argsort()

    # Count how often each label appears among the k nearest samples.
    classCount = {}
    for sampleIndex in sortedDistIndicies[:k]:
        voteIlabel = labels[sampleIndex]
        classCount[voteIlabel] = classCount.get(voteIlabel,0) + 1
    # max() returns the first key with the highest count, which matches a
    # stable descending sort on the counts.
    return max(classCount, key=classCount.get)

可视化分析

import numpy
from numpy import *
# 将测试数据转换为需要的类型
def file2matrix(filename):
    """Parse a tab-separated data file into a feature matrix and label list.

    Every non-blank line must hold three numeric feature columns followed by
    an integer class label in the last column.  For datingTestSet2.txt the
    features are:
      - frequent-flyer miles earned per year
      - percentage of time spent playing video games
      - litres of ice cream consumed per week
    and the labels 1, 2, 3 stand for best, second and worst respectively.

    Args:
        filename: Path of the file to read.

    Returns:
        A tuple ``(returnMat, classLabelVector)`` where ``returnMat`` is a
        float array with one row per data line and three columns, and
        ``classLabelVector`` is the list of integer labels in file order.
    """
    # The context manager closes the file even if parsing fails; blank lines
    # (e.g. a trailing empty line) are skipped instead of crashing the
    # float conversion.
    with open(filename) as fr:
        rows = [line.strip().split('\t') for line in fr if line.strip()]

    returnMat = zeros((len(rows),3))
    classLabelVector = []
    for index, listFromLine in enumerate(rows):
        # First three columns are the features.
        returnMat[index,:] = [float(value) for value in listFromLine[:3]]
        # Last column is the class label.
        classLabelVector.append(int(listFromLine[-1]))
    return returnMat,classLabelVector

import matplotlib
import matplotlib.pyplot as plt

datingDataMat,datingLabels = file2matrix('datingTestSet2.txt')

fig = plt.figure()
ax = fig.add_subplot(111)
ax.scatter(datingDataMat[:,0],datingDataMat[:,1],15.0*array(datingLabels),15.0*array(datingLabels))
plt.show()

最后的结果如下:

归一化数值

我们可以发现,在数据集中,每种特征的数据极差差距都很大,比如飞行常客里程数的极差,和每周消费冰淇淋公升数的极差相差较大.

所以我们尝试将不同的数据集按照相同的区间范围进行计算.

计算公式(和百分制化为150分制的道理一样):
newValue = (OldValue-min)/(max-min)
其中min和max代表数据集中的最小特征值和最大特征值.
使用这个公式后,数值将统一落在0~1之间(若改用其他缩放公式,也可以映射到-1~1之间).

代码

def autoNorm(dataSet):
    """Rescale every feature (column) of ``dataSet`` to the range 0..1.

    Uses ``newValue = (oldValue - min) / (max - min)`` per column.

    Args:
        dataSet: Two-dimensional data, one row per sample and one column per
            feature.

    Returns:
        A tuple ``(normDataSet, ranges, minVals)``:
          - normDataSet: the rescaled data, same shape as ``dataSet``
          - ranges: per-column ``max - min``
          - minVals: per-column minimum
        A column whose values are all equal has range 0; it is mapped to 0.0
        instead of producing NaN from a division by zero.
    """
    # Local import so the function works on its own, without relying on a
    # star-import made elsewhere.
    import numpy as np

    dataSet = np.asarray(dataSet, dtype=float)
    # min(0) / max(0) reduce over the rows, giving one value per column.
    minVals = dataSet.min(0)
    maxVals = dataSet.max(0)
    ranges = maxVals - minVals
    # Divide by 1 where the range is 0 so constant columns become 0.0; the
    # returned ``ranges`` keeps the true (possibly zero) values.
    safeRanges = np.where(ranges == 0, 1.0, ranges)
    # Broadcasting applies the per-column values to every row.
    normDataSet = (dataSet - minVals) / safeRanges
    return normDataSet,ranges,minVals

对约会网站的测试

最后我们对之前的datingTestSet2.txt进行误差测试
其中hoRatio(hold-out ratio)代表从数据集中取出用作测试的样本所占的比例.
这里用0.1即1000*0.1=100个样本数据进行测试.

def datingClassTest():
    """Measure the classifier's error rate on the dating data set.

    The first ``hoRatio`` share of the normalised samples is held out as test
    data; the remaining samples are the training set.  Each test sample is
    classified with k=3, every prediction is printed next to the real label,
    and the overall error rate is printed at the end.
    """
    hoRatio = 0.10
    features, allLabels = file2matrix('datingTestSet2.txt')
    normMat, ranges, minVals = autoNorm(features)
    sampleCount = normMat.shape[0]
    numTestVecs = int(sampleCount * hoRatio)

    # Everything after the held-out head of the data is used for training.
    trainingSet = normMat[numTestVecs:sampleCount, :]
    trainingLabels = allLabels[numTestVecs:sampleCount]

    errorCount = 0.0
    for i in range(numTestVecs):
        predicted = classify0(normMat[i, :], trainingSet, trainingLabels, 3)
        actual = allLabels[i]
        print('the classifier came back with: %d,the real answer is: %d' % (predicted, actual))
        if predicted != actual:
            errorCount += 1.0
    print('the total error rate is: %f' % (errorCount / float(numTestVecs)))

datingClassTest()

测试结果(注意: 下面约60%的错误率已接近三分类随机猜测的水平(约67%),明显偏高,通常说明距离计算有误,例如 classify0 中对平方差求和时应按行求和,即 sum(axis=1)):

the classifier came back with: 1,the real answer is: 3
the classifier came back with: 1,the real answer is: 2
the classifier came back with: 3,the real answer is: 1
the classifier came back with: 1,the real answer is: 1
the classifier came back with: 3,the real answer is: 1
the classifier came back with: 1,the real answer is: 1
the classifier came back with: 1,the real answer is: 3
the classifier came back with: 3,the real answer is: 3
the classifier came back with: 1,the real answer is: 1
the classifier came back with: 3,the real answer is: 3
the classifier came back with: 1,the real answer is: 1
the classifier came back with: 1,the real answer is: 1
the classifier came back with: 1,the real answer is: 2
the classifier came back with: 1,the real answer is: 1
the classifier came back with: 2,the real answer is: 1
the classifier came back with: 3,the real answer is: 1
the classifier came back with: 1,the real answer is: 1
the classifier came back with: 1,the real answer is: 1
the classifier came back with: 3,the real answer is: 2
the classifier came back with: 3,the real answer is: 3
the classifier came back with: 1,the real answer is: 2
the classifier came back with: 3,the real answer is: 1
the classifier came back with: 1,the real answer is: 2
the classifier came back with: 1,the real answer is: 3
the classifier came back with: 1,the real answer is: 2
the classifier came back with: 3,the real answer is: 3
the classifier came back with: 1,the real answer is: 2
the classifier came back with: 3,the real answer is: 3
the classifier came back with: 2,the real answer is: 2
the classifier came back with: 3,the real answer is: 1
the classifier came back with: 3,the real answer is: 3
the classifier came back with: 3,the real answer is: 1
the classifier came back with: 1,the real answer is: 3
the classifier came back with: 1,the real answer is: 1
the classifier came back with: 1,the real answer is: 2
the classifier came back with: 1,the real answer is: 1
the classifier came back with: 1,the real answer is: 1
the classifier came back with: 1,the real answer is: 2
the classifier came back with: 3,the real answer is: 3
the classifier came back with: 3,the real answer is: 3
the classifier came back with: 1,the real answer is: 1
the classifier came back with: 3,the real answer is: 2
the classifier came back with: 1,the real answer is: 3
the classifier came back with: 3,the real answer is: 3
the classifier came back with: 3,the real answer is: 3
the classifier came back with: 3,the real answer is: 1
the classifier came back with: 3,the real answer is: 1
the classifier came back with: 3,the real answer is: 1
the classifier came back with: 1,the real answer is: 1
the classifier came back with: 1,the real answer is: 2
the classifier came back with: 1,the real answer is: 2
the classifier came back with: 2,the real answer is: 1
the classifier came back with: 3,the real answer is: 3
the classifier came back with: 1,the real answer is: 2
the classifier came back with: 3,the real answer is: 2
the classifier came back with: 1,the real answer is: 2
the classifier came back with: 1,the real answer is: 2
the classifier came back with: 1,the real answer is: 3
the classifier came back with: 1,the real answer is: 1
the classifier came back with: 1,the real answer is: 2
the classifier came back with: 1,the real answer is: 1
the classifier came back with: 1,the real answer is: 2
the classifier came back with: 1,the real answer is: 2
the classifier came back with: 1,the real answer is: 2
the classifier came back with: 1,the real answer is: 2
the classifier came back with: 1,the real answer is: 2
the classifier came back with: 2,the real answer is: 3
the classifier came back with: 1,the real answer is: 2
the classifier came back with: 3,the real answer is: 3
the classifier came back with: 1,the real answer is: 1
the classifier came back with: 1,the real answer is: 2
the classifier came back with: 3,the real answer is: 3
the classifier came back with: 3,the real answer is: 2
the classifier came back with: 1,the real answer is: 2
the classifier came back with: 1,the real answer is: 1
the classifier came back with: 1,the real answer is: 3
the classifier came back with: 2,the real answer is: 1
the classifier came back with: 1,the real answer is: 1
the classifier came back with: 1,the real answer is: 3
the classifier came back with: 1,the real answer is: 3
the classifier came back with: 1,the real answer is: 1
the classifier came back with: 1,the real answer is: 2
the classifier came back with: 3,the real answer is: 3
the classifier came back with: 1,the real answer is: 1
the classifier came back with: 3,the real answer is: 3
the classifier came back with: 1,the real answer is: 1
the classifier came back with: 1,the real answer is: 2
the classifier came back with: 1,the real answer is: 2
the classifier came back with: 2,the real answer is: 1
the classifier came back with: 1,the real answer is: 1
the classifier came back with: 1,the real answer is: 3
the classifier came back with: 1,the real answer is: 3
the classifier came back with: 2,the real answer is: 1
the classifier came back with: 1,the real answer is: 2
the classifier came back with: 1,the real answer is: 1
the classifier came back with: 1,the real answer is: 3
the classifier came back with: 1,the real answer is: 3
the classifier came back with: 1,the real answer is: 2
the classifier came back with: 3,the real answer is: 1
the classifier came back with: 1,the real answer is: 1
the total error rate is: 0.600000

Done,And thank you for watching!

Pygame constants.py

一些按键相关,以及一些屏幕处理的常量

# encoding: utf-8
# module pygame.constants
# from D:\Software\AnAconda\lib\site-packages\pygame\constants.cp36-win_amd64.pyd
# by generator 1.145
""" Constants defined by SDL and needed in Pygame. """
# no imports

# Variables with simple values

ACTIVEEVENT = 1

ANYFORMAT = 268435456

ASYNCBLIT = 4

AUDIO_S16 = 32784
AUDIO_S16LSB = 32784
AUDIO_S16MSB = 36880
AUDIO_S16SYS = 32784
AUDIO_S8 = 32776
AUDIO_U16 = 16
AUDIO_U16LSB = 16
AUDIO_U16MSB = 4112
AUDIO_U16SYS = 16
AUDIO_U8 = 8

BIG_ENDIAN = 4321

BLEND_ADD = 1
BLEND_MAX = 5
BLEND_MIN = 4
BLEND_MULT = 3
BLEND_PREMULTIPLIED = 17

BLEND_RGBA_ADD = 6
BLEND_RGBA_MAX = 16
BLEND_RGBA_MIN = 9
BLEND_RGBA_MULT = 8
BLEND_RGBA_SUB = 7

BLEND_RGB_ADD = 1
BLEND_RGB_MAX = 5
BLEND_RGB_MIN = 4
BLEND_RGB_MULT = 3
BLEND_RGB_SUB = 2

BLEND_SUB = 2

BUTTON_X1 = 6
BUTTON_X2 = 7

DOUBLEBUF = 1073741824

FULLSCREEN = -2147483648

GL_ACCELERATED_VISUAL = 15

GL_ACCUM_ALPHA_SIZE = 11

GL_ACCUM_BLUE_SIZE = 10

GL_ACCUM_GREEN_SIZE = 9

GL_ACCUM_RED_SIZE = 8

GL_ALPHA_SIZE = 3

GL_BLUE_SIZE = 2

GL_BUFFER_SIZE = 4

GL_DEPTH_SIZE = 6

GL_DOUBLEBUFFER = 5

GL_GREEN_SIZE = 1

GL_MULTISAMPLEBUFFERS = 13
GL_MULTISAMPLESAMPLES = 14

GL_RED_SIZE = 0

GL_STENCIL_SIZE = 7

GL_STEREO = 12

GL_SWAP_CONTROL = 16

HAT_CENTERED = 0
HAT_DOWN = 4
HAT_LEFT = 8
HAT_LEFTDOWN = 12
HAT_LEFTUP = 9
HAT_RIGHT = 2
HAT_RIGHTDOWN = 6
HAT_RIGHTUP = 3
HAT_UP = 1

HWACCEL = 256
HWPALETTE = 536870912
HWSURFACE = 1

IYUV_OVERLAY = 1448433993

JOYAXISMOTION = 7
JOYBALLMOTION = 8
JOYBUTTONDOWN = 10
JOYBUTTONUP = 11
JOYHATMOTION = 9

KEYDOWN = 2
KEYUP = 3

KMOD_ALT = 768
KMOD_CAPS = 8192
KMOD_CTRL = 192
KMOD_LALT = 256
KMOD_LCTRL = 64
KMOD_LMETA = 1024
KMOD_LSHIFT = 1
KMOD_META = 3072
KMOD_MODE = 16384
KMOD_NONE = 0
KMOD_NUM = 4096
KMOD_RALT = 512
KMOD_RCTRL = 128
KMOD_RMETA = 2048
KMOD_RSHIFT = 2
KMOD_SHIFT = 3

K_0 = 48
K_1 = 49
K_2 = 50
K_3 = 51
K_4 = 52
K_5 = 53
K_6 = 54
K_7 = 55
K_8 = 56
K_9 = 57
K_a = 97
K_AMPERSAND = 38
K_ASTERISK = 42
K_AT = 64
K_b = 98
K_BACKQUOTE = 96
K_BACKSLASH = 92
K_BACKSPACE = 8
K_BREAK = 318
K_c = 99
K_CAPSLOCK = 301
K_CARET = 94
K_CLEAR = 12
K_COLON = 58
K_COMMA = 44
K_d = 100
K_DELETE = 127
K_DOLLAR = 36
K_DOWN = 274
K_e = 101
K_END = 279
K_EQUALS = 61
K_ESCAPE = 27
K_EURO = 321
K_EXCLAIM = 33
K_f = 102
K_F1 = 282
K_F10 = 291
K_F11 = 292
K_F12 = 293
K_F13 = 294
K_F14 = 295
K_F15 = 296
K_F2 = 283
K_F3 = 284
K_F4 = 285
K_F5 = 286
K_F6 = 287
K_F7 = 288
K_F8 = 289
K_F9 = 290
K_FIRST = 0
K_g = 103
K_GREATER = 62
K_h = 104
K_HASH = 35
K_HELP = 315
K_HOME = 278
K_i = 105
K_INSERT = 277
K_j = 106
K_k = 107
K_KP0 = 256
K_KP1 = 257
K_KP2 = 258
K_KP3 = 259
K_KP4 = 260
K_KP5 = 261
K_KP6 = 262
K_KP7 = 263
K_KP8 = 264
K_KP9 = 265

K_KP_DIVIDE = 267
K_KP_ENTER = 271
K_KP_EQUALS = 272
K_KP_MINUS = 269
K_KP_MULTIPLY = 268
K_KP_PERIOD = 266
K_KP_PLUS = 270

K_l = 108
K_LALT = 308
K_LAST = 323
K_LCTRL = 306
K_LEFT = 276
K_LEFTBRACKET = 91
K_LEFTPAREN = 40
K_LESS = 60
K_LMETA = 310
K_LSHIFT = 304
K_LSUPER = 311
K_m = 109
K_MENU = 319
K_MINUS = 45
K_MODE = 313
K_n = 110
K_NUMLOCK = 300
K_o = 111
K_p = 112
K_PAGEDOWN = 281
K_PAGEUP = 280
K_PAUSE = 19
K_PERIOD = 46
K_PLUS = 43
K_POWER = 320
K_PRINT = 316
K_q = 113
K_QUESTION = 63
K_QUOTE = 39
K_QUOTEDBL = 34
K_r = 114
K_RALT = 307
K_RCTRL = 305
K_RETURN = 13
K_RIGHT = 275
K_RIGHTBRACKET = 93
K_RIGHTPAREN = 41
K_RMETA = 309
K_RSHIFT = 303
K_RSUPER = 312
K_s = 115
K_SCROLLOCK = 302
K_SEMICOLON = 59
K_SLASH = 47
K_SPACE = 32
K_SYSREQ = 317
K_t = 116
K_TAB = 9
K_u = 117
K_UNDERSCORE = 95
K_UNKNOWN = 0
K_UP = 273
K_v = 118
K_w = 119
K_x = 120
K_y = 121
K_z = 122

LIL_ENDIAN = 1234

MOUSEBUTTONDOWN = 5
MOUSEBUTTONUP = 6
MOUSEMOTION = 4

NOEVENT = 0
NOFRAME = 32

NUMEVENTS = 32

OPENGL = 2
OPENGLBLIT = 10

PREALLOC = 16777216

QUIT = 12

RESIZABLE = 16

RLEACCEL = 16384
RLEACCELOK = 8192

SCRAP_BMP = 'image/bmp'
SCRAP_CLIPBOARD = 0
SCRAP_PBM = 'image/pbm'
SCRAP_PPM = 'image/ppm'
SCRAP_SELECTION = 1
SCRAP_TEXT = 'text/plain'

SRCALPHA = 65536
SRCCOLORKEY = 4096

SWSURFACE = 0

SYSWMEVENT = 13

TIMER_RESOLUTION = 10

USEREVENT = 24

USEREVENT_DROPFILE = 4096

UYVY_OVERLAY = 1498831189

VIDEOEXPOSE = 17
VIDEORESIZE = 16

YUY2_OVERLAY = 844715353

YV12_OVERLAY = 842094169

YVYU_OVERLAY = 1431918169

# no functions
# no classes
# variables with complex values

__loader__ = None # (!) real value is ''

__spec__ = None # (!) real value is ''


python 机器学习 科学计算库

Jupyter Notebook

math Last Checkpoint: a few seconds ago (autosaved) [Python 3]

Python 3

Code:

import numpy as np

np.arange(10)

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

a=np.arange(10)

#可以直接对数组进行运算

a = a ** 2

a

array([ 0,  1,  4,  9, 16, 25, 36, 49, 64, 81], dtype=int32)

​

#Scipy

#用来做高等数学等计算的包

from scipy import linalg

#生成一个二维数组

A = np.array([[1,2],[3,4]])

A

array([[1, 2],
       [3, 4]])

#计算行列式的值

#1*4-2*3

linalg.det(A)

​

-2.0

#Pandas

#是一种构建于Numpy的高级数据结构和精巧工具,快速简单的处理数据

import pandas as pd

#序列

s = pd.Series([1,3,5,np.nan,6,8])

s

0    1.0
1    3.0
2    5.0
3    NaN
4    6.0
5    8.0
dtype: float64

#时间数组,起始时间到六天

dates = pd.date_range('20130101',periods=6)

dates

DatetimeIndex(['2013-01-01', '2013-01-02', '2013-01-03', '2013-01-04',
               '2013-01-05', '2013-01-06'],
              dtype='datetime64[ns]', freq='D')

#生成表格

#index:行标识

#columns:列标识

#rand是0-1的均匀分布,randn是均值为0方差为1的正态分布;

#np.random.rand(n)或randn(n)生成长度为n的一维随机数组(注意: 不是n*n的矩阵)。

#rand(m,n)或randn(m,n)生成m行n列的随机数矩阵。

df = pd.DataFrame(np.random.randn(6,4),index=dates,columns=list('ABCD'))

df

    A   B   C   D
2013-01-01  1.210884    0.616424    0.961066    0.173936
2013-01-02  0.358245    0.506724    -0.047834   0.587061
2013-01-03  -0.508396   0.012049    -0.114224   -1.195929
2013-01-04  2.303441    0.536666    -1.013810   -0.574154
2013-01-05  -1.327828   -0.003089   0.662432    0.038886
2013-01-06  1.379826    1.554135    -0.681174   -0.816094

#通过B列升序排序(sort_values默认ascending=True;如需降序请传入ascending=False)

df.sort_values(by='B')

#从上到下多少行

#df.head()

#从下到上多少行

#df.tail()

#所有值和描述

#df.describe()

#转置

#df.T

​

    A   B   C   D
2013-01-01  1.210884    0.616424    0.961066    0.173936
2013-01-02  0.358245    0.506724    -0.047834   0.587061
2013-01-03  -0.508396   0.012049    -0.114224   -1.195929
2013-01-04  2.303441    0.536666    -1.013810   -0.574154
2013-01-05  -1.327828   -0.003089   0.662432    0.038886

#绘图

import matplotlib.pyplot as plt

plt.plot([1,2,3])

plt.ylabel('some numbers')

plt.show()

​

python web django

Django的模板语法,如下:

{{}} :用于变量替换。
{%for i in content%}{{i}}{%endfor%} : for 循环
{%extends "base.html"%} : 用于指定父模板文件
{%block name %} ··· {%endblock%} :用于指定被替换的内容,内容名称为 name.
{%if 条件%} .....{% elif 条件%} ....{%else%}... {%endif%} :用于 if 语句

常用的 ORM 查询操作:

ModelName.objects.all() #返回 model 的所有数据集

ModelName.objects.filter(**kwargs) #返回符合筛选条件的数据集

ModelName.objects.exclude(**kwargs) #返回不符合筛选条件的数据集

ModelName.objects.get(**kwargs) #用于查询单条记录

创建数据

# 第一种方式
# models.UserInfo.objects.create(username="root",password="123")
# 第二种方式
# obj = models.UserInfo(username='fzh', password="iajtag")
# obj.save()
# 第三种方式
# dic = {'username':'fgf', 'password':'666'}
# models.UserInfo.objects.create(**dic)

查询数据

# result = models.UserInfo.objects.all()  # 查询所有,为QuerySet类型,可理解成列表
# result = models.UserInfo.objects.filter(username="fgf",password="666")  # 列表
# result = models.UserInfo.objects.filter(username="fgf").first()  # 对象
# 条件查询。filter 相当于where查询条件,里面的","会组成and条件
# for row in result:  # 打印查询到数据。
#     print(row.id,row.username,row.password)

# 查看QuerySet类型具体做了什么事情,可以: print(result.query)

删除数据

# models.UserInfo.objects.all().delete()  # 删除所有
# models.UserInfo.objects.filter(id=4).delete()  # 只删除id为4的记录

更新数据

# models.UserInfo.objects.all().update(password=8888)
# models.UserInfo.objects.filter(id=3).update(password=888888)

数据库(ORM)操作相关:
如果想要完全删除migrations(使用sqllite时),需要将所有缓存文件/夹都删掉
然后新建一个空的migrations

python manage.py makemigrations --empty (你的app名字)
python manage.py makemigrations
python manage.py migrate
解决!

pygame 碰撞检测

碰撞检测 即两个Sprite是否碰撞-重叠之类的(你懂得)
:horse_racing:

github: https://github.com/834930269/Pygame-Learn/tree/master/EatApple

吃苹果游戏:

# MyLibrary.py
from pygame import *
import sys, time, random, math, pygame

def print_text(font,x,y,text,color=(255,255,255)):
    """Draw ``text`` on the current display surface at position (x, y).

    The text is rendered with ``font`` in ``color`` (white by default) and
    blitted onto the surface returned by ``pygame.display.get_surface()``.
    """
    rendered = font.render(text,True,color)
    # The display surface is looked up here instead of being passed in, so the
    # helper can live in this library module.
    pygame.display.get_surface().blit(rendered,(x,y))

class MySprite(pygame.sprite.Sprite):
    """Animated sprite whose frames are cut out of one sprite-sheet image.

    ``load`` reads the sheet and records the frame geometry; ``update``
    advances the current frame over time and refreshes ``self.image`` with
    the matching sub-rectangle of the sheet.
    """

    def __init__(self):
        pygame.sprite.Sprite.__init__(self)
        self.master_image = None    # whole sprite sheet, set by load()
        self.frame = 0              # index of the frame currently shown
        self.old_frame = -1         # last frame that was cut out of the sheet
        self.frame_width = 1
        self.frame_height = 1
        self.first_frame = 0        # animation loops within first..last frame
        self.last_frame = 0
        self.columns = 1
        self.last_time = 0          # time stamp of the last frame advance
        self.row = 1
        self.direction = 0
        self.velocity = Point(0.0,0.0)

    # x property: horizontal position of the sprite rectangle
    def _getx(self): return self.rect.x
    def _setx(self,value): self.rect.x = value
    X = property(_getx,_setx)

    # y property: vertical position of the sprite rectangle
    def _gety(self): return self.rect.y
    def _sety(self,value): self.rect.y = value
    Y = property(_gety,_sety)

    # position property: top-left corner of the sprite rectangle
    def _getpos(self): return self.rect.topleft
    def _setpos(self,pos):self.rect.topleft = pos
    position = property(_getpos,_setpos)

    def load(self,filename,width,height,columns,row):
        """Load the sprite sheet and set up the frame geometry.

        Args:
            filename: Image file containing all frames.
            width: Width of a single frame in pixels.
            height: Height of a single frame in pixels.
            columns: Number of frames per sheet row.
            row: Number of rows in the sheet.
        """
        self.master_image = pygame.image.load(filename).convert_alpha()
        self.frame_width = width
        self.frame_height = height
        self.rect = Rect(0,0,width,height)
        self.columns = columns
        self.row = row
        rect = self.master_image.get_rect()
        # Index of the last complete frame that fits into the sheet.
        self.last_frame = (rect.width // width)*(rect.height // height) - 1

    def update(self,current_time,rate=30):
        """Advance the animation and refresh ``self.image``.

        Args:
            current_time: Current time stamp, in the same unit as ``rate``.
            rate: Minimum time that must pass between two frame advances.
        """
        if current_time > self.last_time + rate:
            self.frame += 1
            if self.frame > self.last_frame:
                self.frame = self.first_frame
            # Remember when we last advanced.  This must update last_time
            # (not last_frame): otherwise ``rate`` is never honoured and the
            # wrap-around bound above is overwritten with a time stamp.
            self.last_time = current_time

        # Only cut a new sub-image when the frame actually changed.
        if self.frame != self.old_frame:
            frame_x = (self.frame % self.columns) * self.frame_width
            # Taking the row index modulo self.row keeps frame_y inside the
            # sheet even if the frame index runs past the last row.
            frame_y = (self.frame // self.columns) % self.row * self.frame_height
            rect = Rect(frame_x, frame_y, self.frame_width, self.frame_height)
            self.image = self.master_image.subsurface(rect)
            self.old_frame = self.frame

    def __str__(self):
        return str(self.frame) + "," + str(self.first_frame) + \
            "," + str(self.last_frame) + "," + str(self.frame_width) + \
            "," + str(self.frame_height) + "," + str(self.columns) + \
            "," + str(self.rect)

#Point class
class Point(object):
    """Mutable 2-D point whose coordinates are exposed as ``x`` and ``y``.

    The coordinates can be read and written through the ``x`` / ``y``
    properties or through the explicit ``getx``/``setx``/``gety``/``sety``
    methods.  ``str()`` shows both coordinates rounded to whole numbers.
    """

    def __init__(self,x,y):
        self.__x, self.__y = x, y

    # x coordinate accessors
    def getx(self):
        return self.__x

    def setx(self, x):
        self.__x = x

    x = property(fget=getx, fset=setx)

    # y coordinate accessors
    def gety(self):
        return self.__y

    def sety(self, y):
        self.__y = y

    y = property(fget=gety, fset=sety)

    def __str__(self):
        # Doubled braces produce the literal "{" and "}" around the values.
        return "{{X:{:.0f},Y:{:.0f}}}".format(self.__x, self.__y)
#app.py
import itertools, sys, time, random, math, pygame
from pygame import *
from MyLibrary import *

def calc_velocity(direction,vel=1.0):
    """Return a ``Point`` holding the velocity for a movement direction.

    Direction codes: 0 = up, 2 = right, 4 = down, 6 = left.  Any other value
    yields a zero velocity.  ``vel`` is the speed along the chosen axis.
    """
    velocity = Point(0,0)
    # (direction code, axis to set, whether the speed is negated)
    moves = (
        (0, 'y', True),    # up
        (2, 'x', False),   # right
        (4, 'y', False),   # down
        (6, 'x', True),    # left
    )
    for code, axis, negate in moves:
        if direction == code:
            setattr(velocity, axis, -vel if negate else vel)
            break
    return velocity

# Initialise pygame, open an 800x600 window and create the font and clock
# used by the main loop.
pygame.init()
screen = pygame.display.set_mode((800,600))
pygame.display.set_caption("吃苹果")
font = pygame.font.Font(None,36)
timer = pygame.time.Clock()

# Create the sprite groups
player_group = pygame.sprite.Group()
food_group = pygame.sprite.Group()

# Set up the player sprite: 96x96 frames, sheet laid out as 8 columns x 8 rows
player = MySprite()
player.load("farmer walk.png",96,96,8,8)
# Initial position
player.position = 80,80
# Initial direction (4 is the code the main loop assigns for "down")
player.direction = 4
# Add the player to its group
player_group.add(player)

# Set up the food sprites.
# NOTE(review): range(1,50) creates 49 items, not 50 - confirm this is intended.
for n in range(1,50):
    food = MySprite()
    food.load("food_low.png",35,35,1,1)
    # Scatter the food at random positions
    food.position = random.randint(0,780),random.randint(0,580)
    food_group.add(food)

# Game state flags and the player's health (also drives the health bar width)
game_over=False
player_moving = False
player_health = 0

# Main loop: handle input, update the sprites, detect collisions and redraw.
while True:
    # Cap the loop at 30 iterations per second.
    timer.tick(30)
    ticks = pygame.time.get_ticks()

    for event in pygame.event.get():
        if event.type == QUIT:
            pygame.quit()
            sys.exit()
    # Read the keys that are currently held down
    keys = pygame.key.get_pressed()
    if keys[K_ESCAPE]:
        sys.exit()
    elif keys[K_UP] or keys[K_w]:# up
        player.direction = 0
        player_moving = True
    elif keys[K_RIGHT] or keys[K_d]:# right
        player.direction = 2
        player_moving = True
    elif keys[K_DOWN] or keys[K_s]:# down
        player.direction = 4
        player_moving = True
    elif keys[K_LEFT] or keys[K_a]:# left
        player.direction = 6
        player_moving = True
    else:
        player_moving = False

    if not game_over:
        # Pick the animation frames that belong to the current direction:
        # each direction code selects one row of player.columns frames.
        player.first_frame = player.direction * player.columns
        player.last_frame = player.first_frame + player.columns-1
        if player.frame < player.first_frame:
            player.frame = player.first_frame

        if not player_moving:
            # No key held (player standing still): pin the animation by setting
            # both frame and first_frame to last_frame.
            player.frame = player.first_frame = player.last_frame
        else:
            # NOTE(review): the speed 1.5 is scaled by 1.5 again below, giving
            # 2.25 per loop iteration - confirm the double scaling is intended.
            player.velocity = calc_velocity(player.direction,1.5)
            player.velocity.x *= 1.5
            player.velocity.y *= 1.5

        # Update the player sprite group
        player_group.update(ticks, 50)
        # Move the player and clamp the position to 0..700 / 0..500
        if player_moving:
            player.X += player.velocity.x
            player.Y += player.velocity.y
            if player.X < 0:
                player.X = 0
            elif player.X > 700:
                player.X = 700
            if player.Y < 0:
                player.Y = 0
            elif player.Y > 500:
                player.Y = 500

        attacker = None
        # Check whether the player touches any food sprite
        attacker = pygame.sprite.spritecollideany(player,food_group)
        if attacker!=None:
            # There is a hit: confirm it with a second, circle-based test
            # (ratio 0.65) before counting it.
            if pygame.sprite.collide_circle_ratio(0.65)(player,attacker):
                player_health += 2
                food_group.remove(attacker)
        # Health is capped at 100
        if player_health > 100:
            player_health = 100
        # Update the food sprite group
        food_group.update(ticks,50)
        # The game ends once all food has been eaten
        if len(food_group)==0:
            game_over = True

    # Clear the screen
    screen.fill((50,50,100))
    # Draw the sprites
    food_group.draw(screen)
    player_group.draw(screen)

    # Draw the health bar: filled part (2 pixels per health point) plus a
    # fixed 200-pixel outline
    pygame.draw.rect(screen, (50, 150, 50, 180), Rect(300, 570, player_health * 2, 25))
    pygame.draw.rect(screen, (100, 200, 100, 180), Rect(300, 570, 200, 25), 2)

    if game_over:
        print_text(font, 300, 100, "GAME OVER")

    pygame.display.update()

接下来才是难中之难…如果想要实现人物随场景移动.

估计要自己撸框架了.

大体思路如下:
1:近景(左侧)1/2前景不随人物移动
2:1/2以后景随人物移动
3:远景(右侧)1/2前景不随人物移动
4:景采用大地图形式,造可封装类(障碍物,场景地图,宽度,高度等信息)
5:考虑如何移动场景,数学公式想好,-移动方向,移动速度,景中心…
6:加油!

pygame sprite

自定义Sprite类:
让嗷大喵动起来!

以上图片叫做帧图,背景为空,和二维数组一样的用法.
这个帧图的行为1,列为4(与下文 cat.load("mdm.png",100,100,4,1) 中 columns=4、row=1 的参数一致).

对网上别人自定义的MySprite类做了修改,使其变成列行同时循环.

学习项目地址: pygame-learn-sprite

class MySprite

class MySprite(pygame.sprite.Sprite):
    """Animated sprite whose frames are cut out of one sprite-sheet image.

    ``load`` reads the sheet and records the frame geometry; ``update``
    advances the current frame over time, cycling through columns and rows,
    and refreshes ``self.image`` with the matching part of the sheet.
    """

    def __init__(self,target):
        pygame.sprite.Sprite.__init__(self)
        self.target_surface = target
        self.image = None           # current frame, set by update()
        self.master_image = None    # whole sprite sheet, set by load()
        self.rect = None
        self.topleft = 0,0
        self.frame = 0              # index of the frame currently shown
        self.old_frame = -1         # last frame that was cut out of the sheet
        self.frame_width = 1
        self.frame_height = 1
        self.first_frame = 0        # animation loops within first..last frame
        self.last_frame = 0
        self.columns = 1
        self.last_time = 0          # time stamp of the last frame advance
        self.row = 1

    def load(self,filename,width,height,columns,row):
        """Load the sprite sheet and set up the frame geometry.

        Args:
            filename: Image file containing all frames.
            width: Width of a single frame in pixels.
            height: Height of a single frame in pixels.
            columns: Number of frames per sheet row.
            row: Number of rows in the sheet.
        """
        self.master_image = pygame.image.load(filename).convert_alpha()
        self.frame_width = width
        self.frame_height = height
        self.rect = 0,0,width,height
        self.columns = columns
        self.row = row
        rect = self.master_image.get_rect()
        # Index of the last complete frame that fits into the sheet.
        self.last_frame = (rect.width // width)*(rect.height // height) - 1

    def update(self,current_time,rate=60):
        """Advance the animation and refresh ``self.image``.

        Args:
            current_time: Current time stamp, in the same unit as ``rate``.
            rate: Minimum time that must pass between two frame advances.
        """
        if current_time > self.last_time + rate:
            self.frame += 1
            if self.frame > self.last_frame:
                self.frame = self.first_frame
            # Remember when we last advanced.  This must update last_time
            # (not last_frame): otherwise ``rate`` is never honoured and the
            # wrap-around bound above is overwritten with a time stamp.
            self.last_time = current_time

        # Only cut a new sub-image when the frame actually changed.
        if self.frame != self.old_frame:
            frame_x = (self.frame % self.columns) * self.frame_width
            # Taking the row index modulo self.row keeps frame_y inside the
            # sheet even if the frame index runs past the last row.
            frame_y = (self.frame // self.columns) % self.row * self.frame_height
            rect = (frame_x, frame_y, self.frame_width, self.frame_height)
            self.image = self.master_image.subsurface(rect)
            self.old_frame = self.frame

让嗷大喵动起来吧!!

import pygame
from pygame import *

class MySprite(pygame.sprite.Sprite):
    """Animated sprite whose frames are cut out of one sprite-sheet image.

    ``load`` reads the sheet and records the frame geometry; ``update``
    advances the current frame over time, cycling through columns and rows,
    and refreshes ``self.image`` with the matching part of the sheet.
    """

    def __init__(self,target):
        pygame.sprite.Sprite.__init__(self)
        self.target_surface = target
        self.image = None           # current frame, set by update()
        self.master_image = None    # whole sprite sheet, set by load()
        self.rect = None
        self.topleft = 0,0
        self.frame = 0              # index of the frame currently shown
        self.old_frame = -1         # last frame that was cut out of the sheet
        self.frame_width = 1
        self.frame_height = 1
        self.first_frame = 0        # animation loops within first..last frame
        self.last_frame = 0
        self.columns = 1
        self.last_time = 0          # time stamp of the last frame advance
        self.row = 1

    def load(self,filename,width,height,columns,row):
        """Load the sprite sheet and set up the frame geometry.

        Args:
            filename: Image file containing all frames.
            width: Width of a single frame in pixels.
            height: Height of a single frame in pixels.
            columns: Number of frames per sheet row.
            row: Number of rows in the sheet.
        """
        self.master_image = pygame.image.load(filename).convert_alpha()
        self.frame_width = width
        self.frame_height = height
        self.rect = 0,0,width,height
        self.columns = columns
        self.row = row
        rect = self.master_image.get_rect()
        # Index of the last complete frame that fits into the sheet.
        self.last_frame = (rect.width // width)*(rect.height // height) - 1

    def update(self,current_time,rate=60):
        """Advance the animation and refresh ``self.image``.

        Args:
            current_time: Current time stamp, in the same unit as ``rate``.
            rate: Minimum time that must pass between two frame advances.
        """
        if current_time > self.last_time + rate:
            self.frame += 1
            if self.frame > self.last_frame:
                self.frame = self.first_frame
            # Remember when we last advanced.  This must update last_time
            # (not last_frame): otherwise ``rate`` is never honoured and the
            # wrap-around bound above is overwritten with a time stamp.
            self.last_time = current_time

        # Only cut a new sub-image when the frame actually changed.
        if self.frame != self.old_frame:
            frame_x = (self.frame % self.columns) * self.frame_width
            # Taking the row index modulo self.row keeps frame_y inside the
            # sheet even if the frame index runs past the last row.
            frame_y = (self.frame // self.columns) % self.row * self.frame_height
            rect = (frame_x, frame_y, self.frame_width, self.frame_height)
            self.image = self.master_image.subsurface(rect)
            self.old_frame = self.frame

# Initialise pygame, open an 800x600 window and create the font and clock.
pygame.init()
screen = pygame.display.set_mode((800,600),0,32)
pygame.display.set_caption("精灵测试")
font = pygame.font.Font(None,18)
framerate = pygame.time.Clock()

# Create the animated sprite from a sheet of 100x100 frames laid out as
# 4 columns x 1 row, and put it into a group so it can be updated and drawn.
cat = MySprite(screen)
cat.load("mdm.png",100,100,4,1)
group = pygame.sprite.Group()
group.add(cat)

# Main loop: handle quit requests, advance the animation and redraw.
while True:
    # Cap the loop at 100 iterations per second.
    framerate.tick(100)
    ticks = pygame.time.get_ticks()

    for event in pygame.event.get():
        if event.type == pygame.QUIT:
            pygame.quit()
            exit()
    # Escape also ends the program.
    key = pygame.key.get_pressed()
    if key[pygame.K_ESCAPE]:
        exit()

    # Clear the screen before drawing the new frame.
    screen.fill((0,0,100))

    # update() is called with the current tick count only, so the sprite's
    # default rate applies.
    group.update(ticks)
    group.draw(screen)
    pygame.display.update()