阅读背景:

用 C 语言简单实现网页爬取

来源:互联网 

实例:

#include <sys/socket.h>  
#include <sys/types.h>  
#include <stdio.h>  
#include <fcntl.h>  
#include<netinet/in.h>  
#include<unistd.h>  
#include<arpa/inet.h>  
#include<errno.h>  
#include<stdlib.h>  
#include <netdb.h>  
#include <string.h>  
      
/*
 * Send all `len` bytes of `data` on socket `fd`, retrying after short
 * writes and EINTR. Returns 0 on success, -1 on error (errno is left as
 * set by send()).
 */
static int send_all(int fd, const char *data, size_t len)
{
	while (len > 0)
	{
		ssize_t n = send(fd, data, len, 0);
		if (n < 0)
		{
			if (errno == EINTR)
				continue;
			return -1;
		}
		data += n;
		len -= (size_t)n;
	}
	return 0;
}

/*
 * Fetch one page from blog.csdn.net over plain HTTP (port 80).
 *
 * The response header is echoed to stdout; everything after the blank
 * line that ends the header is written to ./haha.html exactly as it is
 * received (a chunked Transfer-Encoding, if the server uses one, is not
 * decoded).
 *
 * Returns 0 on success, -1 on any lookup, socket, or file error.
 */
int main()
{
	/* HTTP requires CRLF line endings; "Connection: close" makes the
	 * server close the socket after the response so the body can be read
	 * until EOF. */
	static const char GETPAGE[] =
		"GET /chinabhlt/article/details/43670871 HTTP/1.1\r\n"
		"Host: blog.csdn.net\r\n"
		"Connection: close\r\n"
		"\r\n";

	char bufpage[4096];
	struct sockaddr_in sin;
	struct hostent *ht;
	FILE *fp = NULL;
	int sockfd;
	int done = 0;
	int charas = 0;	/* characters seen on the current header line */
	int rc = -1;

	if ((ht = gethostbyname("blog.csdn.net")) == NULL)
	{
		/* gethostbyname() does not set errno, so perror() would print an
		 * unrelated message here. */
		fprintf(stderr, "no website: host lookup failed\n");
		return -1;
	}
	if (ht->h_addrtype != AF_INET || ht->h_addr_list[0] == NULL ||
	    (size_t)ht->h_length != sizeof sin.sin_addr)
	{
		fprintf(stderr, "no website: no IPv4 address\n");
		return -1;
	}

	memset(&sin, 0, sizeof sin);
	sin.sin_family = AF_INET;
	sin.sin_port = htons(80);
	/* Copy exactly the 4 address bytes; a cast to unsigned long* would
	 * read 8 bytes on LP64 platforms. */
	memcpy(&sin.sin_addr, ht->h_addr_list[0], sizeof sin.sin_addr);

	if ((sockfd = socket(AF_INET, SOCK_STREAM, 0)) < 0)
	{
		perror("create socket error");
		return -1;
	}

	if (connect(sockfd, (struct sockaddr *)&sin, sizeof sin) == -1)
	{
		perror("connect server error");
		goto out;
	}
	if (send_all(sockfd, GETPAGE, strlen(GETPAGE)) < 0)
	{
		perror("send request error");
		goto out;
	}

	/* Deal with the response header from the web server: echo it byte by
	 * byte until an empty line (ignoring '\r') is seen. */
	while (!done)
	{
		char ch;
		ssize_t n = recv(sockfd, &ch, 1, 0);
		if (n < 0)
		{
			if (errno == EINTR)
				continue;
			perror("recv header error");
			goto out;
		}
		if (n == 0)
		{
			fprintf(stderr, "connection closed before end of header\n");
			goto out;
		}

		switch (ch)
		{
		case '\r':
			break;
		case '\n':
			if (charas == 0)	/* empty line: end of the response header */
				done = 1;
			charas = 0;
			break;
		default:
			++charas;
			break;
		}
		putchar(ch);
	}

	printf("start write file \n");
	fp = fopen("./haha.html", "w+");
	if (fp == NULL)
	{
		perror("open output file error");
		goto out;
	}

	/* Read the body until the server closes the connection. */
	for (;;)
	{
		ssize_t n = recv(sockfd, bufpage, sizeof bufpage, 0);
		if (n < 0)
		{
			if (errno == EINTR)
				continue;
			perror("recv body error");
			goto out;
		}
		if (n == 0)
			break;
		if (fwrite(bufpage, 1, (size_t)n, fp) != (size_t)n)
		{
			perror("write file error");
			goto out;
		}
	}
	printf("end write file \n");
	rc = 0;

out:
	/* fclose() flushes buffered data, so its failure means the file is
	 * incomplete. */
	if (fp != NULL && fclose(fp) != 0)
	{
		perror("close output file error");
		rc = -1;
	}
	close(sockfd);
	return rc;
}
#include <sys/socket.h>  
#include <sy



你的当前访问异常,请进行认证后继续阅读剩余内容。

分享到: