本文介绍火车头采集器AI伪原创。对于新媒体从业者来说,会写文章是最基本的职业技能,而伪原创是我们经常用到的技巧。今天SEO标兵要讲的是如何借助伪原创来创作文章。
首先,原创内容永远是最好的,也更受读者欢迎。伪原创之所以出现,往往是因为我们刚进入一个行业、对它还不够了解,这时采用伪原创策略可以节省时间。此外,也有站长采用“百度采集+伪原创插件”的方式,批量采集并生成伪原创文章。
火车头采集器AI伪原创PHP源码:
<?php
// Batch driver: picks each not-yet-processed .txt article under CUR_FOLDER,
// posts its body to the rewriting API, stores the answer under DEST_FOLDER and
// records the file name in ok.txt (see file_ok / get_one_file) so it is not
// picked again.
// The response is declared as GB2312 text.
header("Content-type: text/html; charset=gb2312");
// Remove the execution time limit; the loop below performs one HTTP request per file.
set_time_limit(0);
error_reporting(E_ALL);
ini_set('display_errors', '1');
// Base directory from which all working paths below are built.
define ("CUR_DIR", '../');
// Marker inserted between title and body by title_content_rewrite().
define('TITLE_SEPAR', 'xxxxx');
// Rewriting API endpoint; an API key has to be requested at www.xiaofamao.com
define('API_URL', 'http://api.xiaofamao.com/api.php?json=0&v=2&key=yuyu');
// Directory holding the articles waiting to be processed
define('CUR_FOLDER', CUR_DIR.'word');
// Directory receiving the processed articles
define('DEST_FOLDER', CUR_DIR.'xiaofamao');
// Directory receiving articles treated as failed
define('DEST_FOLDER_FAIL', CUR_DIR.'xiaofamao_fail');
$one_file = get_one_file();
// Stop right away when there is no pending file
if (!$one_file){
echo PHP_EOL.'* 任务数为0, 程序已退出';
exit;
}
// One iteration per pending file; get_one_file() returns false when none is left.
while ($one_file) {
echo PHP_EOL.'* 正在执行:' . PHP_EOL. file_path($one_file);
echo PHP_EOL.'* 请耐心等候...';
$data_arr = get_contents_filter(file_path($one_file));
// $title is re-encoded further down; $title_src keeps the value as returned
// by get_contents_filter() and is the name used when saving.
$title = $data_arr['title'];
$title_src = $data_arr['title'];
$content = $data_arr['content'];
// Image links "![](...)" are swapped for 《...》 before the text is sent,
// and swapped back by restore_shuminghao() after the reply arrives.
$content = replace_shuminghao($content);
$content = get_utf8_data($content);
$new_content = curl_request(API_URL, array('wenzhang'=>$content));
$new_content = get_gbk_data($new_content);
$new_content = restore_shuminghao($new_content);
#var_dump($new_content);
$new_content = content_format($new_content);
// NOTE(review): self-assignment, has no effect.
$new_content = $new_content;
#$new_title = get_ai_title($title);
$title = get_utf8_data($title);
// Title rewriting is disabled (the API calls below are commented out), so the
// "new" title is simply the UTF-8 title.
$new_title = $title;
//$new_title =curl_request(API_URL, array('wenzhang'=>$title));
//$new_title = get_gbk_data($new_title);
#echo $new_title;
# Original note read "if the title rewrite succeeded"; the branch below,
# however, writes into DEST_FOLDER_FAIL.
# NOTE(review): $new_title was just assigned $title, so it cannot equal
# 'xx'.$title and this branch looks unreachable as written.
if ($new_title === 'xx'.$title) {
$new_title = get_gbk_data($title);
//save_new_file(DEST_FOLDER_FAIL.DIRECTORY_SEPARATOR.$new_title.'.txt', $new_content);
save_new_file(DEST_FOLDER_FAIL.DIRECTORY_SEPARATOR.$title_src.'.txt', $new_content);
}
else {
// Strips tags and replaces characters in the title.
// NOTE(review): the result is not used afterwards: the file is saved under
// $title_src, not $new_title.
$new_title = strip_tags($new_title);
$new_title = str_replace(':', ":", $new_title);
$new_title = str_replace('?', "?", $new_title);
$new_title = str_replace(array('*','"','<','>','|'),'_', $new_title);
$new_title = get_gbk_data($new_title);
#$title = get_gbk_data($title);
#save_new_file(DEST_FOLDER.DIRECTORY_SEPARATOR.$new_title.'.txt', $new_content);
save_new_file(DEST_FOLDER.DIRECTORY_SEPARATOR.$title_src.'.txt', $new_content);
#save_new_file(DEST_FOLDER.DIRECTORY_SEPARATOR.$new_title.'.txt', $content);
}
#save_new_file(DEST_FOLDER.DIRECTORY_SEPARATOR.$title.'.txt', $new_content);
// Record the file as done, whichever branch ran, then fetch the next pending one.
file_ok($one_file);
//save_new_file(DEST_FOLDER.DIRECTORY_SEPARATOR.$new_title, $new_content);
// sleep(3);
$one_file = get_one_file();
// show_reflesh();
}
// Final "task finished" message, echoed through get_gbk_data().
show_info_gb2312(PHP_EOL.'任务完成'.PHP_EOL);
/**
 * Replaces Markdown image links with book-title brackets.
 *
 * Every "![](" is first shortened to "<(", then each "<(...)" span found in
 * the text is rewritten as "《...》".
 *
 * @param string $content Text to transform.
 * @return string The transformed text.
 */
function replace_shuminghao($content) {
    $content = str_replace('![](', '<(', $content);

    // Nothing to rewrite when no "<(...)" span is present.
    if (!preg_match_all("/<\((.*?)\)/", $content, $found)) {
        return $content;
    }

    foreach ($found[0] as $idx => $whole) {
        $content = str_replace($whole, '《'.$found[1][$idx].'》', $content);
    }

    return $content;
}
/**
 * Turns "《http...》" spans back into Markdown image links "![](http...)".
 *
 * Bracketed spans that do not start with "http" are left untouched.
 *
 * @param string $content Text to transform.
 * @return string The transformed text.
 */
function restore_shuminghao($content) {
    if (!preg_match_all("/《http(.*?)》/", $content, $found)) {
        return $content;
    }

    foreach ($found[1] as $idx => $tail) {
        $content = str_replace($found[0][$idx], '![](http'.$tail.')', $content);
    }

    return $content;
}
/**
 * Posts an article body to API_URL in the "wenzhang" field.
 *
 * @param string $content Article text.
 * @return mixed Whatever curl_request() returns for the call.
 */
function content_rewrite($content) {
    $payload = array('wenzhang' => $content);

    return curl_request(API_URL, $payload);
}
/**
 * Re-flows text so every line is prefixed with one space and followed by a
 * blank line.
 *
 * Line breaks are normalised with fix_newline() before splitting.
 *
 * @param string $data Text to format.
 * @return string Formatted text.
 */
function content_format($data) {
    $lines = explode(PHP_EOL, fix_newline($data));
    $gap = PHP_EOL.PHP_EOL;

    // explode() always yields at least one element, so joining with
    // "gap + space" and framing the result gives one "space + line + gap"
    // unit per line.
    return ' '.implode($gap.' ', $lines).$gap;
}
/**
 * Rewrites title and body in a single API call.
 *
 * Title and body are joined with TITLE_SEPAR, posted to API_URL, and the
 * answer is split on TITLE_SEPAR again.
 *
 * @param string $title   Article title.
 * @param string $content Article body.
 * @return array Parts of the answer; index 0 is the trimmed title part and
 *               index 1 the body part (an empty string when the separator is
 *               missing from the answer).
 */
function title_content_rewrite($title, $content) {
    $article_src = $title.TITLE_SEPAR.PHP_EOL.$content;
    $data = curl_request(API_URL, array('wenzhang'=>$article_src));

    // The separator may come back capitalised.
    $data = str_replace("Xx", "xx", $data);

    // Normalise to "\n" first; converting "\n" directly would turn an
    // existing "\r\n" into "\r\r\n".
    $data = str_replace(array("\r\n", "\r"), "\n", $data);
    $data = str_replace("\n", "\r\n", $data);
    $data = str_replace("<p>\r\n", "<p>", $data);
    $data = ltrim($data);

    $temp = explode(TITLE_SEPAR, $data);
    $temp[0] = trim($temp[0]);

    // The separator can be absent (e.g. when curl_request() returned an
    // error message), so do not assume a second part exists.
    $body = isset($temp[1]) ? ltrim($temp[1]) : '';

    // Remove one literal leading "</p>". ltrim($body, '</p>') would treat the
    // argument as a character set and also eat leading "<", "/", "p", ">".
    if (strncmp($body, '</p>', 4) === 0) {
        $body = (string) substr($body, 4);
    }
    $temp[1] = ltrim($body);

    return $temp;
}
/**
 * Prints a message wrapped in a <p> element.
 *
 * @param string $str Message to print (not escaped).
 * @return void
 */
function show_info($str) {
    printf('<p>%s</p>', $str);
}
/**
 * Converts text to UTF-8 when it is detected as another encoding.
 *
 * Detection candidates are ASCII, UTF-8, GB2312, GBK and BIG5. Text that is
 * already UTF-8 or pure ASCII, text whose encoding cannot be detected, and
 * text that iconv() fails to convert is returned unchanged, so callers
 * always get a string back.
 *
 * @param string $data Text in an unknown encoding.
 * @return string UTF-8 text, or the input unchanged as described above.
 */
function get_utf8_data($data) {
    $encode = mb_detect_encoding($data, array('ASCII', 'UTF-8', 'GB2312', 'GBK','BIG5'));

    // false: detection failed, so there is no source charset to hand to iconv().
    // ASCII is byte-identical in UTF-8, so no conversion is needed.
    if ($encode === false || $encode === 'UTF-8' || $encode === 'ASCII') {
        return $data;
    }

    $utf8_data = iconv($encode, 'UTF-8//IGNORE', $data);

    // iconv() returns false on failure; keep the original text instead of
    // passing false on to the caller.
    return $utf8_data === false ? $data : $utf8_data;
}
/**
 * Converts text to GBK when it is detected as UTF-8.
 *
 * Detection candidates are ASCII, UTF-8, GB2312, GBK and BIG5. Anything not
 * detected as UTF-8 is returned unchanged, as is text that iconv() fails to
 * convert.
 *
 * @param string $data Text in an unknown encoding.
 * @return string GBK text, or the input unchanged as described above.
 */
function get_gbk_data($data) {
    $encode = mb_detect_encoding($data, array('ASCII', 'UTF-8', 'GB2312', 'GBK','BIG5'));
    if ($encode !== 'UTF-8') {
        return $data;
    }

    // The //IGNORE suffix belongs on the target charset only.
    $gbk_data = iconv('UTF-8', 'GBK//IGNORE', $data);

    // iconv() returns false on failure; keep the original text in that case.
    return $gbk_data === false ? $data : $gbk_data;
}
/**
 * Sends a title to the rewriting API.
 *
 * The previous body passed an undefined $url to curl_request() and never used
 * the URL-encoded title. The request now mirrors rewrite() and the
 * commented-out title call in the main loop: the title is posted to API_URL
 * in the "wenzhang" field (curl_request() encodes POST fields itself).
 * NOTE(review): confirm this is the endpoint intended for titles.
 *
 * @param string $title Title to rewrite.
 * @return mixed Whatever curl_request() returns for the call.
 */
function get_ai_title($title) {
    return curl_request(API_URL, array('wenzhang' => $title));
}
/**
 * Echoes a message after passing it through get_gbk_data(), then emits a
 * line break via newline().
 *
 * @param string $string Message to print.
 * @return void
 */
function show_info_gb2312($string) {
    $converted = get_gbk_data($string);
    echo $converted;
    newline();
}
/**
 * Splits the values of $array[1] into two buckets.
 *
 * Bucket 0 collects values that start with "C:\" (case-insensitive), bucket 1
 * collects everything else. A bucket only exists when it received at least
 * one value. The outer array is passed through asort() before being returned.
 *
 * @param array $array Array whose element 1 is the list of values to split.
 * @return array The buckets, keyed 0 and/or 1.
 */
function get_img_arr($array) {
    $ret_arr = array();

    foreach ($array[1] as $path) {
        $bucket = (stripos($path, 'C:\\') === 0) ? 0 : 1;
        $ret_arr[$bucket][] = $path;
    }

    asort($ret_arr);

    return $ret_arr;
}
/**
 * Replaces only the first occurrence of $search in $subject.
 *
 * @param string $search  Text to look for.
 * @param string $replace Replacement text.
 * @param string $subject Text to search in.
 * @return string $subject with the first match replaced, or $subject itself
 *                when $search does not occur.
 */
function str_replace_once($search, $replace, $subject) {
    $at = strpos($subject, $search);
    if ($at === false) {
        return $subject;
    }

    // Splice the replacement over the matched span.
    return substr_replace($subject, $replace, $at, strlen($search));
}
//var_dump(str_replace_once('**x**', 'xxxxxxxxxx', 'aaa**x**bbb'));
//exit;
/**
 * Fills "**x**" placeholders with the given values, one placeholder per value
 * and in order.
 *
 * Upper-case "**X**" placeholders are lower-cased first so they are filled
 * too.
 *
 * @param array  $array  Values to insert.
 * @param string $string Text containing the placeholders.
 * @return string Text with the placeholders filled.
 */
function put_img_arr($array, $string) {
    $result = str_replace('**X**', '**x**', $string);

    foreach ($array as $img) {
        $result = str_replace_once('**x**', $img, $result);
    }

    return $result;
}
/**
 * Splits rewritten text into a title (first line) and the remaining content.
 *
 * The previous body called reset_newline(), which is not defined anywhere in
 * this file; fix_newline() is the file's line-break normaliser and is used
 * instead. NOTE(review): fix_newline() also collapses blank lines — confirm
 * that is acceptable here.
 *
 * @param string $data Rewritten text whose first line is the title.
 * @return array Two elements: the title with "<P>"/"<p>" removed, and the
 *               remaining lines joined with PHP_EOL and prefixed with "<p>".
 */
function get_rewrite_title($data) {
    $data = fix_newline($data);
    $data_arr = explode(PHP_EOL, $data);

    // The first line is the title; the rest is the content.
    $title = str_replace(array('<P>', '<p>'), '', array_shift($data_arr));
    $contents = '<p>'.implode(PHP_EOL, $data_arr);

    return array($title, $contents);
}
/*
foreach ($news_list as $key => $value) {
echo $value."<br>".PHP_EOL;
$value = iconv("utf-8", "gb2312", $value);
$data = file_get_contents($value);
$data = iconv("GBK", "utf-8", $data);
// echo $data;
$data = rewrite($data);
$data = str_replace("\r\n", "\n", $data);
$data = str_replace("\r", "\n", $data);
$data = str_replace("\n", "\r\n", $data);
sleep(2);
$data = auto_typeset($data);
save_new_file('b'.$value, $data);
}
*/
/**
 * Normalises paragraph tags: "<P>" (with or without a following CRLF) becomes
 * "<p>", and every "<p>" is then followed by one space.
 *
 * @param string $data Markup to normalise.
 * @return string Normalised markup.
 */
function auto_typeset($data) {
    // Unify the upper-case variants first ...
    $lowered = str_replace(array('<P>'."\r\n", '<P>'), '<p>', $data);

    // ... then add the space after every opening tag.
    return str_replace('<p>', '<p> ', $lowered);
}
/**
 * Posts an article to API_URL in the "wenzhang" field.
 *
 * @param string $article Article text.
 * @return mixed Whatever curl_request() returns for the call.
 */
function rewrite($article) {
    $fields = array('wenzhang' => $article);

    return curl_request(API_URL, $fields);
}
/**
 * Writes $data to $filename unless data_exists() reports the file is already
 * there; an existing file is never overwritten.
 *
 * @param string $filename Target path.
 * @param string $data     Contents to write.
 * @return void
 */
function save_new_file($filename, $data) {
    if (data_exists($filename)) {
        return;
    }

    file_put_contents($filename, $data);
}
/**
 * Tells whether $filename already exists, creating its parent directory as a
 * side effect when that directory is missing.
 *
 * Backslashes in the path are treated as directory separators.
 *
 * @param string $filename Path to check.
 * @return bool True when the path exists, false otherwise.
 */
function data_exists($filename) {
    $filename = str_replace('\\', '/', $filename);
    $dir = dirname($filename);

    // mkdir() is recursive here, so it also creates every missing ancestor;
    // the separate check for the grandparent directory was dead code.
    if (!is_dir($dir)) {
        mkdir($dir, 0755, true);
    }

    return file_exists($filename);
}
/**
 * Lists the entries of a directory as "<folder>/<name>" paths.
 *
 * Entry names are converted from GB2312 to UTF-8. When that conversion fails
 * the raw name is kept, instead of appending the boolean false to the folder
 * path. "." and ".." are skipped.
 *
 * @param string $floder Directory to list.
 * @return array List of paths; empty when the directory is missing or cannot
 *               be opened.
 */
function get_news_list($floder) {
    $news_list = array();

    if (!is_dir($floder)) {
        return $news_list;
    }

    $handle = opendir($floder);
    if ($handle === false) {
        return $news_list;
    }

    while (false !== ($file = readdir($handle))) {
        if ($file === '.' || $file === '..') {
            continue;
        }

        $converted = iconv("gb2312", "utf-8", $file);
        if ($converted === false) {
            // Name is not valid GB2312 (e.g. already UTF-8): use it as is.
            $converted = $file;
        }

        $news_list[] = $floder.DIRECTORY_SEPARATOR.$converted;
    }
    closedir($handle);

    return $news_list;
}
/**
 * Performs an HTTP request with cURL.
 *
 * @param string       $url          URL to request.
 * @param array|string $post         POST data; when empty the request is a GET.
 *                                   Arrays are URL-encoded, strings are sent
 *                                   as given.
 * @param string       $cookie       Cookie header value to send, if any.
 * @param int          $returnCookie When truthy, response headers are
 *                                   requested and the return value becomes an
 *                                   array.
 * @return mixed The response body; or, with $returnCookie, an array with the
 *               keys 'cookie' (first Set-Cookie value, '' when none) and
 *               'content' (the body). When cURL reports an error, the cURL
 *               error message is returned as a string — callers cannot tell
 *               it apart from a body; this is kept for compatibility.
 */
function curl_request($url,$post='',$cookie='', $returnCookie=0){
    $curl = curl_init();
    curl_setopt($curl, CURLOPT_URL, $url);
    curl_setopt($curl, CURLOPT_USERAGENT, 'Mozilla/5.0 (compatible; MSIE 10.0; Windows NT 6.1; Trident/6.0)');

    // Redirect following is only enabled when open_basedir is empty and
    // safe_mode is not "on".
    if (ini_get('open_basedir') == '' && strtolower((string) ini_get('safe_mode')) != 'on'){
        curl_setopt($curl, CURLOPT_FOLLOWLOCATION, 1);
    }
    curl_setopt($curl, CURLOPT_AUTOREFERER, 1);
    curl_setopt($curl, CURLOPT_REFERER, "http://XXX");

    if($post) {
        curl_setopt($curl, CURLOPT_POST, 1);
        // http_build_query() only accepts arrays/objects; pass a ready-made
        // string body through untouched.
        curl_setopt($curl, CURLOPT_POSTFIELDS, is_string($post) ? $post : http_build_query($post));
    }
    if($cookie) {
        curl_setopt($curl, CURLOPT_COOKIE, $cookie);
    }
    curl_setopt($curl, CURLOPT_HEADER, $returnCookie);
    curl_setopt($curl, CURLOPT_TIMEOUT, 150);
    curl_setopt($curl, CURLOPT_RETURNTRANSFER, 1);

    $data = curl_exec($curl);
    if (curl_errno($curl)) {
        // Release the handle on the error path as well.
        $error = curl_error($curl);
        curl_close($curl);
        return $error;
    }
    curl_close($curl);

    if (!$returnCookie) {
        return $data;
    }

    // Headers and body are separated by the first blank line; tolerate a
    // response without one.
    $parts = explode("\r\n\r\n", $data, 2);
    $header = $parts[0];
    $body = isset($parts[1]) ? $parts[1] : '';

    $info = array('cookie' => '', 'content' => $body);
    if (preg_match_all("/Set\-Cookie:([^;]*);/", $header, $matches) && isset($matches[1][0])) {
        // Drop the first character of the captured value (the space after
        // the colon in a typical header line).
        $info['cookie'] = substr($matches[1][0], 1);
    }

    return $info;
}
/**
 * Counts the characters (not bytes) of a UTF-8 string.
 *
 * @param string|null $string Text to measure.
 * @return int Number of characters, line breaks included.
 */
function utf8_strlen($string = null) {
    $chars = array();

    // "u" makes "." match one UTF-8 character, "s" lets it match line breaks.
    preg_match_all("/./us", $string, $chars);

    return count($chars[0]);
}
/**
 * Returns the first file from get_all_files() that is not listed by
 * get_ok_files().
 *
 * @return string|false The next pending file name, or false when none is left.
 */
function get_one_file() {
    $pending = get_all_files();
    $done = get_ok_files();

    foreach ($pending as $candidate) {
        if (!in_array($candidate, $done)) {
            return $candidate;
        }
    }

    return false;
}
/**
 * Lists the entries of CUR_FOLDER via listDir().
 *
 * @return array Entry names as returned by listDir().
 */
function get_all_files() {
    $entries = listDir(CUR_FOLDER);

    return $entries;
}
/**
 * Reads the ledger of already-processed files (CUR_DIR . 'ok.txt').
 *
 * A UTF-8 byte-order mark is removed. Lines may end in "\r\n", "\r" or "\n",
 * so a ledger written or edited on another platform still matches. A missing
 * or unreadable ledger means "nothing processed yet" and yields an empty
 * list instead of a warning.
 *
 * @return array One entry per line; a trailing line break produces a final
 *               empty string, as before.
 */
function get_ok_files() {
    $ledger = CUR_DIR.'ok.txt';
    if (!is_file($ledger)) {
        return array();
    }

    $data = file_get_contents($ledger);
    if ($data === false) {
        return array();
    }

    $data = str_replace("\xef\xbb\xbf", '', $data);

    return preg_split('/\r\n|\r|\n/', $data);
}
/**
 * Appends a file name, followed by PHP_EOL, to CUR_DIR . 'ok.txt'.
 *
 * @param string $file_name Name to record.
 * @return void
 */
function file_ok($file_name) {
    $ledger = CUR_DIR.'ok.txt';
    $entry = $file_name.PHP_EOL;

    file_put_contents($ledger, $entry, FILE_APPEND);
}
/**
 * Lists the direct entries of a directory.
 *
 * Returned are the names (without path) of every sub-directory and of every
 * file whose name contains ".txt" (case-insensitive). The previous body also
 * walked each sub-directory recursively but threw the result away; that
 * wasted traversal is removed and the returned list is unchanged.
 *
 * NOTE(review): sub-directory names are returned alongside file names, and
 * the main loop treats every returned name as a file — confirm whether
 * directories should be listed at all.
 *
 * @param string $dir Directory to list.
 * @return array Entry names; empty when $dir is not a directory or cannot be
 *               opened.
 */
function listDir($dir)
{
    $reta = array();

    if (!is_dir($dir)) {
        return $reta;
    }

    $dh = opendir($dir);
    if (!$dh) {
        return $reta;
    }

    while (($file = readdir($dh)) !== false) {
        if ($file === "." || $file === "..") {
            continue;
        }

        if (is_dir($dir."/".$file)) {
            $reta[] = $file;
            continue;
        }

        if (stripos($file, '.txt') !== false) {
            $reta[] = $file;
        }
    }
    closedir($dh);

    return $reta;
}
/**
 * Builds the path of a file inside CUR_FOLDER.
 *
 * @param string $filename File name relative to CUR_FOLDER.
 * @return string CUR_FOLDER, the directory separator and the file name.
 */
function file_path($filename) {
    $parts = array(CUR_FOLDER, $filename);

    return implode(DIRECTORY_SEPARATOR, $parts);
}
/**
 * Strips the CUR_FOLDER prefix and every ".txt" from a path.
 *
 * @param string $filename Path as produced by file_path().
 * @return string The remaining name.
 */
function short_filename($filename) {
    $prefix = CUR_FOLDER.DIRECTORY_SEPARATOR;

    // Both removals happen in order: folder prefix first, then ".txt".
    return str_replace(array($prefix, '.txt'), '', $filename);
}
/**
 * Loads an article file and prepares it for rewriting.
 *
 * The path and the file contents are passed through get_gbk_data(), tabs are
 * removed and line breaks are normalised with fix_newline(). The title is
 * derived from the path with short_filename().
 *
 * @param string $file_name Path of the article file.
 * @return array Array with the keys 'title' and 'content'.
 */
function get_contents_filter($file_name) {
    $gbk_path = get_gbk_data($file_name);

    $raw = file_get_contents($gbk_path);
    $without_tabs = str_replace("\t", '', get_gbk_data($raw));
    // Normalise the line breaks
    $body = fix_newline($without_tabs);

    return array(
        'title' => short_filename($gbk_path),
        'content' => $body,
    );
}
/**
 * Normalises line breaks: every "\r" becomes "\n", runs of line breaks are
 * collapsed into one, and the result uses PHP_EOL as the line separator.
 *
 * @param string $data Text to normalise.
 * @return string Normalised text.
 */
function fix_newline($data) {
    $unix = str_replace("\r", "\n", $data);

    // Collapse any run of two or more line breaks into a single one.
    $collapsed = preg_replace("/\n{2,}/", "\n", $unix);

    return str_replace("\n", PHP_EOL, $collapsed);
}
/**
 * Emits a script block that reloads the page after 1000 ms.
 *
 * @return void
 */
function show_reflesh() {
    echo '<script language="JavaScript">
function myrefresh()
{
window.location.reload();
}
setTimeout("myrefresh()",1000);
</script>';
}
/*
 * Substring helpers: after / after_last / before / before_last / between /
 * between_last, plus strrevpos().
 *
 * The first parameter of these helpers used to be named $this. PHP 7.1 and
 * later reject that name for a parameter with a fatal compile error, so it is
 * now $needle. Calls pass arguments by position and are unaffected.
 */

/**
 * Returns the part of $inthat that follows the first occurrence of $needle.
 *
 * @param string $needle Text to look for.
 * @param string $inthat Text to search in.
 * @return string|null The remainder, or null when $needle does not occur.
 */
function after($needle, $inthat)
{
    $pos = strpos($inthat, $needle);
    if ($pos === false) {
        return null;
    }

    return substr($inthat, $pos + strlen($needle));
}

/**
 * Returns the part of $inthat that follows the last occurrence of $needle.
 *
 * @param string $needle Text to look for.
 * @param string $inthat Text to search in.
 * @return string|null The remainder, or null when $needle does not occur.
 */
function after_last($needle, $inthat)
{
    $pos = strrevpos($inthat, $needle);
    if ($pos === false) {
        return null;
    }

    return substr($inthat, $pos + strlen($needle));
}

/**
 * Returns the part of $inthat that precedes the first occurrence of $needle.
 *
 * @param string $needle Text to look for.
 * @param string $inthat Text to search in.
 * @return string The leading part, or '' when $needle does not occur.
 */
function before($needle, $inthat)
{
    $pos = strpos($inthat, $needle);
    if ($pos === false) {
        return '';
    }

    return substr($inthat, 0, $pos);
}

/**
 * Returns the part of $inthat that precedes the last occurrence of $needle.
 *
 * @param string $needle Text to look for.
 * @param string $inthat Text to search in.
 * @return string The leading part, or '' when $needle does not occur.
 */
function before_last($needle, $inthat)
{
    $pos = strrevpos($inthat, $needle);
    if ($pos === false) {
        return '';
    }

    return substr($inthat, 0, $pos);
}

/**
 * Returns the text between the first $needle and the next $that.
 *
 * @param string $needle Opening delimiter.
 * @param string $that   Closing delimiter.
 * @param string $inthat Text to search in.
 * @return string The enclosed text, or '' when a delimiter is missing.
 */
function between($needle, $that, $inthat)
{
    $tail = after($needle, $inthat);
    if ($tail === null) {
        return '';
    }

    return before($that, $tail);
}

/**
 * Returns the text between the last $needle and the last $that.
 *
 * @param string $needle Opening delimiter.
 * @param string $that   Closing delimiter.
 * @param string $inthat Text to search in.
 * @return string|null The enclosed text, or null when $needle does not occur
 *                     before the last $that.
 */
function between_last($needle, $that, $inthat)
{
    return after_last($needle, before_last($that, $inthat));
}

/**
 * Position of the last occurrence of $needle in $instr.
 *
 * @param string $instr  Text to search in.
 * @param string $needle Text to look for.
 * @return int|false Zero-based byte offset, or false when not found.
 */
function strrevpos($instr, $needle)
{
    // strrpos() gives the same offset as searching the reversed strings.
    return strrpos($instr, $needle);
}
/**
 * Recursively copies a directory tree, leaving out every file whose name
 * contains ".txt".
 *
 * Nothing is done when $src is not a readable directory. The previous body
 * kept calling readdir() on the failed handle in that case.
 *
 * @param string $src Source directory.
 * @param string $dst Destination directory; created when missing.
 * @return void
 */
function recurse_copy($src,$dst) {
    if (!is_dir($src)) {
        return;
    }

    $dir = opendir($src);
    if ($dir === false) {
        return;
    }

    // Create the destination explicitly instead of silencing mkdir() with @.
    if (!is_dir($dst)) {
        mkdir($dst, 0777, true);
    }

    while (false !== ($file = readdir($dir))) {
        if ($file === '.' || $file === '..') {
            continue;
        }

        $from = $src . '/' . $file;
        $to = $dst . '/' . $file;

        if (is_dir($from)) {
            recurse_copy($from, $to);
        } elseif (strpos($file, '.txt') === false) {
            copy($from, $to);
        }
    }
    closedir($dir);
}
/**
 * Converts text to UTF-8 using the encoding detected with the current
 * mb_detect_order().
 *
 * Text detected as UTF-8 is first passed through a UTF-8 to UTF-8
 * mb_convert_encoding() call. When the encoding cannot be detected, or
 * iconv() fails, the text is returned instead of false.
 *
 * @param string $text Text in an unknown encoding.
 * @return string UTF-8 text, or the text as it stood when conversion was not
 *                possible.
 */
function ConvertToUTF8($text){
    $encoding = mb_detect_encoding($text, mb_detect_order(), false);
    if ($encoding === false) {
        // No source charset to hand to iconv().
        return $text;
    }

    if ($encoding == "UTF-8") {
        $text = mb_convert_encoding($text, 'UTF-8', 'UTF-8');
    }

    $out = iconv($encoding, "UTF-8//IGNORE", $text);

    return $out === false ? $text : $out;
}
/**
 * Prints the platform line break (PHP_EOL).
 *
 * @return void
 */
function newline() {
    print PHP_EOL;
}
/**
 * Returns $value as UTF-8.
 *
 * When iconv is available the value is converted from its detected encoding
 * (current mb_detect_order()) with invalid sequences ignored; otherwise, or
 * when detection or conversion fails, it is passed through a UTF-8 to UTF-8
 * mb_convert_encoding() call.
 *
 * The previous body called self::getIsIconvEnabled(); "self" has no meaning
 * in a free function and is a fatal error, so the availability check now
 * uses function_exists('iconv').
 *
 * @param string $value Text to sanitise.
 * @return string UTF-8 text.
 */
function sanitizeUTF8($value)
{
    if (function_exists('iconv')) {
        $encoding = mb_detect_encoding($value, mb_detect_order(), false);

        if ($encoding !== false) {
            if ($encoding == "UTF-8") {
                $value = mb_convert_encoding($value, 'UTF-8', 'UTF-8');
            }

            $converted = iconv($encoding, "UTF-8//IGNORE", $value);
            if ($converted !== false) {
                return $converted;
            }
        }
    }

    return mb_convert_encoding($value, 'UTF-8', 'UTF-8');
}
?>
我们所做的一切都有一个目的。如果你想写好伪原创,你就要问自己为什么要伪原创。
如果目的是增加网站在搜索引擎中的收录量,那么通过伪原创文章来增加收录当然是可以的。
推荐工具:小狗ai伪原创插件(无需下载,智能分析替换关键词)
对于自媒体平台(百家号、今日头条等),建议不要只做简单的词句替换式伪原创,而应尽量理解文章的意思,用自己的语言重新组织和写作,并增加自己的内容。这些平台的内容质量要经过人工和机器的审核,很容易识别一篇文章是否为伪原创,审核时还会考察领域、垂直度等因素。如果你想让帐户拥有良好的信誉,就必须用心经营自己的帐户,所以单纯的伪原创在这里并不可行。(标兵SEO会写一篇关于今日头条推荐机制的文章,感兴趣的朋友可以关注)
如何写出高质量的文章伪原创
选择主题:您可以选择一些您关注并在行业内定期更新的作者,也可以选择一些高流量的主题来撰写。如果您是行业内部人士,您可以提前预测并计划接下来会发生什么。
大发猫标题:如果没有人注意到我的标题,就没有人会阅读我的内容,因此标题要优先于内容来考虑。具体做法可能因平台而异。如果目标是匹配某个搜索词,可以根据用户的搜索量,把标题写成“什么是***”、“***是什么”这类问题。如果你的平台属于APP自媒体用户群,内容是由机器推荐的,你可能需要用类似头条的方式来写标题,才能获得推荐。当然,前提是符合平台的标题规范。
论智图片:高分辨率,3张图片,引人注目。我们选择照片的原则是,即使客户没有阅读标题或看到照片,他们也想点击一篇文章。
小猫AI内容:您可以将高质量的原创内容重新组合,并添加您自己的观点和评论。如果自媒体人没有自己的态度,用户还不如直接去看官方媒体。还要注意运用您的SEO技巧:文章内容应均匀覆盖相关的行业术语,这样可以增加文章的垂直度,拓宽系统推荐的范围。随着技术的升级和搜索引擎算法的更新,伪原创文章越来越容易被识别。与此同时,基于用户投票的机制的出现,也使伪原创越来越难以生存。