Skip to content

提取文档纯文本内容

将文档内的纯文本内容提取出来

基本信息

请求方法:POST

请求路径:/api/v1/openapi/office/extract/plaintext

请求主机:developer.kdocs.cn

限流频次

| 应用类型 | 限额 |
| --- | --- |
| 测试应用 | 100 次/天 |
| 正式应用 | 200 次/天 |

目前支持的文档类型

| 文档类型 | 扩展名 |
| --- | --- |
| 演示 | .pptx .ppt |
| WPS 文字 | .docx .doc |
| WPS 表格 | .xlsx .xls |
| WPS PDF | .pdf |
参数 | 必须 | 类型 | 说明
Date
string
使用 RFC1123 时间格式的当前时间
Content-Md5
string
HTTP Body 中数据的 MD5 值的十六进制表示,必须小写;如果是 GET 请求,一律使用 URI 计算 MD5
Content-Type
string
目前固定为: application/json
Authorization
string
"WPS-2:" + app_id + ":" + sha1(app_key + Content-Md5 + Content-Type + Date)

Body 参数

参数 | 必须 | 类型 | 说明
url
string
文档下载地址
filename
string
文档名称,包含扩展名,例如:文字文稿.docx
password
string
文档打开密码(如果文档有加密,则该项必填)

返回参数

参数 | 必须 | 类型 | 说明
code
integer
错误码
+
data
data {}
响应数据

示例

请求示例

# Submit a plain-text extraction task: POST the document's download URL and
# file name as JSON, together with the Date / Content-Md5 / Content-Type /
# Authorization headers.
curl -X POST \
	-H 'Authorization: WPS-2:SX20220101ABCDEF:ac59dac1460772a04b3a97d7ef78409f28241e3a' \
	-H 'Content-Md5: d41d8cd98f00b204e9800998ecf8427e' \
	-H 'Content-Type: application/json' \
	-H 'Date: Wed, 23 Jan 2013 06:43:08 GMT' \
	-d '{"url":"https://xxx.com/xxx","filename":"使用说明.docx"}' \
	'https://developer.kdocs.cn/api/v1/openapi/office/extract/plaintext'
OkHttpClient client = new OkHttpClient();

MediaType mediaType = MediaType.parse("application/json");
RequestBody body = RequestBody.create(mediaType, "{\"url\":\"https://xxx.com/xxx\",\"filename\":\"使用说明.docx\"}");
Request request = new Request.Builder()
	.url("https://developer.kdocs.cn/api/v1/openapi/office/extract/plaintext")
	.post(body)
	.addHeader("Date", "Wed, 23 Jan 2013 06:43:08 GMT")
	.addHeader("Content-Md5", "d41d8cd98f00b204e9800998ecf8427e")
	.addHeader("Content-Type", "application/json")
	.addHeader("Authorization", "WPS-2:SX20220101ABCDEF:ac59dac1460772a04b3a97d7ef78409f28241e3a")
	.build();

Response response = client.newCall(request).execute();
package main

import (
	"fmt"
	"io/ioutil"
	"log"
	"net/http"
	"strings"
)

// main runs the example request and exits with a non-zero status if any step fails.
func main() {
	if err := run(); err != nil {
		log.Fatal(err)
	}
}

// run posts a plain-text extraction request to the API and prints the
// response value followed by the response body. It returns the first error
// hit while building, sending, or reading the request.
func run() error {
	url := "https://developer.kdocs.cn/api/v1/openapi/office/extract/plaintext"

	// JSON body: the document's download URL and its file name.
	payload := strings.NewReader("{\"url\":\"https://xxx.com/xxx\",\"filename\":\"使用说明.docx\"}")

	req, err := http.NewRequest("POST", url, payload)
	if err != nil {
		return fmt.Errorf("build request: %w", err)
	}

	req.Header.Add("Date", "Wed, 23 Jan 2013 06:43:08 GMT")
	req.Header.Add("Content-Md5", "d41d8cd98f00b204e9800998ecf8427e")
	req.Header.Add("Content-Type", "application/json")
	req.Header.Add("Authorization", "WPS-2:SX20220101ABCDEF:ac59dac1460772a04b3a97d7ef78409f28241e3a")

	// On error res may be nil, so bail out before res.Body is touched.
	res, err := http.DefaultClient.Do(req)
	if err != nil {
		return fmt.Errorf("send request: %w", err)
	}
	defer res.Body.Close()

	body, err := ioutil.ReadAll(res.Body)
	if err != nil {
		return fmt.Errorf("read response body: %w", err)
	}

	fmt.Println(res)
	fmt.Println(string(body))

	return nil
}
import http.client

# Open an HTTPS connection to the API host.
conn = http.client.HTTPSConnection("developer.kdocs.cn")

# http.client encodes a str body as ISO-8859-1, which raises
# UnicodeEncodeError for the non-Latin-1 file name below, so the JSON
# payload is passed as explicit UTF-8 bytes instead.
payload = "{\"url\":\"https://xxx.com/xxx\",\"filename\":\"使用说明.docx\"}".encode("utf-8")

headers = {
    'Date': "Wed, 23 Jan 2013 06:43:08 GMT",
    'Content-Md5': "d41d8cd98f00b204e9800998ecf8427e",
    'Content-Type': "application/json",
    'Authorization': "WPS-2:SX20220101ABCDEF:ac59dac1460772a04b3a97d7ef78409f28241e3a"
    }

try:
    conn.request("POST", "/api/v1/openapi/office/extract/plaintext", payload, headers)

    res = conn.getresponse()
    data = res.read()
finally:
    # Always release the socket, even when the request or the read fails.
    conn.close()

print(data.decode("utf-8"))
<?php

// JSON request body: the document's download URL and its file name.
$payload = "{\"url\":\"https://xxx.com/xxx\",\"filename\":\"使用说明.docx\"}";

// Headers sent with the request.
$headers = [
	"Authorization: WPS-2:SX20220101ABCDEF:ac59dac1460772a04b3a97d7ef78409f28241e3a",
	"Content-Md5: d41d8cd98f00b204e9800998ecf8427e",
	"Content-Type: application/json",
	"Date: Wed, 23 Jan 2013 06:43:08 GMT"
];

// Configure a cURL handle for a POST whose response is returned as a string.
$handle = curl_init();

curl_setopt_array($handle, [
	CURLOPT_URL => "https://developer.kdocs.cn/api/v1/openapi/office/extract/plaintext",
	CURLOPT_RETURNTRANSFER => true,
	CURLOPT_ENCODING => "",
	CURLOPT_MAXREDIRS => 10,
	CURLOPT_TIMEOUT => 30,
	CURLOPT_HTTP_VERSION => CURL_HTTP_VERSION_1_1,
	CURLOPT_CUSTOMREQUEST => "POST",
	CURLOPT_POSTFIELDS => $payload,
	CURLOPT_HTTPHEADER => $headers,
]);

// Run the transfer and capture any error text before closing the handle.
$response = curl_exec($handle);
$error = curl_error($handle);

curl_close($handle);

// Print the response on success, otherwise the cURL error message.
if (!$error) {
	echo $response;
} else {
	echo "cURL Error #:" . $error;
}
// JSON request body: the document's download URL and its file name.
const data = JSON.stringify({ url: "https://xxx.com/xxx", filename: "使用说明.docx" });

// Headers to attach, applied in this order after the request is opened.
// NOTE(review): browsers list `Date` as a forbidden request header, so the
// `Date` value may be dropped when this runs in a browser — confirm.
const requestHeaders = {
	"Date": "Wed, 23 Jan 2013 06:43:08 GMT",
	"Content-Md5": "d41d8cd98f00b204e9800998ecf8427e",
	"Content-Type": "application/json",
	"Authorization": "WPS-2:SX20220101ABCDEF:ac59dac1460772a04b3a97d7ef78409f28241e3a",
};

const xhr = new XMLHttpRequest();
xhr.withCredentials = true;

// Log the raw response text once the request reaches the DONE state.
xhr.addEventListener("readystatechange", () => {
	if (xhr.readyState !== xhr.DONE) {
		return;
	}
	console.log(xhr.responseText);
});

xhr.open("POST", "https://developer.kdocs.cn/api/v1/openapi/office/extract/plaintext");

for (const [name, value] of Object.entries(requestHeaders)) {
	xhr.setRequestHeader(name, value);
}

xhr.send(data);
// The request goes over TLS, so the module is bound to a name that says so.
const https = require("https");

// JSON request body: the document's download URL and its file name.
const payload = JSON.stringify({ url: "https://xxx.com/xxx", filename: "使用说明.docx" });

const options = {
	"method": "POST",
	"hostname": "developer.kdocs.cn",
	"port": null,
	"path": "/api/v1/openapi/office/extract/plaintext",
	"headers": {
		"Date": "Wed, 23 Jan 2013 06:43:08 GMT",
		"Content-Md5": "d41d8cd98f00b204e9800998ecf8427e",
		"Content-Type": "application/json",
		"Authorization": "WPS-2:SX20220101ABCDEF:ac59dac1460772a04b3a97d7ef78409f28241e3a"
	}
};

// Collect the response chunks and print the full body once the stream ends.
const req = https.request(options, (res) => {
	const chunks = [];

	res.on("data", (chunk) => {
		chunks.push(chunk);
	});

	res.on("end", () => {
		const body = Buffer.concat(chunks);
		console.log(body.toString());
	});
});

// Without an "error" listener a failed connection surfaces as an unhandled
// "error" event and terminates the process; report it instead.
req.on("error", (err) => {
	console.error(err);
});

req.write(payload);
req.end();
/* Create the easy handle used for this single POST request. */
CURL *hnd = curl_easy_init();

curl_easy_setopt(hnd, CURLOPT_CUSTOMREQUEST, "POST");
curl_easy_setopt(hnd, CURLOPT_URL, "https://developer.kdocs.cn/api/v1/openapi/office/extract/plaintext");

/* Build the request header list. */
struct curl_slist *headers = NULL;
headers = curl_slist_append(headers, "Date: Wed, 23 Jan 2013 06:43:08 GMT");
headers = curl_slist_append(headers, "Content-Md5: d41d8cd98f00b204e9800998ecf8427e");
headers = curl_slist_append(headers, "Content-Type: application/json");
headers = curl_slist_append(headers, "Authorization: WPS-2:SX20220101ABCDEF:ac59dac1460772a04b3a97d7ef78409f28241e3a");
curl_easy_setopt(hnd, CURLOPT_HTTPHEADER, headers);

/* JSON request body: the document's download URL and its file name. */
curl_easy_setopt(hnd, CURLOPT_POSTFIELDS, "{\"url\":\"https://xxx.com/xxx\",\"filename\":\"使用说明.docx\"}");

CURLcode ret = curl_easy_perform(hnd);

/* Release the header list and the handle once the transfer has finished;
 * the original snippet leaked both. */
curl_slist_free_all(headers);
curl_easy_cleanup(hnd);
// Endpoint of the plain-text extraction API and the JSON payload to send
// (document download URL plus file name).
string endpoint = "https://developer.kdocs.cn/api/v1/openapi/office/extract/plaintext";
string jsonBody = "{\"url\":\"https://xxx.com/xxx\",\"filename\":\"使用说明.docx\"}";

var client = new RestClient(endpoint);
var request = new RestRequest(Method.POST);

// Attach the four request headers.
request.AddHeader("Date", "Wed, 23 Jan 2013 06:43:08 GMT");
request.AddHeader("Content-Md5", "d41d8cd98f00b204e9800998ecf8427e");
request.AddHeader("Content-Type", "application/json");
request.AddHeader("Authorization", "WPS-2:SX20220101ABCDEF:ac59dac1460772a04b3a97d7ef78409f28241e3a");

// Send the JSON payload as the raw request body.
request.AddParameter("application/json", jsonBody, ParameterType.RequestBody);

IRestResponse response = client.Execute(request);

返回示例

{
  "code": 0,
  "data": {
    "task_id": "open:zjsiwfuotpbqblrlfwtkfkioargjbla"
  },
  "result": "ok"
}

查询异步任务结果

携带 task_id 查询结果

其中 result 对象说明

参数 | 必须 | 类型 | 说明
base_64_text
string
base64 编码后的纯文本内容

返回示例

{
  "code": 0,
  "data": {
    "status": "success",
    "progress": 100,
    "result": {
      "base_64_text": "5qyi6L+O5L2/55So6YeR5bGx5paH5qGj5byA5pS+5bmz5Y+w"
    }
  }
}

错误码

请参考错误码说明